
[OpenCV/Python] Sheet Music Recognition (Digital Sheet Music Recognition) - 12

by 이민훈 2021. 8. 8.

12. Recognition Step - Whole Notes

Whole notes only reach the recognition stage after key signatures, notes, rests, and other objects have already been filtered out, so it is relatively easy to define conditions for them.

 

The procedure is not much different from the algorithms used in the previous parts, so let's go straight to the code.

 

# modules.py
import cv2
import numpy as np
import functions as fs
import recognition_modules as rs

def recognition(image, staves, objects):
    key = 0
    time_signature = False
    beats = []  # list of beat values
    pitches = []  # list of pitch names

    for i in range(1, len(objects) - 1):
        obj = objects[i]
        line = obj[0]
        stats = obj[1]
        stems = obj[2]
        direction = obj[3]
        (x, y, w, h, area) = stats
        staff = staves[line * 5: (line + 1) * 5]
        if not time_signature:  # the key signature has not been fully scanned yet (the time signature has not been found)
            ts, temp_key = rs.recognize_key(image, staff, stats)
            time_signature = ts
            key += temp_key
        else:  # the key signature has been fully scanned
            notes = rs.recognize_note(image, staff, stats, stems, direction)
            if len(notes[0]):
                for beat in notes[0]:
                    beats.append(beat)
                for pitch in notes[1]:
                    pitches.append(pitch)
            else:
                rest = rs.recognize_rest(image, staff, stats)
                if rest:
                    beats.append(rest)
                    pitches.append(-1)
                else:
                    whole_note, pitch = rs.recognize_whole_note(image, staff, stats)
                    if whole_note:
                        beats.append(whole_note)
                        pitches.append(pitch)

        cv2.rectangle(image, (x, y, w, h), (255, 0, 0), 1)
        fs.put_text(image, i, (x, y - fs.weighted(20)))

    return image, key, beats, pitches

 

# recognition_modules.py
import functions as fs
import cv2

def recognize_whole_note(image, staff, stats):
    whole_note = 0
    pitch = 0
    (x, y, w, h, area) = stats
    whole_note_condition = (
            fs.weighted(22) >= w >= fs.weighted(12) >= h >= fs.weighted(9)
    )
    if whole_note_condition:
        dot_rect = (
            x + w,
            y - fs.weighted(10),
            fs.weighted(10),
            fs.weighted(20)
        )
        pixels = fs.count_rect_pixels(image, dot_rect)
        whole_note = -1 if pixels >= fs.weighted(10) else 1
        pitch = recognize_pitch(image, staff, fs.get_center(y, h))

    return whole_note, pitch
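recognize_whole_note leans on a few helpers from functions.py that were introduced in earlier parts of this series. In case you are only following this post, here is a rough sketch of what count_rect_pixels and get_center might look like; this is just my guess at their shape, not the actual code from the series, and it assumes the working image is a single-channel binary image:

# functions.py (sketch only, not the series' actual implementation)
import cv2

def get_center(y, h):
    # vertical center of a bounding box
    return y + h / 2

def count_rect_pixels(image, rect):
    # count the white (non-zero) pixels inside the given (x, y, w, h) rectangle
    x, y, w, h = rect
    return cv2.countNonZero(image[y:y + h, x:x + w])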

 

Now let's print the values that the recognition function returns to Main.py.
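For a quick check, something like the following can be added to Main.py right after the call to modules.recognition (my own addition, not part of the original listing):

# Main.py - quick dump of the recognition results (temporary check)
print(beats)    # beat values; a negative value means a dotted note
print(pitches)  # pitch indices; -1 means a rest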

 

We now have most of the information needed for playback, so this data can be processed further, for example to generate a music file.
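As a rough illustration (not part of the series' code), the two returned lists could be turned into (duration in seconds, pitch index) pairs, which a MIDI writer or a simple synthesizer could then consume. The mapping from pitch indices to concrete note names depends on recognize_pitch from the earlier posts, so the indices are passed through unchanged here:

# playback_sketch.py - my own sketch, assuming the beat/pitch encoding used above
def beat_to_seconds(beat, bpm=120):
    # abs(beat) is the note denominator (1 = whole, 2 = half, 4 = quarter, ...)
    # and a negative sign marks a dotted note (1.5x duration)
    quarter = 60.0 / bpm
    duration = quarter * 4.0 / abs(beat)
    return duration * 1.5 if beat < 0 else duration

def to_playback_events(beats, pitches, bpm=120):
    # pair each duration with its pitch index; a pitch of -1 is a rest
    return [(beat_to_seconds(b, bpm), p) for b, p in zip(beats, pitches)]

# e.g. to_playback_events([-1, 2, 2, -2], [8, 8, 11, -1])
# -> [(3.0, 8), (1.0, 8), (1.0, 11), (1.5, -1)]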

 

The beats came out as [-1, 2, 2, -2, -16, 8, 2, -2, -2, 2, -4, -32, -4, -4, 2, -1, 4, 4, -8, 16, -8, -16, 32, 16, -8, 8, 1, 8, 8, 8, 1, 2, -16, -1, 8, 4, -16, -16, -2, 4, 2, 8, 8, 8, 8, 2, 2, -32, -32, 8, 1, 32, 16, -8, 16, 1, 8, 8, 4, 2, 8, 8, -32, 32, 32, -8, 2, 16, 16, 4, 2, 16, 4, 4, 4, 8, 8, 2, 1, 1] (negative values mark dotted notes),


and the pitches came out as [8, 8, 11, -1, -1, -1, -1, 10, 9, 12, 11, 9, 11, 14, 14, 9, 8, 12, 11, -1, 8, -1, 9, 9, 10, -1, -1, 15, -1, 15, -1, -1, 16, -1, -1, -1, 11, 10, -1, -1, -1, 10, -1, 11, 12, 16, -1, 6, 8, -1, 11, 12, -1, -1, -1, 8, 11, -1, -1, -1, 11, -1, 10, 13, 14, -1, -1, 7, 7, -1, -1, 9, -1, 10, -1, 10, 10, -1, 11, -1] (a pitch of -1 marks a rest).

 

Let's display the image.

 

Below is the code with the put_text calls added.

 

# modules.py
import cv2
import numpy as np
import functions as fs
import recognition_modules as rs

def recognition(image, staves, objects):
    key = 0
    time_signature = False
    beats = []  # list of beat values
    pitches = []  # list of pitch names

    for i in range(1, len(objects) - 1):
        obj = objects[i]
        line = obj[0]
        stats = obj[1]
        stems = obj[2]
        direction = obj[3]
        (x, y, w, h, area) = stats
        staff = staves[line * 5: (line + 1) * 5]
        if not time_signature:  # the key signature has not been fully scanned yet (the time signature has not been found)
            ts, temp_key = rs.recognize_key(image, staff, stats)
            time_signature = ts
            key += temp_key
            if time_signature:
                fs.put_text(image, key, (x, y + h + fs.weighted(30)))
        else:  # the key signature has been fully scanned
            notes = rs.recognize_note(image, staff, stats, stems, direction)
            if len(notes[0]):
                for beat in notes[0]:
                    beats.append(beat)
                for pitch in notes[1]:
                    pitches.append(pitch)
            else:
                rest = rs.recognize_rest(image, staff, stats)
                if rest:
                    beats.append(rest)
                    pitches.append(-1)
                else:
                    whole_note, pitch = rs.recognize_whole_note(image, staff, stats)
                    if whole_note:
                        beats.append(whole_note)
                        pitches.append(pitch)

        cv2.rectangle(image, (x, y, w, h), (255, 0, 0), 1)
        fs.put_text(image, i, (x, y - fs.weighted(20)))

    return image, key, beats, pitches

 

# recognition_modules.py
import functions as fs
import cv2

def recognize_note(image, staff, stats, stems, direction):
    (x, y, w, h, area) = stats
    notes = []
    pitches = []
    note_condition = (
            len(stems) and
            w >= fs.weighted(10) and  # width condition
            h >= fs.weighted(35) and  # height condition
            area >= fs.weighted(95)  # pixel count condition
    )
    if note_condition:
        for i in range(len(stems)):
            stem = stems[i]
            head_exist, head_fill, head_center = recognize_note_head(image, stem, direction)
            if head_exist:
                tail_cnt = recognize_note_tail(image, i, stem, direction)
                dot_exist = recognize_note_dot(image, stem, direction, len(stems), tail_cnt)
                note_classification = (
                    ((not head_fill and tail_cnt == 0 and not dot_exist), 2),
                    ((not head_fill and tail_cnt == 0 and dot_exist), -2),
                    ((head_fill and tail_cnt == 0 and not dot_exist), 4),
                    ((head_fill and tail_cnt == 0 and dot_exist), -4),
                    ((head_fill and tail_cnt == 1 and not dot_exist), 8),
                    ((head_fill and tail_cnt == 1 and dot_exist), -8),
                    ((head_fill and tail_cnt == 2 and not dot_exist), 16),
                    ((head_fill and tail_cnt == 2 and dot_exist), -16),
                    ((head_fill and tail_cnt == 3 and not dot_exist), 32),
                    ((head_fill and tail_cnt == 3 and dot_exist), -32)
                )

                for j in range(len(note_classification)):
                    if note_classification[j][0]:
                        note = note_classification[j][1]
                        pitch = recognize_pitch(image, staff, head_center)
                        notes.append(note)
                        pitches.append(pitch)
                        fs.put_text(image, note, (stem[0] - fs.weighted(10), stem[1] + stem[3] + fs.weighted(30)))
                        fs.put_text(image, pitch, (stem[0] - fs.weighted(10), stem[1] + stem[3] + fs.weighted(60)))
                        break

    return notes, pitches

 

# recognition_modules.py
import functions as fs
import cv2

def recognize_rest(image, staff, stats):
    (x, y, w, h, area) = stats
    rest = 0
    center = fs.get_center(y, h)
    rest_condition = staff[3] > center > staff[1]
    if rest_condition:
        cnt = fs.count_pixels_part(image, y, y + h, x + fs.weighted(1))
        if fs.weighted(35) >= h >= fs.weighted(25):
            if cnt == 3 and fs.weighted(11) >= w >= fs.weighted(7):
                rest = 4
            elif cnt == 1 and fs.weighted(14) >= w >= fs.weighted(11):
                rest = 16
        elif fs.weighted(22) >= h >= fs.weighted(16):
            if fs.weighted(15) >= w >= fs.weighted(9):
                rest = 8
        elif fs.weighted(8) >= h:
            if staff[1] + fs.weighted(5) >= center >= staff[1]:
                rest = 1
            elif staff[2] >= center >= staff[1] + fs.weighted(5):
                rest = 2
        if recognize_rest_dot(image, stats):
            rest *= -1
        if rest:
            fs.put_text(image, rest, (x, y + h + fs.weighted(30)))
            fs.put_text(image, -1, (x, y + h + fs.weighted(60)))

    return rest

 

# recognition_modules.py
import functions as fs
import cv2

def recognize_whole_note(image, staff, stats):
    whole_note = 0
    pitch = 0
    (x, y, w, h, area) = stats
    whole_note_condition = (
            fs.weighted(22) >= w >= fs.weighted(12) >= h >= fs.weighted(9)
    )
    if whole_note_condition:
        dot_rect = (
            x + w,
            y - fs.weighted(10),
            fs.weighted(10),
            fs.weighted(20)
        )
        pixels = fs.count_rect_pixels(image, dot_rect)
        whole_note = -1 if pixels >= fs.weighted(10) else 1
        pitch = recognize_pitch(image, staff, fs.get_center(y, h))

        fs.put_text(image, whole_note, (x, y + h + fs.weighted(30)))
        fs.put_text(image, pitch, (x, y + h + fs.weighted(60)))

    return whole_note, pitch

 

It is enough to add just the put_text parts.

 

Below is Main.py.

 

# Main.py
import cv2
import os
import numpy as np
import functions as fs
import modules

# Load the image
resource_path = os.getcwd() + "/resource/"
image_0 = cv2.imread(resource_path + "music.jpg")

# 1. Extract the staff regions and remove other noise
image_1 = modules.remove_noise(image_0)

# 2. Remove the staff lines
image_2, staves = modules.remove_staves(image_1)

# 3. Normalize the score image
image_3, staves = modules.normalization(image_2, staves, 10)

# 4. Object detection step
image_4, objects = modules.object_detection(image_3, staves)

# 5. Object analysis step
image_5, objects = modules.object_analysis(image_4, objects)

# 6. Recognition step
image_6, key, beats, pitches = modules.recognition(image_5, staves, objects)

# Display the image
cv2.imshow('image', image_6)
k = cv2.waitKey(0)
if k == 27:
    cv2.destroyAllWindows()

 
