12. 인식 과정 - 온음표
온음표는 조표, 음표, 쉼표 등 다 걸러지고 남은 객체들에 한하여 인식에 들어가기 때문에,
비교적 조건을 주기가 수월합니다.
진행 방식은 앞서 사용했던 알고리즘들과 별반 다르지 않으니 바로 코드를 적도록 하겠습니다.
# modules.py
import cv2
import numpy as np
import functions as fs
import recognition_modules as rs
def recognition(image, staves, objects):
    """Run the symbol-recognition pass over every detected object.

    Args:
        image: preprocessed score image; helper calls draw annotations on it.
        staves: flat list of staff-line y coordinates, 5 per stave.
        objects: list of (stave_index, stats, stems, direction) entries,
            where stats is an (x, y, w, h, area) bounding-box tuple.

    Returns:
        (image, key, beats, pitches) — the annotated image, the accumulated
        key-signature value, the duration list, and the pitch list
        (-1 in pitches marks a rest).
    """
    key = 0
    time_signature = False
    beats = []  # list of note/rest durations (negative values mean dotted)
    pitches = []  # list of pitch values; -1 marks a rest
    # NOTE(review): the first and last objects are skipped — presumably
    # non-musical artifacts from object detection; confirm upstream.
    for i in range(1, len(objects) - 1):
        obj = objects[i]
        line = obj[0]
        stats = obj[1]
        stems = obj[2]
        direction = obj[3]
        (x, y, w, h, area) = stats
        staff = staves[line * 5: (line + 1) * 5]  # the 5 lines of this stave
        if not time_signature:  # key signature area not fully scanned yet (time signature not found)
            ts, temp_key = rs.recognize_key(image, staff, stats)
            time_signature = ts
            key += temp_key  # accumulate sharps/flats found so far
        else:  # key signature fully scanned; classify notes, then rests, then whole notes
            notes = rs.recognize_note(image, staff, stats, stems, direction)
            if len(notes[0]):
                for beat in notes[0]:
                    beats.append(beat)
                for pitch in notes[1]:
                    pitches.append(pitch)
            else:
                rest = rs.recognize_rest(image, staff, stats)
                if rest:
                    beats.append(rest)
                    pitches.append(-1)
                else:
                    # Whole notes are whatever remains after notes and rests.
                    whole_note, pitch = rs.recognize_whole_note(image, staff, stats)
                    if whole_note:
                        beats.append(whole_note)
                        pitches.append(pitch)
        cv2.rectangle(image, (x, y, w, h), (255, 0, 0), 1)
        fs.put_text(image, i, (x, y - fs.weighted(20)))
    return image, key, beats, pitches
# recognition_modules.py
import functions as fs
import cv2
def recognize_whole_note(image, staff, stats):
    """Detect a whole note among the leftover (unclassified) objects.

    Args:
        image: preprocessed score image.
        staff: the 5 staff-line y coordinates for this stave.
        stats: (x, y, w, h, area) bounding box of the candidate object.

    Returns:
        (whole_note, pitch): whole_note is 0 when the object is not a whole
        note, 1 for a plain whole note, -1 for a dotted whole note
        (negative duration encodes a dot throughout this project);
        pitch is the recognized pitch index (0 when not detected).
    """
    whole_note = 0
    pitch = 0
    (x, y, w, h, area) = stats
    # Size gate: a whole-note head is wider than it is tall.
    # Fixed typo: variable was previously named "while_note_condition".
    whole_note_condition = (
        fs.weighted(22) >= w >= fs.weighted(12) >= h >= fs.weighted(9)
    )
    if whole_note_condition:
        # Probe the region just right of the head for an augmentation dot.
        dot_rect = (
            x + w,
            y - fs.weighted(10),
            fs.weighted(10),
            fs.weighted(20)
        )
        pixels = fs.count_rect_pixels(image, dot_rect)
        whole_note = -1 if pixels >= fs.weighted(10) else 1
        pitch = recognize_pitch(image, staff, fs.get_center(y, h))
    return whole_note, pitch
recognition 함수에서 Main.py로 반환한 값들을 이제 찍어보도록 하겠습니다.
음악재생에 필요한 대부분의 요소를 얻었으니, 이 데이터를 가지고 음악 파일을 만든다거나 하는 등 가공이 가능합니다.
박자로는 [-1, 2, 2, -2, -16, 8, 2, -2, -2, 2, -4, -32, -4, -4, 2, -1, 4, 4, -8, 16, -8, -16, 32, 16, -8, 8, 1, 8, 8, 8, 1, 2, -16, -1, 8, 4, -16, -16, -2, 4, 2, 8, 8, 8, 8, 2, 2, -32, -32, 8, 1, 32, 16, -8, 16, 1, 8, 8, 4, 2, 8, 8, -32, 32, 32, -8, 2, 16, 16, 4, 2, 16, 4, 4, 4, 8, 8, 2, 1, 1]가 나왔고,
음정으로는 [8, 8, 11, -1, -1, -1, -1, 10, 9, 12, 11, 9, 11, 14, 14, 9, 8, 12, 11, -1, 8, -1, 9, 9, 10, -1, -1, 15, -1, 15, -1, -1, 16, -1, -1, -1, 11, 10, -1, -1, -1, 10, -1, 11, 12, 16, -1, 6, 8, -1, 11, 12, -1, -1, -1, 8, 11, -1, -1, -1, 11, -1, 10, 13, 14, -1, -1, 7, 7, -1, -1, 9, -1, 10, -1, 10, 10, -1, 11, -1]가 나왔습니다.
이미지를 띄워보겠습니다.
아래는 put_text를 추가한 코드들입니다.
# modules.py
import cv2
import numpy as np
import functions as fs
import recognition_modules as rs
def recognition(image, staves, objects):
    """Recognize all detected objects and annotate the score image.

    Scans key-signature symbols until the time signature is found, then
    classifies each remaining object as a note, a rest, or a whole note.

    Args:
        image: preprocessed score image; annotations are drawn onto it.
        staves: flat list of staff-line y coordinates, 5 per stave.
        objects: list of (stave_index, stats, stems, direction) entries.

    Returns:
        (image, key, beats, pitches); -1 in pitches marks a rest, and a
        negative duration in beats marks a dotted note.
    """
    key = 0
    time_signature = False
    beats = []    # durations (negative = dotted)
    pitches = []  # pitch indices (-1 marks a rest)
    for idx in range(1, len(objects) - 1):  # first/last objects are skipped
        entry = objects[idx]
        line, stats, stems, direction = entry[0], entry[1], entry[2], entry[3]
        x, y, w, h, area = stats
        staff = staves[line * 5:(line + 1) * 5]  # this stave's 5 lines
        if time_signature:
            # Key signature already fully scanned: note -> rest -> whole note.
            notes = rs.recognize_note(image, staff, stats, stems, direction)
            if notes[0]:
                beats.extend(notes[0])
                pitches.extend(notes[1])
            else:
                rest = rs.recognize_rest(image, staff, stats)
                if rest:
                    beats.append(rest)
                    pitches.append(-1)
                else:
                    whole_note, pitch = rs.recognize_whole_note(image, staff, stats)
                    if whole_note:
                        beats.append(whole_note)
                        pitches.append(pitch)
        else:
            # Still scanning the key-signature area.
            ts, temp_key = rs.recognize_key(image, staff, stats)
            time_signature = ts
            key += temp_key
            if time_signature:
                # Print the accumulated key value once the time signature appears.
                fs.put_text(image, key, (x, y + h + fs.weighted(30)))
        cv2.rectangle(image, (x, y, w, h), (255, 0, 0), 1)
        fs.put_text(image, idx, (x, y - fs.weighted(20)))
    return image, key, beats, pitches
# recognition_modules.py
import functions as fs
import cv2
def recognize_note(image, staff, stats, stems, direction):
    """Classify stemmed notes inside one detected object.

    Args:
        image: preprocessed score image; duration/pitch labels are drawn on it.
        staff: the 5 staff-line y coordinates for this stave.
        stats: (x, y, w, h, area) bounding box of the object.
        stems: list of stem rectangles found in the object.
        direction: stem direction flag used by the head/tail/dot helpers.

    Returns:
        (notes, pitches) — parallel lists; a negative duration means dotted.
    """
    x, y, w, h, area = stats
    notes = []
    pitches = []
    # An object qualifies as a stemmed note only if it has at least one
    # stem and clears the minimum width/height/pixel-count thresholds.
    is_note = (
        len(stems) and
        w >= fs.weighted(10) and   # width condition
        h >= fs.weighted(35) and   # height condition
        area >= fs.weighted(95)    # pixel-count condition
    )
    if is_note:
        # Base duration keyed by (head filled?, tail count):
        # open head = half note; filled head + 0..3 tails = 4/8/16/32.
        base_duration = {
            (False, 0): 2,
            (True, 0): 4,
            (True, 1): 8,
            (True, 2): 16,
            (True, 3): 32,
        }
        for idx, stem in enumerate(stems):
            head_exist, head_fill, head_center = recognize_note_head(image, stem, direction)
            if not head_exist:
                continue
            tail_cnt = recognize_note_tail(image, idx, stem, direction)
            dot_exist = recognize_note_dot(image, stem, direction, len(stems), tail_cnt)
            note = base_duration.get((bool(head_fill), tail_cnt))
            if note is None:
                continue  # unclassifiable head/tail combination — skip, as before
            if dot_exist:
                note = -note  # negative duration encodes a dotted note
            pitch = recognize_pitch(image, staff, head_center)
            notes.append(note)
            pitches.append(pitch)
            fs.put_text(image, note, (stem[0] - fs.weighted(10), stem[1] + stem[3] + fs.weighted(30)))
            fs.put_text(image, pitch, (stem[0] - fs.weighted(10), stem[1] + stem[3] + fs.weighted(60)))
    return notes, pitches
# recognition_modules.py
import functions as fs
import cv2
def recognize_rest(image, staff, stats):
    """Classify a rest symbol by its size, position, and pixel profile.

    Args:
        image: preprocessed score image; labels are drawn on it when matched.
        staff: the 5 staff-line y coordinates for this stave.
        stats: (x, y, w, h, area) bounding box of the candidate object.

    Returns:
        The rest duration (1, 2, 4, 8, 16), negated when dotted, or 0 when
        the object is not recognized as a rest.
    """
    (x, y, w, h, area) = stats
    rest = 0
    center = fs.get_center(y, h)
    # Rests sit between the 2nd and 4th staff lines.
    rest_condition = staff[3] > center > staff[1]
    if rest_condition:
        # Vertical pixel-run count along a column near the left edge —
        # distinguishes quarter rests (3 runs) from sixteenth rests (1 run).
        cnt = fs.count_pixels_part(image, y, y + h, x + fs.weighted(1))
        if fs.weighted(35) >= h >= fs.weighted(25):
            if cnt == 3 and fs.weighted(11) >= w >= fs.weighted(7):
                rest = 4
            elif cnt == 1 and fs.weighted(14) >= w >= fs.weighted(11):
                rest = 16
        elif fs.weighted(22) >= h >= fs.weighted(16):
            if fs.weighted(15) >= w >= fs.weighted(9):
                rest = 8
        elif fs.weighted(8) >= h:
            # Flat bars: whole rest hangs just below the 2nd line,
            # half rest sits between that band and the 3rd line.
            if staff[1] + fs.weighted(5) >= center >= staff[1]:
                rest = 1
            elif staff[2] >= center >= staff[1] + fs.weighted(5):
                rest = 2
        if recognize_rest_dot(image, stats):
            rest *= -1  # negative duration encodes a dotted rest
        if rest:
            fs.put_text(image, rest, (x, y + h + fs.weighted(30)))
            fs.put_text(image, -1, (x, y + h + fs.weighted(60)))
    return rest
# recognition_modules.py
import functions as fs
import cv2
def recognize_whole_note(image, staff, stats):
    """Detect a whole note among the leftover objects and label it on the image.

    Args:
        image: preprocessed score image; duration/pitch labels are drawn on it.
        staff: the 5 staff-line y coordinates for this stave.
        stats: (x, y, w, h, area) bounding box of the candidate object.

    Returns:
        (whole_note, pitch): whole_note is 0 when not detected, 1 for a plain
        whole note, -1 for a dotted whole note (negative = dotted, as
        elsewhere in this project); pitch is the recognized pitch index.
    """
    whole_note = 0
    pitch = 0
    (x, y, w, h, area) = stats
    # Size gate: a whole-note head is wider than it is tall.
    # Fixed typo: variable was previously named "while_note_condition".
    whole_note_condition = (
        fs.weighted(22) >= w >= fs.weighted(12) >= h >= fs.weighted(9)
    )
    if whole_note_condition:
        # Probe the region just right of the head for an augmentation dot.
        dot_rect = (
            x + w,
            y - fs.weighted(10),
            fs.weighted(10),
            fs.weighted(20)
        )
        pixels = fs.count_rect_pixels(image, dot_rect)
        whole_note = -1 if pixels >= fs.weighted(10) else 1
        pitch = recognize_pitch(image, staff, fs.get_center(y, h))
        fs.put_text(image, whole_note, (x, y + h + fs.weighted(30)))
        fs.put_text(image, pitch, (x, y + h + fs.weighted(60)))
    return whole_note, pitch
put_text 부분만 추가하셔도 됩니다.
아래는 Main.py 전체 코드입니다.
# Main.py
import cv2
import os
import numpy as np
import functions as fs
import modules

# Load the input image
resource_path = os.getcwd() + "/resource/"
image_0 = cv2.imread(resource_path + "music.jpg")
# 1. Extract the stave regions and remove other noise
image_1 = modules.remove_noise(image_0)
# 2. Remove the staff lines
image_2, staves = modules.remove_staves(image_1)
# 3. Normalize the score image (target staff spacing: 10)
image_3, staves = modules.normalization(image_2, staves, 10)
# 4. Object detection pass
image_4, objects = modules.object_detection(image_3, staves)
# 5. Object analysis pass
image_5, objects = modules.object_analysis(image_4, objects)
# 6. Recognition pass — yields key signature, durations, and pitches
image_6, key, beats, pitches = modules.recognition(image_5, staves, objects)

# Display the annotated image; ESC (27) closes the window
cv2.imshow('image', image_6)
k = cv2.waitKey(0)
if k == 27:
    cv2.destroyAllWindows()
'인공지능 > 컴퓨터비전' 카테고리의 다른 글
[OpenCV/Python] 악보 인식(디지털 악보 인식) - 11 (0) | 2021.08.07 |
---|---|
[OpenCV/Python] 악보 인식(디지털 악보 인식) - 10 (0) | 2021.08.07 |
[OpenCV/Python] 악보 인식(디지털 악보 인식) - 9 (0) | 2021.08.06 |
[OpenCV/Python] 악보 인식(디지털 악보 인식) - 8 (0) | 2021.08.06 |
[OpenCV/Python] 악보 인식(디지털 악보 인식) - 7 (6) | 2021.08.06 |
댓글