Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
autumn-2-net committed Sep 20, 2023
2 parents 596f49e + f932419 commit 6f71ff6
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 13 deletions.
2 changes: 1 addition & 1 deletion configs/base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ max_batch_size: 8
max_batch_frames: 80000
max_val_batch_size: 1
max_val_batch_frames: 10000
num_valid_plots: 10
num_valid_plots: 100
log_interval: 100
num_sanity_val_steps: 1 # steps of validation at the beginning
val_check_interval: 1000
Expand Down
3 changes: 2 additions & 1 deletion configs/continuous.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ binarization_args:
num_workers: 0
merge_rest: true # merge continuous rest notes
merge_slur: true # merge slurs with the same pitch
round_midi: true # round midi value
round_midi: false # round midi value
slur_tolerance: 0.5 # maximum pitch difference (in semitones) for a slur note to be merged into the preceding note

key_shift_factor: 8
key_shift_range: [-12, 12]
Expand Down
5 changes: 4 additions & 1 deletion configs/discrete.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ binary_data_dir: data/some_ds_quant_spk4_aug8/binary
binarization_args:
num_workers: 0
shuffle: true
merge_rest: true # merge continuous rest notes
merge_slur: true # merge slurs with the same pitch
use_bound_loss: true
use_midi_loss: true

Expand All @@ -18,7 +20,8 @@ pe: rmvpe
pe_ckpt: pretrained/rmvpe/model.pt

# global constants
midi_shift_range: [-6, 6]
key_shift_range: [-12, 12]
key_shift_factor: 8

# neural networks
units_dim: 80 # 768
Expand Down
3 changes: 0 additions & 3 deletions configs/quant_tow_head_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@ base_config:

binary_data_dir: data/some_ds_quant_spk4_aug8/binary

# global constants
midi_shift_range: [-6, 6]

# neural networks
units_dim: 80 # 768
midi_num_bins: 129 # rest = 128
Expand Down
1 change: 0 additions & 1 deletion configs/some_bound.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ pe_ckpt: pretrained/rmvpe/model.pt

# global constants
midi_prob_deviation: 0.5
midi_shift_range: [-6, 6]
rest_threshold: 0.1

# neural networks
Expand Down
3 changes: 2 additions & 1 deletion preprocessing/me_binarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(self, config: dict):
self.lr = LengthRegulator().to(self.device)
self.merge_rest = self.binarization_args['merge_rest']
self.merge_slur = self.binarization_args['merge_slur']
self.slur_tolerance = self.binarization_args['slur_tolerance']
self.round_midi = self.binarization_args['round_midi']
self.key_shift_min, self.key_shift_max = self.config['key_shift_range']

Expand Down Expand Up @@ -82,7 +83,7 @@ def load_meta_data(self, raw_data_dir: pathlib.Path, ds_id):

if self.merge_slur:
# merge slurs with the same pitch
note_seq, note_dur = merge_slurs(note_seq, note_dur, note_slur)
note_seq, note_dur = merge_slurs(note_seq, note_dur, note_slur, tolerance=self.slur_tolerance)

if self.merge_rest:
# merge continuous rest notes
Expand Down
40 changes: 35 additions & 5 deletions utils/binarizer_utils.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,52 @@
from typing import Tuple

import librosa
import numpy as np
import parselmouth
import torch

from utils.pitch_utils import interp_f0


def merge_slurs(note_seq: list, note_dur: list, note_slur: list, tolerance=None) -> Tuple[list, list]:
    """
    Merge slur notes into the note they continue.

    A note whose ``note_slur`` flag is truthy is folded into the previous output note when
    the two have the same pitch (or are both rests). If ``tolerance`` is given, two pitched
    notes are also merged when the new pitch lies within ``tolerance`` (in MIDI note numbers)
    of the current merged pitch and of the lowest and highest pitch already merged into that
    note, so a chain of small steps cannot drift arbitrarily far. The pitch of a merged note
    is the duration-weighted mean of its members and its duration is their sum.

    :param note_seq: note names accepted by ``librosa.note_to_midi``, or ``'rest'``
    :param note_dur: duration of each note
    :param note_slur: per-note flags; a truthy value marks the note as a slur
    :param tolerance: maximum allowed MIDI difference for merging; ``None`` merges equal pitches only
    :return: (merged note names, merged durations); pitched notes are formatted by
        ``librosa.midi_to_note(..., cents=True, unicode=False)``, rests stay ``'rest'``
    """
    if not note_seq:
        # nothing to merge; indexing element 0 below would raise IndexError
        return [], []

    # rests are represented as None internally so numeric pitches are never compared to a string
    note_midi = [None if n == 'rest' else librosa.note_to_midi(n, round_midi=False) for n in note_seq]

    merged_midi = [note_midi[0]]
    merged_dur = [note_dur[0]]
    # lowest / highest original pitch folded into the current output note (None for a rest)
    group_min = group_max = note_midi[0]

    for i in range(1, len(note_midi)):
        midi, dur = note_midi[i], note_dur[i]
        last = merged_midi[-1]
        if note_slur[i] and _slur_can_merge(midi, last, group_min, group_max, tolerance):
            merged_midi[-1] = _weighted_midi(last, merged_dur[-1], midi, dur)
            merged_dur[-1] += dur
            if midi is not None:
                group_min = min(group_min, midi)
                group_max = max(group_max, midi)
        else:
            # start a new output note and a new pitch range
            merged_midi.append(midi)
            merged_dur.append(dur)
            group_min = group_max = midi

    note_seq_merge_slur = [
        'rest' if m is None else librosa.midi_to_note(m, cents=True, unicode=False) for m in merged_midi
    ]
    return note_seq_merge_slur, merged_dur


def _slur_can_merge(midi, last, group_min, group_max, tolerance) -> bool:
    # Decide whether a slur note (midi) may be folded into the current output note (last).
    if tolerance is None or midi is None or last is None:
        # without a tolerance, or when a rest is involved, only identical values merge
        return midi == last
    return all(abs(midi - ref) <= tolerance for ref in (last, group_min, group_max))


def _weighted_midi(midi1, dur1, midi2, dur2):
    # Duration-weighted mean of two pitches; equal values (including two rests) pass through.
    if midi1 == midi2:
        return midi1
    total = dur1 + dur2
    if total == 0:
        # both durations are zero, so the weights are undefined: fall back to the plain mean
        return (midi1 + midi2) / 2
    return (midi1 * dur1 + midi2 * dur2) / total


Expand Down

0 comments on commit 6f71ff6

Please sign in to comment.