-
Notifications
You must be signed in to change notification settings - Fork 2
/
etc.yaml
121 lines (113 loc) · 3.2 KB
/
etc.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Run tag, taken from the name of the config Hydra was launched with.
tag: '${hydra.job.config_name}'
# Date/time sub-directory, built from two `now` interpolations.
log_dir_name: '${now:%Y-%m-%d}/${now:%H-%M-%S}'
# Per-run output directory; hydra.run.dir in this file points at it.
log_dir_path: './logs/${tag}/${log_dir_name}'
# Hydra runtime settings: each run writes into ${log_dir_path}, and the job
# log file is placed in that same directory under a timestamped file name.
hydra:
  run:
    dir: ${log_dir_path}
  job_logging:
    handlers:
      file:
        filename: ${hydra.run.dir}/${hydra.job.name}_${now:%Y-%m-%d}_${now:%H-%M-%S}.log
# Defaults list: select the `colorlog` option for both Hydra logging groups.
defaults:
  - override hydra/job_logging: colorlog
  - override hydra/hydra_logging: colorlog
# 47 pitch names from E2 up to D6 in ascending semitone order, one octave per
# line. Entries are quoted so the '#' in sharps can never be read as a comment.
NOTE_OCTAVES:
  [
    'E2', 'F2', 'F#2', 'G2', 'G#2', 'A2', 'A#2', 'B2',
    'C3', 'C#3', 'D3', 'D#3', 'E3', 'F3', 'F#3', 'G3', 'G#3', 'A3', 'A#3', 'B3',
    'C4', 'C#4', 'D4', 'D#4', 'E4', 'F4', 'F#4', 'G4', 'G#4', 'A4', 'A#4', 'B4',
    'C5', 'C#5', 'D5', 'D#5', 'E5', 'F5', 'F#5', 'G5', 'G#5', 'A5', 'A#5', 'B5',
    'C6', 'C#6', 'D6'
  ]
# The twelve pitch classes, starting at C, written with sharps only.
CHROMATIC_SCALE:
  ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
# Top-level run settings. num_strings, log_level, device and num_workers are
# re-used by other sections of this file through ${...} interpolation.
num_strings: 6
log_level: 'INFO'
dry_run: false
raise_error: true
comment: ''
random_seed: 1
device: 'cuda'
use_amp: false
tqdm: true

# Logging cadence and loader workers.
tb_log_dir: tensorboard
log_interval_steps: 100
num_workers: 8

# Validation / evaluation switches and intervals.
print_examples_validation: true
validate_interval_updates: 1000
validate_interval_epochs: 100
evaluate_before_training: false
class_probabilities: false

# Trainer selection and end-of-run behaviour.
batch_size: 64
trainer: ClassifierTrainer
run_test_at_end: false
save_weights: true
segment_audio_frames: null
# Preprocessing switches (booleans written in canonical lowercase form).
preprocessing:
  force_reprocess: false
  skip_empty_notes: true
# Audio loading and feature-extraction settings.
# NOTE(review): nesting was reconstructed from a copy whose indentation was
# lost. `sr` and `device` must sit under `audio` (see ${audio.sr} and
# ${device}); the *_params groups are assumed to sit under `audio` as well —
# confirm against the code that reads them.
audio:
  feature_extractor: raw  # Choose from torchaudio, librosa, raw
  loader: torchaudio  # Choose from torchaudio, librosa
  sr: 24000  # Target sample rate
  mono: true  # Set to true to convert audio to mono
  resample: true  # Set to true to resample audio
  audio_load_sec: 5  # Duration of audio loaded in seconds. Null loads all the audio.
  slide_window_sec: 2.5  # Duration of the sliding window in seconds. Only used if audio_load_sec is not null.
  normalize: false  # Set to true if audio should be normalized
  feature: cqt  # Choose from cqt, spectrogram, melspectrogram, chroma_stft, raw
  device: ${device}
  # One parameter group per value of `feature`.
  chroma_stft_params:
    sr: ${audio.sr}
    n_fft: 2048
    hop_length: 512
    n_chroma: 84
  cqt_params:
    sr: ${audio.sr}
    n_bins: 84
    hop_length: 512
    bins_per_octave: 12
  spectrogram_params:
    n_fft: 512
    win_length: 64
    hop_length: 64
  melspectrogram_params:
    n_fft: 2048
    win_length: null
    hop_length: null
    n_mels: 80
    f_min: 0
    f_max: null
# Dataset and data-loader settings. num_strings, log_level and num_workers
# are pulled from the top-level keys of the same name via interpolation, so
# they have to live under `data` rather than at the top level.
data:
  num_strings: ${num_strings}
  log_level: ${log_level}
  target_len: 100
  target_num_classes: 23
  target_num_strings: ${num_strings}
  features_dir: null
  segments_dir: "./data/segments"
  targets_dir: "./data/targets"
  timestep_change: "random"  # "random" or value
  max_input_dim: null
  cache_data_mem: false
  audio_dir: "./data/audio"
  num_workers: ${num_workers}
  train_batch_size: 8
  valid_batch_size: 1
  train_shuffle: false
  test_batch_size: 1
  train_csv_file: null
  valid_csv_file: null
  test_csv_file: null
  dummy_csv_file: null
  onset_target_feature_size: null
  augment_onset: 0
  skip_cache: true
# Audio augmentation settings.
# NOTE(review): kept as a top-level section; the original indentation was
# lost, so confirm it is not meant to be nested under another section.
audio_augmentation:
  pitch_shift_steps: null
# Learning-rate scheduler settings, split into an `updates` and an `epoch`
# entry.
# NOTE(review): nesting was reconstructed from a copy whose indentation was
# lost — confirm that name/gamma/interval belong to `updates` (next to
# `warmup`) and that `epoch` is a sibling of `updates`.
lr_scheduler:
  updates:
    warmup:
      init_lr: 0.00001
      num_warmup: 1000
      warmup_strategy: cos
    name: ExponentialLR
    gamma: 0.998
    interval: 100
  epoch: null
  # Disabled alternative, kept for reference:
  #   name: ReduceLROnPlateau
  #   metric: loss
  #   factor: 0.95
  #   patience: 1