Skip to content

Commit

Permalink
Dataset preprocessing
Browse files — browse the repository at this point in the history
ㄴ debugging
  • Loading branch information
Yeongtae committed Jan 16, 2020
1 parent 9a8ad53 commit 8fad645
Showing 1 changed file with 19 additions and 13 deletions.
32 changes: 19 additions & 13 deletions preprocess_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,7 +15,7 @@
trim_top_db = 23
skip_len = 14848

def preprocess_audio(file_list, silence_audio_size, pre_emphasis=True):
def preprocess_audio(file_list, silence_audio_size, pre_emphasis=False):
for F in file_list:
f = open(F, encoding='utf-8')
R = f.readlines()
Expand All @@ -27,10 +27,10 @@ def preprocess_audio(file_list, silence_audio_size, pre_emphasis=True):
data, sampling_rate = librosa.core.load(wav_file, sr)
data = data / np.abs(data).max() *0.999
data_= librosa.effects.trim(data, top_db= trim_top_db, frame_length=trim_fft_size, hop_length=trim_hop_size)[0]
data_ = data_*max_wav_value
if (pre_emphasis):
data_ = np.append(data_[0], data_[1:] - 0.97 * data_[:-1])
data_ = data_ / np.abs(data_).max() * 0.999
data_ = data_ * max_wav_value
data_ = np.append(data_, [0.]*silence_audio_size)
data_ = data_.astype(dtype=np.int16)
write(wav_file, sr, data_)
Expand All @@ -49,33 +49,39 @@ def remove_short_audios(file_name):
data, sampling_rate = librosa.core.load(wav_file, sr)
if(len(data) >= skip_len):
L.append(r)

skiped_file_name = file_name.split('.')[0]+'_skiped.txt'
f = open(skiped_file_name,'w',encoding='utf-8')
if (i % 100 == 0):
print(i)
tmp = file_name.split('.')
tmp.insert(1,'_skipped.')
skipped_file_name = "".join(tmp)
f = open(skipped_file_name,'w',encoding='utf-8')
f.writelines(L)
f.close()

if __name__ == "__main__":
"""
usage
python preprocess_audio.py -f=filelists/ljs_audio_text_test_filelist.txt,filelists/ljs_audio_text_train_filelist.txt,filelists/ljs_audio_text_val_filelist.txt -s=5 -p -r
python preprocess_dataset.py -f=metadata.csv -s=5 -t -p -r
python preprocess_dataset.py -f=metadata.csv
"""
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--file_list', type=str,
help='file list to preprocess')
help='Metadata file list to preprocess')
parser.add_argument('-s', '--silence_padding', type=int, default=0,
help='Adding silence padding at the end of each audio, silence audio size is hop_length * silence padding')
parser.add_argument('-p', '--pre_emphasis', action='store_true',
help="do or don't do pre_emphasis")
help="Doing pre_emphasis")
parser.add_argument('-t', '--trimming', action='store_true',
help="Doing trimming audios")
parser.add_argument('-r', '--remove_short_audios',action='store_true',
help="do or don't remove short audios")
help="Removing short audios in metadata file")
args = parser.parse_args()
file_list = args.file_list.split(',')
silence_audio_size = trim_hop_size * args.silence_padding
remove_short_audios = args.remove_short_audios

preprocess_audio(file_list, silence_audio_size)

if(remove_short_audios):
preprocess_audio(file_list, silence_audio_size, args.pre_emphasis)

if(args.remove_short_audios):
for f in file_list:
remove_short_audios(remove_short_audios)
remove_short_audios(f)

0 comments on commit 8fad645

Please sign in to comment.