Skip to content

Commit

Permalink
GTA synthesis
Browse files Browse the repository at this point in the history
Signed-off-by: begeekmyfriend <[email protected]>
  • Loading branch information
begeekmyfriend committed Apr 28, 2019
1 parent 6f5fe3f commit 1ec3de8
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 19 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ logs will be stored inside **logs-Wavenet**.

**Note:**
- If model argument is not provided, training will default to Tacotron-2 model training. (both models)
- Please refer to train arguments under [train.py](https://github.com/Rayhane-mamah/Tacotron-2/blob/master/train.py) for a set of options you can use.
- Please refer to train arguments under [train.py](https://github.com/begeekmyfriend/Tacotron-2/blob/master/train.py) for a set of options you can use.
- It is now possible to make wavenet preprocessing alone using **wavenet_preprocess.py**.

# Synthesis
Expand Down Expand Up @@ -175,7 +175,7 @@ Synthesizing the **waveforms** conditioned on previously synthesized Mel-spectr
**Note:**
- If model argument is not provided, synthesis will default to Tacotron-2 model synthesis. (End-to-End TTS)
- Please refer to synthesis arguments under [synthesize.py](https://github.com/Rayhane-mamah/Tacotron-2/blob/master/synthesize.py) for a set of options you can use.
- Please refer to synthesis arguments under [synthesize.py](https://github.com/begeekmyfriend/Tacotron-2/blob/master/synthesize.py) for a set of options you can use.


# References and Resources:
Expand Down
2 changes: 1 addition & 1 deletion hparams.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@
tacotron_batch_size = 48, #number of training samples on each training steps
#Tacotron Batch synthesis supports ~16x the training batch size (no gradients during testing).
#Training Tacotron with unmasked paddings makes it aware of them, which makes synthesis times different from training. We thus recommend masking the encoder.
tacotron_synthesis_batch_size = 32 * 16, #DO NOT MAKE THIS BIGGER THAN 1 IF YOU DIDN'T TRAIN TACOTRON WITH "mask_encoder=True"!!
tacotron_synthesis_batch_size = 1, #DO NOT MAKE THIS BIGGER THAN 1 IF YOU DIDN'T TRAIN TACOTRON WITH "mask_encoder=True"!!
tacotron_test_size = 0.05, #% of data to keep as test data, if None, tacotron_test_batches must be not None. (5% is enough to have a good idea about overfit)
tacotron_test_batches = None, #number of test batches.

Expand Down
32 changes: 16 additions & 16 deletions tacotron/synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def synthesize(self, texts, basenames, out_dir, log_dir, mel_filenames):
np_targets = [np.load(mel_filename) for mel_filename in mel_filenames]
target_lengths = [len(np_target) for np_target in np_targets]
padded_targets = self._prepare_targets(np_targets, self._hparams.outputs_per_step)
feed_dict[self.model.mel_targets] = padded_targets.reshape(len(np_targets), -1, 80)
feed_dict[self.model.mel_targets] = padded_targets.reshape(len(np_targets), -1, hparams.num_mels)

if self.gta or not hparams.predict_linear:
mels, alignments = self.session.run([self.mel_outputs, self.alignments], feed_dict=feed_dict)
Expand Down Expand Up @@ -118,37 +118,37 @@ def synthesize(self, texts, basenames, out_dir, log_dir, mel_filenames):
speaker_ids = []
for i, mel in enumerate(mels):
#Get speaker id for global conditioning (only used with GTA generally)
# speaker_id = '<no_g>'
# speaker_ids.append(speaker_id)
speaker_id = '<no_g>'
speaker_ids.append(speaker_id)

# Write the spectrogram to disk
# Note: outputs mel-spectrogram files and target ones have same names, just different folders
# mel_filename = os.path.join(out_dir, 'mel-{:03d}.npy'.format(basenames[i]))
# np.save(mel_filename, mel, allow_pickle=False)
# saved_mels_paths.append(mel_filename)
mel_filename = os.path.join(out_dir, '{}.npy'.format(basenames[i]))
np.save(mel_filename, mel, allow_pickle=False)
saved_mels_paths.append(mel_filename)

if log_dir is not None:
#save wav (mel -> wav)
# wav = audio.inv_mel_spectrogram(mel.T, hparams)
# audio.save_wav(wav, os.path.join(log_dir, 'wavs/wav-{:03d}-mel.wav'.format(basenames[i])), hparams)
wav = audio.inv_mel_spectrogram(mel.T, hparams)
audio.save_wav(wav, os.path.join(log_dir, 'wavs/wav-{}-mel.wav'.format(basenames[i])), hparams)

#save alignments
# plot.plot_alignment(alignments[i], os.path.join(log_dir, 'plots/alignment-{:03d}.png'.format(basenames[i])),
# info='{}'.format(texts[i]), split_title=True)
plot.plot_alignment(alignments[i], os.path.join(log_dir, 'plots/alignment-{}.png'.format(basenames[i])),
info='{}'.format(texts[i]), split_title=True)

#save mel spectrogram plot
# plot.plot_spectrogram(mel, os.path.join(log_dir, 'plots/mel-{:03d}.png'.format(basenames[i])),
# info='{}'.format(texts[i]), split_title=True)
plot.plot_spectrogram(mel, os.path.join(log_dir, 'plots/mel-{}.png'.format(basenames[i])),
info='{}'.format(texts[i]), split_title=True)

if hparams.predict_linear:
if hparams.predict_linear and not self.gta:
#save wav (linear -> wav)
linear_wav = self.session.run(self.linear_wav_outputs, feed_dict={self.linear_spectrograms: linears[i]})
wav = audio.inv_preemphasis(linear_wav, hparams.preemphasis)
audio.save_wav(wav, os.path.join(log_dir, 'wavs/wav-{:03d}-linear.wav'.format(i)), hparams)
audio.save_wav(wav, os.path.join(log_dir, 'wavs/wav-{}-linear.wav'.format(i)), hparams)

#save mel spectrogram plot
# plot.plot_spectrogram(linears[i], os.path.join(log_dir, 'plots/linear-{:03d}.png'.format(basenames[i])),
# info='{}'.format(texts[i]), split_title=True, auto_aspect=True)
plot.plot_spectrogram(linears[i], os.path.join(log_dir, 'plots/linear-{}.png'.format(basenames[i])),
info='{}'.format(texts[i]), split_title=True, auto_aspect=True)

return saved_mels_paths, speaker_ids

Expand Down

0 comments on commit 1ec3de8

Please sign in to comment.