Skip to content

Commit

Permalink
multi-gpu, full batch synth, bugs fix
Browse files Browse the repository at this point in the history
- WaveNet scopes fix
- Tacotron batch synthesis bugs fix, add natural synthesis in batches (restricted to encoder masked models)
- increase model prosody learning by using encoder masks
  • Loading branch information
Rayhane-mamah authored Oct 7, 2018
1 parent 5b2118d commit 826f40b
Show file tree
Hide file tree
Showing 14 changed files with 749 additions and 362 deletions.
160 changes: 117 additions & 43 deletions hparams.py

Large diffs are not rendered by default.

Binary file added papers/ClariNet.pdf
Binary file not shown.
1 change: 1 addition & 0 deletions synthesize.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse
import os
from warnings import warn
from time import sleep

import tensorflow as tf

Expand Down
61 changes: 43 additions & 18 deletions tacotron/feeder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sklearn.model_selection import train_test_split
from tacotron.utils.text import text_to_sequence

_batches_per_group = 32
_batches_per_group = 64

class Feeder:
"""
Expand Down Expand Up @@ -78,32 +78,35 @@ def __init__(self, coordinator, metadata_filename, hparams):
tf.placeholder(tf.float32, shape=(None, None), name='token_targets'),
tf.placeholder(tf.float32, shape=(None, None, hparams.num_freq), name='linear_targets'),
tf.placeholder(tf.int32, shape=(None, ), name='targets_lengths'),
tf.placeholder(tf.int32, shape=(hparams.tacotron_num_gpus, None), name='split_infos'),
]

# Create queue for buffering data
queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.float32, tf.float32, tf.int32], name='input_queue')
queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.float32, tf.float32, tf.int32, tf.int32], name='input_queue')
self._enqueue_op = queue.enqueue(self._placeholders)
self.inputs, self.input_lengths, self.mel_targets, self.token_targets, self.linear_targets, self.targets_lengths = queue.dequeue()
self.inputs, self.input_lengths, self.mel_targets, self.token_targets, self.linear_targets, self.targets_lengths, self.split_infos = queue.dequeue()

self.inputs.set_shape(self._placeholders[0].shape)
self.input_lengths.set_shape(self._placeholders[1].shape)
self.mel_targets.set_shape(self._placeholders[2].shape)
self.token_targets.set_shape(self._placeholders[3].shape)
self.linear_targets.set_shape(self._placeholders[4].shape)
self.targets_lengths.set_shape(self._placeholders[5].shape)
self.split_infos.set_shape(self._placeholders[6].shape)

# Create eval queue for buffering eval data
eval_queue = tf.FIFOQueue(1, [tf.int32, tf.int32, tf.float32, tf.float32, tf.float32, tf.int32], name='eval_queue')
eval_queue = tf.FIFOQueue(1, [tf.int32, tf.int32, tf.float32, tf.float32, tf.float32, tf.int32, tf.int32], name='eval_queue')
self._eval_enqueue_op = eval_queue.enqueue(self._placeholders)
self.eval_inputs, self.eval_input_lengths, self.eval_mel_targets, self.eval_token_targets, \
self.eval_linear_targets, self.eval_targets_lengths = eval_queue.dequeue()
self.eval_linear_targets, self.eval_targets_lengths, self.eval_split_infos = eval_queue.dequeue()

self.eval_inputs.set_shape(self._placeholders[0].shape)
self.eval_input_lengths.set_shape(self._placeholders[1].shape)
self.eval_mel_targets.set_shape(self._placeholders[2].shape)
self.eval_token_targets.set_shape(self._placeholders[3].shape)
self.eval_linear_targets.set_shape(self._placeholders[4].shape)
self.eval_targets_lengths.set_shape(self._placeholders[5].shape)
self.eval_split_infos.set_shape(self._placeholders[6].shape)

def start_threads(self, session):
self._session = session
Expand Down Expand Up @@ -192,29 +195,51 @@ def _get_next_example(self):
linear_target = np.load(os.path.join(self._linear_dir, meta[2]))
return (input_data, mel_target, token_target, linear_target, len(mel_target))

def _prepare_batch(self, batches, outputs_per_step):
assert 0 == len(batches) % self._hparams.tacotron_num_gpus
size_per_device = int(len(batches) / self._hparams.tacotron_num_gpus)
np.random.shuffle(batches)

def _prepare_batch(self, batch, outputs_per_step):
np.random.shuffle(batch)
inputs = self._prepare_inputs([x[0] for x in batch])
input_lengths = np.asarray([len(x[0]) for x in batch], dtype=np.int32)
mel_targets = self._prepare_targets([x[1] for x in batch], outputs_per_step)
#Pad sequences with 1 to infer that the sequence is done
token_targets = self._prepare_token_targets([x[2] for x in batch], outputs_per_step)
linear_targets = self._prepare_targets([x[3] for x in batch], outputs_per_step)
targets_lengths = np.asarray([x[-1] for x in batch], dtype=np.int32) #Used to mask loss
return (inputs, input_lengths, mel_targets, token_targets, linear_targets, targets_lengths)
inputs = None
mel_targets = None
token_targets = None
linear_targets = None
targets_lengths = None
split_infos = []

targets_lengths = np.asarray([x[-1] for x in batches], dtype=np.int32) #Used to mask loss
input_lengths = np.asarray([len(x[0]) for x in batches], dtype=np.int32)

for i in range(self._hparams.tacotron_num_gpus):
batch = batches[size_per_device*i:size_per_device*(i+1)]
input_cur_device, input_max_len = self._prepare_inputs([x[0] for x in batch])
inputs = np.concatenate((inputs, input_cur_device), axis=1) if inputs is not None else input_cur_device
mel_target_cur_device, mel_target_max_len = self._prepare_targets([x[1] for x in batch], outputs_per_step)
mel_targets = np.concatenate(( mel_targets, mel_target_cur_device), axis=1) if mel_targets is not None else mel_target_cur_device

#Pad sequences with 1 to infer that the sequence is done
token_target_cur_device, token_target_max_len = self._prepare_token_targets([x[2] for x in batch], outputs_per_step)
token_targets = np.concatenate((token_targets, token_target_cur_device),axis=1) if token_targets is not None else token_target_cur_device
linear_targets_cur_device, linear_target_max_len = self._prepare_targets([x[3] for x in batch], outputs_per_step)
linear_targets = np.concatenate((linear_targets, linear_targets_cur_device), axis=1) if linear_targets is not None else linear_targets_cur_device
split_infos.append([input_max_len, mel_target_max_len, token_target_max_len, linear_target_max_len])

split_infos = np.asarray(split_infos, dtype=np.int32)
return (inputs, input_lengths, mel_targets, token_targets, linear_targets, targets_lengths, split_infos)

def _prepare_inputs(self, inputs):
max_len = max([len(x) for x in inputs])
return np.stack([self._pad_input(x, max_len) for x in inputs])
return np.stack([self._pad_input(x, max_len) for x in inputs]), max_len

def _prepare_targets(self, targets, alignment):
max_len = max([len(t) for t in targets])
return np.stack([self._pad_target(t, self._round_up(max_len, alignment)) for t in targets])
data_len = self._round_up(max_len, alignment)
return np.stack([self._pad_target(t, data_len) for t in targets]), data_len

def _prepare_token_targets(self, targets, alignment):
max_len = max([len(t) for t in targets]) + 1
return np.stack([self._pad_token_target(t, self._round_up(max_len, alignment)) for t in targets])
data_len = self._round_up(max_len, alignment)
return np.stack([self._pad_token_target(t, data_len) for t in targets]), data_len

def _pad_input(self, x, length):
return np.pad(x, (0, length - x.shape[0]), mode='constant', constant_values=self._pad)
Expand Down
8 changes: 4 additions & 4 deletions tacotron/models/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ def next_inputs(self, time, outputs, state, sample_ids, stop_token_prediction, n
# learn to stop correctly yet, (stops too soon) one could choose to use the safer option
# to get a correct synthesis
if self.stop_at_any:
finished = tf.reduce_any(finished) #Recommended
finished = tf.reduce_any(tf.reduce_all(finished, axis=0)) #Recommended
else:
finished = tf.reduce_all(finished) #Safer option
finished = tf.reduce_all(tf.reduce_all(finished, axis=0)) #Safer option

# Feed last output frame as next input. outputs is [N, output_dim * r]
next_inputs = outputs[:, -self._output_dim:]
Expand All @@ -60,7 +60,7 @@ def next_inputs(self, time, outputs, state, sample_ids, stop_token_prediction, n


class TacoTrainingHelper(Helper):
def __init__(self, batch_size, targets, stop_targets, hparams, gta, evaluating, global_step):
def __init__(self, batch_size, targets, hparams, gta, evaluating, global_step):
# inputs is [N, T_in], targets is [N, T_out, D]
with tf.name_scope('TacoTrainingHelper'):
self._batch_size = batch_size
Expand Down Expand Up @@ -158,4 +158,4 @@ def _teacher_forcing_ratio_decay(init_tfr, global_step, hparams):
lambda: tf.convert_to_tensor(init_tfr),
lambda: tfr)

return narrow_tfr
return narrow_tfr
Loading

0 comments on commit 826f40b

Please sign in to comment.