helpers.py (forked from Rayhane-mamah/Tacotron-2)
import numpy as np
import tensorflow as tf
from tensorflow.contrib.seq2seq import Helper


class TacoTestHelper(Helper):
    def __init__(self, batch_size, hparams):
        with tf.name_scope('TacoTestHelper'):
            self._batch_size = batch_size
            self._output_dim = hparams.num_mels
            self._reduction_factor = hparams.outputs_per_step
            self.stop_at_any = hparams.stop_at_any

    @property
    def batch_size(self):
        return self._batch_size

    @property
    def token_output_size(self):
        return self._reduction_factor

    @property
    def sample_ids_shape(self):
        return tf.TensorShape([])

    @property
    def sample_ids_dtype(self):
        return np.int32

    def initialize(self, name=None):
        return (tf.tile([False], [self._batch_size]), _go_frames(self._batch_size, self._output_dim))

    def sample(self, time, outputs, state, name=None):
        return tf.tile([0], [self._batch_size])  # Return all 0; we ignore them

    def next_inputs(self, time, outputs, state, sample_ids, stop_token_prediction, name=None):
        '''Stop on EOS. Otherwise, pass the last output as the next input and pass through state.'''
        with tf.name_scope('TacoTestHelper'):
            # A sequence is finished when the output probability is > 0.5
            finished = tf.cast(tf.round(stop_token_prediction), tf.bool)

            # Since we predict r frames at each step, two stopping modes are possible:
            #   - Stop when the model outputs p > 0.5 for any of the r frames (recommended)
            #   - Stop when the model outputs p > 0.5 for all r frames (safer)
            # Note:
            #   With enough training steps, the model should learn to predict when to stop
            #   correctly, and stop_at_any = True is recommended. If the model has not yet
            #   learned to stop correctly (it stops too soon), the safer option can be used
            #   to get a correct synthesis.
            if self.stop_at_any:
                finished = tf.reduce_any(tf.reduce_all(finished, axis=0))  # Recommended
            else:
                finished = tf.reduce_all(tf.reduce_all(finished, axis=0))  # Safer option

            # Feed the last output frame as the next input. outputs is [N, output_dim * r]
            next_inputs = outputs[:, -self._output_dim:]
            next_state = state
            return (finished, next_inputs, next_state)
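

# Illustrative sketch, not part of the original file: a plain-NumPy check of the stop
# decision above. With batch_size = 1 and r = 3, a stop_token_prediction of
# [[0.2, 0.7, 0.1]] rounds to [[False, True, False]]; the stop_at_any reduction fires
# while the "all r frames" reduction does not. The function below is hypothetical and
# only mirrors the two tf.reduce_* expressions on a concrete example.
def _stop_decision_example():
    rounded = np.round([[0.2, 0.7, 0.1]]).astype(bool)  # [[False, True, False]]
    stop_at_any = np.any(np.all(rounded, axis=0))       # True  (recommended mode)
    stop_at_all = np.all(np.all(rounded, axis=0))       # False (safer mode)
    return stop_at_any, stop_at_all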
class TacoTrainingHelper(Helper):
    def __init__(self, batch_size, targets, hparams, gta, evaluating, global_step):
        # inputs is [N, T_in], targets is [N, T_out, D]
        with tf.name_scope('TacoTrainingHelper'):
            self._batch_size = batch_size
            self._output_dim = hparams.num_mels
            self._reduction_factor = hparams.outputs_per_step
            self._ratio = tf.convert_to_tensor(hparams.tacotron_teacher_forcing_ratio)
            self.gta = gta
            self.eval = evaluating
            self._hparams = hparams
            self.global_step = global_step

            r = self._reduction_factor
            # Feed every r-th target frame as input
            self._targets = targets[:, r-1::r, :]

            # Maximal sequence length
            self._lengths = tf.tile([tf.shape(self._targets)[1]], [self._batch_size])

    @property
    def batch_size(self):
        return self._batch_size

    @property
    def token_output_size(self):
        return self._reduction_factor

    @property
    def sample_ids_shape(self):
        return tf.TensorShape([])

    @property
    def sample_ids_dtype(self):
        return np.int32

    def initialize(self, name=None):
        # Compute the teacher forcing ratio for this global step.
        # In GTA mode, override the teacher forcing scheme with full teacher forcing.
        if self.gta:
            self._ratio = tf.convert_to_tensor(1.)  # Force the GTA model to always feed ground-truth frames
        elif self.eval and self._hparams.tacotron_natural_eval:
            self._ratio = tf.convert_to_tensor(0.)  # Force the eval model to always feed its own predictions
        else:
            if self._hparams.tacotron_teacher_forcing_mode == 'scheduled':
                self._ratio = _teacher_forcing_ratio_decay(self._hparams.tacotron_teacher_forcing_init_ratio,
                    self.global_step, self._hparams)

        return (tf.tile([False], [self._batch_size]), _go_frames(self._batch_size, self._output_dim))

    def sample(self, time, outputs, state, name=None):
        return tf.tile([0], [self._batch_size])  # Return all 0; we ignore them

    def next_inputs(self, time, outputs, state, sample_ids, stop_token_prediction, name=None):
        with tf.name_scope(name or 'TacoTrainingHelper'):
            # Synthesis stop (we let the model see paddings because they are masked when computing the loss)
            finished = (time + 1 >= self._lengths)

            # Pick the previous output or the ground-truth frame at random, with respect to the teacher forcing ratio
            next_inputs = tf.cond(
                tf.less(tf.random_uniform([], minval=0, maxval=1, dtype=tf.float32), self._ratio),
                lambda: self._targets[:, time, :],  # Teacher forcing: return the true frame
                lambda: outputs[:, -self._output_dim:])

            # Pass on the state
            next_state = state
            return (finished, next_inputs, next_state)
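

# Illustrative sketch, not part of the original file: the slicing targets[:, r-1::r, :]
# used in TacoTrainingHelper keeps the last frame of every reduction group, which is
# the only frame fed back as decoder input when the model emits r frames per step.
# The function below is hypothetical and only demonstrates that slicing on dummy data.
def _target_slicing_example(r=2):
    targets = np.arange(6).reshape(1, 6, 1)  # frame indices 0..5 as dummy mel values
    sliced = targets[:, r-1::r, :]           # with r=2 this keeps frames 1, 3, 5
    return sliced.squeeze()                  # -> array([1, 3, 5])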
def _go_frames(batch_size, output_dim):
    '''Returns all-zero <GO> frames for a given batch size and output dimension'''
    return tf.tile([[0.0]], [batch_size, output_dim])


def _teacher_forcing_ratio_decay(init_tfr, global_step, hparams):
    #################################################################
    # Narrow Cosine Decay:
    #
    # Phase 1: tfr = init
    #   We only start the teacher forcing ratio decay after 10k steps
    #
    # Phase 2: tfr in ]init, final[
    #   The decay reaches its minimal value at step ~40k
    #
    # Phase 3: tfr = final
    #   Clip at the minimal teacher forcing ratio value (step >~ 40k)
    #################################################################

    # Pick the final teacher forcing ratio value
    if hparams.tacotron_teacher_forcing_final_ratio is not None:
        alpha = float(hparams.tacotron_teacher_forcing_final_ratio / hparams.tacotron_teacher_forcing_init_ratio)
    else:
        assert hparams.tacotron_teacher_forcing_decay_alpha is not None
        alpha = hparams.tacotron_teacher_forcing_decay_alpha

    # Compute the natural cosine decay
    tfr = tf.train.cosine_decay(init_tfr,
        global_step=global_step - hparams.tacotron_teacher_forcing_start_decay,  # tfr ~= init at step 10k
        decay_steps=hparams.tacotron_teacher_forcing_decay_steps,  # tfr ~= final at step ~40k
        alpha=alpha,  # tfr = alpha * init_tfr as the final value
        name='tfr_cosine_decay')

    # Force the teacher forcing ratio to take its initial value when global_step < start decay step.
    narrow_tfr = tf.cond(
        tf.less(global_step, tf.convert_to_tensor(hparams.tacotron_teacher_forcing_start_decay)),
        lambda: tf.convert_to_tensor(init_tfr),
        lambda: tfr)

    return narrow_tfr
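

# Illustrative sketch, not part of the original file: tf.train.cosine_decay computes
#   decayed = (1 - alpha) * 0.5 * (1 + cos(pi * min(step, decay_steps) / decay_steps)) + alpha
# so _teacher_forcing_ratio_decay returns init_tfr * decayed once global_step passes
# tacotron_teacher_forcing_start_decay, and init_tfr before that. The NumPy function
# below is hypothetical; its default start_decay/decay_steps values are illustrative
# only and follow the 10k / ~40k numbers in the comments above, not the project's
# actual hparams.
def _approx_teacher_forcing_ratio(global_step, init_tfr=1., alpha=0.,
        start_decay=10000, decay_steps=30000):
    if global_step < start_decay:
        return init_tfr  # Phase 1: the ratio stays at its initial value
    step = min(global_step - start_decay, decay_steps)
    decayed = (1 - alpha) * 0.5 * (1 + np.cos(np.pi * step / decay_steps)) + alpha
    return init_tfr * decayed  # Phases 2 and 3: cosine decay, then clipped at alpha * init_tfr

# For example, under these assumed values:
#   _approx_teacher_forcing_ratio(25000) ~= 0.5 and _approx_teacher_forcing_ratio(40000) == 0.0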