
Sort and Optimise imports
h-meru committed Aug 7, 2018
1 parent 76fcab0 commit 5b2e7db
Showing 32 changed files with 190 additions and 190 deletions.
12 changes: 6 additions & 6 deletions datasets/audio.py
@@ -1,16 +1,16 @@
import librosa
import librosa.filters
-import numpy as np 
+import numpy as np
+import tensorflow as tf
from scipy import signal
-import tensorflow as tf
from scipy.io import wavfile


def load_wav(path, sr):
	return librosa.core.load(path, sr=sr)[0]

def save_wav(wav, path, sr):
-	wav *= 32767 / max(0.01, np.max(np.abs(wav))) 
+	wav *= 32767 / max(0.01, np.max(np.abs(wav)))
	#proposed by @dsmiller
	wavfile.write(path, sr, wav.astype(np.int16))

@@ -75,7 +75,7 @@ def inv_linear_spectrogram(linear_spectrogram, hparams):
		return y
	else:
		return _griffin_lim(S ** hparams.power, hparams)


def inv_mel_spectrogram(mel_spectrogram, hparams):
	'''Converts mel spectrogram to waveform using librosa'''
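As the hunk above shows, spectrogram inversion falls back to _griffin_lim for phase reconstruction. A minimal sketch of Griffin-Lim built on librosa primitives (parameter values and names here are illustrative, not necessarily the repository's hparams):

import librosa
import numpy as np

def griffin_lim_sketch(S, n_fft=2048, hop_length=275, win_length=1100, n_iters=60):
	# Start from the target magnitude with random phase.
	angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
	S_complex = np.abs(S).astype(np.complex128)
	y = librosa.istft(S_complex * angles, hop_length=hop_length, win_length=win_length)
	for _ in range(n_iters):
		# Keep the target magnitude, re-estimate phase from the current waveform.
		angles = np.exp(1j * np.angle(librosa.stft(y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)))
		y = librosa.istft(S_complex * angles, hop_length=hop_length, win_length=win_length)
	return y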
@@ -186,12 +186,12 @@ def _denormalize(D, hparams):
	if hparams.allow_clipping_in_normalization:
		if hparams.symmetric_mels:
			return (((np.clip(D, -hparams.max_abs_value,
-				hparams.max_abs_value) + hparams.max_abs_value) * -hparams.min_level_db / (2 * hparams.max_abs_value)) 
+				hparams.max_abs_value) + hparams.max_abs_value) * -hparams.min_level_db / (2 * hparams.max_abs_value))
				+ hparams.min_level_db)
		else:
			return ((np.clip(D, 0, hparams.max_abs_value) * -hparams.min_level_db / hparams.max_abs_value) + hparams.min_level_db)

	if hparams.symmetric_mels:
		return (((D + hparams.max_abs_value) * -hparams.min_level_db / (2 * hparams.max_abs_value)) + hparams.min_level_db)
	else:
-		return ((D * -hparams.min_level_db / hparams.max_abs_value) + hparams.min_level_db)
+		return ((D * -hparams.min_level_db / hparams.max_abs_value) + hparams.min_level_db)
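As a quick sanity check on the symmetric branch above, _denormalize inverts a normalization of the form D = 2 * max_abs * (S - min_db) / (-min_db) - max_abs, which maps dB values in [min_level_db, 0] onto [-max_abs_value, max_abs_value]. A round-trip sketch with the default hparams values (illustrative, not repository code):

import numpy as np

max_abs, min_db = 4., -100.
S = np.array([-100., -50., 0.])  # spectrogram values in dB
D = np.clip(2 * max_abs * ((S - min_db) / -min_db) - max_abs, -max_abs, max_abs)
S_back = ((D + max_abs) * -min_db / (2 * max_abs)) + min_db
assert np.allclose(S, S_back)  # the denormalization is the exact inverse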
15 changes: 8 additions & 7 deletions datasets/preprocessor.py
@@ -1,9 +1,10 @@
+import os
from concurrent.futures import ProcessPoolExecutor
from functools import partial
+
+import numpy as np
from datasets import audio
-import os
-import numpy as np
-from wavenet_vocoder.util import mulaw_quantize, mulaw, is_mulaw, is_mulaw_quantize
+from wavenet_vocoder.util import is_mulaw, is_mulaw_quantize, mulaw, mulaw_quantize


def build_from_path(hparams, input_dirs, mel_dir, linear_dir, wav_dir, n_jobs=12, tqdm=lambda x: x):
@@ -23,7 +24,7 @@ def build_from_path(hparams, input_dirs, mel_dir, linear_dir, wav_dir, n_jobs=12
		- A list of tuples describing the train examples; these should be written to train.txt
"""

-	# We use ProcessPoolExecutor to parallelize across processes, this is just for 
+	# We use ProcessPoolExecutor to parallelize across processes, this is just for
	# optimization purposes and it can be omitted
executor = ProcessPoolExecutor(max_workers=n_jobs)
futures = []
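The comment above refers to the standard submit-then-collect pattern; a self-contained sketch of how ProcessPoolExecutor and partial combine here (worker function and sizes are illustrative):

from concurrent.futures import ProcessPoolExecutor
from functools import partial

def _work(x):
	return x * x

if __name__ == '__main__':
	executor = ProcessPoolExecutor(max_workers=4)
	futures = [executor.submit(partial(_work, n)) for n in range(8)]
	print([future.result() for future in futures])  # results in submission order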
@@ -94,7 +95,7 @@ def _process_utterance(mel_dir, linear_dir, wav_dir, index, wav_path, text, hpar
out = mulaw(wav, hparams.quantize_channels)
constant_values = mulaw(0., hparams.quantize_channels)
out_dtype = np.float32

else:
#[-1, 1]
out = wav
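For reference, the mulaw/mulaw_quantize helpers imported from wavenet_vocoder.util implement mu-law companding, which typically follows the G.711 formula f(x) = sign(x) * ln(1 + mu*|x|) / ln(1 + mu). A hedged numpy sketch (the library's exact signatures and choice of mu may differ):

import numpy as np

def mulaw_sketch(x, mu=255):
	# Compress a float signal in [-1, 1]; output stays in [-1, 1].
	return np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)

def mulaw_quantize_sketch(x, mu=255):
	# Map companded values onto integer classes in [0, mu].
	y = mulaw_sketch(x, mu)
	return np.floor((y + 1) / 2 * mu + 0.5).astype(np.int64)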
@@ -110,7 +111,7 @@ def _process_utterance(mel_dir, linear_dir, wav_dir, index, wav_path, text, hpar

#Compute the linear scale spectrogram from the wav
linear_spectrogram = audio.linearspectrogram(wav, hparams).astype(np.float32)
-	linear_frames = linear_spectrogram.shape[1] 
+	linear_frames = linear_spectrogram.shape[1]

#sanity check
assert linear_frames == mel_frames
@@ -139,4 +140,4 @@ def _process_utterance(mel_dir, linear_dir, wav_dir, index, wav_path, text, hpar
np.save(os.path.join(linear_dir, linear_filename), linear_spectrogram.T, allow_pickle=False)

# Return a tuple describing this training example
-	return (audio_filename, mel_filename, linear_filename, time_steps, mel_frames, text)
+	return (audio_filename, mel_filename, linear_filename, time_steps, mel_frames, text)
13 changes: 7 additions & 6 deletions datasets/wavenet_preprocessor.py
@@ -1,9 +1,10 @@
+import os
from concurrent.futures import ProcessPoolExecutor
from functools import partial
+
+import numpy as np
from datasets import audio
-import os
-import numpy as np
-from wavenet_vocoder.util import mulaw_quantize, mulaw, is_mulaw, is_mulaw_quantize
+from wavenet_vocoder.util import is_mulaw, is_mulaw_quantize, mulaw, mulaw_quantize


def build_from_path(hparams, input_dir, mel_dir, wav_dir, n_jobs=12, tqdm=lambda x: x):
@@ -23,7 +24,7 @@ def build_from_path(hparams, input_dir, mel_dir, wav_dir, n_jobs=12, tqdm=lambda
		- A list of tuples describing the train examples; these should be written to train.txt
"""

-	# We use ProcessPoolExecutor to parallelize across processes, this is just for 
+	# We use ProcessPoolExecutor to parallelize across processes, this is just for
	# optimization purposes and it can be omitted
executor = ProcessPoolExecutor(max_workers=n_jobs)
futures = []
@@ -88,7 +89,7 @@ def _process_utterance(mel_dir, wav_dir, index, wav_path, hparams):
out = mulaw(wav, hparams.quantize_channels)
constant_values = mulaw(0., hparams.quantize_channels)
out_dtype = np.float32

else:
#[-1, 1]
out = wav
@@ -131,4 +132,4 @@ def _process_utterance(mel_dir, wav_dir, index, wav_path, hparams):
speaker_id = '<no_g>'

# Return a tuple describing this training example
-	return (audio_filename, mel_filename, '_', speaker_id, time_steps, mel_frames)
+	return (audio_filename, mel_filename, '_', speaker_id, time_steps, mel_frames)
21 changes: 10 additions & 11 deletions hparams.py
@@ -1,6 +1,5 @@
-import tensorflow as tf
-import numpy as np
-
+import numpy as np
+import tensorflow as tf

# Default hyperparameters
hparams = tf.contrib.training.HParams(
@@ -44,17 +43,17 @@
signal_normalization = True,
allow_clipping_in_normalization = True, #Only relevant if mel_normalization = True
symmetric_mels = False, #Whether to scale the data to be symmetric around 0
-	max_abs_value = 4., #max absolute value of data. If symmetric, data will be [-max, max] else [0, max] 
+	max_abs_value = 4., #max absolute value of data. If symmetric, data will be [-max, max] else [0, max]
normalize_for_wavenet = True, #whether to rescale to [0, 1] for wavenet.

#Limits
min_level_db = -100,
ref_level_db = 20,
	fmin = 0, #Set this to 75 if your speaker is male; if female, 125 should help remove noise (to be tuned per dataset)
-	fmax = 7600, 
+	fmax = 7600,

#Griffin Lim
-	power = 1.5, 
+	power = 1.5,
griffin_lim_iters = 60,
###########################################################################################################################################

@@ -69,7 +68,7 @@
enc_conv_channels = 512, #number of encoder convolutions filters for each layer
encoder_lstm_units = 256, #number of lstm units for each direction (forward and backward)

-	smoothing = False, #Whether to smooth the attention normalization function 
+	smoothing = False, #Whether to smooth the attention normalization function
attention_dim = 128, #dimension of attention space
attention_filters = 32, #number of attention convolution filters
attention_kernel = (31, ), #kernel size of attention convolution
@@ -228,10 +227,10 @@
'it appears that oswald had only one caller in response to all of his fpcc activities,',
'he relied on the absence of the strychnia.',
'scoggins thought it was lighter.',
-	'''would, it is probable, have eventually overcome the reluctance of some of the prisoners at least, 
+	'''would, it is probable, have eventually overcome the reluctance of some of the prisoners at least,
	and would have possessed so much moral dignity''',
-	'''Sequence to sequence models have enjoyed great success in a variety of tasks such as machine translation, speech recognition, and text summarization. 
-	This project covers a sequence to sequence model trained to predict a speech representation from an input sequence of characters. We show that 
+	'''Sequence to sequence models have enjoyed great success in a variety of tasks such as machine translation, speech recognition, and text summarization.
+	This project covers a sequence to sequence model trained to predict a speech representation from an input sequence of characters. We show that
the adopted architecture is able to perform this task with wild success.''',
'Thank you so much for your support!',
]
@@ -241,4 +240,4 @@
def hparams_debug_string():
values = hparams.values()
hp = [' %s: %s' % (name, values[name]) for name in sorted(values) if name != 'sentences']
-	return 'Hyperparameters:\n' + '\n'.join(hp)
+	return 'Hyperparameters:\n' + '\n'.join(hp)
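tf.contrib.training.HParams (TensorFlow 1.x) also supports string overrides via its parse method, which is how a --hparams 'name=value,...' flag is typically applied to this object. A sketch:

import tensorflow as tf

hp = tf.contrib.training.HParams(power=1.5, griffin_lim_iters=60)
hp.parse('power=1.2,griffin_lim_iters=30')  # comma-separated name=value overrides
print(hp.power, hp.griffin_lim_iters)  # 1.2 30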
7 changes: 3 additions & 4 deletions infolog.py
@@ -1,10 +1,9 @@
import atexit
-from datetime import datetime
import json
-from threading import Thread
+from datetime import datetime
+from threading import Thread
from urllib.request import Request, urlopen


_format = '%Y-%m-%d %H:%M:%S.%f'
_file = None
_run_name = None
@@ -48,4 +47,4 @@ def _send_slack(msg):
}).encode())


-atexit.register(_close_logfile)
+atexit.register(_close_logfile)
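atexit.register, as used above, schedules _close_logfile to run at normal interpreter shutdown, so the log file is closed without explicit cleanup calls. Minimal sketch:

import atexit

def _close():
	print('closing log file')

atexit.register(_close)  # invoked automatically when the process exits cleanly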
13 changes: 7 additions & 6 deletions preprocess.py
@@ -1,9 +1,10 @@
import argparse
-from multiprocessing import cpu_count
import os
-from tqdm import tqdm
+from multiprocessing import cpu_count
+
from datasets import preprocessor
from hparams import hparams
+from tqdm import tqdm


def preprocess(args, input_folders, out_dir, hparams):
@@ -43,9 +44,9 @@ def norm_data(args):
if args.dataset.startswith('LJSpeech'):
return [os.path.join(args.base_dir, args.dataset)]


if args.dataset == 'M-AILABS':
-		supported_languages = ['en_US', 'en_UK', 'fr_FR', 'it_IT', 'de_DE', 'es_ES', 'ru_RU', 
+		supported_languages = ['en_US', 'en_UK', 'fr_FR', 'it_IT', 'de_DE', 'es_ES', 'ru_RU',
'uk_UK', 'pl_PL', 'nl_NL', 'pt_PT', 'fi_FI', 'se_SE', 'tr_TR', 'ar_SA']
if args.language not in supported_languages:
raise ValueError('Please enter a supported language to use from M-AILABS dataset! \n{}'.format(
@@ -86,7 +87,7 @@ def main():
print('initializing preprocessing..')
parser = argparse.ArgumentParser()
parser.add_argument('--base_dir', default='')
-	parser.add_argument('--hparams', default='', 
+	parser.add_argument('--hparams', default='',
help='Hyperparameter overrides as a comma-separated list of name=value pairs')
parser.add_argument('--dataset', default='LJSpeech-1.1')
parser.add_argument('--language', default='en_US')
@@ -106,4 +107,4 @@ def main():


if __name__ == '__main__':
-	main()
+	main()
10 changes: 5 additions & 5 deletions tacotron/feeder.py
@@ -1,13 +1,13 @@
-import numpy as np
import os
import threading
import time
import traceback
-from tacotron.utils.text import text_to_sequence
+
+import numpy as np
+import tensorflow as tf
from infolog import log
from sklearn.model_selection import train_test_split
-import tensorflow as tf
+
+from tacotron.utils.text import text_to_sequence

_batches_per_group = 32

@@ -37,7 +37,7 @@ def __init__(self, coordinator, metadata_filename, hparams):
if hparams.tacotron_test_size is None:
assert hparams.tacotron_test_batches is not None

-		test_size = (hparams.tacotron_test_size if hparams.tacotron_test_size is not None
+		test_size = (hparams.tacotron_test_size if hparams.tacotron_test_size is not None
else hparams.tacotron_test_batches * hparams.tacotron_batch_size)
indices = np.arange(len(self._metadata))
train_indices, test_indices = train_test_split(indices,
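train_test_split, imported from scikit-learn above, performs the held-out split on the metadata indices; test_size may be a fraction or an absolute count. A sketch with illustrative sizes:

import numpy as np
from sklearn.model_selection import train_test_split

indices = np.arange(1000)
train_idx, test_idx = train_test_split(indices, test_size=0.05, random_state=42)
print(len(train_idx), len(test_idx))  # 950 50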
25 changes: 11 additions & 14 deletions tacotron/models/Architecture_wrappers.py
@@ -2,17 +2,14 @@
All notations and variable names are used in concordance with the original TensorFlow implementation
"""
import collections
+
import numpy as np
import tensorflow as tf
+from tacotron.models.attention import _compute_attention
from tensorflow.contrib.rnn import RNNCell
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import rnn_cell_impl
-from tensorflow.python.ops import check_ops
+from tensorflow.python.framework import ops, tensor_shape
+from tensorflow.python.ops import array_ops, check_ops, rnn_cell_impl, tensor_array_ops
from tensorflow.python.util import nest
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import tensor_array_ops
-from tensorflow.python.framework import tensor_shape
-from tacotron.models.attention import _compute_attention

_zero_state_tensors = rnn_cell_impl._zero_state_tensors

@@ -83,7 +80,7 @@ class TacotronDecoderCell(RNNCell):
* : This is typically taking a vanilla LSTM, wrapping it using tensorflow's attention wrapper,
and wrap that with the prenet before doing an input feeding, and with the prediction layer
-	that uses RNN states to project on output space. Actions marked with (*) can be replaced with 
+	that uses RNN states to project on output space. Actions marked with (*) can be replaced with
tensorflow's attention wrapper call if it was using cumulative alignments instead of previous alignments only.
"""

@@ -92,11 +89,11 @@ def __init__(self, prenet, attention_mechanism, rnn_cell, frame_projection, stop
Args:
prenet: A tensorflow fully connected layer acting as the decoder pre-net
-		attention_mechanism: A _BaseAttentionMechanism instance, usefull to 
+		attention_mechanism: A _BaseAttentionMechanism instance, useful to
learn encoder-decoder alignments
rnn_cell: Instance of RNNCell, main body of the decoder
frame_projection: tensorflow fully connected layer with r * num_mels output units
-		stop_projection: tensorflow fully connected layer, expected to project to a scalar 
+		stop_projection: tensorflow fully connected layer, expected to project to a scalar
and through a sigmoid activation
mask_finished: Boolean, Whether to mask decoder frames after the <stop_token>
"""
@@ -135,7 +132,7 @@ def state_size(self):

def zero_state(self, batch_size, dtype):
"""Return an initial (zero) state tuple for this `AttentionWrapper`.
Args:
batch_size: `0D` integer tensor: the batch size.
dtype: The internal state data type.
@@ -179,14 +176,14 @@ def __call__(self, inputs, state):


		#Compute the attention (context) vector and alignments using
-		#the new decoder cell hidden state as query vector 
+		#the new decoder cell hidden state as query vector
		#and cumulative alignments to extract location features
		#The choice of the new cell hidden state (s_{i}) of the last
		#decoder RNN Cell is based on Luong et al. (2015):
		#https://arxiv.org/pdf/1508.04025.pdf
previous_alignments = state.alignments
previous_alignment_history = state.alignment_history
-		context_vector, alignments, cumulated_alignments = _compute_attention(self._attention_mechanism,
+		context_vector, alignments, cumulated_alignments = _compute_attention(self._attention_mechanism,
LSTM_output,
previous_alignments,
attention_layer=None)
Expand All @@ -209,4 +206,4 @@ def __call__(self, inputs, state):
alignments=cumulated_alignments,
alignment_history=alignment_history)

-		return (cell_outputs, stop_tokens), next_state
+		return (cell_outputs, stop_tokens), next_state
