Commit
Fix pylint warnings
reuben committed Apr 11, 2019
1 parent a16e468 commit 13757a4
Showing 10 changed files with 199 additions and 204 deletions.
94 changes: 43 additions & 51 deletions DeepSpeech.py
@@ -5,17 +5,17 @@
import os
import sys

log_level_index = sys.argv.index('--log_level') + 1 if '--log_level' in sys.argv else 0
os.environ['TF_CPP_MIN_LOG_LEVEL'] = sys.argv[log_level_index] if log_level_index > 0 and log_level_index < len(sys.argv) else '3'
LOG_LEVEL_INDEX = sys.argv.index('--log_level') + 1 if '--log_level' in sys.argv else 0
os.environ['TF_CPP_MIN_LOG_LEVEL'] = sys.argv[LOG_LEVEL_INDEX] if 0 < LOG_LEVEL_INDEX < len(sys.argv) else '3'

import time
import evaluate
import numpy as np
import progressbar
import shutil
import tensorflow as tf

from ds_ctcdecoder import ctc_beam_search_decoder, Scorer
from evaluate import evaluate
from six.moves import zip, range
from tensorflow.python.tools import freeze_graph
from util.config import Config, initialize_globals
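A quick note on the log-level change above: pylint's chained-comparison check prefers the `0 < LOG_LEVEL_INDEX < len(sys.argv)` form, which is equivalent to the original two-clause test. A minimal, self-contained check of that equivalence, using a hypothetical argv:

    argv = ['prog', '--log_level', '2']
    idx = argv.index('--log_level') + 1 if '--log_level' in argv else 0
    # The chained form reads as a single range test and is what pylint suggests.
    assert (idx > 0 and idx < len(argv)) == (0 < idx < len(argv))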
@@ -49,7 +49,7 @@ def create_overlapping_windows(batch_x):
# convolution returns patches of the input tensor as is, and we can create
# overlapping windows over the MFCCs.
eye_filter = tf.constant(np.eye(window_width * num_channels)
.reshape(window_width, num_channels, window_width * num_channels), tf.float32)
.reshape(window_width, num_channels, window_width * num_channels), tf.float32) # pylint: disable=bad-continuation

# Create overlapping windows
batch_x = tf.nn.conv1d(batch_x, eye_filter, stride=1, padding='SAME')
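For readers unfamiliar with the identity-filter trick above: convolving with a reshaped identity matrix makes each output step the flattened concatenation of the surrounding input frames. A minimal NumPy sketch of the same idea, assuming window_width=3 and num_channels=2 (illustrative values only, not the model's configuration):

    import numpy as np

    x = np.arange(10 * 2, dtype=np.float32).reshape(10, 2)   # [time, channels]
    pad = np.pad(x, ((1, 1), (0, 0)))                        # SAME-style padding for a width-3 window
    windows = np.stack([pad[t:t + 3].reshape(-1) for t in range(10)])
    print(windows.shape)                                      # (10, 6) == (time, window_width * num_channels)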
@@ -172,7 +172,7 @@ def create_model(batch_x, seq_length, dropout, reuse=False, previous_state=None,
# Conveniently, this loss function is implemented in TensorFlow.
# Thus, we can simply make use of this implementation to define our loss.

def calculate_mean_edit_distance_and_loss(iterator, tower, dropout, reuse):
def calculate_mean_edit_distance_and_loss(iterator, dropout, reuse):
r'''
This routine beam search decodes a mini-batch and calculates the loss and mean edit distance.
Next to total and average loss it returns the mean edit distance,
@@ -246,10 +246,10 @@ def get_tower_results(iterator, optimizer, dropout_rates):
device = Config.available_devices[i]
with tf.device(device):
# Create a scope for all operations of tower i
with tf.name_scope('tower_%d' % i) as scope:
with tf.name_scope('tower_%d' % i):
# Calculate the avg_loss and mean_edit_distance and retrieve the decoded
# batch along with the original batch's labels (Y) of this tower
avg_loss = calculate_mean_edit_distance_and_loss(iterator, i, dropout_rates, reuse=i>0)
avg_loss = calculate_mean_edit_distance_and_loss(iterator, dropout_rates, reuse=i > 0)

# Allow for variables to be re-used by the next tower
tf.get_variable_scope().reuse_variables()
@@ -460,9 +460,9 @@ class LossWidget(progressbar.widgets.FormatLabel):
def __init__(self):
progressbar.widgets.FormatLabel.__init__(self, format='Loss: %(mean_loss)f')

def __call__(self, progress, data):
def __call__(self, progress, data, **kwargs):
data['mean_loss'] = total_loss / step_count if step_count else 0.0
return progressbar.widgets.FormatLabel.__call__(self, progress, data)
return progressbar.widgets.FormatLabel.__call__(self, progress, data, **kwargs)
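The **kwargs added to LossWidget.__call__ above presumably keeps the override's signature compatible with the base widget's __call__, which is what pylint's arguments-differ / signature-mismatch checks look for. A generic sketch of the pattern (hypothetical Base/Child classes, not the progressbar API):

    class Base:
        def __call__(self, progress, data, **kwargs):
            return data

    class Child(Base):
        def __call__(self, progress, data, **kwargs):   # mirror the base signature and forward extras
            data['mean_loss'] = 0.0
            return super().__call__(progress, data, **kwargs)

    print(Child()(None, {}))   # {'mean_loss': 0.0}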

if FLAGS.show_progressbar:
pbar = progressbar.ProgressBar(widgets=['Epoch {}'.format(epoch),
@@ -547,7 +547,7 @@ def __call__(self, progress, data):


def test():
evaluate.evaluate(FLAGS.test_files.split(','), create_model, try_loading)
evaluate(FLAGS.test_files.split(','), create_model, try_loading)


def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
@@ -570,12 +570,12 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
# no state management since n_step is expected to be dynamic too (see below)
previous_state = previous_state_c = previous_state_h = None
else:
if not tflite:
previous_state_c = variable_on_cpu('previous_state_c', [batch_size, Config.n_cell_dim], initializer=None)
previous_state_h = variable_on_cpu('previous_state_h', [batch_size, Config.n_cell_dim], initializer=None)
else:
if tflite:
previous_state_c = tf.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_c')
previous_state_h = tf.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_h')
else:
previous_state_c = variable_on_cpu('previous_state_c', [batch_size, Config.n_cell_dim], initializer=None)
previous_state_h = variable_on_cpu('previous_state_h', [batch_size, Config.n_cell_dim], initializer=None)

previous_state = tf.contrib.rnn.LSTMStateTuple(previous_state_c, previous_state_h)

@@ -620,28 +620,7 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
)

new_state_c, new_state_h = layers['rnn_output_state']
if not tflite:
zero_state = tf.zeros([batch_size, Config.n_cell_dim], tf.float32)
initialize_c = tf.assign(previous_state_c, zero_state)
initialize_h = tf.assign(previous_state_h, zero_state)
initialize_state = tf.group(initialize_c, initialize_h, name='initialize_state')
with tf.control_dependencies([tf.assign(previous_state_c, new_state_c), tf.assign(previous_state_h, new_state_h)]):
logits = tf.identity(logits, name='logits')

return (
{
'input': input_tensor,
'input_lengths': seq_length,
'input_samples': input_samples,
},
{
'outputs': logits,
'initialize_state': initialize_state,
'mfccs': mfccs,
},
layers
)
else:
if tflite:
logits = tf.identity(logits, name='logits')
new_state_c = tf.identity(new_state_c, name='new_state_c')
new_state_h = tf.identity(new_state_h, name='new_state_h')
@@ -656,17 +635,32 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
if FLAGS.use_seq_length:
inputs.update({'input_lengths': seq_length})

return (
inputs,
{
'outputs': logits,
'new_state_c': new_state_c,
'new_state_h': new_state_h,
'mfccs': mfccs,
},
layers
)
outputs = {
'outputs': logits,
'new_state_c': new_state_c,
'new_state_h': new_state_h,
'mfccs': mfccs,
}
else:
zero_state = tf.zeros([batch_size, Config.n_cell_dim], tf.float32)
initialize_c = tf.assign(previous_state_c, zero_state)
initialize_h = tf.assign(previous_state_h, zero_state)
initialize_state = tf.group(initialize_c, initialize_h, name='initialize_state')
with tf.control_dependencies([tf.assign(previous_state_c, new_state_c), tf.assign(previous_state_h, new_state_h)]):
logits = tf.identity(logits, name='logits')

inputs = {
'input': input_tensor,
'input_lengths': seq_length,
'input_samples': input_samples,
}
outputs = {
'outputs': logits,
'initialize_state': initialize_state,
'mfccs': mfccs,
}

return inputs, outputs, layers
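In the non-TFLite branch above, wrapping logits in tf.identity under tf.control_dependencies ties the state-assign ops to any fetch of logits, so the persisted LSTM state advances as a side effect. A minimal TF1-style sketch of that pattern, assuming the tf.compat.v1 API is available:

    import tensorflow.compat.v1 as tf
    tf.disable_eager_execution()

    state = tf.get_variable('state', initializer=0.0)
    new_state = state + 1.0
    with tf.control_dependencies([tf.assign(state, new_state)]):
        out = tf.identity(new_state, name='out')   # fetching `out` also runs the assign

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(out), sess.run(state))      # 1.0 1.0 -- the state was updated as a side effect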

def file_relative_read(fname):
return open(os.path.join(os.path.dirname(__file__), fname)).read()
@@ -680,11 +674,9 @@ def export():
from tensorflow.python.framework.ops import Tensor, Operation

inputs, outputs, _ = create_inference_graph(batch_size=FLAGS.export_batch_size, n_steps=FLAGS.n_steps, tflite=FLAGS.export_tflite)
input_names = ",".join(tensor.op.name for tensor in inputs.values())
output_names_tensors = [ tensor.op.name for tensor in outputs.values() if isinstance(tensor, Tensor)]
output_names_ops = [ tensor.name for tensor in outputs.values() if isinstance(tensor, Operation)]
output_names_tensors = [tensor.op.name for tensor in outputs.values() if isinstance(tensor, Tensor)]
output_names_ops = [op.name for op in outputs.values() if isinstance(op, Operation)]
output_names = ",".join(output_names_tensors + output_names_ops)
input_shapes = ":".join(",".join(map(str, tensor.shape)) for tensor in inputs.values())

if not FLAGS.export_tflite:
mapping = {v.op.name: v for v in tf.global_variables() if not v.op.name.startswith('previous_state_')}
Expand Down Expand Up @@ -828,6 +820,6 @@ def main(_):
tf.reset_default_graph()
do_single_file_inference(FLAGS.one_shot_infer)

if __name__ == '__main__' :
if __name__ == '__main__':
create_flags()
tf.app.run(main)
28 changes: 15 additions & 13 deletions evaluate.py
@@ -4,13 +4,16 @@

import itertools
import json

from multiprocessing import cpu_count

import numpy as np
import progressbar
import tensorflow as tf

from ds_ctcdecoder import ctc_beam_search_decoder_batch, Scorer
from multiprocessing import cpu_count
from six.moves import zip, range
from six.moves import zip

from util.config import Config, initialize_globals
from util.evaluate_tools import calculate_report
from util.feeding import create_dataset
@@ -27,13 +30,12 @@ def sparse_tensor_value_to_texts(value, alphabet):
return sparse_tuple_to_texts((value.indices, value.values, value.dense_shape), alphabet)


def sparse_tuple_to_texts(tuple, alphabet):
indices = tuple[0]
values = tuple[1]
results = [''] * tuple[2][0]
for i in range(len(indices)):
index = indices[i][0]
results[index] += alphabet.string_from_label(values[i])
def sparse_tuple_to_texts(sp_tuple, alphabet):
indices = sp_tuple[0]
values = sp_tuple[1]
results = [''] * sp_tuple[2][0]
for i, index in enumerate(indices):
results[index[0]] += alphabet.string_from_label(values[i])
# List of strings
return results
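The rewrite of sparse_tuple_to_texts above follows pylint's consider-using-enumerate suggestion: iterate over the indices directly instead of over range(len(...)). A tiny self-contained check of the new loop, with made-up data standing in for the alphabet lookups:

    indices = [[0], [0], [1]]          # sparse (row) indices
    values = ['c', 'a', 't']           # stand-ins for alphabet.string_from_label(...)
    results = [''] * 2
    for i, index in enumerate(indices):
        results[index[0]] += values[i]
    print(results)                     # ['ca', 't']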

@@ -63,7 +65,7 @@ def evaluate(test_csvs, create_model, try_loading):
inputs=logits,
sequence_length=batch_x_len)

global_step = tf.train.get_or_create_global_step()
tf.train.get_or_create_global_step()

with tf.Session(config=Config.session_config) as session:
# Create a saver using variables from the above newly created graph
@@ -109,7 +111,7 @@ def evaluate(test_csvs, create_model, try_loading):
# Get number of accessible CPU cores for this process
try:
num_processes = cpu_count()
except:
except NotImplementedError:
num_processes = 1

print('Decoding predictions...')
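Catching NotImplementedError above replaces a bare except clause, which pylint flags (bare-except), while keeping the single-process fallback; multiprocessing.cpu_count() is documented to raise that exception when the count cannot be determined. An alternative sketch that avoids the try/except entirely (a different approach, not what this commit does):

    import os

    # os.cpu_count() returns None instead of raising when the count is unknown.
    num_processes = os.cpu_count() or 1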
@@ -151,12 +153,12 @@ def main(_):
'the --test_files flag.')
exit(1)

from DeepSpeech import create_model, try_loading
from DeepSpeech import create_model, try_loading # pylint: disable=cyclic-import
samples = evaluate(FLAGS.test_files.split(','), create_model, try_loading)

if FLAGS.test_output_file:
# Save decoded tuples as JSON, converting NumPy floats to Python floats
json.dump(samples, open(FLAGS.test_output_file, 'w'), default=lambda x: float(x))
json.dump(samples, open(FLAGS.test_output_file, 'w'), default=float)
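json.dump only calls `default` for objects it cannot serialize natively, so passing the built-in float directly replaces the redundant lambda (pylint: unnecessary-lambda) and still converts NumPy scalars. A small check:

    import json
    import numpy as np

    print(json.dumps({'wer': np.float32(0.25)}, default=float))   # {"wer": 0.25}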


if __name__ == '__main__':
85 changes: 43 additions & 42 deletions util/check_characters.py
@@ -1,55 +1,56 @@
import csv
import sys
import glob

"""
Usage: $ python3 check_characters.py "INFILE"
e.g. $ python3 check_characters.py -csv /home/data/french.csv
e.g. $ python3 check_characters.py -csv ../train.csv,../test.csv
e.g. $ python3 check_characters.py -alpha -csv ../train.csv
e.g. $ python3 check_characters.py -csv ../train.csv,../test.csv
e.g. $ python3 check_characters.py -alpha -csv ../train.csv
Point this script to your transcripts, and it returns
to the terminal the unique set of characters in those
Point this script to your transcripts, and it returns
to the terminal the unique set of characters in those
files (combined).
These files are assumed to be csv, with the transcript being the third field.
The script simply reads all the text from all the files,
storing a set of unique characters that were seen
The script simply reads all the text from all the files,
storing a set of unique characters that were seen
along the way.
"""
import argparse
import csv
import os
import sys

parser = argparse.ArgumentParser()

parser.add_argument("-csv", "--csv-files", help="Str. Filenames as a comma separated list", required=True)
parser.add_argument("-alpha", "--alphabet-format",help="Bool. Print in format for alphabet.txt",action="store_true")
parser.set_defaults(alphabet_format=False)
args = parser.parse_args()
inFiles = [os.path.abspath(i) for i in args.csv_files.split(",")]

print("### Reading in the following transcript files: ###")
print("### {} ###".format(inFiles))

allText = set()
for inFile in (inFiles):
with open(inFile, "r") as csvFile:
reader = csv.reader(csvFile)
try:
next(reader, None) # skip the file header (i.e. "transcript")
for row in reader:
allText |= set(str(row[2]))
except IndexError as ie:
print("Your input file",inFile,"is not formatted properly. Check if there are 3 columns with the 3rd containing the transcript")
sys.exit(-1)
finally:
csvFile.close()

print("### The following unique characters were found in your transcripts: ###")
if args.alphabet_format:
for char in list(allText):
print(char)
print("### ^^^ You can copy-paste these into data/alphabet.txt ###")
else:
print(list(allText))
def main():
parser = argparse.ArgumentParser()

parser.add_argument("-csv", "--csv-files", help="Str. Filenames as a comma separated list", required=True)
parser.add_argument("-alpha", "--alphabet-format", help="Bool. Print in format for alphabet.txt", action="store_true")
args = parser.parse_args()
in_files = [os.path.abspath(i) for i in args.csv_files.split(",")]

print("### Reading in the following transcript files: ###")
print("### {} ###".format(in_files))

all_text = set()
for in_file in in_files:
with open(in_file, "r") as csv_file:
reader = csv.reader(csv_file)
try:
next(reader, None) # skip the file header (i.e. "transcript")
for row in reader:
all_text |= set(str(row[2]))
except IndexError:
print("Your input file", in_file, "is not formatted properly. Check if there are 3 columns with the 3rd containing the transcript")
sys.exit(-1)
finally:
csv_file.close()

print("### The following unique characters were found in your transcripts: ###")
if args.alphabet_format:
for char in list(all_text):
print(char)
print("### ^^^ You can copy-paste these into data/alphabet.txt ###")
else:
print(list(all_text))

if __name__ == '__main__':
main()
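Wrapping the script body of check_characters.py in main() is likely motivated by two things: module-level assignments no longer trip pylint's invalid-name check (which expects UPPER_CASE constants at module scope), and importing the module no longer executes the script. A minimal sketch of the pattern:

    def main():
        in_files = ["train.csv"]   # lower_snake_case is fine inside a function
        print(in_files)

    if __name__ == '__main__':
        main()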
