Fix pylint warnings

purplesparkle · Apr 11, 2019 · commit 13757a4 · 1 parent a16e468

Showing 10 changed files with 199 additions and 204 deletions.
diff --git a/DeepSpeech.py b/DeepSpeech.py
@@ -5,17 +5,17 @@
 import os
 import sys
 
-log_level_index = sys.argv.index('--log_level') + 1 if '--log_level' in sys.argv else 0
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = sys.argv[log_level_index] if log_level_index > 0 and log_level_index < len(sys.argv) else '3'
+LOG_LEVEL_INDEX = sys.argv.index('--log_level') + 1 if '--log_level' in sys.argv else 0
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = sys.argv[LOG_LEVEL_INDEX] if 0 < LOG_LEVEL_INDEX < len(sys.argv) else '3'
 
 import time
-import evaluate
 import numpy as np
 import progressbar
 import shutil
 import tensorflow as tf
 
 from ds_ctcdecoder import ctc_beam_search_decoder, Scorer
+from evaluate import evaluate
 from six.moves import zip, range
 from tensorflow.python.tools import freeze_graph
 from util.config import Config, initialize_globals
@@ -49,7 +49,7 @@ def create_overlapping_windows(batch_x):
     # convolution returns patches of the input tensor as is, and we can create
     # overlapping windows over the MFCCs.
     eye_filter = tf.constant(np.eye(window_width * num_channels)
-                               .reshape(window_width, num_channels, window_width * num_channels), tf.float32)
+                               .reshape(window_width, num_channels, window_width * num_channels), tf.float32) # pylint: disable=bad-continuation
 
     # Create overlapping windows
     batch_x = tf.nn.conv1d(batch_x, eye_filter, stride=1, padding='SAME')
@@ -172,7 +172,7 @@ def create_model(batch_x, seq_length, dropout, reuse=False, previous_state=None,
 # Conveniently, this loss function is implemented in TensorFlow.
 # Thus, we can simply make use of this implementation to define our loss.
 
-def calculate_mean_edit_distance_and_loss(iterator, tower, dropout, reuse):
+def calculate_mean_edit_distance_and_loss(iterator, dropout, reuse):
     r'''
     This routine beam search decodes a mini-batch and calculates the loss and mean edit distance.
     Next to total and average loss it returns the mean edit distance,
@@ -246,10 +246,10 @@ def get_tower_results(iterator, optimizer, dropout_rates):
             device = Config.available_devices[i]
             with tf.device(device):
                 # Create a scope for all operations of tower i
-                with tf.name_scope('tower_%d' % i) as scope:
+                with tf.name_scope('tower_%d' % i):
                     # Calculate the avg_loss and mean_edit_distance and retrieve the decoded
                     # batch along with the original batch's labels (Y) of this tower
-                    avg_loss = calculate_mean_edit_distance_and_loss(iterator, i, dropout_rates, reuse=i>0)
+                    avg_loss = calculate_mean_edit_distance_and_loss(iterator, dropout_rates, reuse=i > 0)
 
                     # Allow for variables to be re-used by the next tower
                     tf.get_variable_scope().reuse_variables()
@@ -460,9 +460,9 @@ class LossWidget(progressbar.widgets.FormatLabel):
                 def __init__(self):
                     progressbar.widgets.FormatLabel.__init__(self, format='Loss: %(mean_loss)f')
 
-                def __call__(self, progress, data):
+                def __call__(self, progress, data, **kwargs):
                     data['mean_loss'] = total_loss / step_count if step_count else 0.0
-                    return progressbar.widgets.FormatLabel.__call__(self, progress, data)
+                    return progressbar.widgets.FormatLabel.__call__(self, progress, data, **kwargs)
 
             if FLAGS.show_progressbar:
                 pbar = progressbar.ProgressBar(widgets=['Epoch {}'.format(epoch),
@@ -547,7 +547,7 @@ def __call__(self, progress, data):
 
 
 def test():
-    evaluate.evaluate(FLAGS.test_files.split(','), create_model, try_loading)
+    evaluate(FLAGS.test_files.split(','), create_model, try_loading)
 
 
 def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
@@ -570,12 +570,12 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
         # no state management since n_step is expected to be dynamic too (see below)
         previous_state = previous_state_c = previous_state_h = None
     else:
-        if not tflite:
-            previous_state_c = variable_on_cpu('previous_state_c', [batch_size, Config.n_cell_dim], initializer=None)
-            previous_state_h = variable_on_cpu('previous_state_h', [batch_size, Config.n_cell_dim], initializer=None)
-        else:
+        if tflite:
             previous_state_c = tf.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_c')
             previous_state_h = tf.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_h')
+        else:
+            previous_state_c = variable_on_cpu('previous_state_c', [batch_size, Config.n_cell_dim], initializer=None)
+            previous_state_h = variable_on_cpu('previous_state_h', [batch_size, Config.n_cell_dim], initializer=None)
 
         previous_state = tf.contrib.rnn.LSTMStateTuple(previous_state_c, previous_state_h)
 
@@ -620,28 +620,7 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
         )
 
     new_state_c, new_state_h = layers['rnn_output_state']
-    if not tflite:
-        zero_state = tf.zeros([batch_size, Config.n_cell_dim], tf.float32)
-        initialize_c = tf.assign(previous_state_c, zero_state)
-        initialize_h = tf.assign(previous_state_h, zero_state)
-        initialize_state = tf.group(initialize_c, initialize_h, name='initialize_state')
-        with tf.control_dependencies([tf.assign(previous_state_c, new_state_c), tf.assign(previous_state_h, new_state_h)]):
-            logits = tf.identity(logits, name='logits')
-
-        return (
-            {
-                'input': input_tensor,
-                'input_lengths': seq_length,
-                'input_samples': input_samples,
-            },
-            {
-                'outputs': logits,
-                'initialize_state': initialize_state,
-                'mfccs': mfccs,
-            },
-            layers
-        )
-    else:
+    if tflite:
         logits = tf.identity(logits, name='logits')
         new_state_c = tf.identity(new_state_c, name='new_state_c')
         new_state_h = tf.identity(new_state_h, name='new_state_h')
@@ -656,17 +635,32 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
         if FLAGS.use_seq_length:
             inputs.update({'input_lengths': seq_length})
 
-        return (
-            inputs,
-            {
-                'outputs': logits,
-                'new_state_c': new_state_c,
-                'new_state_h': new_state_h,
-                'mfccs': mfccs,
-            },
-            layers
-        )
+        outputs = {
+            'outputs': logits,
+            'new_state_c': new_state_c,
+            'new_state_h': new_state_h,
+            'mfccs': mfccs,
+        }
+    else:
+        zero_state = tf.zeros([batch_size, Config.n_cell_dim], tf.float32)
+        initialize_c = tf.assign(previous_state_c, zero_state)
+        initialize_h = tf.assign(previous_state_h, zero_state)
+        initialize_state = tf.group(initialize_c, initialize_h, name='initialize_state')
+        with tf.control_dependencies([tf.assign(previous_state_c, new_state_c), tf.assign(previous_state_h, new_state_h)]):
+            logits = tf.identity(logits, name='logits')
+
+        inputs = {
+            'input': input_tensor,
+            'input_lengths': seq_length,
+            'input_samples': input_samples,
+        }
+        outputs = {
+            'outputs': logits,
+            'initialize_state': initialize_state,
+            'mfccs': mfccs,
+        }
 
+    return inputs, outputs, layers
 
 def file_relative_read(fname):
     return open(os.path.join(os.path.dirname(__file__), fname)).read()
@@ -680,11 +674,9 @@ def export():
     from tensorflow.python.framework.ops import Tensor, Operation
 
     inputs, outputs, _ = create_inference_graph(batch_size=FLAGS.export_batch_size, n_steps=FLAGS.n_steps, tflite=FLAGS.export_tflite)
-    input_names = ",".join(tensor.op.name for tensor in inputs.values())
-    output_names_tensors = [ tensor.op.name for tensor in outputs.values() if isinstance(tensor, Tensor)]
-    output_names_ops = [ tensor.name for tensor in outputs.values() if isinstance(tensor, Operation)]
+    output_names_tensors = [tensor.op.name for tensor in outputs.values() if isinstance(tensor, Tensor)]
+    output_names_ops = [op.name for op in outputs.values() if isinstance(op, Operation)]
     output_names = ",".join(output_names_tensors + output_names_ops)
-    input_shapes = ":".join(",".join(map(str, tensor.shape)) for tensor in inputs.values())
 
     if not FLAGS.export_tflite:
         mapping = {v.op.name: v for v in tf.global_variables() if not v.op.name.startswith('previous_state_')}
@@ -828,6 +820,6 @@ def main(_):
         tf.reset_default_graph()
         do_single_file_inference(FLAGS.one_shot_infer)
 
-if __name__ == '__main__' :
+if __name__ == '__main__':
     create_flags()
     tf.app.run(main)
diff --git a/evaluate.py b/evaluate.py
@@ -4,13 +4,16 @@
 
 import itertools
 import json
+
+from multiprocessing import cpu_count
+
 import numpy as np
 import progressbar
 import tensorflow as tf
 
 from ds_ctcdecoder import ctc_beam_search_decoder_batch, Scorer
-from multiprocessing import cpu_count
-from six.moves import zip, range
+from six.moves import zip
+
 from util.config import Config, initialize_globals
 from util.evaluate_tools import calculate_report
 from util.feeding import create_dataset
@@ -27,13 +30,12 @@ def sparse_tensor_value_to_texts(value, alphabet):
     return sparse_tuple_to_texts((value.indices, value.values, value.dense_shape), alphabet)
 
 
-def sparse_tuple_to_texts(tuple, alphabet):
-    indices = tuple[0]
-    values = tuple[1]
-    results = [''] * tuple[2][0]
-    for i in range(len(indices)):
-        index = indices[i][0]
-        results[index] += alphabet.string_from_label(values[i])
+def sparse_tuple_to_texts(sp_tuple, alphabet):
+    indices = sp_tuple[0]
+    values = sp_tuple[1]
+    results = [''] * sp_tuple[2][0]
+    for i, index in enumerate(indices):
+        results[index[0]] += alphabet.string_from_label(values[i])
     # List of strings
     return results
 
@@ -63,7 +65,7 @@ def evaluate(test_csvs, create_model, try_loading):
                           inputs=logits,
                           sequence_length=batch_x_len)
 
-    global_step = tf.train.get_or_create_global_step()
+    tf.train.get_or_create_global_step()
 
     with tf.Session(config=Config.session_config) as session:
         # Create a saver using variables from the above newly created graph
@@ -109,7 +111,7 @@ def evaluate(test_csvs, create_model, try_loading):
     # Get number of accessible CPU cores for this process
     try:
         num_processes = cpu_count()
-    except:
+    except NotImplementedError:
         num_processes = 1
 
     print('Decoding predictions...')
@@ -151,12 +153,12 @@ def main(_):
                   'the --test_files flag.')
         exit(1)
 
-    from DeepSpeech import create_model, try_loading
+    from DeepSpeech import create_model, try_loading # pylint: disable=cyclic-import
     samples = evaluate(FLAGS.test_files.split(','), create_model, try_loading)
 
     if FLAGS.test_output_file:
         # Save decoded tuples as JSON, converting NumPy floats to Python floats
-        json.dump(samples, open(FLAGS.test_output_file, 'w'), default=lambda x: float(x))
+        json.dump(samples, open(FLAGS.test_output_file, 'w'), default=float)
 
 
 if __name__ == '__main__':

diff --git a/util/check_characters.py b/util/check_characters.py
@@ -1,55 +1,56 @@
-import csv
-import sys
-import glob
-
 """
 Usage: $ python3 check_characters.py "INFILE"
  e.g.  $ python3 check_characters.py -csv /home/data/french.csv
- e.g.  $ python3 check_characters.py -csv ../train.csv,../test.csv 
- e.g.  $ python3 check_characters.py -alpha -csv ../train.csv 
+ e.g.  $ python3 check_characters.py -csv ../train.csv,../test.csv
+ e.g.  $ python3 check_characters.py -alpha -csv ../train.csv
 
-Point this script to your transcripts, and it returns 
-to the terminal the unique set of characters in those 
+Point this script to your transcripts, and it returns
+to the terminal the unique set of characters in those
 files (combined).
 
 These files are assumed to be csv, with the transcript being the third field.
 
-The script simply reads all the text from all the files, 
-storing a set of unique characters that were seen 
+The script simply reads all the text from all the files,
+storing a set of unique characters that were seen
 along the way.
 """
 import argparse
+import csv
 import os
+import sys
 
-parser = argparse.ArgumentParser()
-
-parser.add_argument("-csv", "--csv-files", help="Str. Filenames as a comma separated list", required=True)
-parser.add_argument("-alpha", "--alphabet-format",help="Bool. Print in format for alphabet.txt",action="store_true")
-parser.set_defaults(alphabet_format=False)
-args = parser.parse_args()
-inFiles = [os.path.abspath(i) for i in args.csv_files.split(",")]
-
-print("### Reading in the following transcript files: ###")
-print("### {} ###".format(inFiles))
-
-allText = set()
-for inFile in (inFiles):
-    with open(inFile, "r") as csvFile:
-        reader = csv.reader(csvFile)
-        try:
-            next(reader, None)  # skip the file header (i.e. "transcript")
-            for row in reader:
-                allText |= set(str(row[2]))
-        except IndexError as ie:
-            print("Your input file",inFile,"is not formatted properly. Check if there are 3 columns with the 3rd containing the transcript")
-            sys.exit(-1)
-        finally:
-            csvFile.close()
-
-print("### The following unique characters were found in your transcripts: ###")
-if args.alphabet_format:
-    for char in list(allText):
-        print(char)
-    print("### ^^^ You can copy-paste these into data/alphabet.txt ###")
-else:
-    print(list(allText))
+def main():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("-csv", "--csv-files", help="Str. Filenames as a comma separated list", required=True)
+    parser.add_argument("-alpha", "--alphabet-format", help="Bool. Print in format for alphabet.txt", action="store_true")
+    args = parser.parse_args()
+    in_files = [os.path.abspath(i) for i in args.csv_files.split(",")]
+
+    print("### Reading in the following transcript files: ###")
+    print("### {} ###".format(in_files))
+
+    all_text = set()
+    for in_file in in_files:
+        with open(in_file, "r") as csv_file:
+            reader = csv.reader(csv_file)
+            try:
+                next(reader, None)  # skip the file header (i.e. "transcript")
+                for row in reader:
+                    all_text |= set(str(row[2]))
+            except IndexError:
+                print("Your input file", in_file, "is not formatted properly. Check if there are 3 columns with the 3rd containing the transcript")
+                sys.exit(-1)
+            finally:
+                csv_file.close()
+
+    print("### The following unique characters were found in your transcripts: ###")
+    if args.alphabet_format:
+        for char in list(all_text):
+            print(char)
+        print("### ^^^ You can copy-paste these into data/alphabet.txt ###")
+    else:
+        print(list(all_text))
+
+if __name__ == '__main__':
+    main()