Merge pull request sherjilozair#109 from GaoleMeng/add-comments
Add explanatory comments to files
sherjilozair authored May 3, 2018
2 parents c692c8b + 4b23880 commit 61ac043
Showing 2 changed files with 22 additions and 3 deletions.
17 changes: 15 additions & 2 deletions model.py
@@ -12,6 +12,7 @@ def __init__(self, args, training=True):
args.batch_size = 1
args.seq_length = 1

# choose the rnn cell type according to args.model
if args.model == 'rnn':
cell_fn = rnn.BasicRNNCell
elif args.model == 'gru':
@@ -23,6 +24,7 @@ def __init__(self, args, training=True):
else:
raise Exception("model type not supported: {}".format(args.model))
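For reference, a minimal standalone sketch of this kind of cell-type dispatch in TF 1.x; the gru/lstm/nas branches below are illustrative assumptions, since those lines are collapsed in this diff view, and the function name pick_cell_fn is hypothetical:

from tensorflow.contrib import rnn

def pick_cell_fn(model):
    # map a model name to a TF 1.x RNN cell constructor (illustrative sketch)
    if model == 'rnn':
        return rnn.BasicRNNCell
    elif model == 'gru':
        return rnn.GRUCell
    elif model == 'lstm':
        return rnn.BasicLSTMCell
    elif model == 'nas':
        return rnn.NASCell
    raise Exception("model type not supported: {}".format(model))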

# wrap the multi-layered rnn cells into one cell, applying dropout to each layer
cells = []
for _ in range(args.num_layers):
cell = cell_fn(args.rnn_size)
@@ -31,41 +33,48 @@ def __init__(self, args, training=True):
input_keep_prob=args.input_keep_prob,
output_keep_prob=args.output_keep_prob)
cells.append(cell)

self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

# input/target data (int32 since input is char-level)
self.input_data = tf.placeholder(
tf.int32, [args.batch_size, args.seq_length])
self.targets = tf.placeholder(
tf.int32, [args.batch_size, args.seq_length])
self.initial_state = cell.zero_state(args.batch_size, tf.float32)

# softmax output layer; softmax classifies over the vocabulary
with tf.variable_scope('rnnlm'):
softmax_w = tf.get_variable("softmax_w",
[args.rnn_size, args.vocab_size])
softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

# map the input characters to their embeddings
embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
inputs = tf.nn.embedding_lookup(embedding, self.input_data)

# dropout beta testing: double check which one should affect next line
if training and args.output_keep_prob:
inputs = tf.nn.dropout(inputs, args.output_keep_prob)

# unstack the input so it fits the rnn model (one tensor per time step)
inputs = tf.split(inputs, args.seq_length, 1)
inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

# loop function for rnn_decoder, which takes the i-th cell's output and generates the (i+1)-th cell's input
def loop(prev, _):
prev = tf.matmul(prev, softmax_w) + softmax_b
prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
return tf.nn.embedding_lookup(embedding, prev_symbol)

# rnn_decoder generates the outputs and the final state. When we are not training the model, we use the loop function.
outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if not training else None, scope='rnnlm')
output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])


# output layer
self.logits = tf.matmul(output, softmax_w) + softmax_b
self.probs = tf.nn.softmax(self.logits)
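The split/squeeze above turns the embedded batch into a per-time-step list, and the concat/reshape after the decoder flattens it back before the softmax projection. A small numpy sketch of the same shape flow, with made-up sizes (TensorFlow is not needed to follow it):

import numpy as np

# made-up sizes, only to trace shapes
batch_size, seq_length, rnn_size, vocab_size = 2, 3, 4, 5

inputs = np.zeros((batch_size, seq_length, rnn_size))              # after embedding_lookup
steps = [np.squeeze(t, 1) for t in np.split(inputs, seq_length, 1)]
# steps: seq_length arrays of shape (batch_size, rnn_size), one per time step

outputs = steps                                                     # stand-in for the decoder outputs
output = np.concatenate(outputs, 1).reshape(-1, rnn_size)           # (batch_size * seq_length, rnn_size)

softmax_w = np.zeros((rnn_size, vocab_size))
logits = output.dot(softmax_w)                                      # (batch_size * seq_length, vocab_size)
print(output.shape, logits.shape)                                   # (6, 4) (6, 5)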

# loss is calculated as the log loss, averaged over the sequence.
loss = legacy_seq2seq.sequence_loss_by_example(
[self.logits],
[tf.reshape(self.targets, [-1])],
@@ -75,10 +84,14 @@ def loop(prev, _):
self.final_state = last_state
self.lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()

# calculate gradients
grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
args.grad_clip)
with tf.name_scope('optimizer'):
optimizer = tf.train.AdamOptimizer(self.lr)

# apply the gradient updates to all the trainable variables.
self.train_op = optimizer.apply_gradients(zip(grads, tvars))

# instrument tensorboard
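End to end, a training step feeds the two placeholders and runs train_op. A minimal TF 1.x sketch, assuming `model` is an instance of this Model class and x, y are int32 numpy batches of shape (batch_size, seq_length); the learning-rate value is arbitrary:

import tensorflow as tf

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.assign(model.lr, 0.002))               # lr is a plain variable, so it is set by hand
    feed = {model.input_data: x, model.targets: y}     # initial_state defaults to zeros
    train_loss, state, _ = sess.run(
        [model.cost, model.final_state, model.train_op], feed)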
8 changes: 7 additions & 1 deletion utils.py
@@ -25,6 +25,7 @@ def __init__(self, data_dir, batch_size, seq_length, encoding='utf-8'):
self.create_batches()
self.reset_batch_pointer()

# preprocess data for the first time.
def preprocess(self, input_file, vocab_file, tensor_file):
with codecs.open(input_file, "r", encoding=self.encoding) as f:
data = f.read()
@@ -38,6 +39,8 @@ def preprocess(self, input_file, vocab_file, tensor_file):
self.tensor = np.array(list(map(self.vocab.get, data)))
np.save(tensor_file, self.tensor)


# load the preprocessed data if it has been processed before.
def load_preprocessed(self, vocab_file, tensor_file):
with open(vocab_file, 'rb') as f:
self.chars = cPickle.load(f)
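preprocess() above encodes the corpus by building a char-to-index dict, and load_preprocessed() only needs the saved character list to rebuild that mapping. A small standalone sketch of the round trip (the real code may order characters differently, e.g. by frequency):

import numpy as np

data = "hello"                                   # stand-in for the corpus text
chars = sorted(set(data))                        # ['e', 'h', 'l', 'o']
vocab = {c: i for i, c in enumerate(chars)}      # char -> integer id
tensor = np.array(list(map(vocab.get, data)))    # encoded corpus: [1 0 2 2 3]

# reloading only needs the ordered character list to rebuild the same mapping
vocab_again = dict(zip(chars, range(len(chars))))
assert vocab_again == vocab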
@@ -46,7 +49,7 @@ def load_preprocessed(self, vocab_file, tensor_file):
self.tensor = np.load(tensor_file)
self.num_batches = int(self.tensor.size / (self.batch_size *
self.seq_length))

# separate the whole dataset into batches.
def create_batches(self):
self.num_batches = int(self.tensor.size / (self.batch_size *
self.seq_length))
@@ -56,9 +59,12 @@ def create_batches(self):
if self.num_batches == 0:
assert False, "Not enough data. Make seq_length and batch_size small."

# clip the data so its length is exactly self.num_batches * self.batch_size * self.seq_length.
self.tensor = self.tensor[:self.num_batches * self.batch_size * self.seq_length]
xdata = self.tensor
ydata = np.copy(self.tensor)

# ydata is xdata shifted by one position.
ydata[:-1] = xdata[1:]
ydata[-1] = xdata[0]
self.x_batches = np.split(xdata.reshape(self.batch_size, -1),
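Concretely, create_batches() drops the tail of the corpus so its length is a whole number of batches, builds targets as inputs shifted by one position, and splits both along the time axis. A small numpy sketch with made-up sizes:

import numpy as np

batch_size, seq_length = 2, 3
tensor = np.arange(14)                                    # pretend corpus of 14 char ids
num_batches = tensor.size // (batch_size * seq_length)    # 14 // 6 = 2
tensor = tensor[:num_batches * batch_size * seq_length]   # keep the first 12 ids

xdata = tensor
ydata = np.copy(tensor)
ydata[:-1] = xdata[1:]      # each target is the next character
ydata[-1] = xdata[0]        # the last target wraps around to the first input

x_batches = np.split(xdata.reshape(batch_size, -1), num_batches, 1)
y_batches = np.split(ydata.reshape(batch_size, -1), num_batches, 1)
print(x_batches[0])         # [[0 1 2]
                            #  [6 7 8]]
print(y_batches[0])         # [[1 2 3]
                            #  [7 8 9]]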
