Merge pull request sherjilozair#109 from GaoleMeng/add-comments
Add explanatory comments to files
sherjilozair authored May 3, 2018
2 parents c692c8b + 4b23880 commit 61ac043
Showing 2 changed files with 22 additions and 3 deletions.
17 changes: 15 additions & 2 deletions model.py
@@ -12,6 +12,7 @@ def __init__(self, args, training=True):
args.batch_size = 1
args.seq_length = 1

# choose the rnn cell type according to args.model
if args.model == 'rnn':
cell_fn = rnn.BasicRNNCell
elif args.model == 'gru':
@@ -23,6 +24,7 @@ def __init__(self, args, training=True):
else:
raise Exception("model type not supported: {}".format(args.model))
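For reference, a minimal standalone sketch of this kind of cell-type dispatch in TF 1.x; the gru/lstm/nas branches below are illustrative assumptions, since those lines are collapsed in this diff view, and the function name pick_cell_fn is hypothetical:

from tensorflow.contrib import rnn

def pick_cell_fn(model):
    # map a model name to a TF 1.x RNN cell constructor (illustrative sketch)
    if model == 'rnn':
        return rnn.BasicRNNCell
    elif model == 'gru':
        return rnn.GRUCell
    elif model == 'lstm':
        return rnn.BasicLSTMCell
    elif model == 'nas':
        return rnn.NASCell
    raise Exception("model type not supported: {}".format(model))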

# wrap the multi-layered rnn cells into one cell, applying dropout to each layer
cells = []
for _ in range(args.num_layers):
cell = cell_fn(args.rnn_size)
@@ -31,41 +33,48 @@ def __init__(self, args, training=True):
input_keep_prob=args.input_keep_prob,
output_keep_prob=args.output_keep_prob)
cells.append(cell)

self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

# input/target data (int32 since input is char-level)
self.input_data = tf.placeholder(
tf.int32, [args.batch_size, args.seq_length])
self.targets = tf.placeholder(
tf.int32, [args.batch_size, args.seq_length])
self.initial_state = cell.zero_state(args.batch_size, tf.float32)

# softmax output layer; softmax classifies over the vocabulary
with tf.variable_scope('rnnlm'):
softmax_w = tf.get_variable("softmax_w",
[args.rnn_size, args.vocab_size])
softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

# map the input characters to their embeddings
embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
inputs = tf.nn.embedding_lookup(embedding, self.input_data)

# dropout beta testing: double check which one should affect next line
if training and args.output_keep_prob:
inputs = tf.nn.dropout(inputs, args.output_keep_prob)

# unstack the input so it fits the rnn model (one tensor per time step)
inputs = tf.split(inputs, args.seq_length, 1)
inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

# loop function for rnn_decoder, which takes the i-th cell's output and generates the (i+1)-th cell's input
def loop(prev, _):
prev = tf.matmul(prev, softmax_w) + softmax_b
prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
return tf.nn.embedding_lookup(embedding, prev_symbol)

# rnn_decoder generates the outputs and the final state. When we are not training the model, we use the loop function.
outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if not training else None, scope='rnnlm')
output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])


# output layer
self.logits = tf.matmul(output, softmax_w) + softmax_b
self.probs = tf.nn.softmax(self.logits)
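The split/squeeze above turns the embedded batch into a per-time-step list, and the concat/reshape after the decoder flattens it back before the softmax projection. A small numpy sketch of the same shape flow, with made-up sizes (TensorFlow is not needed to follow it):

import numpy as np

# made-up sizes, only to trace shapes
batch_size, seq_length, rnn_size, vocab_size = 2, 3, 4, 5

inputs = np.zeros((batch_size, seq_length, rnn_size))              # after embedding_lookup
steps = [np.squeeze(t, 1) for t in np.split(inputs, seq_length, 1)]
# steps: seq_length arrays of shape (batch_size, rnn_size), one per time step

outputs = steps                                                     # stand-in for the decoder outputs
output = np.concatenate(outputs, 1).reshape(-1, rnn_size)           # (batch_size * seq_length, rnn_size)

softmax_w = np.zeros((rnn_size, vocab_size))
logits = output.dot(softmax_w)                                      # (batch_size * seq_length, vocab_size)
print(output.shape, logits.shape)                                   # (6, 4) (6, 5)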

# loss is calculated as the log loss, averaged over the sequence.
loss = legacy_seq2seq.sequence_loss_by_example(
[self.logits],
[tf.reshape(self.targets, [-1])],
@@ -75,10 +84,14 @@ def loop(prev, _):
self.final_state = last_state
self.lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()

# calculate gradients
grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
args.grad_clip)
with tf.name_scope('optimizer'):
optimizer = tf.train.AdamOptimizer(self.lr)

# apply the gradient updates to all the trainable variables.
self.train_op = optimizer.apply_gradients(zip(grads, tvars))

# instrument tensorboard
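End to end, a training step feeds the two placeholders and runs train_op. A minimal TF 1.x sketch, assuming `model` is an instance of this Model class and x, y are int32 numpy batches of shape (batch_size, seq_length); the learning-rate value is arbitrary:

import tensorflow as tf

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.assign(model.lr, 0.002))               # lr is a plain variable, so it is set by hand
    feed = {model.input_data: x, model.targets: y}     # initial_state defaults to zeros
    train_loss, state, _ = sess.run(
        [model.cost, model.final_state, model.train_op], feed)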
8 changes: 7 additions & 1 deletion utils.py
@@ -25,6 +25,7 @@ def __init__(self, data_dir, batch_size, seq_length, encoding='utf-8'):
self.create_batches()
self.reset_batch_pointer()

# preprocess data for the first time.
def preprocess(self, input_file, vocab_file, tensor_file):
with codecs.open(input_file, "r", encoding=self.encoding) as f:
data = f.read()
@@ -38,6 +39,8 @@ def preprocess(self, input_file, vocab_file, tensor_file):
self.tensor = np.array(list(map(self.vocab.get, data)))
np.save(tensor_file, self.tensor)


# load the preprocessed data if it has been processed before.
def load_preprocessed(self, vocab_file, tensor_file):
with open(vocab_file, 'rb') as f:
self.chars = cPickle.load(f)
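preprocess() above encodes the corpus by building a char-to-index dict, and load_preprocessed() only needs the saved character list to rebuild that mapping. A small standalone sketch of the round trip (the real code may order characters differently, e.g. by frequency):

import numpy as np

data = "hello"                                   # stand-in for the corpus text
chars = sorted(set(data))                        # ['e', 'h', 'l', 'o']
vocab = {c: i for i, c in enumerate(chars)}      # char -> integer id
tensor = np.array(list(map(vocab.get, data)))    # encoded corpus: [1 0 2 2 3]

# reloading only needs the ordered character list to rebuild the same mapping
vocab_again = dict(zip(chars, range(len(chars))))
assert vocab_again == vocab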
@@ -46,7 +49,7 @@ def load_preprocessed(self, vocab_file, tensor_file):
self.tensor = np.load(tensor_file)
self.num_batches = int(self.tensor.size / (self.batch_size *
self.seq_length))

# separate the whole dataset into batches.
def create_batches(self):
self.num_batches = int(self.tensor.size / (self.batch_size *
self.seq_length))
@@ -56,9 +59,12 @@ def create_batches(self):
if self.num_batches == 0:
assert False, "Not enough data. Make seq_length and batch_size small."

# clip the data so its length is exactly self.num_batches * self.batch_size * self.seq_length.
self.tensor = self.tensor[:self.num_batches * self.batch_size * self.seq_length]
xdata = self.tensor
ydata = np.copy(self.tensor)

# ydata is xdata shifted by one position.
ydata[:-1] = xdata[1:]
ydata[-1] = xdata[0]
self.x_batches = np.split(xdata.reshape(self.batch_size, -1),
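Concretely, create_batches() drops the tail of the corpus so its length is a whole number of batches, builds targets as inputs shifted by one position, and splits both along the time axis. A small numpy sketch with made-up sizes:

import numpy as np

batch_size, seq_length = 2, 3
tensor = np.arange(14)                                    # pretend corpus of 14 char ids
num_batches = tensor.size // (batch_size * seq_length)    # 14 // 6 = 2
tensor = tensor[:num_batches * batch_size * seq_length]   # keep the first 12 ids

xdata = tensor
ydata = np.copy(tensor)
ydata[:-1] = xdata[1:]      # each target is the next character
ydata[-1] = xdata[0]        # the last target wraps around to the first input

x_batches = np.split(xdata.reshape(batch_size, -1), num_batches, 1)
y_batches = np.split(ydata.reshape(batch_size, -1), num_batches, 1)
print(x_batches[0])         # [[0 1 2]
                            #  [6 7 8]]
print(y_batches[0])         # [[1 2 3]
                            #  [7 8 9]]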
