Commit

final code
priya-dwivedi committed Mar 19, 2018
1 parent f309fe3 commit 2cbf5ec
Showing 23 changed files with 27,044 additions and 441 deletions.
170 changes: 131 additions & 39 deletions analysis_data_length.ipynb

Large diffs are not rendered by default.

7,649 changes: 7,649 additions & 0 deletions attention_visualization.ipynb

Large diffs are not rendered by default.

695 changes: 346 additions & 349 deletions code/.idea/workspace.xml

Large diffs are not rendered by default.

53 changes: 53 additions & 0 deletions code/ensemble.py
@@ -0,0 +1,53 @@

from __future__ import print_function
import os
from collections import Counter
import string
import io

cwd = os.getcwd()

import json
import sys

### Usage: python ensemble.py bidaf_json rnet_json  (merged answers are written to predictions.json)

def ensemble(preds_bidaf, preds_rnet):
    # Merge BiDAF and R-Net predictions, keeping the higher-probability answer per uuid
    new_predictions = {}
    for key, value in preds_bidaf.iteritems():
        uuid = key
        ans_bidaf = value[0]
        prob_bidaf = value[1]
        if key in preds_rnet:  # find the same key in the R-Net predictions
            val_rnet = preds_rnet[key]
            ans_rnet = val_rnet[0]
            prob_rnet = val_rnet[1]
        else:
            # No R-Net prediction for this uuid: fall back to the BiDAF answer
            print("key not found in rnet_json")
            new_predictions[uuid] = ans_bidaf
            continue

        ## Check which prob is higher and then assign new answer
        if prob_bidaf >= prob_rnet:
            final_answer = ans_bidaf
        else:
            final_answer = ans_rnet

        new_predictions[uuid] = final_answer
    return new_predictions


file_bidaf = str(sys.argv[1])
file_rnet = str(sys.argv[2])
json_out_path = 'predictions.json'

# preds_bidaf = json.load(open(file_bidaf))
# preds_rnet = json.load(open(file_rnet))

preds_bidaf = json.load(open(os.path.join(cwd,file_bidaf)))
preds_rnet = json.load(open(os.path.join(cwd, file_rnet)))

new_predictions = ensemble(preds_bidaf, preds_rnet)

print("Writing predictions to %s..." % json_out_path)
with io.open(json_out_path, 'w', encoding='utf-8') as f:
f.write(unicode(json.dumps(new_predictions, ensure_ascii=False)))
print("Wrote predictions to %s" % json_out_path)
9 changes: 4 additions & 5 deletions code/main.py
@@ -27,7 +27,7 @@

from qa_model import QAModel
from vocab import get_glove
- from official_eval_helper import get_json_data, generate_answers
+ from official_eval_helper import get_json_data, generate_answers, generate_answers_prob


logging.basicConfig(level=logging.INFO)
@@ -58,7 +58,7 @@


## Bool flags to select different models
tf.app.flags.DEFINE_bool("do_char_embed", True, "Include char embedding -True/False")
tf.app.flags.DEFINE_bool("do_char_embed", False, "Include char embedding -True/False")
tf.app.flags.DEFINE_bool("add_highway_layer", True, "Add highway layer to concatenated embeddings -True/False")
tf.app.flags.DEFINE_bool("cnn_encoder", False, "Add CNN Encoder Layer -True/False")
tf.app.flags.DEFINE_bool("rnet_attention", False, "Perform RNET QP and SM attention-True/False")
@@ -74,10 +74,10 @@


## Hyperparameters for CNN Encoder
tf.app.flags.DEFINE_integer("filter_size_encoder", 20, "Size of filter for cnn encoder")
tf.app.flags.DEFINE_integer("filter_size_encoder", 50, "Size of filter for cnn encoder")

## Hyperparameters for BIDAF
tf.app.flags.DEFINE_integer("hidden_size_modeling", 100, "Size of modeling layer") #
tf.app.flags.DEFINE_integer("hidden_size_modeling", 150, "Size of modeling layer") #

# How often to print, save, eval
tf.app.flags.DEFINE_integer("print_every", 1, "How many iterations to do per print.")
@@ -93,7 +93,6 @@
tf.app.flags.DEFINE_string("json_in_path", "", "For official_eval mode, path to JSON input file. You need to specify this for official_eval_mode.")
tf.app.flags.DEFINE_string("json_out_path", "predictions.json", "Output path for official_eval mode. Defaults to predictions.json")


FLAGS = tf.app.flags.FLAGS
os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu)

232 changes: 232 additions & 0 deletions code/main_ensemble.py
@@ -0,0 +1,232 @@
# Copyright 2018 Stanford University
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This file contains the entrypoint to the rest of the code"""

from __future__ import absolute_import
from __future__ import division

import os
import io
import json
import sys
import logging

import tensorflow as tf

from qa_model import QAModel
from vocab import get_glove
from official_eval_helper import get_json_data, generate_answers, generate_answers_prob


logging.basicConfig(level=logging.INFO)

MAIN_DIR = os.path.relpath(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # relative path of the main directory
DEFAULT_DATA_DIR = os.path.join(MAIN_DIR, "data") # relative path of data dir
EXPERIMENTS_DIR = os.path.join(MAIN_DIR, "experiments") # relative path of experiments dir


# High-level options
tf.app.flags.DEFINE_integer("gpu", 0, "Which GPU to use, if you have multiple.")
tf.app.flags.DEFINE_string("mode", "train", "Available modes: train / show_examples / official_eval")
tf.app.flags.DEFINE_string("experiment_name", "", "Unique name for your experiment. This will create a directory by this name in the experiments/ directory, which will hold all data related to this experiment")
tf.app.flags.DEFINE_integer("num_epochs", 0, "Number of epochs to train. 0 means train indefinitely")

# Hyperparameters
tf.app.flags.DEFINE_float("learning_rate", 0.001, "Learning rate.")
tf.app.flags.DEFINE_float("max_gradient_norm", 5.0, "Clip gradients to this norm.")
tf.app.flags.DEFINE_float("dropout", 0.15, "Fraction of units randomly dropped on non-recurrent connections.")
tf.app.flags.DEFINE_integer("batch_size", 20, "Batch size to use")
tf.app.flags.DEFINE_integer("hidden_size_encoder", 200, "Size of the hidden states") #150 for bidaf ; #200 otherwise
tf.app.flags.DEFINE_integer("hidden_size_qp_matching", 150, "Size of the hidden states")
tf.app.flags.DEFINE_integer("hidden_size_sm_matching", 50, "Size of the hidden states")
tf.app.flags.DEFINE_integer("hidden_size_fully_connected", 200, "Size of the hidden states")
tf.app.flags.DEFINE_integer("context_len", 300, "The maximum context length of your model")
tf.app.flags.DEFINE_integer("question_len", 30, "The maximum question length of your model")
tf.app.flags.DEFINE_integer("embedding_size", 100, "Size of the pretrained word vectors. This needs to be one of the available GloVe dimensions: 50/100/200/300")


## Bool flags to select different models
tf.app.flags.DEFINE_bool("do_char_embed", True, "Include char embedding -True/False")
tf.app.flags.DEFINE_bool("add_highway_layer", True, "Add highway layer to concatenated embeddings -True/False")
tf.app.flags.DEFINE_bool("cnn_encoder", False, "Add CNN Encoder Layer -True/False")
tf.app.flags.DEFINE_bool("rnet_attention", True, "Perform RNET QP and SM attention-True/False")
tf.app.flags.DEFINE_bool("bidaf_attention", False, "Use BIDAF Attention-True/False")
tf.app.flags.DEFINE_bool("answer_pointer_RNET", True, "Use Answer Pointer from RNET-True/False")
tf.app.flags.DEFINE_bool("smart_span", True, "Select start and end idx based on smart conditions-True/False")

## Hyperparameters for Char CNN
tf.app.flags.DEFINE_integer("char_embedding_size", 8, "Size of char embedding") #as suggested in handout
tf.app.flags.DEFINE_integer("word_max_len", 16, "max length for each word") # 99th percentile from Jupyter notebook
tf.app.flags.DEFINE_integer("char_out_size", 100, "num filters char CNN/out size") # same as filer size; as suggested in handout
tf.app.flags.DEFINE_integer("window_width", 5, "Kernel size for char cnn") #as suggested in handout


## Hyperparameters for CNN Encoder
tf.app.flags.DEFINE_integer("filter_size_encoder", 20, "Size of filter for cnn encoder")

## Hyperparameters for BIDAF
tf.app.flags.DEFINE_integer("hidden_size_modeling", 150, "Size of modeling layer") #

# How often to print, save, eval
tf.app.flags.DEFINE_integer("print_every", 1, "How many iterations to do per print.")
tf.app.flags.DEFINE_integer("save_every", 500, "How many iterations to do per save.")
tf.app.flags.DEFINE_integer("eval_every", 500, "How many iterations to do per calculating loss/f1/em on dev set. Warning: this is fairly time-consuming so don't do it too often.")
tf.app.flags.DEFINE_integer("keep", 1, "How many checkpoints to keep. 0 indicates keep all (you shouldn't need to do keep all though - it's very storage intensive).")

# Reading and saving data
tf.app.flags.DEFINE_string("train_dir", "", "Training directory to save the model parameters and other info. Defaults to experiments/{experiment_name}")
tf.app.flags.DEFINE_string("glove_path", "", "Path to glove .txt file. Defaults to data/glove.6B.{embedding_size}d.txt")
tf.app.flags.DEFINE_string("data_dir", DEFAULT_DATA_DIR, "Where to find preprocessed SQuAD data for training. Defaults to data/")
tf.app.flags.DEFINE_string("ckpt_load_dir", "", "For official_eval mode, which directory to load the checkpoint fron. You need to specify this for official_eval mode.")
tf.app.flags.DEFINE_string("json_in_path", "", "For official_eval mode, path to JSON input file. You need to specify this for official_eval_mode.")
tf.app.flags.DEFINE_string("json_out_path", "predictions.json", "Output path for official_eval mode. Defaults to predictions.json")

FLAGS = tf.app.flags.FLAGS
os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu)


def initialize_model(session, model, train_dir, expect_exists):
"""
Initializes model from train_dir.
Inputs:
session: TensorFlow session
model: QAModel
train_dir: path to directory where we'll look for checkpoint
expect_exists: If True, throw an error if no checkpoint is found.
If False, initialize fresh model if no checkpoint is found.
"""
print "Looking for model at %s..." % train_dir
ckpt = tf.train.get_checkpoint_state(train_dir)
v2_path = ckpt.model_checkpoint_path + ".index" if ckpt else ""
if ckpt and (tf.gfile.Exists(ckpt.model_checkpoint_path) or tf.gfile.Exists(v2_path)):
print "Reading model parameters from %s" % ckpt.model_checkpoint_path
model.saver.restore(session, ckpt.model_checkpoint_path)
else:
if expect_exists:
raise Exception("There is no saved checkpoint at %s" % train_dir)
else:
print "There is no saved checkpoint at %s. Creating model with fresh parameters." % train_dir
session.run(tf.global_variables_initializer())
print 'Num params: %d' % sum(v.get_shape().num_elements() for v in tf.trainable_variables())


def main(unused_argv):
# Print an error message if you've entered flags incorrectly
if len(unused_argv) != 1:
raise Exception("There is a problem with how you entered flags: %s" % unused_argv)

# Check for Python 2
if sys.version_info[0] != 2:
raise Exception("ERROR: You must use Python 2 but you are running Python %i" % sys.version_info[0])

# Print out Tensorflow version
print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

# Define train_dir
if not FLAGS.experiment_name and not FLAGS.train_dir and FLAGS.mode != "official_eval":
raise Exception("You need to specify either --experiment_name or --train_dir")
FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR, FLAGS.experiment_name)

# Initialize bestmodel directory
bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")

# Define path for glove vecs
FLAGS.glove_path = FLAGS.glove_path or os.path.join(DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size))

# Load embedding matrix and vocab mappings
emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path, FLAGS.embedding_size)

# Get filepaths to train/dev datafiles for tokenized queries, contexts and answers
train_context_path = os.path.join(FLAGS.data_dir, "train.context")
train_qn_path = os.path.join(FLAGS.data_dir, "train.question")
train_ans_path = os.path.join(FLAGS.data_dir, "train.span")
dev_context_path = os.path.join(FLAGS.data_dir, "dev.context")
dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question")
dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span")

# Initialize model
qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix)

# Some GPU settings
config=tf.ConfigProto()
config.gpu_options.allow_growth = True

# Split by mode
if FLAGS.mode == "train":

# Setup train dir and logfile
if not os.path.exists(FLAGS.train_dir):
os.makedirs(FLAGS.train_dir)
file_handler = logging.FileHandler(os.path.join(FLAGS.train_dir, "log.txt"))
logging.getLogger().addHandler(file_handler)

# Save a record of flags as a .json file in train_dir
with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
json.dump(FLAGS.__flags, fout)

# Make bestmodel dir if necessary
if not os.path.exists(bestmodel_dir):
os.makedirs(bestmodel_dir)

with tf.Session(config=config) as sess:

# Load most recent model
initialize_model(sess, qa_model, FLAGS.train_dir, expect_exists=False)

# Train
qa_model.train(sess, train_context_path, train_qn_path, train_ans_path, dev_qn_path, dev_context_path, dev_ans_path)


elif FLAGS.mode == "show_examples":
with tf.Session(config=config) as sess:

# Load best model
initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)

# Show examples with F1/EM scores
_, _ = qa_model.check_f1_em(sess, dev_context_path, dev_qn_path, dev_ans_path, "dev", num_samples=10, print_to_screen=True)


elif FLAGS.mode == "official_eval":
if FLAGS.json_in_path == "":
raise Exception("For official_eval mode, you need to specify --json_in_path")
if FLAGS.ckpt_load_dir == "":
raise Exception("For official_eval mode, you need to specify --ckpt_load_dir")

# Read the JSON data from file
qn_uuid_data, context_token_data, qn_token_data = get_json_data(FLAGS.json_in_path)

with tf.Session(config=config) as sess:

# Load model from ckpt_load_dir
initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True)

# Get a predicted answer for each example in the data
# Return a mapping answers_dict from uuid to answer
answers_dict = generate_answers_prob(sess, qa_model, word2id, qn_uuid_data, context_token_data, qn_token_data)

# Write the uuid->answer mapping to a json file in the root dir
print "Writing predictions to %s..." % FLAGS.json_out_path
with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
print "Wrote predictions to %s" % FLAGS.json_out_path


else:
raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)

if __name__ == "__main__":
tf.app.run()
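For context, a hypothetical end-to-end use of the two new files (paths, experiment names, and file names below are placeholders, not from this commit), assuming generate_answers_prob writes each answer together with its probability, which is the shape ensemble.py expects:

# Dump R-Net predictions with probabilities (placeholder paths):
#   python main_ensemble.py --mode=official_eval --ckpt_load_dir=experiments/rnet_run/best_checkpoint \
#       --json_in_path=data/dev-v1.1.json --json_out_path=rnet_preds.json
# Do the same with a BiDAF checkpoint (e.g. via main.py), then merge the two prediction files:
#   python ensemble.py bidaf_preds.json rnet_preds.json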