Commit 84eb147: position ok

qolina committed Aug 23, 2017
1 parent 8f0897d
Showing 3 changed files with 63 additions and 46 deletions.
src/cmd.trigger.sh (4 changes: 2 additions & 2 deletions)
@@ -1,7 +1,7 @@
#!/bin/sh

#python trigger_lstm.py -train ../ni_data/pre_processed_feng/tmp.train -test ../ni_data/pre_processed_feng/tmp.test -tag ../ni_data/pre_processed_feng/labellist -embed ../ni_data/pre_processed_feng/wordvector -vocab ../ni_data/pre_processed_feng/wordlist -model ../ni_data/models/model.trigger
CUDA_VISIBLE_DEVICES=3 python trigger_lstm.py -train ../ni_data/pre_processed_feng/tmp.train -test ../ni_data/pre_processed_feng/tmp.test -tag ../ni_data/pre_processed_feng/labellist -embed ../ni_data/pre_processed_feng/wordvector -vocab ../ni_data/pre_processed_feng/wordlist -model ../ni_data/models/model.trigger

CUDA_VISIBLE_DEVICES=3 python trigger_lstm.py -train ../ni_data/EngMix/train.triggerEvent.txt -dev ../ni_data/EngMix/dev.triggerEvent.txt -test ../ni_data/EngMix/test.triggerEvent.txt -embed ../ni_data/ace.pretrain300.vectors -model ../ni_data/models/model.trigger
#CUDA_VISIBLE_DEVICES=3 python trigger_lstm.py -train ../ni_data/EngMix/train.triggerEvent.txt -dev ../ni_data/EngMix/dev.triggerEvent.txt -test ../ni_data/EngMix/test.triggerEvent.txt -embed ../ni_data/ace.pretrain300.vectors -model ../ni_data/models/model.trigger

#CUDA_VISIBLE_DEVICES=3 python trigger_lstm.py -train ../ni_data/EngMix/train.triggerEvent.txt -dev ../ni_data/EngMix/dev.triggerEvent.txt -test ../ni_data/EngMix/test.triggerEvent.txt -embed ../ni_data/sskip.100.vectors -model ../ni_data/models/model.trigger
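These invocations hand the data paths to trigger_lstm.py through single-dash flags (-train, -dev, -test, -embed, -model; the older, commented-out call also passed -tag and -vocab). The actual option handling lives in parseArgs() in src/trigger_lstm.py and is not part of this diff; what follows is only a minimal argparse sketch of those flags, with assumed help strings.

# Sketch (assumption, not code from this repository): a minimal argparse
# front end mirroring the flags used in cmd.trigger.sh.
import argparse

def parse_args():
    p = argparse.ArgumentParser(description="BiLSTM(+CNN) trigger detection")
    p.add_argument("-train", required=True, help="training sentences with trigger labels")
    p.add_argument("-dev", help="development split")
    p.add_argument("-test", required=True, help="test split")
    p.add_argument("-embed", help="pretrained word vectors, e.g. ace.pretrain300.vectors")
    p.add_argument("-model", help="path prefix for saved models")
    # the older invocation (commented out above) also passed these:
    p.add_argument("-tag", help="label list file")
    p.add_argument("-vocab", help="word list file")
    return p.parse_args()

if __name__ == "__main__":
    print(parse_args())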
src/lstm_trigger.py (54 changes: 31 additions & 23 deletions)
@@ -22,16 +22,19 @@ def __init__(self, pretrain_embedding, pretrain_embed_dim, lstm_hidden_dim, voca
embedding_dim += random_dim
else:
self.word_embeddings = nn.Embedding(vocab_size, pretrain_embed_dim)
#print "## word embedding init", self.word_embeddings.weight.data[:5, :5]
if pretrain_embedding is not None:
self.word_embeddings.weight.data.copy_(torch.from_numpy(pretrain_embedding))
#print "## word embedding upd from pretrain", self.word_embeddings.weight.data[:5, :5]
#print "## pretrain embedding", pretrain_embedding[:5, :5]

#self.word_embeddings.weight.requires_grad = False
self.word_embeddings.weight.requires_grad = True
self.drop = nn.Dropout(dropout)
self.bilstm_flag = bilstm
self.lstm_layer = num_layers

# conv layer
self.cnn_flag = True
self.cnn_flag = False
self.position_size = 250
self.position_dim = 5
self.position_embedding = nn.Embedding(self.position_size, self.position_dim)
@@ -103,13 +106,17 @@ def prep_position(self, sentence):
positions = [autograd.Variable(torch.LongTensor(position), requires_grad=False) for position in positions_arr]
return positions

def forward(self, sentence, gpu):
debug = True
def forward(self, sentence, gpu, debug=False):
self.hidden = self.init_hidden(gpu)

embeds = self.word_embeddings(sentence)
positions = self.prep_position(sentence)
#print embeds
if debug:
print "## word embedding:", type(self.word_embeddings.weight.data), self.word_embeddings.weight.data.size()
print self.word_embeddings.weight.data[:5, :5]
print "## position embedding:", type(self.position_embedding.weight.data), self.position_embedding.weight.data.size()
print self.position_embedding.weight.data[:5]
print "## embeds", embeds.data[:10]

if self.random_embed:
pretrain_embeds = self.pretrain_word_embeddings(sentence)
@@ -124,33 +131,37 @@ def forward(self, sentence, gpu):
self.maxp2 = nn.MaxPool1d(len(sentence)-self.kernal_size2+1)

for word_id, position in enumerate(positions):
if debug and word_id == 0:
print "## -------------- word_id", word_id
print position.data.view(1, -1)
if gpu: position = position.cuda()
pos_embeds = self.position_embedding(position)
comb_embeds = torch.cat((embeds, pos_embeds), 1)
inputs = self.lstmformat2cnn(comb_embeds)
if debug:
if debug and word_id == 0:
print "## maxp1:", type(self.maxp1)
print "## maxp2:", type(self.maxp2)
print "## input:", type(inputs.data), inputs.data.size()
print inputs
print "## pos_embeds:", type(pos_embeds.data), pos_embeds.data.size()
print pos_embeds.data[:5]

c1 = self.conv1(inputs) # batch_size*out_channels*(sent_length-conv_width+1)
if debug:
if debug and word_id == 0:
print "## c1:", type(c1.data), c1.data.size()
p1 = self.maxp1(c1) # batch_size * out_channels * 1
if debug:
if debug and word_id == 0:
print "## p1:", type(p1.data), p1.data.size()

c2 = self.conv2(inputs)
if debug:
if debug and word_id == 0:
print "## c2:", type(c2.data), c2.data.size()
p2 = self.maxp2(c2)
if debug:
if debug and word_id == 0:
print "## p2:", type(p2.data), p2.data.size()

c1_embed_temp = self.cnnformat2lstm(p1)
c2_embed_temp = self.cnnformat2lstm(p2)
if debug:
if debug and word_id == 0:
print "## c1_embed_temp:", type(c1_embed_temp.data), c1_embed_temp.data.size()
print "## c2_embed_temp:", type(c2_embed_temp.data), c2_embed_temp.data.size()
if word_id == 0:
@@ -159,20 +170,18 @@
else:
c1_embed = torch.cat((c1_embed, c1_embed_temp), 0)
c2_embed = torch.cat((c2_embed, c2_embed_temp), 0)
if debug:
print "## c1_embed:", type(c1_embed.data), c1_embed.data.size()
print c1_embed
print "## c2_embed:", type(c2_embed.data), c2_embed.data.size()
print c2_embed
#c1_embed.append(c1_embed_temp)
#c2_embed.append(c2_embed_temp)
#c1_embed = torch.cat(c1_embed, 0)
#c2_embed = torch.cat(c2_embed, 0)
if debug:
print "## c1_embed:", type(c1_embed.data), c1_embed.data.size()
print c1_embed.data[:5, :5]
print "## c2_embed:", type(c2_embed.data), c2_embed.data.size()
print c2_embed.data[:5, :5]

embeds = self.drop(embeds)
lstm_out, self.hidden = self.lstm(
embeds.view(len(sentence), 1, -1), self.hidden)
lstm_out = lstm_out.view(len(sentence), -1)
if debug:
print "## lstm out:", lstm_out.data[:10, :10]
hidden_in = lstm_out
if self.cnn_flag:
#c1_embed= c1_embed.expand(len(sentence), c1_embed.size()[1])
@@ -183,7 +192,6 @@
hidden_snd = F.relu(hidden_snd)
tag_space = self.hidden2tag(hidden_snd)
tag_scores = F.log_softmax(tag_space)
#tag_scores = F.softmax(tag_space)
tag_space_iden = self.hidden2tag_iden(hidden_snd)
return tag_space, tag_scores, tag_space_iden


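For context, the forward() pass in this file concatenates, for every word, a relative-position embedding onto the word embeddings, runs two Conv1d filters of widths conv_width1/conv_width2 over the whole sentence, and max-pools each to a single vector per word before stacking the results into c1_embed/c2_embed. The sketch below reproduces that shape arithmetic against the current PyTorch API (no autograd.Variable); the exact relative-position scheme and the reshaping done by lstmformat2cnn/cnnformat2lstm are assumptions for illustration, not the repository's code.

# Sketch (assumed construction) of the per-word position-aware CNN branch.
import torch
import torch.nn as nn

sent_len, embed_dim = 30, 300        # arbitrary sentence length; embed_dim = pretrain_embed_dim
pos_size, pos_dim = 250, 5           # matches self.position_size / self.position_dim
filter_num, k1, k2 = 100, 2, 3       # matches conv_filter_num / conv_width1 / conv_width2

position_embedding = nn.Embedding(pos_size, pos_dim)
conv1 = nn.Conv1d(embed_dim + pos_dim, filter_num, k1)
conv2 = nn.Conv1d(embed_dim + pos_dim, filter_num, k2)
maxp1 = nn.MaxPool1d(sent_len - k1 + 1)
maxp2 = nn.MaxPool1d(sent_len - k2 + 1)

embeds = torch.randn(sent_len, embed_dim)   # word embeddings for one sentence

c1_rows, c2_rows = [], []
for word_id in range(sent_len):
    # one assumed form of prep_position: distance of every token from the
    # current word, kept non-negative so it can index an nn.Embedding
    position = torch.tensor([abs(i - word_id) for i in range(sent_len)])
    pos_embeds = position_embedding(position)            # (sent_len, pos_dim)
    comb_embeds = torch.cat((embeds, pos_embeds), 1)     # (sent_len, embed_dim + pos_dim)
    inputs = comb_embeds.t().unsqueeze(0)                # "lstmformat2cnn": (1, channels, sent_len)
    p1 = maxp1(conv1(inputs))                            # (1, filter_num, 1)
    p2 = maxp2(conv2(inputs))
    c1_rows.append(p1.view(1, -1))                       # "cnnformat2lstm": (1, filter_num)
    c2_rows.append(p2.view(1, -1))

c1_embed = torch.cat(c1_rows, 0)                         # (sent_len, filter_num), later joined with lstm_out
c2_embed = torch.cat(c2_rows, 0)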
src/trigger_lstm.py (51 changes: 30 additions & 21 deletions)
@@ -44,7 +44,7 @@ def eval_model(data, model, loss_function, data_flag, gpu):
loss_all = 0
gold_results = []
pred_results = []
for sent, tags, gold_triggers in data:
for sent, tags, gold_triggers in data[:100]:

sentence_in = arr2tensor(sent)
targets = arr2tensor(tags)
@@ -176,17 +176,18 @@ def parseArgs(args):

def main():

#training_data, test_data, vocab, tags_data, pretrain_embedding, model_path = load_data()
#model_path = model_path + "_" + time.strftime("%Y%m%d%H%M%S", time.gmtime()) + "_"
#if False:
# dev_sent_ids = random.sample(range(len(training_data)), 500)
# dev_data = [training_data[i] for i in dev_sent_ids]
# training_data = [training_data[i] for i in range(len(training_data)) if i not in dev_sent_ids]
#else:
# training_data = training_data[:-500]
# dev_data = training_data[-500:]

training_data, dev_data, test_data, vocab, tags_data, pretrain_embedding, model_path = load_data2()
if 1:
training_data, test_data, vocab, tags_data, pretrain_embedding, model_path = load_data()
model_path = model_path + "_" + time.strftime("%Y%m%d%H%M%S", time.gmtime()) + "_"
if False:
dev_sent_ids = random.sample(range(len(training_data)), 500)
dev_data = [training_data[i] for i in dev_sent_ids]
training_data = [training_data[i] for i in range(len(training_data)) if i not in dev_sent_ids]
else:
training_data = training_data[:-500]
dev_data = training_data[-500:]
else:
training_data, dev_data, test_data, vocab, tags_data, pretrain_embedding, model_path = load_data2()

vocab_size = len(vocab)
pretrain_vocab_size, pretrain_embed_dim = pretrain_embedding.shape
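The hunk above toggles between load_data() with an in-script dev split and load_data2() with a pre-built dev file. Below is a small sketch of the two split strategies, with assumed helper names; note that slicing dev_data before truncating training_data keeps the two sets disjoint, whereas the order in the diff takes the dev tail from the already-truncated training list.

# Sketch (assumed helpers) of the two dev-split strategies toggled above.
import random

def tail_split(training_data, n_dev=500):
    dev_data = training_data[-n_dev:]        # take dev first so the splits stay disjoint
    return training_data[:-n_dev], dev_data

def random_split(training_data, n_dev=500):
    dev_ids = set(random.sample(range(len(training_data)), n_dev))
    dev_data = [ex for i, ex in enumerate(training_data) if i in dev_ids]
    train_data = [ex for i, ex in enumerate(training_data) if i not in dev_ids]
    return train_data, dev_data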
@@ -216,14 +217,14 @@ def main():
bilstm = True
num_layers = 1
iteration_num = 200
Hidden_dim = 300
Hidden_dim = 100
learning_rate = 0.05
Embedding_dim = pretrain_embed_dim

conv_width1 = 2
conv_width2 = 3
conv_filter_num = 300
hidden_dim_snd = 300
conv_filter_num = 100
hidden_dim_snd = 100
para_arr = [vocab_size, tagset_size, Embedding_dim, Hidden_dim]
para_arr.extend([dropout, bilstm, num_layers, gpu, iteration_num, learning_rate])
para_arr.extend([len(training_data), len(dev_data), len(test_data)])
@@ -235,18 +236,22 @@ def main():
#sys.exit(0)

# init model
#model = LSTMTrigger(pretrain_embedding, pretrain_embed_dim, Hidden_dim, vocab_size, tagset_size, dropout, bilstm, num_layers, random_dim, gpu, conv_width1, conv_width2, conv_filter_num, hidden_dim_snd)
model = LSTMTrigger(None, pretrain_embed_dim, Hidden_dim, vocab_size, tagset_size, dropout, bilstm, num_layers, random_dim, gpu, conv_width1, conv_width2, conv_filter_num, hidden_dim_snd)
model = LSTMTrigger(pretrain_embedding, pretrain_embed_dim, Hidden_dim, vocab_size, tagset_size, dropout, bilstm, num_layers, random_dim, gpu, conv_width1, conv_width2, conv_filter_num, hidden_dim_snd)
# do not use pretrain embedding
#model = LSTMTrigger(None, pretrain_embed_dim, Hidden_dim, vocab_size, tagset_size, dropout, bilstm, num_layers, random_dim, gpu, conv_width1, conv_width2, conv_filter_num, hidden_dim_snd)

#loss_function = nn.NLLLoss()
loss_function = nn.CrossEntropyLoss()
parameters = filter(lambda a:a.requires_grad, model.parameters())
optimizer = optim.SGD(parameters, lr=learning_rate)

#parameters = filter(lambda a:a.requires_grad, model.parameters())
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
#optimizer = optim.Adadelta(parameters, lr=learning_rate)

# training
best_f1 = -1.0
for epoch in range(iteration_num):
for sent, tags, gold_triggers in training_data:
training_id = 0
for sent, tags, gold_triggers in training_data[:100]:
iden_tags = [1 if tag != 0 else tag for tag in tags]

model.zero_grad()
@@ -261,14 +266,18 @@ def main():
targets = targets.cuda()
iden_targets = iden_targets.cuda()

tag_space, tag_scores, tag_space_iden = model(sentence_in, gpu)
if training_id < 1:
tag_space, tag_scores, tag_space_iden = model(sentence_in, gpu, debug=True)
else:
tag_space, tag_scores, tag_space_iden = model(sentence_in, gpu)

#loss = loss_function(tag_scores, targets)
loss = loss_function(tag_space, targets) + loss_function(tag_space_iden, iden_targets)
#loss_iden = loss_function(tag_space_iden, iden_targets)
#loss += loss_iden
loss.backward()
optimizer.step()
training_id += 1

loss_train, prf_train, prf_train_iden = eval_model(training_data, model, loss_function, "train", gpu)
print "## train results on epoch:", epoch, Tab, loss_train, time.asctime(), Tab,

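The training loop above optimizes a joint objective: CrossEntropyLoss on the raw event-type scores (tag_space) plus CrossEntropyLoss on the binary trigger-identification scores (tag_space_iden). Since CrossEntropyLoss is log_softmax followed by NLLLoss, this is equivalent to the commented-out NLLLoss-on-tag_scores alternative. A self-contained sketch with made-up tensor sizes:

# Sketch of the joint loss used above; sizes are arbitrary for illustration.
import torch
import torch.nn as nn

loss_function = nn.CrossEntropyLoss()

sent_len, tagset_size = 30, 34                                        # arbitrary for the sketch
tag_space = torch.randn(sent_len, tagset_size, requires_grad=True)    # per-token event-type scores
tag_space_iden = torch.randn(sent_len, 2, requires_grad=True)         # per-token trigger / non-trigger scores
targets = torch.randint(0, tagset_size, (sent_len,))
iden_targets = (targets != 0).long()                                  # mirrors: 1 if tag != 0 else tag

loss = loss_function(tag_space, targets) + loss_function(tag_space_iden, iden_targets)
loss.backward()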