Commit

viterbi

HMJW committed Jun 9, 2020
1 parent 8479668 commit 44edccc
Showing 5 changed files with 62 additions and 88 deletions.
2 changes: 1 addition & 1 deletion config.ini
@@ -11,6 +11,6 @@ lr = 1e-3
clip = 5.0

[Run]
batch_size = 128
batch_size = 1
epochs = 50
patience = 10
4 changes: 2 additions & 2 deletions run.py
@@ -21,9 +21,9 @@
subparser = subcommand.add_subparser(name, subparsers)
subparser.add_argument('--conf', '-c', default='config.ini',
help='path to config file')
subparser.add_argument('--model', '-m', default='exp/fullptb_eval_loss_and_acc/model.char',
subparser.add_argument('--model', '-m', default='exp/HMM/model.char',
help='path to model file')
subparser.add_argument('--vocab', '-v', default='exp/fullptb_eval_loss_and_acc/vocab.char',
subparser.add_argument('--vocab', '-v', default='exp/HMM/vocab.char',
help='path to vocab file')
subparser.add_argument('--device', '-d', default='-1',
help='ID of GPU to use')
17 changes: 5 additions & 12 deletions tagger/cmds/train.py
@@ -40,7 +40,6 @@ def __call__(self, config):
if config.preprocess or not os.path.exists(config.vocab):
vocab = Vocab.from_corpus(corpus=train, min_freq=1)
vocab.collect(corpus=train, min_freq=1)
vocab.read_embeddings(Embedding.load(config.fembed, config.unk))
torch.save(vocab, config.vocab)
else:
vocab = torch.load(config.vocab)
@@ -55,26 +54,24 @@

print("Load the dataset")
trainset = TextDataset(vocab.numericalize(train))
devset = TextDataset(vocab.numericalize(dev))
testset = TextDataset(vocab.numericalize(test))
# set the data loaders
train_loader = batchify(trainset, config.batch_size, True)
dev_loader = batchify(devset, config.batch_size)
test_loader = batchify(testset, config.batch_size)
print(f"{'train:':6} {len(trainset):5} sentences, {train.nwords} words in total, "
f"{len(train_loader):3} batches provided")
print(f"{'dev:':6} {len(devset):5} sentences, {dev.nwords} words in total, "
f"{len(dev_loader):3} batches provided")
print(f"{'test:':6} {len(testset):5} sentences, {test.nwords} words in total, "
f"{len(test_loader):3} batches provided")

print("Create the model")
tagger = Tagger(config, vocab.embed).to(config.device)
tagger = Tagger(config).to(config.device)
print(f"{tagger}\n")

optimizer = Adam(tagger.parameters(), config.lr)
model = Model(config, vocab, tagger, optimizer)

test_metric = model.evaluate(test_loader)
print(f"{'test:':6} {test_metric}")
exit()
total_time = timedelta()
best_e, best_metric = 1, SpanF1Method(vocab)
last_loss, count = 0, 0
@@ -87,10 +84,6 @@
print(f"Epoch {epoch} / {config.epochs}:")
loss, train_metric = model.evaluate(train_loader)
print(f"{'train:':6} Loss: {loss:.4f} {train_metric}")
# loss, dev_metric = model.evaluate(dev_loader)
# print(f"{'dev:':6} Loss: {loss:.4f} {dev_metric}")
# loss, test_metric = model.evaluate(test_loader)
# print(f"{'test:':6} Loss: {loss:.4f} {test_metric}")

t = datetime.now() - start
# save the model if it is the best so far
16 changes: 5 additions & 11 deletions tagger/model.py
@@ -41,25 +41,19 @@ def train(self, loader):
def evaluate(self, loader):
self.tagger.eval()

loss, metric = 0, AccuracyMethod()
metric = AccuracyMethod()

for words, chars, labels, possible_labels in loader:
mask = words.ne(self.vocab.pad_index)
lens = mask.sum(dim=1)
targets = torch.split(labels[mask], lens.tolist())

s_emit = self.tagger(words, chars)
logZ = self.tagger.crf.get_logZ(s_emit, mask)

s_emit[~possible_labels] -= 100000
possible_logZ = self.tagger.crf.get_logZ(s_emit, mask)
loss += (logZ - possible_logZ) * words.size(0)
predicts = self.tagger.crf.viterbi(s_emit, mask)

s_emit = self.tagger(words)
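# give labels marked impossible for a token zero emission probability, so Viterbi (which takes the log) never selects them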
s_emit[~possible_labels] = 0
predicts = self.tagger.viterbi(s_emit, mask)
metric(predicts, targets)
loss /= len(loader)

return float(loss), metric
return metric

@torch.no_grad()
def predict(self, loader):
111 changes: 49 additions & 62 deletions tagger/tagger.py
@@ -10,81 +10,69 @@

class Tagger(nn.Module):

def __init__(self, config, embed):
def __init__(self, config):
super(Tagger, self).__init__()

self.config = config
# the embedding layer
self.pretrained = nn.Embedding.from_pretrained(embed)
self.word_embed = nn.Embedding(num_embeddings=config.n_words,
embedding_dim=config.n_embed)
# the char-lstm layer
self.char_lstm = CHAR_LSTM(n_chars=config.n_chars,
n_embed=config.n_char_embed,
n_out=config.n_char_out)
self.embed_dropout = nn.Dropout(p=config.embed_dropout)

# the word-lstm layer
self.lstm = nn.LSTM(input_size=config.n_embed + config.n_char_out,
hidden_size=config.n_lstm_hidden,
batch_first=True,
bidirectional=True)

# the MLP layers
self.hid = nn.Linear(config.n_lstm_hidden * 2, config.n_lstm_hidden)
self.activation = nn.Tanh()
self.out = nn.Linear(config.n_lstm_hidden, config.n_labels)

# CRF layer
self.crf = CRF(config.n_labels)

self.pad_index = config.pad_index
self.unk_index = config.unk_index
self.n_tags = config.n_labels
self.n_words = config.n_words
self.trans = nn.Parameter(torch.Tensor(config.n_labels, config.n_labels))
self.emits = nn.Parameter(torch.Tensor(config.n_labels, config.n_words))
self.strans = nn.Parameter(torch.Tensor(config.n_labels))
self.etrans = nn.Parameter(torch.Tensor(config.n_labels))

self.reset_parameters()

def extra_repr(self):
info = f"n_tags={self.n_tags}, n_words={self.n_words}"

return info

def reset_parameters(self):
# init Linear
nn.init.xavier_uniform_(self.hid.weight)
nn.init.xavier_uniform_(self.out.weight)
# init word emb
nn.init.zeros_(self.word_embed.weight)
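# initialize the HMM parameters as uniform distributions (logs are taken at decode time)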
nn.init.constant_(self.trans, 1 / self.n_tags)
nn.init.constant_(self.strans, 1 / self.n_tags)
nn.init.constant_(self.etrans, 1 / self.n_tags)
nn.init.constant_(self.emits, 1 / self.n_words)

def forward(self, words, chars):
def forward(self, words):
# get the mask and lengths of given batch
mask = words.ne(self.pad_index)
lens = mask.sum(dim=1)
# set the indices larger than num_embeddings to unk_index
ext_mask = words.ge(self.word_embed.num_embeddings)
ext_words = words.masked_fill(ext_mask, self.unk_index)

# get outputs from embedding layers
word_embed = self.pretrained(words) + self.word_embed(ext_words)
char_embed = self.char_lstm(chars[mask])
char_embed = pad_sequence(torch.split(char_embed, lens.tolist()), True)

# concatenate the word and char representations
x = torch.cat((word_embed, char_embed), dim=-1)
x = self.embed_dropout(x)

sorted_lens, indices = torch.sort(lens, descending=True)
inverse_indices = indices.argsort()
x = pack_padded_sequence(x[indices], sorted_lens, True)
x, _ = self.lstm(x)
x, _ = pad_packed_sequence(x, True)
x = x[inverse_indices]

x = self.hid(x)
x = self.activation(x)
x = self.out(x)

return x
batch_size = words.size(0)
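# gather an emission score for every (tag, word) pair in the batch: [batch, n_tags, seq_len], transposed below to [batch, seq_len, n_tags]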
x = self.emits.unsqueeze(0).repeat(batch_size, 1, 1).gather(-1, words.unsqueeze(1).repeat(1, self.n_tags, 1))

return x.transpose(1, 2)

def viterbi(self, emit, mask):
emit, mask = emit.transpose(0, 1), mask.t()
T, B, N = emit.shape
lens = mask.sum(dim=0)
delta = emit.new_zeros(T, B, N)
paths = emit.new_zeros(T, B, N, dtype=torch.long)
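# delta[t, b, j]: log-score of the best tag sequence ending in tag j at step t
# paths[t, b, j]: backpointer to the best previous tag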

delta[0] = torch.log(self.strans) + torch.log(emit[0]) # [B, N]
for i in range(1, T):
trans_i = self.trans.unsqueeze(0) # [1, N, N]
emit_i = emit[i].unsqueeze(1) # [B, 1, N]
scores = torch.log(trans_i) + torch.log(emit_i) + delta[i - 1].unsqueeze(2) # [B, N, N]
delta[i], paths[i] = torch.max(scores, dim=1)

predicts = []
for i, length in enumerate(lens):
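# pick the best final tag, including the end-of-sentence transition score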
prev = torch.argmax(delta[length - 1, i] + torch.log(self.etrans))

predict = [prev]
for j in reversed(range(1, length)):
prev = paths[j, i, prev]
predict.append(prev)
# flip the predicted sequence before appending it to the list
predicts.append(paths.new_tensor(predict).flip(0))

return predicts

@classmethod
def load(cls, fname):
device = 'cuda' if torch.cuda.is_available() else 'cpu'
state = torch.load(fname, map_location=device)
parser = cls(state['config'], state['embed'])
parser = cls(state['config'])
parser.load_state_dict(state['state_dict'])
parser.to(device)

@@ -93,7 +81,6 @@ def load(cls, fname):
def save(self, fname):
state = {
'config': self.config,
'embed': self.pretrained.weight,
'state_dict': self.state_dict()
}
torch.save(state, fname)
