Add TextLSTM
graykode committed Jan 26, 2019
1 parent 5ca4266 commit 5d46c89
Showing 6 changed files with 165 additions and 6 deletions.
72 changes: 72 additions & 0 deletions 3-2.TextLSTM/TextLSTM-Tensor.py
@@ -0,0 +1,72 @@
'''
code by Tae Hwan Jung(Jeff Jung) @graykode
'''
import tensorflow as tf
import numpy as np

tf.reset_default_graph()

char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']
num_dic = {n: i for i, n in enumerate(char_arr)}
n_class = len(num_dic) # number of classes (= size of the character vocabulary)

seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']

# TextLSTM Parameters
n_step = 3 # use the first three characters to predict the last one
n_hidden = 128 # number of LSTM hidden units

def make_batch(seq_data):
    input_batch, target_batch = [], []

    for seq in seq_data:
        input = [num_dic[n] for n in seq[:-1]]  # 'm', 'a', 'k' is the input
        target = num_dic[seq[-1]]               # 'e' is the target
        input_batch.append(np.eye(n_class)[input])    # one-hot rows : [n_step, n_class]
        target_batch.append(np.eye(n_class)[target])  # one-hot target : [n_class]

    return input_batch, target_batch

# Model
X = tf.placeholder(tf.float32, [None, n_step, n_class]) # [batch_size, n_step, n_class]
Y = tf.placeholder(tf.float32, [None, n_class]) # [batch_size, n_class]

W = tf.Variable(tf.random_normal([n_hidden, n_class]))
b = tf.Variable(tf.random_normal([n_class]))

cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# outputs : [batch_size, n_step, n_hidden]
outputs = tf.transpose(outputs, [1, 0, 2]) # [n_step, batch_size, n_hidden]
outputs = outputs[-1] # [batch_size, n_hidden]
model = tf.matmul(outputs, W) + b # model : [batch_size, n_class]

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

prediction = tf.cast(tf.argmax(model, 1), tf.int32)

# Training
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

input_batch, target_batch = make_batch(seq_data)

for epoch in range(1000):
    _, loss = sess.run([optimizer, cost], feed_dict={X: input_batch, Y: target_batch})
    if (epoch + 1) % 100 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

input = [sen[:3] for sen in seq_data]
print(input)

predict = sess.run([prediction], feed_dict={X: input_batch})

output = []
for pre in predict[0]:
    for key, value in num_dic.items():
        if value == pre:
            output.append(key)
print(output)
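
As a side note on the decode loop above: since num_dic maps characters to indices, the per-prediction linear scan can be replaced by inverting the dictionary once. A minimal sketch, assuming the num_dic and predict variables from the script above (idx_to_char is a name introduced here for illustration):

idx_to_char = {i: c for c, i in num_dic.items()}    # invert char -> index into index -> char
output = [idx_to_char[int(p)] for p in predict[0]]  # decode each predicted class index
print(output)  # should approach the last letter of each word, e.g. 'e' for 'make', 'd' for 'need'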
84 changes: 84 additions & 0 deletions 3-2.TextLSTM/TextLSTM-Torch.py
@@ -0,0 +1,84 @@
'''
code by Tae Hwan Jung(Jeff Jung) @graykode
'''
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

dtype = torch.FloatTensor

char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']
num_dic = {n: i for i, n in enumerate(char_arr)}
n_class = len(num_dic) # number of classes (= size of the character vocabulary)

seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']

# TextLSTM Parameters
n_step = 3
n_hidden = 128

def make_batch(seq_data):
    input_batch, target_batch = [], []

    for seq in seq_data:
        input = [num_dic[n] for n in seq[:-1]]  # 'm', 'a', 'k' is the input
        target = num_dic[seq[-1]]               # 'e' is the target
        input_batch.append(np.eye(n_class)[input])
        target_batch.append(target)  # nn.CrossEntropyLoss expects a class index, not a one-hot vector

    return Variable(torch.Tensor(input_batch)), Variable(torch.LongTensor(target_batch))

class TextLSTM(nn.Module):
    def __init__(self):
        super(TextLSTM, self).__init__()

        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)
        self.W = nn.Parameter(torch.randn([n_hidden, n_class]).type(dtype))
        self.b = nn.Parameter(torch.randn([n_class]).type(dtype))

    def forward(self, X):
        input = X.transpose(0, 1)  # input : [n_step, batch_size, n_class]

        hidden_state = Variable(torch.zeros(1, len(X), n_hidden))  # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        cell_state = Variable(torch.zeros(1, len(X), n_hidden))    # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]

        # final_hidden_state, final_cell_state : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        outputs, (final_hidden_state, final_cell_state) = self.lstm(input, (hidden_state, cell_state))
        outputs = outputs[-1]  # [batch_size, n_hidden], hidden state at the last time step
        model = torch.mm(outputs, self.W) + self.b  # model : [batch_size, n_class]
        return model

input_batch, target_batch = make_batch(seq_data)

model = TextLSTM()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

output = model(input_batch)

# Training
for epoch in range(1000):
    optimizer.zero_grad()

    output = model(input_batch)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 100 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    loss.backward()
    optimizer.step()

input = [sen[:3] for sen in seq_data]
print(input)

predict = model(input_batch).data.max(1, keepdim=True)[1]

output = []
for pre in predict:
    for key, value in num_dic.items():
        if value == pre:
            output.append(key)
print(output)
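
A minimal usage sketch for the trained PyTorch model above, assuming model, num_dic, n_class, np, and torch from the script are in scope; predict_last_char is a hypothetical helper and 'mak' is just an illustrative prefix:

def predict_last_char(prefix):
    # one-hot encode the three-character prefix : [1, n_step, n_class]
    x = torch.Tensor(np.eye(n_class)[[num_dic[c] for c in prefix]]).unsqueeze(0)
    idx = model(x).data.max(1)[1].item()  # index of the most likely final character
    return [c for c, i in num_dic.items() if i == idx][0]

print(predict_last_char('mak'))  # should print 'e' once the model has fit the training words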
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion 4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention)-Torch.py
@@ -60,7 +60,7 @@ def forward(self, X):

        # final_hidden_state, final_cell_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        output, (final_hidden_state, final_cell_state) = self.lstm(input, (hidden_state, cell_state))
-       output = output.permute(1, 0, 2) # output : [batch_size, num_layers(=1) * num_directions(=2), n_hidden]
+       output = output.permute(1, 0, 2) # output : [batch_size, len_seq, n_hidden]
        attn_output = self.attention_net(output, final_hidden_state)
        return self.out(attn_output) # [batch_size, num_classes]

13 changes: 8 additions & 5 deletions README.md
@@ -25,7 +25,7 @@

#### 2. CNN(Convolutional Neural Network)

-- 2-1. [TextCNN](https://github.com/graykode/nlp-tutorial/tree/master/2-1.TextCNN) - **POS/NEG Classification**
+- 2-1. [TextCNN](https://github.com/graykode/nlp-tutorial/tree/master/2-1.TextCNN) - **Binary Sentiment Classification**
- Paper - [Convolutional Neural Networks for Sentence Classification(2014)](http://www.aclweb.org/anthology/D14-1181)
- 2-2. DCNN(Dynamic Convolutional Neural Network)

@@ -38,6 +38,7 @@
- 3-2. TextLSTM - **Autocomplete**
- Paper - [LONG SHORT-TERM MEMORY(1997)](https://www.bioinf.jku.at/publications/older/2604.pdf)
- 3-3. Bi-LSTM - **Sentences Classification**
+- Paper - [Bidirectional LSTM-CRF Models for Sequence Tagging(2015)](https://arxiv.org/abs/1508.01991)



@@ -46,13 +47,15 @@
- 4-1. [Sequence2Sequence](https://github.com/graykode/nlp-tutorial/tree/master/4-1.Seq2Seq) - **Change Word**
- Paper - [Learning Phrase Representations using RNN Encoder–Decoder
for Statistical Machine Translation(2014)](https://arxiv.org/pdf/1406.1078.pdf)
-- 4-2. [Attention Mechanism](https://github.com/graykode/nlp-tutorial/tree/master/4-2.Attention) - **Translate**
+- 4-2. [Seq2Seq with Attention](https://github.com/graykode/nlp-tutorial/tree/master/4-2.Attention) - **Translate**
- Paper - [Neural Machine Translation by Jointly Learning to Align and Translate(2014)](https://arxiv.org/abs/1409.0473)
-- 4-3. Bi-LSTM with Attention - **POS/NEG Classification**
-- 4-4. The Transformer - **Translate**
+- 4-3. [Bi-LSTM with Attention](https://github.com/graykode/nlp-tutorial/tree/master/4-3.Bi-LSTM(Attention)) - **Binary Sentiment Classification**
+- 4-4. [The Transformer](https://github.com/graykode/nlp-tutorial/tree/master/4-4.Transformer) - **Translate**
- Paper - [Attention Is All You Need(2017)](https://arxiv.org/abs/1706.03762)

-#### 6. New Trend Model


+#### 5. Model based on Transformer

- 6-1. BERT

