Add TextLSTM
graykode committed Jan 26, 2019
1 parent 5ca4266 commit 5d46c89
Showing 6 changed files with 165 additions and 6 deletions.
72 changes: 72 additions & 0 deletions 3-2.TextLSTM/TextLSTM-Tensor.py
@@ -0,0 +1,72 @@
'''
code by Tae Hwan Jung(Jeff Jung) @graykode
'''
import tensorflow as tf
import numpy as np

tf.reset_default_graph()

char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']
num_dic = {n: i for i, n in enumerate(char_arr)}
n_class = len(num_dic) # number of classes (= size of the character vocabulary)

seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']

# TextLSTM Parameters
n_step = 3 # use the first three characters to predict the last one
n_hidden = 128 # number of LSTM hidden units

def make_batch(seq_data):
    input_batch, target_batch = [], []

    for seq in seq_data:
        input = [num_dic[n] for n in seq[:-1]]  # 'm', 'a', 'k' is the input
        target = num_dic[seq[-1]]               # 'e' is the target
        input_batch.append(np.eye(n_class)[input])    # one-hot rows : [n_step, n_class]
        target_batch.append(np.eye(n_class)[target])  # one-hot target : [n_class]

    return input_batch, target_batch

# Model
X = tf.placeholder(tf.float32, [None, n_step, n_class]) # [batch_size, n_step, n_class]
Y = tf.placeholder(tf.float32, [None, n_class]) # [batch_size, n_class]

W = tf.Variable(tf.random_normal([n_hidden, n_class]))
b = tf.Variable(tf.random_normal([n_class]))

cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# outputs : [batch_size, n_step, n_hidden]
outputs = tf.transpose(outputs, [1, 0, 2]) # [n_step, batch_size, n_hidden]
outputs = outputs[-1] # [batch_size, n_hidden]
model = tf.matmul(outputs, W) + b # model : [batch_size, n_class]

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

prediction = tf.cast(tf.argmax(model, 1), tf.int32)

# Training
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

input_batch, target_batch = make_batch(seq_data)

for epoch in range(1000):
    _, loss = sess.run([optimizer, cost], feed_dict={X: input_batch, Y: target_batch})
    if (epoch + 1) % 100 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

input = [sen[:3] for sen in seq_data]
print(input)

predict = sess.run([prediction], feed_dict={X: input_batch})

output = []
for pre in predict[0]:
    for key, value in num_dic.items():
        if value == pre:
            output.append(key)
print(output)
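
As a side note on the decode loop above: since num_dic maps characters to indices, the per-prediction linear scan can be replaced by inverting the dictionary once. A minimal sketch, assuming the num_dic and predict variables from the script above (idx_to_char is a name introduced here for illustration):

idx_to_char = {i: c for c, i in num_dic.items()}    # invert char -> index into index -> char
output = [idx_to_char[int(p)] for p in predict[0]]  # decode each predicted class index
print(output)  # should approach the last letter of each word, e.g. 'e' for 'make', 'd' for 'need'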
84 changes: 84 additions & 0 deletions 3-2.TextLSTM/TextLSTM-Torch.py
@@ -0,0 +1,84 @@
'''
code by Tae Hwan Jung(Jeff Jung) @graykode
'''
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

dtype = torch.FloatTensor

char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']
num_dic = {n: i for i, n in enumerate(char_arr)}
n_class = len(num_dic) # number of classes (= size of the character vocabulary)

seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']

# TextLSTM Parameters
n_step = 3
n_hidden = 128

def make_batch(seq_data):
    input_batch, target_batch = [], []

    for seq in seq_data:
        input = [num_dic[n] for n in seq[:-1]]  # 'm', 'a', 'k' is the input
        target = num_dic[seq[-1]]               # 'e' is the target
        input_batch.append(np.eye(n_class)[input])
        target_batch.append(target)  # nn.CrossEntropyLoss expects a class index, not a one-hot vector

    return Variable(torch.Tensor(input_batch)), Variable(torch.LongTensor(target_batch))

class TextLSTM(nn.Module):
    def __init__(self):
        super(TextLSTM, self).__init__()

        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)
        self.W = nn.Parameter(torch.randn([n_hidden, n_class]).type(dtype))
        self.b = nn.Parameter(torch.randn([n_class]).type(dtype))

    def forward(self, X):
        input = X.transpose(0, 1)  # input : [n_step, batch_size, n_class]

        hidden_state = Variable(torch.zeros(1, len(X), n_hidden))  # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        cell_state = Variable(torch.zeros(1, len(X), n_hidden))    # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]

        # final_hidden_state, final_cell_state : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        outputs, (final_hidden_state, final_cell_state) = self.lstm(input, (hidden_state, cell_state))
        outputs = outputs[-1]  # [batch_size, n_hidden], hidden state at the last time step
        model = torch.mm(outputs, self.W) + self.b  # model : [batch_size, n_class]
        return model

input_batch, target_batch = make_batch(seq_data)

model = TextLSTM()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

output = model(input_batch)

# Training
for epoch in range(1000):
    optimizer.zero_grad()

    output = model(input_batch)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 100 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    loss.backward()
    optimizer.step()

input = [sen[:3] for sen in seq_data]
print(input)

predict = model(input_batch).data.max(1, keepdim=True)[1]

output = []
for pre in predict:
    for key, value in num_dic.items():
        if value == pre:
            output.append(key)
print(output)
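
A minimal usage sketch for the trained PyTorch model above, assuming model, num_dic, n_class, np, and torch from the script are in scope; predict_last_char is a hypothetical helper and 'mak' is just an illustrative prefix:

def predict_last_char(prefix):
    # one-hot encode the three-character prefix : [1, n_step, n_class]
    x = torch.Tensor(np.eye(n_class)[[num_dic[c] for c in prefix]]).unsqueeze(0)
    idx = model(x).data.max(1)[1].item()  # index of the most likely final character
    return [c for c, i in num_dic.items() if i == idx][0]

print(predict_last_char('mak'))  # should print 'e' once the model has fit the training words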
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion 4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention)-Torch.py
@@ -60,7 +60,7 @@ def forward(self, X):

        # final_hidden_state, final_cell_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        output, (final_hidden_state, final_cell_state) = self.lstm(input, (hidden_state, cell_state))
-       output = output.permute(1, 0, 2) # output : [batch_size, num_layers(=1) * num_directions(=2), n_hidden]
+       output = output.permute(1, 0, 2) # output : [batch_size, len_seq, n_hidden]
        attn_output = self.attention_net(output, final_hidden_state)
        return self.out(attn_output) # [batch_size, num_classes]

13 changes: 8 additions & 5 deletions README.md
@@ -25,7 +25,7 @@

#### 2. CNN(Convolutional Neural Network)

-- 2-1. [TextCNN](https://github.com/graykode/nlp-tutorial/tree/master/2-1.TextCNN) - **POS/NEG Classification**
+- 2-1. [TextCNN](https://github.com/graykode/nlp-tutorial/tree/master/2-1.TextCNN) - **Binary Sentiment Classification**
- Paper - [Convolutional Neural Networks for Sentence Classification(2014)](http://www.aclweb.org/anthology/D14-1181)
- 2-2. DCNN(Dynamic Convolutional Neural Network)

@@ -38,6 +38,7 @@
- 3-2. TextLSTM - **Autocomplete**
- Paper - [LONG SHORT-TERM MEMORY(1997)](https://www.bioinf.jku.at/publications/older/2604.pdf)
- 3-3. Bi-LSTM - **Sentences Classification**
+- Paper - [Bidirectional LSTM-CRF Models for Sequence Tagging(2015)](https://arxiv.org/abs/1508.01991)



@@ -46,13 +47,15 @@
- 4-1. [Sequence2Sequence](https://github.com/graykode/nlp-tutorial/tree/master/4-1.Seq2Seq) - **Change Word**
- Paper - [Learning Phrase Representations using RNN Encoder–Decoder
for Statistical Machine Translation(2014)](https://arxiv.org/pdf/1406.1078.pdf)
-- 4-2. [Attention Mechanism](https://github.com/graykode/nlp-tutorial/tree/master/4-2.Attention) - **Translate**
+- 4-2. [Seq2Seq with Attention](https://github.com/graykode/nlp-tutorial/tree/master/4-2.Attention) - **Translate**
- Paper - [Neural Machine Translation by Jointly Learning to Align and Translate(2014)](https://arxiv.org/abs/1409.0473)
-- 4-3. Bi-LSTM with Attention - **POS/NEG Classification**
-- 4-4. The Transformer - **Translate**
+- 4-3. [Bi-LSTM with Attention](https://github.com/graykode/nlp-tutorial/tree/master/4-3.Bi-LSTM(Attention)) - **Binary Sentiment Classification**
+- 4-4. [The Transformer](https://github.com/graykode/nlp-tutorial/tree/master/4-4.Transformer) - **Translate**
- Paper - [Attention Is All You Need(2017)](https://arxiv.org/abs/1706.03762)

-#### 6. New Trend Model


+#### 5. Model based on Transformer

- 6-1. BERT

