//LSTM.h (forked from byzhang/Zoph_RNN)
//LSTM node header: contains all of the information the LSTM needs for forward propagation and for backpropagation of the gradients
#ifndef LSTM_IH_H
#define LSTM_IH_H
#include <Eigen/Dense>
#include "Eigen_Util.h"
#include "model.h"
//Forward declarations
template<typename dType>
class neuralMT_model;
template<typename dType>
class Input_To_Hidden_Layer;
template<typename dType>
class LSTM_IH_Node {
public:
//Pointer to the owning Input_To_Hidden_Layer, so the node can access all of the weight matrices
Input_To_Hidden_Layer<dType> *model;
//--------------------------------------------------GPU parameters------------------------------------
int minibatch_size;
int LSTM_size;
int index;
bool dropout;
dType dropout_rate;
dType *d_dropout_mask;
bool attention_model = false; //true only for the top LSTM layer on the target side
bool feed_input = false;
bool multi_attention = false;
//host pointers
dType *h_o_t;
dType *h_c_t;
dType *h_d_ERRt_ht;
int *h_input_vocab_indices_01;
int *h_input_vocab_indices;
dType *h_f_t;
dType *h_c_t_prev;
dType *h_c_prime_t_tanh;
dType *h_i_t;
dType *h_h_t_prev;
dType *h_sparse_lookup;
dType *h_h_t;
//device pointers
dType *d_d_ERRnTOtp1_ht;
dType *d_d_ERRnTOtp1_ct;
dType *d_d_ERRt_ht;
dType *d_o_t;
dType *d_c_t;
int *d_input_vocab_indices_01;
int *d_input_vocab_indices;
dType *d_f_t;
dType *d_c_t_prev;
dType *d_c_prime_t_tanh;
dType *d_i_t;
dType *d_h_t_prev;
dType *d_sparse_lookup;
dType *d_h_t;
dType *d_zeros; //points to a zero matrix that can be used for d_ERRt_ht in backprop
dType *d_ERRnTOt_h_tild;
dType *d_ERRnTOt_h_tild_cpy;
dType *d_h_tild;
dType *d_bi_dir_ht; //the pointer to send h_t to the bi_directional layer
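//For reference, judging by their names, the gate/state buffers above correspond to the standard
//LSTM recurrence (x_t is the looked-up embedding in d_sparse_lookup; the weight names M_*, W_*, b_*
//below are generic placeholders, the real parameters live in Input_To_Hidden_Layer):
//  i_t  = sigmoid(M_i*x_t + W_i*h_{t-1} + b_i)   -> d_i_t             (input gate)
//  f_t  = sigmoid(M_f*x_t + W_f*h_{t-1} + b_f)   -> d_f_t             (forget gate)
//  c'_t = tanh(M_c*x_t + W_c*h_{t-1} + b_c)      -> d_c_prime_t_tanh  (candidate cell)
//  c_t  = f_t.*c_{t-1} + i_t.*c'_t               -> d_c_t             (.* is elementwise)
//  o_t  = sigmoid(M_o*x_t + W_o*h_{t-1} + b_o)   -> d_o_t             (output gate)
//  h_t  = o_t.*tanh(c_t)                         -> d_h_t             (hidden state)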
//Constructor
LSTM_IH_Node(int LSTM_size,int minibatch_size,int vocab_size,struct Input_To_Hidden_Layer<dType> *m,int index,dType *d_zero_ptr,bool dropout,
dType dropout_rate);
void init_LSTM_GPU(int LSTM_size,int minibatch_size,int vocab_size,struct Input_To_Hidden_Layer<dType> *m);
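//Illustrative sketch only, not code from this project: inside Input_To_Hidden_Layer<dType> one
//node is typically constructed per time step; `nodes` and `longest_sentence` are assumed names,
//and `d_zeros_buf` stands for whatever zero buffer the layer passes as d_zero_ptr.
//  for (int t = 0; t < longest_sentence; t++)
//      nodes.push_back(LSTM_IH_Node<dType>(LSTM_size, minibatch_size, vocab_size,
//                                          this, t, d_zeros_buf, dropout, dropout_rate));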
void update_vectors_forward_GPU(int *d_input_vocab_indices,int *d_input_vocab_indices_01,
dType *d_h_t_prev,dType *d_c_t_prev);
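//Called once per time step before the forward pass; the previous node's d_h_t and d_c_t are passed
//in here so the recurrence can be chained (see the usage sketch at the bottom of this file).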
//Compute the forward values for the LSTM node
//This is after the node has received the previous hidden and cell state values
void forward_prop();
void forward_prop_GPU();
void back_prop_GPU(int index);
//Update the gradient matrices
void compute_gradients_GPU();
void backprop_prep_GPU(dType *d_d_ERRnTOtp1_ht,dType *d_d_ERRnTOtp1_ct);//,dType *d_d_ERRt_ht);
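//d_d_ERRnTOtp1_ht and d_d_ERRnTOtp1_ct hold the error flowing back from time steps t+1 through n
//with respect to h_t and c_t (hence the names); they are handed to the node here before back_prop_GPU is called.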
void update_vectors_forward_decoder(int *d_input_vocab_indices,int *d_input_vocab_indices_01);
void dump_LSTM(std::ofstream &LSTM_dump_stream,std::string intro);
void send_h_t_above();
void attention_extra();
};
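//Illustrative usage sketch, not part of this header; the real driver lives in the layer/model
//code and its exact call sequence may differ. `nodes`, `d_indices`, `d_indices_01`, `d_init_h`,
//`d_init_c` and the running error buffers are assumed names.
//
//  //forward pass over T time steps, chaining h_t/c_t from one node to the next
//  for (int t = 0; t < T; t++) {
//      nodes[t].update_vectors_forward_GPU(d_indices + t*minibatch_size,
//                                          d_indices_01 + t*minibatch_size,
//                                          t == 0 ? d_init_h : nodes[t-1].d_h_t,
//                                          t == 0 ? d_init_c : nodes[t-1].d_c_t);
//      nodes[t].forward_prop_GPU();
//  }
//  //backward pass, feeding each node the error accumulated from the later time steps
//  for (int t = T - 1; t >= 0; t--) {
//      nodes[t].backprop_prep_GPU(d_ERR_ht_from_tp1, d_ERR_ct_from_tp1);
//      nodes[t].back_prop_GPU(t); //produces the errors w.r.t. h_{t-1} and c_{t-1} for the next step
//  }
//  //compute_gradients_GPU() then folds the node's contributions into the layer's gradient
//  //matrices (when and where it is called is an assumption here)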
#endif