@@ -94,13 +94,13 @@ class ResponseSelector(DIETClassifier):
         # sizes of hidden layers before the embedding layer
         # for input words and responses
         # the number of hidden layers is thus equal to the length of this list
-        HIDDEN_LAYERS_SIZES: {TEXT: [], LABEL: []},
+        HIDDEN_LAYERS_SIZES: {TEXT: [256, 128], LABEL: [256, 128]},
         # whether to share the hidden layer weights between input words and intent labels
         SHARE_HIDDEN_LAYERS: False,
         # number of units in transformer
-        TRANSFORMER_SIZE: 256,
+        TRANSFORMER_SIZE: None,
         # number of transformer layers
-        NUM_TRANSFORMER_LAYERS: 2,
+        NUM_TRANSFORMER_LAYERS: 0,
         # number of attention heads in transformer
         NUM_HEADS: 4,
         # max sequence length if pos_encoding='emb'
@@ -154,7 +154,7 @@ class ResponseSelector(DIETClassifier):
         # use a unidirectional or bidirectional encoder
         UNIDIRECTIONAL_ENCODER: False,
         # if true, apply dropout to sparse tensors
-        SPARSE_INPUT_DROPOUT: True,
+        SPARSE_INPUT_DROPOUT: False,
         # visualization of accuracy
         # how often to calculate training accuracy
         EVAL_NUM_EPOCHS: 20,  # small values may hurt performance
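The net effect of this change is that `ResponseSelector` no longer runs a transformer by default (`TRANSFORMER_SIZE: None`, `NUM_TRANSFORMER_LAYERS: 0`); the embeddings instead come from the newly enabled `[256, 128]` feed-forward layers, and sparse input dropout is off. A minimal sketch of how a pipeline could restore the previous behaviour by overriding the new defaults — the constant names come from this diff, but the import paths and `component_config` usage are assumptions based on Rasa 2.x's component API, not part of this commit:

```python
# Sketch only: import paths assumed from Rasa 2.x, not part of this commit.
from rasa.nlu.selectors.response_selector import ResponseSelector
from rasa.utils.tensorflow.constants import (
    NUM_TRANSFORMER_LAYERS,
    SPARSE_INPUT_DROPOUT,
    TRANSFORMER_SIZE,
)

# Override the new defaults to get the old transformer-based encoder back.
selector = ResponseSelector(
    component_config={
        TRANSFORMER_SIZE: 256,        # old default, now None
        NUM_TRANSFORMER_LAYERS: 2,    # old default, now 0
        SPARSE_INPUT_DROPOUT: True,   # old default, now False
    }
)
```

Any of these keys can equally be set on the `ResponseSelector` entry in `config.yml`; the dict above only mirrors what the changed defaults would otherwise supply.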