Skip to content

Commit cd30622

Browse files
Merge pull request RasaHQ#5636 from RasaHQ/input-dropout: "Input dropout"
2 parents: 0f4c2ea + 0c60176 — commit cd30622

File tree

7 files changed

+52
-13
lines changed

7 files changed

+52
-13
lines changed

changelog/5635.improvement.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+ Update dependencies based on the ``dependabot`` check.

changelog/5636.improvement.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1- Update dependencies based on the ``dependabot`` check.
1+ Add dropout between ``FFNN`` and ``DenseForSparse`` layers in ``DIETClassifier``,
2+ ``ResponseSelector`` and ``EmbeddingIntentClassifier`` controlled by ``use_dense_input_dropout`` config parameter.

docs/nlu/components.rst

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -922,7 +922,9 @@ EmbeddingIntentClassifier
922922
| drop_rate | 0.2 | Dropout rate for encoder. Value should be between 0 and 1. |
923923
| | | The higher the value the higher the regularization effect. |
924924
+---------------------------------+------------------+--------------------------------------------------------------+
925-
| use_sparse_input_dropout | True | If 'True' apply dropout to sparse tensors. |
925+
| use_sparse_input_dropout | False | If 'True' apply dropout to sparse input tensors. |
926+
+---------------------------------+------------------+--------------------------------------------------------------+
927+
| use_dense_input_dropout | False | If 'True' apply dropout to dense input tensors. |
926928
+---------------------------------+------------------+--------------------------------------------------------------+
927929
| evaluate_every_number_of_epochs | 20 | How often to calculate validation accuracy. |
928930
| | | Set to '-1' to evaluate just once at the end of training. |
@@ -1486,7 +1488,9 @@ ResponseSelector
14861488
| drop_rate_attention | 0.0 | Dropout rate for attention. Value should be between 0 and 1. |
14871489
| | | The higher the value the higher the regularization effect. |
14881490
+---------------------------------+-------------------+--------------------------------------------------------------+
1489-
| use_sparse_input_dropout | False | If 'True' apply dropout to sparse tensors. |
1491+
| use_sparse_input_dropout | False | If 'True' apply dropout to sparse input tensors. |
1492+
+---------------------------------+-------------------+--------------------------------------------------------------+
1493+
| use_dense_input_dropout | False | If 'True' apply dropout to dense input tensors. |
14901494
+---------------------------------+-------------------+--------------------------------------------------------------+
14911495
| evaluate_every_number_of_epochs | 20 | How often to calculate validation accuracy. |
14921496
| | | Set to '-1' to evaluate just once at the end of training. |
@@ -1715,7 +1719,9 @@ DIETClassifier
17151719
| drop_rate_attention | 0.0 | Dropout rate for attention. Value should be between 0 and 1. |
17161720
| | | The higher the value the higher the regularization effect. |
17171721
+---------------------------------+------------------+--------------------------------------------------------------+
1718-
| use_sparse_input_dropout | True | If 'True' apply dropout to sparse tensors. |
1722+
| use_sparse_input_dropout | True | If 'True' apply dropout to sparse input tensors. |
1723+
+---------------------------------+------------------+--------------------------------------------------------------+
1724+
| use_dense_input_dropout | True | If 'True' apply dropout to dense input tensors. |
17191725
+---------------------------------+------------------+--------------------------------------------------------------+
17201726
| evaluate_every_number_of_epochs | 20 | How often to calculate validation accuracy. |
17211727
| | | Set to '-1' to evaluate just once at the end of training. |

rasa/nlu/classifiers/diet_classifier.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
SIMILARITY_TYPE,
5656
NUM_NEG,
5757
SPARSE_INPUT_DROPOUT,
58+
DENSE_INPUT_DROPOUT,
5859
MASKED_LM,
5960
ENTITY_RECOGNITION,
6061
TENSORBOARD_LOG_DIR,
@@ -188,8 +189,10 @@ def required_components(cls) -> List[Type[Component]]:
188189
DROP_RATE_ATTENTION: 0,
189190
# Sparsity of the weights in dense layers
190191
WEIGHT_SPARSITY: 0.8,
191-
# If 'True' apply dropout to sparse tensors
192+
# If 'True' apply dropout to sparse input tensors
192193
SPARSE_INPUT_DROPOUT: True,
194+
# If 'True' apply dropout to dense input tensors
195+
DENSE_INPUT_DROPOUT: True,
193196
# ## Evaluation parameters
194197
# How often calculate validation accuracy.
195198
# Small values may hurt performance, e.g. model accuracy.
@@ -1075,7 +1078,10 @@ def _prepare_sparse_dense_layers(
10751078
)
10761079

10771080
def _prepare_input_layers(self, name: Text) -> None:
1078-
self._tf_layers[f"sparse_dropout.{name}"] = layers.SparseDropout(
1081+
self._tf_layers[f"sparse_input_dropout.{name}"] = layers.SparseDropout(
1082+
rate=self.config[DROP_RATE]
1083+
)
1084+
self._tf_layers[f"dense_input_dropout.{name}"] = tf.keras.layers.Dropout(
10791085
rate=self.config[DROP_RATE]
10801086
)
10811087
self._prepare_sparse_dense_layers(
@@ -1172,21 +1178,30 @@ def _combine_sparse_dense_features(
11721178
mask: tf.Tensor,
11731179
name: Text,
11741180
sparse_dropout: bool = False,
1181+
dense_dropout: bool = False,
11751182
) -> tf.Tensor:
11761183

11771184
dense_features = []
11781185

11791186
for f in features:
11801187
if isinstance(f, tf.SparseTensor):
11811188
if sparse_dropout:
1182-
_f = self._tf_layers[f"sparse_dropout.{name}"](f, self._training)
1189+
_f = self._tf_layers[f"sparse_input_dropout.{name}"](
1190+
f, self._training
1191+
)
11831192
else:
11841193
_f = f
11851194
dense_features.append(self._tf_layers[f"sparse_to_dense.{name}"](_f))
11861195
else:
11871196
dense_features.append(f)
11881197

1189-
return tf.concat(dense_features, axis=-1) * mask
1198+
outputs = tf.concat(dense_features, axis=-1) * mask
1199+
if dense_dropout:
1200+
outputs = self._tf_layers[f"dense_input_dropout.{name}"](
1201+
outputs, self._training
1202+
)
1203+
1204+
return outputs
11901205

11911206
def _features_as_seq_ids(
11921207
self, features: List[Union[np.ndarray, tf.Tensor, tf.SparseTensor]], name: Text
@@ -1213,9 +1228,12 @@ def _create_bow(
12131228
mask: tf.Tensor,
12141229
name: Text,
12151230
sparse_dropout: bool = False,
1231+
dense_dropout: bool = False,
12161232
) -> tf.Tensor:
12171233

1218-
x = self._combine_sparse_dense_features(features, mask, name, sparse_dropout)
1234+
x = self._combine_sparse_dense_features(
1235+
features, mask, name, sparse_dropout, dense_dropout
1236+
)
12191237
x = tf.reduce_sum(x, axis=1) # convert to bag-of-words
12201238
return self._tf_layers[f"ffnn.{name}"](x, self._training)
12211239

@@ -1224,6 +1242,8 @@ def _create_sequence(
12241242
features: List[Union[tf.Tensor, tf.SparseTensor]],
12251243
mask: tf.Tensor,
12261244
name: Text,
1245+
sparse_dropout: bool = False,
1246+
dense_dropout: bool = False,
12271247
masked_lm_loss: bool = False,
12281248
sequence_ids: bool = False,
12291249
) -> Tuple[tf.Tensor, tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor]]:
@@ -1233,7 +1253,7 @@ def _create_sequence(
12331253
seq_ids = None
12341254

12351255
inputs = self._combine_sparse_dense_features(
1236-
features, mask, name, sparse_dropout=self.config[SPARSE_INPUT_DROPOUT]
1256+
features, mask, name, sparse_dropout, dense_dropout,
12371257
)
12381258

12391259
inputs = self._tf_layers[f"ffnn.{name}"](inputs, self._training)
@@ -1387,7 +1407,9 @@ def batch_loss(
13871407
tf_batch_data[TEXT_FEATURES],
13881408
mask_text,
13891409
self.text_name,
1390-
self.config[MASKED_LM],
1410+
sparse_dropout=self.config[SPARSE_INPUT_DROPOUT],
1411+
dense_dropout=self.config[DENSE_INPUT_DROPOUT],
1412+
masked_lm_loss=self.config[MASKED_LM],
13911413
sequence_ids=True,
13921414
)
13931415

rasa/nlu/classifiers/embedding_intent_classifier.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
SIMILARITY_TYPE,
2323
NUM_NEG,
2424
SPARSE_INPUT_DROPOUT,
25+
DENSE_INPUT_DROPOUT,
2526
MASKED_LM,
2627
ENTITY_RECOGNITION,
2728
INTENT_CLASSIFICATION,
@@ -127,6 +128,8 @@ def required_components(cls) -> List[Type[Component]]:
127128
WEIGHT_SPARSITY: 0.0,
128129
# If 'True' apply dropout to sparse tensors
129130
SPARSE_INPUT_DROPOUT: False,
131+
# If 'True' apply dropout to dense input tensors
132+
DENSE_INPUT_DROPOUT: False,
130133
# ## Evaluation parameters
131134
# How often calculate validation accuracy.
132135
# Small values may hurt performance, e.g. model accuracy.

rasa/nlu/selectors/response_selector.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
SIMILARITY_TYPE,
4040
NUM_NEG,
4141
SPARSE_INPUT_DROPOUT,
42+
DENSE_INPUT_DROPOUT,
4243
MASKED_LM,
4344
ENTITY_RECOGNITION,
4445
INTENT_CLASSIFICATION,
@@ -179,8 +180,10 @@ def required_components(cls) -> List[Type[Component]]:
179180
DROP_RATE: 0.2,
180181
# Dropout rate for attention
181182
DROP_RATE_ATTENTION: 0,
182-
# If 'True' apply dropout to sparse tensors
183+
# If 'True' apply dropout to sparse input tensors
183184
SPARSE_INPUT_DROPOUT: False,
185+
# If 'True' apply dropout to dense input tensors
186+
DENSE_INPUT_DROPOUT: False,
184187
# ## Evaluation parameters
185188
# How often calculate validation accuracy.
186189
# Small values may hurt performance, e.g. model accuracy.
@@ -467,7 +470,9 @@ def batch_loss(
467470
tf_batch_data[TEXT_FEATURES],
468471
mask_text,
469472
self.text_name,
470-
self.config[MASKED_LM],
473+
sparse_dropout=self.config[SPARSE_INPUT_DROPOUT],
474+
dense_dropout=self.config[DENSE_INPUT_DROPOUT],
475+
masked_lm_loss=self.config[MASKED_LM],
471476
sequence_ids=True,
472477
)
473478

rasa/utils/tensorflow/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
MASKED_LM = "use_masked_language_model"
4747

4848
SPARSE_INPUT_DROPOUT = "use_sparse_input_dropout"
49+
DENSE_INPUT_DROPOUT = "use_dense_input_dropout"
4950

5051
RANKING_LENGTH = "ranking_length"
5152

0 commit comments

Comments
 (0)