Skip to content

Commit cd30622

Browse files
Merge pull request RasaHQ#5636 from RasaHQ/input-dropout: "Input dropout"
2 parents: 0f4c2ea + 0c60176 — commit cd30622

File tree

7 files changed

+52
-13
lines changed

7 files changed

+52
-13
lines changed

changelog/5635.improvement.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+ Update dependencies based on the ``dependabot`` check.

changelog/5636.improvement.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1- Update dependencies based on the ``dependabot`` check.
1+ Add dropout between ``FFNN`` and ``DenseForSparse`` layers in ``DIETClassifier``,
2+ ``ResponseSelector`` and ``EmbeddingIntentClassifier`` controlled by ``use_dense_input_dropout`` config parameter.

docs/nlu/components.rst

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -922,7 +922,9 @@ EmbeddingIntentClassifier
922922
| drop_rate | 0.2 | Dropout rate for encoder. Value should be between 0 and 1. |
923923
| | | The higher the value the higher the regularization effect. |
924924
+---------------------------------+------------------+--------------------------------------------------------------+
925-
| use_sparse_input_dropout | True | If 'True' apply dropout to sparse tensors. |
925+
| use_sparse_input_dropout | False | If 'True' apply dropout to sparse input tensors. |
926+
+---------------------------------+------------------+--------------------------------------------------------------+
927+
| use_dense_input_dropout | False | If 'True' apply dropout to dense input tensors. |
926928
+---------------------------------+------------------+--------------------------------------------------------------+
927929
| evaluate_every_number_of_epochs | 20 | How often to calculate validation accuracy. |
928930
| | | Set to '-1' to evaluate just once at the end of training. |
@@ -1486,7 +1488,9 @@ ResponseSelector
14861488
| drop_rate_attention | 0.0 | Dropout rate for attention. Value should be between 0 and 1. |
14871489
| | | The higher the value the higher the regularization effect. |
14881490
+---------------------------------+-------------------+--------------------------------------------------------------+
1489-
| use_sparse_input_dropout | False | If 'True' apply dropout to sparse tensors. |
1491+
| use_sparse_input_dropout | False | If 'True' apply dropout to sparse input tensors. |
1492+
+---------------------------------+-------------------+--------------------------------------------------------------+
1493+
| use_dense_input_dropout | False | If 'True' apply dropout to dense input tensors. |
14901494
+---------------------------------+-------------------+--------------------------------------------------------------+
14911495
| evaluate_every_number_of_epochs | 20 | How often to calculate validation accuracy. |
14921496
| | | Set to '-1' to evaluate just once at the end of training. |
@@ -1715,7 +1719,9 @@ DIETClassifier
17151719
| drop_rate_attention | 0.0 | Dropout rate for attention. Value should be between 0 and 1. |
17161720
| | | The higher the value the higher the regularization effect. |
17171721
+---------------------------------+------------------+--------------------------------------------------------------+
1718-
| use_sparse_input_dropout | True | If 'True' apply dropout to sparse tensors. |
1722+
| use_sparse_input_dropout | True | If 'True' apply dropout to sparse input tensors. |
1723+
+---------------------------------+------------------+--------------------------------------------------------------+
1724+
| use_dense_input_dropout | True | If 'True' apply dropout to dense input tensors. |
17191725
+---------------------------------+------------------+--------------------------------------------------------------+
17201726
| evaluate_every_number_of_epochs | 20 | How often to calculate validation accuracy. |
17211727
| | | Set to '-1' to evaluate just once at the end of training. |

rasa/nlu/classifiers/diet_classifier.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
SIMILARITY_TYPE,
5656
NUM_NEG,
5757
SPARSE_INPUT_DROPOUT,
58+
DENSE_INPUT_DROPOUT,
5859
MASKED_LM,
5960
ENTITY_RECOGNITION,
6061
TENSORBOARD_LOG_DIR,
@@ -188,8 +189,10 @@ def required_components(cls) -> List[Type[Component]]:
188189
DROP_RATE_ATTENTION: 0,
189190
# Sparsity of the weights in dense layers
190191
WEIGHT_SPARSITY: 0.8,
191-
# If 'True' apply dropout to sparse tensors
192+
# If 'True' apply dropout to sparse input tensors
192193
SPARSE_INPUT_DROPOUT: True,
194+
# If 'True' apply dropout to dense input tensors
195+
DENSE_INPUT_DROPOUT: True,
193196
# ## Evaluation parameters
194197
# How often calculate validation accuracy.
195198
# Small values may hurt performance, e.g. model accuracy.
@@ -1075,7 +1078,10 @@ def _prepare_sparse_dense_layers(
10751078
)
10761079

10771080
def _prepare_input_layers(self, name: Text) -> None:
1078-
self._tf_layers[f"sparse_dropout.{name}"] = layers.SparseDropout(
1081+
self._tf_layers[f"sparse_input_dropout.{name}"] = layers.SparseDropout(
1082+
rate=self.config[DROP_RATE]
1083+
)
1084+
self._tf_layers[f"dense_input_dropout.{name}"] = tf.keras.layers.Dropout(
10791085
rate=self.config[DROP_RATE]
10801086
)
10811087
self._prepare_sparse_dense_layers(
@@ -1172,21 +1178,30 @@ def _combine_sparse_dense_features(
11721178
mask: tf.Tensor,
11731179
name: Text,
11741180
sparse_dropout: bool = False,
1181+
dense_dropout: bool = False,
11751182
) -> tf.Tensor:
11761183

11771184
dense_features = []
11781185

11791186
for f in features:
11801187
if isinstance(f, tf.SparseTensor):
11811188
if sparse_dropout:
1182-
_f = self._tf_layers[f"sparse_dropout.{name}"](f, self._training)
1189+
_f = self._tf_layers[f"sparse_input_dropout.{name}"](
1190+
f, self._training
1191+
)
11831192
else:
11841193
_f = f
11851194
dense_features.append(self._tf_layers[f"sparse_to_dense.{name}"](_f))
11861195
else:
11871196
dense_features.append(f)
11881197

1189-
return tf.concat(dense_features, axis=-1) * mask
1198+
outputs = tf.concat(dense_features, axis=-1) * mask
1199+
if dense_dropout:
1200+
outputs = self._tf_layers[f"dense_input_dropout.{name}"](
1201+
outputs, self._training
1202+
)
1203+
1204+
return outputs
11901205

11911206
def _features_as_seq_ids(
11921207
self, features: List[Union[np.ndarray, tf.Tensor, tf.SparseTensor]], name: Text
@@ -1213,9 +1228,12 @@ def _create_bow(
12131228
mask: tf.Tensor,
12141229
name: Text,
12151230
sparse_dropout: bool = False,
1231+
dense_dropout: bool = False,
12161232
) -> tf.Tensor:
12171233

1218-
x = self._combine_sparse_dense_features(features, mask, name, sparse_dropout)
1234+
x = self._combine_sparse_dense_features(
1235+
features, mask, name, sparse_dropout, dense_dropout
1236+
)
12191237
x = tf.reduce_sum(x, axis=1) # convert to bag-of-words
12201238
return self._tf_layers[f"ffnn.{name}"](x, self._training)
12211239

@@ -1224,6 +1242,8 @@ def _create_sequence(
12241242
features: List[Union[tf.Tensor, tf.SparseTensor]],
12251243
mask: tf.Tensor,
12261244
name: Text,
1245+
sparse_dropout: bool = False,
1246+
dense_dropout: bool = False,
12271247
masked_lm_loss: bool = False,
12281248
sequence_ids: bool = False,
12291249
) -> Tuple[tf.Tensor, tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor]]:
@@ -1233,7 +1253,7 @@ def _create_sequence(
12331253
seq_ids = None
12341254

12351255
inputs = self._combine_sparse_dense_features(
1236-
features, mask, name, sparse_dropout=self.config[SPARSE_INPUT_DROPOUT]
1256+
features, mask, name, sparse_dropout, dense_dropout,
12371257
)
12381258

12391259
inputs = self._tf_layers[f"ffnn.{name}"](inputs, self._training)
@@ -1387,7 +1407,9 @@ def batch_loss(
13871407
tf_batch_data[TEXT_FEATURES],
13881408
mask_text,
13891409
self.text_name,
1390-
self.config[MASKED_LM],
1410+
sparse_dropout=self.config[SPARSE_INPUT_DROPOUT],
1411+
dense_dropout=self.config[DENSE_INPUT_DROPOUT],
1412+
masked_lm_loss=self.config[MASKED_LM],
13911413
sequence_ids=True,
13921414
)
13931415

rasa/nlu/classifiers/embedding_intent_classifier.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
SIMILARITY_TYPE,
2323
NUM_NEG,
2424
SPARSE_INPUT_DROPOUT,
25+
DENSE_INPUT_DROPOUT,
2526
MASKED_LM,
2627
ENTITY_RECOGNITION,
2728
INTENT_CLASSIFICATION,
@@ -127,6 +128,8 @@ def required_components(cls) -> List[Type[Component]]:
127128
WEIGHT_SPARSITY: 0.0,
128129
# If 'True' apply dropout to sparse tensors
129130
SPARSE_INPUT_DROPOUT: False,
131+
# If 'True' apply dropout to dense input tensors
132+
DENSE_INPUT_DROPOUT: False,
130133
# ## Evaluation parameters
131134
# How often calculate validation accuracy.
132135
# Small values may hurt performance, e.g. model accuracy.

rasa/nlu/selectors/response_selector.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
SIMILARITY_TYPE,
4040
NUM_NEG,
4141
SPARSE_INPUT_DROPOUT,
42+
DENSE_INPUT_DROPOUT,
4243
MASKED_LM,
4344
ENTITY_RECOGNITION,
4445
INTENT_CLASSIFICATION,
@@ -179,8 +180,10 @@ def required_components(cls) -> List[Type[Component]]:
179180
DROP_RATE: 0.2,
180181
# Dropout rate for attention
181182
DROP_RATE_ATTENTION: 0,
182-
# If 'True' apply dropout to sparse tensors
183+
# If 'True' apply dropout to sparse input tensors
183184
SPARSE_INPUT_DROPOUT: False,
185+
# If 'True' apply dropout to dense input tensors
186+
DENSE_INPUT_DROPOUT: False,
184187
# ## Evaluation parameters
185188
# How often calculate validation accuracy.
186189
# Small values may hurt performance, e.g. model accuracy.
@@ -467,7 +470,9 @@ def batch_loss(
467470
tf_batch_data[TEXT_FEATURES],
468471
mask_text,
469472
self.text_name,
470-
self.config[MASKED_LM],
473+
sparse_dropout=self.config[SPARSE_INPUT_DROPOUT],
474+
dense_dropout=self.config[DENSE_INPUT_DROPOUT],
475+
masked_lm_loss=self.config[MASKED_LM],
471476
sequence_ids=True,
472477
)
473478

rasa/utils/tensorflow/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
MASKED_LM = "use_masked_language_model"
4747

4848
SPARSE_INPUT_DROPOUT = "use_sparse_input_dropout"
49+
DENSE_INPUT_DROPOUT = "use_dense_input_dropout"
4950

5051
RANKING_LENGTH = "ranking_length"
5152

0 commit comments

Comments
 (0)