Skip to content

Commit d3b58fe

Browse files
committed
merge tf2
2 parents 1fd5cb6 + 24a8242 commit d3b58fe

File tree

16 files changed

+735
-392
lines changed

16 files changed

+735
-392
lines changed

docs/core/policies.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,15 @@ In order to get reproducible training results for the same inputs you can
192192
set the ``random_seed`` attribute of the ``KerasPolicy`` to any integer.
193193

194194

195+
.. _embedding_policy:
196+
197+
Embedding Policy
198+
^^^^^^^^^^^^^^^^
199+
200+
``EmbeddingPolicy`` was renamed to ``TEDPolicy``.
201+
Please take a look at :ref:`ted_policy` for more details.
202+
203+
195204
.. _ted_policy:
196205

197206
TED Policy

rasa/core/policies/embedding_policy.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88
from rasa.utils.tensorflow.constants import (
99
LABEL,
1010
HIDDEN_LAYERS_SIZES,
11+
TRANSFORMER_SIZE,
1112
NUM_TRANSFORMER_LAYERS,
13+
NUM_HEADS,
14+
MAX_SEQ_LENGTH,
1215
BATCH_SIZES,
1316
BATCH_STRATEGY,
1417
EPOCHS,
@@ -19,21 +22,22 @@
1922
NUM_NEG,
2023
EVAL_NUM_EXAMPLES,
2124
EVAL_NUM_EPOCHS,
22-
C_EMB,
23-
C2,
25+
NEG_MARGIN_SCALE,
26+
REGULARIZATION_CONSTANT,
2427
SCALE_LOSS,
2528
USE_MAX_SIM_NEG,
2629
MU_NEG,
2730
MU_POS,
2831
EMBED_DIM,
29-
TRANSFORMER_SIZE,
30-
MAX_SEQ_LENGTH,
31-
NUM_HEADS,
3232
DROPRATE_DIALOGUE,
3333
DROPRATE_LABEL,
34+
DROPRATE_ATTENTION,
35+
KEY_RELATIVE_ATTENTION,
36+
VALUE_RELATIVE_ATTENTION,
37+
MAX_RELATIVE_POSITION,
3438
)
3539
from rasa.utils.common import raise_warning
36-
from rasa.utils.tensorflow.tf_models import RasaModel
40+
from rasa.utils.tensorflow.models import RasaModel
3741

3842
logger = logging.getLogger(__name__)
3943

@@ -91,20 +95,28 @@ class EmbeddingPolicy(TEDPolicy):
9195
# scale loss inverse proportionally to confidence of correct prediction
9296
SCALE_LOSS: True,
9397
# regularization
94-
# the scale of L2 regularization
95-
C2: 0.001,
98+
# the scale of regularization
99+
REGULARIZATION_CONSTANT: 0.001,
96100
# the scale of how important it is to minimize the maximum similarity
97101
# between embeddings of different labels
98-
C_EMB: 0.8,
102+
NEG_MARGIN_SCALE: 0.8,
99103
# dropout rate for dial nn
100104
DROPRATE_DIALOGUE: 0.1,
101105
# dropout rate for bot nn
102106
DROPRATE_LABEL: 0.0,
107+
# dropout rate for attention
108+
DROPRATE_ATTENTION: 0,
103109
# visualization of accuracy
104110
# how often calculate validation accuracy
105111
EVAL_NUM_EPOCHS: 20, # small values may hurt performance
106112
# how many examples to use for hold out validation set
107113
EVAL_NUM_EXAMPLES: 0, # large values may hurt performance
114+
# if true use key relative embeddings in attention
115+
KEY_RELATIVE_ATTENTION: False,
116+
# if true use value relative embeddings in attention
117+
VALUE_RELATIVE_ATTENTION: False,
118+
# max position for relative embeddings
119+
MAX_RELATIVE_POSITION: None,
108120
}
109121
# end default properties (DOC MARKER - don't remove)
110122

rasa/core/policies/ted_policy.py

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,10 @@
2121
from rasa.core.constants import DEFAULT_POLICY_PRIORITY, DIALOGUE
2222
from rasa.core.trackers import DialogueStateTracker
2323
from rasa.utils import train_utils
24-
from rasa.utils.tensorflow import tf_layers
25-
from rasa.utils.tensorflow.tf_models import RasaModel
26-
from rasa.utils.tensorflow.tf_model_data import RasaModelData, FeatureSignature
24+
from rasa.utils.tensorflow import layers
25+
from rasa.utils.tensorflow.transformer import TransformerEncoder
26+
from rasa.utils.tensorflow.models import RasaModel
27+
from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
2728
from rasa.utils.tensorflow.constants import (
2829
LABEL,
2930
HIDDEN_LAYERS_SIZES,
@@ -41,15 +42,19 @@
4142
NUM_NEG,
4243
EVAL_NUM_EXAMPLES,
4344
EVAL_NUM_EPOCHS,
44-
C_EMB,
45-
C2,
45+
NEG_MARGIN_SCALE,
46+
REGULARIZATION_CONSTANT,
4647
SCALE_LOSS,
4748
USE_MAX_SIM_NEG,
4849
MU_NEG,
4950
MU_POS,
5051
EMBED_DIM,
5152
DROPRATE_DIALOGUE,
5253
DROPRATE_LABEL,
54+
DROPRATE_ATTENTION,
55+
KEY_RELATIVE_ATTENTION,
56+
VALUE_RELATIVE_ATTENTION,
57+
MAX_RELATIVE_POSITION,
5358
)
5459

5560

@@ -111,20 +116,28 @@ class TEDPolicy(Policy):
111116
# scale loss inverse proportionally to confidence of correct prediction
112117
SCALE_LOSS: True,
113118
# regularization
114-
# the scale of L2 regularization
115-
C2: 0.001,
119+
# the scale of regularization
120+
REGULARIZATION_CONSTANT: 0.001,
116121
# the scale of how important it is to minimize the maximum similarity
117122
# between embeddings of different labels
118-
C_EMB: 0.8,
123+
NEG_MARGIN_SCALE: 0.8,
119124
# dropout rate for dial nn
120125
DROPRATE_DIALOGUE: 0.1,
121126
# dropout rate for bot nn
122127
DROPRATE_LABEL: 0.0,
128+
# dropout rate for attention
129+
DROPRATE_ATTENTION: 0,
123130
# visualization of accuracy
124131
# how often calculate validation accuracy
125132
EVAL_NUM_EPOCHS: 20, # small values may hurt performance
126133
# how many examples to use for hold out validation set
127134
EVAL_NUM_EXAMPLES: 0, # large values may hurt performance
135+
# if true use key relative embeddings in attention
136+
KEY_RELATIVE_ATTENTION: False,
137+
# if true use value relative embeddings in attention
138+
VALUE_RELATIVE_ATTENTION: False,
139+
# max position for relative embeddings
140+
MAX_RELATIVE_POSITION: None,
128141
}
129142
# end default properties (DOC MARKER - don't remove)
130143

@@ -246,8 +259,6 @@ def train(
246259
) -> None:
247260
"""Train the policy on given training trackers."""
248261

249-
logger.debug("Started training embedding policy.")
250-
251262
# set numpy random seed
252263
np.random.seed(self.config[RANDOM_SEED])
253264

@@ -268,8 +279,8 @@ def train(
268279
model_data = self._create_model_data(training_data.X, training_data.y)
269280
if model_data.is_empty():
270281
logger.error(
271-
"Can not train TED policy. No data was provided. "
272-
"Skipping training of the policy."
282+
f"Can not train '{self.__class__.__name__}'. No data was provided. "
283+
f"Skipping training of the policy."
273284
)
274285
return
275286

@@ -488,50 +499,53 @@ def _check_data(self) -> None:
488499
)
489500

490501
def _prepare_layers(self) -> None:
491-
self._tf_layers["loss.label"] = tf_layers.DotProductLoss(
502+
self._tf_layers["loss.label"] = layers.DotProductLoss(
492503
self.config[NUM_NEG],
493504
self.config[LOSS_TYPE],
494505
self.config[MU_POS],
495506
self.config[MU_NEG],
496507
self.config[USE_MAX_SIM_NEG],
497-
self.config[C_EMB],
508+
self.config[NEG_MARGIN_SCALE],
498509
self.config[SCALE_LOSS],
499510
# set to 1 to get deterministic behaviour
500511
parallel_iterations=1 if self.random_seed is not None else 1000,
501512
)
502-
self._tf_layers["ffnn.dialogue"] = tf_layers.Ffnn(
513+
self._tf_layers["ffnn.dialogue"] = layers.Ffnn(
503514
self.config[HIDDEN_LAYERS_SIZES][DIALOGUE],
504515
self.config[DROPRATE_DIALOGUE],
505-
self.config[C2],
516+
self.config[REGULARIZATION_CONSTANT],
506517
layer_name_suffix=DIALOGUE,
507518
)
508-
self._tf_layers["ffnn.label"] = tf_layers.Ffnn(
519+
self._tf_layers["ffnn.label"] = layers.Ffnn(
509520
self.config[HIDDEN_LAYERS_SIZES][LABEL],
510521
self.config[DROPRATE_LABEL],
511-
self.config[C2],
522+
self.config[REGULARIZATION_CONSTANT],
512523
layer_name_suffix=LABEL,
513524
)
514-
self._tf_layers["transformer"] = tf_layers.TransformerEncoder(
525+
self._tf_layers["transformer"] = TransformerEncoder(
515526
self.config[NUM_TRANSFORMER_LAYERS],
516527
self.config[TRANSFORMER_SIZE],
517528
self.config[NUM_HEADS],
518529
self.config[TRANSFORMER_SIZE] * 4,
519530
self.config[MAX_SEQ_LENGTH],
520-
self.config[C2],
531+
self.config[REGULARIZATION_CONSTANT],
521532
dropout_rate=self.config[DROPRATE_DIALOGUE],
522-
attention_dropout_rate=0,
533+
attention_dropout_rate=self.config[DROPRATE_ATTENTION],
523534
unidirectional=True,
535+
use_key_relative_position=self.config[KEY_RELATIVE_ATTENTION],
536+
use_value_relative_position=self.config[VALUE_RELATIVE_ATTENTION],
537+
max_relative_position=self.config[MAX_RELATIVE_POSITION],
524538
name=DIALOGUE + "_encoder",
525539
)
526-
self._tf_layers["embed.dialogue"] = tf_layers.Embed(
540+
self._tf_layers["embed.dialogue"] = layers.Embed(
527541
self.config[EMBED_DIM],
528-
self.config[C2],
542+
self.config[REGULARIZATION_CONSTANT],
529543
DIALOGUE,
530544
self.config[SIMILARITY_TYPE],
531545
)
532-
self._tf_layers["embed.label"] = tf_layers.Embed(
546+
self._tf_layers["embed.label"] = layers.Embed(
533547
self.config[EMBED_DIM],
534-
self.config[C2],
548+
self.config[REGULARIZATION_CONSTANT],
535549
LABEL,
536550
self.config[SIMILARITY_TYPE],
537551
)

0 commit comments

Comments
 (0)