@@ -21,9 +21,10 @@
 from rasa.core.constants import DEFAULT_POLICY_PRIORITY
 from rasa.core.trackers import DialogueStateTracker
 from rasa.utils import train_utils
-from rasa.utils.tensorflow import tf_layers
-from rasa.utils.tensorflow.tf_models import RasaModel
-from rasa.utils.tensorflow.tf_model_data import RasaModelData, FeatureSignature
+from rasa.utils.tensorflow import layers
+from rasa.utils.tensorflow.transformer import TransformerEncoder
+from rasa.utils.tensorflow.models import RasaModel
+from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
 from rasa.utils.tensorflow.constants import (
     HIDDEN_LAYERS_SIZES_LABEL,
     TRANSFORMER_SIZE,
@@ -40,8 +41,8 @@
     NUM_NEG,
     EVAL_NUM_EXAMPLES,
     EVAL_NUM_EPOCHS,
-    C_EMB,
-    C2,
+    NEG_MARGIN_SCALE,
+    REGULARIZATION_CONSTANT,
     SCALE_LOSS,
     USE_MAX_SIM_NEG,
     MU_NEG,
@@ -50,4 +51,8 @@
     HIDDEN_LAYERS_SIZES_DIALOGUE,
     DROPRATE_DIALOGUE,
     DROPRATE_LABEL,
+    DROPRATE_ATTENTION,
+    KEY_RELATIVE_ATTENTION,
+    VALUE_RELATIVE_ATTENTION,
+    MAX_RELATIVE_POSITION,
 )
@@ -114,20 +119,28 @@ class TEDPolicy(Policy):
         # scale loss inverse proportionally to confidence of correct prediction
         SCALE_LOSS: True,
         # regularization
-        # the scale of L2 regularization
-        C2: 0.001,
+        # the scale of regularization
+        REGULARIZATION_CONSTANT: 0.001,
         # the scale of how important is to minimize the maximum similarity
         # between embeddings of different labels
-        C_EMB: 0.8,
+        NEG_MARGIN_SCALE: 0.8,
         # dropout rate for dial nn
         DROPRATE_DIALOGUE: 0.1,
         # dropout rate for bot nn
         DROPRATE_LABEL: 0.0,
+        # dropout rate for attention
+        DROPRATE_ATTENTION: 0,
         # visualization of accuracy
         # how often calculate validation accuracy
         EVAL_NUM_EPOCHS: 20, # small values may hurt performance
         # how many examples to use for hold out validation set
         EVAL_NUM_EXAMPLES: 0, # large values may hurt performance
+        # if true use key relative embeddings in attention
+        KEY_RELATIVE_ATTENTION: False,
+        # if true use value relative embeddings in attention
+        VALUE_RELATIVE_ATTENTION: False,
+        # max position for relative embeddings
+        MAX_RELATIVE_POSITION: None,
     }
     # end default properties (DOC MARKER - don't remove)

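For reference, a minimal sketch of turning the new attention options on, assuming TEDPolicy forwards keyword overrides into this defaults dict (the usual Rasa policy pattern), that the constants hold valid keyword names, and that the module path is rasa.core.policies.ted_policy; the 0.1 and 5 values are arbitrary:

from rasa.core.policies.ted_policy import TEDPolicy  # assumed import path
from rasa.utils.tensorflow.constants import (
    DROPRATE_ATTENTION,
    KEY_RELATIVE_ATTENTION,
    MAX_RELATIVE_POSITION,
    VALUE_RELATIVE_ATTENTION,
)

# Hypothetical override of the defaults above; keys not passed keep their defaults.
policy = TEDPolicy(
    **{
        DROPRATE_ATTENTION: 0.1,         # enable dropout inside attention
        KEY_RELATIVE_ATTENTION: True,    # relative embeddings for keys
        VALUE_RELATIVE_ATTENTION: True,  # relative embeddings for values
        MAX_RELATIVE_POSITION: 5,        # clip relative distances at 5
    }
)
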
@@ -471,50 +484,53 @@ def __init__(
         self._prepare_layers()

     def _prepare_layers(self) -> None:
-        self._tf_layers["loss.label"] = tf_layers.DotProductLoss(
+        self._tf_layers["loss.label"] = layers.DotProductLoss(
             self.config[NUM_NEG],
             self.config[LOSS_TYPE],
             self.config[MU_POS],
             self.config[MU_NEG],
             self.config[USE_MAX_SIM_NEG],
-            self.config[C_EMB],
+            self.config[NEG_MARGIN_SCALE],
             self.config[SCALE_LOSS],
             # set to 1 to get deterministic behaviour
             parallel_iterations=1 if self.random_seed is not None else 1000,
         )
-        self._tf_layers["ffnn.dialogue"] = tf_layers.Ffnn(
+        self._tf_layers["ffnn.dialogue"] = layers.Ffnn(
             self.config[HIDDEN_LAYERS_SIZES_DIALOGUE],
             self.config[DROPRATE_DIALOGUE],
-            self.config[C2],
+            self.config[REGULARIZATION_CONSTANT],
             layer_name_suffix="dialogue",
         )
-        self._tf_layers["ffnn.label"] = tf_layers.Ffnn(
+        self._tf_layers["ffnn.label"] = layers.Ffnn(
             self.config[HIDDEN_LAYERS_SIZES_LABEL],
             self.config[DROPRATE_LABEL],
-            self.config[C2],
+            self.config[REGULARIZATION_CONSTANT],
             layer_name_suffix="label",
         )
-        self._tf_layers["transformer"] = tf_layers.TransformerEncoder(
+        self._tf_layers["transformer"] = TransformerEncoder(
             self.config[NUM_TRANSFORMER_LAYERS],
             self.config[TRANSFORMER_SIZE],
             self.config[NUM_HEADS],
             self.config[TRANSFORMER_SIZE] * 4,
             self.config[MAX_SEQ_LENGTH],
-            self.config[C2],
+            self.config[REGULARIZATION_CONSTANT],
             dropout_rate=self.config[DROPRATE_DIALOGUE],
-            attention_dropout_rate=0,
+            attention_dropout_rate=self.config[DROPRATE_ATTENTION],
             unidirectional=True,
+            use_key_relative_position=self.config[KEY_RELATIVE_ATTENTION],
+            use_value_relative_position=self.config[VALUE_RELATIVE_ATTENTION],
+            max_relative_position=self.config[MAX_RELATIVE_POSITION],
             name="dialogue_encoder",
         )
-        self._tf_layers["embed.dialogue"] = tf_layers.Embed(
+        self._tf_layers["embed.dialogue"] = layers.Embed(
             self.config[EMBED_DIM],
-            self.config[C2],
+            self.config[REGULARIZATION_CONSTANT],
             "dialogue",
             self.config[SIMILARITY_TYPE],
         )
-        self._tf_layers["embed.label"] = tf_layers.Embed(
+        self._tf_layers["embed.label"] = layers.Embed(
             self.config[EMBED_DIM],
-            self.config[C2],
+            self.config[REGULARIZATION_CONSTANT],
             "label",
             self.config[SIMILARITY_TYPE],
         )
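
Aside on the parameters fed to DotProductLoss: below is a toy sketch of the margin loss these knobs shape, an illustration rather than Rasa's implementation. MU_POS and MU_NEG set the target similarities, USE_MAX_SIM_NEG penalizes only the hardest negative, and NEG_MARGIN_SCALE (per the comment in the defaults) additionally scales a term, omitted here, that pushes apart embeddings of different labels.

import numpy as np

def margin_loss(
    sim_pos: float,
    sim_neg: np.ndarray,
    mu_pos: float = 0.8,
    mu_neg: float = -0.2,
    use_max_sim_neg: bool = True,
) -> float:
    # Pull the similarity with the correct label above mu_pos
    loss = max(0.0, mu_pos - sim_pos)
    if use_max_sim_neg:
        # Penalize only the hardest negative above -mu_neg
        loss += max(0.0, mu_neg + float(sim_neg.max()))
    else:
        # Penalize every negative above -mu_neg
        loss += float(np.sum(np.maximum(0.0, mu_neg + sim_neg)))
    return loss

print(margin_loss(0.9, np.array([0.1, 0.5, -0.3])))  # ~0.3, driven by the 0.5 negative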
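
On max_relative_position: in relative-position attention (Shaw et al., 2018), pairwise offsets between positions are clipped to a maximum distance before indexing the relative-embedding table, so all pairs farther apart than the maximum share one embedding. A generic sketch of that indexing, not the TransformerEncoder internals:

import numpy as np

def relative_position_ids(seq_len: int, max_relative_position: int) -> np.ndarray:
    # Pairwise offsets j - i for every (query i, key j) position
    positions = np.arange(seq_len)
    offsets = positions[None, :] - positions[:, None]
    # Clip to the maximum distance, then shift into [0, 2 * max] so the
    # result can index a relative-embedding lookup table
    clipped = np.clip(offsets, -max_relative_position, max_relative_position)
    return clipped + max_relative_position

# Every offset beyond +/-2 shares an embedding with the clipped extreme
print(relative_position_ids(5, 2))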