21
21
from rasa .core .constants import DEFAULT_POLICY_PRIORITY , DIALOGUE
22
22
from rasa .core .trackers import DialogueStateTracker
23
23
from rasa .utils import train_utils
24
- from rasa .utils .tensorflow import tf_layers
25
- from rasa .utils .tensorflow .tf_models import RasaModel
26
- from rasa .utils .tensorflow .tf_model_data import RasaModelData , FeatureSignature
24
+ from rasa .utils .tensorflow import layers
25
+ from rasa .utils .tensorflow .transformer import TransformerEncoder
26
+ from rasa .utils .tensorflow .models import RasaModel
27
+ from rasa .utils .tensorflow .model_data import RasaModelData , FeatureSignature
27
28
from rasa .utils .tensorflow .constants import (
28
29
LABEL ,
29
30
HIDDEN_LAYERS_SIZES ,
41
42
NUM_NEG ,
42
43
EVAL_NUM_EXAMPLES ,
43
44
EVAL_NUM_EPOCHS ,
44
- C_EMB ,
45
- C2 ,
45
+ NEG_MARGIN_SCALE ,
46
+ REGULARIZATION_CONSTANT ,
46
47
SCALE_LOSS ,
47
48
USE_MAX_SIM_NEG ,
48
49
MU_NEG ,
49
50
MU_POS ,
50
51
EMBED_DIM ,
51
52
DROPRATE_DIALOGUE ,
52
53
DROPRATE_LABEL ,
54
+ DROPRATE_ATTENTION ,
55
+ KEY_RELATIVE_ATTENTION ,
56
+ VALUE_RELATIVE_ATTENTION ,
57
+ MAX_RELATIVE_POSITION ,
53
58
)
54
59
55
60
@@ -111,20 +116,28 @@ class TEDPolicy(Policy):
111
116
# scale loss inversely proportionally to the confidence of the correct prediction
112
117
SCALE_LOSS : True ,
113
118
# regularization
114
- # the scale of L2 regularization
115
- C2 : 0.001 ,
119
+ # the scale of regularization
120
+ REGULARIZATION_CONSTANT : 0.001 ,
116
121
# the scale of how important it is to minimize the maximum similarity
117
122
# between embeddings of different labels
118
- C_EMB : 0.8 ,
123
+ NEG_MARGIN_SCALE : 0.8 ,
119
124
# dropout rate for the dialogue nn
120
125
DROPRATE_DIALOGUE : 0.1 ,
121
126
# dropout rate for the label nn
122
127
DROPRATE_LABEL : 0.0 ,
128
+ # dropout rate for attention
129
+ DROPRATE_ATTENTION : 0 ,
123
130
# visualization of accuracy
124
131
# how often to calculate validation accuracy
125
132
EVAL_NUM_EPOCHS : 20 , # small values may hurt performance
126
133
# how many examples to use for hold out validation set
127
134
EVAL_NUM_EXAMPLES : 0 , # large values may hurt performance
135
+ # if true use key relative embeddings in attention
136
+ KEY_RELATIVE_ATTENTION : False ,
137
+ # if true use value relative embeddings in attention
138
+ VALUE_RELATIVE_ATTENTION : False ,
139
+ # max position for relative embeddings
140
+ MAX_RELATIVE_POSITION : None ,
128
141
}
129
142
# end default properties (DOC MARKER - don't remove)
130
143
@@ -246,8 +259,6 @@ def train(
246
259
) -> None :
247
260
"""Train the policy on given training trackers."""
248
261
249
- logger .debug ("Started training embedding policy." )
250
-
251
262
# set numpy random seed
252
263
np .random .seed (self .config [RANDOM_SEED ])
253
264
@@ -268,8 +279,8 @@ def train(
268
279
model_data = self ._create_model_data (training_data .X , training_data .y )
269
280
if model_data .is_empty ():
270
281
logger .error (
271
- "Can not train TED policy . No data was provided. "
272
- "Skipping training of the policy."
282
+ f "Can not train ' { self . __class__ . __name__ } ' . No data was provided. "
283
+ f "Skipping training of the policy."
273
284
)
274
285
return
275
286
@@ -488,50 +499,53 @@ def _check_data(self) -> None:
488
499
)
489
500
490
501
def _prepare_layers (self ) -> None :
491
- self ._tf_layers ["loss.label" ] = tf_layers .DotProductLoss (
502
+ self ._tf_layers ["loss.label" ] = layers .DotProductLoss (
492
503
self .config [NUM_NEG ],
493
504
self .config [LOSS_TYPE ],
494
505
self .config [MU_POS ],
495
506
self .config [MU_NEG ],
496
507
self .config [USE_MAX_SIM_NEG ],
497
- self .config [C_EMB ],
508
+ self .config [NEG_MARGIN_SCALE ],
498
509
self .config [SCALE_LOSS ],
499
510
# set to 1 to get deterministic behaviour
500
511
parallel_iterations = 1 if self .random_seed is not None else 1000 ,
501
512
)
502
- self ._tf_layers ["ffnn.dialogue" ] = tf_layers .Ffnn (
513
+ self ._tf_layers ["ffnn.dialogue" ] = layers .Ffnn (
503
514
self .config [HIDDEN_LAYERS_SIZES ][DIALOGUE ],
504
515
self .config [DROPRATE_DIALOGUE ],
505
- self .config [C2 ],
516
+ self .config [REGULARIZATION_CONSTANT ],
506
517
layer_name_suffix = DIALOGUE ,
507
518
)
508
- self ._tf_layers ["ffnn.label" ] = tf_layers .Ffnn (
519
+ self ._tf_layers ["ffnn.label" ] = layers .Ffnn (
509
520
self .config [HIDDEN_LAYERS_SIZES ][LABEL ],
510
521
self .config [DROPRATE_LABEL ],
511
- self .config [C2 ],
522
+ self .config [REGULARIZATION_CONSTANT ],
512
523
layer_name_suffix = LABEL ,
513
524
)
514
- self ._tf_layers ["transformer" ] = tf_layers . TransformerEncoder (
525
+ self ._tf_layers ["transformer" ] = TransformerEncoder (
515
526
self .config [NUM_TRANSFORMER_LAYERS ],
516
527
self .config [TRANSFORMER_SIZE ],
517
528
self .config [NUM_HEADS ],
518
529
self .config [TRANSFORMER_SIZE ] * 4 ,
519
530
self .config [MAX_SEQ_LENGTH ],
520
- self .config [C2 ],
531
+ self .config [REGULARIZATION_CONSTANT ],
521
532
dropout_rate = self .config [DROPRATE_DIALOGUE ],
522
- attention_dropout_rate = 0 ,
533
+ attention_dropout_rate = self . config [ DROPRATE_ATTENTION ] ,
523
534
unidirectional = True ,
535
+ use_key_relative_position = self .config [KEY_RELATIVE_ATTENTION ],
536
+ use_value_relative_position = self .config [VALUE_RELATIVE_ATTENTION ],
537
+ max_relative_position = self .config [MAX_RELATIVE_POSITION ],
524
538
name = DIALOGUE + "_encoder" ,
525
539
)
526
- self ._tf_layers ["embed.dialogue" ] = tf_layers .Embed (
540
+ self ._tf_layers ["embed.dialogue" ] = layers .Embed (
527
541
self .config [EMBED_DIM ],
528
- self .config [C2 ],
542
+ self .config [REGULARIZATION_CONSTANT ],
529
543
DIALOGUE ,
530
544
self .config [SIMILARITY_TYPE ],
531
545
)
532
- self ._tf_layers ["embed.label" ] = tf_layers .Embed (
546
+ self ._tf_layers ["embed.label" ] = layers .Embed (
533
547
self .config [EMBED_DIM ],
534
- self .config [C2 ],
548
+ self .config [REGULARIZATION_CONSTANT ],
535
549
LABEL ,
536
550
self .config [SIMILARITY_TYPE ],
537
551
)
0 commit comments