4
4
from collections import Counter , OrderedDict
5
5
from copy import deepcopy
6
6
from os .path import relpath
7
- from typing import Any , Dict , List , Optional , Set , Text , Tuple
7
+ from typing import Any , Dict , List , Optional , Set , Text , Tuple , Callable
8
8
9
9
import rasa .nlu .utils
10
10
from rasa .utils .common import raise_warning , lazy_property
11
- from rasa .nlu .constants import RESPONSE , RESPONSE_KEY_ATTRIBUTE
11
+ from rasa .nlu .constants import ENTITIES , INTENT , RESPONSE , RESPONSE_KEY_ATTRIBUTE
12
12
from rasa .nlu .training_data .message import Message
13
13
from rasa .nlu .training_data .util import check_duplicate_synonym
14
14
from rasa .nlu .utils import list_to_str
@@ -75,21 +75,35 @@ def merge(self, *others: "TrainingData") -> "TrainingData":
75
75
nlg_stories ,
76
76
)
77
77
78
def filter_training_examples(
    self, condition: Callable[[Message], bool]
) -> "TrainingData":
    """Build a new TrainingData holding only the examples `condition` accepts.

    Args:
        condition: Predicate called with each training example; an example
            is kept when the predicate returns a truthy value.

    Returns:
        TrainingData: A new object made of the kept examples plus this
            object's entity synonyms, regex features and lookup tables.
    """
    kept_examples = list(filter(condition, self.training_examples))

    # NOTE(review): only these four fields are forwarded to the constructor;
    # confirm that nothing else held by this object needs to be carried over.
    return TrainingData(
        kept_examples,
        self.entity_synonyms,
        self.regex_features,
        self.lookup_tables,
    )
92
96
97
def filter_by_intent(self, intent: Text) -> "TrainingData":
    """Deprecated: keep only the training examples labelled with `intent`.

    Issues a DeprecationWarning and then delegates to
    `filter_training_examples`.

    Args:
        intent: Value compared for equality against each example's
            `get(INTENT)` result.

    Returns:
        TrainingData: The result of `filter_training_examples` for that
            comparison.
    """
    raise_warning(
        "The `filter_by_intent` function is deprecated. "
        "Please use `filter_training_examples` instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    def has_requested_intent(example: Message) -> bool:
        return intent == example.get(INTENT)

    return self.filter_training_examples(has_requested_intent)
106
+
93
107
def __hash__ (self ) -> int :
94
108
from rasa .core import utils as core_utils
95
109
@@ -105,49 +119,49 @@ def sanitize_examples(examples: List[Message]) -> List[Message]:
105
119
Remove trailing whitespaces from intent and response annotations and drop duplicate examples."""
106
120
107
121
for ex in examples :
108
- if ex .get ("intent" ):
109
- ex .set ("intent" , ex .get ("intent" ).strip ())
122
+ if ex .get (INTENT ):
123
+ ex .set (INTENT , ex .get (INTENT ).strip ())
110
124
111
- if ex .get ("response" ):
112
- ex .set ("response" , ex .get ("response" ).strip ())
125
+ if ex .get (RESPONSE ):
126
+ ex .set (RESPONSE , ex .get (RESPONSE ).strip ())
113
127
114
128
return list (OrderedDict .fromkeys (examples ))
115
129
116
130
@lazy_property
def intent_examples(self) -> List[Message]:
    """Returns the training examples whose `get(INTENT)` value is truthy."""
    return list(
        filter(lambda example: example.get(INTENT), self.training_examples)
    )
119
133
120
134
@lazy_property
def response_examples(self) -> List[Message]:
    """Returns the training examples whose `get(RESPONSE)` value is truthy."""
    return list(
        filter(lambda example: example.get(RESPONSE), self.training_examples)
    )
123
137
124
138
@lazy_property
def entity_examples(self) -> List[Message]:
    """Returns the training examples whose `get(ENTITIES)` value is truthy."""
    return list(
        filter(lambda example: example.get(ENTITIES), self.training_examples)
    )
127
141
128
142
@lazy_property
def intents(self) -> Set[Text]:
    """Returns the set of intents in the training data.

    `None` (what an example yields when `get(INTENT)` finds nothing to
    return) is never part of the result.
    """
    found = set()
    for example in self.training_examples:
        found.add(example.get(INTENT))

    # Drop the placeholder contributed by examples without an intent.
    found.discard(None)
    return found
132
146
133
147
@lazy_property
def responses(self) -> Set[Text]:
    """Returns the set of responses in the training data.

    `None` (what an example yields when `get(RESPONSE)` finds nothing to
    return) is never part of the result.
    """
    found = set()
    for example in self.training_examples:
        found.add(example.get(RESPONSE))

    # Drop the placeholder contributed by examples without a response.
    found.discard(None)
    return found
137
151
138
152
@lazy_property
def retrieval_intents(self) -> Set[Text]:
    """Returns the set of intents of all examples that carry a response.

    An example contributes its `get(INTENT)` value whenever its
    `get(RESPONSE)` value is not `None`.
    """
    with_response = set()
    for example in self.training_examples:
        if example.get(RESPONSE) is None:
            continue
        with_response.add(example.get(INTENT))
    return with_response
146
160
147
161
@lazy_property
def examples_per_intent(self) -> Dict[Text, int]:
    """Calculates the number of examples per intent.

    Examples are not filtered first, so whatever `get(INTENT)` returns for
    an example without an intent also shows up as a key.
    """
    tally = Counter()
    for example in self.training_examples:
        tally[example.get(INTENT)] += 1
    return dict(tally)
152
166
153
167
@lazy_property
@@ -299,7 +313,7 @@ def sorted_intent_examples(self) -> List[Message]:
299
313
"""Sorts the intent examples by the name of the intent and then response"""
300
314
301
315
return sorted (
302
- self .intent_examples , key = lambda e : (e .get ("intent" ), e .get ("response" ))
316
+ self .intent_examples , key = lambda e : (e .get (INTENT ), e .get (RESPONSE ))
303
317
)
304
318
305
319
def validate (self ) -> None :
@@ -393,7 +407,7 @@ def split_nlu_examples(
393
407
) -> Tuple [list , list ]:
394
408
train , test = [], []
395
409
for intent , count in self .examples_per_intent .items ():
396
- ex = [e for e in self .intent_examples if e .data ["intent" ] == intent ]
410
+ ex = [e for e in self .intent_examples if e .data [INTENT ] == intent ]
397
411
if random_seed is not None :
398
412
random .Random (random_seed ).shuffle (ex )
399
413
else :
0 commit comments