Skip to content

Commit 2b79796

Browse files
authored
Merge branch 'master' into metada-on-session-start-action
2 parents e4b3e6a + d55e868 commit 2b79796

File tree

13 files changed

+233
-83
lines changed

13 files changed

+233
-83
lines changed

changelog/5587.misc.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fixed a misleading error message shown when ``rasa train nlu --nlu`` is given a file in the wrong format.
2+
3+
Before this change the output message was always: ``No NLU data given.``
4+
5+
Now, if the format is wrong, the command prints: ``Path '<nlu_data>' doesn't contain valid NLU data in it. Please verify the data format. The NLU model training will be skipped now.`` (where ``<nlu_data>`` is the path that was passed).

changelog/5614.misc.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Deprecate ``TrainingData.filter_by_intent`` in favor of the more general ``TrainingData.filter_training_examples``,
2+
which filters training examples using an arbitrary filtering function.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
## deny
2+
- non, merci
3+
- non merci
4+
- non

examples/formbot/actions.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@ def slot_mappings(self) -> Dict[Text, Union[Dict, List[Dict]]]:
2929
return {
3030
"cuisine": self.from_entity(entity="cuisine", not_intent="chitchat"),
3131
"num_people": [
32-
self.from_entity(entity="number", intent=["inform", "request_restaurant"]),
32+
self.from_entity(
33+
entity="number", intent=["inform", "request_restaurant"]
34+
),
3335
],
3436
"outdoor_seating": [
3537
self.from_entity(entity="seating"),

poetry.lock

Lines changed: 86 additions & 48 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,15 +67,15 @@ showcontent = false
6767
python = "^3.6"
6868
boto3 = "^1.12"
6969
requests = "^2.23"
70-
matplotlib = "~3.1"
70+
matplotlib = ">=3.1,<3.3"
7171
attrs = "~19.3"
7272
jsonpickle = "~1.3"
7373
redis = "^3.4"
7474
numpy = "^1.16"
7575
scipy = "^1.4.1"
7676
absl-py = "^0.9"
7777
apscheduler = "~3.6"
78-
tqdm = "~4.31.0"
78+
tqdm = ">=4.31,<4.46"
7979
networkx = "~2.4.0"
8080
fbmessenger = "~6.0.0"
8181
pykwalify = "~1.7.0"
@@ -87,7 +87,7 @@ python-telegram-bot = "^11.1"
8787
twilio = "~6.26"
8888
webexteamssdk = "~1.1.1"
8989
mattermostwrapper = "~2.2"
90-
rocketchat_API = "~0.6.31"
90+
rocketchat_API = ">=0.6.31,<1.4.0"
9191
colorhash = "~1.0.2"
9292
pika = "~1.1.0"
9393
jsonschema = "~3.2"
@@ -104,7 +104,7 @@ cloudpickle = ">=1.2,<1.4"
104104
multidict = "^4.6"
105105
aiohttp = "~3.6"
106106
questionary = "~1.5.1"
107-
python-socketio = "~4.4"
107+
python-socketio = ">=4.4,<4.6"
108108
python-engineio = ">=3.11,<3.13"
109109
pydot = "~1.4"
110110
async_generator = "~1.10"
@@ -126,15 +126,15 @@ oauth2client = "4.1.3"
126126
[tool.poetry.dev-dependencies]
127127
pytest-cov = "^2.8.1"
128128
pytest-localserver = "^0.5.0"
129-
pytest-sanic = "^1.6.0"
129+
pytest-sanic = "^1.6.1"
130130
pytest-asyncio = "^0.10.0"
131131
pytest-xdist = "^1.31.0"
132132
pytest = "^5.3.4"
133133
freezegun = "^0.3.14"
134134
responses = "^0.10.9"
135135
nbsphinx = "~0.5"
136136
aioresponses = "^0.6.2"
137-
moto = "==1.3.8"
137+
moto = "==1.3.14"
138138
fakeredis = "^1.4.0"
139139
mongomock = "^3.18.0"
140140
black = "^19.10b0"
@@ -152,7 +152,7 @@ sphinxcontrib-programoutput = "==0.11"
152152
pygments = "^2.6.1"
153153
sphinxcontrib-httpdomain = "==1.6.1"
154154
sphinxcontrib-websupport = "==1.1.0"
155-
sphinxcontrib-trio = "==1.0.2"
155+
sphinxcontrib-trio = "==1.1.1"
156156
sphinx-tabs = "==1.1.13"
157157
sphinx-autodoc-typehints = "==1.6.0"
158158
rasabaster = "^0.7.23"

rasa/nlu/selectors/response_selector.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,9 @@ def preprocess_train_data(self, training_data: TrainingData) -> RasaModelData:
272272
"""
273273

274274
if self.retrieval_intent:
275-
training_data = training_data.filter_by_intent(self.retrieval_intent)
275+
training_data = training_data.filter_training_examples(
276+
lambda ex: self.retrieval_intent == ex.get(INTENT)
277+
)
276278
else:
277279
# retrieval intent was left to its default value
278280
logger.info(

rasa/nlu/training_data/training_data.py

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
from collections import Counter, OrderedDict
55
from copy import deepcopy
66
from os.path import relpath
7-
from typing import Any, Dict, List, Optional, Set, Text, Tuple
7+
from typing import Any, Dict, List, Optional, Set, Text, Tuple, Callable
88

99
import rasa.nlu.utils
1010
from rasa.utils.common import raise_warning, lazy_property
11-
from rasa.nlu.constants import RESPONSE, RESPONSE_KEY_ATTRIBUTE
11+
from rasa.nlu.constants import ENTITIES, INTENT, RESPONSE, RESPONSE_KEY_ATTRIBUTE
1212
from rasa.nlu.training_data.message import Message
1313
from rasa.nlu.training_data.util import check_duplicate_synonym
1414
from rasa.nlu.utils import list_to_str
@@ -75,21 +75,35 @@ def merge(self, *others: "TrainingData") -> "TrainingData":
7575
nlg_stories,
7676
)
7777

78-
def filter_by_intent(self, intent: Text):
79-
"""Filter training examples """
78+
def filter_training_examples(
79+
self, condition: Callable[[Message], bool]
80+
) -> "TrainingData":
81+
"""Filter training examples.
8082
81-
training_examples = []
82-
for ex in self.training_examples:
83-
if ex.get("intent") == intent:
84-
training_examples.append(ex)
83+
Args:
84+
condition: A function that will be applied to filter training examples.
85+
86+
Returns:
87+
TrainingData: A TrainingData with filtered training examples.
88+
"""
8589

8690
return TrainingData(
87-
training_examples,
91+
list(filter(condition, self.training_examples)),
8892
self.entity_synonyms,
8993
self.regex_features,
9094
self.lookup_tables,
9195
)
9296

97+
def filter_by_intent(self, intent: Text) -> "TrainingData":
98+
"""Filter training examples."""
99+
raise_warning(
100+
"The `filter_by_intent` function is deprecated. "
101+
"Please use `filter_training_examples` instead.",
102+
DeprecationWarning,
103+
stacklevel=2,
104+
)
105+
return self.filter_training_examples(lambda ex: intent == ex.get(INTENT))
106+
93107
def __hash__(self) -> int:
94108
from rasa.core import utils as core_utils
95109

@@ -105,49 +119,49 @@ def sanitize_examples(examples: List[Message]) -> List[Message]:
105119
Remove trailing whitespaces from intent and response annotations and drop duplicate examples."""
106120

107121
for ex in examples:
108-
if ex.get("intent"):
109-
ex.set("intent", ex.get("intent").strip())
122+
if ex.get(INTENT):
123+
ex.set(INTENT, ex.get(INTENT).strip())
110124

111-
if ex.get("response"):
112-
ex.set("response", ex.get("response").strip())
125+
if ex.get(RESPONSE):
126+
ex.set(RESPONSE, ex.get(RESPONSE).strip())
113127

114128
return list(OrderedDict.fromkeys(examples))
115129

116130
@lazy_property
117131
def intent_examples(self) -> List[Message]:
118-
return [ex for ex in self.training_examples if ex.get("intent")]
132+
return [ex for ex in self.training_examples if ex.get(INTENT)]
119133

120134
@lazy_property
121135
def response_examples(self) -> List[Message]:
122-
return [ex for ex in self.training_examples if ex.get("response")]
136+
return [ex for ex in self.training_examples if ex.get(RESPONSE)]
123137

124138
@lazy_property
125139
def entity_examples(self) -> List[Message]:
126-
return [ex for ex in self.training_examples if ex.get("entities")]
140+
return [ex for ex in self.training_examples if ex.get(ENTITIES)]
127141

128142
@lazy_property
129143
def intents(self) -> Set[Text]:
130144
"""Returns the set of intents in the training data."""
131-
return {ex.get("intent") for ex in self.training_examples} - {None}
145+
return {ex.get(INTENT) for ex in self.training_examples} - {None}
132146

133147
@lazy_property
134148
def responses(self) -> Set[Text]:
135149
"""Returns the set of responses in the training data."""
136-
return {ex.get("response") for ex in self.training_examples} - {None}
150+
return {ex.get(RESPONSE) for ex in self.training_examples} - {None}
137151

138152
@lazy_property
139153
def retrieval_intents(self) -> Set[Text]:
140154
"""Returns the total number of response types in the training data"""
141155
return {
142-
ex.get("intent")
156+
ex.get(INTENT)
143157
for ex in self.training_examples
144-
if ex.get("response") is not None
158+
if ex.get(RESPONSE) is not None
145159
}
146160

147161
@lazy_property
148162
def examples_per_intent(self) -> Dict[Text, int]:
149163
"""Calculates the number of examples per intent."""
150-
intents = [ex.get("intent") for ex in self.training_examples]
164+
intents = [ex.get(INTENT) for ex in self.training_examples]
151165
return dict(Counter(intents))
152166

153167
@lazy_property
@@ -299,7 +313,7 @@ def sorted_intent_examples(self) -> List[Message]:
299313
"""Sorts the intent examples by the name of the intent and then response"""
300314

301315
return sorted(
302-
self.intent_examples, key=lambda e: (e.get("intent"), e.get("response"))
316+
self.intent_examples, key=lambda e: (e.get(INTENT), e.get(RESPONSE))
303317
)
304318

305319
def validate(self) -> None:
@@ -393,7 +407,7 @@ def split_nlu_examples(
393407
) -> Tuple[list, list]:
394408
train, test = [], []
395409
for intent, count in self.examples_per_intent.items():
396-
ex = [e for e in self.intent_examples if e.data["intent"] == intent]
410+
ex = [e for e in self.intent_examples if e.data[INTENT] == intent]
397411
if random_seed is not None:
398412
random.Random(random_seed).shuffle(ex)
399413
else:

rasa/train.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,13 @@ async def _train_nlu_async(
424424
fixed_model_name: Optional[Text] = None,
425425
persist_nlu_training_data: bool = False,
426426
):
427+
if not nlu_data:
428+
print_error(
429+
"No NLU data given. Please provide NLU data in order to train "
430+
"a Rasa NLU model using the '--nlu' argument."
431+
)
432+
return
433+
427434
# training NLU only hence the training files still have to be selected
428435
file_importer = TrainingDataImporter.load_nlu_importer_from_config(
429436
config, training_data_paths=[nlu_data]
@@ -432,8 +439,9 @@ async def _train_nlu_async(
432439
training_datas = await file_importer.get_nlu_data()
433440
if training_datas.is_empty():
434441
print_error(
435-
"No NLU data given. Please provide NLU data in order to train "
436-
"a Rasa NLU model using the '--nlu' argument."
442+
f"Path '{nlu_data}' doesn't contain valid NLU data in it. "
443+
"Please verify the data format. "
444+
"The NLU model training will be skipped now."
437445
)
438446
return
439447

tests/conftest.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
DEFAULT_STORIES_FILE,
4040
END_TO_END_STORY_FILE,
4141
MOODBOT_MODEL_PATH,
42+
INCORRECT_NLU_DATA,
4243
)
4344
from tests.utilities import update_number_of_epochs
4445

@@ -142,6 +143,11 @@ def default_nlu_data() -> Text:
142143
return DEFAULT_NLU_DATA
143144

144145

146+
@pytest.fixture(scope="session")
147+
def incorrect_nlu_data() -> Text:
148+
return INCORRECT_NLU_DATA
149+
150+
145151
@pytest.fixture(scope="session")
146152
def end_to_end_story_file() -> Text:
147153
return END_TO_END_STORY_FILE

0 commit comments

Comments
 (0)