Skip to content

Commit 794073a

Browse files
committed
Fix result returned by sanitize_examples()
sanitize_examples() should return a list of messages in the same order as its input. See RasaHQ#4414 (comment) for details.
1 parent a68655b commit 794073a

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

rasa/nlu/training_data/training_data.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,21 @@
22
import os
33
import random
44
import warnings
5+
from _collections import OrderedDict
56
from collections import Counter
67
from copy import deepcopy
78
from os.path import relpath
89
from typing import Any, Dict, List, Optional, Set, Text, Tuple
910

10-
from rasa.nlu.utils import list_to_str
1111
import rasa.nlu.utils
1212
import rasa.utils.common as rasa_utils
13-
from rasa.nlu.training_data.message import Message
14-
from rasa.nlu.training_data.util import check_duplicate_synonym
1513
from rasa.nlu.constants import (
1614
MESSAGE_RESPONSE_ATTRIBUTE,
1715
MESSAGE_RESPONSE_KEY_ATTRIBUTE,
1816
)
17+
from rasa.nlu.training_data.message import Message
18+
from rasa.nlu.training_data.util import check_duplicate_synonym
19+
from rasa.nlu.utils import list_to_str
1920

2021
DEFAULT_TRAINING_DATA_OUTPUT_PATH = "training_data.json"
2122

@@ -115,7 +116,7 @@ def sanitize_examples(examples: List[Message]) -> List[Message]:
115116
if ex.get("response"):
116117
ex.set("response", ex.get("response").strip())
117118

118-
examples = list(set(examples))
119+
examples = list(OrderedDict.fromkeys(examples))
119120

120121
return examples
121122

0 commit comments

Comments
 (0)