@@ -259,7 +259,7 @@ def test_markdown_single_sections():
    assert td_syn_only.entity_synonyms == {"Chines": "chinese", "Chinese": "chinese"}


-def test_repeated_entities():
+def test_repeated_entities(tmp_path):
    data = """
{
  "rasa_nlu_data": {
@@ -279,21 +279,20 @@ def test_repeated_entities():
    ]
  }
}"""
-    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
-        f.write(data.encode(io_utils.DEFAULT_ENCODING))
-        f.flush()
-        td = training_data.load_data(f.name)
-        assert len(td.entity_examples) == 1
-        example = td.entity_examples[0]
-        entities = example.get("entities")
-        assert len(entities) == 1
-        tokens = WhitespaceTokenizer().tokenize(example, attribute=TEXT)
-        start, end = MitieEntityExtractor.find_entity(entities[0], example.text, tokens)
-        assert start == 9
-        assert end == 10
-
-
-def test_multiword_entities():
+    f = tmp_path / "tmp_training_data.json"
+    f.write_text(data, io_utils.DEFAULT_ENCODING)
+    td = training_data.load_data(str(f))
+    assert len(td.entity_examples) == 1
+    example = td.entity_examples[0]
+    entities = example.get("entities")
+    assert len(entities) == 1
+    tokens = WhitespaceTokenizer().tokenize(example, attribute=TEXT)
+    start, end = MitieEntityExtractor.find_entity(entities[0], example.text, tokens)
+    assert start == 9
+    assert end == 10
+
+
+def test_multiword_entities(tmp_path):
    data = """
{
  "rasa_nlu_data": {
@@ -313,21 +312,20 @@ def test_multiword_entities():
    ]
  }
}"""
-    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
-        f.write(data.encode(io_utils.DEFAULT_ENCODING))
-        f.flush()
-        td = training_data.load_data(f.name)
-        assert len(td.entity_examples) == 1
-        example = td.entity_examples[0]
-        entities = example.get("entities")
-        assert len(entities) == 1
-        tokens = WhitespaceTokenizer().tokenize(example, attribute=TEXT)
-        start, end = MitieEntityExtractor.find_entity(entities[0], example.text, tokens)
-        assert start == 4
-        assert end == 7
-
-
-def test_nonascii_entities():
+    f = tmp_path / "tmp_training_data.json"
+    f.write_text(data, io_utils.DEFAULT_ENCODING)
+    td = training_data.load_data(str(f))
+    assert len(td.entity_examples) == 1
+    example = td.entity_examples[0]
+    entities = example.get("entities")
+    assert len(entities) == 1
+    tokens = WhitespaceTokenizer().tokenize(example, attribute=TEXT)
+    start, end = MitieEntityExtractor.find_entity(entities[0], example.text, tokens)
+    assert start == 4
+    assert end == 7
+
+
+def test_nonascii_entities(tmp_path):
    data = """
{
  "luis_schema_version": "5.0",
@@ -345,22 +343,21 @@ def test_nonascii_entities():
    }
  ]
}"""
-    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
-        f.write(data.encode(io_utils.DEFAULT_ENCODING))
-        f.flush()
-        td = training_data.load_data(f.name)
-        assert len(td.entity_examples) == 1
-        example = td.entity_examples[0]
-        entities = example.get("entities")
-        assert len(entities) == 1
-        entity = entities[0]
-        assert entity["value"] == "ßäæ ?€ö)"
-        assert entity["start"] == 19
-        assert entity["end"] == 27
-        assert entity["entity"] == "description"
-
-
-def test_entities_synonyms():
+    f = tmp_path / "tmp_training_data.json"
+    f.write_text(data, io_utils.DEFAULT_ENCODING)
+    td = training_data.load_data(str(f))
+    assert len(td.entity_examples) == 1
+    example = td.entity_examples[0]
+    entities = example.get("entities")
+    assert len(entities) == 1
+    entity = entities[0]
+    assert entity["value"] == "ßäæ ?€ö)"
+    assert entity["start"] == 19
+    assert entity["end"] == 27
+    assert entity["entity"] == "description"
+
+
+def test_entities_synonyms(tmp_path):
    data = """
{
  "rasa_nlu_data": {
@@ -398,11 +395,10 @@ def test_entities_synonyms():
    ]
  }
}"""
-    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
-        f.write(data.encode(io_utils.DEFAULT_ENCODING))
-        f.flush()
-        td = training_data.load_data(f.name)
-        assert td.entity_synonyms["New York City"] == "nyc"
+    f = tmp_path / "tmp_training_data.json"
+    f.write_text(data, io_utils.DEFAULT_ENCODING)
+    td = training_data.load_data(str(f))
+    assert td.entity_synonyms["New York City"] == "nyc"


def cmp_message_list(firsts, seconds):
@@ -531,3 +527,24 @@ def test_load_data_from_non_existing_file():

def test_is_empty():
    assert TrainingData().is_empty()
+
+
+def test_custom_attributes(tmp_path):
+    data = """
+{
+  "rasa_nlu_data": {
+    "common_examples" : [
+      {
+        "intent": "happy",
+        "text": "I'm happy.",
+        "sentiment": 0.8
+      }
+    ]
+  }
+}"""
+    f = tmp_path / "tmp_training_data.json"
+    f.write_text(data, io_utils.DEFAULT_ENCODING)
+    td = training_data.load_data(str(f))
+    assert len(td.training_examples) == 1
+    example = td.training_examples[0]
+    assert example.get("sentiment") == 0.8
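Every hunk above applies the same mechanical change: the `tempfile.NamedTemporaryFile` context manager with manual `encode()`/`flush()` is replaced by pytest's built-in `tmp_path` fixture. Below is a minimal self-contained sketch of the pattern, assuming `io_utils.DEFAULT_ENCODING` is effectively `"utf-8"` and using `json.loads` as a stand-in for the real `training_data.load_data`:

```python
import json


def test_tmp_path_replaces_tempfile(tmp_path):
    # tmp_path is a built-in pytest fixture: a fresh pathlib.Path directory
    # per test, cleaned up by pytest itself, so the NamedTemporaryFile
    # context manager and the manual encode()/flush() both disappear.
    data = '{"rasa_nlu_data": {"common_examples": []}}'
    f = tmp_path / "tmp_training_data.json"
    f.write_text(data, encoding="utf-8")  # assumed equal to DEFAULT_ENCODING

    # Consumers that want a filename (training_data.load_data in the diff)
    # receive str(f); here json.loads stands in for the real loader.
    loaded = json.loads(f.read_text(encoding="utf-8"))
    assert loaded["rasa_nlu_data"]["common_examples"] == []
```

Besides being shorter, this sidesteps the documented `NamedTemporaryFile` caveat that on Windows the file cannot be reopened by name while the handle is still open, which is likely why the old tests needed the explicit `flush()` inside the `with` block.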