Skip to content

Commit 77217d5

Browse files
committed
Fix output_spans with dict_combine (fixes hankcs#1727)
1 parent 396568c commit 77217d5

File tree

3 files changed

+21
-2
lines changed

3 files changed

+21
-2
lines changed

hanlp/components/tokenizers/transformer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -202,7 +202,7 @@ def generate_prediction_filename(self, tst_data, save_dir):
202202

203203
def prediction_to_human(self, pred, vocab, batch, rebuild_span=False):
204204
output_spans = self.config.get('output_spans', None)
205-
tokens = self.spans_to_tokens(pred, batch, rebuild_span)
205+
tokens = self.spans_to_tokens(pred, batch, rebuild_span or output_spans)
206206
if output_spans:
207207
subtoken_spans = batch['token_subtoken_offsets']
208208
results = []

hanlp/version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
# Author: hankcs
33
# Date: 2019-12-28 19:26
44

5-
__version__ = '2.1.0-beta.26'
5+
__version__ = '2.1.0-beta.27'
66
"""HanLP version"""
77

88

tests/test_mtl.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,25 @@ def test_transform(self):
8787
self.assertSequenceEqual(mtl("我的用户ID跟你的用户id不同", tasks=task)[task],
8888
['我', '的', '用户ID', '跟', '你', '的', '用户', 'id', '不同'])
8989

90+
def test_tok_offset(self):
91+
task = 'tok/fine'
92+
tok = mtl[task]
93+
tok.config.output_spans = True
94+
tok.dict_force = None
95+
tok.dict_combine = None
96+
sent = '我先去看医生'
97+
98+
for t, b, e in mtl(sent, tasks=task)[task]:
99+
self.assertEqual(t, sent[b:e])
100+
101+
tok.dict_combine = {'先去'}
102+
for t, b, e in mtl(sent, tasks=task)[task]:
103+
self.assertEqual(t, sent[b:e])
104+
105+
tok.config.output_spans = False
106+
tok.dict_force = None
107+
tok.dict_combine = None
108+
90109

91110
if __name__ == '__main__':
92111
unittest.main()

0 commit comments

Comments
 (0)