Skip to content

Commit

Permalink
Fix output_spans with dict_combine fix: hankcs#1727
Browse files Browse the repository at this point in the history
  • Loading branch information
hankcs committed Apr 20, 2022
1 parent 396568c commit 77217d5
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 2 deletions.
2 changes: 1 addition & 1 deletion hanlp/components/tokenizers/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def generate_prediction_filename(self, tst_data, save_dir):

def prediction_to_human(self, pred, vocab, batch, rebuild_span=False):
output_spans = self.config.get('output_spans', None)
tokens = self.spans_to_tokens(pred, batch, rebuild_span)
tokens = self.spans_to_tokens(pred, batch, rebuild_span or output_spans)
if output_spans:
subtoken_spans = batch['token_subtoken_offsets']
results = []
Expand Down
2 changes: 1 addition & 1 deletion hanlp/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Author: hankcs
# Date: 2019-12-28 19:26

__version__ = '2.1.0-beta.26'
__version__ = '2.1.0-beta.27'
"""HanLP version"""


Expand Down
19 changes: 19 additions & 0 deletions tests/test_mtl.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,25 @@ def test_transform(self):
self.assertSequenceEqual(mtl("我的用户ID跟你的用户id不同", tasks=task)[task],
['我', '的', '用户ID', '跟', '你', '的', '用户', 'id', '不同'])

def test_tok_offset(self):
    """Check that tokenizer span offsets index back into the input sentence.

    With ``output_spans`` enabled, every item returned for the task is
    unpacked as ``(token, begin, end)`` and ``sent[begin:end]`` must equal
    ``token``. The check runs twice: once with no custom dictionary and
    once with ``dict_combine`` set.
    """
    task = 'tok/fine'
    tok = mtl[task]
    sent = '我先去看医生'

    try:
        tok.config.output_spans = True
        tok.dict_force = None
        tok.dict_combine = None

        for t, b, e in mtl(sent, tasks=task)[task]:
            self.assertEqual(t, sent[b:e])

        tok.dict_combine = {'先去'}
        for t, b, e in mtl(sent, tasks=task)[task]:
            self.assertEqual(t, sent[b:e])
    finally:
        # ``mtl`` is also used by other tests in this file, so the tokenizer
        # settings must be reset even when an assertion above fails;
        # otherwise span output / the custom dictionary would leak into them.
        tok.config.output_spans = False
        tok.dict_force = None
        tok.dict_combine = None


if __name__ == '__main__':
unittest.main()

0 comments on commit 77217d5

Please sign in to comment.