Skip to content

Commit

Permalink
Optimize merging sub-tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
hankcs committed Mar 22, 2022
1 parent e843a4e commit 4467a3c
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
4 changes: 2 additions & 2 deletions hanlp/components/tokenizers/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,8 @@ def spans_to_tokens(self, spans, batch, rebuild_span=False):
combined = text[offsets[spans_per_sent[start - delta][0]][0]:
offsets[spans_per_sent[end - delta - 1][1] - 1][1]]
else:
- combined = tokens[start:end]
- buffer.append(''.join(combined))
+ combined = ''.join(tokens[start:end])
+ buffer.append(combined)
offset = end
if rebuild_span:
start -= delta
Expand Down
2 changes: 1 addition & 1 deletion hanlp/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Author: hankcs
# Date: 2019-12-28 19:26

- __version__ = '2.1.0-beta.18'
+ __version__ = '2.1.0-beta.19'
"""HanLP version"""


Expand Down

0 comments on commit 4467a3c

Please sign in to comment.