Skip to content

Commit

Permalink
tokenize arrows
Browse files Browse the repository at this point in the history
  • Loading branch information
aajanki committed Aug 23, 2023
1 parent ded7d62 commit 2e0d63f
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions fi/fi.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,18 @@
from spacy.lang.char_classes import LIST_HYPHENS, LIST_CURRENCY, CURRENCY, UNITS
from spacy.lang.char_classes import ALPHA, ALPHA_LOWER, ALPHA_UPPER


# Quote characters: keep every imported entry except the empty string and the
# plain comma, then append the single angle quotation marks.
LIST_QUOTES = [
    quote for quote in LIST_QUOTES if quote not in ("", ",")
] + ["‹", "›"]
CONCAT_QUOTES = "".join(LIST_QUOTES)

# Punctuation: build a new list with the bullet appended. Plain concatenation
# (not +=) is used so the imported list object itself is never mutated.
LIST_PUNCT = LIST_PUNCT + [
    "•",
]
CONCAT_PUNCT = "".join(LIST_PUNCT)

# Icons: extend a copy of the imported list with the four arrow characters.
LIST_ICONS = LIST_ICONS + [
    "←",
    "↑",
    "→",
    "↓",
]

_prefixes = (
[
"§", "%", "=", "—", "–", r"\+(?![0-9])", "→",
"§", "%", "=", "—", "–", r"\+(?![0-9])",
r"[/-](?=[{a}])".format(a=ALPHA)
]
+ LIST_PUNCT
Expand Down

0 comments on commit 2e0d63f

Please sign in to comment.