directly use Levenshtein.normalized_similarity

chrisjbryant · Apr 13, 2022 · e63a1e8
1 parent 7f41822
commit e63a1e8
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/errant/en/classifier.py b/errant/en/classifier.py
@@ -214,7 +214,7 @@ def get_two_sided_type(o_toks, c_toks):
                 # Use string similarity to detect true spelling errors.
                 else:
                     # Normalised Lev distance works better than Lev ratio
-                    str_sim = 1-Levenshtein.normalized_distance(o_toks[0].lower_, c_toks[0].lower_)
+                    str_sim = Levenshtein.normalized_similarity(o_toks[0].lower_, c_toks[0].lower_)
                     # WARNING: THIS IS AN APPROXIMATION.
                     # Thresholds tuned manually on FCE_train + W&I_train
                     # str_sim > 0.55 is almost always a true spelling error
@@ -328,7 +328,7 @@ def get_two_sided_type(o_toks, c_toks):
         # These rules are quite language specific.
         if o_toks[0].text.isalpha() and c_toks[0].text.isalpha():
             # Normalised Lev distance works better than Lev ratio
-            str_sim = 1-Levenshtein.normalized_distance(o_toks[0].lower_, c_toks[0].lower_)
+            str_sim = Levenshtein.normalized_similarity(o_toks[0].lower_, c_toks[0].lower_)
             # WARNING: THIS IS AN APPROXIMATION.
             # Thresholds tuned manually on FCE_train + W&I_train
             # A. Short sequences are likely to be SPELL or function word errors