Skip to content

Commit

Permalink
Py36 pylinting (#137)
Browse files · Browse the repository at this point in the history
* pyupgrade --py36-plus
* pylint
  • Branch information
barrust authored Oct 4, 2022
1 parent 03e2991 commit 4008719
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 49 deletions.
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ line-length = 120
target-version = ['py36']
include = '\.pyi?$'

[tool.pylint.'MESSAGES CONTROL']
max-line-length = 120
max-args = 6

[build-system]
requires = ["setuptools>=61.2.0", "wheel"]
build-backend = "setuptools.build_meta"
Expand Down
2 changes: 1 addition & 1 deletion spellchecker/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@
__version__ = "0.7.0"
__credits__ = ["Peter Norvig"]
__url__ = "https://github.com/barrust/pyspellchecker"
__bugtrack_url__ = "{0}/issues".format(__url__)
__bugtrack_url__ = f"{__url__}/issues"
42 changes: 18 additions & 24 deletions spellchecker/spellchecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .utils import KeyT, _parse_into_words, ensure_unicode, load_file, write_file


class SpellChecker(object):
class SpellChecker:
"""The SpellChecker class encapsulates the basics needed to accomplish a
simple spell checking algorithm. It is based on the work by
Peter Norvig (https://norvig.com/spell-correct.html)
Expand Down Expand Up @@ -55,12 +55,12 @@ def __init__(
if not isinstance(language, Iterable) or isinstance(language, (str, bytes)):
language = [language] # type: ignore
for lang in language:
filename = "resources/{}.json.gz".format(lang.lower())
filename = f"resources/{lang.lower()}.json.gz"
try:
json_open = pkgutil.get_data("spellchecker", filename)
except FileNotFoundError:
msg = ("The provided dictionary language ({}) does not exist!").format(lang.lower())
raise ValueError(msg)
except FileNotFoundError as exc:
msg = f"The provided dictionary language ({lang.lower()}) does not exist!"
raise ValueError(msg) from exc
if json_open:
lang_dict = json.loads(gzip.decompress(json_open).decode("utf-8"))
self._word_frequency.load_json(lang_dict)
Expand All @@ -77,8 +77,7 @@ def __getitem__(self, key: KeyT) -> int:

def __iter__(self) -> typing.Generator[str, None, None]:
"""setup iter support"""
for word in self._word_frequency.dictionary:
yield word
yield from self._word_frequency.dictionary

@classmethod
def languages(cls) -> typing.Iterable[str]:
Expand Down Expand Up @@ -106,8 +105,7 @@ def distance(self, val: int) -> None:
"""set the distance parameter"""
tmp = 2
try:
int(val)
if val > 0 and val <= 2:
if 0 < int(val) <= 2:
tmp = val
except (ValueError, TypeError):
pass
Expand Down Expand Up @@ -178,13 +176,13 @@ def candidates(self, word: KeyT) -> typing.Optional[typing.Set[str]]:
return {word}

# get edit distance 1...
res = [x for x in self.edit_distance_1(word)]
res = list(self.edit_distance_1(word))
tmp = self.known(res)
if tmp:
return tmp
# if still not found, use the edit distance 1 to calc edit distance 2
if self._distance == 2:
tmp = self.known([x for x in self.__edit_distance_alt(res)])
tmp = self.known(list(self.__edit_distance_alt(res)))
if tmp:
return tmp
return None
Expand All @@ -198,7 +196,7 @@ def known(self, words: typing.Iterable[KeyT]) -> typing.Set[str]:
set: The set of those words from the input that are in the corpus"""
tmp_words = [ensure_unicode(w) for w in words]
tmp = [w if self._case_sensitive else w.lower() for w in tmp_words]
return set(w for w in tmp if w in self._word_frequency.dictionary and self._check_if_should_check(w))
return {w for w in tmp if w in self._word_frequency.dictionary and self._check_if_should_check(w)}

def unknown(self, words: typing.Iterable[KeyT]) -> typing.Set[str]:
"""The subset of `words` that do not appear in the dictionary
Expand All @@ -209,7 +207,7 @@ def unknown(self, words: typing.Iterable[KeyT]) -> typing.Set[str]:
set: The set of those words from the input that are not in the corpus"""
tmp_words = [ensure_unicode(w) for w in words]
tmp = [w if self._case_sensitive else w.lower() for w in tmp_words if self._check_if_should_check(w)]
return set(w for w in tmp if w not in self._word_frequency.dictionary)
return {w for w in tmp if w not in self._word_frequency.dictionary}

def edit_distance_1(self, word: KeyT) -> typing.Set[str]:
"""Compute all strings that are one edit away from `word` using only
Expand Down Expand Up @@ -256,9 +254,9 @@ def __edit_distance_alt(self, words: typing.Iterable[KeyT]) -> typing.List[str]:
def _check_if_should_check(self, word: str) -> bool:
if len(word) == 1 and word in string.punctuation:
return False
elif (len(word) > self._word_frequency.longest_word_length + 3): # magic number to allow removal of up to 2 letters.
if len(word) > self._word_frequency.longest_word_length + 3: # allow removal of up to 2 letters
return False
elif word.lower() == 'nan': # nan passes the float(word) so this will bypass that issue (#125)
if word.lower() == 'nan': # nan passes the float(word) so this will bypass that issue (#125)
return True
try: # check if it is a number (int, float, etc)
float(word)
Expand All @@ -269,7 +267,7 @@ def _check_if_should_check(self, word: str) -> bool:
return True


class WordFrequency(object):
class WordFrequency:
"""Store the `dictionary` as a word frequency list while allowing for
different methods to load the data and update over time"""

Expand Down Expand Up @@ -309,8 +307,7 @@ def __getitem__(self, key: KeyT) -> int:

def __iter__(self) -> typing.Generator[str, None, None]:
"""turn on iter support"""
for word in self._dictionary:
yield word
yield from self._dictionary

def pop(self, key: KeyT, default: typing.Optional[int] = None) -> int:
"""Remove the key and return the associated value or default if not
Expand Down Expand Up @@ -383,8 +380,7 @@ def keys(self) -> typing.Generator[str, None, None]:
str: The next key in the dictionary
Note:
This is the same as `spellchecker.words()`"""
for key in self._dictionary.keys():
yield key
yield from self._dictionary.keys()

def words(self) -> typing.Generator[str, None, None]:
"""Iterator over the words in the dictionary
Expand All @@ -393,8 +389,7 @@ def words(self) -> typing.Generator[str, None, None]:
str: The next word in the dictionary
Note:
This is the same as `spellchecker.keys()`"""
for word in self._dictionary.keys():
yield word
yield from self._dictionary.keys()

def items(self) -> typing.Generator[typing.Tuple[str, int], None, None]:
"""Iterator over the words in the dictionary
Expand All @@ -404,8 +399,7 @@ def items(self) -> typing.Generator[typing.Tuple[str, int], None, None]:
int: The number of instances in the dictionary
Note:
This is the same as `dict.items()`"""
for word in self._dictionary.keys():
yield word, self._dictionary[word]
yield from self._dictionary.items()

def load_dictionary(self, filename: str, encoding: str = "utf-8") -> None:
"""Load in a pre-built word frequency list
Expand Down
9 changes: 5 additions & 4 deletions spellchecker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ def decorator_wrapper(func):
@functools.wraps(func)
def test_inner(*args, **kwargs):
if [int(x) for x in version.split(".")] <= [int(x) for x in __version__.split(".")]:
msg = "The function {} must be fully removed as it is depricated and must be removed by version {}".format(
func.__name__, version
msg = (
f"The function {func.__name__} must be fully removed as it is depricated"
f" and must be removed by version {version}"
)
raise AssertionError(msg)
return func(*args, **kwargs)
Expand All @@ -46,7 +47,7 @@ def decorator_wrapper(func):
def function_wrapper(*args, **kwargs):
func_name = func.__name__
if func_name not in function_wrapper.deprecated_items:
msg = "Function {} is now deprecated! {}".format(func.__name__, message)
msg = f"Function {func.__name__} is now deprecated! {message}"
warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
function_wrapper.deprecated_items.add(func_name)

Expand Down Expand Up @@ -105,7 +106,7 @@ def load_file(filename: str, encoding: str) -> typing.Generator[KeyT, None, None
with __gzip_read(filename, mode="rt", encoding=encoding) as data:
yield data
else:
with open(filename, mode="r", encoding=encoding) as fobj:
with open(filename, encoding=encoding) as fobj:
yield fobj.read()


Expand Down
37 changes: 17 additions & 20 deletions tests/spellchecker_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding=UTF-8
""" Unittest class """

import unittest
Expand Down Expand Up @@ -171,22 +170,22 @@ def test_missing_dictionary(self):
def test_load_external_dictionary(self):
"""test loading a local dictionary"""
here = os.path.dirname(__file__)
filepath = "{}/resources/small_dictionary.json".format(here)
filepath = f"{here}/resources/small_dictionary.json"
spell = SpellChecker(language=None, local_dictionary=filepath)
self.assertEqual(spell["a"], 1)
self.assertTrue("apple" in spell)

def test_edit_distance_one(self):
"""test a case where edit distance must be one"""
here = os.path.dirname(__file__)
filepath = "{}/resources/small_dictionary.json".format(here)
filepath = f"{here}/resources/small_dictionary.json"
spell = SpellChecker(language=None, local_dictionary=filepath, distance=1)
self.assertEqual(spell.candidates("hike"), {"bike"})

def test_edit_distance_two(self):
"""test a case where edit distance must be two"""
here = os.path.dirname(__file__)
filepath = "{}/resources/small_dictionary.json".format(here)
filepath = f"{here}/resources/small_dictionary.json"
spell = SpellChecker(language=None, local_dictionary=filepath)
self.assertEqual(spell.candidates("ale"), {"a", "apple"})

Expand All @@ -209,7 +208,7 @@ def test_edit_distance_invalud(self):
def test_load_text_file(self):
"""test loading a text file"""
here = os.path.dirname(__file__)
filepath = "{}/resources/small_doc.txt".format(here)
filepath = f"{here}/resources/small_doc.txt"
spell = SpellChecker(language=None) # just from this doc!
spell.word_frequency.load_text_file(filepath)
self.assertEqual(spell["a"], 3)
Expand Down Expand Up @@ -289,11 +288,11 @@ def test_unique_words(self):
def test_import_export_json(self):
"""test the export functionality as json"""
here = os.path.dirname(__file__)
filepath = "{}/resources/small_dictionary.json".format(here)
filepath = f"{here}/resources/small_dictionary.json"

spell = SpellChecker(language=None, local_dictionary=filepath)
spell.word_frequency.add("meh")
new_filepath = "{}/resources/small_dictionary_new.json".format(here)
new_filepath = f"{here}/resources/small_dictionary_new.json"
spell.export(new_filepath, gzipped=False)

sp = SpellChecker(language=None, local_dictionary=new_filepath)
Expand All @@ -305,11 +304,11 @@ def test_import_export_json(self):
def test_import_export_gzip(self):
"""test the export functionality as gzip"""
here = os.path.dirname(__file__)
filepath = "{}/resources/small_dictionary.json".format(here)
filepath = f"{here}/resources/small_dictionary.json"

spell = SpellChecker(language=None, local_dictionary=filepath)
spell.word_frequency.add("meh")
new_filepath = "{}/resources/small_dictionary_new.json.gz".format(here)
new_filepath = f"{here}/resources/small_dictionary_new.json.gz"
spell.export(new_filepath, gzipped=True)

sp = SpellChecker(language=None, local_dictionary=new_filepath)
Expand Down Expand Up @@ -407,7 +406,7 @@ def test_adding_unicode(self):
self.assertEqual("ñ" in spell.word_frequency.letters, True)

here = os.path.dirname(__file__)
new_filepath = "{}/resources/small_dictionary_new.json.gz".format(here)
new_filepath = f"{here}/resources/small_dictionary_new.json.gz"
spell.export(new_filepath, gzipped=True)

spell2 = SpellChecker(language=None, local_dictionary=new_filepath)
Expand All @@ -419,11 +418,10 @@ def test_tokenizer_file(self):
"""def using a custom tokenizer for file loading"""

def tokens(txt):
for x in txt.split():
yield x
yield from txt.split()

here = os.path.dirname(__file__)
filepath = "{}/resources/small_doc.txt".format(here)
filepath = f"{here}/resources/small_doc.txt"
spell = SpellChecker(language=None) # just from this doc!
spell.word_frequency.load_text_file(filepath, tokenizer=tokens)
self.assertEqual(spell["a"], 3)
Expand All @@ -437,11 +435,10 @@ def test_tokenizer_provided(self):
"""Test passing in a tokenizer"""

def tokens(txt):
for x in txt.split():
yield x
yield from txt.split()

here = os.path.dirname(__file__)
filepath = "{}/resources/small_doc.txt".format(here)
filepath = f"{here}/resources/small_doc.txt"
spell = SpellChecker(language=None, tokenizer=tokens) # just from this doc!
spell.word_frequency.load_text_file(filepath)
self.assertEqual(spell["a"], 3)
Expand All @@ -457,7 +454,7 @@ def test_bytes_input(self):
var = b"bike"

here = os.path.dirname(__file__)
filepath = "{}/resources/small_dictionary.json".format(here)
filepath = f"{here}/resources/small_dictionary.json"
spell = SpellChecker(language=None, local_dictionary=filepath)

self.assertTrue(var in spell)
Expand All @@ -467,12 +464,12 @@ def test_split_words(self):
"""test using split_words"""
spell = SpellChecker()
res = spell.split_words("This isn't a good test, but it is a test!!!!")
self.assertEqual(set(res), set(["This", "isn't", "a", "good", "test", "but", "it", "is", "a", "test"]))
self.assertEqual(set(res), {"This", "isn't", "a", "good", "test", "but", "it", "is", "a", "test"})

def test_iter_spellchecker(self):
"""Test using the iterator on the SpellChecker"""
here = os.path.dirname(__file__)
filepath = "{}/resources/small_dictionary.json".format(here)
filepath = f"{here}/resources/small_dictionary.json"

spell = SpellChecker(language=None, local_dictionary=filepath)

Expand All @@ -485,7 +482,7 @@ def test_iter_spellchecker(self):
def test_iter_word_frequency(self):
"""Test using the iterator on the WordFrequency"""
here = os.path.dirname(__file__)
filepath = "{}/resources/small_dictionary.json".format(here)
filepath = f"{here}/resources/small_dictionary.json"

spell = SpellChecker(language=None, local_dictionary=filepath)

Expand Down

0 comments on commit 4008719

Please sign in to comment.