Skip to content

gh-134752: Improve speed of test_tokenize.StringPrefixTest.test_prefixes. #134766

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 27, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 45 additions & 27 deletions Lib/test/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3241,39 +3241,40 @@ def test_exact_flag(self):


class StringPrefixTest(unittest.TestCase):
def test_prefixes(self):
# Get the list of defined string prefixes. I don't see an
# obvious documented way of doing this, but probably the best
# thing is to split apart tokenize.StringPrefix.

# Make sure StringPrefix begins and ends in parens.
self.assertEqual(tokenize.StringPrefix[0], '(')
self.assertEqual(tokenize.StringPrefix[-1], ')')

# Then split apart everything else by '|'.
defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))

# Now compute the actual string prefixes, by exec-ing all
# valid prefix combinations, followed by an empty string.

# Try all prefix lengths until we find a length that has zero
# valid prefixes. This will miss the case where for example
# there are no valid 3 character prefixes, but there are valid
# 4 character prefixes. That seems extremely unlikely.

# Note that the empty prefix is being included, because length
# starts at 0. That's expected, since StringPrefix includes
# the empty prefix.
@staticmethod
def determine_valid_prefixes():
# Try all lengths until we find a length that has zero valid
# prefixes. This will miss the case where for example there
# are no valid 3 character prefixes, but there are valid 4
# character prefixes. That seems unlikely.

single_char_valid_prefixes = set()

# Find all of the single character string prefixes. Just get
# the lowercase version, we'll deal with combinations of upper
# and lower case later. I'm using this logic just in case
# some uppercase-only prefix is added.
for letter in itertools.chain(string.ascii_lowercase, string.ascii_uppercase):
try:
eval(f'{letter}""')
single_char_valid_prefixes.add(letter.lower())
except SyntaxError:
pass

# This logic assumes that all combinations of valid prefixes only use
# the characters that are valid single character prefixes. That seems
# like a valid assumption, but if it ever changes this will need
# adjusting.
valid_prefixes = set()
for length in itertools.count():
num_at_this_length = 0
for prefix in (
"".join(l) for l in list(itertools.combinations(string.ascii_lowercase, length))
"".join(l)
for l in itertools.combinations(single_char_valid_prefixes, length)
):
for t in itertools.permutations(prefix):
for u in itertools.product(*[(c, c.upper()) for c in t]):
p = ''.join(u)
p = "".join(u)
if p == "not":
# 'not' can never be a string prefix,
# because it's a valid expression: not ""
Expand All @@ -3289,9 +3290,26 @@ def test_prefixes(self):
except SyntaxError:
pass
if num_at_this_length == 0:
break
return valid_prefixes


def test_prefixes(self):
    """Check tokenize.StringPrefix against the interpreter's actual prefixes."""
    # Get the list of defined string prefixes.  I don't see an
    # obvious documented way of doing this, but probably the best
    # thing is to split apart tokenize.StringPrefix.

    # Make sure StringPrefix begins and ends in parens.  We're
    # assuming it's of the form "(a|b|ab)", if a, b, and ab are
    # valid string prefixes.
    self.assertEqual(tokenize.StringPrefix[0], '(')
    self.assertEqual(tokenize.StringPrefix[-1], ')')

    # Then split apart everything else by '|'.
    defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))

    # Now compute the actual allowed string prefixes and compare
    # to what is defined in the tokenize module.
    self.assertEqual(defined_prefixes, self.determine_valid_prefixes())


if __name__ == "__main__":
Expand Down
Loading