Skip to content

Commit

Permalink
Fix quote pair RegEx matching for all quote types
Browse files — browse the repository at this point in the history
  • Loading branch information
Th-Underscore committed Jan 13, 2025
1 parent facb415 commit 4d8a694
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion modules/html_generator.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -84,10 +84,11 @@ def replace_quotes(text):
('‘', '’'), # Alternative single quotes
('“', '”'), # Unicode quotes (numeric entities)
('“', '”'), # Unicode quotes (hex entities)
('\u201C', '\u201D'), # Unicode quotes (literal chars)
]

# Create a regex pattern that matches any of the quote pairs, including newlines
pattern = '|'.join(f'({re.escape(open_q)})(.*?)({re.escape(close_q)})' for open_q, close_q in quote_pairs)
pattern = f"({'|'.join(f'{re.escape(open_q)}' for open_q, _ in quote_pairs)})(.*?)({'|'.join(f'{re.escape(close_q)}' for _, close_q in quote_pairs)})"

# Replace matched patterns with <q> tags, keeping original quotes
replaced_text = re.sub(pattern, lambda m: f'<q>{m.group(1)}{m.group(2)}{m.group(3)}</q>', text, flags=re.DOTALL)
Expand Down

0 comments on commit 4d8a694

Please sign in to comment.