Skip to content

Commit

Permalink
improved preprocessing for siunitx notation, LaTeX math, and HTML comments
Browse files Browse the repository at this point in the history
  • Loading branch information
gpoore committed Jun 4, 2020
1 parent cd663c7 commit 136e930
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 14 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@

## v0.4.0 (dev)

* Fixed catastrophic backtracking in LaTeX math regex (#11).
* Improved preprocessing for siunitx notation, LaTeX math, and HTML comments.
Fixed catastrophic backtracking in LaTeX math regex (#11). Added support
for newlines in HTML comments. The preprocessor now skips backslash
escapes, inline code containing newlines, and fenced code blocks (as long as
they do not start on the same line as a list marker).



Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,10 @@ text, which typically appears as italics. Text can be styled using Markdown
notation, or with HTML. Remember to preview quizzes after conversion to QTI,
especially when using any significant amount of HTML.

While indented Markdown code blocks are supported, fenced code blocks should
be preferred. Indented code can interfere with the preprocessor that strips
HTML comments and handles LaTeX math and siunitx notation.


### Titles

Expand Down
51 changes: 39 additions & 12 deletions text2qti/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,25 +394,52 @@ def sub_siunitx_to_plain_latex(self, string: str, in_math: bool=False) -> str:
return self.siunitx_latex_macros_re.sub(lambda match: self._siunitx_dispatch(match, in_math), string)


html_comment_pattern = r'(?P<html_comment><!--.*?-->)'
inline_code_pattern = r'(?P<code>(?<!`)(?P<code_delim>`+)(?!`).+?(?<!`)(?P=code_delim)(?!`))'
inline_math_pattern = r'(?<!\$)\$(?!\$)(?P<math>[^ \t\n](?:[^$\n]|\n[ \t]*[^ \t$\n])*)(?<![ \t\n])\$(?!\$)'
html_comment_or_inline_code_math_siunitx_re = re.compile('|'.join([html_comment_pattern,
inline_code_pattern,
inline_math_pattern,
siunitx_latex_macros_pattern]))
skip = r'(?P<skip>\\.|\\\n|\$\$+(?!\$))'
html_comment_pattern = r'(?P<html_comment><!--(?:.|\n)*?-->)'
block_code_pattern = (
r'^(?P<block_code>'
r'(?P<indent>[ \t]*)(?P<block_code_delim>```+(?!`)|~~~+(?!~)).*?\n'
r'(?:[ \t]*\n|(?P=indent).*\n)*?'
r'(?P=indent)(?P=block_code_delim)[ \t]*(?:\n|$)'
r')'
)
inline_code_pattern = (
r'(?P<inline_code>'
r'(?P<inline_code_delim>`+(?!`))'
r'(?:.|\n[ \t]*(?![ \t\n]))+?'
r'(?<!`)(?P=inline_code_delim)(?!`)'
r')'
)
inline_math_pattern = (
r'\$(?=[^ \t\n])'
r'(?P<math>(?:[^$\n\\]|\\.|\\?\n[ \t]*(?:[^ \t\n$]))+)'
r'(?<![ \t\n])\$(?!\$)'
)
patterns = '|'.join([
block_code_pattern,
siunitx_latex_macros_pattern,
skip,
html_comment_pattern,
inline_code_pattern,
inline_math_pattern,
])
skip_or_html_comment_or_code_math_siunitx_re = re.compile(patterns, re.MULTILINE)

def _html_comment_or_inline_code_math_siunitx_dispatch(self, match: typing.Match[str]) -> str:
'''
Process LaTeX math and siunitx regex matches into Canvas image tags,
while stripping HTML comments and leaving inline code matches
unchanged.
while stripping HTML comments and leaving things like backslash
escapes and code unchanged.
'''
lastgroup = match.lastgroup
if lastgroup == 'html_comment':
return ''
if lastgroup == 'code_delim':
return match.group('code')
if lastgroup == 'skip':
return match.group('skip')
if lastgroup == 'block_code':
return match.group('block_code')
if lastgroup == 'inline_code':
return match.group('inline_code')
if lastgroup == 'math':
math = match.group('math')
math = math.replace('\n ', ' ').replace('\n', ' ')
Expand All @@ -431,7 +458,7 @@ def sub_math_siunitx_to_canvas_img(self, string: str) -> str:
Convert all siunitx macros in a string into plain LaTeX. Then convert
this LaTeX and all $-delimited LaTeX into Canvas img tags.
'''
return self.html_comment_or_inline_code_math_siunitx_re.sub(self._html_comment_or_inline_code_math_siunitx_dispatch, string)
return self.skip_or_html_comment_or_code_math_siunitx_re.sub(self._html_comment_or_inline_code_math_siunitx_dispatch, string)

def md_to_html_xml(self, markdown_string: str, strip_p_tags: bool=False) -> str:
'''
Expand Down
2 changes: 1 addition & 1 deletion text2qti/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-

from .fmtversion import get_version_plus_info
__version__, __version_info__ = get_version_plus_info(0, 4, 0, 'dev', 1)
__version__, __version_info__ = get_version_plus_info(0, 4, 0, 'dev', 2)

0 comments on commit 136e930

Please sign in to comment.