-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcore.py
396 lines (326 loc) · 12.7 KB
/
core.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
from .utils import declare, remove_inner_outer_quotes
import logging
from .parsers import BaseParser, sub_func
import re
import os
import datetime
from importlib import import_module
from importlib.util import find_spec
'''
Core MDiocre conversion class
'''

# Module-wide logger for the conversion core.
logger = logging.getLogger('mdiocre.core')

# An MDiocre command embedded in an HTML comment: <!--: ... -->
RE_HTML_COMMENTS = re.compile(r'<!--:(.+?)-->')
# "something = something" (at least one character on each side of the "=")
RE_ASSIGNMENT = re.compile(r'.+=.+')
# "something : something" (at least one character on each side of the ":")
RE_KEYWORD = re.compile(r'.+:.+')
# A backslash followed by exactly one character (group 2 is that character)
RE_ESCAPE = re.compile(r'(\\)(.{1})')

# One operand of a concatenation, wrapped in a capture group. Quoted strings
# are listed first so that commas inside them are not treated as the
# concatenation operator.
_CONCAT_TOKEN = '(' + '|'.join((
    r'\"[^\"]*\"',  # double-quoted string
    r'\'[^\']*\'',  # single-quoted string
    r'\w+',         # bare word
    r'\(.+\)',      # function call
)) + ')'

# Either an operand followed by the "," concat operator (capture group 1),
# or a final operand anchored at the end of the string (capture group 2).
RE_CONCAT = re.compile(
    _CONCAT_TOKEN + r',\s*'
    + '|'
    + _CONCAT_TOKEN + r'$'
)
class MDiocre():
    '''
    Main class to process source files and render HTML files.

    Args:
        parser (Optional): a BaseParser-derived class or instance. A class
            is instantiated without arguments. If both `parser_name` and
            `parser` are defined, `parser` takes the priority.
        parser_name (str, Optional): The parser name. See :meth:`switch_parser`
            for which ones are currently implemented.

    Raises:
        ImportError: if `parser` is given but is neither a subclass nor an
            instance of BaseParser.
    '''
    def __init__(self, parser=None, parser_name=None):
        if parser is not None:
            if isinstance(parser, type) and issubclass(parser, BaseParser):
                # Store an instance, exactly like switch_parser does;
                # process() calls to_variables() on self.parser.
                self.parser = parser()
            elif isinstance(parser, BaseParser):
                self.parser = parser
            else:
                shown_name = getattr(parser, '__name__', type(parser).__name__)
                raise ImportError("class {} must be a subclass of {}".format(shown_name, BaseParser.__name__)) from None
        elif parser_name is None:
            # use markdown by default
            self.switch_parser("markdown")
        else:
            # type checking
            declare(parser_name, str)
            self.switch_parser(parser_name)

    def switch_parser(self, name):
        '''
        Switch parsers by using an identifier or a class (not an instance!)
        derived from BaseParser.

        To implement a new parser, it must be a class inherited from
        :class:`BaseParser`. Its name and file name must also
        match, e.g. a parser with the `html` identifier must
        be in `html.py` and have the class name of `HtmlParser`.

        Args:
            name (string | :class:`BaseParser` ): Parser name or type.
                If passed as a string, it will only take the following
                values:

                `markdown`, `html`, `rst`, `zim`, `gem`

                As a type, it must be a subclass of :class:`BaseParser`;
                it is instantiated without arguments.

        .. warning::
            Passing a `string` to `switch_parser` is deprecated as of
            version 3.5. It will be removed in a future release.

        Returns:
            None.

        Raises:
            TypeError: if `name` is a type that does not derive from
                BaseParser.
            Exception: if no parser module matching `name` can be located.
            ModuleNotFoundError: if the parser module (or one of its own
                imports) cannot be imported.
        '''
        if isinstance(name, type):
            if not issubclass(name, BaseParser):
                raise TypeError("class {} must be a subclass of {}".format(name.__name__, BaseParser.__name__))
            self.parser = name()
            return

        # specifications for names
        # e.g. "markdown" -> MarkdownParser in parsers/markdown.py
        # or "rst" -> RstParser in parsers/rst.py
        module_name = '.parsers.{}'.format(name.lower())
        class_name = '{}Parser'.format(name.capitalize())

        # Locate the module first so that an unknown parser name gives a
        # clear error instead of an import traceback.
        if not find_spec(module_name, 'mdiocre'):
            logger.error("{}: no parser module {} found".format(name, module_name))
            raise Exception("Can't find any suitable modules")

        try:
            # internal-only
            module = import_module(module_name, 'mdiocre')
        except ModuleNotFoundError as e:
            # Typically a missing third-party dependency of the parser.
            logger.error("{}: error occured: {}".format(name, e))
            raise

        module_class = getattr(module, class_name)
        if not issubclass(module_class, BaseParser):
            # Deliberately non-fatal: the current parser is left untouched.
            logger.error("{}: class {} must be a subclass of {}, not using.".format(name, class_name, BaseParser.__name__))
        else:
            self.parser = module_class()

    def render(self, template, variables):
        '''
        Renders a template with the specified variables.

        Due to the mechanism, template variables are separate from the
        page's variables. The converted page is defined in the
        ``content`` variable, and can be used by templates to render
        the documents.

        Args:
            template (string): A string containing formatted comments.
            variables (VariableManager): Variable object to use with
                the template.

        Returns:
            The processed string.
        '''
        # type checking
        declare(template, str)
        declare(variables, VariableManager)

        def render_sub_func(match):
            return sub_func(match, variables)

        # template variables are processed separately since
        # the content is already processed
        return RE_HTML_COMMENTS.sub(render_sub_func, template)

    def process(self, string, ignore_content=False):
        '''
        Process a string into a variable dictionary to use
        e.g. with :meth:`render`.

        The string is processed according to
        a parser that converts it to HTML and extracts any MDiocre
        "commands". For Markdown and HTML, these are stuff that is
        prefixed with `<!--:`, for RST, it's `:mdiocre:`.

        More details about the conversion process can be found in
        :class:`VariableManager`.

        As of 3.1, this is really a wrapper for all the parsers.

        Args:
            string (string): A string containing MDiocre commands.
            ignore_content (bool, Optional): If True, it will not convert
                the string to the `content` variable.

        Returns:
            A VariableManager object containing the processed variables,
            that also contains the converted HTML under the ``content``
            variable, if `ignore_content` is `False`.
        '''
        # type checking
        declare(string, str)
        declare(ignore_content, bool)

        # every call starts from a fresh set of variables
        v = VariableManager()
        return self.parser.to_variables(string, v, ignore_content=ignore_content)
class VariableManager():
    '''
    Variable manager.

    Variables are stored as a dictionary under `self.variables`. The identifiers
    can be any character except the = operator, which serves to separate the identifier
    and the value.

    .. warning::
        There are a few reserved variables whose names cannot be assigned to, namely:

        * **content** : The contents of a page that will be put into a template
        * **mdiocre-gen-timestamp** : Timestamp of the generated content

    .. note::
        The ``mdiocre-template`` variable is required when using the :class:`Wizard`.
    '''
    def __init__(self):
        self.variables = {}
        # names that assign() refuses to overwrite
        self.reserved_variable_names = ['content', 'mdiocre-gen-timestamp']

        # system variables
        self.variables['mdiocre-gen-timestamp'] = datetime.datetime.now().isoformat()

    def get(self, variable):
        '''
        Gets a variable from the variables list and returns its value.

        Args:
            variable (string): Name of the variable. Surrounding whitespace
                is ignored.

        Returns:
            String contents of the variable, or an empty string if the
            variable is not found.
        '''
        # type checking
        declare(variable, str)

        return str(self.variables.get(variable.strip(), ''))

    def parse_keyword(self, query):
        '''
        Currently called from :meth:`assign`, this implements a few commands,
        or "keywords" that can be used to implement a little more modularity
        in one's templates/contents.

        Operands are separated by the colon `:`, on the left hand side is the
        keyword, on the right hand is the argument, assumed to be a string.
        The keyword is case-insensitive.

        They can be one of the following:

        * **Include** : Essentially, it literally includes a file into the template or content. It can be used to set global variables, include common banners, etc. Its argument is a file RELATIVE TO THE WORKING DIRECTORY THE SCRIPT IS CALLED IN!

          Example: ``Include: ../variables.html``

        * **Using** : Load a Python script. You can use it to define a few functions which can be useful with the function call feature during assignment, for example to dynamically convert a few strings of text.

          Example: ``Using: ../_functions.py``

        .. warning::
            The `using` keyword executes raw Python code, so it may pose a
            security risk! Use with caution, and double-check your source
            files!

        Args:
            query (string): Expects a string in the form of ``keyword : argument``.

        Returns:
            For ``include``, the rendered contents of the included file, or
            an empty string if the file does not exist. For ``using``, an
            empty string.

        Raises:
            SyntaxError: if `query` is not in ``keyword : argument`` form or
                the keyword is not supported.
        '''
        # type checking
        declare(query, str)

        if not RE_KEYWORD.match(query):
            raise SyntaxError(f'<{query}> is neither a keyword nor an assign statement')

        keyword, value = query.split(':', 1)
        keyword = keyword.strip().lower()
        value = value.strip()

        # value quote; startswith() is safe when the argument is only
        # whitespace (value is then empty), unlike indexing value[0]
        if value.startswith(('"', "'")):
            value = remove_inner_outer_quotes(value)

        if keyword == "include":
            # include a raw file
            file_ = os.path.abspath(value)
            if os.path.isfile(file_):
                with open(file_, "r") as f_:
                    # the include is rendered by MDiocre itself, using
                    # this object's variables
                    m = MDiocre(parser_name='html')
                    return m.render(f_.read(), self)
            # a missing file is silently ignored
            return ''
        elif keyword == "using":
            file_ = os.path.abspath(value)
            # SECURITY: executes the file's contents as Python in this
            # module's global namespace. Only use with trusted sources.
            if os.path.isfile(file_):
                with open(file_, "r") as f_:
                    exec(f_.read(), globals())
            return ''
        else:
            raise SyntaxError('Supported keywords: include; using.')

    def assign(self, query):
        '''
        Assigns a variable to a value.

        The variable name has almost no limitations (especially not
        limitations usually posed by a programming language), but it is
        terminated by the `=` symbol.

        The value can be one of the following:

        * **String** : if the value has quotes (single or double) around it.

          Example query: ``My Variable = "Toast"``

        * **Concatenation** : if two or more variable names are specified, with a comma separating each.

          Example query: ``My Variable = Var 1, Var 2``

        * **Value assignment** : if the value is a variable name. This is assumed to be the default. Will assign
          to an empty string if the variable is not found.

          Example query: ``My Variable = Something else``

        * **Function calls** : if a function is defined using the ``using``
          keyword, it may be used for dynamic data conversion and processing. Surrounded by parentheses,
          the word directly after it is the function name, followed by its arguments, surrounded by spaces.
          Like regular Python, strings need to be in quotes. Arguments may be names of variables that are
          already defined up to that point, they will be automatically substituted.

          Example query: ``RSSDate = (toRFC822 PubDate)``

        .. warning::
            The function call feature executes raw Python code, so it may pose a
            security risk! Use with caution, and double-check your source
            files!

        Args:
            query (string): Expects a string in the form of ``variable = value``.

        Returns:
            An empty string once the variable has been stored in the
            object's ``variables`` dictionary. If `query` is not an
            assignment, the result of :meth:`parse_keyword` is returned
            instead.

        Raises:
            SyntaxError: if the variable name is reserved, or if `query` is
                neither an assignment nor a supported keyword.
        '''
        # type checking
        declare(query, str)

        if not RE_ASSIGNMENT.match(query):
            # if string matches " Something : Something else " do that instead
            return self.parse_keyword(query)

        # query expected to be "variable = value"
        ident, value = query.split('=', 1)
        ident = ident.strip()
        value = value.strip()

        # check valid identifier
        if ident in self.reserved_variable_names:
            raise SyntaxError(f'assignment <{query}>: variable name "{ident}" cannot be used!')

        # RE_CONCAT yields (group 1, group 2) pairs; exactly one is non-empty
        concat_tokens = [
            (with_comma or at_end).strip()
            for with_comma, at_end in RE_CONCAT.findall(value)
        ]

        # start with a blank value and append each token in the order
        # it appears
        value = ''
        for var in concat_tokens:
            if var[0] == '"' or var[0] == "'":
                # token is a string: strip the quotes, then render the
                # escaped characters of THIS token only, so that text
                # appended earlier is not unescaped a second time
                literal = remove_inner_outer_quotes(var)
                value += RE_ESCAPE.sub(r'\2', literal)
            elif var[0] == "(":
                # token is a function call
                if var[-1] != ")":
                    raise SyntaxError(f'Unmatched ( in assignment of {ident}')

                fn_tokens = [x.strip() for x in var[1:-1].split(" ")]
                fn_name = fn_tokens[0]
                # arguments naming a known variable are replaced by that
                # variable's contents; repr() produces a valid Python
                # string literal whatever the contents (quotes,
                # backslashes, newlines)
                fn_args = [
                    repr(str(self.variables[arg])) if arg in self.variables else arg
                    for arg in fn_tokens[1:]
                ]

                # SECURITY: evaluates source-provided text as Python in
                # this module's global namespace. Only use with trusted
                # sources.
                loc = {}
                exec(f"__retval = {fn_name}({','.join(fn_args)})", globals(), loc)
                # the called function is expected to return a string
                value += (loc['__retval'])
            else:
                # token is a variable name; unknown variables contribute
                # nothing. Stringified for consistency with get().
                value += str(self.variables.get(var, ''))

        self.variables[ident] = value
        return ''