-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathnpybabel.py
executable file
·140 lines (125 loc) · 6.29 KB
/
npybabel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python
# EASY-INSTALL-ENTRY-SCRIPT: 'Babel==0.9.6','console_scripts','pybabel'
__requires__ = 'Babel==0.9.6'
import sys
from pkg_resources import load_entry_point
import re
import json
from lxml import etree as elt
from babel.messages import extract
# Script entry point: when executed directly, behave exactly like the
# ``pybabel`` command-line tool shipped with Babel 0.9.6 and exit with its
# return value.  The extractor functions defined further down in this module
# are only reached when the module is imported (e.g. by Babel resolving an
# extraction method), not when it is run as a script.
if __name__ == '__main__':
    pybabel_main = load_entry_point('Babel==0.9.6', 'console_scripts', 'pybabel')
    sys.exit(pybabel_main())
# Matches a ``_t(...)`` call whose only argument is a double-quoted or
# single-quoted string literal (backslash escapes are allowed inside the
# literal, optional spaces around it).  Group 1 captures the literal
# including its surrounding quotes.
XMLJS_EXPR = re.compile(r"""(?:\_t *\( *((?:"(?:[^"\\]|\\.)*")|(?:'(?:[^'\\]|\\.)*')) *\))""")
# Comment attached to every message produced by the extractors in this module,
# used as a flag to identify web translations.
TRANSLATION_FLAG_COMMENT = "openerp-web"
# List of etree._Element subclasses that we choose to ignore when parsing XML.
# We include the *Base ones just in case, currently they seem to be subclasses of the _* ones.
SKIPPED_ELEMENT_TYPES = (elt._Comment, elt._ProcessingInstruction, elt.CommentBase, elt.PIBase)
def extract_xmljs(fileobj, keywords, comment_tags, options):
    """Extract messages from Javascript code embedded into XML documents.

    This complements the ``extract_javascript`` extractor which works
    only on pure .js files, and the ``extract_qweb`` extractor, which only
    extracts XML text.

    .. warning::

        This extractor is deliberately disabled: iterating over its result
        always raises ``AssertionError`` with an explanation of why it was
        removed.  The legacy implementation is kept below the ``raise`` for
        reference only and is never executed.

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    :rtype: ``iterator``
    :raises AssertionError: always, on the first iteration step
    """
    # Raise explicitly instead of using ``assert False, ...``: assert
    # statements are stripped when Python runs with -O, which would let the
    # known-broken implementation below run silently.  The exception type and
    # message are the same ones the assert produced.
    raise AssertionError(""" the XMLJS extractor does not work and was removed:
* Babel apparently does not accept two extractors for the same set of files
so it would not run the xmljs extractor at all, extraction of JS stuff
needs to be done from the XML extractor
* The regex above fails up if there are back-slashed quotes within the
translatable string (the string marked with _t), it just won't match the
string
* While extraction succeeds on XML entities (e.g. &quot;), translation
matching will fail if those entities are kept in the PO msgid as the
XML parser will get an un-escaped string, without those entities (so a
text extractor will extract ``Found match &quot;%s&quot;``, but the msgid
of the PO file must be ``Found match "%s"`` or the translation will fail
* single-quoted strings are not valid JSON string, so single-quoted strings
matched by the regex (likely since XML attributes are double-quoted,
single quotes within them don't have to be escaped) will blow up when
json-parsed for their content
I think that's about it.
If this extractor is reimplemented, it should be integrated into
extract_qweb, either in the current pass (probably not a good idea) or as
a separate pass using iterparse, matching either elements with t-js or
some other kinds of t-* directives (@t-esc, @t-raw, @t-att, others?),
shove the attribute content into a StringIO and pass *that* to Babel's
own extract_javascript; then add a line offset in order to yield the
correct line number.
""")
    # --- Unreachable legacy implementation, kept for reference. ---
    # The ``yield`` below also keeps this function a generator, so the error
    # above is raised when the result is first iterated (as it was before),
    # not at call time.
    content = fileobj.read()
    found = XMLJS_EXPR.finditer(content)
    index = 0
    line_nbr = 0
    for f in found:
        msg = f.group(1)
        msg = json.loads(msg)
        # Count the newlines between the previous match and this one.
        while index < f.start():
            if content[index] == "\n":
                line_nbr += 1
            index += 1
        yield (line_nbr, None, msg, [TRANSLATION_FLAG_COMMENT])
def extract_qweb(fileobj, keywords, comment_tags, options):
    """Collect translatable strings from a qweb template file.

    The XML document is parsed in full and every element below the root is
    visited in document order.  For each visited element the text content,
    the ``title``, ``alt`` and ``label`` attributes and the tail text are
    candidates for translation.  The root element's own text and attributes
    are not examined.

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions (unused here)
    :param comment_tags: a list of translator tags to search for and
                         include in the results (unused here)
    :param options: a dictionary of additional options (unused here)
    :return: a list of ``(lineno, funcname, message, comments)`` tuples,
             where ``funcname`` is always ``None`` and ``comments`` only
             holds the web translation flag
    :rtype: ``list``
    """
    messages = []

    def collect(raw_text, lineno):
        # Record the stripped text, ignoring missing/empty values and
        # mono-char tokens like ':' ',' etc.
        stripped = (raw_text or "").strip()
        if len(stripped) <= 1:
            return
        messages.append((lineno, None, stripped, [TRANSLATION_FLAG_COMMENT]))

    def is_translatable(node):
        # An element (and its whole sub-tree) is left out when it carries
        # t-js, a t-jquery without t-operation, or t-translation="off".
        attrs = node.attrib
        if "t-js" in attrs:
            return False
        if "t-jquery" in attrs and "t-operation" not in attrs:
            return False
        if "t-translation" in attrs and attrs["t-translation"].strip() == "off":
            return False
        return True

    # Manual recursion instead of iterparse: when an element is skipped, its
    # entire sub-tree has to be skipped along with it.
    def walk(parent):
        for node in parent:
            if isinstance(node, SKIPPED_ELEMENT_TYPES):
                # NOTE: this also skips the tail text that follows a comment
                # or processing instruction.
                continue
            if is_translatable(node):
                collect(node.text, node.sourceline)
                for attr_name in ('title', 'alt', 'label'):
                    if attr_name in node.attrib:
                        collect(node.attrib[attr_name], node.sourceline)
                walk(node)
            # The tail is handled even when the element itself was skipped.
            collect(node.tail, node.sourceline)

    document = elt.parse(fileobj)
    walk(document.getroot())
    return messages
def extract_javascript(fileobj, keywords, comment_tags, options):
    """Extract messages from Javascript source files.

    All the actual work is delegated to babel's built-in javascript
    extractor; this wrapper only appends a special comment to each result,
    used as a flag to identify web translations.

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    :rtype: ``iterator``
    """
    extracted = extract.extract_javascript(fileobj, keywords, comment_tags, options)
    for entry in extracted:
        lineno, funcname, messages, comments = entry
        # The comment list coming from babel is extended in place.
        comments.append(TRANSLATION_FLAG_COMMENT)
        yield (lineno, funcname, messages, comments)