@@ -88,14 +88,16 @@ class HTMLSerializer(object):
88
88
# miscellaneous options
89
89
emit_doctype = 'preserve'
90
90
inject_meta_charset = True
91
+ lang_attr = 'preserve'
91
92
strip_whitespace = False
92
93
sanitize = False
93
94
94
95
options = ("quote_attr_values" , "quote_char" , "use_best_quote_char" ,
95
96
"minimize_boolean_attributes" , "use_trailing_solidus" ,
96
97
"space_before_trailing_solidus" , "omit_optional_tags" ,
97
98
"strip_whitespace" , "inject_meta_charset" , "escape_lt_in_attrs" ,
98
- "escape_rcdata" , "resolve_entities" , "emit_doctype" , "sanitize" )
99
+ "escape_rcdata" , "resolve_entities" , "emit_doctype" , "lang_attr" ,
100
+ "sanitize" )
99
101
100
102
def __init__ (self , ** kwargs ):
101
103
"""Initialize HTMLSerializer.
@@ -114,6 +116,11 @@ def __init__(self, **kwargs):
114
116
* emit_doctype='preserve' preserves the doctype, if any, unchanged
115
117
inject_meta_charset=True|False
116
118
..?
119
+ lang_attr='preserve'|'xml'|'html'
120
+ Whether to translate between 'lang' and 'xml:lang' attribute names.
121
+ * lang_attr='preserve' does no translation
122
+ * lang_attr='xml' translates 'lang' to 'xml:lang'
123
+ * lang_attr='html' translates 'xml:lang' to 'lang'
117
124
quote_attr_values=True|False
118
125
Whether to quote attribute values that don't require quoting
119
126
per HTML5 parsing rules.
@@ -288,6 +295,18 @@ def serialize(self, treewalker, encoding=None):
288
295
attrs = attrs .items ()
289
296
attributes = []
290
297
for k ,v in attrs :
298
+
299
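+ # Normalize the namespaced XML lang attribute to 'xml:lang', then apply lang_attr.
+ # NOTE(review): attrs is iterated as (name, value) pairs, so the `in attrs`
+ # string membership tests below look like they never match - confirm and
+ # compare against the attribute names instead.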
300
+ if k == '{http://www.w3.org/XML/1998/namespace}lang' :
301
+ k = 'xml:lang'
302
+ if self .lang_attr == 'xml' :
303
+ if k == 'lang' and not ('xml:lang' in attrs or
304
+ '{http://www.w3.org/XML/1998/namespace}lang' in attrs ):
305
+ k = 'xml:lang'
306
+ elif self .lang_attr == 'html' :
307
+ if k == 'xml:lang' and not ('lang' in attrs ):
308
+ k = 'lang'
309
+
291
310
if encoding :
292
311
k = k .encode (encoding , "strict" )
293
312
attributes .append (' ' )
0 commit comments