From 97dc01b92e75aa6eeb548dc03bd694d65c8b50b0 Mon Sep 17 00:00:00 2001 From: RevanProdigalKnight Date: Tue, 19 Apr 2016 09:47:54 -0400 Subject: [PATCH] Updates old HTML formatter --- codeformatter/lib/htmlbeautifier/__init__.py | 108 +++++++++++-------- 1 file changed, 65 insertions(+), 43 deletions(-) diff --git a/codeformatter/lib/htmlbeautifier/__init__.py b/codeformatter/lib/htmlbeautifier/__init__.py index 96b637b..60c876a 100644 --- a/codeformatter/lib/htmlbeautifier/__init__.py +++ b/codeformatter/lib/htmlbeautifier/__init__.py @@ -1,3 +1,5 @@ +# For support with this formatter, tag @RevanProdigalKnight in the issue + from __future__ import print_function import sys import re @@ -15,7 +17,6 @@ def __init__(self): self.indent_char = ' ' self.indent_with_tabs = False self.expand_tags = False - self.expand_javascript = False self.minimum_attribute_count = 2 self.first_attribute_on_new_line = False self.reduce_empty_tags = False @@ -27,12 +28,11 @@ def __repr__(self): indent_char = [%s] indent_with_tabs = [%s] expand_tags = [%s] -expand_javascript = [%s] minimum_attribute_count = %d first_attribute_on_new_line = [%s] reduce_empty_tags = [%s] exception_on_tag_mismatch = [%s] -custom_singletons = [%s]""" % (self.indent_size, self.indent_char, self.indent_with_tabs, self.expand_tags, self.expand_javascript, self.minimum_attribute_count, self.first_attribute_on_new_line, self.reduce_empty_tags, self.exception_on_tag_mismatch, self.custom_singletons) +custom_singletons = [%s]""" % (self.indent_size, self.indent_char, self.indent_with_tabs, self.expand_tags, self.minimum_attribute_count, self.first_attribute_on_new_line, self.reduce_empty_tags, self.exception_on_tag_mismatch, self.custom_singletons) def default_options(): return BeautifierOptions() @@ -62,7 +62,6 @@ def __init__(self, source_text, opts=default_options()): self.opts = opts self.exception_on_tag_mismatch = opts.exception_on_tag_mismatch self.expand_tags = opts.expand_tags - self.expand_javascript = opts.expand_javascript self.minimum_attribute_count = opts.minimum_attribute_count self.first_attribute_on_new_line = opts.first_attribute_on_new_line self.reduce_empty_tags = opts.reduce_empty_tags @@ -75,14 +74,16 @@ def __init__(self, source_text, opts=default_options()): self.tab_size = sublime.load_settings('Preferences.sublime-settings').get('tab_size',4) self.indent_level = 0 # These are the tags that are currently defined as being void by the HTML5 spec, and should be self-closing (a.k.a. singletons) - self.singletons = r'<(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr<%= custom %>)([^>]*)>' + self.singletons = r'<(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr<%= custom %>)([^>]*?)/?>(?:\s*?)?' if not opts.custom_singletons == '': self.singletons = re.sub(r'<%= custom %>','|' + opts.custom_singletons,self.singletons) else: self.singletons = re.sub(r'<%= custom %>','',self.singletons) - - def remove_newlines(self,str): - return re.sub(r'\n\s*',r'',str.group(1)) + # Compile singletons regex since it's used so often (twice before the loop, then once per loop iteration) + self.singletons = re.compile(self.singletons,re.I) + self.removed_css = [] + self.removed_js = [] + self.removed_comments = [] def expand_tag(self,str): _str = str.group(0) # cache the original string in a variable for faster access @@ -115,64 +116,83 @@ def expand_tag(self,str): tag += '\n' + (((self.indent_level * self.indent_size) + extra_tabs) * self.indent_char) + (indent * ' ') + l return tag + def remove_newlines(self,ch=''): return lambda str: re.sub(r'\n\s*',ch,str.group(0)) + + def remove(self,pattern,replacement,findList,raw): + pattern = re.compile(r'(?<=\n)\s*?' + pattern,re.S|re.I) + findList.extend(pattern.findall(raw)) + return pattern.sub((lambda match: match.group(0)[:-len(match.group(0).lstrip())] + replacement),raw) # Preserve the indentation from the beginning of the match + + def remove_js(self,raw): return self.remove(r']*>.*?','/* SCRIPT */',self.removed_js,raw) + def remove_css(self,raw): return self.remove(r']*>.*?','/* STYLE */',self.removed_css,raw) + def remove_comments(self,raw): return self.remove(r'','/* COMMENT */',self.removed_comments,raw) + + def reindent(self,raw,match): + prev_newline = r'(?<=\n)' + lowest_indent = -1 + for l in re.split(r'\n',raw): + indent = len(l) - len(l.strip()) + if lowest_indent == -1 or lowest_indent > indent: + lowest_indent = indent + indent = len(match.group(1)) * self.indent_char + return indent + re.sub(prev_newline,indent,re.sub(prev_newline + (lowest_indent * self.indent_char),'',raw.lstrip())); # Force new indentation + + def getNextFrom(self,_list): + it = iter(_list) + return lambda match: self.reindent(it.next(),match) + + def replace(self,pattern,replaceList,raw): return re.compile(r'(?<=\n)(\s*?)' + pattern,re.S|re.I).sub(self.getNextFrom(replaceList),raw) + def replace_comments(self,raw): return self.replace(r'/\* COMMENT \*/',self.removed_comments,raw) + def replace_css(self,raw): return self.replace(r'/\* STYLE \*/',self.removed_css,raw) + def replace_js(self,raw): return self.replace(r'/\* SCRIPT \*/',self.removed_js,raw) + def beautify(self): beautiful = '' + + replaceWithSpace = self.remove_newlines(' ') + raw = self.source_text - # Replace single-line javascript comments with block comments so that expansion of the elements inside doesn't - # become un-commented, ignoring commented CDATA tags - if self.expand_javascript: - raw = re.sub(r'(?<=\s)//(?!)(.+)\n',r'/*\1*/\n',raw) - raw = re.sub(r';+',r';',raw) + # Remove JS, CSS, and comments from raw source + raw = self.remove_js(raw) + raw = self.remove_css(raw) + raw = self.remove_comments(raw) - # Add newlines before/after tags (excluding CDATA). This separates single-line HTML comments into 3 lines as well + # Add newlines before/after tags (excluding CDATA) raw = re.sub(r'(<[^! ]|(?|(?)',r'\n\1',raw) raw = re.sub(r'(>|(?))',r'\1\n',raw) - # Add newlines before=after javascript braces/switch cases/comments - if self.expand_javascript: - raw = re.sub(r'(\}|\*/)',r'\n\1',raw) - raw = re.sub(r'(\{|/\*|(?|endif\]-->))([^\]]+)\]',r'[\n\1\n]',raw)# Split javascript array entries onto new lines - raw = re.sub(r'(\[[^\[\]]{0,10}\])',self.remove_newlines,raw)# Fix javascript regex that was broken by the previous regex replace - raw = re.sub(r',(?!;$)([^:;\{]+:[^,])',r',\n\1',raw) # Split javascript object entries onto new lines - raw = re.sub(r'({[^\{}]{0,10}})',self.remove_newlines,raw)# Fix javascript regex that was broken by the previous regex replace - raw = re.sub(r'((for|while)\s+?(\([^\)]+\))\s+?\{)',self.remove_newlines,raw) # Put all the content of a loop def on the same line - raw = re.sub(r'\},\s*?\{',r'},\n{',raw) - # Fix CSS that will have been expanded by this option as well so that new CSS rulesets begin on their own line - raw = re.sub(r'\}(.*?)(\{|;)',r'}\n\1\2',raw) - # Fix AngularJS/Blade/etc brace ({{}}) templates that will have been broken into multiple lines - raw = re.sub(r'(\{{2,})(.*?)(\}{2,})',r'\1 \2 \3',re.sub(r'(\{(?:\s*\{)+[\s\S]*?\}(?:\s*\})+)',self.remove_newlines,raw)) - - raw = re.sub(r'("[^"]*")',self.remove_newlines,raw) # Put all content between double-quote marks back on the same line - raw = re.sub(self.singletons,r'<\1\2/>',raw) # Replace all singleton tags with /-delimited tags (XHTML style) - raw = raw.replace('//>','/>') # Fix the singleton tags if they were already /-delimited + # Fix AngularJS/Blade/etc brace ({{}}, {{::}}, etc) templates that will have been broken into multiple lines + raw = re.sub(r'(\{{2,}(?:::)?)\s?(.*?)\s?(\}{2,})',r'\1 \2 \3',re.sub(r'\{(?:\s*\{)+\s?[\s\S]*?\s?\}(?:\s*\})+',self.remove_newlines(),raw)) + + raw = re.sub(r'"[^"]*"',replaceWithSpace,raw) # Put all content between double-quote marks back on the same line + + # Re-join start tags that are already on multiple lines (ignore end tags) + raw = re.compile(r'(?<=\n)<(?!/).*?>(?=\n)',re.S).sub(replaceWithSpace,raw) + + raw = self.singletons.sub(r'<\1\2/>',raw) # Replace all singleton tags with /-delimited tags (XHTML style) + raw = self.singletons.sub(replaceWithSpace,raw) + raw = re.sub(r'(?)','',raw) raw = re.sub(r'\n{2,}',r'\n',raw) # Replace multiple newlines with just one for l in re.split('\n',raw): l = l.strip() # Trim whitespace from the line - if l == '' or l == ';': continue # If the line has no content (HTML or JavaScript), skip + if l == '': continue # If the line has no content, skip # If the line starts with |(?:',l): self.indent_level -= 1 - # If the line starts with }, a switch case, or the end of a block comment, reduce indentation - if self.expand_javascript and re.match(r'\}|\]|(?:case [^:]+|default):|\*/',l): self.indent_level -= 1 beautiful += (self.indent_char * self.indent_level * self.indent_size) if self.expand_tags: beautiful += re.sub(r'^<.*>$',self.expand_tag,l) - # beautiful += re.sub(r'(<[^/!][^>]+>)',self.expand_tag,l) else: beautiful += l beautiful += '\n' - if re.search(self.singletons,l): pass # If the tag is a singleton, indentation stays the same + if self.singletons.search(l): pass # If the tag is a singleton, indentation stays the same else: # If the line starts with a begin CDATA/block comment tag or a tag, indent the next line if re.match(r'',r'',beautiful) + # Replace JS, CSS, and comments in the opposite order of their removal + beautiful = self.replace_comments(beautiful) + beautiful = self.replace_css(beautiful) + beautiful = self.replace_js(beautiful) return beautiful