Skip to content

Commit

Permalink
Updates old HTML formatter
Browse files Browse the repository at this point in the history
  • Loading branch information
RevanProdigalKnight committed Apr 19, 2016
1 parent 9263ed2 commit 97dc01b
Showing 1 changed file with 65 additions and 43 deletions.
108 changes: 65 additions & 43 deletions codeformatter/lib/htmlbeautifier/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# For support with this formatter, tag @RevanProdigalKnight in the issue

from __future__ import print_function
import sys
import re
Expand All @@ -15,7 +17,6 @@ def __init__(self):
self.indent_char = ' '
self.indent_with_tabs = False
self.expand_tags = False
self.expand_javascript = False
self.minimum_attribute_count = 2
self.first_attribute_on_new_line = False
self.reduce_empty_tags = False
Expand All @@ -27,12 +28,11 @@ def __repr__(self):
indent_char = [%s]
indent_with_tabs = [%s]
expand_tags = [%s]
expand_javascript = [%s]
minimum_attribute_count = %d
first_attribute_on_new_line = [%s]
reduce_empty_tags = [%s]
exception_on_tag_mismatch = [%s]
custom_singletons = [%s]""" % (self.indent_size, self.indent_char, self.indent_with_tabs, self.expand_tags, self.expand_javascript, self.minimum_attribute_count, self.first_attribute_on_new_line, self.reduce_empty_tags, self.exception_on_tag_mismatch, self.custom_singletons)
custom_singletons = [%s]""" % (self.indent_size, self.indent_char, self.indent_with_tabs, self.expand_tags, self.minimum_attribute_count, self.first_attribute_on_new_line, self.reduce_empty_tags, self.exception_on_tag_mismatch, self.custom_singletons)

def default_options():
return BeautifierOptions()
Expand Down Expand Up @@ -62,7 +62,6 @@ def __init__(self, source_text, opts=default_options()):
self.opts = opts
self.exception_on_tag_mismatch = opts.exception_on_tag_mismatch
self.expand_tags = opts.expand_tags
self.expand_javascript = opts.expand_javascript
self.minimum_attribute_count = opts.minimum_attribute_count
self.first_attribute_on_new_line = opts.first_attribute_on_new_line
self.reduce_empty_tags = opts.reduce_empty_tags
Expand All @@ -75,14 +74,16 @@ def __init__(self, source_text, opts=default_options()):
self.tab_size = sublime.load_settings('Preferences.sublime-settings').get('tab_size',4)
self.indent_level = 0
# These are the tags that are currently defined as being void by the HTML5 spec, and should be self-closing (a.k.a. singletons)
self.singletons = r'<(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr<%= custom %>)([^>]*)>'
self.singletons = r'<(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr<%= custom %>)([^>]*?)/?>(?:\s*?</\1>)?'
if not opts.custom_singletons == '':
self.singletons = re.sub(r'<%= custom %>','|' + opts.custom_singletons,self.singletons)
else:
self.singletons = re.sub(r'<%= custom %>','',self.singletons)

def remove_newlines(self,str):
return re.sub(r'\n\s*',r'',str.group(1))
# Compile singletons regex since it's used so often (twice before the loop, then once per loop iteration)
self.singletons = re.compile(self.singletons,re.I)
self.removed_css = []
self.removed_js = []
self.removed_comments = []

def expand_tag(self,str):
_str = str.group(0) # cache the original string in a variable for faster access
Expand Down Expand Up @@ -115,74 +116,95 @@ def expand_tag(self,str):
tag += '\n' + (((self.indent_level * self.indent_size) + extra_tabs) * self.indent_char) + (indent * ' ') + l
return tag

def remove_newlines(self,ch=''): return lambda str: re.sub(r'\n\s*',ch,str.group(0))

def remove(self,pattern,replacement,findList,raw):
pattern = re.compile(r'(?<=\n)\s*?' + pattern,re.S|re.I)
findList.extend(pattern.findall(raw))
return pattern.sub((lambda match: match.group(0)[:-len(match.group(0).lstrip())] + replacement),raw) # Preserve the indentation from the beginning of the match

def remove_js(self,raw): return self.remove(r'<script[^>]*>.*?</script>','/* SCRIPT */',self.removed_js,raw)
def remove_css(self,raw): return self.remove(r'<style[^>]*>.*?</style>','/* STYLE */',self.removed_css,raw)
def remove_comments(self,raw): return self.remove(r'<!--.*?-->','/* COMMENT */',self.removed_comments,raw)

def reindent(self,raw,match):
prev_newline = r'(?<=\n)'
lowest_indent = -1
for l in re.split(r'\n',raw):
indent = len(l) - len(l.strip())
if lowest_indent == -1 or lowest_indent > indent:
lowest_indent = indent
indent = len(match.group(1)) * self.indent_char
return indent + re.sub(prev_newline,indent,re.sub(prev_newline + (lowest_indent * self.indent_char),'',raw.lstrip())); # Force new indentation

def getNextFrom(self,_list):
it = iter(_list)
return lambda match: self.reindent(it.next(),match)

def replace(self,pattern,replaceList,raw): return re.compile(r'(?<=\n)(\s*?)' + pattern,re.S|re.I).sub(self.getNextFrom(replaceList),raw)
def replace_comments(self,raw): return self.replace(r'/\* COMMENT \*/',self.removed_comments,raw)
def replace_css(self,raw): return self.replace(r'/\* STYLE \*/',self.removed_css,raw)
def replace_js(self,raw): return self.replace(r'/\* SCRIPT \*/',self.removed_js,raw)

def beautify(self):
beautiful = ''

replaceWithSpace = self.remove_newlines(' ')

raw = self.source_text

# Replace single-line javascript comments with block comments so that expansion of the elements inside doesn't
# become un-commented, ignoring commented CDATA tags
if self.expand_javascript:
raw = re.sub(r'(?<=\s)//(?!<!\[CDATA\[|\]\]>)(.+)\n',r'/*\1*/\n',raw)
raw = re.sub(r';+',r';',raw)
# Remove JS, CSS, and comments from raw source
raw = self.remove_js(raw)
raw = self.remove_css(raw)
raw = self.remove_comments(raw)

# Add newlines before/after tags (excluding CDATA). This separates single-line HTML comments into 3 lines as well
# Add newlines before/after tags (excluding CDATA)
raw = re.sub(r'(<[^! ]|(?<!/\*|//)\]\]>|(?<!<!\[endif\])-->)',r'\n\1',raw)
raw = re.sub(r'(>|(?<!/\*|//)<!\[CDATA\[|<!--(?!\[if .+?\]>))',r'\1\n',raw)

# Add newlines before=after javascript braces/switch cases/comments
if self.expand_javascript:
raw = re.sub(r'(\}|\*/)',r'\n\1',raw)
raw = re.sub(r'(\{|/\*|(?<!\();(?!\))|(?:case [^:]+|default):)',r'\1\n',raw)
raw = re.sub(r'\[(?!(?:if .+?\]>|endif\]-->))([^\]]+)\]',r'[\n\1\n]',raw)# Split javascript array entries onto new lines
raw = re.sub(r'(\[[^\[\]]{0,10}\])',self.remove_newlines,raw)# Fix javascript regex that was broken by the previous regex replace
raw = re.sub(r',(?!;$)([^:;\{]+:[^,])',r',\n\1',raw) # Split javascript object entries onto new lines
raw = re.sub(r'({[^\{}]{0,10}})',self.remove_newlines,raw)# Fix javascript regex that was broken by the previous regex replace
raw = re.sub(r'((for|while)\s+?(\([^\)]+\))\s+?\{)',self.remove_newlines,raw) # Put all the content of a loop def on the same line
raw = re.sub(r'\},\s*?\{',r'},\n{',raw)
# Fix CSS that will have been expanded by this option as well so that new CSS rulesets begin on their own line
raw = re.sub(r'\}(.*?)(\{|;)',r'}\n\1\2',raw)
# Fix AngularJS/Blade/etc brace ({{}}) templates that will have been broken into multiple lines
raw = re.sub(r'(\{{2,})(.*?)(\}{2,})',r'\1 \2 \3',re.sub(r'(\{(?:\s*\{)+[\s\S]*?\}(?:\s*\})+)',self.remove_newlines,raw))

raw = re.sub(r'("[^"]*")',self.remove_newlines,raw) # Put all content between double-quote marks back on the same line
raw = re.sub(self.singletons,r'<\1\2/>',raw) # Replace all singleton tags with /-delimited tags (XHTML style)
raw = raw.replace('//>','/>') # Fix the singleton tags if they were already /-delimited
# Fix AngularJS/Blade/etc brace ({{}}, {{::}}, etc) templates that will have been broken into multiple lines
raw = re.sub(r'(\{{2,}(?:::)?)\s?(.*?)\s?(\}{2,})',r'\1 \2 \3',re.sub(r'\{(?:\s*\{)+\s?[\s\S]*?\s?\}(?:\s*\})+',self.remove_newlines(),raw))

raw = re.sub(r'"[^"]*"',replaceWithSpace,raw) # Put all content between double-quote marks back on the same line

# Re-join start tags that are already on multiple lines (ignore end tags)
raw = re.compile(r'(?<=\n)<(?!/).*?>(?=\n)',re.S).sub(replaceWithSpace,raw)

raw = self.singletons.sub(r'<\1\2/>',raw) # Replace all singleton tags with /-delimited tags (XHTML style)
raw = self.singletons.sub(replaceWithSpace,raw)
raw = re.sub(r'(?<!\s)\s(?=/?>)','',raw)
raw = re.sub(r'\n{2,}',r'\n',raw) # Replace multiple newlines with just one

for l in re.split('\n',raw):
l = l.strip() # Trim whitespace from the line
if l == '' or l == ';': continue # If the line has no content (HTML or JavaScript), skip
if l == '': continue # If the line has no content, skip

# If the line starts with </, or an end CDATA/block comment tag, reduce indentation
if re.match(r'</|]]>|(?:<!\[endif\])?-->',l): self.indent_level -= 1
# If the line starts with }, a switch case, or the end of a block comment, reduce indentation
if self.expand_javascript and re.match(r'\}|\]|(?:case [^:]+|default):|\*/',l): self.indent_level -= 1

beautiful += (self.indent_char * self.indent_level * self.indent_size)
if self.expand_tags:
beautiful += re.sub(r'^<.*>$',self.expand_tag,l)
# beautiful += re.sub(r'(<[^/!][^>]+>)',self.expand_tag,l)
else:
beautiful += l
beautiful += '\n'

if re.search(self.singletons,l): pass # If the tag is a singleton, indentation stays the same
if self.singletons.search(l): pass # If the tag is a singleton, indentation stays the same
else:
# If the line starts with a begin CDATA/block comment tag or a tag, indent the next line
if re.match(r'<!--|<!\[CDATA\[|<[^/?! ]',l): self.indent_level += 1
# If the line starts with a block comment, switch case, or ends with {, indent the next line}
if self.expand_javascript:
if re.match(r'/\*|(?:case [^:]+|default):',l) or re.search(r'(?:\{|\[)$',l): self.indent_level += 1

# If the end of the document is not at the same indentation as the beginning, the tags aren't matched
if not self.indent_level == 0 and self.exception_on_tag_mismatch:
raise Exception("Mismatched tags")

# Put all matched start/end tags with no content between them on the same line and return
if self.reduce_empty_tags:
beautiful = re.sub(r'<([\w\-]+)([^>]*)>\s+</\1>',r'<\1\2></\1>',beautiful)
beautiful = re.sub(r'<(\S+)([^>]*)>\s+</\1>',r'<\1\2></\1>',beautiful)

# Put all single-line comments back on a single line - I separated them out earlier for simplicity's sake
beautiful = re.sub(r'<!--\n\s+(.+)\n\s+-->',r'<!-- \1 -->',beautiful)
# Replace JS, CSS, and comments in the opposite order of their removal
beautiful = self.replace_comments(beautiful)
beautiful = self.replace_css(beautiful)
beautiful = self.replace_js(beautiful)

return beautiful

0 comments on commit 97dc01b

Please sign in to comment.