Skip to content

Commit

Permalink
Try to deal with non latin encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
buglloc committed Sep 28, 2018
1 parent 84f79a3 commit c0dd214
Show file tree
Hide file tree
Showing 9 changed files with 107 additions and 10 deletions.
17 changes: 17 additions & 0 deletions gixy/formatters/_jinja.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from __future__ import absolute_import
from jinja2 import Environment, PackageLoader

from gixy.utils.text import to_text


def load_template(name):
    """Return the Jinja2 template ``name`` from gixy's formatter templates.

    The environment is created with ``trim_blocks`` and ``lstrip_blocks``
    enabled and exposes ``to_text_filter`` to templates as the ``to_text``
    filter.
    """
    loader = PackageLoader('gixy', 'formatters/templates')
    environment = Environment(loader=loader, trim_blocks=True, lstrip_blocks=True)
    environment.filters['to_text'] = to_text_filter
    return environment.get_template(name)


def to_text_filter(text):
    """Jinja2 filter: reinterpret Latin-1-decoded text as UTF-8 when possible.

    The value is encoded back to bytes with Latin-1 and those bytes are
    decoded as UTF-8. When either step fails, the value is passed to
    ``to_text`` instead.

    :param text: string value coming from the template.
    :return: the re-decoded text, or the result of ``to_text(text)``.
    """
    try:
        return text.encode('latin1').decode('utf-8')
    except UnicodeError:
        # UnicodeError covers both failure modes: the value has characters
        # outside Latin-1 (UnicodeEncodeError), or its Latin-1 bytes are not
        # valid UTF-8 (UnicodeDecodeError), e.g. a genuine Latin-1 "é".
        return to_text(text)
5 changes: 2 additions & 3 deletions gixy/formatters/console.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from __future__ import absolute_import
from jinja2 import Environment, PackageLoader

from gixy.formatters.base import BaseFormatter
from gixy.formatters._jinja import load_template


class ConsoleFormatter(BaseFormatter):
    """Formatter that renders reports through the ``console.j2`` template."""

    def __init__(self):
        super(ConsoleFormatter, self).__init__()
        # Load the template once via load_template; building a separate
        # Environment here first only produced a template that was
        # immediately overwritten.
        self.template = load_template('console.j2')

    def format_reports(self, reports, stats):
        """Render ``reports`` and ``stats`` with the template and return the result."""
        return self.template.render(reports=reports, stats=stats)
2 changes: 1 addition & 1 deletion gixy/formatters/templates/console.j2
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Additional info: {{ issue.help_url }}
Reason: {{ issue.reason }}
{% endif %}
Pseudo config:
{{ issue.config }}
{{ issue.config | to_text }}

{% if not loop.last %}
------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion gixy/formatters/templates/text.j2
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Additional info: {{ issue.help_url }}
Reason: {{ issue.reason }}
{% endif %}
Pseudo config:
{{ issue.config }}
{{ issue.config | to_text }}

{% if not loop.last %}
------------------------------------------------
Expand Down
5 changes: 2 additions & 3 deletions gixy/formatters/text.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from __future__ import absolute_import
from jinja2 import Environment, PackageLoader

from gixy.formatters.base import BaseFormatter
from gixy.formatters._jinja import load_template


class TextFormatter(BaseFormatter):
    """Formatter that renders reports through the ``text.j2`` template."""

    def __init__(self):
        super(TextFormatter, self).__init__()
        # Load the template once via load_template; building a separate
        # Environment here first only produced a template that was
        # immediately overwritten.
        self.template = load_template('text.j2')

    def format_reports(self, reports, stats):
        """Render ``reports`` and ``stats`` with the template and return the result."""
        return self.template.render(reports=reports, stats=stats)
5 changes: 3 additions & 2 deletions gixy/parser/nginx_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from gixy.core.exceptions import InvalidConfiguration
from gixy.parser import raw_parser
from gixy.directives import block, directive
from gixy.utils.text import to_native

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -69,14 +70,14 @@ def directive_factory(self, parsed_type, parsed_name, parsed_args):
return None

if klass.is_block:
args = [str(v).strip() for v in parsed_args[0]]
args = [to_native(v).strip() for v in parsed_args[0]]
children = parsed_args[1]

inst = klass(parsed_name, args)
self.parse_block(children, inst)
return inst
else:
args = [str(v).strip() for v in parsed_args]
args = [to_native(v).strip() for v in parsed_args]
return klass(parsed_name, args)

def _get_directive_class(self, parsed_type, parsed_name):
Expand Down
Empty file added gixy/utils/__init__.py
Empty file.
72 changes: 72 additions & 0 deletions gixy/utils/text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from __future__ import absolute_import
from six import PY3, text_type, binary_type


def to_bytes(obj, encoding='latin1', errors='strict', nonstring='replace'):
    """Convert ``obj`` to a byte string.

    Byte strings are returned unchanged. Text is encoded with ``encoding``
    and ``errors``; if that raises ``UnicodeEncodeError`` the placeholder
    ``b'failed_to_encode'`` is returned instead.

    For any other object, ``nonstring`` selects the behaviour:

    * ``'simplerepr'`` -- encode ``str(obj)``, falling back to ``repr(obj)``
    * ``'passthru'`` -- return ``obj`` untouched
    * ``'replace'`` -- return ``b'failed_to_encode'``
    * ``'strict'`` -- raise ``TypeError``

    :raises TypeError: for a non-string under ``'strict'``, or when
        ``nonstring`` is not one of the values above.
    """
    placeholder = b'failed_to_encode'

    if isinstance(obj, binary_type):
        return obj

    if isinstance(obj, text_type):
        try:
            encoded = obj.encode(encoding, errors)
        except UnicodeEncodeError:
            return placeholder
        return encoded

    # Not a string at all: dispatch on the requested policy.
    if nonstring == 'passthru':
        return obj
    if nonstring == 'replace':
        return placeholder
    if nonstring == 'strict':
        raise TypeError('obj must be a string type')
    if nonstring != 'simplerepr':
        raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring)

    try:
        rendered = str(obj)
    except UnicodeError:
        try:
            rendered = repr(obj)
        except UnicodeError:
            # Neither representation is usable; give up.
            return placeholder

    # The rendered string goes through the normal string handling above.
    return to_bytes(rendered, encoding, errors)


def to_text(obj, encoding='latin1', errors='strict', nonstring='replace'):
    """Convert ``obj`` to a text (unicode) string.

    Text is returned unchanged. Byte strings are decoded with ``encoding``
    and ``errors``; if decoding fails the placeholder
    ``u'failed_to_encode'`` is returned instead.

    For any other object, ``nonstring`` selects the behaviour:

    * ``'simplerepr'`` -- convert ``str(obj)``, falling back to ``repr(obj)``
    * ``'passthru'`` -- return ``obj`` untouched
    * ``'replace'`` -- return ``u'failed_to_encode'``
    * ``'strict'`` -- raise ``TypeError``

    :raises TypeError: for a non-string under ``'strict'``, or when
        ``nonstring`` is not one of the values above.
    """
    if isinstance(obj, text_type):
        return obj

    if isinstance(obj, binary_type):
        try:
            return obj.decode(encoding, errors)
        except UnicodeDecodeError:
            # Decoding raises UnicodeDecodeError (not UnicodeEncodeError), so
            # this is the exception that must be caught for the placeholder
            # to be returned on undecodable input.
            return u'failed_to_encode'

    if nonstring == 'simplerepr':
        try:
            value = str(obj)
        except UnicodeError:
            try:
                value = repr(obj)
            except UnicodeError:
                # Giving up
                return u'failed_to_encode'
    elif nonstring == 'passthru':
        return obj
    elif nonstring == 'replace':
        return u'failed_to_encode'
    elif nonstring == 'strict':
        raise TypeError('obj must be a string type')
    else:
        raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring)

    # ``value`` is a native string here; run it through the string handling above.
    return to_text(value, encoding, errors)


# The native ``str`` type is text on Python 3 and bytes on Python 2, so
# ``to_native`` is bound to the converter that produces that type.
to_native = to_text if PY3 else to_bytes
9 changes: 9 additions & 0 deletions tests/parser/test_nginx_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,15 @@ def test_dump_simple():
assert_equal(listen.args, ['80'])


def test_encoding():
    """Parsing a directive whose argument contains non-ASCII escapes must not raise."""
    configs = [
        'bar "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";'
    ]

    # There is no assertion on the parsed tree: the test passes as long as
    # _parse completes without raising.
    for config in configs:
        _parse(config)


def assert_config(config, expected):
tree = _parse(config)
assert_is_instance(tree, Directive)
Expand Down

0 comments on commit c0dd214

Please sign in to comment.