Skip to content

Commit

Permalink
fixes ascii/unicode path encoding issue
Browse files Browse the repository at this point in the history
  • Loading branch information
espeed committed Nov 11, 2013
1 parent 9f09f1d commit 7f104cd
Showing 2 changed files with 29 additions and 23 deletions.
6 changes: 6 additions & 0 deletions bulbs/tests/element_tests.py
Original file line number Diff line number Diff line change
@@ -48,6 +48,12 @@ def test_delete_vertex(self):
j2 = self.vertices.get(james._id)
assert j2 == None

def test_ascii_encoding(self):
    """Create a vertex with a non-ASCII name and check the name on the
    returned vertex equals the value that was passed in."""
    # Regression case taken from:
    # http://stackoverflow.com/questions/19824952/unicodeencodeerror-bulbs-and-neo4j-create-model
    expected_name = u'Aname M\xf6ller'
    created = self.vertices.create({u'name': expected_name})
    assert created.name == expected_name


class VertexTestCase(BulbsTestCase):

46 changes: 23 additions & 23 deletions bulbs/utils.py
Original file line number Diff line number Diff line change
@@ -7,6 +7,8 @@
import sys
import inspect
import logging
import numbers
import codecs

import time
import datetime
@@ -15,33 +17,19 @@



#
# Unicode - see Armin's http://lucumr.pocoo.org/2013/7/2/the-updated-guide-to-unicode/
#

def decoding_dance(s, charset='utf-8', errors='replace'):
    """Re-interpret a latin1-representable string as text in *charset*.

    :param s: String whose characters are all encodable as latin1.
    :param charset: Codec used to decode the recovered bytes.
    :param errors: Error handler passed to the decode step.
    :returns: The decoded string.
    """
    # latin1 maps code points 0-255 one-to-one onto byte values, so this
    # recovers the raw bytes that the string's characters stand for.
    raw_bytes = s.encode('latin1')
    return raw_bytes.decode(charset, errors)

def encoding_dance(s, charset='utf-8', errors='replace'):
    """Return *s* as a string in which each character stands for one byte.

    :param s: Text, or bytes that are used as-is.
    :param charset: Codec used to encode *s* when it is not already bytes.
    :param errors: Error handler passed to the latin1 decode step.
    :returns: The latin1-decoded form of the bytes.
    """
    # Normalise to bytes first so both input kinds share one return path.
    if not isinstance(s, bytes):
        s = s.encode(charset)
    return s.decode('latin1', errors)


#
# Python 3
#

if sys.version < '3':
import codecs
# import ujson as json
from urllib import quote, quote_plus, urlencode
from urlparse import urlsplit, urlparse

def u(x):
#return codecs.unicode_escape_decode(x)[0]
return decoding_dance(x)

# def u(x):
# return codecs.unicode_escape_decode(x)[0]

else:
# ujson is faster but hasn't been ported to Python 3 yet
# import json
@@ -51,8 +39,18 @@ def u(x):
long = int
unicode = str

def u(x):
return x
# def u(x):
# return x

# NOTE: now using the same unicode func for both Python 2 and Python 3
# http://stackoverflow.com/questions/6625782/unicode-literals-that-work-in-python-3-and-2
# Unicode - see Armin's http://lucumr.pocoo.org/2013/7/2/the-updated-guide-to-unicode/
def u(x):
    """Return *x* as a unicode string via a ``unicode_escape`` round trip.

    The same implementation is used on Python 2 and Python 3.

    :param x: The string to convert.
    :returns: The unicode string obtained by escape-encoding *x* and
        decoding the result again.
    """
    # Escape-encode first so every non-ASCII character and every literal
    # backslash in the input is represented as an escape sequence.
    byte_string, _ = codecs.unicode_escape_encode(x)
    # Decode the escaped bytes, not the raw input: decoding ``x`` directly
    # would mangle non-ASCII characters and would interpret any backslash
    # sequences that happen to appear in the caller's data.
    unicode_string, _ = codecs.unicode_escape_decode(byte_string)
    return unicode_string



#
# Logging
@@ -146,12 +144,14 @@ def build_path(*args):
# don't include segment if it's None
# quote_plus doesn't work for neo4j index lookups;
# for example, this won't work: index/node/test_idxV/name/James+Thornton
segments = [quote(u(str(segment)), safe='') for segment in args if segment is not None]
segments = [quote(u(to_string(segment)), safe='') for segment in args if segment is not None]
path = "/".join(segments)
return path

#def quote_segment(segment):
# return segment if type(segment) == unicode else
def to_string(value):
    """Convert numbers to their ``str`` form; pass anything else through.

    :param value: Any value.
    :returns: ``str(value)`` when *value* is a ``numbers.Number``,
        otherwise *value* itself, unchanged.
    """
    if isinstance(value, numbers.Number):
        return str(value)
    return value


#
# Time Utils

0 comments on commit 7f104cd

Please sign in to comment.