Skip to content

Commit 641b64c

Browse files
committed
Now tests work consistently in py2 and 3
It's a nice way of saying that there is still one failing, consistently.
1 parent bf942a9 commit 641b64c

File tree

9 files changed

+178
-110
lines changed

9 files changed

+178
-110
lines changed

gitdb/const.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
from gitdb.utils.encoding import force_bytes
2-
3-
NULL_BYTE = force_bytes("\0")
1+
BYTE_SPACE = b' '
2+
NULL_BYTE = b'\0'
43
NULL_HEX_SHA = "0" * 40
54
NULL_BIN_SHA = NULL_BYTE * 20

gitdb/db/base.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
hex_to_bin
1010
)
1111

12+
from gitdb.utils.encoding import force_text
1213
from gitdb.exc import (
1314
BadObject,
1415
AmbiguousObjectName
@@ -122,8 +123,6 @@ def db_path(self, rela_path):
122123
"""
123124
:return: the given relative path relative to our database root, allowing
124125
to potentially access datafiles"""
125-
from gitdb.utils.encoding import force_text
126-
127126
return join(self._root_path, force_text(rela_path))
128127
#} END interface
129128

@@ -234,12 +233,12 @@ def update_cache(self, force=False):
234233

235234
def partial_to_complete_sha_hex(self, partial_hexsha):
236235
"""
237-
:return: 20 byte binary sha1 from the given less-than-40 byte hexsha
236+
:return: 20 byte binary sha1 from the given less-than-40 byte hexsha (bytes or str)
238237
:param partial_hexsha: hexsha with fewer than 40 bytes
239238
:raise AmbiguousObjectName: """
240239
databases = list()
241240
_databases_recursive(self, databases)
242-
241+
partial_hexsha = force_text(partial_hexsha)
243242
len_partial_hexsha = len(partial_hexsha)
244243
if len_partial_hexsha % 2 != 0:
245244
partial_binsha = hex_to_bin(partial_hexsha + "0")

gitdb/db/loose.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def readable_db_object_path(self, hexsha):
109109

110110
def partial_to_complete_sha_hex(self, partial_hexsha):
111111
""":return: 20 byte binary sha1 string which matches the given name uniquely
112-
:param name: hexadecimal partial name
112+
:param partial_hexsha: hexadecimal partial name (bytes or ascii string)
113113
:raise AmbiguousObjectName:
114114
:raise BadObject: """
115115
candidate = None

gitdb/fun.py

Lines changed: 154 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
from itertools import islice
1515
from functools import reduce
1616

17-
from gitdb.utils.compat import izip, buffer, xrange
17+
from gitdb.const import NULL_BYTE, BYTE_SPACE
18+
from gitdb.utils.encoding import force_text
19+
from gitdb.utils.compat import izip, buffer, xrange, PY3
1820
from gitdb.typ import (
1921
str_blob_type,
2022
str_commit_type,
@@ -30,12 +32,12 @@
3032
delta_types = (OFS_DELTA, REF_DELTA)
3133

3234
type_id_to_type_map = {
33-
0 : "", # EXT 1
35+
0 : b'', # EXT 1
3436
1 : str_commit_type,
3537
2 : str_tree_type,
3638
3 : str_blob_type,
3739
4 : str_tag_type,
38-
5 : "", # EXT 2
40+
5 : b'', # EXT 2
3941
OFS_DELTA : "OFS_DELTA", # OFFSET DELTA
4042
REF_DELTA : "REF_DELTA" # REFERENCE DELTA
4143
}
@@ -394,11 +396,9 @@ def loose_object_header_info(m):
394396
:return: tuple(type_string, uncompressed_size_in_bytes) the type string of the
395397
object as well as its uncompressed size in bytes.
396398
:param m: memory map from which to read the compressed object data"""
397-
from gitdb.const import NULL_BYTE
398-
399399
decompress_size = 8192 # is used in cgit as well
400400
hdr = decompressobj().decompress(m, decompress_size)
401-
type_name, size = hdr[:hdr.find(NULL_BYTE)].split(" ".encode("ascii"))
401+
type_name, size = hdr[:hdr.find(NULL_BYTE)].split(BYTE_SPACE)
402402

403403
return type_name, int(size)
404404

@@ -413,12 +413,21 @@ def pack_object_header_info(data):
413413
type_id = (c >> 4) & 7 # numeric type
414414
size = c & 15 # starting size
415415
s = 4 # starting bit-shift size
416-
while c & 0x80:
417-
c = byte_ord(data[i])
418-
i += 1
419-
size += (c & 0x7f) << s
420-
s += 7
421-
# END character loop
416+
if PY3:
417+
while c & 0x80:
418+
c = data[i]
419+
i += 1
420+
size += (c & 0x7f) << s
421+
s += 7
422+
# END character loop
423+
else:
424+
while c & 0x80:
425+
c = ord(data[i])
426+
i += 1
427+
size += (c & 0x7f) << s
428+
s += 7
429+
# END character loop
430+
# end performance at expense of maintenance ...
422431
return (type_id, size, i)
423432

424433
def create_pack_object_header(obj_type, obj_size):
@@ -429,16 +438,29 @@ def create_pack_object_header(obj_type, obj_size):
429438
:param obj_type: pack type_id of the object
430439
:param obj_size: uncompressed size in bytes of the following object stream"""
431440
c = 0 # 1 byte
432-
hdr = str() # output string
433-
434-
c = (obj_type << 4) | (obj_size & 0xf)
435-
obj_size >>= 4
436-
while obj_size:
437-
hdr += chr(c | 0x80)
438-
c = obj_size & 0x7f
439-
obj_size >>= 7
440-
#END until size is consumed
441-
hdr += chr(c)
441+
if PY3:
442+
hdr = bytearray() # output string
443+
444+
c = (obj_type << 4) | (obj_size & 0xf)
445+
obj_size >>= 4
446+
while obj_size:
447+
hdr.append(c | 0x80)
448+
c = obj_size & 0x7f
449+
obj_size >>= 7
450+
#END until size is consumed
451+
hdr.append(c)
452+
else:
453+
hdr = bytes() # output string
454+
455+
c = (obj_type << 4) | (obj_size & 0xf)
456+
obj_size >>= 4
457+
while obj_size:
458+
hdr += chr(c | 0x80)
459+
c = obj_size & 0x7f
460+
obj_size >>= 7
461+
#END until size is consumed
462+
hdr += chr(c)
463+
# end handle interpreter
442464
return hdr
443465

444466
def msb_size(data, offset=0):
@@ -449,24 +471,36 @@ def msb_size(data, offset=0):
449471
i = 0
450472
l = len(data)
451473
hit_msb = False
452-
while i < l:
453-
c = byte_ord(data[i+offset])
454-
size |= (c & 0x7f) << i*7
455-
i += 1
456-
if not c & 0x80:
457-
hit_msb = True
458-
break
459-
# END check msb bit
460-
# END while in range
474+
if PY3:
475+
while i < l:
476+
c = data[i+offset]
477+
size |= (c & 0x7f) << i*7
478+
i += 1
479+
if not c & 0x80:
480+
hit_msb = True
481+
break
482+
# END check msb bit
483+
# END while in range
484+
else:
485+
while i < l:
486+
c = ord(data[i+offset])
487+
size |= (c & 0x7f) << i*7
488+
i += 1
489+
if not c & 0x80:
490+
hit_msb = True
491+
break
492+
# END check msb bit
493+
# END while in range
494+
# end performance ...
461495
if not hit_msb:
462496
raise AssertionError("Could not find terminating MSB byte in data stream")
463497
return i+offset, size
464498

465499
def loose_object_header(type, size):
466500
"""
467-
:return: string representing the loose object header, which is immediately
501+
:return: bytes representing the loose object header, which is immediately
468502
followed by the content stream of size 'size'"""
469-
return "%s %i\0" % (type, size)
503+
return ('%s %i\0' % (force_text(type), size)).encode('ascii')
470504

471505
def write_object(type, size, read, write, chunk_size=chunk_size):
472506
"""
@@ -611,48 +645,93 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
611645
**Note:** transcribed to python from the similar routine in patch-delta.c"""
612646
i = 0
613647
db = delta_buf
614-
while i < delta_buf_size:
615-
c = ord(db[i])
616-
i += 1
617-
if c & 0x80:
618-
cp_off, cp_size = 0, 0
619-
if (c & 0x01):
620-
cp_off = ord(db[i])
621-
i += 1
622-
if (c & 0x02):
623-
cp_off |= (ord(db[i]) << 8)
624-
i += 1
625-
if (c & 0x04):
626-
cp_off |= (ord(db[i]) << 16)
627-
i += 1
628-
if (c & 0x08):
629-
cp_off |= (ord(db[i]) << 24)
630-
i += 1
631-
if (c & 0x10):
632-
cp_size = ord(db[i])
633-
i += 1
634-
if (c & 0x20):
635-
cp_size |= (ord(db[i]) << 8)
636-
i += 1
637-
if (c & 0x40):
638-
cp_size |= (ord(db[i]) << 16)
639-
i += 1
640-
641-
if not cp_size:
642-
cp_size = 0x10000
643-
644-
rbound = cp_off + cp_size
645-
if (rbound < cp_size or
646-
rbound > src_buf_size):
647-
break
648-
write(buffer(src_buf, cp_off, cp_size))
649-
elif c:
650-
write(db[i:i+c])
651-
i += c
652-
else:
653-
raise ValueError("unexpected delta opcode 0")
654-
# END handle command byte
655-
# END while processing delta data
648+
if PY3:
649+
while i < delta_buf_size:
650+
c = db[i]
651+
i += 1
652+
if c & 0x80:
653+
cp_off, cp_size = 0, 0
654+
if (c & 0x01):
655+
cp_off = db[i]
656+
i += 1
657+
if (c & 0x02):
658+
cp_off |= (db[i] << 8)
659+
i += 1
660+
if (c & 0x04):
661+
cp_off |= (db[i] << 16)
662+
i += 1
663+
if (c & 0x08):
664+
cp_off |= (db[i] << 24)
665+
i += 1
666+
if (c & 0x10):
667+
cp_size = db[i]
668+
i += 1
669+
if (c & 0x20):
670+
cp_size |= (db[i] << 8)
671+
i += 1
672+
if (c & 0x40):
673+
cp_size |= (db[i] << 16)
674+
i += 1
675+
676+
if not cp_size:
677+
cp_size = 0x10000
678+
679+
rbound = cp_off + cp_size
680+
if (rbound < cp_size or
681+
rbound > src_buf_size):
682+
break
683+
write(buffer(src_buf, cp_off, cp_size))
684+
elif c:
685+
write(db[i:i+c])
686+
i += c
687+
else:
688+
raise ValueError("unexpected delta opcode 0")
689+
# END handle command byte
690+
# END while processing delta data
691+
else:
692+
while i < delta_buf_size:
693+
c = ord(db[i])
694+
i += 1
695+
if c & 0x80:
696+
cp_off, cp_size = 0, 0
697+
if (c & 0x01):
698+
cp_off = ord(db[i])
699+
i += 1
700+
if (c & 0x02):
701+
cp_off |= (ord(db[i]) << 8)
702+
i += 1
703+
if (c & 0x04):
704+
cp_off |= (ord(db[i]) << 16)
705+
i += 1
706+
if (c & 0x08):
707+
cp_off |= (ord(db[i]) << 24)
708+
i += 1
709+
if (c & 0x10):
710+
cp_size = ord(db[i])
711+
i += 1
712+
if (c & 0x20):
713+
cp_size |= (ord(db[i]) << 8)
714+
i += 1
715+
if (c & 0x40):
716+
cp_size |= (ord(db[i]) << 16)
717+
i += 1
718+
719+
if not cp_size:
720+
cp_size = 0x10000
721+
722+
rbound = cp_off + cp_size
723+
if (rbound < cp_size or
724+
rbound > src_buf_size):
725+
break
726+
write(buffer(src_buf, cp_off, cp_size))
727+
elif c:
728+
write(db[i:i+c])
729+
i += c
730+
else:
731+
raise ValueError("unexpected delta opcode 0")
732+
# END handle command byte
733+
# END while processing delta data
734+
# end save byte_ord call and prevent performance regression in py2
656735

657736
# yes, lets use the exact same error message that git uses :)
658737
assert i == delta_buf_size, "delta replay has gone wild"

0 commit comments

Comments
 (0)