14
14
from itertools import islice
15
15
from functools import reduce
16
16
17
- from gitdb .utils .compat import izip , buffer , xrange
17
+ from gitdb .const import NULL_BYTE , BYTE_SPACE
18
+ from gitdb .utils .encoding import force_text
19
+ from gitdb .utils .compat import izip , buffer , xrange , PY3
18
20
from gitdb .typ import (
19
21
str_blob_type ,
20
22
str_commit_type ,
30
32
delta_types = (OFS_DELTA , REF_DELTA )
31
33
32
34
# Maps a numeric type id to the corresponding type name.
# Ids 0 and 5 are the "EXT" placeholders and map to an empty byte string.
# NOTE(review): the delta entries are still text strings while the
# placeholders are bytes - confirm which type callers expect.
type_id_to_type_map = {
    0: b'',  # EXT 1
    1: str_commit_type,
    2: str_tree_type,
    3: str_blob_type,
    4: str_tag_type,
    5: b'',  # EXT 2
    OFS_DELTA: "OFS_DELTA",  # OFFSET DELTA
    REF_DELTA: "REF_DELTA",  # REFERENCE DELTA
}
@@ -394,11 +396,9 @@ def loose_object_header_info(m):
394
396
:return: tuple(type_string, uncompressed_size_in_bytes) the type string of the
395
397
object as well as its uncompressed size in bytes.
396
398
:param m: memory map from which to read the compressed object data"""
397
- from gitdb .const import NULL_BYTE
398
-
399
399
decompress_size = 8192 # is used in cgit as well
400
400
hdr = decompressobj ().decompress (m , decompress_size )
401
- type_name , size = hdr [:hdr .find (NULL_BYTE )].split (" " . encode ( "ascii" ) )
401
+ type_name , size = hdr [:hdr .find (NULL_BYTE )].split (BYTE_SPACE )
402
402
403
403
return type_name , int (size )
404
404
@@ -413,12 +413,21 @@ def pack_object_header_info(data):
413
413
type_id = (c >> 4 ) & 7 # numeric type
414
414
size = c & 15 # starting size
415
415
s = 4 # starting bit-shift size
416
- while c & 0x80 :
417
- c = byte_ord (data [i ])
418
- i += 1
419
- size += (c & 0x7f ) << s
420
- s += 7
421
- # END character loop
416
+ if PY3 :
417
+ while c & 0x80 :
418
+ c = data [i ]
419
+ i += 1
420
+ size += (c & 0x7f ) << s
421
+ s += 7
422
+ # END character loop
423
+ else :
424
+ while c & 0x80 :
425
+ c = ord (data [i ])
426
+ i += 1
427
+ size += (c & 0x7f ) << s
428
+ s += 7
429
+ # END character loop
430
+ # end performance at expense of maintenance ...
422
431
return (type_id , size , i )
423
432
424
433
def create_pack_object_header(obj_type, obj_size):
    """
    :return: byte sequence encoding the object type together with the object
        size: a ``bytearray`` on Python 3, a native byte string on Python 2.
        The first byte holds the type id shifted left by four bits plus the
        four least significant size bits; each following byte holds seven
        more size bits. Every byte except the last has its high bit (0x80) set.

    :param obj_type: pack type_id of the object
    :param obj_size: uncompressed size in bytes of the following object stream"""
    # bytearray.append(int) behaves the same on Python 2 and 3, so a single
    # loop serves both interpreters and avoids repeated string concatenation.
    hdr = bytearray()  # output bytes

    # first byte: type id plus the four least significant size bits
    c = (obj_type << 4) | (obj_size & 0xf)
    obj_size >>= 4
    while obj_size:
        hdr.append(c | 0x80)  # high bit set: more size bytes follow
        c = obj_size & 0x7f
        obj_size >>= 7
    # END until size is consumed
    hdr.append(c)

    # `bytes is str` holds only on Python 2, where callers previously got a
    # native byte string; keep that return type there.
    if bytes is str:
        return bytes(hdr)
    return hdr
443
465
444
466
def msb_size (data , offset = 0 ):
@@ -449,24 +471,36 @@ def msb_size(data, offset=0):
449
471
i = 0
450
472
l = len (data )
451
473
hit_msb = False
452
- while i < l :
453
- c = byte_ord (data [i + offset ])
454
- size |= (c & 0x7f ) << i * 7
455
- i += 1
456
- if not c & 0x80 :
457
- hit_msb = True
458
- break
459
- # END check msb bit
460
- # END while in range
474
+ if PY3 :
475
+ while i < l :
476
+ c = data [i + offset ]
477
+ size |= (c & 0x7f ) << i * 7
478
+ i += 1
479
+ if not c & 0x80 :
480
+ hit_msb = True
481
+ break
482
+ # END check msb bit
483
+ # END while in range
484
+ else :
485
+ while i < l :
486
+ c = ord (data [i + offset ])
487
+ size |= (c & 0x7f ) << i * 7
488
+ i += 1
489
+ if not c & 0x80 :
490
+ hit_msb = True
491
+ break
492
+ # END check msb bit
493
+ # END while in range
494
+ # end performance ...
461
495
if not hit_msb :
462
496
raise AssertionError ("Could not find terminating MSB byte in data stream" )
463
497
return i + offset , size
464
498
465
499
def loose_object_header(type, size):
    """
    :return: bytes representing the loose object header, which is immediately
        followed by the content stream of size 'size'. The layout is the type
        name, one space, the decimal size and a terminating NUL byte.
    :param type: object type name; it is passed through force_text before
        being formatted
    :param size: size of the content stream, formatted as an integer"""
    # No padding around the fields: loose_object_header_info splits the text
    # in front of the NUL byte on a single space and expects exactly two parts.
    return ('%s %i\0' % (force_text(type), size)).encode('ascii')
470
504
471
505
def write_object (type , size , read , write , chunk_size = chunk_size ):
472
506
"""
@@ -611,48 +645,93 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
611
645
**Note:** transcribed to python from the similar routine in patch-delta.c"""
612
646
i = 0
613
647
db = delta_buf
614
- while i < delta_buf_size :
615
- c = ord (db [i ])
616
- i += 1
617
- if c & 0x80 :
618
- cp_off , cp_size = 0 , 0
619
- if (c & 0x01 ):
620
- cp_off = ord (db [i ])
621
- i += 1
622
- if (c & 0x02 ):
623
- cp_off |= (ord (db [i ]) << 8 )
624
- i += 1
625
- if (c & 0x04 ):
626
- cp_off |= (ord (db [i ]) << 16 )
627
- i += 1
628
- if (c & 0x08 ):
629
- cp_off |= (ord (db [i ]) << 24 )
630
- i += 1
631
- if (c & 0x10 ):
632
- cp_size = ord (db [i ])
633
- i += 1
634
- if (c & 0x20 ):
635
- cp_size |= (ord (db [i ]) << 8 )
636
- i += 1
637
- if (c & 0x40 ):
638
- cp_size |= (ord (db [i ]) << 16 )
639
- i += 1
640
-
641
- if not cp_size :
642
- cp_size = 0x10000
643
-
644
- rbound = cp_off + cp_size
645
- if (rbound < cp_size or
646
- rbound > src_buf_size ):
647
- break
648
- write (buffer (src_buf , cp_off , cp_size ))
649
- elif c :
650
- write (db [i :i + c ])
651
- i += c
652
- else :
653
- raise ValueError ("unexpected delta opcode 0" )
654
- # END handle command byte
655
- # END while processing delta data
648
+ if PY3 :
649
+ while i < delta_buf_size :
650
+ c = db [i ]
651
+ i += 1
652
+ if c & 0x80 :
653
+ cp_off , cp_size = 0 , 0
654
+ if (c & 0x01 ):
655
+ cp_off = db [i ]
656
+ i += 1
657
+ if (c & 0x02 ):
658
+ cp_off |= (db [i ] << 8 )
659
+ i += 1
660
+ if (c & 0x04 ):
661
+ cp_off |= (db [i ] << 16 )
662
+ i += 1
663
+ if (c & 0x08 ):
664
+ cp_off |= (db [i ] << 24 )
665
+ i += 1
666
+ if (c & 0x10 ):
667
+ cp_size = db [i ]
668
+ i += 1
669
+ if (c & 0x20 ):
670
+ cp_size |= (db [i ] << 8 )
671
+ i += 1
672
+ if (c & 0x40 ):
673
+ cp_size |= (db [i ] << 16 )
674
+ i += 1
675
+
676
+ if not cp_size :
677
+ cp_size = 0x10000
678
+
679
+ rbound = cp_off + cp_size
680
+ if (rbound < cp_size or
681
+ rbound > src_buf_size ):
682
+ break
683
+ write (buffer (src_buf , cp_off , cp_size ))
684
+ elif c :
685
+ write (db [i :i + c ])
686
+ i += c
687
+ else :
688
+ raise ValueError ("unexpected delta opcode 0" )
689
+ # END handle command byte
690
+ # END while processing delta data
691
+ else :
692
+ while i < delta_buf_size :
693
+ c = ord (db [i ])
694
+ i += 1
695
+ if c & 0x80 :
696
+ cp_off , cp_size = 0 , 0
697
+ if (c & 0x01 ):
698
+ cp_off = ord (db [i ])
699
+ i += 1
700
+ if (c & 0x02 ):
701
+ cp_off |= (ord (db [i ]) << 8 )
702
+ i += 1
703
+ if (c & 0x04 ):
704
+ cp_off |= (ord (db [i ]) << 16 )
705
+ i += 1
706
+ if (c & 0x08 ):
707
+ cp_off |= (ord (db [i ]) << 24 )
708
+ i += 1
709
+ if (c & 0x10 ):
710
+ cp_size = ord (db [i ])
711
+ i += 1
712
+ if (c & 0x20 ):
713
+ cp_size |= (ord (db [i ]) << 8 )
714
+ i += 1
715
+ if (c & 0x40 ):
716
+ cp_size |= (ord (db [i ]) << 16 )
717
+ i += 1
718
+
719
+ if not cp_size :
720
+ cp_size = 0x10000
721
+
722
+ rbound = cp_off + cp_size
723
+ if (rbound < cp_size or
724
+ rbound > src_buf_size ):
725
+ break
726
+ write (buffer (src_buf , cp_off , cp_size ))
727
+ elif c :
728
+ write (db [i :i + c ])
729
+ i += c
730
+ else :
731
+ raise ValueError ("unexpected delta opcode 0" )
732
+ # END handle command byte
733
+ # END while processing delta data
734
+ # end save byte_ord call and prevent performance regression in py2
656
735
657
736
# yes, let's use the exact same error message that git uses :)
658
737
assert i == delta_buf_size , "delta replay has gone wild"
0 commit comments