Skip to content

Commit 184a776

Browse files
committed
crc needs to be done on the pack object header as well, of course
1 parent 98a19ac commit 184a776

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed

gitdb/pack.py

+18-4
Original file line number | Diff line number | Diff line change
@@ -127,15 +127,20 @@ def pack_object_at(data, offset, as_stream):
127127
# END handle info
128128
# END handle stream
129129

130-
def write_stream_to_pack(read, write, zstream, want_crc=False):
130+
def write_stream_to_pack(read, write, zstream, base_crc=None):
131131
"""Copy a stream as read from read function, zip it, and write the result.
132132
Count the number of written bytes and return it
133-
:param want_crc: if True, the crc will be generated over the compressed data.
134-
:return: tuple(no bytes read, no bytes written, crc32) crc might be 0 if want_crc
133+
:param base_crc: if not None, the crc will be the base for all compressed data
134+
we consecutively write and generate a crc32 from. If None, no crc will be generated
135+
:return: tuple(no bytes read, no bytes written, crc32) crc might be 0 if base_crc
135136
was false"""
136137
br = 0 # bytes read
137138
bw = 0 # bytes written
139+
want_crc = base_crc is not None
138140
crc = 0
141+
if want_crc:
142+
crc = base_crc
143+
#END initialize crc
139144

140145
while True:
141146
chunk = read(chunk_size)
@@ -651,6 +656,9 @@ def __init__(self, pack_or_index_path):
651656

652657
def _set_cache_(self, attr):
653658
# currently this can only be _offset_map
659+
# TODO: make this a simple sorted offset array which can be bisected
660+
# to find the respective entry, from which we can take a +1 easily
661+
# This might be slower, but should also be much lighter in memory !
654662
offsets_sorted = sorted(self._index.offsets())
655663
last_offset = len(self._pack.data()) - self._pack.footer_size
656664
assert offsets_sorted, "Cannot handle empty indices"
@@ -926,15 +934,21 @@ def write_pack(cls, object_iter, pack_write, index_write=None,
926934
actual_count = 0
927935
for obj in objs:
928936
actual_count += 1
937+
crc = 0
929938

930939
# object header
931940
hdr = create_pack_object_header(obj.type_id, obj.size)
941+
if index_write:
942+
crc = crc32(hdr)
943+
else:
944+
crc = None
945+
#END handle crc
932946
pwrite(hdr)
933947

934948
# data stream
935949
zstream = zlib.compressobj(zlib_compression)
936950
ostream = obj.stream
937-
br, bw, crc = write_stream_to_pack(ostream.read, pwrite, zstream, want_crc = index_write)
951+
br, bw, crc = write_stream_to_pack(ostream.read, pwrite, zstream, base_crc = crc)
938952
assert(br == obj.size)
939953
if wants_index:
940954
index.append(obj.binsha, crc, ofs)

gitdb/test/test_pack.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -234,7 +234,7 @@ def rewind_streams():
234234
count = 0
235235
for info in entity.info_iter():
236236
count += 1
237-
for use_crc in reversed(range(2)):
237+
for use_crc in range(2):
238238
assert entity.is_valid_stream(info.binsha, use_crc)
239239
# END for each crc mode
240240
#END for each info

0 commit comments

Comments
 (0)