Skip to content

Commit 98a19ac

Browse files
committed
Implemented remainder of the test, and it already shows that something is wrong with my packs. Probably something stupid ;)
1 parent e83210d commit 98a19ac

File tree

3 files changed

+76
-27
lines changed

3 files changed

+76
-27
lines changed

gitdb/pack.py

+47-15
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
zlib,
1313
LazyMixin,
1414
unpack_from,
15+
bin_to_hex,
1516
file_contents_ro_filepath,
1617
)
1718

@@ -60,6 +61,7 @@
6061
from binascii import crc32
6162

6263
from itertools import izip
64+
import tempfile
6365
import array
6466
import os
6567
import sys
@@ -176,9 +178,10 @@ def append(self, binsha, crc, offset):
176178
"""Append one piece of object information"""
177179
self._objs.append((binsha, crc, offset))
178180

179-
def write(self, pack_binsha, write):
181+
def write(self, pack_sha, write):
180182
"""Write the index file using the given write method
181-
:param pack_binsha: sha over the whole pack that we index"""
183+
:param pack_sha: binary sha over the whole pack that we index
184+
:return: sha1 binary sha over all index file contents"""
182185
# sort for sha1 hash
183186
self._objs.sort(key=lambda o: o[0])
184187

@@ -192,11 +195,10 @@ def write(self, pack_binsha, write):
192195
for t in self._objs:
193196
tmplist[ord(t[0][0])] += 1
194197
#END prepare fanout
195-
196198
for i in xrange(255):
197199
v = tmplist[i]
198200
sha_write(pack('>L', v))
199-
tmplist[i+1] = v
201+
tmplist[i+1] += v
200202
#END write each fanout entry
201203
sha_write(pack('>L', tmplist[255]))
202204

@@ -226,9 +228,11 @@ def write(self, pack_binsha, write):
226228
#END for each offset
227229

228230
# trailer
229-
assert(len(pack_binsha) == 20)
230-
sha_write(pack_binsha)
231-
write(sha_writer.sha(as_hex=False))
231+
assert(len(pack_sha) == 20)
232+
sha_write(pack_sha)
233+
sha = sha_writer.sha(as_hex=False)
234+
write(sha)
235+
return sha
232236

233237

234238

@@ -767,7 +771,9 @@ def is_valid_stream(self, sha, use_crc=False):
767771
"""
768772
Verify that the stream at the given sha is valid.
769773
770-
:param use_crc: if True, the index' crc for the sha is used to determine
774+
:param use_crc: if True, the index' crc is run over the compressed stream of
775+
the object, which is much faster than checking the sha1. It is also
776+
more prone to unnoticed corruption or manipulation.
771777
:param sha: 20 byte sha1 of the object whose stream to verify
772778
whether the compressed stream of the object is valid. If it is
773779
a delta, this only verifies that the delta's data is valid, not the
@@ -890,7 +896,8 @@ def write_pack(cls, object_iter, pack_write, index_write=None,
890896
this would be the place to put it. Otherwise we have to pre-iterate and store
891897
all items into a list to get the number, which uses more memory than necessary.
892898
:param zlib_compression: the zlib compression level to use
893-
:return: binary sha over all the contents of the pack
899+
:return: tuple(pack_sha, index_binsha) binary sha over all the contents of the pack
900+
and over all contents of the index. If index_write was None, index_binsha will be None
894901
:note: The destination of the write functions is up to the user. It could
895902
be a socket, or a file for instance
896903
:note: writes only undeltified objects"""
@@ -944,16 +951,41 @@ def write_pack(cls, object_iter, pack_write, index_write=None,
944951
#END count assertion
945952

946953
# write footer
947-
binsha = pack_writer.sha(as_hex = False)
948-
assert len(binsha) == 20
949-
pack_write(binsha)
950-
ofs += len(binsha) # just for completeness ;)
954+
pack_sha = pack_writer.sha(as_hex = False)
955+
assert len(pack_sha) == 20
956+
pack_write(pack_sha)
957+
ofs += len(pack_sha) # just for completeness ;)
951958

959+
index_sha = None
952960
if wants_index:
953-
index.write(binsha, index_write)
961+
index_sha = index.write(pack_sha, index_write)
954962
#END handle index
955963

956-
return binsha
964+
return pack_sha, index_sha
965+
966+
@classmethod
967+
def create(cls, object_iter, base_dir, object_count = None, zlib_compression = zlib.Z_BEST_SPEED):
968+
"""Create a new on-disk entity comprised of a properly named pack file and a properly named
969+
and corresponding index file. The pack contains all OStream objects contained in object iter.
970+
:param base_dir: directory which is to contain the files
971+
:return: PackEntity instance initialized with the new pack
972+
:note: for more information on the other parameters see the write_pack method"""
973+
pack_fd, pack_path = tempfile.mkstemp('', 'pack', base_dir)
974+
index_fd, index_path = tempfile.mkstemp('', 'index', base_dir)
975+
pack_write = lambda d: os.write(pack_fd, d)
976+
index_write = lambda d: os.write(index_fd, d)
977+
978+
pack_binsha, index_binsha = cls.write_pack(object_iter, pack_write, index_write, object_count, zlib_compression)
979+
os.close(pack_fd)
980+
os.close(index_fd)
981+
982+
fmt = "pack-%s.%s"
983+
new_pack_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'pack'))
984+
new_index_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'idx'))
985+
os.rename(pack_path, new_pack_path)
986+
os.rename(index_path, new_index_path)
987+
988+
return cls(new_pack_path)
957989

958990

959991
#} END interface

gitdb/test/lib.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -42,19 +42,22 @@ def with_rw_directory(func):
4242
def wrapper(self):
4343
path = tempfile.mktemp(prefix=func.__name__)
4444
os.mkdir(path)
45+
keep = False
4546
try:
4647
try:
4748
return func(self, path)
4849
except Exception:
4950
print >> sys.stderr, "Test %s.%s failed, output is at %r" % (type(self).__name__, func.__name__, path)
51+
keep = True
5052
raise
5153
finally:
5254
# Need to collect here to be sure all handles have been closed. It appears
5355
# a windows-only issue. In fact things should be deleted, as well as
5456
# memory maps closed, once objects go out of scope. For some reason
5557
# though this is not the case here unless we collect explicitly.
56-
gc.collect()
57-
shutil.rmtree(path)
58+
if not keep:
59+
gc.collect()
60+
shutil.rmtree(path)
5861
# END handle exception
5962
# END wrapper
6063

gitdb/test/test_pack.py

+24-10
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,10 @@ def test_pack_entity(self, rw_dir):
188188
pack_path = tempfile.mktemp('', "pack", rw_dir)
189189
index_path = tempfile.mktemp('', 'index', rw_dir)
190190
iteration = 0
191+
def rewind_streams():
192+
for obj in pack_objs:
193+
obj.stream.seek(0)
194+
#END utility
191195
for ppath, ipath, num_obj in zip((pack_path, )*2, (index_path, None), (len(pack_objs), None)):
192196
pfile = open(ppath, 'wb')
193197
iwrite = None
@@ -198,33 +202,43 @@ def test_pack_entity(self, rw_dir):
198202

199203
# make sure we rewind the streams ... we work on the same objects over and over again
200204
if iteration > 0:
201-
for obj in pack_objs:
202-
obj.stream.seek(0)
205+
rewind_streams()
203206
#END rewind streams
204207
iteration += 1
205208

206-
binsha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj)
209+
pack_sha, index_sha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj)
207210
pfile.close()
208211
assert os.path.getsize(ppath) > 100
209212

210213
# verify pack
211214
pf = PackFile(ppath)
212215
assert pf.size() == len(pack_objs)
213216
assert pf.version() == PackFile.pack_version_default
214-
assert pf.checksum() == binsha
217+
assert pf.checksum() == pack_sha
215218

216219
# verify index
217220
if ipath is not None:
221+
ifile.close()
218222
assert os.path.getsize(ipath) > 100
219-
223+
idx = PackIndexFile(ipath)
224+
assert idx.version() == PackIndexFile.index_version_default
225+
assert idx.packfile_checksum() == pack_sha
226+
assert idx.indexfile_checksum() == index_sha
227+
assert idx.size() == len(pack_objs)
220228
#END verify files exist
221-
222-
if ifile:
223-
ifile.close()
224-
#END handle index
225229
#END for each packpath, indexpath pair
226230

227-
#
231+
# verify the packs throughly
232+
rewind_streams()
233+
entity = PackEntity.create(pack_objs, rw_dir)
234+
count = 0
235+
for info in entity.info_iter():
236+
count += 1
237+
for use_crc in reversed(range(2)):
238+
assert entity.is_valid_stream(info.binsha, use_crc)
239+
# END for each crc mode
240+
#END for each info
241+
assert count == len(pack_objs)
228242

229243

230244
def test_pack_64(self):

0 commit comments

Comments
 (0)