5
5
"""Contains PackIndexFile and PackFile implementations"""
6
6
from gitdb .exc import (
7
7
BadObject ,
8
- UnsupportedOperation
8
+ UnsupportedOperation ,
9
+ ParseError
9
10
)
10
11
from util import (
11
12
zlib ,
15
16
)
16
17
17
18
from fun import (
19
+ create_pack_object_header ,
18
20
pack_object_header_info ,
19
21
is_equal_canonical_sha ,
20
22
type_id_to_type_map ,
47
49
DeltaApplyReader ,
48
50
Sha1Writer ,
49
51
NullStream ,
52
+ FlexibleSha1Writer
50
53
)
51
54
52
55
from struct import (
53
56
pack ,
54
57
unpack ,
55
58
)
56
59
60
+ from binascii import crc32
61
+
57
62
from itertools import izip
58
63
import array
59
64
import os
@@ -119,10 +124,113 @@ def pack_object_at(data, offset, as_stream):
119
124
return abs_data_offset , ODeltaPackInfo (offset , type_id , uncomp_size , delta_info )
120
125
# END handle info
121
126
# END handle stream
122
-
127
+
128
def write_stream_to_pack(read, write, zstream, want_crc=False):
    """Copy a stream as obtained from the ``read`` function, deflate it with
    ``zstream``, and pass the compressed bytes on to the ``write`` function.

    :param read: callable taking the number of bytes to read
    :param write: callable receiving a string of compressed bytes - its return
        value cannot be relied upon and is ignored
    :param zstream: zlib compression object used to deflate the copied data
    :param want_crc: if True, a crc32 is computed over the *compressed* bytes
    :return: tuple(bytes_read, bytes_written, crc32) - the crc32 stays 0 unless
        want_crc was True"""
    bytes_read = 0
    bytes_written = 0
    crc = 0

    # consume the input in chunk_size portions; a short read signals the end
    while True:
        data = read(chunk_size)
        bytes_read += len(data)

        deflated = zstream.compress(data)
        bytes_written += len(deflated)
        write(deflated)  # cannot assume a return value

        if want_crc:
            crc = crc32(deflated, crc)
        # END handle crc

        if len(data) != chunk_size:
            break
        # END terminate on short read
    # END copy loop

    # the compressor buffers internally - flush the remainder as well
    deflated = zstream.flush()
    bytes_written += len(deflated)
    write(deflated)
    if want_crc:
        crc = crc32(deflated, crc)
    # END handle crc

    return (bytes_read, bytes_written, crc)
161
+
162
+
123
163
#} END utilities
124
164
125
165
166
class IndexWriter(object):
    """Utility to cache index information, allowing to write all information later
    in one go to the given stream
    :note: currently only writes v2 indices"""
    __slots__ = '_objs'

    def __init__(self):
        self._objs = list()

    def append(self, binsha, crc, offset):
        """Append one piece of object information
        :param binsha: 20 byte binary sha of the object
        :param crc: crc32 over the object's compressed data within the pack
        :param offset: offset at which the object's entry starts in the pack"""
        self._objs.append((binsha, crc, offset))

    def write(self, pack_binsha, write):
        """Write the index file using the given write method
        :param pack_binsha: sha over the whole pack that we index
        :param write: function receiving the index data as strings"""
        # index entries are required to be ordered by sha
        self._objs.sort(key=lambda o: o[0])

        sha_writer = FlexibleSha1Writer(write)
        sha_write = sha_writer.write
        sha_write(PackIndexFile.index_v2_signature)
        sha_write(pack(">L", PackIndexFile.index_version_default))

        # fanout: count objects per first sha byte ...
        tmplist = list((0,) * 256)  # fanout, later reused for 64 bit offsets
        for t in self._objs:
            tmplist[ord(t[0][0])] += 1
        # END prepare fanout
        # ... then accumulate, as fanout entry i must hold the number of
        # objects whose first byte is <= i. The counts must be summed into
        # the next slot (+=), not assigned, or the table would be corrupt.
        for i in xrange(255):
            v = tmplist[i]
            sha_write(pack('>L', v))
            tmplist[i + 1] += v
        # END write each fanout entry
        sha_write(pack('>L', tmplist[255]))

        # sha1 ordered
        # save calls, that is push them into c
        sha_write(''.join(t[0] for t in self._objs))

        # crc32 - masked to 32 bit as crc32 may yield negative values
        for t in self._objs:
            sha_write(pack('>L', t[1] & 0xffffffff))
        # END for each crc

        tmplist = list()
        # offset 32: offsets beyond the 31 bit range go into the extra 64 bit
        # table; the entry then carries the msb plus the table index instead
        for t in self._objs:
            ofs = t[2]
            if ofs > 0x7fffffff:
                tmplist.append(ofs)
                ofs = 0x80000000 + len(tmplist) - 1
            # END handle 64 bit offsets
            sha_write(pack('>L', ofs & 0xffffffff))
        # END for each offset

        # offset 64
        for ofs in tmplist:
            sha_write(pack(">Q", ofs))
        # END for each offset

        # trailer: sha of the indexed pack, then the sha over the index itself
        assert (len(pack_binsha) == 20)
        sha_write(pack_binsha)
        write(sha_writer.sha(as_hex=False))
232
+
233
+
126
234
127
235
class PackIndexFile (LazyMixin ):
128
236
"""A pack index provides offsets into the corresponding pack, allowing to find
@@ -135,6 +243,8 @@ class PackIndexFile(LazyMixin):
135
243
136
244
# used in v2 indices
137
245
_sha_list_offset = 8 + 1024
246
+ index_v2_signature = '\377 tOc'
247
+ index_version_default = 2
138
248
139
249
def __init__ (self , indexpath ):
140
250
super (PackIndexFile , self ).__init__ ()
@@ -155,7 +265,7 @@ def _set_cache_(self, attr):
155
265
# to access the fanout table or related properties
156
266
157
267
# CHECK VERSION
158
- self ._version = (self ._data [:4 ] == ' \377 tOc' and 2 ) or 1
268
+ self ._version = (self ._data [:4 ] == self . index_v2_signature and 2 ) or 1
159
269
if self ._version == 2 :
160
270
version_id = unpack_from (">L" , self ._data , 4 )[0 ]
161
271
assert version_id == self ._version , "Unsupported index version: %i" % version_id
@@ -383,6 +493,8 @@ class PackFile(LazyMixin):
383
493
case"""
384
494
385
495
__slots__ = ('_packpath' , '_data' , '_size' , '_version' )
496
+ pack_signature = 0x5041434b # 'PACK'
497
+ pack_version_default = 2
386
498
387
499
# offset into our data at which the first object starts
388
500
first_object_offset = 3 * 4 # header bytes
@@ -396,15 +508,19 @@ def _set_cache_(self, attr):
396
508
self ._data = file_contents_ro_filepath (self ._packpath )
397
509
398
510
# read the header information
399
- type_id , self ._version , self ._size = unpack_from (">4sLL " , self ._data , 0 )
511
+ type_id , self ._version , self ._size = unpack_from (">LLL " , self ._data , 0 )
400
512
401
513
# TODO: figure out whether we should better keep the lock, or maybe
402
514
# add a .keep file instead ?
403
515
else : # must be '_size' or '_version'
404
516
# read header info - we do that just with a file stream
405
- type_id , self ._version , self ._size = unpack (">4sLL " , open (self ._packpath ).read (12 ))
517
+ type_id , self ._version , self ._size = unpack (">LLL " , open (self ._packpath ).read (12 ))
406
518
# END handle header
407
519
520
+ if type_id != self .pack_signature :
521
+ raise ParseError ("Invalid pack signature: %i" % type_id )
522
+ #END assert type id
523
+
408
524
def _iter_objects (self , start_offset , as_stream = True ):
409
525
"""Handle the actual iteration of objects within this pack"""
410
526
data = self ._data
@@ -759,7 +875,8 @@ def collect_streams(self, sha):
759
875
760
876
761
877
@classmethod
762
- def create (cls , object_iter , pack_write , index_write = None ):
878
+ def write_pack (cls , object_iter , pack_write , index_write = None ,
879
+ object_count = None , zlib_compression = zlib .Z_BEST_SPEED ):
763
880
"""
764
881
Create a new pack by putting all objects obtained by the object_iterator
765
882
into a pack which is written using the pack_write method.
@@ -769,9 +886,74 @@ def create(cls, object_iter, pack_write, index_write=None):
769
886
:param pack_write: function to receive strings to write into the pack stream
770
887
:param indx_write: if not None, the function writes the index file corresponding
771
888
to the pack.
889
+ :param object_count: if you can provide the amount of objects in your iteration,
890
+ this would be the place to put it. Otherwise we have to pre-iterate and store
891
+ all items into a list to get the number, which uses more memory than necessary.
892
+ :param zlib_compression: the zlib compression level to use
893
+ :return: binary sha over all the contents of the pack
772
894
:note: The destination of the write functions is up to the user. It could
773
- be a socket, or a file for instance"""
895
+ be a socket, or a file for instance
896
+ :note: writes only undeltified objects"""
897
+ objs = object_iter
898
+ if not object_count :
899
+ if not isinstance (object_iter , (tuple , list )):
900
+ objs = list (object_iter )
901
+ #END handle list type
902
+ object_count = len (objs )
903
+ #END handle object
904
+
905
+ pack_writer = FlexibleSha1Writer (pack_write )
906
+ pwrite = pack_writer .write
907
+ ofs = 0 # current offset into the pack file
908
+ index = None
909
+ wants_index = index_write is not None
910
+
911
+ # write header
912
+ pwrite (pack ('>LLL' , PackFile .pack_signature , PackFile .pack_version_default , object_count ))
913
+ ofs += 12
914
+
915
+ if wants_index :
916
+ index = IndexWriter ()
917
+ #END handle index header
918
+
919
+ actual_count = 0
920
+ for obj in objs :
921
+ actual_count += 1
922
+
923
+ # object header
924
+ hdr = create_pack_object_header (obj .type_id , obj .size )
925
+ pwrite (hdr )
926
+
927
+ # data stream
928
+ zstream = zlib .compressobj (zlib_compression )
929
+ ostream = obj .stream
930
+ br , bw , crc = write_stream_to_pack (ostream .read , pwrite , zstream , want_crc = index_write )
931
+ assert (br == obj .size )
932
+ if wants_index :
933
+ index .append (obj .binsha , crc , ofs )
934
+ #END handle index
935
+
936
+ ofs += len (hdr ) + bw
937
+ if actual_count == object_count :
938
+ break
939
+ #END abort once we are done
940
+ #END for each object
941
+
942
+ if actual_count != object_count :
943
+ raise ValueError ("Expected to write %i objects into pack, but received only %i from iterators" % (object_count , actual_count ))
944
+ #END count assertion
945
+
946
+ # write footer
947
+ binsha = pack_writer .sha (as_hex = False )
948
+ assert len (binsha ) == 20
949
+ pack_write (binsha )
950
+ ofs += len (binsha ) # just for completeness ;)
951
+
952
+ if wants_index :
953
+ index .write (binsha , index_write )
954
+ #END handle index
774
955
956
+ return binsha
775
957
776
958
777
959
#} END interface
0 commit comments