Skip to content

Commit 8ae52a5

Browse files
committed
First version with some working reference tests. The db implementation is still a mess; it really needs to be separated into interfaces and implementations, sorted by type, like pure, pygit (at some point) and so on. This would already allow database implementations to be mixed and matched. One further step, to be taken another day, would be to 'interfacify' object and reference types, so they could be replaced by different implementations as well, including full isinstance support (as isinstance would only check for the base interface). To ease this, the interfaces would just keep their original names, but the implementations would move to types like PureObject, PureSymbolicReference, etc.
1 parent 2b2ca10 commit 8ae52a5

File tree

5 files changed

+330
-10
lines changed

5 files changed

+330
-10
lines changed

doc/source/changes.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@ Changelog
55
****
66
NEXT
77
****
8-
* Added interface to allow transporting git data: **TransportDBMixin**
8+
* Added interface to allow transporting git data: **TransportDB**
99
* Added interface to allow reference resolution: **RefParseMixin**
1010
* Added interface to handle git related paths: **RepositoryPathsMixin**
1111
* Added interface to read and write git-like configuration: **ConfigurationMixin**
12+
* Added **RevParseMixin** providing reference resolution.
1213
* Added implementation of git database with support for transportation and reference resolution: **RefGitDB**
1314
* Renamed type *GitDB* to **GitODB** to differentiate its object-only property to the **RefGitDB**
1415

gitdb/db/base.py

+35-3
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535

3636
__all__ = ( 'ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB',
37-
'TransportDBMixin', 'RefParseMixin', 'ConfigurationMixin', 'RepositoryPathsMixin',
37+
'TransportDB', 'NameResolveMixin', 'ConfigurationMixin', 'RepositoryPathsMixin',
3838
'RefSpec', 'FetchInfo', 'PushInfo')
3939

4040

@@ -411,7 +411,7 @@ class FetchInfo(object):
411411
FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ]
412412

413413

414-
class TransportDBMixin(object):
414+
class TransportDB(object):
415415
"""A database which allows to transport objects from and to different locations
416416
which are specified by urls (location) and refspecs (what to transport,
417417
see http://www.kernel.org/pub/software/scm/git/docs/git-fetch.html).
@@ -468,10 +468,16 @@ def push(self, url, refspecs, progress=None, **kwargs):
468468
:raise: if any issue arises during transport or if the url cannot be handled"""
469469
raise NotImplementedError()
470470

471+
@property
472+
def remotes(self):
473+
""":return: An IterableList of Remote objects allowing to access and manipulate remotes
474+
:note: Remote objects can also be used for the actual push or fetch operation"""
475+
raise NotImplementedError()
476+
471477
#}end interface
472478

473479

474-
class RefParseMixin(object):
480+
class NameResolveMixin(object):
475481
"""Interface allowing to resolve symbolic names or partial hexadecimal shas into
476482
actual binary shas. The actual feature set depends on the implementation though,
477483
but should follow git-rev-parse."""
@@ -481,6 +487,32 @@ def resolve(self, name):
481487
in the rev-parse documentation http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html"""
482488
raise NotImplementedError()
483489

490+
491+
class RefDBMixin(object):
    """Interface of a database handing out reference objects, which in turn
    point to database objects such as Commits or Tag(Object)s.

    The types returned are meant to be compatible to the interfaces of the
    pure python reference implementation in GitDB.ref.

    :note: all properties are abstract here and raise NotImplementedError"""

    @property
    def heads(self):
        """:return: IterableList of HeadReference objects, one for each head
            in the repository."""
        raise NotImplementedError()

    @property
    def tags(self):
        """:return: IterableList of the TagReferences available in this repo"""
        raise NotImplementedError()

    @property
    def references(self):
        """:return: iterable list of all Reference objects, covering tags, heads
            and remote references. Use this as the most general way to obtain
            any kind of reference."""
        raise NotImplementedError()
515+
484516

485517
class RepositoryPathsMixin(object):
486518
"""Represents basic functionality of a full git repository. This involves an

gitdb/db/git.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77
ObjectDBW,
88
FileDBBase,
99
RepositoryPathsMixin,
10-
ConfigurationMixin
10+
ConfigurationMixin,
1111
)
1212

13+
from resolve import NameResolvePureMixin
14+
1315
from loose import LooseObjectDB
1416
from pack import PackedDB
1517
from ref import ReferenceDB
@@ -94,7 +96,7 @@ def set_ostream(self, ostream):
9496
#} END objectdbw interface
9597

9698

97-
class RefGitDB(GitODB, RepositoryPathsMixin, ConfigurationMixin):
99+
class RefGitDB(GitODB, RepositoryPathsMixin, ConfigurationMixin, NameResolvePureMixin):
98100
"""Git like database with support for object lookup as well as reference resolution.
99101
Our rootpath is set to the actual .git directory (bare on unbare).
100102

gitdb/db/resolve.py

+285
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
"""Module with an implementation for refspec parsing. It is the pure-python
2+
version assuming compatible interface for reference and object types"""
3+
4+
from base import NameResolveMixin
5+
from gitdb.exc import BadObject
6+
from gitdb.ref import SymbolicReference
7+
from gitdb.object.base import Object
8+
from gitdb.util import (
9+
join,
10+
isdir,
11+
isfile,
12+
hex_to_bin,
13+
bin_to_hex,
14+
is_git_dir
15+
)
16+
from string import digits
17+
import os
18+
import re
19+
20+
__all__ = ["NameResolvePureMixin"]
21+
22+
#{ Utilities
23+
24+
def short_to_long(odb, hexsha):
    """Expand an abbreviated hexsha to its full hexadecimal form.

    :return: long hexadecimal sha1 matching the given less-than-40 byte hexsha,
        or None if the database raised BadObject, i.e. no candidate was found.
    :param odb: database providing ``partial_to_complete_sha_hex``
    :param hexsha: hexsha with less than 40 byte"""
    try:
        # the database hands back a binary sha, convert it for the caller
        full_binsha = odb.partial_to_complete_sha_hex(hexsha)
        return bin_to_hex(full_binsha)
    except BadObject:
        return None
    # END handle unknown sha
33+
34+
35+
def name_to_object(repo, name, return_ref=False):
    """
    :return: object specified by the given name, hexshas ( short and long )
        as well as references are supported
    :param repo: object providing the ``re_hexsha_shortened`` pattern and an
        ``odb`` attribute used to complete short hexshas
    :param name: hexsha ( 4 to 40 hex characters ) or name of a reference. Plain
        names are tried as is, and below refs/, refs/tags/, refs/heads/ and
        refs/remotes/
    :param return_ref: if name specifies a reference, we will return the reference
        instead of the object. Otherwise it will raise BadObject. The name is
        never interpreted as hexsha in this mode.
    :raise BadObject: if nothing could be found for name, or if return_ref is set
        and name does not specify a reference
    """
    hexsha = None

    # is it a hexsha ? Try the most common ones, which is 7 to 40
    # If a reference was requested explicitly, the hexsha interpretation is
    # skipped: otherwise a reference named like a hexsha ( 'cafe', 'deadbeef' )
    # would be shadowed by any object whose sha starts with the same characters,
    # and the lookup would fail although the reference exists.
    if not return_ref and repo.re_hexsha_shortened.match(name):
        if len(name) != 40:
            # find long sha for short sha
            hexsha = short_to_long(repo.odb, name)
        else:
            hexsha = name
        # END handle short shas
    # END find sha if it matches

    # if we couldn't find an object for what seemed to be a short hexsha
    # try to find it as reference anyway, it could be named 'aaa' for instance
    if hexsha is None:
        for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s', 'refs/remotes/%s', 'refs/remotes/%s/HEAD'):
            try:
                hexsha = SymbolicReference.dereference_recursive(repo, base % name)
                if return_ref:
                    return SymbolicReference(repo, base % name)
                # END handle symbolic ref
                break
            except ValueError:
                # not a reference at this location, try the next one
                pass
        # END for each base
    # END handle hexsha

    # didn't find any ref, this is an error
    if return_ref:
        raise BadObject("Couldn't find reference named %r" % name)
    # END handle return ref

    # tried everything ? fail
    if hexsha is None:
        raise BadObject(name)
    # END assert hexsha was found

    return Object.new_from_sha(repo, hex_to_bin(hexsha))
80+
81+
def deref_tag(tag):
    """Follow the ``object`` attribute of the given tag until an item without
    such an attribute is reached.

    :return: the first object in the chain which has no ``object`` attribute.
        If tag itself has none, it is returned unchanged."""
    current = tag
    while True:
        try:
            current = current.object
        except AttributeError:
            # nothing left to dereference
            return current
        # END follow one level
    # END dereference loop
90+
91+
def to_commit(obj):
    """Turn the given object into a commit, dereferencing it first if it is a tag.

    :return: obj itself if it is a commit, or the commit a tag resolves to
    :raise ValueError: if the ( dereferenced ) object is not of type commit"""
    candidate = deref_tag(obj) if obj.type == 'tag' else obj

    if candidate.type == "commit":
        return candidate
    # END accept commits only

    raise ValueError("Cannot convert object %r to type commit" % candidate)
100+
101+
def rev_parse(repo, rev):
    """
    :return: Object at the given revision, either Commit, Tag, Tree or Blob
    :param repo: repository-like object. It is passed on to name_to_object and
        Object.new_from_sha, and its ``head.ref`` is used if rev starts with
        a token instead of a name
    :param rev: git-rev-parse compatible revision specification, please see
        http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
        for details
    :note: Supported are the topological tokens ~ and ^, type specifications
        like ^{tree}, tree lookups using <rev>:<path> and integer reflog indices
        as in <ref>@{n}. Other @{...} modes and the ':/' message search raise
        NotImplementedError.
    :note: everything following the ':' token is taken as path verbatim, even
        if it contains characters that would otherwise be tokens
    :raise BadObject: if the given revision could not be found
    :raise ValueError: If rev couldn't be parsed
    :raise IndexError: If invalid reflog index is specified"""

    # colon search mode ?
    if rev.startswith(':/'):
        # colon search mode
        raise NotImplementedError("commit by message search ( regex )")
    # END handle search

    obj = None
    ref = None
    output_type = "commit"
    start = 0           # index of the character currently being looked at
    parsed_to = 0       # index up to which rev was consumed successfully
    lr = len(rev)
    while start < lr:
        # skip everything that is not a token, it belongs to the rev name
        if rev[start] not in "^~:@":
            start += 1
            continue
        # END handle start

        token = rev[start]

        if obj is None:
            # token is a rev name
            if start == 0:
                # no name in front of the first token, use the head's reference
                ref = repo.head.ref
            else:
                if token == '@':
                    # reflog access requires the reference itself
                    ref = name_to_object(repo, rev[:start], return_ref=True)
                else:
                    obj = name_to_object(repo, rev[:start])
                # END handle token
            # END handle refname

            if ref is not None:
                obj = ref.commit
            # END handle ref
        # END initialize obj on first token

        start += 1

        # try to parse {type}
        if start < lr and rev[start] == '{':
            end = rev.find('}', start)
            if end == -1:
                raise ValueError("Missing closing brace to define type in %s" % rev)
            output_type = rev[start+1:end]  # exclude brace

            # handle type
            if output_type == 'commit':
                pass  # default
            elif output_type == 'tree':
                try:
                    obj = to_commit(obj).tree
                except (AttributeError, ValueError):
                    pass  # error raised later
                # END exception handling
            elif output_type in ('', 'blob'):
                if obj.type == 'tag':
                    obj = deref_tag(obj)
                else:
                    # cannot do anything for non-tags
                    pass
                # END handle tag
            elif token == '@':
                # try single int
                assert ref is not None, "Require Reference to access reflog"
                revlog_index = None
                try:
                    # transform reversed index into the format of our revlog
                    revlog_index = -(int(output_type)+1)
                except ValueError:
                    # TODO: Try to parse the other date options, using parse_date
                    # maybe
                    raise NotImplementedError("Support for additional @{...} modes not implemented")
                # END handle revlog index

                try:
                    entry = ref.log_entry(revlog_index)
                except IndexError:
                    raise IndexError("Invalid revlog index: %i" % revlog_index)
                # END handle index out of bound

                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))

                # make it pass the following checks
                output_type = None
            else:
                raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev))
            # END handle output type

            # empty output types don't require any specific type, its just about dereferencing tags
            if output_type and obj.type != output_type:
                raise ValueError("Could not accomodate requested object type %r, got %s" % (output_type, obj.type))
            # END verify output type

            start = end+1  # skip brace
            parsed_to = start
            continue
        # END parse type

        # try to parse a number
        num = 0
        if token != ":":
            found_digit = False
            while start < lr:
                if rev[start] in digits:
                    num = num * 10 + int(rev[start])
                    start += 1
                    found_digit = True
                else:
                    break
                # END handle number
            # END number parse loop

            # no explicit number given, 1 is the default
            # It could be 0 though
            if not found_digit:
                num = 1
            # END set default num
        # END number parsing only if non-blob mode

        parsed_to = start
        # handle hierarchy walk
        try:
            if token == "~":
                obj = to_commit(obj)
                # walk num generations back, always following the first parent
                for _ in range(num):
                    obj = obj.parents[0]
                # END for each history item to walk
            elif token == "^":
                obj = to_commit(obj)
                # must be n'th parent, ^0 is the commit itself
                if num:
                    obj = obj.parents[num-1]
            elif token == ":":
                if obj.type != "tree":
                    obj = obj.tree
                # END get tree type
                obj = obj[rev[start:]]
                parsed_to = lr
                # The remainder of rev was consumed as path: end the parse loop
                # here, otherwise token characters within the path ( ~ ^ : @ )
                # would be interpreted as revision tokens in the next iteration.
                start = lr
            else:
                raise ValueError("Invalid token: %r" % token)
            # END end handle tag
        except (IndexError, AttributeError):
            raise BadObject("Invalid Revision in %s" % rev)
        # END exception handling
    # END parse loop

    # still no obj ? Its probably a simple name
    if obj is None:
        obj = name_to_object(repo, rev)
        parsed_to = lr
    # END handle simple name

    if obj is None:
        raise ValueError("Revision specifier could not be parsed: %s" % rev)

    if parsed_to != lr:
        raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))

    return obj
275+
276+
#} END utilities
277+
278+
class NameResolvePureMixin(NameResolveMixin):
    """Pure-Python implementation of the name resolution interface, based on
    the rev_parse function of this module"""

    # matches abbreviated as well as complete hexshas, 4 to 40 hex characters
    re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
    # matches complete hexshas only, exactly 40 hex characters
    re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')

    def resolve(self, name):
        """:return: result of rev_parse for the given name, using this instance
            as the repository
        :param name: revision specification handed to rev_parse as is"""
        return rev_parse(self, name)

0 commit comments

Comments
 (0)