Skip to content

Commit

Permalink
allow filepath to be used with other filters
Browse files Browse the repository at this point in the history
  • Loading branch information
kanghj committed Jan 17, 2019
1 parent da5fc66 commit eb3804e
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 14 deletions.
11 changes: 2 additions & 9 deletions pydriller/git_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,25 +266,18 @@ def _useless_line(self, line: str):
return not line or line.startswith('//') or line.startswith('#') or line.startswith("/*") or \
line.startswith("'''") or line.startswith('"""') or line.startswith("*")

def get_commits_modified_file(self, filepath: str) -> List[str]:
    """
    Return the hashes of the commits that modified the given file.

    The history is read with ``git log --follow --format=%H`` on the
    absolute path of the file; ``--follow`` asks git to keep tracking the
    file across renames.

    :param str filepath: path to the file (relative paths are resolved
        against the current working directory)
    :return: the commit hashes in the order git printed them, or an empty
        list when git cannot provide any history for the path
    """
    # git is queried with the absolute path of the file
    path = str(Path(filepath).absolute())

    try:
        output = self.git.log("--follow", "--format=%H", path)
    except GitCommandError:
        logger.debug("Could not find information of file %s", path)
        return []

    # ``"".split('\n')`` would give ``['']``: skip blank lines so that an
    # empty git output maps to an empty list instead of one bogus hash.
    return [line.strip() for line in output.splitlines() if line.strip()]
12 changes: 7 additions & 5 deletions pydriller/repository_mining.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def __init__(self, path_to_repo: Union[str, List[str]],
self._only_authors = only_authors
self._only_commits = only_commits
self._filepath = filepath
self._filepath_commits = None

def _sanity_check_repos(self, path_to_repo):
if not isinstance(path_to_repo, str) and not isinstance(path_to_repo, list):
Expand Down Expand Up @@ -151,11 +152,7 @@ def traverse_commits(self) -> Generator[Commit, None, None]:
logger.info('Analyzing git repository in {}'.format(git_repo.path))

if self._filepath is not None:
commits = git_repo.get_commits_modified_file(self._filepath)
for commit in commits:
logger.info('Commit #{} in {} from {}'.format(commit.hash, commit.committer_date, commit.author.name))
yield commit
continue
self._filepath_commits = git_repo.get_commits_modified_file(self._filepath)

for commit in git_repo.get_list_commits(self._only_in_branch, not self._reversed_order):
logger.info('Commit #{} in {} from {}'.format(commit.hash, commit.committer_date, commit.author.name))
Expand Down Expand Up @@ -187,6 +184,11 @@ def _is_commit_filtered(self, commit: Commit):
if self._only_commits is not None and commit.hash not in self._only_commits:
logger.debug("Commit filtered because it is not one of the specified commits")
return True
if self._filepath is not None:
if self._filepath_commits is not None:
if commit.hash not in self._filepath_commits:
return True

return False

def _has_modification_with_file_type(self, commit):
Expand Down
12 changes: 12 additions & 0 deletions tests/test_git_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,4 +403,16 @@ def test_get_commits_modified_file():
gr = GitRepository('test-repos/test1/')

commits = gr.get_commits_modified_file('test-repos/test1/file2.java')

assert len(commits) == 3
assert '09f6182cef737db02a085e1d018963c7a29bde5a' in commits
assert '6411e3096dd2070438a17b225f44475136e54e3a' in commits
assert 'a88c84ddf42066611e76e6cb690144e5357d132c' in commits


def test_get_commits_modified_file_missing_file():
    """Asking for the history of a non-existing file gives an empty result."""
    missing_path = 'test-repos/test1/non-existing-file.java'
    repo = GitRepository('test-repos/test1/')

    found = repo.get_commits_modified_file(missing_path)

    assert len(found) == 0
21 changes: 21 additions & 0 deletions tests/test_repository_mining.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,24 @@ def test_badly_formatted_url():

with pytest.raises(Exception):
list(RepositoryMining(path_to_repo='test').traverse_commits())


def test_filepath():
    """The filepath filter combined with ``to`` yields 4 commits on test5."""
    cutoff = datetime(2018, 6, 6)
    miner = RepositoryMining(path_to_repo='test-repos/test5',
                             filepath='test-repos/test5/A.java',
                             to=cutoff)

    visited = list(miner.traverse_commits())

    assert 4 == len(visited)

def test_filepath_with_rename():
    """The filepath filter on file4.java of test1 yields the 2 expected commits."""
    cutoff = datetime(2018, 6, 6)
    miner = RepositoryMining(path_to_repo='test-repos/test1',
                             filepath='test-repos/test1/file4.java',
                             to=cutoff)

    visited = list(miner.traverse_commits())
    assert 2 == len(visited)

    commit_hashes = []
    for visited_commit in visited:
        commit_hashes.append(visited_commit.hash)

    for expected_hash in ('da39b1326dbc2edfe518b90672734a08f3c13458',
                          'a88c84ddf42066611e76e6cb690144e5357d132c'):
        assert expected_hash in commit_hashes

0 comments on commit eb3804e

Please sign in to comment.