Review notes. Text ahead of the first "diff --git" header is not part of any hunk.
- AuthorDistanceHasher.kt: a commit by an author missing from `emails` no longer
  hits `!!` on a null score; `!!` on JgitData fields is replaced by
  requireNotNull; the comment on the per-path map is corrected; KDoc added.
- ServerApi.kt: the request name now matches the method name.
- AuthorDistance.kt, AuthorDistanceGroup.kt: KDoc added.
- Generic type arguments were absent from the captured text. They are restored
  on added and removed lines where the diff itself determines them. Context
  lines are kept as captured; check them, the indentation and the blob ids on
  the "index" lines of the edited new files against the repository.

diff --git a/build.gradle b/build.gradle
index dfe250a8..7fc9d45a 100644
--- a/build.gradle
+++ b/build.gradle
@@ -61,6 +61,7 @@ buildConfig {
     buildConfigField 'boolean', 'LONGEVITY_ENABLED', project.hasProperty('longevity-enabled') ? project.property('longevity-enabled').toString() : 'false'
     buildConfigField 'long', 'HEARTBEAT_RATE', project.hasProperty('heartbeat-rate') ? project.property('heartbeat-rate').toString() : '60000'
     buildConfigField 'boolean', 'META_HASHER_ENABLED', project.hasProperty('meta-hasher-enabled') ? project.property('meta-hasher-enabled').toString() : 'true'
+    buildConfigField 'boolean', 'DISTANCES_ENABLED', project.hasProperty('distances-enabled') ? project.property('distances-enabled').toString() : 'true'
 
     buildConfig
 }
diff --git a/src/main/kotlin/app/api/Api.kt b/src/main/kotlin/app/api/Api.kt
index 85161b3f..140945ee 100644
--- a/src/main/kotlin/app/api/Api.kt
+++ b/src/main/kotlin/app/api/Api.kt
@@ -3,13 +3,7 @@
 
 package app.api
 
-import app.model.Author
-import app.model.Commit
-import app.model.Fact
-import app.model.Process
-import app.model.ProcessEntry
-import app.model.Repo
-import app.model.User
+import app.model.*
 
 interface Api {
     companion object {
@@ -36,4 +30,6 @@ interface Api {
     fun postAuthors(authorsList: List): Result
     fun postProcessCreate(requestNumEntries: Int): Result
     fun postProcess(processEntries: List): Result
+    fun postAuthorDistances(authorDistanceList: List<AuthorDistance>):
+            Result<Unit>
 }
diff --git a/src/main/kotlin/app/api/MockApi.kt b/src/main/kotlin/app/api/MockApi.kt
index 0a65a9b0..13d86a8a 100644
--- a/src/main/kotlin/app/api/MockApi.kt
+++ b/src/main/kotlin/app/api/MockApi.kt
@@ -4,13 +4,7 @@
 package app.api
 
 import app.Logger
-import app.model.Author
-import app.model.Commit
-import app.model.Repo
-import app.model.Fact
-import app.model.Process
-import app.model.ProcessEntry
-import app.model.User
+import app.model.*
 
 class MockApi(  // GET requests.
     var mockUser: User = User(),
@@ -25,6 +19,7 @@ class MockApi( // GET requests.
     var receivedUsers: MutableList = mutableListOf()
     var receivedProcessCreate: MutableList = mutableListOf()
     var receivedProcess: MutableList = mutableListOf()
+    var receivedDistances: MutableList<AuthorDistance> = mutableListOf()
 
     // DELETE requests.
     var receivedDeletedCommits: MutableList = mutableListOf()
@@ -93,4 +88,12 @@ class MockApi( // GET requests.
         receivedProcess.add(Process(entries = processEntries))
         return Result()
     }
+
+    override fun postAuthorDistances(authorDistanceList:
+                                     List<AuthorDistance>): Result<Unit> {
+        Logger.debug { "MockApi: postAuthorDistances request (${authorDistanceList
+                .size} distances)" }
+        receivedDistances.addAll(authorDistanceList)
+        return Result()
+    }
 }
diff --git a/src/main/kotlin/app/api/ServerApi.kt b/src/main/kotlin/app/api/ServerApi.kt
index ae4cbb40..891a9909 100644
--- a/src/main/kotlin/app/api/ServerApi.kt
+++ b/src/main/kotlin/app/api/ServerApi.kt
@@ -6,16 +6,7 @@ package app.api
 import app.BuildConfig
 import app.Logger
 import app.config.Configurator
-import app.model.Author
-import app.model.AuthorGroup
-import app.model.Commit
-import app.model.CommitGroup
-import app.model.Fact
-import app.model.FactGroup
-import app.model.Process
-import app.model.ProcessEntry
-import app.model.Repo
-import app.model.User
+import app.model.*
 import com.github.kittinunf.fuel.core.FuelManager
 import com.github.kittinunf.fuel.core.Method
 import com.github.kittinunf.fuel.core.Request
@@ -125,6 +116,12 @@ class ServerApi (private val configurator: Configurator) : Api {
                 .body(process.serialize())
     }
 
+    private fun createRequestPostAuthorDistances(distances:
+                                                 AuthorDistanceGroup): Request {
+        return post("/distances").header(getContentTypeHeader())
+                .body(distances.serialize())
+    }
+
     private fun makeRequest(request: Request,
                             requestName: String,
                             parser: (ByteArray) -> T): Result {
@@ -214,4 +211,10 @@ class ServerApi (private val configurator: Configurator) : Api {
         return makeRequest(createRequestPostProcess(process), "postProcess", {})
     }
 
+    override fun postAuthorDistances(authorDistanceList: List<AuthorDistance>):
+            Result<Unit> {
+        val distances = AuthorDistanceGroup(authorDistanceList)
+        return makeRequest(createRequestPostAuthorDistances(distances),
+                "postAuthorDistances", {})
+    }
 }
diff --git a/src/main/kotlin/app/hashers/AuthorDistanceHasher.kt b/src/main/kotlin/app/hashers/AuthorDistanceHasher.kt
new file mode 100644
index 00000000..db213de4
--- /dev/null
+++ b/src/main/kotlin/app/hashers/AuthorDistanceHasher.kt
@@ -0,0 +1,83 @@
+// Copyright 2018 Sourcerer Inc. All Rights Reserved.
+// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io)
+
+package app.hashers
+
+import app.api.Api
+import app.model.AuthorDistance
+import app.model.Repo
+import io.reactivex.Observable
+import java.util.concurrent.TimeUnit
+
+// An author's commit to a path scores only when it is dated less than this
+// many days before the user's commit recorded for the same path.
+private const val MAX_DAYS_APART = 365L
+
+/**
+ * Scores every author in `emails` by the number of paths they committed to
+ * less than [MAX_DAYS_APART] days before the user's commit recorded for that
+ * path, and posts the scores through `api` once the stream completes.
+ */
+class AuthorDistanceHasher(
+    private val serverRepo: Repo,
+    private val api: Api,
+    private val emails: HashSet<String>,
+    private val userEmails: HashSet<String>) {
+    /**
+     * Subscribes to [observable] and accumulates the scores.
+     *
+     * Every item must carry `email`, `paths` and `date` (in seconds), or
+     * [IllegalArgumentException] is thrown while handling it. Only paths the
+     * user was already seen committing to earlier in the stream are counted.
+     */
+    fun updateFromObservable(observable: Observable<JgitData>,
+                             onError: (Throwable) -> Unit) {
+        val authorScores = hashMapOf<String, Double>()
+        emails.forEach { authorScores[it] = 0.0 }
+
+        // Time of the user's commit most recently received, per path.
+        val authorPathLastContribution = hashMapOf<String, Long>()
+
+        observable.subscribe({ data ->
+            val email = requireNotNull(data.email) {
+                "JgitData.email is missing, crawl with extractEmail = true"
+            }
+            val paths = requireNotNull(data.paths) {
+                "JgitData.paths is missing, crawl with extractPaths = true"
+            }
+            val time = requireNotNull(data.date) {
+                "JgitData.date is missing, crawl with extractDate = true"
+            }
+            if (email in userEmails) {
+                paths.forEach { path ->
+                    authorPathLastContribution[path] = time
+                }
+            } else {
+                val score = paths.count { path ->
+                    val authorTime = authorPathLastContribution[path]
+                    authorTime != null && TimeUnit.DAYS.convert(
+                        authorTime - time, TimeUnit.SECONDS) < MAX_DAYS_APART
+                }
+                // Authors outside `emails` are never reported, so their
+                // commits are skipped rather than failing on a null score.
+                authorScores[email]?.let { current ->
+                    authorScores[email] = current + score
+                }
+            }
+        }, onError, {
+            val stats = authorScores
+                .filterKeys { it !in userEmails }
+                .map { (email, value) ->
+                    AuthorDistance(serverRepo, email, value)
+                }
+            postDistancesToServer(stats)
+        })
+    }
+
+    /** Posts [stats] unless the list is empty. */
+    private fun postDistancesToServer(stats: List<AuthorDistance>) {
+        if (stats.isNotEmpty()) {
+            api.postAuthorDistances(stats).onErrorThrow()
+        }
+    }
+}
diff --git a/src/main/kotlin/app/hashers/CodeLongevity.kt b/src/main/kotlin/app/hashers/CodeLongevity.kt
index 950a2214..ceda54b1 100644
--- a/src/main/kotlin/app/hashers/CodeLongevity.kt
+++ b/src/main/kotlin/app/hashers/CodeLongevity.kt
@@ -320,7 +320,7 @@ class CodeLongevity(
     /**
      * Scans the repo to extract code line ages.
      */
-    fun updateFromObservable(diffObservable: Observable<JgitPair> =
+    fun updateFromObservable(diffObservable: Observable<JgitData> =
                                  CommitCrawler.getJGitObservable(git),
                              onError: (Throwable) -> Unit = {},
                              api: Api,
@@ -396,7 +396,7 @@ class CodeLongevity(
      * the revisions of the repo.
      */
     fun getLinesList(tail : RevCommit? = null,
-                     diffObservable: Observable<JgitPair> =
+                     diffObservable: Observable<JgitData> =
                          CommitCrawler.getJGitObservable(git),
                      onError: (Throwable) -> Unit = {}) : List {
         val codeLines: MutableList = mutableListOf()
@@ -411,7 +411,7 @@ class CodeLongevity(
      * the revisions of the repo.
      */
     fun getLinesObservable(tail : RevCommit? = null,
-                           diffObservable: Observable<JgitPair>,
+                           diffObservable: Observable<JgitData>,
                            onError: (Throwable) -> Unit) : Observable =
         Observable.create { subscriber ->
 
@@ -448,7 +448,7 @@ class CodeLongevity(
             // to the diff. Traverse the diffs backwards to handle double
             // renames properly.
             // TODO(alex): cover file renames by tests (see APP-132 issue).
-            for ((diff, editList) in diffs.asReversed()) {
+            for ((diff, editList) in diffs!!.asReversed()) {
                 val oldPath = diff.oldPath
                 val oldId = diff.oldId.toObjectId()
                 val newPath = diff.newPath
@@ -483,7 +483,7 @@ class CodeLongevity(
                         Logger.trace { "ins ($insStart, $insEnd)" }
 
                         for (idx in insStart until insEnd) {
-                            val from = RevCommitLine(commit, newId,
+                            val from = RevCommitLine(commit!!, newId,
                                                      newPath, idx, false)
                             try {
                                 val to = lines[idx]
@@ -514,7 +514,7 @@ class CodeLongevity(
                         val tmpLines = ArrayList(delCount)
 
                         for (idx in delStart until delEnd) {
-                            tmpLines.add(RevCommitLine(commit, oldId,
+                            tmpLines.add(RevCommitLine(commit!!, oldId,
                                                        oldPath, idx, true))
                         }
                         lines.addAll(delStart, tmpLines)
diff --git a/src/main/kotlin/app/hashers/CommitCrawler.kt b/src/main/kotlin/app/hashers/CommitCrawler.kt
index f784e904..0a5be12f 100644
--- a/src/main/kotlin/app/hashers/CommitCrawler.kt
+++ b/src/main/kotlin/app/hashers/CommitCrawler.kt
@@ -1,5 +1,6 @@
 // Copyright 2017 Sourcerer Inc. All Rights Reserved.
 // Author: Anatoly Kislov (anatoly@sourcerer.io)
+// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io)
 
 package app.hashers
 
@@ -29,7 +30,12 @@ import org.eclipse.jgit.treewalk.TreeWalk
 import org.eclipse.jgit.util.io.DisabledOutputStream
 import java.util.LinkedList
 
-data class JgitPair(val commit: RevCommit, val list: List<JgitDiff>)
+data class JgitData(var commit: RevCommit? = null,
+                    var list: List<JgitDiff>? = null,
+                    var paths: List<String>? = null,
+                    var date: Long? = null,
+                    var email: String? = null)
+
 data class JgitDiff(val diffEntry: DiffEntry, val editList: EditList)
 
 /**
@@ -96,9 +102,14 @@ object CommitCrawler {
 
     fun getJGitObservable(git: Git,
                           totalCommitCount: Int = 0,
+                          extractCommit: Boolean = true,
+                          extractDiffs: Boolean = true,
+                          extractPaths: Boolean = false,
+                          extractDate: Boolean = false,
+                          extractEmail: Boolean = false,
                           filteredEmails: HashSet? = null,
                           tail : RevCommit? = null) :
-        Observable<JgitPair> = Observable.create { subscriber ->
+        Observable<JgitData> = Observable.create { subscriber ->
         val repo: Repository = git.repository
         val revWalk = RevWalk(repo)
         val head: RevCommit =
@@ -156,9 +167,9 @@ object CommitCrawler {
                     commit = parentCommit
                     continue
                 }
+                val paths = mutableListOf<String>()
 
                 val diffEntries = df.scan(parentCommit, commit)
-                val diffEdits = diffEntries
                 .filter { diff ->
                     diff.changeType != DiffEntry.ChangeType.COPY
                 }
@@ -199,24 +210,46 @@ object CommitCrawler {
                         getIgnoredPaths(repo,
                             diff.getNewId().toObjectId())
                     }
-                    !ignoredPaths.any { path ->
+                    if (!ignoredPaths.any { path ->
                         if (path.endsWith("/")) {
                             filePath.startsWith(path)
                         } else {
                             path == filePath
                         }
+                    }) {
+                        paths.add(filePath)
+                        true
+                    } else false
+                }
+
+                val jgitData = JgitData()
+                if (extractCommit) {
+                    jgitData.commit = commit
+                }
+                if (extractDiffs) {
+                    val diffEdits = diffEntries
+                    .map { diff ->
+                        JgitDiff(diff, df.toFileHeader(diff).toEditList())
                     }
+                    .filter { diff ->
+                        diff.editList.fold(0) { acc, edit ->
+                            acc + edit.lengthA + edit.lengthB
+                        } < MAX_DIFF_SIZE
+                    }
+                    jgitData.list = diffEdits
                 }
-                .map { diff ->
-                    JgitDiff(diff, df.toFileHeader(diff).toEditList())
+                if (extractPaths) {
+                    jgitData.paths = paths
                 }
-                .filter { diff ->
-                    diff.editList.fold(0) { acc, edit ->
-                        acc + edit.lengthA + edit.lengthB
-                    } < MAX_DIFF_SIZE
+                if (extractDate) {
+                    jgitData.date = commit.authorIdent.getWhen().time / 1000
                 }
 
-                subscriber.onNext(JgitPair(commit, diffEdits))
+                if (extractEmail) {
+                    jgitData.email = email
+                }
+
+                subscriber.onNext(jgitData)
                 commit = parentCommit
             }
 
@@ -229,12 +262,12 @@ object CommitCrawler {
     }
 
     fun getObservable(git: Git,
-                      jgitObservable: Observable<JgitPair>,
+                      jgitObservable: Observable<JgitData>,
                       repo: Repo): Observable {
-        return jgitObservable.map( { (jgitCommit, jgitDiffs) ->
+        return jgitObservable.map( { (jgitCommit, jgitDiffs, _) ->
             // Mapping and stats extraction.
-            val commit = Commit(jgitCommit)
-            commit.diffs = getDiffFiles(git.repository, jgitDiffs)
+            val commit = Commit(jgitCommit!!)
+            commit.diffs = getDiffFiles(git.repository, jgitDiffs!!)
 
             // Count lines on all non-binary files. This is additional
             // statistics to CommitStats because not all file extensions
diff --git a/src/main/kotlin/app/hashers/RepoHasher.kt b/src/main/kotlin/app/hashers/RepoHasher.kt
index da0ae0c4..f1ba7ba6 100644
--- a/src/main/kotlin/app/hashers/RepoHasher.kt
+++ b/src/main/kotlin/app/hashers/RepoHasher.kt
@@ -7,8 +7,6 @@ import app.BuildConfig
 import app.Logger
 import app.api.Api
 import app.config.Configurator
-import app.extractors.Extractor
-import app.extractors.Heuristics
 import app.model.Author
 import app.model.LocalRepo
 import app.model.ProcessEntry
@@ -73,7 +71,7 @@ class RepoHasher(private val api: Api,
                 filteredEmails
             } else null
             val jgitObservable = CommitCrawler.getJGitObservable(git,
-                    rehashes.size, crawlerEmails
+                    rehashes.size, filteredEmails = crawlerEmails
             ).publish()
             val observable = CommitCrawler.getObservable(git,
                     jgitObservable, serverRepo)
@@ -98,6 +96,15 @@ class RepoHasher(private val api: Api,
                         commitsCount = commitsCount,
                         userEmails = userEmail)
             }
+            if (BuildConfig.DISTANCES_ENABLED) {
+                val userEmails = configurator.getUser().emails.map { it.email }.toHashSet()
+                val pathsObservable = CommitCrawler.getJGitObservable(git,
+                        extractCommit = false, extractDate = true,
+                        extractDiffs = false, extractEmail = true,
+                        extractPaths = true)
+                AuthorDistanceHasher(serverRepo, api, emails, userEmails)
+                        .updateFromObservable(pathsObservable, onError)
+            }
 
             // Start and synchronously wait until all subscribers complete.
             Logger.print("Stats computation. May take a while...")
diff --git a/src/main/kotlin/app/model/AuthorDistance.kt b/src/main/kotlin/app/model/AuthorDistance.kt
new file mode 100644
index 00000000..9fb959a8
--- /dev/null
+++ b/src/main/kotlin/app/model/AuthorDistance.kt
@@ -0,0 +1,43 @@
+// Copyright 2018 Sourcerer Inc. All Rights Reserved.
+// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io)
+
+package app.model
+
+import app.Protos
+import com.google.protobuf.InvalidProtocolBufferException
+import java.security.InvalidParameterException
+
+/**
+ * Score of the author identified by [email] within [repo], mirroring the
+ * `AuthorDistance` protobuf message. Only the rehash of [repo] is
+ * serialized.
+ */
+data class AuthorDistance(
+    var repo: Repo = Repo(),
+    var email: String = "",
+    var score: Double = 0.0
+) {
+    @Throws(InvalidParameterException::class)
+    constructor(proto: Protos.AuthorDistance) : this() {
+        repo = Repo(rehash = proto.repoRehash)
+        email = proto.email
+        score = proto.score
+    }
+
+    @Throws(InvalidProtocolBufferException::class)
+    constructor(bytes: ByteArray) : this(Protos.AuthorDistance.parseFrom(bytes))
+
+    constructor(serialized: String) : this(serialized.toByteArray())
+
+    fun getProto(): Protos.AuthorDistance {
+        return Protos.AuthorDistance.newBuilder()
+                .setRepoRehash(repo.rehash)
+                .setEmail(email)
+                .setScore(score)
+                .build()
+    }
+
+    fun serialize(): ByteArray {
+        return getProto().toByteArray()
+    }
+}
diff --git a/src/main/kotlin/app/model/AuthorDistanceGroup.kt b/src/main/kotlin/app/model/AuthorDistanceGroup.kt
new file mode 100644
index 00000000..b6aee1f7
--- /dev/null
+++ b/src/main/kotlin/app/model/AuthorDistanceGroup.kt
@@ -0,0 +1,37 @@
+// Copyright 2018 Sourcerer Inc. All Rights Reserved.
+// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io)
+
+package app.model
+
+import app.Protos
+import com.google.protobuf.InvalidProtocolBufferException
+import java.security.InvalidParameterException
+
+/**
+ * List of [AuthorDistance] entries, mirroring the `AuthorDistanceGroup`
+ * protobuf message.
+ */
+data class AuthorDistanceGroup(
+    var stats: List<AuthorDistance> = listOf()
+) {
+    @Throws(InvalidParameterException::class)
+    constructor(proto: Protos.AuthorDistanceGroup) : this() {
+        stats = proto.authorDistancesList.map { AuthorDistance(it) }
+    }
+
+    @Throws(InvalidProtocolBufferException::class)
+    constructor(bytes: ByteArray) : this(
+        Protos.AuthorDistanceGroup.parseFrom(bytes))
+
+    constructor(serialized: String) : this(serialized.toByteArray())
+
+    fun getProto(): Protos.AuthorDistanceGroup {
+        return Protos.AuthorDistanceGroup.newBuilder()
+                .addAllAuthorDistances(stats.map { it.getProto() })
+                .build()
+    }
+
+    fun serialize(): ByteArray {
+        return getProto().toByteArray()
+    }
+}
diff --git a/src/main/proto/sourcerer.proto b/src/main/proto/sourcerer.proto
index c63d7f65..3aaa2e65 100644
--- a/src/main/proto/sourcerer.proto
+++ b/src/main/proto/sourcerer.proto
@@ -1,5 +1,6 @@
 // Copyright 2017 Sourcerer, Inc. All Rights Reserved.
 // Author: Anatoly Kislov (anatoly@sourcerer.io)
+// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io)
 
 syntax = "proto3";
 
@@ -155,3 +156,13 @@ message ProcessEntry {
     uint32 status = 2;
     uint32 error_code = 3;
 }
+
+message AuthorDistance {
+    string email = 1;
+    double score = 2;
+    string repo_rehash = 3;
+}
+
+message AuthorDistanceGroup {
+    repeated AuthorDistance author_distances = 1;
+}
diff --git a/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt b/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt
new file mode 100644
index 00000000..7b1d0cd0
--- /dev/null
+++ b/src/test/kotlin/test/tests/hashers/AuthorDistanceHasherTest.kt
@@ -0,0 +1,84 @@
+// Copyright 2018 Sourcerer Inc. All Rights Reserved.
+// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io)
+
+package test.tests.hashers
+
+import app.FactCodes
+import app.api.MockApi
+import app.hashers.AuthorDistanceHasher
+import app.hashers.CommitCrawler
+import app.model.Author
+import app.model.AuthorDistance
+import app.model.Fact
+import app.model.Repo
+import org.eclipse.jgit.api.Git
+import org.jetbrains.spek.api.Spek
+import org.jetbrains.spek.api.dsl.given
+import org.jetbrains.spek.api.dsl.it
+import test.utils.TestRepo
+import java.io.File
+import java.util.*
+import kotlin.test.assertTrue
+import kotlin.test.fail
+
+class AuthorDistanceHasherTest : Spek({
+    given("repo with a file") {
+        val testRepoPath = "../author_distance_hasher"
+        val testRepo = TestRepo(testRepoPath)
+        val serverRepo = Repo(rehash = "test_repo_rehash")
+        val api = MockApi(mockRepo = serverRepo)
+        val fileName = "test1.txt"
+        val author1 = Author("First Author", "first.author@gmail.com")
+        val author2 = Author("Second Author", "second.author@gmail.com")
+        val author3 = Author("Third Author", "third.author@gmail.com")
+        val emails = hashSetOf(author1.email, author2.email, author3.email)
+
+        testRepo.createFile(fileName, listOf("line1", "line2"))
+        testRepo.commit(message = "initial commit",
+                author = author1,
+                date = Calendar.Builder().setDate(2017, 1, 1).setTimeOfDay
+                (0, 0, 0).build().time)
+
+        testRepo.deleteLines(fileName, 1, 1)
+        testRepo.commit(message = "delete second line",
+                author = author2,
+                date = Calendar.Builder().setDate(2017, 1, 1).setTimeOfDay
+                (0, 1, 0).build().time)
+
+        testRepo.deleteLines(fileName, 0, 0)
+        testRepo.commit(message = "delete first line",
+                author = author1,
+                date = Calendar.Builder().setDate(2018, 1, 1).setTimeOfDay
+                (0, 1, 0).build().time)
+        testRepo.insertLines(fileName, 0, listOf("line1"))
+        testRepo.commit(message = "add first line",
+                author = author3,
+                date = Calendar.Builder().setDate(2019, 1, 1).setTimeOfDay
+                (0, 1, 0).build().time)
+
+        val gitHasher = Git.open(File(testRepoPath))
+        it("extracts colleagues") {
+            val observable = CommitCrawler.getJGitObservable(gitHasher,
+                    extractCommit = false, extractDate = true,
+                    extractDiffs = false, extractEmail = true,
+                    extractPaths = true)
+            AuthorDistanceHasher(serverRepo, api, emails,
+                    hashSetOf(author2.email)).updateFromObservable(observable,
+                    onError = { _ -> fail("exception") })
+
+            assertTrue(api.receivedDistances.contains(
+                    AuthorDistance(repo = serverRepo,
+                            email = author1.email,
+                            score = 1.0)))
+
+            assertTrue(api.receivedDistances.contains(
+                    AuthorDistance(repo = serverRepo,
+                            email = author3.email,
+                            score = 0.0)))
+        }
+
+        afterGroup {
+            testRepo.destroy()
+        }
+    }
+})