Skip to content
This repository has been archived by the owner on Sep 3, 2024. It is now read-only.

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
alanbraz committed Feb 25, 2022
1 parent ed3d5d4 commit 551acab
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 17 deletions.
62 changes: 62 additions & 0 deletions tranform_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#
# Copyright 2021- IBM Inc. All rights reserved
# SPDX-License-Identifier: Apache2.0
#
import sys
from utils.cloudant_utils import cloudant_db as db
from utils import print_json


def main(args):
    """One-off ETL: fetch a single repo document that has more than 10
    releases, then attach cumulative star/watcher counts to each release
    and pretty-print the result.

    NOTE(review): `limit=1` means only the first matching repo is processed;
    `args` is currently unused — presumably kept for future CLI options.
    """
    query = {"type": "Repo", "releases.10": {"$exists": True}}
    fields = ["_id", "releases", "stars", "watchers"]
    repos = list(db.get_query_result(query, fields, limit=1, raw_result=True)["docs"])

    # Flatten releases into per-release records, seeded with a zero star
    # count, then reverse so the list runs oldest-first.
    releases = []
    for repo in repos:
        for rel in repo["releases"]:
            releases.append({
                "repo": repo["_id"],
                "release_tag": rel["tag"],
                "release_date": rel["published_at"],
                "stars": 0,
            })
    releases.reverse()
    # print_json(releases)

    total_stars, stars_post_cutoff, stars_pre_cutoff = getStars(repos[0])
    total_watchers, watchers_post_cutoff, watchers_pre_cutoff = getWatchers(repos[0])

    for release in releases:
        updateRelease(release, stars_post_cutoff, stars_pre_cutoff, "stars")
        updateRelease(release, watchers_post_cutoff, watchers_pre_cutoff, "watchers")
    print_json(releases)


def updateRelease(release, events, initial_count, field):
    """Store on ``release[field]`` the cumulative event count as of the
    release date.

    Parameters:
        release: dict with a "release_date" key (ISO-8601 timestamp string).
        events: iterable of event timestamps (ISO-8601 strings) — plain
            string comparison orders correctly for that format.
        initial_count: number of events that predate the recorded window
            (e.g. stars gained before the data cutoff).
        field: key on ``release`` under which to store the cumulative count.
    """
    count = initial_count
    for event_date in events:
        if event_date < release["release_date"]:
            count += 1
    # BUGFIX: the original only assigned release[field] inside the loop's
    # else branch, so a release newer than every recorded event was never
    # updated at all. Assign exactly once, after counting.
    release[field] = count


def getStars(repo):
    """Return star statistics for *repo*.

    Returns a 3-tuple:
        total_stars: the repo document's overall star count.
        stars_post_cutoff: per-star event dates, oldest first, read from the
            companion "<repo-id>/stargazers" document.
        stars_pre_cutoff: stars not individually recorded, i.e. everything
            earned before the stored event window.
    """
    stargazer_doc = db[repo["_id"] + "/stargazers"]
    events_newest_first = list(stargazer_doc["stargazers"].values())
    stars_post_cutoff = events_newest_first[::-1]  # flip to oldest-first
    total_stars = repo["stars"]
    stars_pre_cutoff = total_stars - len(stars_post_cutoff)
    return total_stars, stars_post_cutoff, stars_pre_cutoff


def getWatchers(repo):
    """Return watcher statistics for *repo*; mirrors getStars().

    Returns a 3-tuple:
        total_watchers: the repo document's overall watcher count.
        watchers_post_cutoff: per-watcher event dates, oldest first, read
            from the companion "<repo-id>/watchers" document.
        watchers_pre_cutoff: watchers not individually recorded, i.e.
            everything gained before the stored event window.
    """
    # CLEANUP: removed leftover debug `print(repo)` — getStars(), the
    # sibling this function mirrors, has no such output.
    watchers_post_cutoff = list(db[repo["_id"] + "/watchers"]["watchers"].values())[::-1]
    total_watchers = repo["watchers"]
    watchers_pre_cutoff = total_watchers - len(watchers_post_cutoff)
    return total_watchers, watchers_post_cutoff, watchers_pre_cutoff


# Script entry point: forward CLI arguments (currently unused by main).
if __name__ == "__main__":
    main(sys.argv[1:])
7 changes: 5 additions & 2 deletions utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Copyright 2021- IBM Inc. All rights reserved
# SPDX-License-Identifier: Apache2.0
#
import moment
import moment, json
ISO_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
ISO_SHORT_FORMAT = "%Y-%m-%d"

Expand All @@ -13,4 +13,7 @@ def now_short():
return moment.utcnow().zero.strftime(ISO_SHORT_FORMAT)

def format_date_utc_iso(d):
    """Normalize a date-like value to a UTC ISO-8601 string (ISO_FORMAT)."""
    # FIX: the pasted merge left a duplicated, unreachable `return` line
    # after this one; a function body needs exactly one.
    return moment.date(d).timezone("UTC").strftime(ISO_FORMAT)

def print_json(data):
    """Pretty-print *data* to stdout as 2-space-indented JSON."""
    rendered = json.dumps(data, indent=2)
    print(rendered)
29 changes: 14 additions & 15 deletions utils/github_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,16 +797,14 @@ def get_commit_history(repo_name, cut_date="2000"):
hasNextPage
endCursor
}
edges {
cursor
node {
committedDate
message
committer {
nodes {
committer {
date
user {
login
email
}
}
message
}
}
}
Expand All @@ -820,14 +818,14 @@ def get_commit_history(repo_name, cut_date="2000"):
if "errors" in res:
print(json.dumps(res["errors"], indent=2))
try:
for r in res["data"]["repository"]["defaultBranchRef"]["target"]["history"]["edges"]:
for r in res["data"]["repository"]["defaultBranchRef"]["target"]["history"]["nodes"]:
if r:
committedDate = format_date_utc_iso(r["node"]["committedDate"])
committedDate = format_date_utc_iso(r["committer"]["date"])
if committedDate < cut_date:
break
committedMsg = r["node"]["message"]
committerEmail = r["node"]["committer"]["email"]
commits.append({"date": committedDate, "message": committedMsg, "email": committerEmail})
committedMsg = r["message"]
committerLogin = r["committer"]["user"]["login"]
commits.append({"date": committedDate, "message": committedMsg, "committer": committerLogin})
cursor = res["data"]["repository"]["defaultBranchRef"]["target"]["history"]["pageInfo"]["endCursor"]
hasNextPage = res["data"]["repository"]["defaultBranchRef"]["target"]["history"]["pageInfo"]["hasNextPage"]
except Exception as e:
Expand Down Expand Up @@ -1031,7 +1029,7 @@ def get_README_history(repo_name, cut_date="2000"):
cursor = None
hasNextPage = True
while hasNextPage:
print(len(response.keys()), cursor)
# print(len(response.keys()), cursor)
body = """
{
repository(owner: "%s", name: "%s") {
Expand Down Expand Up @@ -1072,7 +1070,7 @@ def get_README_history(repo_name, cut_date="2000"):
", after: \"{}\"".format(cursor) if cursor else "",
)
res = graphql_api(body)
print (res)
# print (res)
if "errors" in res:
print(json.dumps(res["errors"], indent=2))
try:
Expand All @@ -1093,7 +1091,8 @@ def get_README_history(repo_name, cut_date="2000"):
'committeDate': committedDate,
'login': n['author']['user']['login'] if n['author']['user'] else None,
'additions': n['additions'],
'deletions': n['deletions']
'deletions': n['deletions'],
'url': n['url']
})
#todo recursively make a post request to get the history of each commit url

Expand Down

0 comments on commit 551acab

Please sign in to comment.