forked from cncf/gitdm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdo-it.sh
executable file
·231 lines (210 loc) · 9.55 KB
/
do-it.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#!/bin/bash
# Print a debugging message when debugging output is enabled.
# Globals:   GITDM_DEBUG (read) - messages are shown only when it is "on"
# Arguments: $1 - the message text
# Outputs:   "DEBUG: <message>" on stdout
# Returns:   0 always, so a call never looks like a failure to its caller
#            (the former '[ ... ] && echo' form returned 1 whenever
#            debugging was switched off)
function DEBUG {
  if [ "${GITDM_DEBUG:-}" == "on" ]; then
    echo "DEBUG: ${1:-}"
  fi
  return 0
}
# debugging output switch: keeps a value supplied by the caller and falls
# back to "off" when the variable is unset or empty
: "${GITDM_DEBUG:=off}"
# Succeed when the first argument is a date written as YYYY-MM-DD
# (for example 2013-09-13). Callers use the result to choose between
# handing git a start date via '--since YYYY-MM-DD' and handing it a
# changeset id / range unchanged.
# Arguments: $1 - the string to examine
# Returns:   0 when $1 consists of exactly 4, 2 and 2 digits joined by '-',
#            non-zero otherwise (the digits are not range checked)
function IS_DATE {
  local date_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}$'
  [[ $1 =~ $date_re ]]
}
# ---------------------------------------------------------------------------
# Run-time configuration. Every setting written as ${NAME:-default} can be
# overridden from the caller's environment.
# ---------------------------------------------------------------------------
# directory below which one git checkout per project is kept
GITBASE=${GITBASE:-~/git/openstack}
# release name: selects the project list ${CONFIGDIR}/${RELEASE} and names
# the directory that receives the final results
RELEASE=${RELEASE:-havana}
# directory the script was started from
BASEDIR=$(pwd)
CONFIGDIR=${BASEDIR}/openstack-config
# scratch directory for intermediate files; created below BASEDIR unless the
# caller supplies one. The mktemp template is quoted so that a working
# directory containing whitespace still works, and a failed mktemp aborts
# instead of leaving TEMPDIR empty (which would send all later output to
# paths directly below /).
if [ -z "${TEMPDIR:-}" ]; then
  TEMPDIR=$(mktemp -d "${BASEDIR}/dmtmp-XXXXXX") || {
    echo "ERROR: unable to create a temporary directory below ${BASEDIR}" >&2
    exit 1
  }
fi
# options handed to every 'git log' that feeds gitdm
GITLOGARGS="--no-merges --numstat -M --find-copies-harder"
# base URL that missing projects are cloned from
REPOBASE=${REPOBASE:-http://review.openstack.org/p/openstack}
# stage switches: a stage runs only when its switch is "y"
UPDATE_GIT=${UPDATE_GIT:-y}
GIT_STATS=${GIT_STATS:-y}
# LP_STATS disabled by default, they take forever
LP_STATS=${LP_STATS:-n}
QUERY_LP=${QUERY_LP:-y}
GERRIT_STATS=${GERRIT_STATS:-y}
# remove TEMPDIR at the end of the run when "y"
REMOVE_TEMPDIR=${REMOVE_TEMPDIR:-y}
TIMESTAMP=$(date)
# brief header to prepend to all of the analysis results
OUTPUT_HEADER="Statistics generated at ${TIMESTAMP}"
# First-run bootstrap: unless a .venv directory already exists in the
# current directory, announce the step and run the virtualenv installer.
[ -d .venv ] || {
  echo "Creating a virtualenv"
  ./tools/install_venv.sh
}
# Stage: bring the local checkouts up to date (runs only when UPDATE_GIT is
# "y"). For every project named in the first column of
# ${CONFIGDIR}/${RELEASE} (lines starting with '#' are ignored) the
# repository is cloned from ${REPOBASE} when it is missing below ${GITBASE},
# and then fetched from origin.
if [ "$UPDATE_GIT" = "y" ]; then
  echo "Updating projects from git"
  if [ ! -d "${GITBASE}" ]; then
    DEBUG "Creating missing ${GITBASE}"
    mkdir -p "${GITBASE}"
  fi
  grep -v '^#' "${CONFIGDIR}/${RELEASE}" |
    while read -r project x; do
      # a blank line in the project list yields an empty name
      [ -n "${project}" ] || continue
      if [ ! -d "${GITBASE}/${project}" ]; then
        DEBUG "Cloning missing ${project} from ${REPOBASE}/${project}"
        git clone "${REPOBASE}/${project}" "${GITBASE}/${project}"
      fi
      # Never fetch in whatever directory we happen to be in: when the
      # checkout cannot be entered (e.g. the clone failed) skip the project.
      if ! cd "${GITBASE}/${project}"; then
        echo "WARNING: cannot enter ${GITBASE}/${project}, skipping" >&2
        continue
      fi
      DEBUG "Fetching updates to ${project}"
      # fetch errors stay silenced, as they always were in this script
      git fetch origin 2>/dev/null
    done
fi
# Stage: git commit statistics (runs only when GIT_STATS is "y").
#   1. write one 'git log' per project to ${TEMPDIR}/<project>-commits.log
#   2. feed each log to gitdm for per-project reports
#   3. concatenate all logs and feed them to gitdm for an aggregate report
# Each non-comment line of ${CONFIGDIR}/${RELEASE} is read as
# "<project> <revisions> <excludes> ...".
if [ "$GIT_STATS" = "y" ]; then
  echo "Generating git commit logs"
  grep -v '^#' "${CONFIGDIR}/${RELEASE}" |
    while read -r project revisions excludes x; do
      # a blank line in the project list yields an empty name
      [ -n "${project}" ] || continue
      DEBUG "Generating git commit log for ${project}"
      # Without this check a missing checkout would make 'git log' run in
      # the directory of the previously processed project.
      if ! cd "${GITBASE}/${project}"; then
        echo "WARNING: cannot enter ${GITBASE}/${project}, skipping" >&2
        continue
      fi
      # match possible dates of the format YYYY-MM-DD to use in
      # supplying git with a '--since DATE' parameter instead of a
      # range of changeset ids
      if IS_DATE "${revisions}"; then
        DEBUG "Matched a git --since date of '${revisions}'"
        revisions="--since ${revisions}"
      fi
      # GITLOGARGS and revisions each hold several words and are left
      # unquoted on purpose so that they split into separate arguments
      git log ${GITLOGARGS} ${revisions} > "${TEMPDIR}/${project}-commits.log"
      if [ -n "$excludes" ]; then
        # Drop excluded commits: printing is switched on at every
        # "commit " header and off again at headers whose id matches the
        # (config supplied) pattern in ${excludes}.
        awk "/^commit /{ok=1} /^commit ${excludes}/{ok=0} {if(ok) {print}}" \
          < "${TEMPDIR}/${project}-commits.log" > "${TEMPDIR}/${project}-commits.log.new"
        mv "${TEMPDIR}/${project}-commits.log.new" "${TEMPDIR}/${project}-commits.log"
      fi
    done

  echo "Generating git statistics"
  cd "${BASEDIR}" || exit 1
  grep -v '^#' "${CONFIGDIR}/${RELEASE}" |
    while read -r project x; do
      [ -n "${project}" ] || continue
      # projects skipped above have no commit log
      [ -f "${TEMPDIR}/${project}-commits.log" ] || continue
      DEBUG "Generating git stats for ${project}"
      echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-git-stats.txt"
      python gitdm -l 20 -n < "${TEMPDIR}/${project}-commits.log" >> "${TEMPDIR}/${project}-git-stats.txt"
      # also create a full dump with csv for further downstream processing
      echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-git-stats.csv"
      # '>' rather than '>>': with a re-used TEMPDIR the report must not be
      # appended to the one from the previous run
      python gitdm -n -y -z -x "${TEMPDIR}/${project}-git-stats.csv" < "${TEMPDIR}/${project}-commits.log" > "${TEMPDIR}/${project}-git-stats-all.txt"
    done

  DEBUG "Generating aggregate git stats for all projects"
  # start from an empty aggregate log, as the launchpad and gerrit sections
  # do for theirs; otherwise a re-used TEMPDIR counts every commit twice
  : > "${TEMPDIR}/git-commits.log"
  grep -v '^#' "${CONFIGDIR}/${RELEASE}" |
    while read -r project x; do
      [ -n "${project}" ] || continue
      [ -f "${TEMPDIR}/${project}-commits.log" ] || continue
      cat "${TEMPDIR}/${project}-commits.log" >> "${TEMPDIR}/git-commits.log"
    done
  echo "${OUTPUT_HEADER}" > "${TEMPDIR}/git-stats.txt"
  python gitdm -n -y -z -x "${TEMPDIR}/git-stats.csv" < "${TEMPDIR}/git-commits.log" >> "${TEMPDIR}/git-stats.txt"
fi
# Stage: launchpad bug statistics (runs only when LP_STATS is "y").
#   1. obtain <project>-bugs.log (queried through launchpad/buglist.py when
#      the file is missing and QUERY_LP is "y") and add to every entry the
#      addresses listed for its person in ${CONFIGDIR}/launchpad-ids.txt
#   2. feed each bug list to lpdm for per-project reports
#   3. concatenate all bug lists and feed them to lpdm for an aggregate
#      report
# Entries containing '<unknown>' are left out of all reports.
if [ "$LP_STATS" = "y" ]; then
  echo "Generating a list of bugs"
  cd "${BASEDIR}" || exit 1
  grep -v '^#' "${CONFIGDIR}/${RELEASE}" |
    while read -r project x; do
      # a blank line in the project list yields an empty name
      [ -n "${project}" ] || continue
      DEBUG "Generating a list of defects for ${project}"
      if [ ! -f "${TEMPDIR}/${project}-bugs.log" ] && [ "$QUERY_LP" = "y" ]; then
        ./tools/with_venv.sh python launchpad/buglist.py "${project}" "${RELEASE}" > "${TEMPDIR}/${project}-bugs.log"
      fi
      while read -r id person date x; do
        # Look the person up literally. The id used to be pasted into an
        # awk regular expression, where characters such as '.' and '+'
        # are operators, so ids containing them matched the wrong lines
        # or none at all.
        emails=$(awk -v person="${person}" \
          'index($0, person " ") == 1 { print $2 }' \
          "${CONFIGDIR}/launchpad-ids.txt")
        # unquoted on purpose: several addresses (one per line from awk)
        # are joined onto a single line
        echo $id $person $date $emails
      done < "${TEMPDIR}/${project}-bugs.log" > "${TEMPDIR}/${project}-bugs.log.new"
      mv "${TEMPDIR}/${project}-bugs.log.new" "${TEMPDIR}/${project}-bugs.log"
    done

  echo "Generating launchpad statistics"
  cd "${BASEDIR}" || exit 1
  grep -v '^#' "${CONFIGDIR}/${RELEASE}" |
    while read -r project x; do
      [ -n "${project}" ] || continue
      DEBUG "Generating launchpad stats for ${project}"
      # The header has to be written here, inside the loop: outside of it
      # ${project} is empty (the loop runs in a pipeline subshell), so the
      # header used to land in "-lp-stats.txt" and never in the reports.
      echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-lp-stats.txt"
      grep -v '<unknown>' "${TEMPDIR}/${project}-bugs.log" |
        python lpdm -l 20 >> "${TEMPDIR}/${project}-lp-stats.txt"
    done

  DEBUG "Generating aggregate launchpad stats for all projects"
  # start from an empty aggregate bug list
  : > "${TEMPDIR}/lp-bugs.log"
  grep -v '^#' "${CONFIGDIR}/${RELEASE}" |
    while read -r project x; do
      [ -n "${project}" ] || continue
      grep -v '<unknown>' "${TEMPDIR}/${project}-bugs.log" >> "${TEMPDIR}/lp-bugs.log"
    done
  echo "${OUTPUT_HEADER}" > "${TEMPDIR}/lp-stats.txt"
  grep -v '<unknown>' "${TEMPDIR}/lp-bugs.log" |
    python lpdm -l 20 >> "${TEMPDIR}/lp-stats.txt"
fi
# Stage: gerrit review statistics (runs only when GERRIT_STATS is "y").
#   1. collect the Change-Ids found in each project's git log, in chunks of
#      100 per file
#   2. query gerrit over ssh, one query per chunk, into
#      <project>-<release>-reviews.json
#   3. list the commit ids of each project
#   4. turn the query results into <project>-<release>-reviewers.txt with
#      gerrit/parse-reviews.py
#   5. feed the reviewer lists to gerritdm, per project and aggregated
# Each non-comment line of ${CONFIGDIR}/${RELEASE} is read as
# "<project> <revisions> ...".
if [ "$GERRIT_STATS" = "y" ]; then
  echo "Generating a list of Change-Ids"
  grep -v '^#' "${CONFIGDIR}/${RELEASE}" |
    while read -r project revisions x; do
      # a blank line in the project list yields an empty name
      [ -n "${project}" ] || continue
      # Without this check a missing checkout would make 'git log' run in
      # the directory of the previously processed project.
      if ! cd "${GITBASE}/${project}"; then
        echo "WARNING: cannot enter ${GITBASE}/${project}, skipping" >&2
        continue
      fi
      # match possible dates of the format YYYY-MM-DD to use in
      # supplying git with a '--since DATE' parameter instead of a
      # range of changeset ids
      if IS_DATE "${revisions}"; then
        DEBUG "Matched a git --since date of '${revisions}'"
        revisions="--since ${revisions}"
      fi
      # chunks left behind by an earlier run in a re-used TEMPDIR would be
      # queried again below, so remove them first
      rm -f -- "${TEMPDIR}/${project}-${RELEASE}-change-ids-"??
      # 'git log' indents the commit message, so accept any number of
      # leading blanks in front of the trailer. ${revisions} is unquoted on
      # purpose: it may hold the two words "--since DATE".
      git log ${revisions} |
        awk '/^ +Change-Id: / { print $2 }' |
        split -l 100 -d - "${TEMPDIR}/${project}-${RELEASE}-change-ids-"
    done

  cd "${TEMPDIR}" || exit 1
  grep -v '^#' "${CONFIGDIR}/${RELEASE}" |
    while read -r project x; do
      [ -n "${project}" ] || continue
      # start from an empty result file
      : > "${project}-${RELEASE}-reviews.json"
      for f in "${project}-${RELEASE}-change-ids-"??; do
        # an unmatched glob stays literal; there is nothing to query then
        [ -e "${f}" ] || continue
        echo "Querying gerrit: ${f}"
        # The command substitution joins the ids of one chunk with " OR "
        # and is unquoted on purpose so that it splits into arguments.
        # stdin comes from /dev/null so that ssh cannot swallow the
        # project list the enclosing loop is reading.
        ssh -p 29418 review.openstack.org \
          gerrit query --all-approvals --format=json \
          $(awk -v ORS=' OR ' '{print}' "${f}" | sed 's/ OR $//') \
          < /dev/null >> "${project}-${RELEASE}-reviews.json"
      done
    done

  echo "Generating a list of commit IDs"
  grep -v '^#' "${CONFIGDIR}/${RELEASE}" |
    while read -r project revisions x; do
      [ -n "${project}" ] || continue
      DEBUG "Generating a list of commit IDs for ${project}"
      if ! cd "${GITBASE}/${project}"; then
        echo "WARNING: cannot enter ${GITBASE}/${project}, skipping" >&2
        continue
      fi
      # match possible dates of the format YYYY-MM-DD to use in
      # supplying git with a '--since DATE' parameter instead of a
      # range of changeset ids
      if IS_DATE "${revisions}"; then
        DEBUG "Matched a git --since date of '${revisions}'"
        revisions="--since ${revisions}"
      fi
      # ${revisions} is unquoted on purpose, see above
      git log --pretty=format:%H ${revisions} > \
        "${TEMPDIR}/${project}-${RELEASE}-commit-ids.txt"
    done

  echo "Parsing the gerrit queries"
  cd "${BASEDIR}" || exit 1
  grep -v '^#' "${CONFIGDIR}/${RELEASE}" |
    while read -r project x; do
      [ -n "${project}" ] || continue
      DEBUG "Parsing the gerrit queries for ${project}"
      python gerrit/parse-reviews.py \
        "${TEMPDIR}/${project}-${RELEASE}-commit-ids.txt" \
        "${CONFIGDIR}/launchpad-ids.txt" \
        < "${TEMPDIR}/${project}-${RELEASE}-reviews.json" \
        > "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt"
    done

  echo "Generating gerrit statistics"
  cd "${BASEDIR}" || exit 1
  grep -v '^#' "${CONFIGDIR}/${RELEASE}" |
    while read -r project x; do
      [ -n "${project}" ] || continue
      DEBUG "Generating gerrit statistics for ${project}"
      # The headers have to be written here, inside the loop: outside of
      # it ${project} is empty (the loop runs in a pipeline subshell), so
      # they used to land in "-gerrit-stats.txt" and
      # "-gerrit-stats-all.txt" and never in the per-project reports.
      echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-gerrit-stats.txt"
      echo "${OUTPUT_HEADER}" > "${TEMPDIR}/${project}-gerrit-stats-all.txt"
      python gerritdm -l 20 \
        < "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" \
        >> "${TEMPDIR}/${project}-gerrit-stats.txt"
      python gerritdm -z \
        < "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" \
        >> "${TEMPDIR}/${project}-gerrit-stats-all.txt"
    done

  DEBUG "Generating aggregate gerrit statistics for all projects"
  # start from an empty aggregate reviewer list
  : > "${TEMPDIR}/gerrit-reviewers.txt"
  grep -v '^#' "${CONFIGDIR}/${RELEASE}" |
    while read -r project x; do
      [ -n "${project}" ] || continue
      cat "${TEMPDIR}/${project}-${RELEASE}-reviewers.txt" >> "${TEMPDIR}/gerrit-reviewers.txt"
    done
  echo "${OUTPUT_HEADER}" > "${TEMPDIR}/gerrit-stats.txt"
  echo "${OUTPUT_HEADER}" > "${TEMPDIR}/gerrit-stats-all.txt"
  python gerritdm -l 20 < "${TEMPDIR}/gerrit-reviewers.txt" >> "${TEMPDIR}/gerrit-stats.txt"
  python gerritdm -z < "${TEMPDIR}/gerrit-reviewers.txt" >> "${TEMPDIR}/gerrit-stats-all.txt"
fi
# Final stage: publish the results and tidy up. The directory
# ${BASEDIR}/${RELEASE} is recreated from scratch, every report
# (*stats.txt, *stats-all.txt, *.csv) is moved into it from TEMPDIR, and
# TEMPDIR is removed when REMOVE_TEMPDIR is "y".
DEBUG "Cleaning up"
# 'rm -rf' below uses a relative path, so it must not run anywhere else
cd "${BASEDIR}" || exit 1
# with an empty TEMPDIR the globs below would address files directly in /
if [ -z "${TEMPDIR}" ]; then
  echo "ERROR: TEMPDIR is empty, there are no results to collect" >&2
  exit 1
fi
# ':?' aborts instead of running rm/mkdir with an empty name
rm -rf -- "${RELEASE:?}" && mkdir -- "${RELEASE}"
# Stages that were switched off produce no files for some of these
# patterns; an unmatched pattern stays literal and is skipped instead of
# being handed to mv.
for result in "${TEMPDIR}"/*stats.txt "${TEMPDIR}"/*stats-all.txt "${TEMPDIR}"/*.csv; do
  [ -e "${result}" ] || continue
  mv -- "${result}" "${RELEASE}/"
done
# a real if/else: with 'test && rm || echo' a failing rm also printed
# "Not removing"
if [ "$REMOVE_TEMPDIR" = "y" ]; then
  rm -rf -- "${TEMPDIR}"
else
  echo "Not removing ${TEMPDIR}"
fi