Skip to content

Commit 4e1112e

Browse files
author
Andrew Or
committed
[Release] Update contributors list format and sort it
Additionally, the translation script now warns the user when a duplicate author name is detected; the user must then merge the duplicate entries manually.
1 parent 6069880 commit 4e1112e

File tree

4 files changed

+30
-15
lines changed

4 files changed

+30
-15
lines changed

.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ checkpoint
5151
derby.log
5252
dist/
5353
dev/create-release/*txt
54-
dev/create-release/*new
54+
dev/create-release/*final
5555
spark-*-bin-*.tgz
5656
unit-tests.log
5757
/lib/

.rat-excludes

+1
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,4 @@ dist/*
6464
logs
6565
.*scalastyle-output.xml
6666
.*dependency-reduced-pom.xml
67+
dev/create-release/known_translations

dev/create-release/generate-contributors.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,9 @@ def populate(issue_type, components):
192192
print "==================================================================================\n"
193193

194194
# Write to contributors file ordered by author names
195-
# Each line takes the format "Author name - semi-colon delimited contributions"
196-
# e.g. Andrew Or - Bug fixes in Windows, Core, and Web UI; improvements in Core
197-
# e.g. Tathagata Das - Bug fixes and new features in Streaming
195+
# Each line takes the format " * Author name -- semi-colon delimited contributions"
196+
# e.g. * Andrew Or -- Bug fixes in Windows, Core, and Web UI; improvements in Core
197+
# e.g. * Tathagata Das -- Bug fixes and new features in Streaming
198198
contributors_file = open(contributors_file_name, "w")
199199
authors = author_info.keys()
200200
authors.sort()
@@ -223,7 +223,7 @@ def populate(issue_type, components):
223223
# E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672
224224
if author in invalid_authors and invalid_authors[author]:
225225
author = author + "/" + "/".join(invalid_authors[author])
226-
line = "%s - %s" % (author, contribution)
226+
line = " * %s -- %s" % (author, contribution)
227227
contributors_file.write(line + "\n")
228228
contributors_file.close()
229229
print "Contributors list is successfully written to %s!" % contributors_file_name

dev/create-release/translate-contributors.py

+24-10
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,12 @@
4343
if not GITHUB_API_TOKEN:
4444
sys.exit("GITHUB_API_TOKEN must be set")
4545

46-
# Write new contributors list to <old_file_name>.new
46+
# Write new contributors list to <old_file_name>.final
4747
if not os.path.isfile(contributors_file_name):
4848
print "Contributors file %s does not exist!" % contributors_file_name
4949
print "Have you run ./generate-contributors.py yet?"
5050
sys.exit(1)
5151
contributors_file = open(contributors_file_name, "r")
52-
new_contributors_file_name = contributors_file_name + ".new"
53-
new_contributors_file = open(new_contributors_file_name, "w")
5452
warnings = []
5553

5654
# In non-interactive mode, this script will choose the first replacement that is valid
@@ -73,7 +71,7 @@
7371
known_translations_file = open(known_translations_file_name, "r")
7472
for line in known_translations_file:
7573
if line.startswith("#"): continue
76-
[old_name, new_name] = line.split(" - ")
74+
[old_name, new_name] = line.strip("\n").split(" - ")
7775
known_translations[old_name] = new_name
7876
known_translations_file.close()
7977

@@ -147,16 +145,16 @@ def generate_candidates(author, issues):
147145
# If no such name exists, the original name is used (without the JIRA numbers).
148146
print "\n========================== Translating contributor list =========================="
149147
lines = contributors_file.readlines()
148+
contributions = []
150149
for i, line in enumerate(lines):
151-
temp_author = line.split(" - ")[0]
150+
temp_author = line.strip(" * ").split(" -- ")[0]
152151
print "Processing author %s (%d/%d)" % (temp_author, i + 1, len(lines))
153152
if not temp_author:
154-
error_msg = " ERROR: Expected the following format <author> - <contributions>\n"
153+
error_msg = " ERROR: Expected the following format \" * <author> -- <contributions>\"\n"
155154
error_msg += " ERROR: Actual = %s" % line
156155
print error_msg
157156
warnings.append(error_msg)
158-
new_contributors_file.write(line)
159-
new_contributors_file.flush()
157+
contributions.append(line)
160158
continue
161159
author = temp_author.split("/")[0]
162160
# Use the local copy of known translations where possible
@@ -222,10 +220,26 @@ def generate_candidates(author, issues):
222220
known_translations_file.write("%s - %s\n" % (author, new_author))
223221
known_translations_file.flush()
224222
line = line.replace(temp_author, author)
225-
new_contributors_file.write(line)
226-
new_contributors_file.flush()
223+
contributions.append(line)
227224
print "==================================================================================\n"
228225
contributors_file.close()
226+
known_translations_file.close()
227+
228+
# Sort the contributions before writing them to the new file.
229+
# Additionally, check if there are any duplicate author rows.
230+
# This could happen if the same user has both a valid full
231+
# name (e.g. Andrew Or) and an invalid one (andrewor14).
232+
# If so, warn the user about this at the end.
233+
contributions.sort()
234+
all_authors = set()
235+
new_contributors_file_name = contributors_file_name + ".final"
236+
new_contributors_file = open(new_contributors_file_name, "w")
237+
for line in contributions:
238+
author = line.strip(" * ").split(" -- ")[0]
239+
if author in all_authors:
240+
warnings.append("Detected duplicate author name %s. Please merge these manually." % author)
241+
all_authors.add(author)
242+
new_contributors_file.write(line)
229243
new_contributors_file.close()
230244

231245
print "Translated contributors list successfully written to %s!" % new_contributors_file_name

0 commit comments

Comments
 (0)