forked from cncf/gitdm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmerge_jsons.rb
129 lines (123 loc) · 4.41 KB
/
merge_jsons.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
require 'json'
require 'pry'
require './mgetc'
# Reads the cache of previously given affiliation answers from +path+.
#
# Returns the parsed Hash. Falls back to an empty Hash when the file is
# missing, cannot be read, is not valid JSON, or does not hold a JSON object;
# every case except a missing file is reported on stderr so a damaged cache
# is not silently discarded.
def load_answer_cache(path)
  cached = JSON.parse(File.read(path))
  return cached if cached.is_a?(Hash)

  warn "Ignoring answer cache #{path}: expected a JSON object, got #{cached.class}"
  {}
rescue Errno::ENOENT
  # No cache yet: nothing to report.
  {}
rescue StandardError => e
  warn "Ignoring answer cache #{path}: #{e.message}"
  {}
end

# True unless +affiliation+ is one of the placeholder values
# '?', '(Unknown)' or 'NotFound'.
def known_affiliation?(affiliation)
  !['?', '(Unknown)', 'NotFound'].include?(affiliation)
end

# Merges the users from +primary_json+ into the users from +new_json+ and
# writes the merged, sorted list back to +new_json+.
#
# For every new user a primary user is looked up by [login, email], then by
# login, then by email (all lower-cased). When one is found:
# * a known primary affiliation replaces an unknown new one automatically;
#   when both are known and differ, the operator is asked (y/n, q quits with
#   exit status 1) and y/n answers are cached per email in
#   'merge_json_cache.json' in the current directory;
# * gender (sex, sex_prob) and location (country_id, tz) are copied from the
#   primary user when the new user is missing either value of the pair.
# Primary users whose [login, email] does not occur in the new file are
# appended. The result is sorted by commits (descending), login, email.
#
# primary_json - path of the primary (old) users JSON file
# new_json     - path of the new users JSON file; overwritten with the result
# email_map    - accepted for interface compatibility; not read by this method
#
# Returns the value of the final File.write call.
def merge_jsons(primary_json, new_json, email_map)
  # set dbg = true to have verbose output
  dbg = false

  # primary JSON: one hash holds the exact [login, email] entry plus
  # per-login and per-email lists of users
  pdata = JSON.parse(File.read(primary_json))
  users = {}
  pdata.each do |user|
    login = user['login'].downcase
    email = user['email'].downcase
    users[[login, email]] = user
    (users[login] ||= []) << user
    (users[email] ||= []) << user
  end
  np = pdata.size

  # new JSON
  data = JSON.parse(File.read(new_json))
  nusers = {}
  # match counters: login+email, login only, email only, no match
  le = l = e = n = 0
  # overwrite counters: affiliation, gender, location
  a = s = lo = 0
  json_cache = 'merge_json_cache.json'
  answers = load_answer_cache(json_cache)

  data.each do |user|
    ologin = user['login']
    login = ologin.downcase
    email = user['email'].downcase
    commits = user['commits'].to_s

    pri_user = nil
    mode = nil
    if users.key?([login, email])
      pri_user = users[[login, email]]
      mode = 'le'
      le += 1
    elsif users.key?(login)
      pri_user = users[login].first
      mode = 'l '
      l += 1
    elsif users.key?(email)
      pri_user = users[email].first
      mode = ' e'
      e += 1
    end

    if pri_user
      commits += ",#{pri_user['commits']}"

      if user['affiliation'] != pri_user['affiliation'] && known_affiliation?(pri_user['affiliation'])
        # An unknown new affiliation is replaced without asking.
        answer = 'y'
        if known_affiliation?(user['affiliation'])
          puts "\n#{mode} Use primary/old affiliation: '#{pri_user['affiliation']}' (#{pri_user['source']})\n#{mode} instead of new '#{user['affiliation']}' (#{user['source']})\nfor #{ologin}/#{email}/#{commits} ?"
          if answers.key?(email)
            answer = answers[email]
            puts "#{answer}\n"
          else
            answer = mgetc.downcase
            exit(1) if answer == 'q'
            answers[email] = answer if %w[y n].include?(answer)
            # Persist after every prompt so answers survive a later 'q'.
            File.write(json_cache, JSON.pretty_generate(answers))
          end
        end
        if %w[y Y].include?(answer)
          puts "#{mode} Using primary/old affiliation '#{pri_user['affiliation']}' instead of new '#{user['affiliation']}' for #{ologin}/#{email}/#{commits}"
          user['affiliation'] = pri_user['affiliation']
          user['source'] = pri_user['source'] unless pri_user['source'].nil?
          a += 1
        end
      end

      gender_differs = user['sex'] != pri_user['sex'] || user['sex_prob'] != pri_user['sex_prob']
      primary_has_gender = !pri_user['sex'].nil? || !pri_user['sex_prob'].nil?
      new_lacks_gender = user['sex'].nil? || user['sex_prob'].nil?
      if gender_differs && primary_has_gender && new_lacks_gender
        puts "#{mode} Using primary gender '#{pri_user['sex']}, #{pri_user['sex_prob']}' instead of new '#{user['sex']}, #{user['sex_prob']}' for #{ologin}/#{email}/#{commits}" if dbg
        user['sex'] = pri_user['sex']
        user['sex_prob'] = pri_user['sex_prob']
        s += 1
      end

      location_differs = user['country_id'] != pri_user['country_id'] || user['tz'] != pri_user['tz']
      primary_has_location = !pri_user['country_id'].nil? || !pri_user['tz'].nil?
      new_lacks_location = user['country_id'].nil? || user['tz'].nil?
      if location_differs && primary_has_location && new_lacks_location
        puts "#{mode} Using primary location '#{pri_user['country_id']}, #{pri_user['tz']}' instead of new '#{user['country_id']}, #{user['tz']}' for #{ologin}/#{email}/#{commits}" if dbg
        user['country_id'] = pri_user['country_id']
        user['tz'] = pri_user['tz']
        lo += 1
      end
    else
      n += 1
    end

    nusers[[login, email]] = user
  end
  nn = data.size

  # Keep primary users that have no [login, email] counterpart in the new file.
  primary_only = 0
  pdata.each do |user|
    key = [user['login'].downcase, user['email'].downcase]
    next if nusers.key?(key)

    nusers[key] = user
    primary_only += 1
  end

  users = nusers.values.sort_by { |u| [-u['commits'], u['login'], u['email']] }
  puts "Primary users #{np}, new users #{nn}, merge: le #{le}, l #{l}, e #{e}, n #{n}, p #{primary_only}"
  puts "Overwrites aff #{a}, gender #{s}, location #{lo}"

  # Write JSON back
  File.write(new_json, JSON.pretty_generate(users))
end
# Command-line entry point: at least three arguments are required; the first
# three are handed to merge_jsons in order (primary JSON, new JSON, email map).
unless ARGV.size >= 3
  puts "Missing arguments: github_users.old github_users.json cncf-config/email-map"
  exit(1)
end
primary_path, new_path, email_map_path = ARGV
merge_jsons(primary_path, new_path, email_map_path)