forked from emeryberger/CSrankings
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmerge-csv.py
executable file
·41 lines (35 loc) · 1.43 KB
/
merge-csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env python
from collections import *
import gzip
import xmltodict
import collections
import json
import csv
import re
import sys
import operator
def csv2dict_str_str(fname):
    """Read a two-column CSV file into a dict of Unicode text pairs.

    Each non-blank row contributes one entry: the first field is the key and
    the second field is the value, both with surrounding whitespace stripped.
    Any further fields on a row are ignored.  When a key occurs on several
    rows, the last row wins.  The file is decoded as UTF-8.

    Works on both Python 2 and Python 3; keys and values are text
    (``unicode`` on Python 2, ``str`` on Python 3) in either case.

    Args:
        fname: Path of the CSV file to read.

    Returns:
        A dict mapping the first column to the second column.

    Raises:
        IndexError: If a non-blank row has fewer than two fields.
    """
    if sys.version_info[0] >= 3:
        # Python 3: the csv module works on text, so let the file object do
        # the UTF-8 decoding.  newline='' is what the csv docs require so
        # that newlines embedded in quoted fields are handled correctly.
        with open(fname, mode='r', encoding='utf-8', newline='') as infile:
            return {row[0].strip(): row[1].strip()
                    for row in csv.reader(infile)
                    if row}  # csv yields [] for blank lines; skip them

    # Python 2: the csv module works on byte strings, so read bytes and
    # decode each field ourselves (the `unicode` builtin is avoided so the
    # function body also runs unchanged on Python 3).
    with open(fname, mode='rb') as infile:
        return {row[0].strip().decode('utf-8'): row[1].strip().decode('utf-8')
                for row in csv.reader(infile)
                if row}
# Merge the three per-faculty CSV tables into a single csrankings.csv with
# one row per faculty member: name, affiliation, homepage, scholarid.

# name -> affiliation, iterated in sorted-by-name order so the output is
# deterministic.
facultydict1 = csv2dict_str_str('faculty-affiliations.csv')
facultydict = OrderedDict(sorted(facultydict1.items(),
                                 key=operator.itemgetter(0)))
# name -> homepage URL.
homepages = csv2dict_str_str('homepages.csv')
# name -> Google Scholar id (or a placeholder marker).
scholarLinks1 = csv2dict_str_str('scholar.csv')
scholarLinks = OrderedDict(sorted(scholarLinks1.items(),
                                  key=operator.itemgetter(0)))

# The csv module wants text on Python 3 but byte strings on Python 2, and
# the output file has to be opened to match.
_py3 = sys.version_info[0] >= 3
if _py3:
    _open_kwargs = {'mode': 'w', 'encoding': 'utf-8', 'newline': ''}
else:
    _open_kwargs = {'mode': 'wb'}


def _cell(text):
    """Return *text* in the form csv expects: str on Py3, UTF-8 bytes on Py2."""
    return text if _py3 else text.encode('utf8')


with open('csrankings.csv', **_open_kwargs) as outfile:
    fieldnames = ['name', 'affiliation', 'homepage', 'scholarid']
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()
    for authorName in facultydict:
        # The input's own header row was read as data; do not emit it again.
        if authorName == "name":
            continue
        # Normalise the "not looked up yet" marker to the "no page" marker.
        if scholarLinks.get(authorName, "XX") == "NOENTRYYET":
            scholarLinks[authorName] = "NOSCHOLARPAGE"
        entry = {
            'name': _cell(authorName),
            'affiliation': _cell(facultydict[authorName]),
            # Deliberately a hard lookup: a faculty member with no row in
            # homepages.csv raises KeyError rather than being written out
            # with an empty homepage.
            'homepage': _cell(homepages[authorName]),
            # Faculty absent from scholar.csv get the "no page" marker.
            # Encoded like every other cell so a non-ASCII value cannot
            # trip Python 2's implicit ASCII conversion.
            'scholarid': _cell(scholarLinks.get(authorName, "NOSCHOLARPAGE")),
        }
        writer.writerow(entry)