Commit 4afa0c9
Removed OOPSLA, bumped up threshold to 6 pages, refactored Python code.
emeryberger committed Jun 20, 2016
1 parent 19f2847 commit 4afa0c9
Showing 7 changed files with 1,181 additions and 4,804 deletions.
4 changes: 2 additions & 2 deletions Makefile
@@ -17,12 +17,12 @@ update-dblp:
mv dblp-fixed.xml dblp.xml
@echo "Done."

-faculty-coauthors.csv: dblp.xml util/generate-faculty-coauthors.py
+faculty-coauthors.csv: dblp.xml util/generate-faculty-coauthors.py util/csrankings.py
@echo "Rebuilding the co-author database (faculty-coauthors.csv)."
python util/generate-faculty-coauthors.py
@echo "Done."

-generated-author-info.csv: faculty-affiliations.csv dblp.xml util/regenerate-data.py
+generated-author-info.csv: faculty-affiliations.csv dblp.xml util/regenerate-data.py util/csrankings.py
@echo "Rebuilding the publication database (generated-author-info.csv)."
python util/regenerate-data.py
@echo "Done."
3,485 changes: 448 additions & 3,037 deletions faculty-coauthors.csv

Large diffs are not rendered by default.

2,307 changes: 724 additions & 1,583 deletions generated-author-info.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion index.html
@@ -280,7 +280,7 @@ <h1>Computer Science Rankings (beta)</h1>
<tr>
<td>
Programming languages
-<small><em>PLDI, POPL, OOPSLA</em></small>
+<small><em>PLDI, POPL</em></small>
</td>
<td>
<input type="checkbox" name="field_1" id="field_1" value="1.0"/>
2 changes: 1 addition & 1 deletion util/csrankings-util.py → util/csrankings.py
@@ -43,7 +43,7 @@ def pagecount(input):


areadict = {
-'proglang' : ['POPL', 'PLDI', 'OOPSLA'],
+'proglang' : ['POPL', 'PLDI'],
'highperf' : ['SC', 'PPOPP'],
'logic' : ['CAV', 'LICS'],
'softeng' : ['ICSE', 'ICSE (2)', 'SIGSOFT FSE', 'ESEC/SIGSOFT FSE'],
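The rendered hunk shows only the removal of OOPSLA from the proglang entry. The page-count bump mentioned in the commit message is not visible here, but the old inline code named the constant pageCountThreshold, so the corresponding change inside util/csrankings.py presumably looks something like the sketch below (an assumption about where and how the constant changed, not part of the rendered diff):

# Sketch (assumed): in util/csrankings.py, papers must now span at least
# 6 pages to count, up from the previous threshold of 4.
pageCountThreshold = 6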
93 changes: 1 addition & 92 deletions util/generate-faculty-coauthors.py
@@ -1,85 +1,5 @@
from lxml import etree as ElementTree
import htmlentitydefs
import csv
import operator
import re

# import gzip

generateLog = True

parser = ElementTree.XMLParser(attribute_defaults=True, load_dtd=True)

# Author paper count threshold - the author must have written at least this many top papers to count as a co-author.
# This is meant to generally exclude students.
authorPaperCountThreshold = 5

# Papers must be at least 4 pages long to count.
pageCountThreshold = 4
# Match ordinary page numbers (as in 10-17).
pageCounterNormal = re.compile('(\d+)-(\d+)')
# Match page number in the form volume:page (as in 12:140-12:150).
pageCounterColon = re.compile('[0-9]+:([1-9][0-9]*)-[0-9]+:([1-9][0-9]*)')

def pagecount(input):
    if (input is None):
        return 0
    pageCounterMatcher1 = pageCounterNormal.match(input)
    pageCounterMatcher2 = pageCounterColon.match(input)
    start = 0
    end = 0
    count = 0

    if (not (pageCounterMatcher1 is None)):
        start = int(pageCounterMatcher1.group(1))
        end = int(pageCounterMatcher1.group(2))
        count = end-start+1
    else:
        if (not (pageCounterMatcher2 is None)):
            start = int(pageCounterMatcher2.group(1))
            end = int(pageCounterMatcher2.group(2))
            count = end-start+1
    return count
from csrankings import *


areadict = {
'proglang' : ['POPL', 'PLDI', 'OOPSLA'],
'highperf' : ['SC', 'PPOPP'],
'logic' : ['CAV', 'LICS'],
'softeng' : ['ICSE', 'ICSE (2)', 'SIGSOFT FSE', 'ESEC/SIGSOFT FSE'],
'opsys' : ['SOSP', 'OSDI'],
'arch' : ['ISCA', 'MICRO', 'ASPLOS'],
'theory' : ['STOC', 'FOCS','SODA'],
'networks' : ['SIGCOMM', 'INFOCOM', 'NSDI'],
'security' : ['IEEE Symposium on Security and Privacy', 'ACM Conference on Computer and Communications Security', 'USENIX Security Symposium'],
'mlmining' : ['NIPS', 'ICML','KDD'],
'ai' : ['AAAI', 'IJCAI'],
'database' : ['PODS', 'VLDB', 'PVLDB', 'SIGMOD Conference'],
'graphics' : ['ACM Trans. Graph.', 'SIGGRAPH'],
'metrics' : ['SIGMETRICS','IMC','Internet Measurement Conference'],
'web' : ['WWW', 'SIGIR'],
'hci' : ['CHI','UbiComp','UIST'],
'nlp' : ['EMNLP','ACL','ACL (1)','NAACL'],
'vision' : ['CVPR','ICCV'],
'mobile' : ['MobiSys','MobiCom','MOBICOM','SenSys'],
'robotics' : ['ICRA','IROS','Robotics: Science and Systems']
}

# Build a dictionary mapping conferences to areas.
# e.g., confdict['CVPR'] = 'vision'.
confdict = {}
for k, v in areadict.items():
    for item in v:
        confdict[item] = k

# The list of all areas.
arealist = areadict.keys();

# Consider pubs in this range only.
startyear = 1990
endyear = 2016


def parseDBLP(facultydict):
    coauthors = {}
    papersWritten = {}
@@ -203,17 +123,6 @@ def parseDBLP(facultydict):
    return 0


def csv2dict_str_str(fname):
    with open(fname, mode='r') as infile:
        reader = csv.reader(infile)
        #for rows in reader:
        #    print rows[0], "-->", rows[1]
        d = {unicode(rows[0].strip(),'utf-8'): unicode(rows[1].strip(),'utf-8') for rows in reader}
    return d

def sortdictionary(d):
    return sorted(d.iteritems(), key=operator.itemgetter(1), reverse = True)

facultydict = csv2dict_str_str('faculty-affiliations.csv')

parseDBLP(facultydict)
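With the shared module in place, the roughly eighty lines deleted above collapse into the single star import. Below is a small usage sketch of the helpers the script now picks up, based on the code that was removed; the explicit import list is illustrative only (the script itself uses from csrankings import *), and confdict is assumed to be built inside the module just as the old inline code built it from areadict.

# Illustrative usage of the shared helpers; not part of the diff.
from csrankings import pagecount, areadict, confdict

print(pagecount('10-17'))                # 8  -- ordinary page range
print(pagecount('12:140-12:150'))        # 11 -- volume:page range
print(pagecount(None))                   # 0  -- missing page info never counts

print(confdict['PLDI'])                  # 'proglang'
print('OOPSLA' in areadict['proglang'])  # False after this commit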
92 changes: 4 additions & 88 deletions util/regenerate-data.py
@@ -1,86 +1,13 @@
from lxml import etree as ElementTree
import htmlentitydefs
import csv
import operator
import re

# import gzip

generateLog = True

parser = ElementTree.XMLParser(attribute_defaults=True, load_dtd=True)

# Papers must be at least 4 pages long to count.
pageCountThreshold = 4
# Match ordinary page numbers (as in 10-17).
pageCounterNormal = re.compile('(\d+)-(\d+)')
# Match page number in the form volume:page (as in 12:140-12:150).
pageCounterColon = re.compile('[0-9]+:([1-9][0-9]*)-[0-9]+:([1-9][0-9]*)')

def pagecount(input):
    if (input is None):
        return 0
    pageCounterMatcher1 = pageCounterNormal.match(input)
    pageCounterMatcher2 = pageCounterColon.match(input)
    start = 0
    end = 0
    count = 0

    if (not (pageCounterMatcher1 is None)):
        start = int(pageCounterMatcher1.group(1))
        end = int(pageCounterMatcher1.group(2))
        count = end-start+1
    else:
        if (not (pageCounterMatcher2 is None)):
            start = int(pageCounterMatcher2.group(1))
            end = int(pageCounterMatcher2.group(2))
            count = end-start+1
    return count
from csrankings import *


areadict = {
'proglang' : ['POPL', 'PLDI', 'OOPSLA'],
'highperf' : ['SC', 'PPOPP'],
'logic' : ['CAV', 'LICS'],
'softeng' : ['ICSE', 'ICSE (2)', 'SIGSOFT FSE', 'ESEC/SIGSOFT FSE'],
'opsys' : ['SOSP', 'OSDI'],
'arch' : ['ISCA', 'MICRO', 'ASPLOS'],
'theory' : ['STOC', 'FOCS','SODA'],
'networks' : ['SIGCOMM', 'INFOCOM', 'NSDI'],
'security' : ['IEEE Symposium on Security and Privacy', 'ACM Conference on Computer and Communications Security', 'USENIX Security Symposium'],
'mlmining' : ['NIPS', 'ICML','KDD'],
'ai' : ['AAAI', 'IJCAI'],
'database' : ['PODS', 'VLDB', 'PVLDB', 'SIGMOD Conference'],
'graphics' : ['ACM Trans. Graph.', 'SIGGRAPH'],
'metrics' : ['SIGMETRICS','IMC','Internet Measurement Conference'],
'web' : ['WWW', 'SIGIR'],
'hci' : ['CHI','UbiComp','UIST'],
'nlp' : ['EMNLP','ACL','ACL (1)','NAACL'],
'vision' : ['CVPR','ICCV'],
'mobile' : ['MobiSys','MobiCom','MOBICOM','SenSys'],
'robotics' : ['ICRA','IROS','Robotics: Science and Systems']
}

# Build a dictionary mapping conferences to areas.
# e.g., confdict['CVPR'] = 'vision'.
confdict = {}
for k, v in areadict.items():
    for item in v:
        confdict[item] = k

# The list of all areas.
arealist = areadict.keys();

# Consider pubs in this range only.
startyear = 1990
endyear = 2016


def parseDBLP(facultydict):
    authlogs = {}
    interestingauthors = {}
    authorscores = {}
    authorscoresAdjusted = {}
    coauthors = {}
    papersWritten = {}
    counter = 0

    with open('dblp.xml', mode='r') as f:

@@ -186,17 +113,6 @@ def parseDBLP(facultydict):
    return (interestingauthors, authorscores, authorscoresAdjusted)


def csv2dict_str_str(fname):
    with open(fname, mode='r') as infile:
        reader = csv.reader(infile)
        #for rows in reader:
        #    print rows[0], "-->", rows[1]
        d = {unicode(rows[0].strip(),'utf-8'): unicode(rows[1].strip(),'utf-8') for rows in reader}
    return d

def sortdictionary(d):
    return sorted(d.iteritems(), key=operator.itemgetter(1), reverse = True)

facultydict = csv2dict_str_str('faculty-affiliations.csv')

if (generateLog):
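The CSV and sorting helpers deleted here, csv2dict_str_str and sortdictionary, were stripped from both scripts, so they presumably now live in util/csrankings.py as well. Below is a sketch of the shared definitions, copied from the removed Python 2 code; their final location is an assumption, since util/csrankings.py itself is not fully shown in this diff.

# Assumed to live in util/csrankings.py after the refactor (Python 2, as in the original).
import csv
import operator

def csv2dict_str_str(fname):
    # Read a two-column CSV into a dictionary mapping column 0 to column 1 (unicode).
    with open(fname, mode='r') as infile:
        reader = csv.reader(infile)
        d = {unicode(rows[0].strip(), 'utf-8'): unicode(rows[1].strip(), 'utf-8') for rows in reader}
    return d

def sortdictionary(d):
    # Return (key, value) pairs sorted by value, highest first.
    return sorted(d.iteritems(), key=operator.itemgetter(1), reverse=True)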
