Commit 4afa0c9
Removed OOPSLA, bumped up threshold to 6 pages, refactored Python code.
emeryberger committed Jun 20, 2016
1 parent 19f2847 commit 4afa0c9
Showing 7 changed files with 1,181 additions and 4,804 deletions.
4 changes: 2 additions & 2 deletions Makefile
@@ -17,12 +17,12 @@ update-dblp:
mv dblp-fixed.xml dblp.xml
@echo "Done."

-faculty-coauthors.csv: dblp.xml util/generate-faculty-coauthors.py
+faculty-coauthors.csv: dblp.xml util/generate-faculty-coauthors.py util/csrankings.py
@echo "Rebuilding the co-author database (faculty-coauthors.csv)."
python util/generate-faculty-coauthors.py
@echo "Done."

-generated-author-info.csv: faculty-affiliations.csv dblp.xml util/regenerate-data.py
+generated-author-info.csv: faculty-affiliations.csv dblp.xml util/regenerate-data.py util/csrankings.py
@echo "Rebuilding the publication database (generated-author-info.csv)."
python util/regenerate-data.py
@echo "Done."
3,485 changes: 448 additions & 3,037 deletions faculty-coauthors.csv

Large diffs are not rendered by default.

2,307 changes: 724 additions & 1,583 deletions generated-author-info.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion index.html
@@ -280,7 +280,7 @@ <h1>Computer Science Rankings (beta)</h1>
<tr>
<td>
Programming languages
-<small><em>PLDI, POPL, OOPSLA</em></small>
+<small><em>PLDI, POPL</em></small>
</td>
<td>
<input type="checkbox" name="field_1" id="field_1" value="1.0"/>
2 changes: 1 addition & 1 deletion util/csrankings-util.py → util/csrankings.py
@@ -43,7 +43,7 @@ def pagecount(input):


areadict = {
-'proglang' : ['POPL', 'PLDI', 'OOPSLA'],
+'proglang' : ['POPL', 'PLDI'],
'highperf' : ['SC', 'PPOPP'],
'logic' : ['CAV', 'LICS'],
'softeng' : ['ICSE', 'ICSE (2)', 'SIGSOFT FSE', 'ESEC/SIGSOFT FSE'],
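The rendered hunk shows only the removal of OOPSLA from the proglang entry. The page-count bump mentioned in the commit message is not visible here, but the old inline code named the constant pageCountThreshold, so the corresponding change inside util/csrankings.py presumably looks something like the sketch below (an assumption about where and how the constant changed, not part of the rendered diff):

# Sketch (assumed): in util/csrankings.py, papers must now span at least
# 6 pages to count, up from the previous threshold of 4.
pageCountThreshold = 6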
93 changes: 1 addition & 92 deletions util/generate-faculty-coauthors.py
@@ -1,85 +1,5 @@
from lxml import etree as ElementTree
import htmlentitydefs
import csv
import operator
import re

# import gzip

generateLog = True

parser = ElementTree.XMLParser(attribute_defaults=True, load_dtd=True)

# Author paper count threshold - the author must have written at least this many top papers to count as a co-author.
# This is meant to generally exclude students.
authorPaperCountThreshold = 5

# Papers must be at least 4 pages long to count.
pageCountThreshold = 4
# Match ordinary page numbers (as in 10-17).
pageCounterNormal = re.compile('(\d+)-(\d+)')
# Match page number in the form volume:page (as in 12:140-12:150).
pageCounterColon = re.compile('[0-9]+:([1-9][0-9]*)-[0-9]+:([1-9][0-9]*)')

def pagecount(input):
    if (input is None):
        return 0
    pageCounterMatcher1 = pageCounterNormal.match(input)
    pageCounterMatcher2 = pageCounterColon.match(input)
    start = 0
    end = 0
    count = 0

    if (not (pageCounterMatcher1 is None)):
        start = int(pageCounterMatcher1.group(1))
        end = int(pageCounterMatcher1.group(2))
        count = end-start+1
    else:
        if (not (pageCounterMatcher2 is None)):
            start = int(pageCounterMatcher2.group(1))
            end = int(pageCounterMatcher2.group(2))
            count = end-start+1
    return count
from csrankings import *


areadict = {
'proglang' : ['POPL', 'PLDI', 'OOPSLA'],
'highperf' : ['SC', 'PPOPP'],
'logic' : ['CAV', 'LICS'],
'softeng' : ['ICSE', 'ICSE (2)', 'SIGSOFT FSE', 'ESEC/SIGSOFT FSE'],
'opsys' : ['SOSP', 'OSDI'],
'arch' : ['ISCA', 'MICRO', 'ASPLOS'],
'theory' : ['STOC', 'FOCS','SODA'],
'networks' : ['SIGCOMM', 'INFOCOM', 'NSDI'],
'security' : ['IEEE Symposium on Security and Privacy', 'ACM Conference on Computer and Communications Security', 'USENIX Security Symposium'],
'mlmining' : ['NIPS', 'ICML','KDD'],
'ai' : ['AAAI', 'IJCAI'],
'database' : ['PODS', 'VLDB', 'PVLDB', 'SIGMOD Conference'],
'graphics' : ['ACM Trans. Graph.', 'SIGGRAPH'],
'metrics' : ['SIGMETRICS','IMC','Internet Measurement Conference'],
'web' : ['WWW', 'SIGIR'],
'hci' : ['CHI','UbiComp','UIST'],
'nlp' : ['EMNLP','ACL','ACL (1)','NAACL'],
'vision' : ['CVPR','ICCV'],
'mobile' : ['MobiSys','MobiCom','MOBICOM','SenSys'],
'robotics' : ['ICRA','IROS','Robotics: Science and Systems']
}

# Build a dictionary mapping conferences to areas.
# e.g., confdict['CVPR'] = 'vision'.
confdict = {}
for k, v in areadict.items():
    for item in v:
        confdict[item] = k

# The list of all areas.
arealist = areadict.keys();

# Consider pubs in this range only.
startyear = 1990
endyear = 2016


def parseDBLP(facultydict):
    coauthors = {}
    papersWritten = {}
@@ -203,17 +123,6 @@ def parseDBLP(facultydict):
    return 0


def csv2dict_str_str(fname):
    with open(fname, mode='r') as infile:
        reader = csv.reader(infile)
        #for rows in reader:
        #    print rows[0], "-->", rows[1]
        d = {unicode(rows[0].strip(),'utf-8'): unicode(rows[1].strip(),'utf-8') for rows in reader}
    return d

def sortdictionary(d):
    return sorted(d.iteritems(), key=operator.itemgetter(1), reverse = True)

facultydict = csv2dict_str_str('faculty-affiliations.csv')

parseDBLP(facultydict)
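With the shared module in place, the roughly eighty lines deleted above collapse into the single star import. Below is a small usage sketch of the helpers the script now picks up, based on the code that was removed; the explicit import list is illustrative only (the script itself uses from csrankings import *), and confdict is assumed to be built inside the module just as the old inline code built it from areadict.

# Illustrative usage of the shared helpers; not part of the diff.
from csrankings import pagecount, areadict, confdict

print(pagecount('10-17'))                # 8  -- ordinary page range
print(pagecount('12:140-12:150'))        # 11 -- volume:page range
print(pagecount(None))                   # 0  -- missing page info never counts

print(confdict['PLDI'])                  # 'proglang'
print('OOPSLA' in areadict['proglang'])  # False after this commit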
92 changes: 4 additions & 88 deletions util/regenerate-data.py
@@ -1,86 +1,13 @@
from lxml import etree as ElementTree
import htmlentitydefs
import csv
import operator
import re

# import gzip

generateLog = True

parser = ElementTree.XMLParser(attribute_defaults=True, load_dtd=True)

# Papers must be at least 4 pages long to count.
pageCountThreshold = 4
# Match ordinary page numbers (as in 10-17).
pageCounterNormal = re.compile('(\d+)-(\d+)')
# Match page number in the form volume:page (as in 12:140-12:150).
pageCounterColon = re.compile('[0-9]+:([1-9][0-9]*)-[0-9]+:([1-9][0-9]*)')

def pagecount(input):
    if (input is None):
        return 0
    pageCounterMatcher1 = pageCounterNormal.match(input)
    pageCounterMatcher2 = pageCounterColon.match(input)
    start = 0
    end = 0
    count = 0

    if (not (pageCounterMatcher1 is None)):
        start = int(pageCounterMatcher1.group(1))
        end = int(pageCounterMatcher1.group(2))
        count = end-start+1
    else:
        if (not (pageCounterMatcher2 is None)):
            start = int(pageCounterMatcher2.group(1))
            end = int(pageCounterMatcher2.group(2))
            count = end-start+1
    return count
from csrankings import *


areadict = {
'proglang' : ['POPL', 'PLDI', 'OOPSLA'],
'highperf' : ['SC', 'PPOPP'],
'logic' : ['CAV', 'LICS'],
'softeng' : ['ICSE', 'ICSE (2)', 'SIGSOFT FSE', 'ESEC/SIGSOFT FSE'],
'opsys' : ['SOSP', 'OSDI'],
'arch' : ['ISCA', 'MICRO', 'ASPLOS'],
'theory' : ['STOC', 'FOCS','SODA'],
'networks' : ['SIGCOMM', 'INFOCOM', 'NSDI'],
'security' : ['IEEE Symposium on Security and Privacy', 'ACM Conference on Computer and Communications Security', 'USENIX Security Symposium'],
'mlmining' : ['NIPS', 'ICML','KDD'],
'ai' : ['AAAI', 'IJCAI'],
'database' : ['PODS', 'VLDB', 'PVLDB', 'SIGMOD Conference'],
'graphics' : ['ACM Trans. Graph.', 'SIGGRAPH'],
'metrics' : ['SIGMETRICS','IMC','Internet Measurement Conference'],
'web' : ['WWW', 'SIGIR'],
'hci' : ['CHI','UbiComp','UIST'],
'nlp' : ['EMNLP','ACL','ACL (1)','NAACL'],
'vision' : ['CVPR','ICCV'],
'mobile' : ['MobiSys','MobiCom','MOBICOM','SenSys'],
'robotics' : ['ICRA','IROS','Robotics: Science and Systems']
}

# Build a dictionary mapping conferences to areas.
# e.g., confdict['CVPR'] = 'vision'.
confdict = {}
for k, v in areadict.items():
    for item in v:
        confdict[item] = k

# The list of all areas.
arealist = areadict.keys();

# Consider pubs in this range only.
startyear = 1990
endyear = 2016


def parseDBLP(facultydict):
    authlogs = {}
    interestingauthors = {}
    authorscores = {}
    authorscoresAdjusted = {}
    coauthors = {}
    papersWritten = {}
    counter = 0

    with open('dblp.xml', mode='r') as f:

@@ -186,17 +113,6 @@ def parseDBLP(facultydict):
    return (interestingauthors, authorscores, authorscoresAdjusted)


def csv2dict_str_str(fname):
    with open(fname, mode='r') as infile:
        reader = csv.reader(infile)
        #for rows in reader:
        #    print rows[0], "-->", rows[1]
        d = {unicode(rows[0].strip(),'utf-8'): unicode(rows[1].strip(),'utf-8') for rows in reader}
    return d

def sortdictionary(d):
    return sorted(d.iteritems(), key=operator.itemgetter(1), reverse = True)

facultydict = csv2dict_str_str('faculty-affiliations.csv')

if (generateLog):
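The CSV and sorting helpers deleted here, csv2dict_str_str and sortdictionary, were stripped from both scripts, so they presumably now live in util/csrankings.py as well. Below is a sketch of the shared definitions, copied from the removed Python 2 code; their final location is an assumption, since util/csrankings.py itself is not fully shown in this diff.

# Assumed to live in util/csrankings.py after the refactor (Python 2, as in the original).
import csv
import operator

def csv2dict_str_str(fname):
    # Read a two-column CSV into a dictionary mapping column 0 to column 1 (unicode).
    with open(fname, mode='r') as infile:
        reader = csv.reader(infile)
        d = {unicode(rows[0].strip(), 'utf-8'): unicode(rows[1].strip(), 'utf-8') for rows in reader}
    return d

def sortdictionary(d):
    # Return (key, value) pairs sorted by value, highest first.
    return sorted(d.iteritems(), key=operator.itemgetter(1), reverse=True)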
