forked from csev/py4e
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gyear.py
73 lines (60 loc) · 1.68 KB
/
gyear.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import sqlite3
import time
import urllib
import zlib
def _sender_domain(sender):
    """Return the text after the '@' in *sender*.

    Returns None when *sender* does not contain exactly one '@', so callers
    can skip addresses that cannot be attributed to a single organization.
    """
    pieces = sender.split("@")
    if len(pieces) != 2:
        return None
    return pieces[1]


def _top_orgs(cur, limit=10):
    """Return up to *limit* sender domains, most frequent first.

    Runs the Messages/Senders join on *cur* and counts one hit per message
    row for the domain of its sender.
    """
    cur.execute('''SELECT Messages.id, sender FROM Messages
        JOIN Senders ON Messages.sender_id = Senders.id''')
    sendorgs = dict()
    for message_row in cur:
        dns = _sender_domain(message_row[1])
        if dns is None:
            continue
        sendorgs[dns] = sendorgs.get(dns, 0) + 1
    # sorted() is stable, so domains with equal counts keep first-seen order.
    ranked = sorted(sendorgs, key=sendorgs.get, reverse=True)
    return ranked[:limit]


def _count_by_year(cur, orgs):
    """Count messages per (year, domain) for the domains listed in *orgs*.

    Returns a ``(counts, years)`` pair: *counts* maps ``(year, domain)`` to a
    message count and *years* is the sorted list of distinct years seen.
    Each counted message is also tallied under ``(year, 'total')``; that
    column only reaches the chart if 'total' is added to *orgs*.
    """
    cur.execute('''SELECT Messages.id, sender, sent_at FROM Messages
        JOIN Senders ON Messages.sender_id = Senders.id''')
    wanted = set(orgs)
    counts = dict()
    seen_years = set()
    for message_row in cur:
        dns = _sender_domain(message_row[1])
        if dns is None or dns not in wanted:
            continue
        # The year is taken as the first four characters of sent_at;
        # assumes sent_at is text that starts with the year -- TODO confirm.
        year = message_row[2][:4]
        seen_years.add(year)
        key = (year, dns)
        counts[key] = counts.get(key, 0) + 1
        tkey = (year, 'total')
        counts[tkey] = counts.get(tkey, 0) + 1
    return counts, sorted(seen_years)


def _render_gline(orgs, years, counts):
    """Return the JavaScript source that assigns the chart table to ``gline``.

    The first row is the header ('Year' followed by each domain); every
    later row is a year followed by that year's count for each domain, with
    0 where no messages were counted.
    """
    header = "gline = [ ['Year'" + "".join(",'" + org + "'" for org in orgs) + "]"
    rows = []
    # Every year is charted; slice years[1:-1] here to leave out the first
    # and last year.
    for year in years:
        cells = "".join("," + str(counts.get((year, org), 0)) for org in orgs)
        rows.append(",\n['" + year + "'" + cells + "]")
    return header + "".join(rows) + "\n];\n"


def main(db_path='index.sqlite', out_path='gline.js'):
    """Chart the ten busiest sender domains by year.

    Reads the Messages and Senders tables of the SQLite database at
    *db_path*, prints the intermediate results, and writes the chart data
    as JavaScript to *out_path*.  The defaults are the file names the script
    has always used.
    """
    conn = sqlite3.connect(db_path)
    conn.text_factory = str
    try:
        cur = conn.cursor()

        # Determine the top ten organizations
        orgs = _top_orgs(cur, 10)
        print("Top 10 Organizations")
        print(orgs)

        # Read through the messages again, this time bucketing by year
        counts, years = _count_by_year(cur, orgs)
    finally:
        # Release the database even if a query or a malformed row fails.
        conn.close()

    print(counts)
    print(years)

    # The context manager closes the file even if the write raises.
    with open(out_path, 'w') as fhand:
        fhand.write(_render_gline(orgs, years, counts))

    print("Data written to " + out_path)
    print("Open gline.htm in a browser to view")


if __name__ == '__main__':
    main()