Skip to content

Commit

Permalink
add report to categorize_subjects
Browse files: browse the repository at this point in the history
  • Loading branch information
jamesturk committed Jan 13, 2011
1 parent b2a133b commit c95f242
Showing 1 changed file with 22 additions and 1 deletion.
23 changes: 22 additions & 1 deletion fiftystates/backend/categorize_subjects.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -54,6 +54,9 @@

def categorize_subjects(state, data_dir, process_all):
categorizer = defaultdict(set)
categories_per_bill = defaultdict(int)
uncategorized = defaultdict(int)

reader = csv.reader(open(os.path.join(data_dir, state+'.csv')))

# build category mapping
Expand All @@ -73,10 +76,28 @@ def categorize_subjects(state, data_dir, process_all):
for bill in db.bills.find(spec):
subjects = set()
for ss in bill.get('scraped_subjects', []):
subjects.update(categorizer[ss])
categories = categorizer[ss]
if not categories:
uncategorized[ss] += 1
subjects.update(categories)
bill['subjects'] = list(subjects)

# tally this bill under the number of subjects it was assigned
categories_per_bill[len(subjects)] += 1

db.bills.save(bill)

print 'Categories per bill'
print '-------------------'
for ncats, total in sorted(categories_per_bill.items()):
print '%s categories: %s bills' % (ncats, total)

print 'Uncategorized'
print '-------------'
subjects_i = sorted([(v,k) for k,v in uncategorized.items()], reverse=True)
for n, category in subjects_i:
print '%s,%s' % (n, category)

if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='apply subject categorization for bills for a given state',
Expand Down

0 comments on commit c95f242

Please sign in to comment.