This repository was archived by the owner on Dec 23, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathenrol_undp.py
62 lines (51 loc) · 1.62 KB
/
enrol_undp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import logging
import dl
import messytables
import xypath
import re
import orm
import requests
import lxml.html
"""Value: dsID, region, indID, period, value, source, is_number
DataSet: dsID, last_updated, last_scraped, name
Indicator: indID, name, units
"""
# Module-level logger: records go both to the default stream handler
# (stderr) and to a log file in the current working directory.
# NOTE(review): the logger name and file name say "unicef" although this
# scraper targets UNDP -- presumably copied from a sibling scraper; confirm
# before renaming, since other tooling may rely on these names.
log = logging.getLogger("unicef")
log.addHandler(logging.StreamHandler())
log.addHandler(logging.FileHandler("unicef.log"))
# Use the public setter rather than assigning ``log.level`` directly, so the
# level is validated and any cached isEnabledFor() results are invalidated.
log.setLevel(logging.WARN)
# Metadata row describing this dataset; passed to orm.DataSet in the
# script entry point.
dataset = dict(
    dsID="enrol-undp",
    last_updated=None,
    last_scraped=orm.now(),
    name="UNDP - Education",
)

# Metadata row describing the single indicator this scraper produces;
# passed to orm.Indicator in the script entry point.
indicator = dict(
    indID="PVE120",
    name="Combined gross enrolment in education (both sexes)",
    units="Percentage",
)

# Fields shared by every value row; getstats() copies this dict and fills in
# the per-cell fields before saving.  The ids are taken from the metadata
# dicts above so the three records cannot drift apart.
output_template = dict(
    dsID=dataset["dsID"],
    is_number=True,
    indID=indicator["indID"],
)
def getstats():
    """Scrape the UNDP enrolment table and save one ``orm.Value`` per cell.

    The page at the hard-coded URL is fetched with ``dl.grab`` and parsed by
    messytables; only the first table found is used.  The single cell whose
    text contains 'Country' is treated as the pivot: cells to its right are
    the year headers, cells below it are the country names, and each
    year/country intersection is stored as one value row built from
    ``output_template``.

    Raises:
        ValueError: if the table does not contain exactly one cell mentioning
            'Country' (raised by the single-element unpacking of the filter
            result).
        AssertionError: if no value rows were saved at all.
    """
    url = 'http://hdr.undp.org/en/content/combined-gross-enrolment-education-both-sexes'
    handle = dl.grab(url)
    mts = messytables.any.any_tableset(handle)
    mt = mts.tables[0]
    table = xypath.Table.from_messy(mt)

    # The trailing comma insists on exactly one matching header cell.
    pivot, = table.filter(lambda c: 'Country' in c.value)
    years = pivot.fill(xypath.RIGHT)
    countries = pivot.fill(xypath.DOWN)

    saves = 0
    for year, country, value in years.junction(countries):
        output = dict(output_template)
        output['source'] = url
        output['region'] = country.value.strip()
        # Record which year column the cell came from; without it the rows
        # for different years of one country are indistinguishable.
        output['period'] = year.value.strip()
        output['value'] = value.value.strip()
        orm.Value(**output).save()
        saves += 1

    # Explicit raise instead of a bare ``assert`` so the sanity check still
    # runs under ``python -O``; the exception type is unchanged.
    if not saves:
        raise AssertionError("no values were scraped from %s" % url)
if __name__ == "__main__":
    # Save the dataset and indicator metadata rows first, then scrape and
    # save the value rows that carry the same dsID / indID.
    for model, fields in ((orm.DataSet, dataset), (orm.Indicator, indicator)):
        model(**fields).save()
    getstats()