# probability.py -- forked from AllenDowney/ThinkStats2
"""This file contains code used in "Think Stats",
by Allen B. Downey, available from greenteapress.com
Copyright 2014 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
from __future__ import print_function
import math
import numpy as np
import nsfg
import first
import thinkstats2
import thinkplot
def MakeHists(live):
    """Plot a Hist of `agepreg` for live births in two panels.

    Builds one Hist from `live.agepreg` rounded down to whole numbers,
    draws it with thinkplot.Hist in the first subplot and with
    thinkplot.Pmf in the second, then saves the figure under the root
    name 'probability_agepreg_hist'.

    live: DataFrame with an `agepreg` column
    """
    hist = thinkstats2.Hist(np.floor(live.agepreg), label='agepreg')
    thinkplot.PrePlot(2, cols=2)

    # first panel: the Hist drawn with thinkplot.Hist
    thinkplot.SubPlot(1)
    thinkplot.Hist(hist)
    thinkplot.Config(xlabel='years',
                     ylabel='frequency',
                     axis=[0, 45, 0, 700])

    # second panel: the same Hist object drawn with thinkplot.Pmf;
    # Save receives the same axis limits as the first panel
    thinkplot.SubPlot(2)
    thinkplot.Pmf(hist)
    thinkplot.Save(root='probability_agepreg_hist',
                   xlabel='years',
                   axis=[0, 45, 0, 700])
def MakeFigures(firsts, others):
    """Plot Pmfs of pregnancy length and the differences between them.

    Saves two figures:
      'probability_nsfg_pmf': the two Pmfs of `prglngth`, drawn with
          thinkplot.Hist in the first panel and thinkplot.Pmfs in the
          second.
      'probability_nsfg_diffs': for weeks 35 through 45, the difference
          between the two Pmfs (first minus other), multiplied by 100.

    firsts: DataFrame with a `prglngth` column
    others: DataFrame with a `prglngth` column
    """
    # plot the PMFs
    first_pmf = thinkstats2.Pmf(firsts.prglngth, label='first')
    other_pmf = thinkstats2.Pmf(others.prglngth, label='other')

    width = 0.45
    thinkplot.PrePlot(2, cols=2)
    thinkplot.Hist(first_pmf, align='right', width=width)
    thinkplot.Hist(other_pmf, align='left', width=width)
    thinkplot.Config(xlabel='weeks',
                     ylabel='probability',
                     axis=[27, 46, 0, 0.6])

    # NOTE(review): PrePlot is called again before the second panel,
    # presumably so both panels use the same colors -- confirm against
    # thinkplot.
    thinkplot.PrePlot(2)
    thinkplot.SubPlot(2)
    thinkplot.Pmfs([first_pmf, other_pmf])
    thinkplot.Save(root='probability_nsfg_pmf',
                   xlabel='weeks',
                   axis=[27, 46, 0, 0.6])

    # plot the differences in the PMFs; the factor of 100 matches the
    # 'percentage points' label on the y axis
    weeks = range(35, 46)
    diffs = [100 * (first_pmf.Prob(week) - other_pmf.Prob(week))
             for week in weeks]

    thinkplot.Bar(weeks, diffs)
    thinkplot.Save(root='probability_nsfg_diffs',
                   title='Difference in PMFs',
                   xlabel='weeks',
                   ylabel='percentage points',
                   legend=False)
def BiasPmf(pmf, label=''):
    """Returns the Pmf with oversampling proportional to value.

    If pmf is the distribution of true values, the result is the
    distribution that would be seen if values are oversampled in
    proportion to their values; for example, if you ask students
    how big their classes are, large classes are oversampled in
    proportion to their size.

    The input pmf is not modified; the work is done on a copy.

    Args:
      pmf: Pmf object.
      label: string label for the new Pmf.

    Returns:
      Pmf object
    """
    new_pmf = pmf.Copy(label=label)

    # iterate over the original so the copy can be updated inside the
    # loop; only the value x is needed, not its probability
    for x, _ in pmf.Items():
        new_pmf.Mult(x, x)

    new_pmf.Normalize()
    return new_pmf
def UnbiasPmf(pmf, label=''):
    """Returns the Pmf with oversampling proportional to 1/value.

    This is the inverse of biasing in proportion to value: each
    probability is multiplied by 1.0/x and the result is renormalized.
    Every value x in pmf must therefore be nonzero.

    The input pmf is not modified; the work is done on a copy.

    Args:
      pmf: Pmf object.
      label: string label for the new Pmf.

    Returns:
      Pmf object
    """
    new_pmf = pmf.Copy(label=label)

    # iterate over the original so the copy can be updated inside the
    # loop; only the value x is needed, not its probability
    for x, _ in pmf.Items():
        new_pmf.Mult(x, 1.0/x)

    new_pmf.Normalize()
    return new_pmf
def ClassSizes():
    """Generate PMFs of observed and actual class size.

    Prints the mean and variance of three Pmfs in this order: the
    actual distribution, the biased ('observed') distribution, and the
    result of unbiasing the biased one. Then plots the actual and
    observed Pmfs and saves the figure under the root name
    'class_size1'. The unbiased Pmf is only printed, not plotted.
    """
    # start with the actual distribution of class sizes from the book
    class_sizes = {7: 8, 12: 8, 17: 14, 22: 4,
                   27: 6, 32: 12, 37: 8, 42: 3, 47: 2}

    # form the pmf
    pmf = thinkstats2.Pmf(class_sizes, label='actual')
    print('mean', pmf.Mean())
    print('var', pmf.Var())

    # compute the biased pmf
    biased_pmf = BiasPmf(pmf, label='observed')
    print('mean', biased_pmf.Mean())
    print('var', biased_pmf.Var())

    # unbias the biased pmf
    unbiased_pmf = UnbiasPmf(biased_pmf, label='unbiased')
    print('mean', unbiased_pmf.Mean())
    print('var', unbiased_pmf.Var())

    # plot the Pmfs
    thinkplot.PrePlot(2)
    thinkplot.Pmfs([pmf, biased_pmf])
    thinkplot.Save(root='class_size1',
                   xlabel='class size',
                   ylabel='PMF',
                   axis=[0, 52, 0, 0.27])
def main(script):
    """Generate every figure and printed summary in this module.

    Loads the DataFrames with first.MakeFrames, then runs MakeFigures,
    MakeHists and ClassSizes in that order.

    script: first command-line argument (sys.argv[0]) when run as a
        script; not used by the function
    """
    live, firsts, others = first.MakeFrames()
    MakeFigures(firsts, others)
    MakeHists(live)
    ClassSizes()
if __name__ == '__main__':
    import sys
    # sys.argv is unpacked positionally: sys.argv[0] becomes main's
    # `script` argument. main takes one positional parameter, so any
    # extra command-line argument raises TypeError.
    main(*sys.argv)