My favorites | Sign in
Project Home Downloads Wiki Issues Source
Checkout   Browse   Changes    
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
"""This file contains code for use with "Think Stats",
by Allen B. Downey, available from greenteapress.com

Copyright 2010 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""

import descriptive
import itertools
import Pmf

import Cdf
import random
import thinkstats


def SimulateSample(expected, cdf, num_nuts, factor=10, stir=-1):
"""Generates a Hist of simulated nuts.

Args:
expected: Pmf of expected values
cdf: cdf of expected values
num_nuts: number of times to nuts

Returns:
Hist object
"""
if stir == -1:
t = cdf.Sample(num_nuts)
else:
vat = MakeVat(expected, num_nuts, factor=factor, stir=stir)
t = vat[:num_nuts]

#print stir, PercentAdjacent(t)
hist = Pmf.MakeHistFromList(t)
return hist


def ChiSquared(expected, observed):
"""Compute the Chi-squared statistic for two tables.

Args:
expected: Hist of expected values
observed: Hist of observed values

Returns:
float chi-squared statistic
"""
total = 0.0
for x, exp in expected.Items():
obs = observed.Freq(x)
total += (obs - exp)**2 / exp
return total


def Test(expected, observed, num_trials=1000, stir=-1):
"""Run a simulation to estimate the p-value of the observed values.

Args:
expected: Hist of expected values
observed: Hist of observed values
num_trials: how many simulations to run

Returns:
float p-value
"""

# compute the chi-squared stat
threshold = ChiSquared(expected, observed)
print 'chi-squared', threshold

print 'simulated %d trials' % num_trials
chi2s = []
count = 0.0
num_nuts = observed.Total()
cdf = Cdf.MakeCdfFromHist(expected)

for _ in range(num_trials):
simulated = SimulateSample(expected, cdf, num_nuts, stir=stir)
chi2 = ChiSquared(expected, simulated)
chi2s.append(chi2)
if chi2 >= threshold:
count += 1

print 'max chi2', max(chi2s)

pvalue = count / num_trials
print 'p-value', pvalue

return pvalue


def ConvertToCount(sample, count_per):
"""Convert from weight to count.

sample: Hist or Pmf that maps from category to weight in pounds
count_per: dictionary that maps from category to count per ounce
"""
for value, count in sample.Items():
sample.Mult(value, 16 * count_per[value])


def MakeVat(expected, num_nuts, factor=10, stir=0.0):
"""Makes a list of nuts with the given amount of stirring."""
t = []
for value, freq in expected.Items():
t.extend([value] * int(freq * factor))

if stir == -1:
random.shuffle(t)
else:
[RandomSwap(t) for i in xrange(int(num_nuts*factor*stir))]

return t


def RandomSwap(t):
"""Chooses two random elements of the list and swaps them."""
i, j = [random.randrange(len(t)) for i in range(2)]
t[i], t[j] = t[j], t[i]


def PercentAdjacent(t):
"""Computes the fraction of adjacent pairs that are the same."""
count = 0.0
for i in range(len(t) - 1):
if t[i] == t[i+1]:
count += 1
return count / len(t)


def main():
# make a Hist of observed values
count_per = dict(cashew=17, brazil=7, almond=22, peanut=28)
#count_per = dict(cashew=12, brazil=5, almond=18, peanut=24)
sample = dict(cashew=6, brazil=3, almond=5, peanut=6)

observed = Pmf.MakeHistFromDict(sample)
ConvertToCount(observed, count_per)

advertised = dict(cashew=40, brazil=15, almond=20, peanut=25)
expected = Pmf.MakePmfFromDict(advertised)
ConvertToCount(expected, count_per)
expected.Normalize(observed.Total())

for value, e in expected.Items():
o = observed.Freq(value)
print value, e, o, 100 * (o - e)/ e, '%'

num_nuts = observed.Total()

for stir in [1.1, 1.15, 1.2, 1.25, 1.3, -1]:
vat = MakeVat(expected, num_nuts, factor=10, stir=stir)
print stir, PercentAdjacent(vat)
Test(expected, observed, num_trials=100, stir=stir)

if __name__ == "__main__":
main()

Change log

r216 by allendowney on Jan 4, 2012   Diff
Revised an exercise.
Go to: 
Project members, sign in to write a code review

Older revisions

r184 by allendowney on Aug 29, 2011   Diff
Adding jimmy.py
r175 by allendowney on Jun 10, 2011   Diff
Updating rarefaction.py
r169 by allendowney on May 31, 2011   Diff
Adding rarefaction_test.py
All revisions of this file

File info

Size: 4146 bytes, 158 lines
Powered by Google Project Hosting