1 """
2 This module implements classes to represent an arbitrary Bayesian random variable.
3
4 This is experimental code! Do not use for serious applications!
5
6 """
7
8
9 from BIP.Bayes import like
10 from numpy import arange
11 from numpy import array
12 from numpy import compress
13 from numpy import exp
14 from numpy import greater
15 from numpy import less
16 from numpy import ones
17 from numpy import searchsorted
18 from numpy import sqrt
19 import pylab as P
20 from scipy import stats
21 import sys
22
23 from BIP.Viz.asciihist import Histogram
24
__docformat__ = "restructuredtext en"

# Likelihood family -> family of its conjugate prior, discrete likelihoods.
discrete_conjugate_priors = {
    'Bernoulli': 'Beta',
    'Binomial': 'Beta',
    'Negative Binomial': 'Beta',
    'Poisson': 'Gamma',
    'Multinomial': 'Dirichlet',
    'Geometric': 'Beta',
}

# Likelihood family -> family of its conjugate prior, continuous likelihoods.
continuous_conjugate_priors = {
    'Uniform': 'Pareto',
    'Exponential': 'Gamma',
    'Normal': 'Normal',
    'Pareto': 'Gamma',
    'Gamma': 'Gamma',
    'Inverse Gamma': 'Gamma',
}
43
44
def BayesVar(priortype, pars, range, resolution=1024):
    """
    Factory function for continuous and discrete variables.

    :Parameters:
        - `priortype`: distribution object from scipy.stats (an instance of
          ``rv_continuous`` or ``rv_discrete``, e.g. ``stats.norm``).
        - `pars`: parameters forwarded to the variable constructor.
        - `range`: range forwarded to the variable constructor.
        - `resolution`: resolution forwarded to the variable constructor.

    :Return:
        a ``__BayesC`` object for continuous priors, a ``__BayesD`` object
        for discrete priors.

    :Raises:
        TypeError when `priortype` is neither continuous nor discrete.
    """
    if isinstance(priortype, stats.rv_continuous):
        return __BayesC(priortype, pars, range, resolution)
    # This test must look at `priortype`; checking any other (undefined)
    # name makes every non-continuous prior fail with a NameError.
    if isinstance(priortype, stats.rv_discrete):
        return __BayesD(priortype, pars, range, resolution)
    # Fail loudly instead of silently handing None back to the caller.
    raise TypeError('Invalid distribution object')
53
54
class _BayesVar(object):
    """
    Bayesian random variate.

    Holds a prior built from a scipy.stats distribution, the data added to
    it, the likelihood evaluated on a regular grid over the support and the
    most recent posterior sample.
    """

    def __init__(self, disttype, pars, rang, resolution=1024):
        '''
        Initializes random variable.

        :parameters:
            - `disttype`: must be a valid RNG class from scipy.stats
            - `pars`: are the parameters of the distribution.
            - `rang`: range of the variable support.
            - `resolution`: resolution of the support.
        '''
        self.distn = disttype.name
        self._flavorize(disttype(*pars), disttype)
        self.pars = pars
        self.rang = rang
        # Grid step: width of the support divided by the number of points.
        self.res = (rang[1] - rang[0]) * 1. / resolution
        self.likelihood = None
        self.data = None
        self.posterior = array([])

    def __str__(self):
        '''
        :Return:
            ascii histogram of the variable
        '''
        # `.any()` is false for an empty posterior; in that case a fresh
        # posterior sample is drawn for the plot.
        if self.posterior.any():
            d = self.posterior
        else:
            d = self.get_posterior_sample(200000)
        name = self.distn + str(self.pars)
        h = Histogram(d, bins=10)
        return name + '\n' + h.vertical()

    def _flavorize(self, pt, ptbase):
        '''
        Add methods from distribution type

        :Parameters:
            - `pt`: frozen distribution whose methods are copied.
            - `ptbase`: distribution object `pt` was built from; decides
              whether ``pdf`` maps to the density or to the mass function.

        :Raises:
            TypeError when `ptbase` is neither continuous nor discrete.
        '''
        self.cdf = pt.cdf
        self.isf = pt.isf
        if isinstance(ptbase, stats.rv_continuous):
            self.pdf = pt.pdf
        elif isinstance(ptbase, stats.rv_discrete):
            self.pdf = pt.pmf
        else:
            # Library code should not terminate the interpreter.
            raise TypeError('Invalid distribution object')
        self.ppf = pt.ppf
        self.rvs = pt.rvs

    def _update(self, model):
        """
        Calculate likelihood function

        :Parameters:
            - `model`: name of the likelihood family, as accepted by
              `_likelihood`.

        Stores the normalised likelihood over the support grid in
        ``self.likelihood``. Does nothing when no data has been added.
        """
        # `is None` rather than `!= None`: comparing an array with `!=` is
        # elementwise and cannot be used as a condition.
        if self.data is None:
            return
        d = self.data
        variance = d.var()  # same for every grid point, so compute it once
        likefun = self._likelihood(model)
        support = arange(self.rang[0], self.rang[1], self.res)
        lik = exp(array([likefun((d, i, variance)) for i in support]))
        self.likelihood = lik / sum(lik)

    def add_data(self, data, model):
        """
        Updates variable with information from dataset

        :Parameters:
            - `data`: sequence of numbers
            - `model`: probabilistic model underlying data
        """
        self.data = array(data)
        self._update(model.dist.name)

    def get_prior_sample(self, n):
        '''
        Returns a sample from the prior distribution

        :Parameters:
            - `n`: Sample size.
        '''
        return self.rvs(size=n)

    def get_prior_dist(self):
        """
        Returns the prior PDF.
        """
        return self.pdf(arange(self.rang[0], self.rang[1], self.res))

    def get_posterior_sample(self, n):
        """
        Return a sample of the posterior distribution.
        Uses SIR algorithm.

        :Parameters:
            - `n`: Sample size.

        :Return:
            array with the accepted draws; an empty array when no data has
            been added. The result is also stored in ``self.posterior``.
        """
        if self.posterior.any():
            # Resample from a kernel density estimate of the last posterior.
            k = stats.gaussian_kde(self.posterior)
            s = k.resample(n)
        else:
            s = self.get_prior_sample(n)
        if self.data is None:
            return array([])
        m = self.rang[0]
        M = self.rang[1]
        supp = arange(m, M, self.res)
        # Keep only the candidates strictly inside the support.
        s = compress(less(s.ravel(), M) & greater(s.ravel(), m), s)
        d = stats.uniform.rvs(loc=0, scale=1, size=len(s))
        # NOTE(review): the weight multiplies the likelihood by the prior
        # density although candidates may already come from the prior -
        # confirm this is intended.
        w = self.pdf(supp) * self.likelihood
        w = w / sum(w)
        # Map every candidate to the grid cell just below it.
        sx = searchsorted(supp, s)
        w = w[sx - 1]
        post = compress(d < w, s)
        self.posterior = post
        return post

    def _likelihood(self, dname):
        '''
        Defines parametric family of the likelihood function.
        Returns likelihood function.

        :Parameters:
            - `dname`: must be a string.
        :Return:
            lambda function to calculate the likelihood.

        Each function takes a single tuple ``(data, parameter, variance)``,
        which is how `_update` calls it. An unknown `dname` raises KeyError.
        '''
        # NOTE(review): the 'expon' entry never reads x[1], so its value does
        # not depend on the candidate parameter - confirm this is intended.
        like_funs = {
            'norm': lambda x: like.Normal(x[0], x[1], 1. / x[2]),
            'expon': lambda x: (1. / x[2]) ** x[0].size * exp(-(1. / x[2]) * sum(x[0])),
            'beta': lambda x: like.Beta(x[0], x[1], x[2]),
            'uniform': lambda x: like.Uniform(x[0], x[1] - 2 * sqrt(x[2]), x[1] + 2 * sqrt(x[2])),
        }
        return like_funs[dname]

    def _post_from_conjugate(self, dname, *pars):
        '''
        Returns posterior distribution function using conjugate prior theory

        NOTE(review): this looks unfinished - only 'bernoulli' is handled
        and the distribution that is built is never returned.
        '''
        # Explicit checks: the truth value of an array is ambiguous.
        if self.data is None or self.data.size == 0:
            return
        if dname == 'bernoulli':
            pdist = stats.beta(pars[0])
203
204
# Continuous variable: BayesVar returns this class when the prior is a
# stats.rv_continuous instance.
# NOTE(review): the body of __init__ is missing from this listing; it
# presumably forwards its arguments to _BayesVar.__init__ - confirm.
205 -class __BayesC(_BayesVar, stats.rv_continuous):
206 - def __init__(self, priortype, pars, range, resolution=512):
208
# Discrete variable: BayesVar returns this class when the prior is a
# stats.rv_discrete instance.
# NOTE(review): the body of __init__ is missing from this listing; it
# presumably forwards its arguments to _BayesVar.__init__ - confirm.
209 -class __BayesD(_BayesVar, stats.rv_discrete):
210 - def __init__(self, priortype, pars, range, resolution=512):
212
if __name__ == "__main__":
    # Demo: a normal prior on (0, 5) updated with uniformly distributed
    # data, then likelihood, prior and posterior plotted together.
    bv = BayesVar(stats.norm, (3, 1), range=(0, 5), resolution=1000)
    data = stats.uniform(1, 3).rvs(500)
    bv.add_data(data, stats.uniform(1, 3))
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(bv)
    p = bv.get_posterior_sample(200000)
    print(bv)
    # Both curves share the same support grid; build it once.
    support = arange(bv.rang[0], bv.rang[1], bv.res)
    P.plot(support, bv.likelihood / max(bv.likelihood), 'ro', lw=2)
    P.plot(support, bv.get_prior_dist(), 'g+', lw=2)
    print(p)
    # `density` replaces the `normed` keyword removed from matplotlib.
    P.hist(p, density=True)
    P.legend(['Likelihood', 'Prior', 'Posterior'])
    P.title('Bayesian inference')
    P.savefig('bayesvar.png', dpi=400)
    P.show()
229