Package trunk :: Package BIP :: Package Bayes :: Module lhs
[hide private]

Source Code for Module trunk.BIP.Bayes.lhs

  1  #!/usr/bin/python 
  2  # -*- coding:utf-8 -*- 
  3  #----------------------------------------------------------------------------- 
  4  # Name:        lhs.py 
  5  # Project:  Bayesian-Inference 
  6  # Purpose:     Latin Hypercube Sampling (LHS) routines 
  7  # 
  8  # Author:      Flávio Codeço Coelho<fccoelho@gmail.com> 
  9  # 
 10  # Created:     2008-11-26 
 11  # Copyright:   (c) 2008 by the Author 
 12  # Licence:     GPL 
 13  #----------------------------------------------------------------------------- 
 14  __docformat__ = "restructuredtext en" 
 15  #from pylab import plot, figure,hist,show, savefig, legend 
 16  import scipy.stats as stats 
 17  import numpy 
 18  from numpy.linalg import cholesky,inv 
 19  from numpy.random import uniform 
 20   
def lhsFromSample(sample, siz=100):
    """
    Latin Hypercube Sample from a set of values.
    For univariate distributions only.

    The percentile range [0, 100) is divided into ``n`` equal-width strata
    (``n`` being the total number of requested values). One percentile is
    drawn uniformly inside each stratum and the matching empirical quantile
    of `sample` is returned. The strata are visited in random order.

    :Parameters:
        - `sample`: list, tuple or numpy array of observed values
        - `siz`: Number or shape tuple for the output sample

    :Returns: numpy array holding ``n`` values, reshaped to `siz` when
        `siz` is a tuple or list.

    :Raises TypeError: if `sample` is not a list, tuple or numpy array.
    """
    #TODO: add support to correlation restricted multivariate samples
    if not isinstance(sample, (list, tuple, numpy.ndarray)):
        raise TypeError('sample is not a list, tuple or numpy vector')
    shaped = isinstance(siz, (tuple, list))
    n = int(numpy.prod(siz)) if shaped else int(siz)
    width = 100. / n
    # Build the stratum lower bounds from an integer range: arange() with a
    # float step does not guarantee exactly n elements.
    lower = numpy.arange(n) * width
    numpy.random.shuffle(lower)
    # One uniform draw per stratum; the clip guards against a draw being
    # rounded up past the 100th percentile.
    pcts = numpy.minimum(numpy.random.uniform(lower, lower + width), 100.)
    # percentile() flattens its input and interpolates linearly.
    v = numpy.percentile(numpy.asarray(sample), pcts)
    if shaped:
        v = v.reshape(siz)
    return v
43
def lhsFromDensity(kde, siz=100):
    """
    LHS sampling from a variable's Kernel density estimate.

    ``n`` values (the total number of elements requested by `siz`) are
    first drawn from `kde`; a Latin Hypercube Sample of the same size is
    then taken from those values with `lhsFromSample`.

    :Parameters:
        - `kde`: scipy.stats gaussian_kde object
        - `siz`: Number or shape tuple for the output sample

    :Returns: numpy array holding ``n`` values, reshaped to `siz` when
        `siz` is a tuple or list.

    :Raises TypeError: if `kde` is not a gaussian_kde instance.
    """
    # Reach the class through the `stats` alias: the bare name `scipy` is
    # not imported by this module.
    if not isinstance(kde, stats.gaussian_kde):
        raise TypeError("kde is not a density object")
    shaped = isinstance(siz, (tuple, list))
    # n must be defined for plain numeric sizes as well as for shapes.
    n = int(numpy.prod(siz)) if shaped else int(siz)
    # resample() returns a 2-D (dimensions, n) array; flatten it since only
    # univariate densities are supported here.
    s = numpy.ravel(kde.resample(n))
    v = lhsFromSample(s, n)
    if shaped:
        v = v.reshape(siz)
    return v
61 62
def lhs(dist, parms, siz=100, noCorrRestr=False, corrmat=None):
    """
    Latin Hypercube sampling of any distribution.
    dist is a scipy.stats random number generator
    such as stats.norm, stats.beta, etc.
    parms is a tuple with the parameters needed for
    the specified distribution.

    For each distribution the unit interval is divided into ``n`` equal
    strata (``n`` being the total number of elements requested by `siz`),
    one probability is drawn uniformly inside each stratum and mapped
    through the distribution's ``ppf``. The resulting values are then
    reordered with the indices returned by `rank_restr`.

    :Parameters:
        - `dist`: random number generator from scipy.stats module or a list of them.
        - `parms`: tuple of parameters as required for dist, or a list of them.
        - `siz` :number or shape tuple for the output sample
        - `noCorrRestr`: if true, does not enforce correlation structure on the sample.
        - `corrmat`: Correlation matrix

    :Returns: a single numpy array when one distribution is given,
        otherwise a list with one array per distribution.

    :Raises TypeError: if any element of `dist` is not a scipy.stats
        distribution object.
    """
    if not isinstance(dist, (list, tuple)):
        dists = [dist]
        parms = [parms]
    else:
        assert len(dist) == len(parms)
        dists = dist
    # Validate every distribution up front, before any ranking work is done.
    for d in dists:
        if not isinstance(d, (stats.rv_discrete, stats.rv_continuous)):
            raise TypeError('dist is not a scipy.stats distribution object')
    shaped = isinstance(siz, (tuple, list))
    n = int(numpy.prod(siz)) if shaped else int(siz)
    indices = rank_restr(nvars=len(dists), smp=siz, noCorrRestr=noCorrRestr, Corrmat=corrmat)
    step = 1. / n
    # Stratum lower bounds; built from an integer range because arange()
    # with a float step does not guarantee exactly n elements.
    lower = numpy.arange(n) * step
    smplist = []
    for j, d in enumerate(dists):
        #force type to float for sage compatibility
        pars = tuple([float(k) for k in parms[j]])
        # one uniform draw inside each stratum
        s_pos = uniform(lower, lower + step)
        v = numpy.asarray(d(*pars).ppf(s_pos))
        # rank_restr indices are shifted down by one before use; build a
        # real integer array so indexing works on Python 2 and 3 alike.
        index = (numpy.asarray(indices[j]) - 1).astype(int)
        v = v[index]
        if shaped:
            v = v.reshape(siz)
        smplist.append(v)
    if len(dists) == 1:
        return smplist[0]
    return smplist
108
def rank_restr(nvars=4, smp=100, noCorrRestr=False, Corrmat=None):
    """
    Returns the indices for sampling variables with
    the desired correlation structure.

    The result is a list with one array per variable. Each array holds the
    1-based ranks 1..smp in the order in which the strata of that variable
    should be visited.

    :Parameters:
        - `nvars`: number of variables
        - `smp`: number of samples (or a shape tuple, whose product is used)
        - `noCorrRestr`: No correlation restriction if True
        - `Corrmat`: Correlation matrix. If None, assure uncorrelated samples.

    :Returns: list of `nvars` arrays of length `smp`.

    :Raises TypeError: if `Corrmat` is not an (nvars x nvars) matrix.
    """
    if isinstance(smp, (tuple, list)):
        smp = numpy.prod(smp)
    smp = int(smp)
    if noCorrRestr or nvars == 1:
        # Independent random permutations: every stratum is used exactly
        # once, with the same 1-based convention as the correlated branch.
        return [numpy.random.permutation(smp) + 1 for _ in range(nvars)]
    if Corrmat is None:
        C = numpy.identity(nvars)
    else:
        C = numpy.asarray(Corrmat, dtype=float)
        if C.shape != (nvars, nvars):
            raise TypeError('Correlation matrix must be of rank %s'%nvars)
    # Normal scores of the ranks, independently shuffled for each variable.
    s0 = numpy.arange(1., smp + 1) / (smp + 1.)
    s = stats.norm().ppf(s0)
    S = numpy.array([numpy.random.permutation(s) for _ in range(nvars)])
    P = cholesky(C)
    # Q removes the correlation the shuffles happened to produce before P
    # imposes the requested one.
    Q = cholesky(numpy.corrcoef(S))
    Final = numpy.dot(numpy.dot(S.T, inv(Q).T), P.T)
    return [stats.rankdata(Final[:, i]) for i in range(nvars)]


if __name__ == '__main__':
    dist = stats.uniform, stats.uniform
    parms = (0, 1.), (0, 1.)
    print(lhs(dist, parms, siz=4))

    import pylab as P
    #dist = stats.norm
    dist = stats.beta
    #pars = (50,2)
    pars = (1, 5) #beta
    b = lhs(dist, pars, 1000)
    cm = numpy.array([[1, .8], [.8, 1]])
    c = lhs([dist, dist], [pars, pars], 2000, False, cm)
    #print stats.pearsonr(c[0],c[1]), stats.spearmanr(c[0],c[1])
    #P.hist(c[0],normed=1)#, label='c0 sample')
    P.scatter(c[0], c[1])
    #P.hist(c[1],normed=1)#, label='c1 sample')
    #print c[0].shape,c[1].shape
    n = dist(*pars).rvs(size=20)
    #hist(n.ravel(),facecolor='r',alpha =0.3,normed=1, label='Regular sample')
    #plot(numpy.arange(min(min(c),min(n)),max(max(c),max(n)),.1),dist(*pars).pdf(numpy.arange(min(min(c),min(n)),max(max(c),max(n)),.1)),label='PDF')
    #legend()
    #savefig('lhs.png',dpi=400)
    # lhs([stats.norm]*19,[(0,1)]*19,17,False,numpy.identity(19))
    P.show()


#TODO: Extend lhsFromSample to allow multivariate correlated sampling