27 (Requires pstat.py module.) 29 ################################################# 30 ####### Written by: Gary Strangman ########### 31 ####### Last modified: Dec 18, 2007 ########### 32 ################################################# 34 A collection of basic statistical functions for python. The function 37 IMPORTANT: There are really *3* sets of functions. The first set has an 'l' 38 prefix, which can be used with list or tuple arguments. The second set has 39 an 'a' prefix, which can accept NumPy array arguments. These latter 40 functions are defined only when NumPy is available on the system. The third 41 type has NO prefix (i.e., has the name that appears below). Functions of 42 this set are members of a "Dispatch" class, c/o David Ascher. This class 43 allows different functions to be called depending on the type of the passed 44 arguments. Thus, stats.mean is a member of the Dispatch class and 45 stats.mean(range(20)) will call stats.lmean(range(20)) while 46 stats.mean(Numeric.arange(20)) will call stats.amean(Numeric.arange(20)). 47 This is a handy way to keep consistent function names when different 48 argument types require different functions to be called. Having 49 implementated the Dispatch class, however, means that to get info on 50 a given function, you must use the REAL function name ... that is 51 "print stats.lmean.__doc__" or "print stats.amean.__doc__" work fine, 52 while "print stats.mean.__doc__" will print the doc for the Dispatch 53 class. NUMPY FUNCTIONS ('a' prefix) generally have more argument options 54 but should otherwise be consistent with the corresponding list functions. 56 Disclaimers: The function list is obviously incomplete and, worse, the 57 functions are not optimized. All functions have been tested (some more 58 so than others), but they are far from bulletproof. Thus, as with any 59 free software, no warranty or guarantee is expressed or implied. :-) A 60 few extra functions that don't appear in the list below can be found by 61 interested treasure-hunters. These functions don't necessarily have 62 both list and array versions but were deemed useful 64 CENTRAL TENDENCY: geometricmean 75 skewtest (for Numpy arrays only) 76 kurtosistest (for Numpy arrays only) 77 normaltest (for Numpy arrays only) 79 ALTERED VERSIONS: tmean (for Numpy arrays only) 80 tvar (for Numpy arrays only) 81 tmin (for Numpy arrays only) 82 tmax (for Numpy arrays only) 83 tstdev (for Numpy arrays only) 84 tsem (for Numpy arrays only) 87 FREQUENCY STATS: itemfreq 94 VARIABILITY: obrientransform 97 signaltonoise (for Numpy arrays only) 104 zmap (for Numpy arrays only) 106 TRIMMING FCNS: threshold (for Numpy arrays only) 109 round (round all vals to 'n' decimals; Numpy only) 111 CORRELATION FCNS: covariance (for Numpy arrays only) 112 correlation (for Numpy arrays only) 120 INFERENTIAL STATS: ttest_1samp 131 PROBABILITY CALCS: chisqprob 140 ANOVA FUNCTIONS: F_oneway 143 SUPPORT FUNCTIONS: writecc 145 sign (for Numpy arrays only) 226 import math, string, copy
236 The Dispatch class, care of David Ascher, allows different functions to 237 be called depending on the argument types. This way, there can be one 238 function name regardless of the argument type. To access function doc 239 in stats.py module, prefix the function with an 'l' or 'a' for list or 240 array arguments, respectively. That is, print stats.lmean.__doc__ or 241 print stats.amean.__doc__ or whatever. 246 for func, types
in tuples:
248 if t
in self._dispatch.keys():
249 raise ValueError,
"can't have two dispatches on "+str(t)
254 if type(arg1)
not in self.
_types:
255 raise TypeError,
"don't know how to dispatch %s arguments" % type(arg1)
256 return apply(self.
_dispatch[type(arg1)], (arg1,) + args, kw)
271 Calculates the geometric mean of the values in the passed list. 272 That is: n-th root of (x1 * x2 * ... * xn). Assumes a '1D' list. 274 Usage: lgeometricmean(inlist) 277 one_over_n = 1.0/len(inlist)
279 mult = mult * pow(item,one_over_n)
285 Calculates the harmonic mean of the values in the passed list. 286 That is: n / (1/x1 + 1/x2 + ... + 1/xn). Assumes a '1D' list. 288 Usage: lharmonicmean(inlist) 293 return len(inlist) / sum
298 Returns the arithematic mean of the values in the passed list. 299 Assumes a '1D' list, but will function on the 1st dim of an array(!). 306 return sum/float(len(inlist))
311 Returns the computed median value of a list of numbers, given the 312 number of bins to use for the histogram (more bins brings the computed value 313 closer to the median score, default number of bins = 1000). See G.W. 314 Heiman's Basic Stats (1st Edition), or CRC Probability & Statistics. 316 Usage: lmedian (inlist, numbins=1000) 318 (hist, smallest, binsize, extras) =
histogram(inlist,numbins,[min(inlist),max(inlist)])
320 for i
in range(len(cumhist)):
321 if cumhist[i]>=len(inlist)/2.0:
324 LRL = smallest + binsize*cfbin
325 cfbelow = cumhist[cfbin-1]
326 freq = float(hist[cfbin])
327 median = LRL + ((len(inlist)/2.0 - cfbelow)/float(freq))*binsize
333 Returns the 'middle' score of the passed list. If there is an even 334 number of scores, the mean of the 2 middle scores is returned. 336 Usage: lmedianscore(inlist) 339 newlist = copy.deepcopy(inlist)
341 if len(newlist) % 2 == 0:
342 index = len(newlist)/2
343 median = float(newlist[index] + newlist[index-1]) /2
345 index = len(newlist)/2
346 median = newlist[index]
352 Returns a list of the modal (most common) score(s) in the passed 353 list. If there is more than one such score, all are returned. The 354 bin-count for the mode(s) is also returned. 357 Returns: bin-count for mode(s), a list of modal value(s) 360 scores = pstat.unique(inlist)
364 freq.append(inlist.count(item))
370 indx = freq.index(maxfreq)
371 mode.append(scores[indx])
385 Calculates the nth moment about the mean for a sample (defaults to 386 the 1st moment). Used to calculate coefficients of skewness and kurtosis. 388 Usage: lmoment(inlist,moment=1) 389 Returns: appropriate moment (r) from ... 1/n * SUM((inlist(i)-mean)**r) 398 s = s + (x-mn)**moment
404 Returns the coefficient of variation, as defined in CRC Standard 405 Probability and Statistics, p.6. 407 Usage: lvariation(inlist) 414 Returns the skewness of a distribution, as defined in Numerical 415 Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.) 424 Returns the kurtosis of a distribution, as defined in Numerical 425 Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.) 427 Usage: lkurtosis(inlist) 434 Returns some descriptive statistics of the passed list (assumed to be 1D). 436 Usage: ldescribe(inlist) 437 Returns: n, mean, standard deviation, skew, kurtosis 440 mm = (min(inlist),max(inlist))
445 return n, mm, m, sd, sk, kurt
454 Returns a list of pairs. Each pair consists of one of the scores in inlist 455 and it's frequency count. Assumes a 1D list is passed. 457 Usage: litemfreq(inlist) 458 Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies) 460 scores = pstat.unique(inlist)
464 freq.append(inlist.count(item))
465 return pstat.abut(scores, freq)
470 Returns the score at a given percentile relative to the distribution 473 Usage: lscoreatpercentile(inlist,percent) 476 print "\nDividing percent>1 by 100 in lscoreatpercentile().\n" 477 percent = percent / 100.0
478 targetcf = percent*len(inlist)
479 h, lrl, binsize, extras =
histogram(inlist)
480 cumhist =
cumsum(copy.deepcopy(h))
481 for i
in range(len(cumhist)):
482 if cumhist[i] >= targetcf:
484 score = binsize * ((targetcf - cumhist[i-1]) / float(h[i])) + (lrl+binsize*i)
490 Returns the percentile value of a score relative to the distribution 491 given by inlist. Formula depends on the values used to histogram the data(!). 493 Usage: lpercentileofscore(inlist,score,histbins=10,defaultlimits=None) 496 h, lrl, binsize, extras =
histogram(inlist,histbins,defaultlimits)
497 cumhist =
cumsum(copy.deepcopy(h))
498 i = int((score - lrl)/float(binsize))
499 pct = (cumhist[i-1]+((score-(lrl+binsize*i))/float(binsize))*h[i])/float(len(inlist)) * 100
503 def lhistogram (inlist,numbins=10,defaultreallimits=None,printextras=0):
505 Returns (i) a list of histogram bin counts, (ii) the smallest value 506 of the histogram binning, and (iii) the bin width (the last 2 are not 507 necessarily integers). Default number of bins is 10. If no sequence object 508 is given for defaultreallimits, the routine picks (usually non-pretty) bins 509 spanning all the numbers in the inlist. 511 Usage: lhistogram (inlist, numbins=10, defaultreallimits=None,suppressoutput=0) 512 Returns: list of bin values, lowerreallimit, binsize, extrapoints 514 if (defaultreallimits <>
None):
515 if type(defaultreallimits)
not in [ListType,TupleType]
or len(defaultreallimits)==1:
516 lowerreallimit = defaultreallimits
517 upperreallimit = 1.000001 * max(inlist)
519 lowerreallimit = defaultreallimits[0]
520 upperreallimit = defaultreallimits[1]
521 binsize = (upperreallimit-lowerreallimit)/float(numbins)
523 estbinwidth=(max(inlist)-min(inlist))/float(numbins) +1e-6
524 binsize = ((max(inlist)-min(inlist)+estbinwidth))/float(numbins)
525 lowerreallimit = min(inlist) - binsize/2
530 if (num-lowerreallimit) < 0:
531 extrapoints = extrapoints + 1
533 bintoincrement = int((num-lowerreallimit)/float(binsize))
534 bins[bintoincrement] = bins[bintoincrement] + 1
536 extrapoints = extrapoints + 1
537 if (extrapoints > 0
and printextras == 1):
538 print '\nPoints outside given histogram range =',extrapoints
539 return (bins, lowerreallimit, binsize, extrapoints)
542 def lcumfreq(inlist,numbins=10,defaultreallimits=None):
544 Returns a cumulative frequency histogram, using the histogram function. 546 Usage: lcumfreq(inlist,numbins=10,defaultreallimits=None) 547 Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints 549 h,l,b,e =
histogram(inlist,numbins,defaultreallimits)
550 cumhist =
cumsum(copy.deepcopy(h))
554 def lrelfreq(inlist,numbins=10,defaultreallimits=None):
556 Returns a relative frequency histogram, using the histogram function. 558 Usage: lrelfreq(inlist,numbins=10,defaultreallimits=None) 559 Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints 561 h,l,b,e =
histogram(inlist,numbins,defaultreallimits)
562 for i
in range(len(h)):
563 h[i] = h[i]/float(len(inlist))
573 Computes a transform on input data (any number of columns). Used to 574 test for homogeneity of variance prior to running one-way stats. From 575 Maxwell and Delaney, p.112. 577 Usage: lobrientransform(*args) 578 Returns: transformed data for use in an ANOVA 587 nargs.append(copy.deepcopy(args[i]))
588 n[i] = float(len(nargs[i]))
590 m[i] =
mean(nargs[i])
592 for i
in range(n[j]):
593 t1 = (n[j]-1.5)*n[j]*(nargs[j][i]-m[j])**2
594 t2 = 0.5*v[j]*(n[j]-1.0)
595 t3 = (n[j]-1.0)*(n[j]-2.0)
596 nargs[j][i] = (t1-t2) / float(t3)
599 if v[j] -
mean(nargs[j]) > TINY:
602 raise ValueError,
'Problem in obrientransform.' 609 Returns the variance of the values in the passed list using 610 N for the denominator (i.e., DESCRIBES the sample variance only). 612 Usage: lsamplevar(inlist) 618 deviations.append(item-mn)
619 return ss(deviations)/float(n)
624 Returns the standard deviation of the values in the passed list using 625 N for the denominator (i.e., DESCRIBES the sample stdev only). 627 Usage: lsamplestdev(inlist) 634 Returns the estimated covariance of the values in the passed 635 array (i.e., N-1). Dimension can equal None (ravel array first), an 636 integer (the dimension over which to operate), or a sequence (operate 637 over multiple dimensions). Set keepdims=1 to return an array with the 638 same number of dimensions as inarray. 640 Usage: lcov(x,y,keepdims=0) 646 xdeviations = [0]*len(x)
647 ydeviations = [0]*len(y)
648 for i
in range(len(x)):
649 xdeviations[i] = x[i] - xmn
650 ydeviations[i] = y[i] - ymn
652 for i
in range(len(xdeviations)):
653 ss = ss + xdeviations[i]*ydeviations[i]
659 Returns the variance of the values in the passed list using N-1 660 for the denominator (i.e., for estimating population variance). 666 deviations = [0]*len(inlist)
667 for i
in range(len(inlist)):
668 deviations[i] = inlist[i] - mn
669 return ss(deviations)/float(n-1)
674 Returns the standard deviation of the values in the passed list 675 using N-1 in the denominator (i.e., to estimate population stdev). 677 Usage: lstdev(inlist) 679 return math.sqrt(
var(inlist))
684 Returns the standard error of the values in the passed list using N-1 685 in the denominator (i.e., to estimate population standard error). 687 Usage: lsterr(inlist) 689 return stdev(inlist) / float(math.sqrt(len(inlist)))
694 Returns the estimated standard error of the mean (sx-bar) of the 695 values in the passed list. sem = stdev / sqrt(n) 701 return sd/math.sqrt(n)
704 def lz (inlist, score):
706 Returns the z-score for a given input score, given that score and the 707 list from which that score came. Not appropriate for population calculations. 709 Usage: lz(inlist, score) 717 Returns a list of z-scores, one for each score in the passed list. 723 zscores.append(
z(inlist,item))
733 Slices off the passed proportion of items from BOTH ends of the passed 734 list (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND 'rightmost' 735 10% of scores. Assumes list is sorted by magnitude. Slices off LESS if 736 proportion results in a non-integer slice index (i.e., conservatively 737 slices off proportiontocut). 739 Usage: ltrimboth (l,proportiontocut) 740 Returns: trimmed version of list l 742 lowercut = int(proportiontocut*len(l))
743 uppercut = len(l) - lowercut
744 return l[lowercut:uppercut]
747 def ltrim1 (l,proportiontocut,tail='right'):
749 Slices off the passed proportion of items from ONE end of the passed 750 list (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost' 751 10% of scores). Slices off LESS if proportion results in a non-integer 752 slice index (i.e., conservatively slices off proportiontocut). 754 Usage: ltrim1 (l,proportiontocut,tail='right') or set tail='left' 755 Returns: trimmed version of list l 759 uppercut = len(l) - int(proportiontocut*len(l))
761 lowercut = int(proportiontocut*len(l))
763 return l[lowercut:uppercut]
772 Interactively determines the type of data and then runs the 773 appropriated statistic for paired group data. 776 Returns: appropriate statistic name, value, and probability 779 while samples
not in [
'i',
'r','I','
R','c','C']:
780 print '\nIndependent or related samples, or correlation (i,r,c): ',
781 samples = raw_input()
783 if samples
in [
'i',
'I',
'r','R']: 784 print '\nComparing variances ...',
787 f,p =
F_oneway(pstat.colex(r,0),pstat.colex(r,1))
789 vartype=
'unequal, p='+str(round(p,4))
793 if samples
in [
'i',
'I']:
796 print '\nIndependent samples t-test: ', round(t,4),round(p,4)
798 if len(x)>20
or len(y)>20:
800 print '\nRank Sums test (NONparametric, n>20): ', round(z,4),round(p,4)
803 print '\nMann-Whitney U-test (NONparametric, ns<20): ', round(u,4),round(p,4)
808 print '\nRelated samples t-test: ', round(t,4),round(p,4)
811 print '\nWilcoxon T-test (NONparametric): ', round(t,4),round(p,4)
814 while corrtype
not in [
'c',
'C',
'r','R','d','D']:
815 print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',
816 corrtype = raw_input()
817 if corrtype
in [
'c',
'C']:
819 print '\nLinear regression for continuous variables ...' 820 lol = [[
'Slope',
'Intercept',
'r','Prob','SEestimate'],[round(m,4),round(b,4),round(r,4),round(p,4),round(see,4)]]
822 elif corrtype
in [
'r','R']: 824 print '\nCorrelation for ranked variables ...' 825 print "Spearman's r: ",round(r,4),round(p,4)
828 print '\nAssuming x contains a dichotomous variable ...' 829 print 'Point Biserial r: ',round(r,4),round(p,4)
836 Calculates a Pearson correlation coefficient and the associated 837 probability value. Taken from Heiman's Basic Statistics for the Behav. 840 Usage: lpearsonr(x,y) where x and y are equal-length lists 841 Returns: Pearson's r value, two-tailed p-value 845 raise ValueError,
'Input values not paired in pearsonr. Aborting.' 855 t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
856 prob =
betai(0.5*df,0.5,df/float(df+t*t))
862 Calculates Lin's concordance correlation coefficient. 864 Usage: alincc(x,y) where x, y are equal-length arrays 867 covar =
lcov(x,y)*(len(x)-1)/float(len(x))
868 xvar =
lvar(x)*(len(x)-1)/float(len(x))
869 yvar =
lvar(y)*(len(y)-1)/float(len(y))
870 lincc = (2 * covar) / ((xvar+yvar) +((
amean(x)-
amean(y))**2))
876 Calculates a Spearman rank-order correlation coefficient. Taken 877 from Heiman's Basic Statistics for the Behav. Sci (1st), p.192. 879 Usage: lspearmanr(x,y) where x and y are equal-length lists 880 Returns: Spearman's r, two-tailed p-value 884 raise ValueError,
'Input values not paired in spearmanr. Aborting.' 889 rs = 1 - 6*dsq / float(n*(n**2-1))
890 t = rs * math.sqrt((n-2) / ((rs+1.0)*(1.0-rs)))
892 probrs =
betai(0.5*df,0.5,df/(df+t*t))
900 Calculates a point-biserial correlation coefficient and the associated 901 probability value. Taken from Heiman's Basic Statistics for the Behav. 904 Usage: lpointbiserialr(x,y) where x,y are equal-length lists 905 Returns: Point-biserial r, two-tailed p-value 909 raise ValueError,
'INPUT VALUES NOT PAIRED IN pointbiserialr. ABORTING.' 910 data = pstat.abut(x,y)
911 categories = pstat.unique(x)
912 if len(categories) <> 2:
913 raise ValueError,
"Exactly 2 categories required for pointbiserialr()." 915 codemap = pstat.abut(categories,range(2))
916 recoded = pstat.recode(data,codemap,0)
917 x = pstat.linexand(data,0,categories[0])
918 y = pstat.linexand(data,0,categories[1])
919 xmean =
mean(pstat.colex(x,1))
920 ymean =
mean(pstat.colex(y,1))
922 adjust = math.sqrt((len(x)/float(n))*(len(y)/float(n)))
923 rpb = (ymean - xmean)/
samplestdev(pstat.colex(data,1))*adjust
925 t = rpb*math.sqrt(df/((1.0-rpb+TINY)*(1.0+rpb+TINY)))
926 prob =
betai(0.5*df,0.5,df/(df+t*t))
932 Calculates Kendall's tau ... correlation of ordinal data. Adapted 933 from function kendl1 in Numerical Recipies. Needs good test-routine.@@@ 935 Usage: lkendalltau(x,y) 936 Returns: Kendall's tau, two-tailed p-value 941 for j
in range(len(x)-1):
942 for k
in range(j,len(y)):
958 tau = iss / math.sqrt(n1*n2)
959 svar = (4.0*len(x)+10.0) / (9.0*len(x)*(len(x)-1))
960 z = tau / math.sqrt(svar)
961 prob =
erfcc(abs(z)/1.4142136)
967 Calculates a regression line on x,y pairs. 969 Usage: llinregress(x,y) x,y are equal-length lists of x-y coordinates 970 Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate 974 raise ValueError,
'Input values not paired in linregress. Aborting.' 983 z = 0.5*math.log((1.0+r+TINY)/(1.0-r+TINY))
985 t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
986 prob =
betai(0.5*df,0.5,df/(df+t*t))
988 intercept = ymean - slope*xmean
990 return slope, intercept, r, prob, sterrest
999 Calculates the t-obtained for the independent samples T-test on ONE group 1000 of scores a, given a population mean. If printit=1, results are printed 1001 to the screen. If printit='filename', the results are output to 'filename' 1002 using the given writemode (default=append). Returns t-value, and prob. 1004 Usage: lttest_1samp(a,popmean,Name='Sample',printit=0,writemode='a') 1005 Returns: t-value, two-tailed prob 1011 svar = ((n-1)*v)/float(df)
1012 t = (x-popmean)/math.sqrt(svar*(1.0/n))
1013 prob =
betai(0.5*df,0.5,float(df)/(df+t*t))
1016 statname =
'Single-sample T-test.' 1018 'Population',
'--',popmean,0,0,0,
1019 name,n,x,v,min(a),max(a),
1024 def lttest_ind (a, b, printit=0, name1='Samp1', name2='Samp2', writemode='a'):
1026 Calculates the t-obtained T-test on TWO INDEPENDENT samples of 1027 scores a, and b. From Numerical Recipies, p.483. If printit=1, results 1028 are printed to the screen. If printit='filename', the results are output 1029 to 'filename' using the given writemode (default=append). Returns t-value, 1032 Usage: lttest_ind(a,b,printit=0,name1='Samp1',name2='Samp2',writemode='a') 1033 Returns: t-value, two-tailed prob 1042 svar = ((n1-1)*v1+(n2-1)*v2)/float(df)
1043 t = (x1-x2)/math.sqrt(svar*(1.0/n1 + 1.0/n2))
1044 prob =
betai(0.5*df,0.5,df/(df+t*t))
1047 statname =
'Independent samples T-test.' 1049 name1,n1,x1,v1,min(a),max(a),
1050 name2,n2,x2,v2,min(b),max(b),
1055 def lttest_rel (a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a'):
1057 Calculates the t-obtained T-test on TWO RELATED samples of scores, 1058 a and b. From Numerical Recipies, p.483. If printit=1, results are 1059 printed to the screen. If printit='filename', the results are output to 1060 'filename' using the given writemode (default=append). Returns t-value, 1063 Usage: lttest_rel(a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a') 1064 Returns: t-value, two-tailed prob 1067 raise ValueError,
'Unequal length lists in ttest_rel.' 1074 for i
in range(len(a)):
1075 cov = cov + (a[i]-x1) * (b[i]-x2)
1077 cov = cov / float(df)
1078 sd = math.sqrt((v1+v2 - 2.0*cov)/float(n))
1080 prob =
betai(0.5*df,0.5,df/(df+t*t))
1083 statname =
'Related samples T-test.' 1085 name1,n,x1,v1,min(a),max(a),
1086 name2,n,x2,v2,min(b),max(b),
1093 Calculates a one-way chi square for list of observed frequencies and returns 1094 the result. If no expected frequencies are given, the total N is assumed to 1095 be equally distributed across all groups. 1097 Usage: lchisquare(f_obs, f_exp=None) f_obs = list of observed cell freq. 1098 Returns: chisquare-statistic, associated p-value 1102 f_exp = [
sum(f_obs)/float(k)] * len(f_obs)
1104 for i
in range(len(f_obs)):
1105 chisq = chisq + (f_obs[i]-f_exp[i])**2 / float(f_exp[i])
1111 Computes the Kolmogorov-Smirnof statistic on 2 samples. From 1112 Numerical Recipies in C, page 493. 1114 Usage: lks_2samp(data1,data2) data1&2 are lists of values for 2 conditions 1115 Returns: KS D-value, associated p-value 1128 while j1 < n1
and j2 < n2:
1132 fn1 = (j1)/float(en1)
1135 fn2 = (j2)/float(en2)
1138 if math.fabs(dt) > math.fabs(d):
1141 en = math.sqrt(en1*en2/float(en1+en2))
1142 prob =
ksprob((en+0.12+0.11/en)*abs(d))
1150 Calculates a Mann-Whitney U statistic on the provided scores and 1151 returns the result. Use only when the n in each condition is < 20 and 1152 you have 2 independent samples of ranks. NOTE: Mann-Whitney U is 1153 significant if the u-obtained is LESS THAN or equal to the critical 1154 value of U found in the tables. Equivalent to Kruskal-Wallis H with 1157 Usage: lmannwhitneyu(data) 1158 Returns: u-statistic, one-tailed p-value (i.e., p(z(U))) 1163 rankx = ranked[0:n1]
1165 u1 = n1*n2 + (n1*(n1+1))/2.0 -
sum(rankx)
1171 raise ValueError,
'All numbers are identical in lmannwhitneyu' 1172 sd = math.sqrt(T*n1*n2*(n1+n2+1)/12.0)
1173 z = abs((bigu-n1*n2/2.0) / sd)
1174 return smallu, 1.0 -
zprob(z)
1179 Corrects for ties in Mann Whitney U and Kruskal Wallis H tests. See 1180 Siegel, S. (1956) Nonparametric Statistics for the Behavioral Sciences. 1181 New York: McGraw-Hill. Code adapted from |Stat rankind.c code. 1183 Usage: ltiecorrect(rankvals) 1184 Returns: T correction factor for U or H 1191 if sorted[i] == sorted[i+1]:
1193 while (i<n-1)
and (sorted[i] == sorted[i+1]):
1196 T = T + nties**3 - nties
1198 T = T / float(n**3-n)
1204 Calculates the rank sums statistic on the provided scores and 1205 returns the result. Use only when the n in each condition is > 20 and you 1206 have 2 independent samples of ranks. 1208 Usage: lranksums(x,y) 1209 Returns: a z-statistic, two-tailed p-value 1218 expected = n1*(n1+n2+1) / 2.0
1219 z = (s - expected) / math.sqrt(n1*n2*(n1+n2+1)/12.0)
1220 prob = 2*(1.0 -
zprob(abs(z)))
1226 Calculates the Wilcoxon T-test for related samples and returns the 1227 result. A non-parametric T-test. 1229 Usage: lwilcoxont(x,y) 1230 Returns: a t-statistic, two-tail probability estimate 1232 if len(x) <> len(y):
1233 raise ValueError,
'Unequal N in wilcoxont. Aborting.' 1235 for i
in range(len(x)):
1244 for i
in range(len(absd)):
1246 r_minus = r_minus + absranked[i]
1248 r_plus = r_plus + absranked[i]
1249 wt = min(r_plus, r_minus)
1250 mn = count * (count+1) * 0.25
1251 se = math.sqrt(count*(count+1)*(2.0*count+1.0)/24.0)
1252 z = math.fabs(wt-mn) / se
1253 prob = 2*(1.0 -
zprob(abs(z)))
1259 The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more 1260 groups, requiring at least 5 subjects in each group. This function 1261 calculates the Kruskal-Wallis H-test for 3 or more independent samples 1262 and returns the result. 1264 Usage: lkruskalwallish(*args) 1265 Returns: H-statistic (corrected for ties), associated p-value 1271 for i
in range(len(args)):
1275 for i
in range(len(args)):
1276 args[i] = ranked[0:n[i]]
1279 for i
in range(len(args)):
1280 rsums.append(
sum(args[i])**2)
1281 rsums[i] = rsums[i] / float(n[i])
1284 h = 12.0 / (totaln*(totaln+1)) * ssbn - 3*(totaln+1)
1287 raise ValueError,
'All numbers are identical in lkruskalwallish' 1294 Friedman Chi-Square is a non-parametric, one-way within-subjects 1295 ANOVA. This function calculates the Friedman Chi-square test for repeated 1296 measures and returns the result, along with the associated probability 1297 value. It assumes 3 or more repeated measures. Only 3 levels requires a 1298 minimum of 10 subjects in the study. Four levels requires 5 subjects per 1301 Usage: lfriedmanchisquare(*args) 1302 Returns: chi-square statistic, associated p-value 1306 raise ValueError,
'Less than 3 levels. Friedman test not appropriate.' 1308 data = apply(pstat.abut,tuple(args))
1309 for i
in range(len(data)):
1313 ssbn = ssbn +
sum(args[i])**2
1314 chisq = 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1)
1324 Returns the (1-tailed) probability value associated with the provided 1325 chi-square value and df. Adapted from chisq.c in Gary Perlman's |Stat. 1327 Usage: lchisqprob(chisq,df) 1337 if chisq <=0
or df < 1:
1349 s = 2.0 *
zprob(-math.sqrt(chisq))
1351 chisq = 0.5 * (df - 1.0)
1360 e = math.log(math.sqrt(math.pi))
1371 e = 1.0 / math.sqrt(math.pi) / math.sqrt(a)
1374 e = e * (a/float(z))
1384 Returns the complementary error function erfc(x) with fractional 1385 error everywhere less than 1.2e-7. Adapted from Numerical Recipies. 1390 t = 1.0 / (1.0+0.5*z)
1391 ans = t * math.exp(-z*z-1.26551223 + t*(1.00002368+t*(0.37409196+t*(0.09678418+t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+t*(-0.82215223+t*0.17087277)))))))))
1400 Returns the area under the normal curve 'to the left of' the given z value. 1402 for z<0, zprob(z) = 1-tail probability 1403 for z>0, 1.0-zprob(z) = 1-tail probability 1404 for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability 1405 Adapted from z.c in Gary Perlman's |Stat. 1413 y = 0.5 * math.fabs(z)
1414 if y >= (Z_MAX*0.5):
1418 x = ((((((((0.000124818987 * w
1419 -0.001075204047) * w +0.005198775019) * w
1420 -0.019198292004) * w +0.059054035642) * w
1421 -0.151968751364) * w +0.319152932694) * w
1422 -0.531923007300) * w +0.797884560593) * y * 2.0
1425 x = (((((((((((((-0.000045255659 * y
1426 +0.000152529290) * y -0.000019538132) * y
1427 -0.000676904986) * y +0.001390604284) * y
1428 -0.000794620820) * y -0.002034254874) * y
1429 +0.006549791214) * y -0.010557625006) * y
1430 +0.011630447319) * y -0.009279453341) * y
1431 +0.005353579108) * y -0.002141268741) * y
1432 +0.000535310849) * y +0.999936657524
1434 prob = ((x+1.0)*0.5)
1436 prob = ((1.0-x)*0.5)
1442 Computes a Kolmolgorov-Smirnov t-test significance level. Adapted from 1445 Usage: lksprob(alam) 1451 for j
in range(1,201):
1452 term = fac*math.exp(a2*j*j)
1454 if math.fabs(term) <= (0.001*termbf)
or math.fabs(term) < (1.0e-8*sum):
1457 termbf = math.fabs(term)
1463 Returns the (1-tailed) significance level (p-value) of an F 1464 statistic given the degrees of freedom for the numerator (dfR-dfF) and 1465 the degrees of freedom for the denominator (dfF). 1467 Usage: lfprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn 1469 p =
betai(0.5*dfden, 0.5*dfnum, dfden/float(dfden+dfnum*F))
1475 This function evaluates the continued fraction form of the incomplete 1476 Beta function, betai. (Adapted from: Numerical Recipies in C.) 1478 Usage: lbetacf(a,b,x) 1488 for i
in range(ITMAX+1):
1491 d = em*(b-em)*x/((qam+tem)*(a+tem))
1494 d = -(a+em)*(qab+em)*x/((qap+tem)*(a+tem))
1502 if (abs(az-aold)<(EPS*abs(az))):
1504 print 'a or b too big, or ITMAX too small in Betacf.' 1509 Returns the gamma function of xx. 1510 Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt. 1511 (Adapted from: Numerical Recipies in C.) 1516 coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516,
1517 0.120858003e-2, -0.536382e-5]
1520 tmp = tmp - (x+0.5)*math.log(tmp)
1522 for j
in range(len(coeff)):
1524 ser = ser + coeff[j]/x
1525 return -tmp + math.log(2.50662827465*ser)
1530 Returns the incomplete beta function: 1532 I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt) 1534 where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma 1535 function of a. The continued fraction formulation is implemented here, 1536 using the betacf function. (Adapted from: Numerical Recipies in C.) 1538 Usage: lbetai(a,b,x) 1540 if (x<0.0
or x>1.0):
1541 raise ValueError,
'Bad x in lbetai' 1542 if (x==0.0
or x==1.0):
1547 if (x<(a+1.0)/(a+b+2.0)):
1548 return bt*
betacf(a,b,x)/float(a)
1550 return 1.0-bt*
betacf(b,a,1.0-x)/float(b)
1559 Performs a 1-way ANOVA, returning an F-value and probability given 1560 any number of groups. From Heiman, pp.394-7. 1562 Usage: F_oneway(*lists) where *lists is any number of lists, one per 1564 Returns: F value, one-tailed p-value 1571 tmp = map(N.array,lists)
1572 means = map(amean,tmp)
1573 vars = map(avar,tmp)
1575 for i
in range(len(lists)):
1576 alldata = alldata + lists[i]
1577 alldata = N.array(alldata)
1587 msb = ssbn/float(dfbn)
1588 msw = sswn/float(dfwn)
1590 prob =
fprob(dfbn,dfwn,f)
1596 Returns an F-statistic given the following: 1597 ER = error associated with the null hypothesis (the Restricted model) 1598 EF = error associated with the alternate hypothesis (the Full model) 1599 dfR-dfF = degrees of freedom of the numerator 1600 dfF = degrees of freedom associated with the denominator/Full model 1602 Usage: lF_value(ER,EF,dfnum,dfden) 1604 return ((ER-EF)/float(dfnum) / (EF/float(dfden)))
1611 def writecc (listoflists,file,writetype='w',extra=2):
1613 Writes a list of lists to a file in columns, customized by the max 1614 size of items within the columns (max size of items in col, +2 characters) 1615 to specified file. File-overwrite is the default. 1617 Usage: writecc (listoflists,file,writetype='w',extra=2) 1620 if type(listoflists[0])
not in [ListType,TupleType]:
1621 listoflists = [listoflists]
1622 outfile = open(file,writetype)
1624 list2print = copy.deepcopy(listoflists)
1625 for i
in range(len(listoflists)):
1626 if listoflists[i] == [
'\n']
or listoflists[i]==
'\n' or listoflists[i]==
'dashes':
1627 rowstokill = rowstokill + [i]
1628 rowstokill.reverse()
1629 for row
in rowstokill:
1631 maxsize = [0]*len(list2print[0])
1632 for col
in range(len(list2print[0])):
1633 items = pstat.colex(list2print,col)
1634 items = map(pstat.makestr,items)
1635 maxsize[col] = max(map(len,items)) + extra
1636 for row
in listoflists:
1637 if row == [
'\n']
or row ==
'\n':
1639 elif row == [
'dashes']
or row ==
'dashes':
1640 dashes = [0]*len(maxsize)
1641 for j
in range(len(maxsize)):
1642 dashes[j] =
'-'*(maxsize[j]-2)
1643 outfile.write(pstat.lineincustcols(dashes,maxsize))
1645 outfile.write(pstat.lineincustcols(row,maxsize))
1653 Simulate a counting system from an n-dimensional list. 1655 Usage: lincr(l,cap) l=list to increment, cap=max values for each list pos'n 1656 Returns: next set of values for list l, OR -1 (if overflow) 1659 for i
in range(len(l)):
1660 if l[i] > cap[i]
and i < len(l)-1:
1663 elif l[i] > cap[i]
and i == len(l)-1:
1670 Returns the sum of the items in the passed list. 1682 Returns a list consisting of the cumulative sum of the items in the 1685 Usage: lcumsum(inlist) 1687 newlist = copy.deepcopy(inlist)
1688 for i
in range(1,len(newlist)):
1689 newlist[i] = newlist[i] + newlist[i-1]
1695 Squares each value in the passed list, adds up these squares and 1708 Multiplies elements in list1 and list2, element by element, and 1709 returns the sum of all resulting multiplications. Must provide equal 1712 Usage: lsummult(list1,list2) 1714 if len(list1) <> len(list2):
1715 raise ValueError,
"Lists not equal length in summult." 1717 for item1,item2
in pstat.abut(list1,list2):
1724 Takes pairwise differences of the values in lists x and y, squares 1725 these differences, and returns the sum of these squares. 1727 Usage: lsumdiffsquared(x,y) 1728 Returns: sum[(x[i]-y[i])**2] 1731 for i
in range(len(x)):
1732 sds = sds + (x[i]-y[i])**2
1738 Adds the values in the passed list, squares the sum, and returns 1741 Usage: lsquare_of_sums(inlist) 1742 Returns: sum(inlist[i])**2 1750 Shellsort algorithm. Sorts a 1D-list. 1752 Usage: lshellsort(inlist) 1753 Returns: sorted-inlist, sorting-index-vector (for original list) 1756 svec = copy.deepcopy(inlist)
1760 for i
in range(gap,n):
1761 for j
in range(i-gap,-1,-gap):
1762 while j>=0
and svec[j]>svec[j+gap]:
1764 svec[j] = svec[j+gap]
1767 ivec[j] = ivec[j+gap]
1776 Ranks the data in inlist, dealing with ties appropritely. Assumes 1777 a 1D inlist. Adapted from Gary Perlman's |Stat ranksort. 1779 Usage: lrankdata(inlist) 1780 Returns: a list of length equal to inlist, containing rank scores 1788 sumranks = sumranks + i
1789 dupcount = dupcount + 1
1790 if i==n-1
or svec[i] <> svec[i+1]:
1791 averank = sumranks / float(dupcount) + 1
1792 for j
in range(i-dupcount+1,i+1):
1793 newlist[ivec[j]] = averank
1799 def outputpairedstats(fname,writemode,name1,n1,m1,se1,min1,max1,name2,n2,m2,se2,min2,max2,statname,stat,prob):
1801 Prints or write to a file stats for two groups, using the name, n, 1802 mean, sterr, min and max for each group, as well as the statistic name, 1803 its value, and the associated p-value. 1805 Usage: outputpairedstats(fname,writemode, 1806 name1,n1,mean1,stderr1,min1,max1, 1807 name2,n2,mean2,stderr2,min2,max2, 1817 if prob < 0.001: suffix =
' ***' 1818 elif prob < 0.01: suffix =
' **' 1819 elif prob < 0.05: suffix =
' *' 1820 title = [[
'Name',
'N',
'Mean',
'SD',
'Min',
'Max']]
1821 lofl = title+[[name1,n1,round(m1,3),round(math.sqrt(se1),3),min1,max1],
1822 [name2,n2,round(m2,3),round(math.sqrt(se2),3),min2,max2]]
1823 if type(fname)<>StringType
or len(fname)==0:
1830 if stat.shape == ():
1832 if prob.shape == ():
1836 print 'Test statistic = ',round(stat,3),
' p = ',round(prob,3),suffix
1839 file = open(fname,writemode)
1840 file.write(
'\n'+statname+
'\n\n')
1843 file = open(fname,
'a')
1845 if stat.shape == ():
1847 if prob.shape == ():
1851 file.write(pstat.list2string([
'\nTest statistic = ',round(stat,4),
' p = ',round(prob,4),suffix,
'\n\n']))
1858 Returns an integer representing a binary vector, where 1=within- 1859 subject factor, 0=between. Input equals the entire data 2D list (i.e., 1860 column 0=random factor, column -1=measured values (those two are skipped). 1861 Note: input data is in |Stat format ... a list of lists ("2D list") with 1862 one row per measured value, first column=subject identifier, last column= 1863 score, one in-between column per factor (these columns contain level 1864 designations on each factor). See also stats.anova.__doc__. 1866 Usage: lfindwithin(data) data in |Stat format 1869 numfact = len(data[0])-1
1871 for col
in range(1,numfact):
1872 examplelevel = pstat.unique(pstat.colex(data,col))[0]
1873 rows = pstat.linexand(data,col,examplelevel)
1874 factsubjs = pstat.unique(pstat.colex(rows,0))
1875 allsubjs = pstat.unique(pstat.colex(data,0))
1876 if len(factsubjs) == len(allsubjs):
1877 withinvec = withinvec + (1 << col)
1888 geometricmean = Dispatch ( (lgeometricmean, (ListType, TupleType)), )
1889 harmonicmean = Dispatch ( (lharmonicmean, (ListType, TupleType)), )
1890 mean = Dispatch ( (lmean, (ListType, TupleType)), )
1891 median = Dispatch ( (lmedian, (ListType, TupleType)), )
1892 medianscore = Dispatch ( (lmedianscore, (ListType, TupleType)), )
1893 mode = Dispatch ( (lmode, (ListType, TupleType)), )
1896 moment = Dispatch ( (lmoment, (ListType, TupleType)), )
1897 variation = Dispatch ( (lvariation, (ListType, TupleType)), )
1898 skew = Dispatch ( (lskew, (ListType, TupleType)), )
1899 kurtosis = Dispatch ( (lkurtosis, (ListType, TupleType)), )
1900 describe = Dispatch ( (ldescribe, (ListType, TupleType)), )
1903 itemfreq = Dispatch ( (litemfreq, (ListType, TupleType)), )
1904 scoreatpercentile = Dispatch ( (lscoreatpercentile, (ListType, TupleType)), )
1905 percentileofscore = Dispatch ( (lpercentileofscore, (ListType, TupleType)), )
1906 histogram = Dispatch ( (lhistogram, (ListType, TupleType)), )
1907 cumfreq = Dispatch ( (lcumfreq, (ListType, TupleType)), )
1908 relfreq = Dispatch ( (lrelfreq, (ListType, TupleType)), )
1911 obrientransform = Dispatch ( (lobrientransform, (ListType, TupleType)), )
1912 samplevar = Dispatch ( (lsamplevar, (ListType, TupleType)), )
1913 samplestdev = Dispatch ( (lsamplestdev, (ListType, TupleType)), )
1914 var = Dispatch ( (lvar, (ListType, TupleType)), )
1915 stdev = Dispatch ( (lstdev, (ListType, TupleType)), )
1916 sterr = Dispatch ( (lsterr, (ListType, TupleType)), )
1917 sem = Dispatch ( (lsem, (ListType, TupleType)), )
1918 z = Dispatch ( (lz, (ListType, TupleType)), )
1919 zs = Dispatch ( (lzs, (ListType, TupleType)), )
1922 trimboth = Dispatch ( (ltrimboth, (ListType, TupleType)), )
1923 trim1 = Dispatch ( (ltrim1, (ListType, TupleType)), )
1926 paired = Dispatch ( (lpaired, (ListType, TupleType)), )
1927 pearsonr = Dispatch ( (lpearsonr, (ListType, TupleType)), )
1928 spearmanr = Dispatch ( (lspearmanr, (ListType, TupleType)), )
1929 pointbiserialr = Dispatch ( (lpointbiserialr, (ListType, TupleType)), )
1930 kendalltau = Dispatch ( (lkendalltau, (ListType, TupleType)), )
1931 linregress = Dispatch ( (llinregress, (ListType, TupleType)), )
1934 ttest_1samp = Dispatch ( (lttest_1samp, (ListType, TupleType)), )
1935 ttest_ind = Dispatch ( (lttest_ind, (ListType, TupleType)), )
1936 ttest_rel = Dispatch ( (lttest_rel, (ListType, TupleType)), )
1937 chisquare = Dispatch ( (lchisquare, (ListType, TupleType)), )
1938 ks_2samp = Dispatch ( (lks_2samp, (ListType, TupleType)), )
1939 mannwhitneyu = Dispatch ( (lmannwhitneyu, (ListType, TupleType)), )
1940 ranksums = Dispatch ( (lranksums, (ListType, TupleType)), )
1941 tiecorrect = Dispatch ( (ltiecorrect, (ListType, TupleType)), )
1942 wilcoxont = Dispatch ( (lwilcoxont, (ListType, TupleType)), )
1943 kruskalwallish = Dispatch ( (lkruskalwallish, (ListType, TupleType)), )
1944 friedmanchisquare = Dispatch ( (lfriedmanchisquare, (ListType, TupleType)), )
1947 chisqprob = Dispatch ( (lchisqprob, (IntType, FloatType)), )
1948 zprob = Dispatch ( (lzprob, (IntType, FloatType)), )
1949 ksprob = Dispatch ( (lksprob, (IntType, FloatType)), )
1950 fprob = Dispatch ( (lfprob, (IntType, FloatType)), )
1951 betacf = Dispatch ( (lbetacf, (IntType, FloatType)), )
1952 betai = Dispatch ( (lbetai, (IntType, FloatType)), )
1953 erfcc = Dispatch ( (lerfcc, (IntType, FloatType)), )
1954 gammln = Dispatch ( (lgammln, (IntType, FloatType)), )
1957 F_oneway = Dispatch ( (lF_oneway, (ListType, TupleType)), )
1958 F_value = Dispatch ( (lF_value, (ListType, TupleType)), )
1961 incr = Dispatch ( (lincr, (ListType, TupleType)), )
1962 sum = Dispatch ( (lsum, (ListType, TupleType)), )
1963 cumsum = Dispatch ( (lcumsum, (ListType, TupleType)), )
1964 ss = Dispatch ( (lss, (ListType, TupleType)), )
1965 summult = Dispatch ( (lsummult, (ListType, TupleType)), )
1966 square_of_sums = Dispatch ( (lsquare_of_sums, (ListType, TupleType)), )
1967 sumdiffsquared = Dispatch ( (lsumdiffsquared, (ListType, TupleType)), )
1968 shellsort = Dispatch ( (lshellsort, (ListType, TupleType)), )
1969 rankdata = Dispatch ( (lrankdata, (ListType, TupleType)), )
1970 findwithin = Dispatch ( (lfindwithin, (ListType, TupleType)), )
1995 import numpy.linalg
as LA
2004 Calculates the geometric mean of the values in the passed array. 2005 That is: n-th root of (x1 * x2 * ... * xn). Defaults to ALL values in 2006 the passed array. Use dimension=None to flatten array first. REMEMBER: if 2007 dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and 2008 if dimension is a sequence, it collapses over all specified dimensions. If 2009 keepdims is set to 1, the resulting array will have as many dimensions as 2010 inarray, with only 1 'level' per dim that was collapsed over. 2012 Usage: ageometricmean(inarray,dimension=None,keepdims=0) 2013 Returns: geometric mean computed over dim(s) listed in dimension 2015 inarray = N.array(inarray,N.float_)
2016 if dimension ==
None:
2017 inarray = N.ravel(inarray)
2019 mult = N.power(inarray,1.0/size)
2020 mult = N.multiply.reduce(mult)
2021 elif type(dimension)
in [IntType,FloatType]:
2022 size = inarray.shape[dimension]
2023 mult = N.power(inarray,1.0/size)
2024 mult = N.multiply.reduce(mult,dimension)
2026 shp = list(inarray.shape)
2028 sum = N.reshape(sum,shp)
2030 dims = list(dimension)
2033 size = N.array(N.multiply.reduce(N.take(inarray.shape,dims)),N.float_)
2034 mult = N.power(inarray,1.0/size)
2036 mult = N.multiply.reduce(mult,dim)
2038 shp = list(inarray.shape)
2041 mult = N.reshape(mult,shp)
2047 Calculates the harmonic mean of the values in the passed array. 2048 That is: n / (1/x1 + 1/x2 + ... + 1/xn). Defaults to ALL values in 2049 the passed array. Use dimension=None to flatten array first. REMEMBER: if 2050 dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and 2051 if dimension is a sequence, it collapses over all specified dimensions. If 2052 keepdims is set to 1, the resulting array will have as many dimensions as 2053 inarray, with only 1 'level' per dim that was collapsed over. 2055 Usage: aharmonicmean(inarray,dimension=None,keepdims=0) 2056 Returns: harmonic mean computed over dim(s) in dimension 2058 inarray = inarray.astype(N.float_)
2059 if dimension ==
None:
2060 inarray = N.ravel(inarray)
2062 s = N.add.reduce(1.0 / inarray)
2063 elif type(dimension)
in [IntType,FloatType]:
2064 size = float(inarray.shape[dimension])
2065 s = N.add.reduce(1.0/inarray, dimension)
2067 shp = list(inarray.shape)
2069 s = N.reshape(s,shp)
2071 dims = list(dimension)
2074 for i
in range(len(inarray.shape)):
2077 tinarray = N.transpose(inarray,nondims+dims)
2078 idx = [0] *len(nondims)
2080 size = len(N.ravel(inarray))
2081 s =
asum(1.0 / inarray)
2083 s = N.reshape([s],N.ones(len(inarray.shape)))
2086 loopcap = N.array(tinarray.shape[0:len(nondims)]) -1
2087 s = N.zeros(loopcap+1,N.float_)
2088 while incr(idx,loopcap) <> -1:
2089 s[idx] =
asum(1.0/tinarray[idx])
2090 size = N.multiply.reduce(N.take(inarray.shape,dims))
2092 shp = list(inarray.shape)
2095 s = N.reshape(s,shp)
2099 def amean (inarray,dimension=None,keepdims=0):
2101 Calculates the arithmatic mean of the values in the passed array. 2102 That is: 1/n * (x1 + x2 + ... + xn). Defaults to ALL values in the 2103 passed array. Use dimension=None to flatten array first. REMEMBER: if 2104 dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and 2105 if dimension is a sequence, it collapses over all specified dimensions. If 2106 keepdims is set to 1, the resulting array will have as many dimensions as 2107 inarray, with only 1 'level' per dim that was collapsed over. 2109 Usage: amean(inarray,dimension=None,keepdims=0) 2110 Returns: arithematic mean calculated over dim(s) in dimension 2112 if inarray.dtype
in [N.int_, N.short,N.ubyte]:
2113 inarray = inarray.astype(N.float_)
2114 if dimension ==
None:
2115 inarray = N.ravel(inarray)
2116 sum = N.add.reduce(inarray)
2117 denom = float(len(inarray))
2118 elif type(dimension)
in [IntType,FloatType]:
2119 sum =
asum(inarray,dimension)
2120 denom = float(inarray.shape[dimension])
2122 shp = list(inarray.shape)
2124 sum = N.reshape(sum,shp)
2126 dims = list(dimension)
2131 sum = N.add.reduce(sum,dim)
2132 denom = N.array(N.multiply.reduce(N.take(inarray.shape,dims)),N.float_)
2134 shp = list(inarray.shape)
2137 sum = N.reshape(sum,shp)
2143 Calculates the COMPUTED median value of an array of numbers, given the 2144 number of bins to use for the histogram (more bins approaches finding the 2145 precise median value of the array; default number of bins = 1000). From 2146 G.W. Heiman's Basic Stats, or CRC Probability & Statistics. 2147 NOTE: THIS ROUTINE ALWAYS uses the entire passed array (flattens it first). 2149 Usage: amedian(inarray,numbins=1000) 2150 Returns: median calculated over ALL values in inarray 2152 inarray = N.ravel(inarray)
2153 (hist, smallest, binsize, extras) =
ahistogram(inarray,numbins,[min(inarray),max(inarray)])
2154 cumhist = N.cumsum(hist)
2155 otherbins = N.greater_equal(cumhist,len(inarray)/2.0)
2156 otherbins = list(otherbins)
2157 cfbin = otherbins.index(1)
2158 LRL = smallest + binsize*cfbin
2159 cfbelow = N.add.reduce(hist[0:cfbin])
2161 median = LRL + ((len(inarray)/2.0-cfbelow)/float(freq))*binsize
2167 Returns the 'middle' score of the passed array. If there is an even 2168 number of scores, the mean of the 2 middle scores is returned. Can function 2169 with 1D arrays, or on the FIRST dimension of 2D arrays (i.e., dimension can 2170 be None, to pre-flatten the array, or else dimension must equal 0). 2172 Usage: amedianscore(inarray,dimension=None) 2173 Returns: 'middle' score of the array, or the mean of the 2 middle scores 2175 if dimension ==
None:
2176 inarray = N.ravel(inarray)
2178 inarray = N.sort(inarray,dimension)
2179 if inarray.shape[dimension] % 2 == 0:
2180 indx = inarray.shape[dimension]/2
2181 median = N.asarray(inarray[indx]+inarray[indx-1]) / 2.0
2183 indx = inarray.shape[dimension] / 2
2184 median = N.take(inarray,[indx],dimension)
2185 if median.shape == (1,):
2192 Returns an array of the modal (most common) score in the passed array. 2193 If there is more than one such score, ONLY THE FIRST is returned. 2194 The bin-count for the modal values is also returned. Operates on whole 2195 array (dimension=None), or on a given dimension. 2197 Usage: amode(a, dimension=None) 2198 Returns: array of bin-counts for mode(s), array of corresponding modal values 2201 if dimension ==
None:
2204 scores = pstat.aunique(N.ravel(a))
2205 testshape = list(a.shape)
2206 testshape[dimension] = 1
2207 oldmostfreq = N.zeros(testshape)
2208 oldcounts = N.zeros(testshape)
2209 for score
in scores:
2210 template = N.equal(a,score)
2211 counts =
asum(template,dimension,1)
2212 mostfrequent = N.where(counts>oldcounts,score,oldmostfreq)
2213 oldcounts = N.where(counts>oldcounts,counts,oldcounts)
2214 oldmostfreq = mostfrequent
2215 return oldcounts, mostfrequent
2220 Returns the arithmetic mean of all values in an array, ignoring values 2221 strictly outside the sequence passed to 'limits'. Note: either limit 2222 in the sequence, or the value of limits itself, can be set to None. The 2223 inclusive list/tuple determines whether the lower and upper limiting bounds 2224 (respectively) are open/exclusive (0) or closed/inclusive (1). 2226 Usage: atmean(a,limits=None,inclusive=(1,1)) 2228 if a.dtype
in [N.int_, N.short,N.ubyte]:
2229 a = a.astype(N.float_)
2232 assert type(limits)
in [ListType,TupleType,N.ndarray],
"Wrong type for limits in atmean" 2233 if inclusive[0]: lowerfcn = N.greater_equal
2234 else: lowerfcn = N.greater
2235 if inclusive[1]: upperfcn = N.less_equal
2236 else: upperfcn = N.less
2237 if limits[0] > N.maximum.reduce(N.ravel(a))
or limits[1] < N.minimum.reduce(N.ravel(a)):
2238 raise ValueError,
"No array values within given limits (atmean)." 2239 elif limits[0]==
None and limits[1]<>
None:
2240 mask = upperfcn(a,limits[1])
2241 elif limits[0]<>
None and limits[1]==
None:
2242 mask = lowerfcn(a,limits[0])
2243 elif limits[0]<>
None and limits[1]<>
None:
2244 mask = lowerfcn(a,limits[0])*upperfcn(a,limits[1])
2245 s = float(N.add.reduce(N.ravel(a*mask)))
2246 n = float(N.add.reduce(N.ravel(mask)))
2250 def atvar(a,limits=None,inclusive=(1,1)):
2252 Returns the sample variance of values in an array, (i.e., using N-1), 2253 ignoring values strictly outside the sequence passed to 'limits'. 2254 Note: either limit in the sequence, or the value of limits itself, 2255 can be set to None. The inclusive list/tuple determines whether the lower 2256 and upper limiting bounds (respectively) are open/exclusive (0) or 2257 closed/inclusive (1). ASSUMES A FLAT ARRAY (OR ELSE PREFLATTENS). 2259 Usage: atvar(a,limits=None,inclusive=(1,1)) 2261 a = a.astype(N.float_)
2262 if limits ==
None or limits == [
None,
None]:
2264 assert type(limits)
in [ListType,TupleType,N.ndarray],
"Wrong type for limits in atvar" 2265 if inclusive[0]: lowerfcn = N.greater_equal
2266 else: lowerfcn = N.greater
2267 if inclusive[1]: upperfcn = N.less_equal
2268 else: upperfcn = N.less
2269 if limits[0] > N.maximum.reduce(N.ravel(a))
or limits[1] < N.minimum.reduce(N.ravel(a)):
2270 raise ValueError,
"No array values within given limits (atvar)." 2271 elif limits[0]==
None and limits[1]<>
None:
2272 mask = upperfcn(a,limits[1])
2273 elif limits[0]<>
None and limits[1]==
None:
2274 mask = lowerfcn(a,limits[0])
2275 elif limits[0]<>
None and limits[1]<>
None:
2276 mask = lowerfcn(a,limits[0])*upperfcn(a,limits[1])
2278 a = N.compress(mask,a)
2282 def atmin(a,lowerlimit=None,dimension=None,inclusive=1):
2284 Returns the minimum value of a, along dimension, including only values less 2285 than (or equal to, if inclusive=1) lowerlimit. If the limit is set to None, 2286 all values in the array are used. 2288 Usage: atmin(a,lowerlimit=None,dimension=None,inclusive=1) 2290 if inclusive: lowerfcn = N.greater
2291 else: lowerfcn = N.greater_equal
2292 if dimension ==
None:
2295 if lowerlimit ==
None:
2296 lowerlimit = N.minimum.reduce(N.ravel(a))-11
2297 biggest = N.maximum.reduce(N.ravel(a))
2298 ta = N.where(lowerfcn(a,lowerlimit),a,biggest)
2299 return N.minimum.reduce(ta,dimension)
2302 def atmax(a,upperlimit,dimension=None,inclusive=1):
2304 Returns the maximum value of a, along dimension, including only values greater 2305 than (or equal to, if inclusive=1) upperlimit. If the limit is set to None, 2306 a limit larger than the max value in the array is used. 2308 Usage: atmax(a,upperlimit,dimension=None,inclusive=1) 2310 if inclusive: upperfcn = N.less
2311 else: upperfcn = N.less_equal
2312 if dimension ==
None:
2315 if upperlimit ==
None:
2316 upperlimit = N.maximum.reduce(N.ravel(a))+1
2317 smallest = N.minimum.reduce(N.ravel(a))
2318 ta = N.where(upperfcn(a,upperlimit),a,smallest)
2319 return N.maximum.reduce(ta,dimension)
2324 Returns the standard deviation of all values in an array, ignoring values 2325 strictly outside the sequence passed to 'limits'. Note: either limit 2326 in the sequence, or the value of limits itself, can be set to None. The 2327 inclusive list/tuple determines whether the lower and upper limiting bounds 2328 (respectively) are open/exclusive (0) or closed/inclusive (1). 2330 Usage: atstdev(a,limits=None,inclusive=(1,1)) 2332 return N.sqrt(
tvar(a,limits,inclusive))
2335 def atsem(a,limits=None,inclusive=(1,1)):
2337 Returns the standard error of the mean for the values in an array, 2338 (i.e., using N for the denominator), ignoring values strictly outside 2339 the sequence passed to 'limits'. Note: either limit in the sequence, 2340 or the value of limits itself, can be set to None. The inclusive list/tuple 2341 determines whether the lower and upper limiting bounds (respectively) are 2342 open/exclusive (0) or closed/inclusive (1). 2344 Usage: atsem(a,limits=None,inclusive=(1,1)) 2346 sd =
tstdev(a,limits,inclusive)
2347 if limits ==
None or limits == [
None,
None]:
2348 n = float(len(N.ravel(a)))
2349 limits = [min(a)-1, max(a)+1]
2350 assert type(limits)
in [ListType,TupleType,N.ndarray],
"Wrong type for limits in atsem" 2351 if inclusive[0]: lowerfcn = N.greater_equal
2352 else: lowerfcn = N.greater
2353 if inclusive[1]: upperfcn = N.less_equal
2354 else: upperfcn = N.less
2355 if limits[0] > N.maximum.reduce(N.ravel(a))
or limits[1] < N.minimum.reduce(N.ravel(a)):
2356 raise ValueError,
"No array values within given limits (atsem)." 2357 elif limits[0]==
None and limits[1]<>
None:
2358 mask = upperfcn(a,limits[1])
2359 elif limits[0]<>
None and limits[1]==
None:
2360 mask = lowerfcn(a,limits[0])
2361 elif limits[0]<>
None and limits[1]<>
None:
2362 mask = lowerfcn(a,limits[0])*upperfcn(a,limits[1])
2363 term1 = N.add.reduce(N.ravel(a*a*mask))
2364 n = float(N.add.reduce(N.ravel(mask)))
2365 return sd/math.sqrt(n)
2374 Calculates the nth moment about the mean for a sample (defaults to the 2375 1st moment). Generally used to calculate coefficients of skewness and 2376 kurtosis. Dimension can equal None (ravel array first), an integer 2377 (the dimension over which to operate), or a sequence (operate over 2378 multiple dimensions). 2380 Usage: amoment(a,moment=1,dimension=None) 2381 Returns: appropriate moment along given dimension 2383 if dimension ==
None:
2389 mn =
amean(a,dimension,1)
2390 s = N.power((a-mn),moment)
2391 return amean(s,dimension)
2396 Returns the coefficient of variation, as defined in CRC Standard 2397 Probability and Statistics, p.6. Dimension can equal None (ravel array 2398 first), an integer (the dimension over which to operate), or a 2399 sequence (operate over multiple dimensions). 2401 Usage: avariation(a,dimension=None) 2408 Returns the skewness of a distribution (normal ==> 0.0; >0 means extra 2409 weight in left tail). Use askewtest() to see if it's close enough. 2410 Dimension can equal None (ravel array first), an integer (the 2411 dimension over which to operate), or a sequence (operate over multiple 2414 Usage: askew(a, dimension=None) 2415 Returns: skew of vals in a along dimension, returning ZERO where all vals equal 2417 denom = N.power(
amoment(a,2,dimension),1.5)
2418 zero = N.equal(denom,0)
2419 if type(denom) == N.ndarray
and asum(zero) <> 0:
2420 print "Number of zeros in askew: ",
asum(zero)
2421 denom = denom + zero
2422 return N.where(zero, 0,
amoment(a,3,dimension)/denom)
2427 Returns the kurtosis of a distribution (normal ==> 3.0; >3 means 2428 heavier in the tails, and usually more peaked). Use akurtosistest() 2429 to see if it's close enough. Dimension can equal None (ravel array 2430 first), an integer (the dimension over which to operate), or a 2431 sequence (operate over multiple dimensions). 2433 Usage: akurtosis(a,dimension=None) 2434 Returns: kurtosis of values in a along dimension, and ZERO where all vals equal 2436 denom = N.power(
amoment(a,2,dimension),2)
2437 zero = N.equal(denom,0)
2438 if type(denom) == N.ndarray
and asum(zero) <> 0:
2439 print "Number of zeros in akurtosis: ",
asum(zero)
2440 denom = denom + zero
2441 return N.where(zero,0,
amoment(a,4,dimension)/denom)
2446 Returns several descriptive statistics of the passed array. Dimension 2447 can equal None (ravel array first), an integer (the dimension over 2448 which to operate), or a sequence (operate over multiple dimensions). 2450 Usage: adescribe(inarray,dimension=None) 2451 Returns: n, (min,max), mean, standard deviation, skew, kurtosis 2453 if dimension ==
None:
2454 inarray = N.ravel(inarray)
2456 n = inarray.shape[dimension]
2457 mm = (N.minimum.reduce(inarray),N.maximum.reduce(inarray))
2458 m =
amean(inarray,dimension)
2459 sd =
astdev(inarray,dimension)
2460 skew =
askew(inarray,dimension)
2462 return n, mm, m, sd, skew, kurt
2471 Tests whether the skew is significantly different from a normal 2472 distribution. Dimension can equal None (ravel array first), an 2473 integer (the dimension over which to operate), or a sequence (operate 2474 over multiple dimensions). 2476 Usage: askewtest(a,dimension=None) 2477 Returns: z-score and 2-tail z-probability 2479 if dimension ==
None:
2482 b2 =
askew(a,dimension)
2483 n = float(a.shape[dimension])
2484 y = b2 * N.sqrt(((n+1)*(n+3)) / (6.0*(n-2)) )
2485 beta2 = ( 3.0*(n*n+27*n-70)*(n+1)*(n+3) ) / ( (n-2.0)*(n+5)*(n+7)*(n+9) )
2486 W2 = -1 + N.sqrt(2*(beta2-1))
2487 delta = 1/N.sqrt(N.log(N.sqrt(W2)))
2488 alpha = N.sqrt(2/(W2-1))
2489 y = N.where(y==0,1,y)
2490 Z = delta*N.log(y/alpha + N.sqrt((y/alpha)**2+1))
2491 return Z, (1.0-
zprob(Z))*2
2496 Tests whether a dataset has normal kurtosis (i.e., 2497 kurtosis=3(n-1)/(n+1)) Valid only for n>20. Dimension can equal None 2498 (ravel array first), an integer (the dimension over which to operate), 2499 or a sequence (operate over multiple dimensions). 2501 Usage: akurtosistest(a,dimension=None) 2502 Returns: z-score and 2-tail z-probability, returns 0 for bad pixels 2504 if dimension ==
None:
2507 n = float(a.shape[dimension])
2509 print "akurtosistest only valid for n>=20 ... continuing anyway, n=",n
2511 E = 3.0*(n-1) /(n+1)
2512 varb2 = 24.0*n*(n-2)*(n-3) / ((n+1)*(n+1)*(n+3)*(n+5))
2513 x = (b2-E)/N.sqrt(varb2)
2514 sqrtbeta1 = 6.0*(n*n-5*n+2)/((n+7)*(n+9)) * N.sqrt((6.0*(n+3)*(n+5))/
2516 A = 6.0 + 8.0/sqrtbeta1 *(2.0/sqrtbeta1 + N.sqrt(1+4.0/(sqrtbeta1**2)))
2517 term1 = 1 -2/(9.0*A)
2518 denom = 1 +x*N.sqrt(2/(A-4.0))
2519 denom = N.where(N.less(denom,0), 99, denom)
2520 term2 = N.where(N.equal(denom,0), term1, N.power((1-2.0/A)/denom,1/3.0))
2521 Z = ( term1 - term2 ) / N.sqrt(2/(9.0*A))
2522 Z = N.where(N.equal(denom,99), 0, Z)
2523 return Z, (1.0-
zprob(Z))*2
2528 Tests whether skew and/OR kurtosis of dataset differs from normal 2529 curve. Can operate over multiple dimensions. Dimension can equal 2530 None (ravel array first), an integer (the dimension over which to 2531 operate), or a sequence (operate over multiple dimensions). 2533 Usage: anormaltest(a,dimension=None) 2534 Returns: z-score and 2-tail probability 2536 if dimension ==
None:
2541 k2 = N.power(s,2) + N.power(k,2)
2551 Returns a 2D array of item frequencies. Column 1 contains item values, 2552 column 2 contains their respective counts. Assumes a 1D array is passed. 2556 Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies) 2558 scores = pstat.aunique(a)
2559 scores = N.sort(scores)
2560 freq = N.zeros(len(scores))
2561 for i
in range(len(scores)):
2562 freq[i] = N.add.reduce(N.equal(a,scores[i]))
2563 return N.array(pstat.aabut(scores, freq))
2568 Usage: ascoreatpercentile(inarray,percent) 0<percent<100 2569 Returns: score at given percentile, relative to inarray distribution 2571 percent = percent / 100.0
2572 targetcf = percent*len(inarray)
2573 h, lrl, binsize, extras =
histogram(inarray)
2575 for i
in range(len(cumhist)):
2576 if cumhist[i] >= targetcf:
2578 score = binsize * ((targetcf - cumhist[i-1]) / float(h[i])) + (lrl+binsize*i)
2584 Note: result of this function depends on the values used to histogram 2587 Usage: apercentileofscore(inarray,score,histbins=10,defaultlimits=None) 2588 Returns: percentile-position of score (0-100) relative to inarray 2590 h, lrl, binsize, extras =
histogram(inarray,histbins,defaultlimits)
2592 i = int((score - lrl)/float(binsize))
2593 pct = (cumhist[i-1]+((score-(lrl+binsize*i))/float(binsize))*h[i])/float(len(inarray)) * 100
2597 def ahistogram (inarray,numbins=10,defaultlimits=None,printextras=1):
2599 Returns (i) an array of histogram bin counts, (ii) the smallest value 2600 of the histogram binning, and (iii) the bin width (the last 2 are not 2601 necessarily integers). Default number of bins is 10. Defaultlimits 2602 can be None (the routine picks bins spanning all the numbers in the 2603 inarray) or a 2-sequence (lowerlimit, upperlimit). Returns all of the 2604 following: array of bin values, lowerreallimit, binsize, extrapoints. 2606 Usage: ahistogram(inarray,numbins=10,defaultlimits=None,printextras=1) 2607 Returns: (array of bin counts, bin-minimum, min-width, #-points-outside-range) 2609 inarray = N.ravel(inarray)
2610 if (defaultlimits <>
None):
2611 lowerreallimit = defaultlimits[0]
2612 upperreallimit = defaultlimits[1]
2613 binsize = (upperreallimit-lowerreallimit) / float(numbins)
2615 Min = N.minimum.reduce(inarray)
2616 Max = N.maximum.reduce(inarray)
2617 estbinwidth = float(Max - Min)/float(numbins) + 1e-6
2618 binsize = (Max-Min+estbinwidth)/float(numbins)
2619 lowerreallimit = Min - binsize/2.0
2620 bins = N.zeros(numbins)
2624 if (num-lowerreallimit) < 0:
2625 extrapoints = extrapoints + 1
2627 bintoincrement = int((num-lowerreallimit) / float(binsize))
2628 bins[bintoincrement] = bins[bintoincrement] + 1
2630 extrapoints = extrapoints + 1
2631 if (extrapoints > 0
and printextras == 1):
2632 print '\nPoints outside given histogram range =',extrapoints
2633 return (bins, lowerreallimit, binsize, extrapoints)
2638 Returns a cumulative frequency histogram, using the histogram function. 2639 Defaultreallimits can be None (use all data), or a 2-sequence containing 2640 lower and upper limits on values to include. 2642 Usage: acumfreq(a,numbins=10,defaultreallimits=None) 2643 Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints 2645 h,l,b,e =
histogram(a,numbins,defaultreallimits)
2647 return cumhist,l,b,e
2652 Returns a relative frequency histogram, using the histogram function. 2653 Defaultreallimits can be None (use all data), or a 2-sequence containing 2654 lower and upper limits on values to include. 2656 Usage: arelfreq(a,numbins=10,defaultreallimits=None) 2657 Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints 2659 h,l,b,e =
histogram(a,numbins,defaultreallimits)
2660 h = N.array(h/float(a.shape[0]))
2670 Computes a transform on input data (any number of columns). Used to 2671 test for homogeneity of variance prior to running one-way stats. Each 2672 array in *args is one level of a factor. If an F_oneway() run on the 2673 transformed data and found significant, variances are unequal. From 2674 Maxwell and Delaney, p.112. 2676 Usage: aobrientransform(*args) *args = 1D arrays, one per level of factor 2677 Returns: transformed data for use in an ANOVA 2681 n = N.zeros(k,N.float_)
2682 v = N.zeros(k,N.float_)
2683 m = N.zeros(k,N.float_)
2686 nargs.append(args[i].astype(N.float_))
2687 n[i] = float(len(nargs[i]))
2688 v[i] =
var(nargs[i])
2689 m[i] =
mean(nargs[i])
2691 for i
in range(n[j]):
2692 t1 = (n[j]-1.5)*n[j]*(nargs[j][i]-m[j])**2
2693 t2 = 0.5*v[j]*(n[j]-1.0)
2694 t3 = (n[j]-1.0)*(n[j]-2.0)
2695 nargs[j][i] = (t1-t2) / float(t3)
2698 if v[j] -
mean(nargs[j]) > TINY:
2701 raise ValueError,
'Lack of convergence in obrientransform.' 2703 return N.array(nargs)
2708 Returns the sample standard deviation of the values in the passed 2709 array (i.e., using N). Dimension can equal None (ravel array first), 2710 an integer (the dimension over which to operate), or a sequence 2711 (operate over multiple dimensions). Set keepdims=1 to return an array 2712 with the same number of dimensions as inarray. 2714 Usage: asamplevar(inarray,dimension=None,keepdims=0) 2716 if dimension ==
None:
2717 inarray = N.ravel(inarray)
2720 mn =
amean(inarray,dimension)[:,N.NewAxis]
2722 mn =
amean(inarray,dimension,keepdims=1)
2723 deviations = inarray - mn
2724 if type(dimension) == ListType:
2727 n = n*inarray.shape[d]
2729 n = inarray.shape[dimension]
2730 svar =
ass(deviations,dimension,keepdims) / float(n)
2736 Returns the sample standard deviation of the values in the passed 2737 array (i.e., using N). Dimension can equal None (ravel array first), 2738 an integer (the dimension over which to operate), or a sequence 2739 (operate over multiple dimensions). Set keepdims=1 to return an array 2740 with the same number of dimensions as inarray. 2742 Usage: asamplestdev(inarray,dimension=None,keepdims=0) 2744 return N.sqrt(
asamplevar(inarray,dimension,keepdims))
2749 Calculates signal-to-noise. Dimension can equal None (ravel array 2750 first), an integer (the dimension over which to operate), or a 2751 sequence (operate over multiple dimensions). 2753 Usage: asignaltonoise(instack,dimension=0): 2754 Returns: array containing the value of (mean/stdev) along dimension, 2757 m =
mean(instack,dimension)
2758 sd =
stdev(instack,dimension)
2759 return N.where(sd==0,0,m/sd)
2762 def acov (x,y, dimension=None,keepdims=0):
2764 Returns the estimated covariance of the values in the passed 2765 array (i.e., N-1). Dimension can equal None (ravel array first), an 2766 integer (the dimension over which to operate), or a sequence (operate 2767 over multiple dimensions). Set keepdims=1 to return an array with the 2768 same number of dimensions as inarray. 2770 Usage: acov(x,y,dimension=None,keepdims=0) 2772 if dimension ==
None:
2776 xmn =
amean(x,dimension,1)
2777 xdeviations = x - xmn
2778 ymn =
amean(y,dimension,1)
2779 ydeviations = y - ymn
2780 if type(dimension) == ListType:
2785 n = x.shape[dimension]
2786 covar = N.sum(xdeviations*ydeviations)/float(n-1)
2790 def avar (inarray, dimension=None,keepdims=0):
2792 Returns the estimated population variance of the values in the passed 2793 array (i.e., N-1). Dimension can equal None (ravel array first), an 2794 integer (the dimension over which to operate), or a sequence (operate 2795 over multiple dimensions). Set keepdims=1 to return an array with the 2796 same number of dimensions as inarray. 2798 Usage: avar(inarray,dimension=None,keepdims=0) 2800 if dimension ==
None:
2801 inarray = N.ravel(inarray)
2803 mn =
amean(inarray,dimension,1)
2804 deviations = inarray - mn
2805 if type(dimension) == ListType:
2808 n = n*inarray.shape[d]
2810 n = inarray.shape[dimension]
2811 var =
ass(deviations,dimension,keepdims)/float(n-1)
2815 def astdev (inarray, dimension=None, keepdims=0):
2817 Returns the estimated population standard deviation of the values in 2818 the passed array (i.e., N-1). Dimension can equal None (ravel array 2819 first), an integer (the dimension over which to operate), or a 2820 sequence (operate over multiple dimensions). Set keepdims=1 to return 2821 an array with the same number of dimensions as inarray. 2823 Usage: astdev(inarray,dimension=None,keepdims=0) 2825 return N.sqrt(
avar(inarray,dimension,keepdims))
2828 def asterr (inarray, dimension=None, keepdims=0):
2830 Returns the estimated population standard error of the values in the 2831 passed array (i.e., N-1). Dimension can equal None (ravel array 2832 first), an integer (the dimension over which to operate), or a 2833 sequence (operate over multiple dimensions). Set keepdims=1 to return 2834 an array with the same number of dimensions as inarray. 2836 Usage: asterr(inarray,dimension=None,keepdims=0) 2838 if dimension ==
None:
2839 inarray = N.ravel(inarray)
2841 return astdev(inarray,dimension,keepdims) / float(N.sqrt(inarray.shape[dimension]))
2844 def asem (inarray, dimension=None, keepdims=0):
2846 Returns the standard error of the mean (i.e., using N) of the values 2847 in the passed array. Dimension can equal None (ravel array first), an 2848 integer (the dimension over which to operate), or a sequence (operate 2849 over multiple dimensions). Set keepdims=1 to return an array with the 2850 same number of dimensions as inarray. 2852 Usage: asem(inarray,dimension=None, keepdims=0) 2854 if dimension ==
None:
2855 inarray = N.ravel(inarray)
2857 if type(dimension) == ListType:
2860 n = n*inarray.shape[d]
2862 n = inarray.shape[dimension]
2863 s =
asamplestdev(inarray,dimension,keepdims) / N.sqrt(n-1)
2869 Returns the z-score of a given input score, given thearray from which 2870 that score came. Not appropriate for population calculations, nor for 2881 Returns a 1D array of z-scores, one for each score in the passed array, 2882 computed relative to the passed array. 2888 zscores.append(
z(a,item))
2889 return N.array(zscores)
2892 def azmap (scores, compare, dimension=0):
2894 Returns an array of z-scores the shape of scores (e.g., [x,y]), compared to 2895 array passed to compare (e.g., [time,x,y]). Assumes collapsing over dim 0 2896 of the compare array. 2898 Usage: azs(scores, compare, dimension=0) 2900 mns =
amean(compare,dimension)
2902 return (scores - mns) / sstd
2913 Like Numeric.clip() except that values <threshmid or >threshmax are replaced 2914 by newval instead of by threshmin/threshmax (respectively). 2916 Usage: athreshold(a,threshmin=None,threshmax=None,newval=0) 2917 Returns: a, with values <threshmin or >threshmax replaced with newval 2919 mask = N.zeros(a.shape)
2920 if threshmin <>
None:
2921 mask = mask + N.where(a<threshmin,1,0)
2922 if threshmax <>
None:
2923 mask = mask + N.where(a>threshmax,1,0)
2924 mask = N.clip(mask,0,1)
2925 return N.where(mask,newval,a)
2930 Slices off the passed proportion of items from BOTH ends of the passed 2931 array (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND 2932 'rightmost' 10% of scores. You must pre-sort the array if you want 2933 "proper" trimming. Slices off LESS if proportion results in a 2934 non-integer slice index (i.e., conservatively slices off 2937 Usage: atrimboth (a,proportiontocut) 2938 Returns: trimmed version of array a 2940 lowercut = int(proportiontocut*len(a))
2941 uppercut = len(a) - lowercut
2942 return a[lowercut:uppercut]
2947 Slices off the passed proportion of items from ONE end of the passed 2948 array (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost' 2949 10% of scores). Slices off LESS if proportion results in a non-integer 2950 slice index (i.e., conservatively slices off proportiontocut). 2952 Usage: atrim1(a,proportiontocut,tail='right') or set tail='left' 2953 Returns: trimmed version of array a 2955 if string.lower(tail) ==
'right':
2957 uppercut = len(a) - int(proportiontocut*len(a))
2958 elif string.lower(tail) ==
'left':
2959 lowercut = int(proportiontocut*len(a))
2961 return a[lowercut:uppercut]
2970 Computes the covariance matrix of a matrix X. Requires a 2D matrix input. 2972 Usage: acovariance(X) 2973 Returns: covariance matrix of X 2975 if len(X.shape) <> 2:
2976 raise TypeError,
"acovariance requires 2D matrices" 2979 return N.dot(N.transpose(X),X) / float(n) - N.multiply.outer(mX,mX)
2984 Computes the correlation matrix of a matrix X. Requires a 2D matrix input. 2986 Usage: acorrelation(X) 2987 Returns: correlation matrix of X 2991 return C / N.sqrt(N.multiply.outer(V,V))
2996 Interactively determines the type of data in x and y, and then runs the 2997 appropriated statistic for paired group data. 2999 Usage: apaired(x,y) x,y = the two arrays of values to be compared 3000 Returns: appropriate statistic name, value, and probability 3003 while samples
not in [
'i',
'r','I','
R','c','C']:
3004 print '\nIndependent or related samples, or correlation (i,r,c): ',
3005 samples = raw_input()
3007 if samples
in [
'i',
'I',
'r','R']: 3008 print '\nComparing variances ...',
3011 f,p =
F_oneway(pstat.colex(r,0),pstat.colex(r,1))
3013 vartype=
'unequal, p='+str(round(p,4))
3017 if samples
in [
'i',
'I']:
3020 print '\nIndependent samples t-test: ', round(t,4),round(p,4)
3022 if len(x)>20
or len(y)>20:
3024 print '\nRank Sums test (NONparametric, n>20): ', round(z,4),round(p,4)
3027 print '\nMann-Whitney U-test (NONparametric, ns<20): ', round(u,4),round(p,4)
3032 print '\nRelated samples t-test: ', round(t,4),round(p,4)
3035 print '\nWilcoxon T-test (NONparametric): ', round(t,4),round(p,4)
3038 while corrtype
not in [
'c',
'C',
'r','R','d','D']:
3039 print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',
3040 corrtype = raw_input()
3041 if corrtype
in [
'c',
'C']:
3043 print '\nLinear regression for continuous variables ...' 3044 lol = [[
'Slope',
'Intercept',
'r','Prob','SEestimate'],[round(m,4),round(b,4),round(r,4),round(p,4),round(see,4)]]
3046 elif corrtype
in [
'r','R']: 3048 print '\nCorrelation for ranked variables ...' 3049 print "Spearman's r: ",round(r,4),round(p,4)
3052 print '\nAssuming x contains a dichotomous variable ...' 3053 print 'Point Biserial r: ',round(r,4),round(p,4)
3060 Calculates Dice's coefficient ... (2*number of common terms)/(number of terms in x + 3061 number of terms in y). Returns a value between 0 (orthogonal) and 1. 3068 common = len(x.intersection(y))
3069 total = float(len(x) + len(y))
3070 return 2*common/total
3075 Calculates intraclass correlation coefficients using simple, Type I sums of squares. 3076 If only one variable is passed, assumed it's an Nx2 matrix 3078 Usage: icc(x,y=None,verbose=0) 3079 Returns: icc rho, prob ####PROB IS A GUESS BASED ON PEARSON 3083 all = N.concatenate([x,y],0)
3089 pairmeans = (x+y)/2.
3090 withinss =
ass(x-pairmeans) +
ass(y-pairmeans)
3091 withindf = float(len(x))
3092 betwdf = float(len(x)-1)
3093 withinms = withinss / withindf
3094 betweenms = (totalss-withinss) / betwdf
3095 rho = (betweenms-withinms)/(withinms+betweenms)
3096 t = rho*math.sqrt(betwdf/((1.0-rho+TINY)*(1.0+rho+TINY)))
3097 prob =
abetai(0.5*betwdf,0.5,betwdf/(betwdf+t*t),verbose)
3103 Calculates Lin's concordance correlation coefficient. 3105 Usage: alincc(x,y) where x, y are equal-length arrays 3110 covar =
acov(x,y)*(len(x)-1)/float(len(x))
3111 xvar =
avar(x)*(len(x)-1)/float(len(x))
3112 yvar =
avar(y)*(len(y)-1)/float(len(y))
3113 lincc = (2 * covar) / ((xvar+yvar) +((
amean(x)-
amean(y))**2))
3119 Calculates a Pearson correlation coefficient and returns p. Taken 3120 from Heiman's Basic Statistics for the Behav. Sci (2nd), p.195. 3122 Usage: apearsonr(x,y,verbose=1) where x,y are equal length arrays 3123 Returns: Pearson's r, two-tailed p-value 3129 r_num = n*(N.add.reduce(x*y)) - N.add.reduce(x)*N.add.reduce(y)
3133 t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
3134 prob =
abetai(0.5*df,0.5,df/(df+t*t),verbose)
3140 Calculates a Spearman rank-order correlation coefficient. Taken 3141 from Heiman's Basic Statistics for the Behav. Sci (1st), p.192. 3143 Usage: aspearmanr(x,y) where x,y are equal-length arrays 3144 Returns: Spearman's r, two-tailed p-value 3150 dsq = N.add.reduce((rankx-ranky)**2)
3151 rs = 1 - 6*dsq / float(n*(n**2-1))
3152 t = rs * math.sqrt((n-2) / ((rs+1.0)*(1.0-rs)))
3154 probrs =
abetai(0.5*df,0.5,df/(df+t*t))
3162 Calculates a point-biserial correlation coefficient and the associated 3163 probability value. Taken from Heiman's Basic Statistics for the Behav. 3166 Usage: apointbiserialr(x,y) where x,y are equal length arrays 3167 Returns: Point-biserial r, two-tailed p-value 3170 categories = pstat.aunique(x)
3171 data = pstat.aabut(x,y)
3172 if len(categories) <> 2:
3173 raise ValueError,
"Exactly 2 categories required (in x) for pointbiserialr()." 3175 codemap = pstat.aabut(categories,N.arange(2))
3176 recoded = pstat.arecode(data,codemap,0)
3177 x = pstat.alinexand(data,0,categories[0])
3178 y = pstat.alinexand(data,0,categories[1])
3179 xmean =
amean(pstat.acolex(x,1))
3180 ymean =
amean(pstat.acolex(y,1))
3182 adjust = math.sqrt((len(x)/float(n))*(len(y)/float(n)))
3183 rpb = (ymean - xmean)/
asamplestdev(pstat.acolex(data,1))*adjust
3185 t = rpb*math.sqrt(df/((1.0-rpb+TINY)*(1.0+rpb+TINY)))
3186 prob =
abetai(0.5*df,0.5,df/(df+t*t))
3192 Calculates Kendall's tau ... correlation of ordinal data. Adapted 3193 from function kendl1 in Numerical Recipies. Needs good test-cases.@@@ 3195 Usage: akendalltau(x,y) 3196 Returns: Kendall's tau, two-tailed p-value 3201 for j
in range(len(x)-1):
3202 for k
in range(j,len(y)):
3218 tau = iss / math.sqrt(n1*n2)
3219 svar = (4.0*len(x)+10.0) / (9.0*len(x)*(len(x)-1))
3220 z = tau / math.sqrt(svar)
3221 prob =
erfcc(abs(z)/1.4142136)
3227 Calculates a regression line on two arrays, x and y, corresponding to x,y 3228 pairs. If a single 2D array is passed, alinregress finds dim with 2 levels 3229 and splits data into x,y pairs along that dim. 3231 Usage: alinregress(*args) args=2 equal-length arrays, or one 2D array 3232 Returns: slope, intercept, r, two-tailed prob, sterr-of-the-estimate, n 3249 r_num = n*(N.add.reduce(x*y)) - N.add.reduce(x)*N.add.reduce(y)
3252 z = 0.5*math.log((1.0+r+TINY)/(1.0-r+TINY))
3254 t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
3255 prob =
abetai(0.5*df,0.5,df/(df+t*t))
3257 intercept = ymean - slope*xmean
3259 return slope, intercept, r, prob, sterrest, n
3263 Calculates a regression line on one 1D array (x) and one N-D array (y). 3265 Returns: slope, intercept, r, two-tailed prob, sterr-of-the-estimate, n 3271 x = N.ravel(args[0])
3274 x = N.ravel(args[:,0])
3279 x = x.astype(N.float_)
3280 y = y.astype(N.float_)
3284 shp = N.ones(len(y.shape))
3287 print x.shape, y.shape
3288 r_num = n*(N.add.reduce(x*y,0)) - N.add.reduce(x)*N.add.reduce(y,0)
3290 zerodivproblem = N.equal(r_den,0)
3291 r_den = N.where(zerodivproblem,1,r_den)
3293 r = N.where(zerodivproblem,0.0,r)
3294 z = 0.5*N.log((1.0+r+TINY)/(1.0-r+TINY))
3296 t = r*N.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
3297 prob =
abetai(0.5*df,0.5,df/(df+t*t))
3300 s_den = N.where(ss==0,1,ss)
3301 slope = r_num / s_den
3302 intercept = ymean - slope*xmean
3304 return slope, intercept, r, prob, sterrest, n
3313 Calculates the t-obtained for the independent samples T-test on ONE group 3314 of scores a, given a population mean. If printit=1, results are printed 3315 to the screen. If printit='filename', the results are output to 'filename' 3316 using the given writemode (default=append). Returns t-value, and prob. 3318 Usage: attest_1samp(a,popmean,Name='Sample',printit=0,writemode='a') 3319 Returns: t-value, two-tailed prob 3321 if type(a) != N.ndarray:
3327 svar = ((n-1)*v) / float(df)
3328 t = (x-popmean)/math.sqrt(svar*(1.0/n))
3329 prob =
abetai(0.5*df,0.5,df/(df+t*t))
3332 statname =
'Single-sample T-test.' 3334 'Population',
'--',popmean,0,0,0,
3335 name,n,x,v,N.minimum.reduce(N.ravel(a)),
3336 N.maximum.reduce(N.ravel(a)),
3341 def attest_ind (a, b, dimension=None, printit=0, name1='Samp1', name2='Samp2',writemode='a'):
3343 Calculates the t-obtained T-test on TWO INDEPENDENT samples of scores 3344 a, and b. From Numerical Recipies, p.483. If printit=1, results are 3345 printed to the screen. If printit='filename', the results are output 3346 to 'filename' using the given writemode (default=append). Dimension 3347 can equal None (ravel array first), or an integer (the dimension over 3348 which to operate on a and b). 3350 Usage: attest_ind (a,b,dimension=None,printit=0, 3351 Name1='Samp1',Name2='Samp2',writemode='a') 3352 Returns: t-value, two-tailed p-value 3354 if dimension ==
None:
3358 x1 =
amean(a,dimension)
3359 x2 =
amean(b,dimension)
3360 v1 =
avar(a,dimension)
3361 v2 =
avar(b,dimension)
3362 n1 = a.shape[dimension]
3363 n2 = b.shape[dimension]
3365 svar = ((n1-1)*v1+(n2-1)*v2) / float(df)
3366 zerodivproblem = N.equal(svar,0)
3367 svar = N.where(zerodivproblem,1,svar)
3368 t = (x1-x2)/N.sqrt(svar*(1.0/n1 + 1.0/n2))
3369 t = N.where(zerodivproblem,1.0,t)
3370 probs =
abetai(0.5*df,0.5,float(df)/(df+t*t))
3372 if type(t) == N.ndarray:
3373 probs = N.reshape(probs,t.shape)
3374 if probs.shape == (1,):
3378 if type(t) == N.ndarray:
3380 if type(probs) == N.ndarray:
3382 statname =
'Independent samples T-test.' 3384 name1,n1,x1,v1,N.minimum.reduce(N.ravel(a)),
3385 N.maximum.reduce(N.ravel(a)),
3386 name2,n2,x2,v2,N.minimum.reduce(N.ravel(b)),
3387 N.maximum.reduce(N.ravel(b)),
3394 Tries to compute a t-value from a p-value (or pval array) and associated df. 3395 SLOW for large numbers of elements(!) as it re-computes p-values 20 times 3396 (smaller step-sizes) at which point it decides it's done. Keeps the signs 3397 of the input array. Returns 1000 (or -1000) if t>100. 3399 Usage: ap2t(pval,df) 3400 Returns: an array of t-values with the shape of pval 3402 pval = N.array(pval)
3403 signs = N.sign(pval)
3405 t = N.ones(pval.shape,N.float_)*50
3406 step = N.ones(pval.shape,N.float_)*25
3407 print "Initial ap2t() prob calc" 3408 prob =
abetai(0.5*df,0.5,float(df)/(df+t*t))
3409 print 'ap2t() iter: ',
3412 t = N.where(pval<prob,t+step,t-step)
3413 prob =
abetai(0.5*df,0.5,float(df)/(df+t*t))
3417 t = N.where(t>99.9,1000,t)
3422 def attest_rel (a,b,dimension=None,printit=0,name1='Samp1',name2='Samp2',writemode='a'):
3424 Calculates the t-obtained T-test on TWO RELATED samples of scores, a 3425 and b. From Numerical Recipies, p.483. If printit=1, results are 3426 printed to the screen. If printit='filename', the results are output 3427 to 'filename' using the given writemode (default=append). Dimension 3428 can equal None (ravel array first), or an integer (the dimension over 3429 which to operate on a and b). 3431 Usage: attest_rel(a,b,dimension=None,printit=0, 3432 name1='Samp1',name2='Samp2',writemode='a') 3433 Returns: t-value, two-tailed p-value 3435 if dimension ==
None:
3440 raise ValueError,
'Unequal length arrays.' 3441 x1 =
amean(a,dimension)
3442 x2 =
amean(b,dimension)
3443 v1 =
avar(a,dimension)
3444 v2 =
avar(b,dimension)
3445 n = a.shape[dimension]
3447 d = (a-b).astype(
'd')
3449 denom = N.sqrt((n*N.add.reduce(d*d,dimension) - N.add.reduce(d,dimension)**2) /df)
3450 zerodivproblem = N.equal(denom,0)
3451 denom = N.where(zerodivproblem,1,denom)
3452 t = N.add.reduce(d,dimension) / denom
3453 t = N.where(zerodivproblem,1.0,t)
3454 probs =
abetai(0.5*df,0.5,float(df)/(df+t*t))
3455 if type(t) == N.ndarray:
3456 probs = N.reshape(probs,t.shape)
3457 if probs.shape == (1,):
3461 statname =
'Related samples T-test.' 3463 name1,n,x1,v1,N.minimum.reduce(N.ravel(a)),
3464 N.maximum.reduce(N.ravel(a)),
3465 name2,n,x2,v2,N.minimum.reduce(N.ravel(b)),
3466 N.maximum.reduce(N.ravel(b)),
3474 Calculates a one-way chi square for array of observed frequencies and returns 3475 the result. If no expected frequencies are given, the total N is assumed to 3476 be equally distributed across all groups. 3479 Usage: achisquare(f_obs, f_exp=None) f_obs = array of observed cell freq. 3480 Returns: chisquare-statistic, associated p-value 3485 f_exp = N.array([
sum(f_obs)/float(k)] * len(f_obs),N.float_)
3486 f_exp = f_exp.astype(N.float_)
3487 chisq = N.add.reduce((f_obs-f_exp)**2 / f_exp)
3493 Computes the Kolmogorov-Smirnof statistic on 2 samples. Modified from 3494 Numerical Recipies in C, page 493. Returns KS D-value, prob. Not ufunc- 3497 Usage: aks_2samp(data1,data2) where data1 and data2 are 1D arrays 3498 Returns: KS D-value, p-value 3508 d = N.zeros(data1.shape[1:],N.float_)
3509 data1 = N.sort(data1,0)
3510 data2 = N.sort(data2,0)
3511 while j1 < n1
and j2 < n2:
3515 fn1 = (j1)/float(en1)
3518 fn2 = (j2)/float(en2)
3521 if abs(dt) > abs(d):
3524 en = math.sqrt(en1*en2/float(en1+en2))
3525 prob =
aksprob((en+0.12+0.11/en)*N.fabs(d))
3533 Calculates a Mann-Whitney U statistic on the provided scores and 3534 returns the result. Use only when the n in each condition is < 20 and 3535 you have 2 independent samples of ranks. REMEMBER: Mann-Whitney U is 3536 significant if the u-obtained is LESS THAN or equal to the critical 3539 Usage: amannwhitneyu(x,y) where x,y are arrays of values for 2 conditions 3540 Returns: u-statistic, one-tailed p-value (i.e., p(z(U))) 3544 ranked =
rankdata(N.concatenate((x,y)))
3545 rankx = ranked[0:n1]
3547 u1 = n1*n2 + (n1*(n1+1))/2.0 -
sum(rankx)
3553 raise ValueError,
'All numbers are identical in amannwhitneyu' 3554 sd = math.sqrt(T*n1*n2*(n1+n2+1)/12.0)
3555 z = abs((bigu-n1*n2/2.0) / sd)
3556 return smallu, 1.0 -
azprob(z)
3561 Tie-corrector for ties in Mann Whitney U and Kruskal Wallis H tests. 3562 See Siegel, S. (1956) Nonparametric Statistics for the Behavioral 3563 Sciences. New York: McGraw-Hill. Code adapted from |Stat rankind.c 3566 Usage: atiecorrect(rankvals) 3567 Returns: T correction factor for U or H 3574 if sorted[i] == sorted[i+1]:
3576 while (i<n-1)
and (sorted[i] == sorted[i+1]):
3579 T = T + nties**3 - nties
3581 T = T / float(n**3-n)
3587 Calculates the rank sums statistic on the provided scores and returns 3590 Usage: aranksums(x,y) where x,y are arrays of values for 2 conditions 3591 Returns: z-statistic, two-tailed p-value 3595 alldata = N.concatenate((x,y))
3600 expected = n1*(n1+n2+1) / 2.0
3601 z = (s - expected) / math.sqrt(n1*n2*(n1+n2+1)/12.0)
3602 prob = 2*(1.0 -
azprob(abs(z)))
3608 Calculates the Wilcoxon T-test for related samples and returns the 3609 result. A non-parametric T-test. 3611 Usage: awilcoxont(x,y) where x,y are equal-length arrays for 2 conditions 3612 Returns: t-statistic, two-tailed p-value 3614 if len(x) <> len(y):
3615 raise ValueError,
'Unequal N in awilcoxont. Aborting.' 3617 d = N.compress(N.not_equal(d,0),d)
3623 for i
in range(len(absd)):
3625 r_minus = r_minus + absranked[i]
3627 r_plus = r_plus + absranked[i]
3628 wt = min(r_plus, r_minus)
3629 mn = count * (count+1) * 0.25
3630 se = math.sqrt(count*(count+1)*(2.0*count+1.0)/24.0)
3631 z = math.fabs(wt-mn) / se
3632 z = math.fabs(wt-mn) / se
3633 prob = 2*(1.0 -
zprob(abs(z)))
3639 The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more 3640 groups, requiring at least 5 subjects in each group. This function 3641 calculates the Kruskal-Wallis H and associated p-value for 3 or more 3642 independent samples. 3644 Usage: akruskalwallish(*args) args are separate arrays for 3+ conditions 3645 Returns: H-statistic (corrected for ties), associated p-value 3647 assert len(args) == 3,
"Need at least 3 groups in stats.akruskalwallish()" 3652 for i
in range(len(args)):
3653 all = all + args[i].tolist()
3656 for i
in range(len(args)):
3657 args[i] = ranked[0:n[i]]
3660 for i
in range(len(args)):
3661 rsums.append(
sum(args[i])**2)
3662 rsums[i] = rsums[i] / float(n[i])
3665 h = 12.0 / (totaln*(totaln+1)) * ssbn - 3*(totaln+1)
3668 raise ValueError,
'All numbers are identical in akruskalwallish' 3675 Friedman Chi-Square is a non-parametric, one-way within-subjects 3676 ANOVA. This function calculates the Friedman Chi-square test for 3677 repeated measures and returns the result, along with the associated 3678 probability value. It assumes 3 or more repeated measures. Only 3 3679 levels requires a minimum of 10 subjects in the study. Four levels 3680 requires 5 subjects per level(??). 3682 Usage: afriedmanchisquare(*args) args are separate arrays for 2+ conditions 3683 Returns: chi-square statistic, associated p-value 3687 raise ValueError,
'\nLess than 3 levels. Friedman test not appropriate.\n' 3689 data = apply(pstat.aabut,args)
3690 data = data.astype(N.float_)
3691 for i
in range(len(data)):
3694 chisq = 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1)
3704 Returns the (1-tail) probability value associated with the provided chi-square 3705 value and df. Heavily modified from chisq.c in Gary Perlman's |Stat. Can 3706 handle multiple dimensions. 3708 Usage: achisqprob(chisq,df) chisq=chisquare stat., df=degrees of freedom 3713 exponents = N.where(N.less(x,-BIG),-BIG,x)
3714 return N.exp(exponents)
3716 if type(chisq) == N.ndarray:
3720 chisq = N.array([chisq])
3722 return N.ones(chisq.shape,N.float)
3723 probs = N.zeros(chisq.shape,N.float_)
3724 probs = N.where(N.less_equal(chisq,0),1.0,probs)
3734 s = 2.0 *
azprob(-N.sqrt(chisq))
3737 chisq = 0.5 * (df - 1.0)
3739 z = N.ones(probs.shape,N.float_)
3741 z = 0.5 *N.ones(probs.shape,N.float_)
3743 e = N.zeros(probs.shape,N.float_)
3745 e = N.log(N.sqrt(N.pi)) *N.ones(probs.shape,N.float_)
3747 mask = N.zeros(probs.shape)
3748 a_big = N.greater(a,BIG)
3749 a_big_frozen = -1 *N.ones(probs.shape,N.float_)
3750 totalelements = N.multiply.reduce(N.array(probs.shape))
3751 while asum(mask)<>totalelements:
3756 newmask = N.greater(z,chisq)
3757 a_big_frozen = N.where(newmask*N.equal(mask,0)*a_big, s, a_big_frozen)
3758 mask = N.clip(newmask+mask,0,1)
3760 z = N.ones(probs.shape,N.float_)
3761 e = N.ones(probs.shape,N.float_)
3763 z = 0.5 *N.ones(probs.shape,N.float_)
3764 e = 1.0 / N.sqrt(N.pi) / N.sqrt(a) * N.ones(probs.shape,N.float_)
3766 mask = N.zeros(probs.shape)
3767 a_notbig_frozen = -1 *N.ones(probs.shape,N.float_)
3768 while asum(mask)<>totalelements:
3769 e = e * (a/z.astype(N.float_))
3773 newmask = N.greater(z,chisq)
3774 a_notbig_frozen = N.where(newmask*N.equal(mask,0)*(1-a_big),
3775 c*y+s2, a_notbig_frozen)
3776 mask = N.clip(newmask+mask,0,1)
3777 probs = N.where(N.equal(probs,1),1,
3778 N.where(N.greater(a,BIG),a_big_frozen,a_notbig_frozen))
3786 Returns the complementary error function erfc(x) with fractional error 3787 everywhere less than 1.2e-7. Adapted from Numerical Recipies. Can 3788 handle multiple dimensions. 3793 t = 1.0 / (1.0+0.5*z)
3794 ans = t * N.exp(-z*z-1.26551223 + t*(1.00002368+t*(0.37409196+t*(0.09678418+t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+t*(-0.82215223+t*0.17087277)))))))))
3795 return N.where(N.greater_equal(x,0), ans, 2.0-ans)
3800 Returns the area under the normal curve 'to the left of' the given z value. 3802 for z<0, zprob(z) = 1-tail probability 3803 for z>0, 1.0-zprob(z) = 1-tail probability 3804 for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability 3805 Adapted from z.c in Gary Perlman's |Stat. Can handle multiple dimensions. 3807 Usage: azprob(z) where z is a z-value 3810 x = (((((((((((((-0.000045255659 * y
3811 +0.000152529290) * y -0.000019538132) * y
3812 -0.000676904986) * y +0.001390604284) * y
3813 -0.000794620820) * y -0.002034254874) * y
3814 +0.006549791214) * y -0.010557625006) * y
3815 +0.011630447319) * y -0.009279453341) * y
3816 +0.005353579108) * y -0.002141268741) * y
3817 +0.000535310849) * y +0.999936657524
3821 x = ((((((((0.000124818987 * w
3822 -0.001075204047) * w +0.005198775019) * w
3823 -0.019198292004) * w +0.059054035642) * w
3824 -0.151968751364) * w +0.319152932694) * w
3825 -0.531923007300) * w +0.797884560593) * N.sqrt(w) * 2.0
3829 x = N.zeros(z.shape,N.float_)
3831 x = N.where(N.less(y,1.0),wfunc(y*y),yfunc(y-2.0))
3832 x = N.where(N.greater(y,Z_MAX*0.5),1.0,x)
3833 prob = N.where(N.greater(z,0),(x+1)*0.5,(1-x)*0.5)
3839 Returns the probability value for a K-S statistic computed via ks_2samp. 3840 Adapted from Numerical Recipies. Can handle multiple dimensions. 3842 Usage: aksprob(alam) 3844 if type(alam) == N.ndarray:
3845 frozen = -1 *N.ones(alam.shape,N.float64)
3846 alam = alam.astype(N.float64)
3849 frozen = N.array(-1.)
3850 alam = N.array(alam,N.float64)
3852 mask = N.zeros(alam.shape)
3853 fac = 2.0 *N.ones(alam.shape,N.float_)
3854 sum = N.zeros(alam.shape,N.float_)
3855 termbf = N.zeros(alam.shape,N.float_)
3856 a2 = N.array(-2.0*alam*alam,N.float64)
3857 totalelements = N.multiply.reduce(N.array(mask.shape))
3858 for j
in range(1,201):
3859 if asum(mask) == totalelements:
3861 exponents = (a2*j*j)
3862 overflowmask = N.less(exponents,-746)
3863 frozen = N.where(overflowmask,0,frozen)
3864 mask = mask+overflowmask
3865 term = fac*N.exp(exponents)
3867 newmask = N.where(N.less_equal(abs(term),(0.001*termbf)) +
3868 N.less(abs(term),1.0e-8*sum), 1, 0)
3869 frozen = N.where(newmask*N.equal(mask,0), sum, frozen)
3870 mask = N.clip(mask+newmask,0,1)
3874 return N.where(N.equal(frozen,-1), 1.0, frozen)
3876 return N.where(N.equal(frozen,-1), 1.0, frozen)[0]
3881 Returns the 1-tailed significance level (p-value) of an F statistic 3882 given the degrees of freedom for the numerator (dfR-dfF) and the degrees 3883 of freedom for the denominator (dfF). Can handle multiple dims for F. 3885 Usage: afprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn 3887 if type(F) == N.ndarray:
3888 return abetai(0.5*dfden, 0.5*dfnum, dfden/(1.0*dfden+dfnum*F))
3890 return abetai(0.5*dfden, 0.5*dfnum, dfden/float(dfden+dfnum*F))
3895 Evaluates the continued fraction form of the incomplete Beta function, 3896 betai. (Adapted from: Numerical Recipies in C.) Can handle multiple 3899 Usage: abetacf(a,b,x,verbose=1) 3905 if type(x) == N.ndarray:
3906 frozen = N.ones(x.shape,N.float_) *-1
3909 frozen = N.array([-1])
3911 mask = N.zeros(x.shape)
3917 for i
in range(ITMAX+1):
3918 if N.sum(N.ravel(N.equal(frozen,-1)))==0:
3922 d = em*(b-em)*x/((qam+tem)*(a+tem))
3925 d = -(a+em)*(qab+em)*x/((qap+tem)*(a+tem))
3933 newmask = N.less(abs(az-aold),EPS*abs(az))
3934 frozen = N.where(newmask*N.equal(mask,0), az, frozen)
3935 mask = N.clip(mask+newmask,0,1)
3936 noconverge =
asum(N.equal(frozen,-1))
3937 if noconverge <> 0
and verbose:
3938 print 'a or b too big, or ITMAX too small in Betacf for ',noconverge,
' elements' 3947 Returns the gamma function of xx. 3948 Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt. 3949 Adapted from: Numerical Recipies in C. Can handle multiple dims ... but 3950 probably doesn't normally have to. 3954 coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516,
3955 0.120858003e-2, -0.536382e-5]
3958 tmp = tmp - (x+0.5)*N.log(tmp)
3960 for j
in range(len(coeff)):
3962 ser = ser + coeff[j]/x
3963 return -tmp + N.log(2.50662827465*ser)
3968 Returns the incomplete beta function: 3970 I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt) 3972 where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma 3973 function of a. The continued fraction formulation is implemented 3974 here, using the betacf function. (Adapted from: Numerical Recipies in 3975 C.) Can handle multiple dimensions. 3977 Usage: abetai(a,b,x,verbose=1) 3980 if type(a) == N.ndarray:
3981 if asum(N.less(x,0)+N.greater(x,1)) <> 0:
3982 raise ValueError,
'Bad x in abetai' 3983 x = N.where(N.equal(x,0),TINY,x)
3984 x = N.where(N.equal(x,1.0),1-TINY,x)
3986 bt = N.where(N.equal(x,0)+N.equal(x,1), 0, -1)
3990 exponents = N.where(N.less(exponents,-740),-740,exponents)
3991 bt = N.exp(exponents)
3992 if type(x) == N.ndarray:
3993 ans = N.where(N.less(x,(a+1)/(a+b+2.0)),
3994 bt*
abetacf(a,b,x,verbose)/float(a),
3995 1.0-bt*
abetacf(b,a,1.0-x,verbose)/float(b))
3997 if x<(a+1)/(a+b+2.0):
3998 ans = bt*
abetacf(a,b,x,verbose)/float(a)
4000 ans = 1.0-bt*
abetacf(b,a,1.0-x,verbose)/float(b)
4008 import LinearAlgebra, operator
4013 Calculates a linear model fit ... anova/ancova/lin-regress/t-test/etc. Taken 4015 Peterson et al. Statistical limitations in functional neuroimaging 4016 I. Non-inferential methods and statistical models. Phil Trans Royal Soc 4017 Lond B 354: 1239-1260. 4019 Usage: aglm(data,para) 4020 Returns: statistic, p-value ??? 4022 if len(para) <> len(data):
4023 print "data and para must be same length in aglm" 4026 p = pstat.aunique(para)
4027 x = N.zeros((n,len(p)))
4028 for l
in range(len(p)):
4029 x[:,l] = N.equal(para,p[l])
4030 b = N.dot(N.dot(LA.inv(N.dot(N.transpose(x),x)),
4033 diffs = (data - N.dot(x,b))
4034 s_sq = 1./(n-len(p)) * N.dot(N.transpose(diffs), diffs)
4040 t = N.dot(c,b) / N.sqrt(s_sq*fact)
4041 probs =
abetai(0.5*df,0.5,float(df)/(df+t*t))
4047 Performs a 1-way ANOVA, returning an F-value and probability given 4048 any number of groups. From Heiman, pp.394-7. 4050 Usage: aF_oneway (*args) where *args is 2 or more arrays, one per 4052 Returns: f-value, probability 4059 tmp = map(N.array,args)
4060 means = map(amean,tmp)
4061 vars = map(avar,tmp)
4063 alldata = N.concatenate(args)
4073 msb = ssbn/float(dfbn)
4074 msw = sswn/float(dfwn)
4076 prob =
fprob(dfbn,dfwn,f)
4082 Returns an F-statistic given the following: 4083 ER = error associated with the null hypothesis (the Restricted model) 4084 EF = error associated with the alternate hypothesis (the Full model) 4085 dfR = degrees of freedom the Restricted model 4086 dfF = degrees of freedom associated with the Restricted model 4088 return ((ER-EF)/float(dfR-dfF) / (EF/float(dfF)))
4092 Enum = round(Enum,3)
4093 Eden = round(Eden,3)
4094 dfnum = round(Enum,3)
4095 dfden = round(dfden,3)
4097 prob = round(prob,3)
4099 if prob < 0.001: suffix =
' ***' 4100 elif prob < 0.01: suffix =
' **' 4101 elif prob < 0.05: suffix =
' *' 4102 title = [[
'EF/ER',
'DF',
'Mean Square',
'F-value',
'prob',
'']]
4103 lofl = title+[[Enum, dfnum, round(Enum/float(dfnum),3), f, prob, suffix],
4104 [Eden, dfden, round(Eden/float(dfden),3),
'',
'',
'']]
4111 Returns an F-statistic given the following: 4112 ER = error associated with the null hypothesis (the Restricted model) 4113 EF = error associated with the alternate hypothesis (the Full model) 4114 dfR = degrees of freedom the Restricted model 4115 dfF = degrees of freedom associated with the Restricted model 4116 where ER and EF are matrices from a multivariate F calculation. 4118 if type(ER)
in [IntType, FloatType]:
4119 ER = N.array([[ER]])
4120 if type(EF)
in [IntType, FloatType]:
4121 EF = N.array([[EF]])
4122 n_um = (LA.det(ER) - LA.det(EF)) / float(dfnum)
4123 d_en = LA.det(EF) / float(dfden)
4134 Returns: array shape of a, with -1 where a<0 and +1 where a>=0 4137 if ((type(a) == type(1.4))
or (type(a) == type(1))):
4138 return a-a-N.less(a,0)+N.greater(a,0)
4140 return N.zeros(N.shape(a))-N.less(a,0)+N.greater(a,0)
4143 def asum (a, dimension=None,keepdims=0):
4145 An alternative to the Numeric.add.reduce function, which allows one to 4146 (1) collapse over multiple dimensions at once, and/or (2) to retain 4147 all dimensions in the original array (squashing one down to size. 4148 Dimension can equal None (ravel array first), an integer (the 4149 dimension over which to operate), or a sequence (operate over multiple 4150 dimensions). If keepdims=1, the resulting array will have as many 4151 dimensions as the input array. 4153 Usage: asum(a, dimension=None, keepdims=0) 4154 Returns: array summed along 'dimension'(s), same _number_ of dims if keepdims=1 4156 if type(a) == N.ndarray
and a.dtype
in [N.int_, N.short, N.ubyte]:
4157 a = a.astype(N.float_)
4158 if dimension ==
None:
4159 s = N.sum(N.ravel(a))
4160 elif type(dimension)
in [IntType,FloatType]:
4161 s = N.add.reduce(a, dimension)
4165 s = N.reshape(s,shp)
4167 dims = list(dimension)
4172 s = N.add.reduce(s,dim)
4177 s = N.reshape(s,shp)
4183 Returns an array consisting of the cumulative sum of the items in the 4184 passed array. Dimension can equal None (ravel array first), an 4185 integer (the dimension over which to operate), or a sequence (operate 4186 over multiple dimensions, but this last one just barely makes sense). 4188 Usage: acumsum(a,dimension=None) 4190 if dimension ==
None:
4193 if type(dimension)
in [ListType, TupleType, N.ndarray]:
4194 dimension = list(dimension)
4198 a = N.add.accumulate(a,d)
4201 return N.add.accumulate(a,dimension)
4204 def ass(inarray, dimension=None, keepdims=0):
4206 Squares each value in the passed array, adds these squares & returns 4207 the result. Unfortunate function name. :-) Defaults to ALL values in 4208 the array. Dimension can equal None (ravel array first), an integer 4209 (the dimension over which to operate), or a sequence (operate over 4210 multiple dimensions). Set keepdims=1 to maintain the original number 4213 Usage: ass(inarray, dimension=None, keepdims=0) 4214 Returns: sum-along-'dimension' for (inarray*inarray) 4216 if dimension ==
None:
4217 inarray = N.ravel(inarray)
4219 return asum(inarray*inarray,dimension,keepdims)
4222 def asummult (array1,array2,dimension=None,keepdims=0):
4224 Multiplies elements in array1 and array2, element by element, and 4225 returns the sum (along 'dimension') of all resulting multiplications. 4226 Dimension can equal None (ravel array first), an integer (the 4227 dimension over which to operate), or a sequence (operate over multiple 4228 dimensions). A trivial function, but included for completeness. 4230 Usage: asummult(array1,array2,dimension=None,keepdims=0) 4232 if dimension ==
None:
4233 array1 = N.ravel(array1)
4234 array2 = N.ravel(array2)
4236 return asum(array1*array2,dimension,keepdims)
4241 Adds the values in the passed array, squares that sum, and returns the 4242 result. Dimension can equal None (ravel array first), an integer (the 4243 dimension over which to operate), or a sequence (operate over multiple 4244 dimensions). If keepdims=1, the returned array will have the same 4245 NUMBER of dimensions as the original. 4247 Usage: asquare_of_sums(inarray, dimension=None, keepdims=0) 4248 Returns: the square of the sum over dim(s) in dimension 4250 if dimension ==
None:
4251 inarray = N.ravel(inarray)
4253 s =
asum(inarray,dimension,keepdims)
4254 if type(s) == N.ndarray:
4255 return s.astype(N.float_)*s
4262 Takes pairwise differences of the values in arrays a and b, squares 4263 these differences, and returns the sum of these squares. Dimension 4264 can equal None (ravel array first), an integer (the dimension over 4265 which to operate), or a sequence (operate over multiple dimensions). 4266 keepdims=1 means the return shape = len(a.shape) = len(b.shape) 4268 Usage: asumdiffsquared(a,b) 4269 Returns: sum[ravel(a-b)**2] 4271 if dimension ==
None:
4272 inarray = N.ravel(a)
4274 return asum((a-b)**2,dimension,keepdims)
4279 Shellsort algorithm. Sorts a 1D-array. 4281 Usage: ashellsort(inarray) 4282 Returns: sorted-inarray, sorting-index-vector (for original array) 4289 for i
in range(gap,n):
4290 for j
in range(i-gap,-1,-gap):
4291 while j>=0
and svec[j]>svec[j+gap]:
4293 svec[j] = svec[j+gap]
4296 ivec[j] = ivec[j+gap]
4305 Ranks the data in inarray, dealing with ties appropritely. Assumes 4306 a 1D inarray. Adapted from Gary Perlman's |Stat ranksort. 4308 Usage: arankdata(inarray) 4309 Returns: array of length equal to inarray, containing rank scores 4315 newarray = N.zeros(n,N.float_)
4317 sumranks = sumranks + i
4318 dupcount = dupcount + 1
4319 if i==n-1
or svec[i] <> svec[i+1]:
4320 averank = sumranks / float(dupcount) + 1
4321 for j
in range(i-dupcount+1,i+1):
4322 newarray[ivec[j]] = averank
4330 Returns a binary vector, 1=within-subject factor, 0=between. Input 4331 equals the entire data array (i.e., column 1=random factor, last 4332 column = measured values. 4334 Usage: afindwithin(data) data in |Stat format 4336 numfact = len(data[0])-2
4337 withinvec = [0]*numfact
4338 for col
in range(1,numfact+1):
4339 rows = pstat.linexand(data,col,pstat.unique(pstat.colex(data,1))[0])
4340 if len(pstat.unique(pstat.colex(rows,0))) < len(rows):
4341 withinvec[col-1] = 1
4352 geometricmean = Dispatch ( (lgeometricmean, (ListType, TupleType)),
4353 (ageometricmean, (N.ndarray,)) )
4354 harmonicmean = Dispatch ( (lharmonicmean, (ListType, TupleType)),
4355 (aharmonicmean, (N.ndarray,)) )
4356 mean = Dispatch ( (lmean, (ListType, TupleType)),
4357 (amean, (N.ndarray,)) )
4358 median = Dispatch ( (lmedian, (ListType, TupleType)),
4359 (amedian, (N.ndarray,)) )
4360 medianscore = Dispatch ( (lmedianscore, (ListType, TupleType)),
4361 (amedianscore, (N.ndarray,)) )
4362 mode = Dispatch ( (lmode, (ListType, TupleType)),
4363 (amode, (N.ndarray,)) )
4364 tmean = Dispatch ( (atmean, (N.ndarray,)) )
4365 tvar = Dispatch ( (atvar, (N.ndarray,)) )
4366 tstdev = Dispatch ( (atstdev, (N.ndarray,)) )
4367 tsem = Dispatch ( (atsem, (N.ndarray,)) )
4370 moment = Dispatch ( (lmoment, (ListType, TupleType)),
4371 (amoment, (N.ndarray,)) )
4372 variation = Dispatch ( (lvariation, (ListType, TupleType)),
4373 (avariation, (N.ndarray,)) )
4374 skew = Dispatch ( (lskew, (ListType, TupleType)),
4375 (askew, (N.ndarray,)) )
4376 kurtosis = Dispatch ( (lkurtosis, (ListType, TupleType)),
4377 (akurtosis, (N.ndarray,)) )
4378 describe = Dispatch ( (ldescribe, (ListType, TupleType)),
4379 (adescribe, (N.ndarray,)) )
4383 skewtest = Dispatch ( (askewtest, (ListType, TupleType)),
4384 (askewtest, (N.ndarray,)) )
4385 kurtosistest = Dispatch ( (akurtosistest, (ListType, TupleType)),
4386 (akurtosistest, (N.ndarray,)) )
4387 normaltest = Dispatch ( (anormaltest, (ListType, TupleType)),
4388 (anormaltest, (N.ndarray,)) )
4391 itemfreq = Dispatch ( (litemfreq, (ListType, TupleType)),
4392 (aitemfreq, (N.ndarray,)) )
4393 scoreatpercentile = Dispatch ( (lscoreatpercentile, (ListType, TupleType)),
4394 (ascoreatpercentile, (N.ndarray,)) )
4395 percentileofscore = Dispatch ( (lpercentileofscore, (ListType, TupleType)),
4396 (apercentileofscore, (N.ndarray,)) )
4397 histogram = Dispatch ( (lhistogram, (ListType, TupleType)),
4398 (ahistogram, (N.ndarray,)) )
4399 cumfreq = Dispatch ( (lcumfreq, (ListType, TupleType)),
4400 (acumfreq, (N.ndarray,)) )
4401 relfreq = Dispatch ( (lrelfreq, (ListType, TupleType)),
4402 (arelfreq, (N.ndarray,)) )
4405 obrientransform = Dispatch ( (lobrientransform, (ListType, TupleType)),
4406 (aobrientransform, (N.ndarray,)) )
4407 samplevar = Dispatch ( (lsamplevar, (ListType, TupleType)),
4408 (asamplevar, (N.ndarray,)) )
4409 samplestdev = Dispatch ( (lsamplestdev, (ListType, TupleType)),
4410 (asamplestdev, (N.ndarray,)) )
4411 signaltonoise =
Dispatch( (asignaltonoise, (N.ndarray,)),)
4412 var = Dispatch ( (lvar, (ListType, TupleType)),
4413 (avar, (N.ndarray,)) )
4414 stdev = Dispatch ( (lstdev, (ListType, TupleType)),
4415 (astdev, (N.ndarray,)) )
4416 sterr = Dispatch ( (lsterr, (ListType, TupleType)),
4417 (asterr, (N.ndarray,)) )
4418 sem = Dispatch ( (lsem, (ListType, TupleType)),
4419 (asem, (N.ndarray,)) )
4420 z = Dispatch ( (lz, (ListType, TupleType)),
4421 (az, (N.ndarray,)) )
4422 zs = Dispatch ( (lzs, (ListType, TupleType)),
4423 (azs, (N.ndarray,)) )
4427 trimboth = Dispatch ( (ltrimboth, (ListType, TupleType)),
4428 (atrimboth, (N.ndarray,)) )
4429 trim1 = Dispatch ( (ltrim1, (ListType, TupleType)),
4430 (atrim1, (N.ndarray,)) )
4433 paired = Dispatch ( (lpaired, (ListType, TupleType)),
4434 (apaired, (N.ndarray,)) )
4435 lincc = Dispatch ( (llincc, (ListType, TupleType)),
4436 (alincc, (N.ndarray,)) )
4437 pearsonr = Dispatch ( (lpearsonr, (ListType, TupleType)),
4438 (apearsonr, (N.ndarray,)) )
4439 spearmanr = Dispatch ( (lspearmanr, (ListType, TupleType)),
4440 (aspearmanr, (N.ndarray,)) )
4441 pointbiserialr = Dispatch ( (lpointbiserialr, (ListType, TupleType)),
4442 (apointbiserialr, (N.ndarray,)) )
4443 kendalltau = Dispatch ( (lkendalltau, (ListType, TupleType)),
4444 (akendalltau, (N.ndarray,)) )
4445 linregress = Dispatch ( (llinregress, (ListType, TupleType)),
4446 (alinregress, (N.ndarray,)) )
4449 ttest_1samp = Dispatch ( (lttest_1samp, (ListType, TupleType)),
4450 (attest_1samp, (N.ndarray,)) )
4451 ttest_ind = Dispatch ( (lttest_ind, (ListType, TupleType)),
4452 (attest_ind, (N.ndarray,)) )
4453 ttest_rel = Dispatch ( (lttest_rel, (ListType, TupleType)),
4454 (attest_rel, (N.ndarray,)) )
4455 chisquare = Dispatch ( (lchisquare, (ListType, TupleType)),
4456 (achisquare, (N.ndarray,)) )
4457 ks_2samp = Dispatch ( (lks_2samp, (ListType, TupleType)),
4458 (aks_2samp, (N.ndarray,)) )
4459 mannwhitneyu = Dispatch ( (lmannwhitneyu, (ListType, TupleType)),
4460 (amannwhitneyu, (N.ndarray,)) )
4461 tiecorrect = Dispatch ( (ltiecorrect, (ListType, TupleType)),
4462 (atiecorrect, (N.ndarray,)) )
4463 ranksums = Dispatch ( (lranksums, (ListType, TupleType)),
4464 (aranksums, (N.ndarray,)) )
4465 wilcoxont = Dispatch ( (lwilcoxont, (ListType, TupleType)),
4466 (awilcoxont, (N.ndarray,)) )
4467 kruskalwallish = Dispatch ( (lkruskalwallish, (ListType, TupleType)),
4468 (akruskalwallish, (N.ndarray,)) )
4469 friedmanchisquare = Dispatch ( (lfriedmanchisquare, (ListType, TupleType)),
4470 (afriedmanchisquare, (N.ndarray,)) )
4473 chisqprob = Dispatch ( (lchisqprob, (IntType, FloatType)),
4474 (achisqprob, (N.ndarray,)) )
4475 zprob = Dispatch ( (lzprob, (IntType, FloatType)),
4476 (azprob, (N.ndarray,)) )
4477 ksprob = Dispatch ( (lksprob, (IntType, FloatType)),
4478 (aksprob, (N.ndarray,)) )
4479 fprob = Dispatch ( (lfprob, (IntType, FloatType)),
4480 (afprob, (N.ndarray,)) )
4481 betacf = Dispatch ( (lbetacf, (IntType, FloatType)),
4482 (abetacf, (N.ndarray,)) )
4483 betai = Dispatch ( (lbetai, (IntType, FloatType)),
4484 (abetai, (N.ndarray,)) )
4485 erfcc = Dispatch ( (lerfcc, (IntType, FloatType)),
4486 (aerfcc, (N.ndarray,)) )
4487 gammln = Dispatch ( (lgammln, (IntType, FloatType)),
4488 (agammln, (N.ndarray,)) )
4491 F_oneway = Dispatch ( (lF_oneway, (ListType, TupleType)),
4492 (aF_oneway, (N.ndarray,)) )
4493 F_value = Dispatch ( (lF_value, (ListType, TupleType)),
4494 (aF_value, (N.ndarray,)) )
4497 incr = Dispatch ( (lincr, (ListType, TupleType, N.ndarray)), )
4498 sum = Dispatch ( (lsum, (ListType, TupleType)),
4499 (asum, (N.ndarray,)) )
4500 cumsum = Dispatch ( (lcumsum, (ListType, TupleType)),
4501 (acumsum, (N.ndarray,)) )
4502 ss = Dispatch ( (lss, (ListType, TupleType)),
4503 (ass, (N.ndarray,)) )
4504 summult = Dispatch ( (lsummult, (ListType, TupleType)),
4505 (asummult, (N.ndarray,)) )
4506 square_of_sums = Dispatch ( (lsquare_of_sums, (ListType, TupleType)),
4507 (asquare_of_sums, (N.ndarray,)) )
4508 sumdiffsquared = Dispatch ( (lsumdiffsquared, (ListType, TupleType)),
4509 (asumdiffsquared, (N.ndarray,)) )
4510 shellsort = Dispatch ( (lshellsort, (ListType, TupleType)),
4511 (ashellsort, (N.ndarray,)) )
4512 rankdata = Dispatch ( (lrankdata, (ListType, TupleType)),
4513 (arankdata, (N.ndarray,)) )
4514 findwithin = Dispatch ( (lfindwithin, (ListType, TupleType)),
4515 (afindwithin, (N.ndarray,)) )
def atstdev(a, limits=None, inclusive=(1, 1))
def amoment(a, moment=1, dimension=None)
AMOMENTS #############.
def apercentileofscore(inarray, score, histbins=10, defaultlimits=None)
def asign(a)
ASUPPORT FUNCTIONS ########.
def lsquare_of_sums(inlist)
def amasslinregress(args)
def F_value_multivariate(ER, EF, dfnum, dfden)
def lpaired(x, y)
CORRELATION FUNCTIONS ######.
def lpointbiserialr(x, y)
def atmean(a, limits=None, inclusive=(1, 1))
def akurtosis(a, dimension=None)
def acov(x, y, dimension=None, keepdims=0)
def avar(inarray, dimension=None, keepdims=0)
def abetai(a, b, x, verbose=1)
def ltrim1(l, proportiontocut, tail='right')
def atmin(a, lowerlimit=None, dimension=None, inclusive=1)
def asem(inarray, dimension=None, keepdims=0)
def atrim1(a, proportiontocut, tail='right')
def lharmonicmean(inlist)
def lrelfreq(inlist, numbins=10, defaultreallimits=None)
def atrimboth(a, proportiontocut)
def lfriedmanchisquare(args)
def attest_rel(a, b, dimension=None, printit=0, name1='Samp1', name2='Samp2', writemode='a')
def akurtosistest(a, dimension=None)
def avariation(a, dimension=None)
def ageometricmean(inarray, dimension=None, keepdims=0)
ACENTRAL TENDENCY ########.
def attest_ind(a, b, dimension=None, printit=0, name1='Samp1', name2='Samp2', writemode='a')
def acovariance(X)
ACORRELATION FUNCTIONS ######.
def afprob(dfnum, dfden, F)
def lmoment(inlist, moment=1)
MOMENTS #############.
def lF_oneway(lists)
ANOVA CALCULATIONS #######.
def __init__(self, tuples)
def acumfreq(a, numbins=10, defaultreallimits=None)
def apearsonr(x, y, verbose=1)
def abetacf(a, b, x, verbose=1)
def azmap(scores, compare, dimension=0)
def lpercentileofscore(inlist, score, histbins=10, defaultlimits=None)
def outputpairedstats(fname, writemode, name1, n1, m1, se1, min1, max1, name2, n2, m2, se2, min2, max2, statname, stat, prob)
def amedianscore(inarray, dimension=None)
obrientransform
VARIABILITY:
def lttest_rel(a, b, printit=0, name1='Sample1', name2='Sample2', writemode='a')
def atiecorrect(rankvals)
def lttest_ind(a, b, printit=0, name1='Samp1', name2='Samp2', writemode='a')
def asummult(array1, array2, dimension=None, keepdims=0)
DISPATCH CODE ##############.
def lmedian(inlist, numbins=1000)
def askew(a, dimension=None)
def atmax(a, upperlimit, dimension=None, inclusive=1)
def apointbiserialr(x, y)
def lsumdiffsquared(x, y)
def amedian(inarray, numbins=1000)
def ltrimboth(l, proportiontocut)
TRIMMING FUNCTIONS #######.
chisqprob
PROBABILITY CALCS:
def ahistogram(inarray, numbins=10, defaultlimits=None, printextras=1)
def asumdiffsquared(a, b, dimension=None, keepdims=0)
def aobrientransform(args)
AVARIABILITY FUNCTIONS #####.
def ass(inarray, dimension=None, keepdims=0)
def lchisquare(f_obs, f_exp=None)
def lfprob(dfnum, dfden, F)
def writecc(listoflists, file, writetype='w', extra=2)
SUPPORT FUNCTIONS #######.
def outputfstats(Enum, Eden, dfnum, dfden, f, prob)
def attest_1samp(a, popmean, printit=0, name='Sample', writemode='a')
AINFERENTIAL STATISTICS #####.
def lcov(x, y, keepdims=0)
def icc(x, y=None, verbose=0)
def lcumfreq(inlist, numbins=10, defaultreallimits=None)
def aharmonicmean(inarray, dimension=None, keepdims=0)
def atsem(a, limits=None, inclusive=(1, 1))
def anormaltest(a, dimension=None)
def aks_2samp(data1, data2)
def lscoreatpercentile(inlist, percent)
def asquare_of_sums(inarray, dimension=None, keepdims=0)
def lhistogram(inlist, numbins=10, defaultreallimits=None, printextras=0)
def asamplevar(inarray, dimension=None, keepdims=0)
def athreshold(a, threshmin=None, threshmax=None, newval=0)
ATRIMMING FUNCTIONS #######.
def achisquare(f_obs, f_exp=None)
def asterr(inarray, dimension=None, keepdims=0)
def ltiecorrect(rankvals)
def aF_value(ER, EF, dfR, dfF)
def lF_value(ER, EF, dfnum, dfden)
def litemfreq(inlist)
FREQUENCY STATS ##########.
def amean(inarray, dimension=None, keepdims=0)
def lobrientransform(args)
VARIABILITY FUNCTIONS ######.
def asamplestdev(inarray, dimension=None, keepdims=0)
def atvar(a, limits=None, inclusive=(1, 1))
def __call__(self, arg1, args, kw)
def lkruskalwallish(args)
def lsummult(list1, list2)
def askewtest(a, dimension=None)
NORMALITY TESTS ##########.
def afriedmanchisquare(args)
def lchisqprob(chisq, df)
PROBABILITY CALCULATIONS ####.
def acumsum(a, dimension=None)
def lks_2samp(data1, data2)
def arelfreq(a, numbins=10, defaultreallimits=None)
def adescribe(inarray, dimension=None)
def achisqprob(chisq, df)
APROBABILITY CALCULATIONS ####.
def astdev(inarray, dimension=None, keepdims=0)
def lttest_1samp(a, popmean, printit=0, name='Sample', writemode='a')
INFERENTIAL STATISTICS #####.
def akruskalwallish(args)
def asignaltonoise(instack, dimension=0)
def amode(a, dimension=None)
def lgeometricmean(inlist)
def asum(a, dimension=None, keepdims=0)
def aitemfreq(a)
AFREQUENCY FUNCTIONS #######.
def ascoreatpercentile(inarray, percent)