00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 """
00025 pstat.py module
00026
00027 #################################################
00028 ####### Written by: Gary Strangman ###########
00029 ####### Last modified: Dec 18, 2007 ###########
00030 #################################################
00031
00032 This module provides some useful list and array manipulation routines
00033 modeled after those found in the |Stat package by Gary Perlman, plus a
00034 number of other useful list/file manipulation functions. The list-based
00035 functions include:
00036
00037 abut (source,*args)
00038 simpleabut (source, addon)
00039 colex (listoflists,cnums)
00040 collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
00041 dm (listoflists,criterion)
00042 flat (l)
00043 linexand (listoflists,columnlist,valuelist)
00044 linexor (listoflists,columnlist,valuelist)
00045 linedelimited (inlist,delimiter)
00046 lineincols (inlist,colsize)
00047 lineincustcols (inlist,colsizes)
00048 list2string (inlist,delimit=' ')
00049 makelol(inlist)
00050 makestr(x)
00051 printcc (lst,extra=2)
00052 printincols (listoflists,colsize)
00053 pl (listoflists)
00054 printl(listoflists)
00055 replace (lst,oldval,newval)
00056 recode (inlist,listmap,cols=None)
00057 remap (listoflists,criterion)
00058 roundlist (inlist,num_digits_to_round_floats_to)
00059 sortby(listoflists,sortcols)
00060 unique (inlist)
00061 duplicates(inlist)
00062 writedelimited (listoflists, delimiter, file, writetype='w')
00063
00064 Some of these functions have alternate versions which are defined only if
00065 Numeric (NumPy) can be imported. These functions are generally named as
00066 above, with an 'a' prefix.
00067
00068 aabut (source, *args)
00069 acolex (a,indices,axis=1)
00070 acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
00071 adm (a,criterion)
00072 alinexand (a,columnlist,valuelist)
00073 alinexor (a,columnlist,valuelist)
00074 areplace (a,oldval,newval)
00075 arecode (a,listmap,col='all')
00076 arowcompare (row1, row2)
00077 arowsame (row1, row2)
00078 asortrows(a,axis=0)
00079 aunique(inarray)
00080 aduplicates(inarray)
00081
00082 Currently, the code is all but completely un-optimized. In many cases, the
00083 array versions of functions amount simply to aliases to built-in array
00084 functions/methods. Their inclusion here is for function name consistency.
00085 """
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108 import stats
00109 import string, copy
00110 from types import *
00111
00112 __version__ = 0.4
00113
00114
00115
00116
00117
00118
00119
def _repeat_to_length (seq,length):
    """
    Cycles seq until it is 'length' items long and returns the result.  The
    repeated portion is built from one deep copy of seq, so the added items
    are independent of the items originally passed in.

    Usage:   _repeat_to_length(seq,length)   seq=list or tuple, length=int
    Returns: a sequence of the same type as seq holding exactly 'length' items
    Raises:  ZeroDivisionError if seq is empty
    """
    unit = copy.deepcopy(seq)
    # ceiling division that stays an integer under both Python 2 and Python 3
    repeats = -(-length // len(seq))
    return (seq + unit*(repeats-1))[0:length]


def abut (source,*args):
    """
    Like the |Stat abut command.  It concatenates two lists side-by-side
    and returns the result.  '2D' lists are also accommodated for either
    argument (source or addon).  CAUTION:  If one list is shorter, it will be
    repeated until it is as long as the longest list.  If this behavior is not
    desired, use pstat.simpleabut().

    Usage:   abut(source, args)   where args=any # of lists
    Returns: a list of lists as long as the LONGEST list passed, source on the
             'left', lists in <args> attached consecutively on the 'right'.
             With no args, source is returned (wrapped in a list if it was
             not already a list or tuple).
    Raises:  ZeroDivisionError if an empty sequence has to be repeated to
             match a non-empty one
    """
    if not isinstance(source,(list,tuple)):
        source = [source]
    for addon in args:
        if not isinstance(addon,(list,tuple)):
            addon = [addon]
        # stretch whichever side is shorter so both sides have equal length
        if len(addon) < len(source):
            addon = _repeat_to_length(addon,len(source))
        elif len(source) < len(addon):
            source = _repeat_to_length(source,len(addon))
        source = simpleabut(source,addon)
    return source
00165
00166
def simpleabut (source, addon):
    """
    Concatenates two lists as columns and returns the result.  '2D' lists
    are also accommodated for either argument (source or addon).  This DOES
    NOT repeat either list to make the 2 lists of equal length.  Beware of
    list pairs with different lengths ... the resulting list will be the
    length of the FIRST list passed; rows of source that have no partner in
    addon are returned as (deep) copies, unchanged.

    Whether an argument is treated as '2D' is decided from its FIRST element
    only.  Tuple rows are converted to lists before being joined.

    Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
    Returns: a list of lists as long as source, with source on the 'left' and
             addon on the 'right' ([] if source is empty)
    """
    def as_row (item,nested):
        # a cell of a 1D argument becomes a one-item row; rows of a 2D
        # argument are used as they are (tuples converted so '+' works)
        if not nested:
            return [item]
        if isinstance(item,tuple):
            return list(item)
        return item

    if not isinstance(source,(list,tuple)):
        source = [source]
    if not isinstance(addon,(list,tuple)):
        addon = [addon]
    source_nested = len(source) > 0 and isinstance(source[0],(list,tuple))
    addon_nested = len(addon) > 0 and isinstance(addon[0],(list,tuple))
    minlen = min(len(source),len(addon))
    rows = [as_row(source[i],source_nested) + as_row(addon[i],addon_nested)
            for i in range(minlen)]
    # unmatched trailing rows of source are kept, copied so that the result
    # never shares them with the caller's list
    return rows + copy.deepcopy(list(source[minlen:]))
00201
00202
def colex (listoflists,cnums):
    """
    Extracts from listoflists the columns specified by 'cnums'.  cnums can be
    an integer, a sequence of integers, or a string holding the index/slice
    text that is applied to each row x ... e.g., '[3:]' extracts x[3:], i.e.,
    columns 3 onward, from every row of listoflists.

    Usage:   colex (listoflists,cnums)
    Returns: for an integer or a one-item sequence, a flat list holding that
             column; for a longer sequence, a list-of-lists with the columns
             in the order they appear in cnums; for a string, a list holding
             the result of the expression for each row
    """
    if isinstance(cnums,(list,tuple)):
        column = [row[cnums[0]] for row in listoflists]
        for col in cnums[1:]:
            column = abut(column,[row[col] for row in listoflists])
    elif isinstance(cnums,str):
        # NOTE(review): cnums is evaluated as Python source; never pass text
        # that comes from an untrusted origin.
        extract = eval('lambda x: x'+cnums)
        column = [extract(row) for row in listoflists]
    else:
        column = [row[cnums] for row in listoflists]
    return column
00229
00230
def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column values), retaining the
    unique sets of values in keepcols and the mean for each.  Setting fcn1
    and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len)
    will append those results (e.g., the sterr, N) after each calculated mean.
    If fcn1 or fcn2 raises an exception for a column, 'N/A' is stored instead.
    cfcn is the collapse function to apply (defaults to mean, defined here in
    the pstat module to avoid circular imports with stats.py, but harmonicmean
    or others could be passed).

    Usage:   collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: a list of lists with all unique permutations of entries appearing in
             columns ("conditions") specified by keepcols, abutted with the result
             of cfcn (if cfcn=None, defaults to the mean) of each column specified
             by collapsecols, each followed by the fcn1/fcn2 results if requested.
             If keepcols is [], one entry per collapse column is returned: the
             bare cfcn result, or [cfcn result, fcn1 result, fcn2 result] when
             extra functions were given.
    """
    def collmean (inlist):
        return sum(inlist)/float(len(inlist))

    def attempt (fcn,column):
        # deliberate best-effort: a summary function that cannot handle the
        # column must not abort the whole collapse
        try:
            return fcn(column)
        except Exception:
            return 'N/A'

    def summarize (rows,col):
        # [cfcn result, (fcn1 result), (fcn2 result)] for one column of rows
        column = colex(rows,col)
        results = [cfcn(column)]
        for fcn in (fcn1,fcn2):
            if fcn is not None:
                results.append(attempt(fcn,column))
        return results

    if not isinstance(keepcols,(list,tuple)):
        keepcols = [keepcols]
    if not isinstance(collapsecols,(list,tuple)):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = collmean

    if len(keepcols) == 0:
        means = []
        for col in collapsecols:
            results = summarize(listoflists,col)
            if len(results) == 1:
                means.append(results[0])      # no extras requested: bare value
            else:
                means.append(results)
        return means

    conditions = unique(colex(listoflists,keepcols))
    conditions.sort()
    newlist = []
    for condition in conditions:
        # work on a fresh list so the values returned by unique() are not
        # modified while they are still being used as the match criterion
        if isinstance(condition,(list,tuple)):
            key = list(condition)
        else:
            key = [condition]
        tmprows = linexand(listoflists,keepcols,key)
        row = list(key)
        for col in collapsecols:
            row.extend(summarize(tmprows,col))
        newlist.append(row)
    return newlist
00307
00308
def dm (listoflists,criterion):
    """
    Returns rows from the passed list of lists that meet the criteria in
    the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9'
    will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows
    with column 2 equal to the string 'N').

    Usage:   dm (listoflists, criterion)
    Returns: a list of the rows from listoflists that meet the specified criterion.
    """
    # NOTE(review): criterion is evaluated as Python source; never pass text
    # that comes from an untrusted origin.
    keep = eval('lambda x: '+criterion)
    return [row for row in listoflists if keep(row)]
00322
00323
def flat(l):
    """
    Returns the flattened version of a '2D' list.  List-correlate to the
    a.ravel() method of NumPy arrays.  Only one level of nesting is removed;
    any iterable of iterables is accepted.

    Usage:   flat(l)
    Returns: a new list holding the items of every row of l, in order
    """
    return [item for row in l for item in row]
00336
00337
def linexand (listoflists,columnlist,valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
    len(columnlist) must equal len(valuelist).  Values are compared directly
    with ==, so strings containing quotes and floats are matched exactly.

    Usage:   linexand (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
    Raises:  IndexError if valuelist is shorter than columnlist
    """
    if not isinstance(columnlist,(list,tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist,(list,tuple)):
        valuelist = [valuelist]
    # indexed on purpose (rather than zip) so a too-short valuelist is an error
    pairs = [(columnlist[i],valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if all(row[col] == val for col,val in pairs)]
00362
00363
def linexor (listoflists,columnlist,valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
    One value is required for each column in columnlist.  If only one column
    is given but multiple values appear in valuelist, the values are all
    assumed to pertain to that column; likewise a single value is compared
    against every column given (as alinexor does).  Values are compared
    directly with ==.

    Usage:   linexor (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
    Raises:  IndexError if valuelist is shorter than columnlist (after the
             single-item expansion described above)
    """
    if not isinstance(columnlist,(list,tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist,(list,tuple)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = list(columnlist)*len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = list(valuelist)*len(columnlist)
    # indexed on purpose (rather than zip) so a too-short valuelist is an error
    pairs = [(columnlist[i],valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if any(row[col] == val for col,val in pairs)]
00392
00393
def linedelimited (inlist,delimiter):
    """
    Returns a string composed of elements in inlist, with each element
    separated by 'delimiter'.  Non-string elements are converted with str().
    The delimiter may be of any length (including empty).  Used by function
    writedelimited.  Use '\\t' for tab-delimiting.

    Usage:   linedelimited (inlist,delimiter)
    Returns: the delimited string ('' for an empty inlist)
    """
    cells = []
    for item in inlist:
        if not isinstance(item,str):
            item = str(item)
        cells.append(item)
    return delimiter.join(cells)
00409
00410
def lineincols (inlist,colsize):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in columns of (fixed) colsize.  Non-string elements are
    converted with str(); elements wider than colsize are cut down to their
    first colsize characters so that every column has the same width.

    Usage:   lineincols (inlist,colsize)   where colsize is an integer
    Returns: the formatted string
    """
    cells = []
    for item in inlist:
        if not isinstance(item,str):
            item = str(item)
        if len(item) <= colsize:
            cells.append(item.rjust(colsize))
        else:
            cells.append(item[0:colsize])
    return ''.join(cells)
00430
00431
def lineincustcols (inlist,colsizes):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in a column of width specified by a sequence colsizes.  The
    length of colsizes must be greater than or equal to the number of columns
    in inlist.  Non-string elements are converted with str(); elements wider
    than their column are cut down to the column width.

    Usage:   lineincustcols (inlist,colsizes)
    Returns: formatted string created from inlist
    Raises:  IndexError if colsizes is shorter than inlist
    """
    cells = []
    for i,item in enumerate(inlist):
        if not isinstance(item,str):
            item = str(item)
        width = colsizes[i]
        if len(item) <= width:
            cells.append(item.rjust(width))
        else:
            cells.append(item[0:width])
    return ''.join(cells)
00456
00457
def list2string (inlist,delimit=' '):
    """
    Converts a 1D list to a single long string for file output, joining the
    items with 'delimit'.  Non-string items are converted with str().

    Usage:   list2string (inlist,delimit=' ')
    Returns: the string created from inlist
    """
    stringlist = []
    for item in inlist:
        if not isinstance(item,str):
            item = str(item)
        stringlist.append(item)
    return delimit.join(stringlist)
00468
00469
def makelol(inlist):
    """
    Converts a 1D list to a 2D list (i.e., a list-of-lists), one item per
    row.  Useful when a 1D list has to be written one item per line by a
    routine that expects rows.

    Usage:   makelol(inlist)
    Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
    """
    return [[item] for item in inlist]
00482
00483
def makestr (x):
    """
    Returns x unchanged if it is already a string, otherwise str(x).

    Usage:   makestr(x)
    Returns: a string
    """
    if not isinstance(x,str):
        x = str(x)
    return x
00488
00489
def printcc (lst,extra=2):
    """
    Prints a list of lists in columns, customized by the max size of items
    within the columns (max size of items in col, plus 'extra' number of spaces).
    Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
    respectively.  Those marker rows (given either as the bare string or as a
    one-item list) are ignored when the column widths are calculated.  The
    number of columns is taken from the first data row.

    Usage:   printcc (lst,extra=2)
    Returns: None
    """
    def is_blank (row):
        return row == ['\n'] or row == '\n' or row == '' or row == ['']

    def is_dashes (row):
        return row == ['dashes'] or row == 'dashes'

    if len(lst) == 0:
        return None                     # nothing to print
    if not isinstance(lst[0],(list,tuple)):
        lst = [lst]                     # a flat list is printed as one row
    datarows = [row for row in lst if not (is_blank(row) or is_dashes(row))]
    maxsize = []
    if len(datarows) > 0:
        for col in range(len(datarows[0])):
            widest = max([len(str(row[col])) for row in datarows])
            maxsize.append(widest + extra)
    for row in lst:
        if is_blank(row):
            print('')
        elif is_dashes(row):
            # the dashes span the widest item of each column, not the padding
            dashes = ['-'*(size-extra) for size in maxsize]
            print(lineincustcols(dashes,maxsize))
        else:
            print(lineincustcols(row,maxsize))
    return None
00526
00527
def printincols (listoflists,colsize):
    """
    Prints a list of lists in columns of (fixed) colsize width, where
    colsize is an integer.  Each row is formatted by lineincols.

    Usage:   printincols (listoflists,colsize)
    Returns: None
    """
    for row in listoflists:
        # single-argument call form prints identically on Python 2 and 3
        print(lineincols(row,colsize))
    return None
00539
00540
def pl (listoflists):
    """
    Prints a list of lists, 1 list (row) at a time.  A row whose last item
    is '\\n' is written without an additional newline; empty rows are printed
    as they are.

    Usage:   pl(listoflists)
    Returns: None
    """
    import sys
    for row in listoflists:
        if len(row) > 0 and row[-1] == '\n':
            text = str(row)
            # mimic 'print row,': no newline is added, and a separating
            # space follows unless the text itself already ends the line
            if not text.endswith('\n'):
                text = text + ' '
            sys.stdout.write(text)
        else:
            print(row)
    return None
00554
00555
def printl(listoflists):
    """
    Alias for pl: prints listoflists one row at a time.

    Usage:   printl(listoflists)
    Returns: None
    """
    pl(listoflists)
00560
00561
def replace (inlst,oldval,newval):
    """
    Replaces all occurrences of 'oldval' with 'newval', recursively.  Nested
    lists and tuples are searched as well; the argument itself is never
    modified.  Lists and tuples are never themselves compared to oldval, only
    their contents are.

    Usage:   replace (inlst,oldval,newval)
    Returns: a new sequence (a tuple if inlst is a tuple, otherwise a list)
             with the replacements made at every nesting level
    """
    result = []
    for item in inlst:
        if isinstance(item,(list,tuple)):
            item = replace(item,oldval,newval)
        elif item == oldval:
            item = newval
        result.append(item)
    if isinstance(inlst,tuple):
        return tuple(result)
    return result
00575
00576
def recode (inlist,listmap,cols=None):
    """
    Changes the values in a list of lists to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers).  cols defaults
    to None (meaning all columns of every row are recoded).  When a value
    appears more than once in the first column of listmap, the first
    occurrence is used.  Values not found in listmap are left unchanged.

    Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
    Returns: a deep copy of inlist with the appropriate values replaced with
             new ones
    """
    lst = copy.deepcopy(inlist)
    oldvals = [pair[0] for pair in listmap]     # built once, not per cell
    if cols is not None and not isinstance(cols,(list,tuple)):
        cols = [cols]
    for row in lst:
        if cols is None:
            targets = range(len(row))           # every column of THIS row
        else:
            targets = cols
        for col in targets:
            try:
                idx = oldvals.index(row[col])
            except ValueError:
                continue                        # value has no mapping
            row[col] = listmap[idx][1]
    return lst
00606
00607
def remap (listoflists,criterion):
    """
    Remaps the rows of a 2D list (listoflists).  This requires a criterion
    as a function of 'x' so that the result of the following is returned ...
    map(lambda x: criterion,listoflists), as a list.

    Usage:   remap(listoflists,criterion)    criterion=string
    Returns: remapped version of listoflists (one result per row)
    """
    # NOTE(review): criterion is evaluated as Python source; never pass text
    # that comes from an untrusted origin.
    transform = eval('lambda x: '+criterion)
    return [transform(row) for row in listoflists]
00620
00621
def roundlist (inlist,digits):
    """
    Goes through each element in a 1D or 2D inlist, and applies the following
    function to all elements that are floats ... round(element,digits).
    The argument is not modified.  A 1D list (recognized by a number in its
    first position) is treated as a single row, so the result for a 1D list
    is a one-row 2D list.  Rows that are not lists or tuples are passed
    through unchanged.

    Usage:   roundlist(inlist,digits)
    Returns: new list of rows with rounded floats ([] for an empty inlist)
    """
    def round_row (row):
        if not isinstance(row,(list,tuple)):
            return row
        rounded = []
        for value in row:
            if isinstance(value,float):
                value = round(value,digits)
            rounded.append(value)
        if isinstance(row,tuple):
            return tuple(rounded)
        return rounded

    if len(inlist) > 0 and isinstance(inlist[0],(int,float)):
        inlist = [inlist]
    return [round_row(row) for row in inlist]
00638
00639
def sortby(listoflists,sortcols):
    """
    Sorts a list of lists on the column(s) specified in the sequence
    sortcols (a single column number is also accepted).  Rows that tie on
    all sort columns are ordered by comparing the complete rows.

    Usage:   sortby(listoflists,sortcols)
    Returns: sorted list (of new row lists), unchanged column ordering;
             listoflists itself is not modified
    """
    if isinstance(sortcols,(list,tuple)):
        cols = list(sortcols)
    else:
        cols = [sortcols]

    def sortkey (row):
        # sort columns first, then the whole row as the tie-breaker
        return [row[col] for col in cols] + list(row)

    return [list(row) for row in sorted(listoflists,key=sortkey)]
00657
00658
def unique (inlist):
    """
    Returns all unique items in the passed list, in order of first
    appearance.  If a list-of-lists is passed, unique LISTS are found (i.e.,
    items in the first dimension are compared).

    Usage:   unique (inlist)
    Returns: the unique elements (or rows) in inlist
    """
    seen = []
    for candidate in inlist:
        if candidate in seen:
            continue
        seen.append(candidate)
    return seen
00673
def duplicates(inlist):
    """
    Returns duplicate items in the FIRST dimension of the passed list.  An
    item is reported once for every occurrence that is followed by another
    occurrence later in the list.

    Usage:   duplicates (inlist)
    Returns: list of the repeated items, in order of appearance
    """
    return [item for pos,item in enumerate(inlist) if item in inlist[pos+1:]]
00685
00686
def nonrepeats(inlist):
    """
    Returns items that are NOT duplicated in the first dim of the passed list
    (i.e., the items that occur exactly once).

    Usage:   nonrepeats (inlist)
    Returns: list of the items occurring exactly once, in order of appearance
    """
    singles = [item for item in inlist if inlist.count(item) == 1]
    return singles
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717 try:
00718 import numpy as N
00719
def aabut (source, *args):
    """
    Like the |Stat abut command.  It concatenates two arrays column-wise
    and returns the result.  1D arrays are treated as single columns.
    CAUTION:  If one array is shorter, it will be repeated until it is as
    long as the other.

    Usage:   aabut (source, args)    where args=any # of arrays
    Returns: an array as long as the LONGEST array passed, source appearing on
             the 'left', arrays in <args> attached on the 'right'.
    """
    source = N.atleast_1d(source)
    if source.ndim == 1:
        source = N.reshape(source,(source.shape[0],1))
    for addon in args:
        addon = N.atleast_1d(addon)
        if addon.ndim == 1:
            # keep the addon's OWN length here; lengths are reconciled below
            addon = N.reshape(addon,(addon.shape[0],1))
        if len(addon) < len(source):
            addon = N.resize(addon,(source.shape[0],addon.shape[1]))
        elif len(source) < len(addon):
            source = N.resize(source,(addon.shape[0],source.shape[1]))
        source = N.concatenate((source,addon),1)
    return source
00747
00748
def acolex (a,indices,axis=1):
    """
    Extracts specified indices (a list) from passed array, along passed
    axis (column extraction is default).  BEWARE: A 1D array is presumed to be a
    column-array (and the whole array will be returned as a column, whatever
    indices are given).

    Usage:   acolex (a,indices,axis=1)
    Returns: the columns of a specified by indices
    """
    if not isinstance(indices,(list,tuple,N.ndarray)):
        indices = [indices]
    a = N.asarray(a)
    if a.ndim == 1:
        return N.resize(a,[a.shape[0],1])
    return N.take(a,indices,axis)
00765
00766
def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols,
    retaining the unique sets of values in keepcols and the mean for each.
    Setting fcn1 and/or fcn2 to a function rather than None appends the
    result of that function after each calculated mean ('N/A' if the function
    raises an exception).  cfcn is the collapse function to apply (defaults
    to the mean).  Every function is called with an (N,1) column array.

    Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: unique 'conditions' specified by the contents of columns specified
             by keepcols, abutted with the mean(s) of column(s) specified by
             collapsecols (plus fcn1/fcn2 results), one row per condition,
             sorted by condition.  If keepcols is [], one row per collapse
             column is returned (a 1D array of means if neither fcn1 nor
             fcn2 is given).  An object array is returned if any entry is a
             string.
    """
    def acollmean (inarray):
        values = N.ravel(inarray)
        return N.sum(values)/float(len(values))

    def attempt (fcn,column):
        # deliberate best-effort: a summary function that cannot handle the
        # column must not abort the whole collapse
        try:
            return fcn(column)
        except Exception:
            return 'N/A'

    def summarize (rows,col):
        # [cfcn result, (fcn1 result), (fcn2 result)] for one column of rows
        column = N.take(rows,[col],1)
        results = [cfcn(column)]
        for fcn in (fcn1,fcn2):
            if fcn is not None:
                results.append(attempt(fcn,column))
        return results

    def toarray (rows):
        # strings (e.g., 'N/A' or string conditions) need an object array,
        # otherwise numpy would silently turn every number into a string
        for row in rows:
            for value in row:
                if isinstance(value,str):
                    return N.array(rows,dtype='O')
        return N.array(rows)

    if not isinstance(keepcols,(list,tuple,N.ndarray)):
        keepcols = [keepcols]
    if not isinstance(collapsecols,(list,tuple,N.ndarray)):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = acollmean
    a = N.asarray(a)
    if a.ndim == 1:
        a = N.reshape(a,(a.shape[0],1))     # a 1D array is a single column

    if len(keepcols) == 0:
        results = [summarize(a,col) for col in collapsecols]
        if fcn1 is None and fcn2 is None:
            return N.array([result[0] for result in results])
        return toarray(results)

    keys = N.take(a,keepcols,1).tolist()    # one condition (list) per row
    conditions = []
    for key in keys:
        if key not in conditions:
            conditions.append(key)
    conditions.sort()
    newlist = []
    for condition in conditions:
        members = [i for i,key in enumerate(keys) if key == condition]
        tmprows = N.take(a,members,0)
        row = list(condition)
        for col in collapsecols:
            row.extend(summarize(tmprows,col))
        newlist.append(row)
    return toarray(newlist)
00837
00838
def adm (a,criterion):
    """
    Returns rows from the passed array that meet the criteria in
    the passed criterion expression (a string as a function of x).

    Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
    Returns: an array of the matching rows (an object array if the rows
             cannot be combined into a regular array)
    """
    # NOTE(review): criterion is evaluated as Python source; never pass text
    # that comes from an untrusted origin.
    keep = eval('lambda x: '+criterion)
    lines = [row for row in a if keep(row)]
    try:
        return N.array(lines)
    except (TypeError,ValueError):
        return N.array(lines,dtype='O')
00853
00854
def isstring(x):
    """
    Tests whether x is a string.

    Usage:   isstring(x)
    Returns: 1 if x is a string, 0 otherwise
    """
    if isinstance(x,str):
        return 1
    return 0
00860
00861
def alinexand (a,columnlist,valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val
    (from valuelist).  One value is required for each column in columnlist.
    Values are compared directly with ==, so floats and strings containing
    quotes are matched exactly.

    Usage:   alinexand (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
    Raises:  IndexError if valuelist is shorter than columnlist
    """
    if not isinstance(columnlist,(list,tuple,N.ndarray)):
        columnlist = [columnlist]
    if not isinstance(valuelist,(list,tuple,N.ndarray)):
        valuelist = [valuelist]
    # indexed on purpose (rather than zip) so a too-short valuelist is an error
    pairs = [(columnlist[i],valuelist[i]) for i in range(len(columnlist))]
    rows = [row for row in a if all(row[col] == val for col,val in pairs)]
    try:
        return N.array(rows)
    except (TypeError,ValueError):
        return N.array(rows,dtype='O')
00883
00884
def alinexor (a,columnlist,valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val (from
    valuelist).  One value is required for each column in columnlist.
    The exception is if either columnlist or valuelist has only 1 value,
    in which case that item will be expanded to match the length of the
    other list.  Values are compared directly with ==.

    Usage:   alinexor (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
    Raises:  IndexError if valuelist is shorter than columnlist (after the
             single-item expansion described above)
    """
    if not isinstance(columnlist,(list,tuple,N.ndarray)):
        columnlist = [columnlist]
    if not isinstance(valuelist,(list,tuple,N.ndarray)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = list(columnlist)*len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = list(valuelist)*len(columnlist)
    # indexed on purpose (rather than zip) so a too-short valuelist is an error
    pairs = [(columnlist[i],valuelist[i]) for i in range(len(columnlist))]
    rows = [row for row in a if any(row[col] == val for col,val in pairs)]
    try:
        return N.array(rows)
    except (TypeError,ValueError):
        return N.array(rows,dtype='O')
00913
00914
def areplace (a,oldval,newval):
    """
    Builds a copy of array a in which every element equal to oldval has been
    exchanged for newval.

    Usage:   areplace(a,oldval,newval)
    Returns: the new array; a itself is left untouched
    """
    matches = (a == oldval)
    swapped = N.where(matches,newval,a)
    return swapped
00922
00923
def arecode (a,listmap,col='all'):
    """
    Remaps the values in an array to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers as most stats
    packages require.  Can work on SINGLE columns, or 'all' columns at once.
    Every pair in listmap is applied to the ORIGINAL values, so a value that
    has just been recoded is never recoded again by a later pair; when a
    value appears more than once in listmap, the first pair wins.

    Usage:   arecode (a,listmap,col='all')
    Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1].
             An object array is returned if a is not numeric or if any
             replacement value is a string.
    """
    a = N.asarray(a)
    recode_all = isinstance(col,str) and col == 'all'
    # non-numeric data and string replacements cannot share a numeric array
    use_objects = a.dtype.kind not in 'biufc'
    for pair in listmap:
        if isinstance(pair[1],str):
            use_objects = True
    if use_objects:
        a = N.array(a,dtype='O')
    if recode_all:
        original = a.ravel()
    else:
        original = a[:,col]
    work = original
    pending = N.ones(original.shape,dtype=bool)   # cells not yet recoded
    for pair in listmap:
        matches = N.asarray(original == pair[0]).astype(bool)
        hits = N.logical_and(pending,matches)
        work = N.where(hits,pair[1],work)
        pending = N.logical_and(pending,N.logical_not(hits))
    if recode_all:
        return N.reshape(work,a.shape)
    return N.concatenate([a[:,0:col],work[:,N.newaxis],a[:,col+1:]],1)
00954
00955
def arowcompare(row1, row2):
    """
    Compares two rows from an array, element by element, regardless of
    whether it is an array of numbers or of python objects.

    Usage:   arowcompare(row1,row2)
    Returns: a boolean array containing True (1) where the two rows had
             identical elements and False (0) otherwise
    """
    row1 = N.asarray(row1)
    row2 = N.asarray(row2)
    if row1.dtype.char == 'O' or row2.dtype.char == 'O':
        # object arrays: compare the python objects pairwise with ==
        return N.array([x == y for x,y in zip(row1,row2)],dtype=bool)
    return N.equal(row1,row2)
00972
00973
def arowsame(row1, row2):
    """
    Compares two rows from an array, regardless of whether it is an
    array of numbers or of python objects.  Rows of different shape are
    never considered the same.

    Usage:   arowsame(row1,row2)
    Returns: True (1) if the two rows are identical, False (0) otherwise.
    """
    row1 = N.asarray(row1)
    row2 = N.asarray(row2)
    if row1.shape != row2.shape:
        return False
    if row1.dtype.char == 'O' or row2.dtype.char == 'O':
        # object arrays: compare the python objects pairwise with ==
        return all([bool(x == y) for x,y in zip(row1.ravel(),row2.ravel())])
    return bool(N.all(N.equal(row1,row2)))
00984
00985
def asortrows(a,axis=0):
    """
    Sorts an array "by rows".  This differs from the numpy sort() function,
    which sorts elements WITHIN the given axis.  Instead, this function keeps
    the elements along the given axis intact, but shifts them 'up or down'
    relative to one another.  For a 2D array and axis=0 the rows are put in
    lexicographic order (first column first, later columns breaking ties);
    with axis=1 the columns are ordered the same way.  Arrays that are not
    2D are passed to numpy's sort along the given axis.

    Usage:   asortrows(a,axis=0)
    Returns: sorted version of a
    """
    a = N.asarray(a)
    if a.ndim != 2:
        return N.sort(a,axis=axis,kind='mergesort')
    if axis % 2 == 1:
        # ordering columns is ordering the rows of the transpose
        return N.transpose(asortrows(N.transpose(a),0))
    if a.shape[1] == 0:
        return a.copy()                 # no columns to sort on
    # lexsort treats its LAST key as the primary one, hence the reversal
    order = N.lexsort(N.transpose(a)[::-1])
    return N.take(a,order,0)
00997
00998
def aunique(inarray):
    """
    Returns unique items in the FIRST dimension of the passed array, in order
    of first appearance.  Items (or rows) are the same when all of their
    elements compare equal with ==.

    Usage:   aunique (inarray)
    Returns: an array holding the first occurrence of every distinct item
             (an empty array for empty input)
    """
    inarray = N.asarray(inarray)
    kept = []                           # positions of the first occurrences
    for i in range(len(inarray)):
        for j in kept:
            if N.all(N.asarray(inarray[i] == inarray[j])):
                break                   # already have an equal item
        else:
            kept.append(i)
    return N.take(inarray,N.array(kept,dtype=int),0)
01038
01039
def aduplicates(inarray):
    """
    Returns duplicate items in the FIRST dimension of the passed array.
    Every item (or row) that occurs more than once is reported exactly once,
    in order of first appearance.

    Usage:   aduplicates (inarray)
    Returns: an array of the duplicated items (empty if there are none)
    """
    items = N.array(inarray).tolist()
    dups = []
    for i,item in enumerate(items):
        if item in items[i+1:] and item not in dups:
            dups.append(item)
    return N.array(dups)
01064
01065 except ImportError:
01066 pass