pstat.py
Go to the documentation of this file.
00001 # Copyright (c) 1999-2007 Gary Strangman; All Rights Reserved.
00002 #
00003 # Permission is hereby granted, free of charge, to any person obtaining a copy
00004 # of this software and associated documentation files (the "Software"), to deal
00005 # in the Software without restriction, including without limitation the rights
00006 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00007 # copies of the Software, and to permit persons to whom the Software is
00008 # furnished to do so, subject to the following conditions:
00009 # 
00010 # The above copyright notice and this permission notice shall be included in
00011 # all copies or substantial portions of the Software.
00012 # 
00013 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00014 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00015 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00016 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00017 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00018 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00019 # THE SOFTWARE.
00020 #
00021 # Comments and/or additions are welcome (send e-mail to:
00022 # strang@nmr.mgh.harvard.edu).
00023 # 
00024 """
00025 pstat.py module
00026 
00027 #################################################
00028 #######  Written by:  Gary Strangman  ###########
00029 #######  Last modified:  Dec 18, 2007 ###########
00030 #################################################
00031 
00032 This module provides some useful list and array manipulation routines
00033 modeled after those found in the |Stat package by Gary Perlman, plus a
00034 number of other useful list/file manipulation functions.  The list-based
00035 functions include:
00036 
00037       abut (source,*args)
00038       simpleabut (source, addon)
00039       colex (listoflists,cnums)
00040       collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
00041       dm (listoflists,criterion)
00042       flat (l)
00043       linexand (listoflists,columnlist,valuelist)
00044       linexor (listoflists,columnlist,valuelist)
00045       linedelimited (inlist,delimiter)
00046       lineincols (inlist,colsize) 
00047       lineincustcols (inlist,colsizes)
00048       list2string (inlist)
00049       makelol(inlist)
00050       makestr(x)
00051       printcc (lst,extra=2)
00052       printincols (listoflists,colsize)
00053       pl (listoflists)
00054       printl(listoflists)
00055       replace (lst,oldval,newval)
00056       recode (inlist,listmap,cols='all')
00057       remap (listoflists,criterion)
00058       roundlist (inlist,num_digits_to_round_floats_to)
00059       sortby(listoflists,sortcols)
00060       unique (inlist)
00061       duplicates(inlist)
00062       writedelimited (listoflists, delimiter, file, writetype='w')
00063 
00064 Some of these functions have alternate versions which are defined only if
00065 Numeric (NumPy) can be imported.  These functions are generally named as
00066 above, with an 'a' prefix.
00067 
00068       aabut (source, *args)
00069       acolex (a,indices,axis=1)
00070       acollapse (a,keepcols,collapsecols,sterr=0,ns=0)
00071       adm (a,criterion)
00072       alinexand (a,columnlist,valuelist)
00073       alinexor (a,columnlist,valuelist)
00074       areplace (a,oldval,newval)
00075       arecode (a,listmap,col='all')
00076       arowcompare (row1, row2)
00077       arowsame (row1, row2)
00078       asortrows(a,axis=0)
00079       aunique(inarray)
00080       aduplicates(inarray)
00081 
00082 Currently, the code is all but completely un-optimized.  In many cases, the
00083 array versions of functions amount simply to aliases to built-in array
00084 functions/methods.  Their inclusion here is for function name consistency.
00085 """
00086 
00087 ## CHANGE LOG:
00088 ## ==========
00089 ## 07-11-26 ... edited to work with numpy
00090 ## 01-11-15 ... changed list2string() to accept a delimiter
00091 ## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
00092 ## 01-05-31 ... added duplicates() and aduplicates() functions
00093 ## 00-12-28 ... license made GPL, docstring and import requirements
00094 ## 99-11-01 ... changed version to 0.3
00095 ## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
00096 ## 03/27/99 ... added areplace function, made replace fcn recursive
00097 ## 12/31/98 ... added writefc function for ouput to fixed column sizes
00098 ## 12/07/98 ... fixed import problem (failed on collapse() fcn)
00099 ##              added __version__ variable (now 0.2)
00100 ## 12/05/98 ... updated doc-strings
00101 ##              added features to collapse() function
00102 ##              added flat() function for lists
00103 ##              fixed a broken asortrows() 
00104 ## 11/16/98 ... fixed minor bug in aput for 1D arrays
00105 ##
00106 ## 11/08/98 ... fixed aput to output large arrays correctly
00107 
00108 import stats  # required 3rd party module
00109 import string, copy
00110 from types import *
00111 
00112 __version__ = 0.4
00113 
00114 ###===========================  LIST FUNCTIONS  ==========================
00115 ###
00116 ### Here are the list functions, DEFINED FOR ALL SYSTEMS.
00117 ### Array functions (for NumPy-enabled computers) appear below.
00118 ###
00119 
00120 def abut (source,*args):
00121     """
00122 Like the |Stat abut command.  It concatenates two lists side-by-side
00123 and returns the result.  '2D' lists are also accomodated for either argument
00124 (source or addon).  CAUTION:  If one list is shorter, it will be repeated
00125 until it is as long as the longest list.  If this behavior is not desired,
00126 use pstat.simpleabut().
00127 
00128 Usage:   abut(source, args)   where args=any # of lists
00129 Returns: a list of lists as long as the LONGEST list past, source on the
00130          'left', lists in <args> attached consecutively on the 'right'
00131 """
00132 
00133     if type(source) not in [ListType,TupleType]:
00134         source = [source]
00135     for addon in args:
00136         if type(addon) not in [ListType,TupleType]:
00137             addon = [addon]
00138         if len(addon) < len(source):                # is source list longer?
00139             if len(source) % len(addon) == 0:        # are they integer multiples?
00140                 repeats = len(source)/len(addon)    # repeat addon n times
00141                 origadd = copy.deepcopy(addon)
00142                 for i in range(repeats-1):
00143                     addon = addon + origadd
00144             else:
00145                 repeats = len(source)/len(addon)+1  # repeat addon x times,
00146                 origadd = copy.deepcopy(addon)      #    x is NOT an integer
00147                 for i in range(repeats-1):
00148                     addon = addon + origadd
00149                     addon = addon[0:len(source)]
00150         elif len(source) < len(addon):                # is addon list longer?
00151             if len(addon) % len(source) == 0:        # are they integer multiples?
00152                 repeats = len(addon)/len(source)    # repeat source n times
00153                 origsour = copy.deepcopy(source)
00154                 for i in range(repeats-1):
00155                     source = source + origsour
00156             else:
00157                 repeats = len(addon)/len(source)+1  # repeat source x times,
00158                 origsour = copy.deepcopy(source)    #   x is NOT an integer
00159                 for i in range(repeats-1):
00160                     source = source + origsour
00161                 source = source[0:len(addon)]
00162 
00163         source = simpleabut(source,addon)
00164     return source
00165 
00166 
00167 def simpleabut (source, addon):
00168     """
00169 Concatenates two lists as columns and returns the result.  '2D' lists
00170 are also accomodated for either argument (source or addon).  This DOES NOT
00171 repeat either list to make the 2 lists of equal length.  Beware of list pairs
00172 with different lengths ... the resulting list will be the length of the
00173 FIRST list passed.
00174 
00175 Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
00176 Returns: a list of lists as long as source, with source on the 'left' and
00177                  addon on the 'right'
00178 """
00179     if type(source) not in [ListType,TupleType]:
00180         source = [source]
00181     if type(addon) not in [ListType,TupleType]:
00182         addon = [addon]
00183     minlen = min(len(source),len(addon))
00184     list = copy.deepcopy(source)                # start abut process
00185     if type(source[0]) not in [ListType,TupleType]:
00186         if type(addon[0]) not in [ListType,TupleType]:
00187             for i in range(minlen):
00188                 list[i] = [source[i]] + [addon[i]]        # source/addon = column
00189         else:
00190             for i in range(minlen):
00191                 list[i] = [source[i]] + addon[i]        # addon=list-of-lists
00192     else:
00193         if type(addon[0]) not in [ListType,TupleType]:
00194             for i in range(minlen):
00195                 list[i] = source[i] + [addon[i]]        # source=list-of-lists
00196         else:
00197             for i in range(minlen):
00198                 list[i] = source[i] + addon[i]        # source/addon = list-of-lists
00199     source = list
00200     return source
00201 
00202 
00203 def colex (listoflists,cnums):
00204     """
00205 Extracts from listoflists the columns specified in the list 'cnums'
00206 (cnums can be an integer, a sequence of integers, or a string-expression that
00207 corresponds to a slice operation on the variable x ... e.g., 'x[3:]' will colex
00208 columns 3 onward from the listoflists).
00209 
00210 Usage:   colex (listoflists,cnums)
00211 Returns: a list-of-lists corresponding to the columns from listoflists
00212          specified by cnums, in the order the column numbers appear in cnums
00213 """
00214     global index
00215     column = 0
00216     if type(cnums) in [ListType,TupleType]:   # if multiple columns to get
00217         index = cnums[0]
00218         column = map(lambda x: x[index], listoflists)
00219         for col in cnums[1:]:
00220             index = col
00221             column = abut(column,map(lambda x: x[index], listoflists))
00222     elif type(cnums) == StringType:              # if an 'x[3:]' type expr.
00223         evalstring = 'map(lambda x: x'+cnums+', listoflists)'
00224         column = eval(evalstring)
00225     else:                                     # else it's just 1 col to get
00226         index = cnums
00227         column = map(lambda x: x[index], listoflists)
00228     return column
00229 
00230 
00231 def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
00232      """
00233 Averages data in collapsecol, keeping all unique items in keepcols
00234 (using unique, which keeps unique LISTS of column numbers), retaining the
00235 unique sets of values in keepcols, the mean for each.  Setting fcn1
00236 and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len)
00237 will append those results (e.g., the sterr, N) after each calculated mean.
00238 cfcn is the collapse function to apply (defaults to mean, defined here in the
00239 pstat module to avoid circular imports with stats.py, but harmonicmean or
00240 others could be passed).
00241 
00242 Usage:    collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
00243 Returns: a list of lists with all unique permutations of entries appearing in
00244      columns ("conditions") specified by keepcols, abutted with the result of
00245      cfcn (if cfcn=None, defaults to the mean) of each column specified by
00246      collapsecols.
00247 """
00248      def collmean (inlist):
00249          s = 0
00250          for item in inlist:
00251              s = s + item
00252          return s/float(len(inlist))
00253 
00254      if type(keepcols) not in [ListType,TupleType]:
00255          keepcols = [keepcols]
00256      if type(collapsecols) not in [ListType,TupleType]:
00257          collapsecols = [collapsecols]
00258      if cfcn == None:
00259          cfcn = collmean
00260      if keepcols == []:
00261          means = [0]*len(collapsecols)
00262          for i in range(len(collapsecols)):
00263              avgcol = colex(listoflists,collapsecols[i])
00264              means[i] = cfcn(avgcol)
00265              if fcn1:
00266                  try:
00267                      test = fcn1(avgcol)
00268                  except:
00269                      test = 'N/A'
00270                      means[i] = [means[i], test]
00271              if fcn2:
00272                  try:
00273                      test = fcn2(avgcol)
00274                  except:
00275                      test = 'N/A'
00276                  try:
00277                      means[i] = means[i] + [len(avgcol)]
00278                  except TypeError:
00279                      means[i] = [means[i],len(avgcol)]
00280          return means
00281      else:
00282          values = colex(listoflists,keepcols)
00283          uniques = unique(values)
00284          uniques.sort()
00285          newlist = []
00286          if type(keepcols) not in [ListType,TupleType]:  keepcols = [keepcols]
00287          for item in uniques:
00288              if type(item) not in [ListType,TupleType]:  item =[item]
00289              tmprows = linexand(listoflists,keepcols,item)
00290              for col in collapsecols:
00291                  avgcol = colex(tmprows,col)
00292                  item.append(cfcn(avgcol))
00293                  if fcn1 <> None:
00294                      try:
00295                          test = fcn1(avgcol)
00296                      except:
00297                          test = 'N/A'
00298                      item.append(test)
00299                  if fcn2 <> None:
00300                      try:
00301                          test = fcn2(avgcol)
00302                      except:
00303                          test = 'N/A'
00304                      item.append(test)
00305                  newlist.append(item)
00306          return newlist
00307 
00308 
00309 def dm (listoflists,criterion):
00310     """
00311 Returns rows from the passed list of lists that meet the criteria in
00312 the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9'
00313 will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows
00314 with column 2 equal to the string 'N').
00315 
00316 Usage:   dm (listoflists, criterion)
00317 Returns: rows from listoflists that meet the specified criterion.
00318 """
00319     function = 'filter(lambda x: '+criterion+',listoflists)'
00320     lines = eval(function)
00321     return lines
00322 
00323 
00324 def flat(l):
00325     """
00326 Returns the flattened version of a '2D' list.  List-correlate to the a.ravel()()
00327 method of NumPy arrays.
00328 
00329 Usage:    flat(l)
00330 """
00331     newl = []
00332     for i in range(len(l)):
00333         for j in range(len(l[i])):
00334             newl.append(l[i][j])
00335     return newl
00336 
00337 
00338 def linexand (listoflists,columnlist,valuelist):
00339     """
00340 Returns the rows of a list of lists where col (from columnlist) = val
00341 (from valuelist) for EVERY pair of values (columnlist[i],valuelists[i]).
00342 len(columnlist) must equal len(valuelist).
00343 
00344 Usage:   linexand (listoflists,columnlist,valuelist)
00345 Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
00346 """
00347     if type(columnlist) not in [ListType,TupleType]:
00348         columnlist = [columnlist]
00349     if type(valuelist) not in [ListType,TupleType]:
00350         valuelist = [valuelist]
00351     criterion = ''
00352     for i in range(len(columnlist)):
00353         if type(valuelist[i])==StringType:
00354             critval = '\'' + valuelist[i] + '\''
00355         else:
00356             critval = str(valuelist[i])
00357         criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and'
00358     criterion = criterion[0:-3]         # remove the "and" after the last crit
00359     function = 'filter(lambda x: '+criterion+',listoflists)'
00360     lines = eval(function)
00361     return lines
00362 
00363 
00364 def linexor (listoflists,columnlist,valuelist):
00365     """
00366 Returns the rows of a list of lists where col (from columnlist) = val
00367 (from valuelist) for ANY pair of values (colunmlist[i],valuelist[i[).
00368 One value is required for each column in columnlist.  If only one value
00369 exists for columnlist but multiple values appear in valuelist, the
00370 valuelist values are all assumed to pertain to the same column.
00371 
00372 Usage:   linexor (listoflists,columnlist,valuelist)
00373 Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
00374 """
00375     if type(columnlist) not in [ListType,TupleType]:
00376         columnlist = [columnlist]
00377     if type(valuelist) not in [ListType,TupleType]:
00378         valuelist = [valuelist]
00379     criterion = ''
00380     if len(columnlist) == 1 and len(valuelist) > 1:
00381         columnlist = columnlist*len(valuelist)
00382     for i in range(len(columnlist)):          # build an exec string
00383         if type(valuelist[i])==StringType:
00384             critval = '\'' + valuelist[i] + '\''
00385         else:
00386             critval = str(valuelist[i])
00387         criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or'
00388     criterion = criterion[0:-2]         # remove the "or" after the last crit
00389     function = 'filter(lambda x: '+criterion+',listoflists)'
00390     lines = eval(function)
00391     return lines
00392 
00393 
00394 def linedelimited (inlist,delimiter):
00395     """
00396 Returns a string composed of elements in inlist, with each element
00397 separated by 'delimiter.'  Used by function writedelimited.  Use '\t'
00398 for tab-delimiting.
00399 
00400 Usage:   linedelimited (inlist,delimiter)
00401 """
00402     outstr = ''
00403     for item in inlist:
00404         if type(item) <> StringType:
00405             item = str(item)
00406         outstr = outstr + item + delimiter
00407     outstr = outstr[0:-1]
00408     return outstr
00409 
00410 
00411 def lineincols (inlist,colsize):
00412     """
00413 Returns a string composed of elements in inlist, with each element
00414 right-aligned in columns of (fixed) colsize.
00415 
00416 Usage:   lineincols (inlist,colsize)   where colsize is an integer
00417 """
00418     outstr = ''
00419     for item in inlist:
00420         if type(item) <> StringType:
00421             item = str(item)
00422         size = len(item)
00423         if size <= colsize:
00424             for i in range(colsize-size):
00425                 outstr = outstr + ' '
00426             outstr = outstr + item
00427         else:
00428             outstr = outstr + item[0:colsize+1]
00429     return outstr
00430 
00431 
00432 def lineincustcols (inlist,colsizes):
00433     """
00434 Returns a string composed of elements in inlist, with each element
00435 right-aligned in a column of width specified by a sequence colsizes.  The
00436 length of colsizes must be greater than or equal to the number of columns
00437 in inlist.
00438 
00439 Usage:   lineincustcols (inlist,colsizes)
00440 Returns: formatted string created from inlist
00441 """
00442     outstr = ''
00443     for i in range(len(inlist)):
00444         if type(inlist[i]) <> StringType:
00445             item = str(inlist[i])
00446         else:
00447             item = inlist[i]
00448         size = len(item)
00449         if size <= colsizes[i]:
00450             for j in range(colsizes[i]-size):
00451                 outstr = outstr + ' '
00452             outstr = outstr + item
00453         else:
00454             outstr = outstr + item[0:colsizes[i]+1]
00455     return outstr
00456 
00457 
00458 def list2string (inlist,delimit=' '):
00459     """
00460 Converts a 1D list to a single long string for file output, using
00461 the string.join function.
00462 
00463 Usage:   list2string (inlist,delimit=' ')
00464 Returns: the string created from inlist
00465 """
00466     stringlist = map(makestr,inlist)
00467     return string.join(stringlist,delimit)
00468 
00469 
00470 def makelol(inlist):
00471     """
00472 Converts a 1D list to a 2D list (i.e., a list-of-lists).  Useful when you
00473 want to use put() to write a 1D list one item per line in the file.
00474 
00475 Usage:   makelol(inlist)
00476 Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
00477 """
00478     x = []
00479     for item in inlist:
00480         x.append([item])
00481     return x
00482 
00483 
00484 def makestr (x):
00485     if type(x) <> StringType:
00486         x = str(x)
00487     return x
00488 
00489 
00490 def printcc (lst,extra=2):
00491     """
00492 Prints a list of lists in columns, customized by the max size of items
00493 within the columns (max size of items in col, plus 'extra' number of spaces).
00494 Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
00495 respectively.
00496 
00497 Usage:   printcc (lst,extra=2)
00498 Returns: None
00499 """
00500     if type(lst[0]) not in [ListType,TupleType]:
00501         lst = [lst]
00502     rowstokill = []
00503     list2print = copy.deepcopy(lst)
00504     for i in range(len(lst)):
00505         if lst[i] == ['\n'] or lst[i]=='\n' or lst[i]=='dashes' or lst[i]=='' or lst[i]==['']:
00506             rowstokill = rowstokill + [i]
00507     rowstokill.reverse()   # delete blank rows from the end
00508     for row in rowstokill:
00509         del list2print[row]
00510     maxsize = [0]*len(list2print[0])
00511     for col in range(len(list2print[0])):
00512         items = colex(list2print,col)
00513         items = map(makestr,items)
00514         maxsize[col] = max(map(len,items)) + extra
00515     for row in lst:
00516         if row == ['\n'] or row == '\n' or row == '' or row == ['']:
00517             print
00518         elif row == ['dashes'] or row == 'dashes':
00519             dashes = [0]*len(maxsize)
00520             for j in range(len(maxsize)):
00521                 dashes[j] = '-'*(maxsize[j]-2)
00522             print lineincustcols(dashes,maxsize)
00523         else:
00524             print lineincustcols(row,maxsize)
00525     return None
00526 
00527 
00528 def printincols (listoflists,colsize):
00529     """
00530 Prints a list of lists in columns of (fixed) colsize width, where
00531 colsize is an integer.
00532 
00533 Usage:   printincols (listoflists,colsize)
00534 Returns: None
00535 """
00536     for row in listoflists:
00537         print lineincols(row,colsize)
00538     return None
00539 
00540 
00541 def pl (listoflists):
00542     """
00543 Prints a list of lists, 1 list (row) at a time.
00544 
00545 Usage:   pl(listoflists)
00546 Returns: None
00547 """
00548     for row in listoflists:
00549         if row[-1] == '\n':
00550             print row,
00551         else:
00552             print row
00553     return None
00554 
00555 
00556 def printl(listoflists):
00557     """Alias for pl."""
00558     pl(listoflists)
00559     return
00560 
00561 
00562 def replace (inlst,oldval,newval):
00563     """
00564 Replaces all occurrences of 'oldval' with 'newval', recursively.
00565 
00566 Usage:   replace (inlst,oldval,newval)
00567 """
00568     lst = inlst*1
00569     for i in range(len(lst)):
00570         if type(lst[i]) not in [ListType,TupleType]:
00571             if lst[i]==oldval: lst[i]=newval
00572         else:
00573             lst[i] = replace(lst[i],oldval,newval)
00574     return lst
00575 
00576 
00577 def recode (inlist,listmap,cols=None):
00578     """
00579 Changes the values in a list to a new set of values (useful when
00580 you need to recode data from (e.g.) strings to numbers.  cols defaults
00581 to None (meaning all columns are recoded).
00582 
00583 Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
00584 Returns: inlist with the appropriate values replaced with new ones
00585 """
00586     lst = copy.deepcopy(inlist)
00587     if cols != None:
00588         if type(cols) not in [ListType,TupleType]:
00589             cols = [cols]
00590         for col in cols:
00591             for row in range(len(lst)):
00592                 try:
00593                     idx = colex(listmap,0).index(lst[row][col])
00594                     lst[row][col] = listmap[idx][1]
00595                 except ValueError:
00596                     pass
00597     else:
00598         for row in range(len(lst)):
00599             for col in range(len(lst)):
00600                 try:
00601                     idx = colex(listmap,0).index(lst[row][col])
00602                     lst[row][col] = listmap[idx][1]
00603                 except ValueError:
00604                     pass
00605     return lst
00606 
00607 
00608 def remap (listoflists,criterion):
00609     """
00610 Remaps values in a given column of a 2D list (listoflists).  This requires
00611 a criterion as a function of 'x' so that the result of the following is
00612 returned ... map(lambda x: 'criterion',listoflists).  
00613 
00614 Usage:   remap(listoflists,criterion)    criterion=string
00615 Returns: remapped version of listoflists
00616 """
00617     function = 'map(lambda x: '+criterion+',listoflists)'
00618     lines = eval(function)
00619     return lines
00620 
00621 
00622 def roundlist (inlist,digits):
00623     """
00624 Goes through each element in a 1D or 2D inlist, and applies the following
00625 function to all elements of FloatType ... round(element,digits).
00626 
00627 Usage:   roundlist(inlist,digits)
00628 Returns: list with rounded floats
00629 """
00630     if type(inlist[0]) in [IntType, FloatType]:
00631         inlist = [inlist]
00632     l = inlist*1
00633     for i in range(len(l)):
00634         for j in range(len(l[i])):
00635             if type(l[i][j])==FloatType:
00636                 l[i][j] = round(l[i][j],digits)
00637     return l
00638 
00639 
00640 def sortby(listoflists,sortcols):
00641     """
00642 Sorts a list of lists on the column(s) specified in the sequence
00643 sortcols.
00644 
00645 Usage:   sortby(listoflists,sortcols)
00646 Returns: sorted list, unchanged column ordering
00647 """
00648     newlist = abut(colex(listoflists,sortcols),listoflists)
00649     newlist.sort()
00650     try:
00651         numcols = len(sortcols)
00652     except TypeError:
00653         numcols = 1
00654     crit = '[' + str(numcols) + ':]'
00655     newlist = colex(newlist,crit)
00656     return newlist
00657 
00658 
00659 def unique (inlist):
00660     """
00661 Returns all unique items in the passed list.  If the a list-of-lists
00662 is passed, unique LISTS are found (i.e., items in the first dimension are
00663 compared).
00664 
00665 Usage:   unique (inlist)
00666 Returns: the unique elements (or rows) in inlist
00667 """
00668     uniques = []
00669     for item in inlist:
00670         if item not in uniques:
00671             uniques.append(item)
00672     return uniques
00673 
00674 def duplicates(inlist):
00675     """
00676 Returns duplicate items in the FIRST dimension of the passed list.
00677 
00678 Usage:   duplicates (inlist)
00679 """
00680     dups = []
00681     for i in range(len(inlist)):
00682         if inlist[i] in inlist[i+1:]:
00683             dups.append(inlist[i])
00684     return dups
00685 
00686 
00687 def nonrepeats(inlist):
00688     """
00689 Returns items that are NOT duplicated in the first dim of the passed list.
00690 
00691 Usage:   nonrepeats (inlist)
00692 """
00693     nonrepeats = []
00694     for i in range(len(inlist)):
00695         if inlist.count(inlist[i]) == 1:
00696             nonrepeats.append(inlist[i])
00697     return nonrepeats
00698 
00699 
00700 #===================   PSTAT ARRAY FUNCTIONS  =====================
00701 #===================   PSTAT ARRAY FUNCTIONS  =====================
00702 #===================   PSTAT ARRAY FUNCTIONS  =====================
00703 #===================   PSTAT ARRAY FUNCTIONS  =====================
00704 #===================   PSTAT ARRAY FUNCTIONS  =====================
00705 #===================   PSTAT ARRAY FUNCTIONS  =====================
00706 #===================   PSTAT ARRAY FUNCTIONS  =====================
00707 #===================   PSTAT ARRAY FUNCTIONS  =====================
00708 #===================   PSTAT ARRAY FUNCTIONS  =====================
00709 #===================   PSTAT ARRAY FUNCTIONS  =====================
00710 #===================   PSTAT ARRAY FUNCTIONS  =====================
00711 #===================   PSTAT ARRAY FUNCTIONS  =====================
00712 #===================   PSTAT ARRAY FUNCTIONS  =====================
00713 #===================   PSTAT ARRAY FUNCTIONS  =====================
00714 #===================   PSTAT ARRAY FUNCTIONS  =====================
00715 #===================   PSTAT ARRAY FUNCTIONS  =====================
00716 
00717 try:                         # DEFINE THESE *ONLY* IF numpy IS AVAILABLE
00718  import numpy as N
00719 
00720  def aabut (source, *args):
00721     """
00722 Like the |Stat abut command.  It concatenates two arrays column-wise
00723 and returns the result.  CAUTION:  If one array is shorter, it will be
00724 repeated until it is as long as the other.
00725 
00726 Usage:   aabut (source, args)    where args=any # of arrays
00727 Returns: an array as long as the LONGEST array past, source appearing on the
00728          'left', arrays in <args> attached on the 'right'.
00729 """
00730     if len(source.shape)==1:
00731         width = 1
00732         source = N.resize(source,[source.shape[0],width])
00733     else:
00734         width = source.shape[1]
00735     for addon in args:
00736         if len(addon.shape)==1:
00737             width = 1
00738             addon = N.resize(addon,[source.shape[0],width])
00739         else:
00740             width = source.shape[1]
00741         if len(addon) < len(source):
00742             addon = N.resize(addon,[source.shape[0],addon.shape[1]])
00743         elif len(source) < len(addon):
00744             source = N.resize(source,[addon.shape[0],source.shape[1]])
00745         source = N.concatenate((source,addon),1)
00746     return source
00747 
00748 
00749  def acolex (a,indices,axis=1):
00750     """
00751 Extracts specified indices (a list) from passed array, along passed
00752 axis (column extraction is default).  BEWARE: A 1D array is presumed to be a
00753 column-array (and that the whole array will be returned as a column).
00754 
00755 Usage:   acolex (a,indices,axis=1)
00756 Returns: the columns of a specified by indices
00757 """
00758     if type(indices) not in [ListType,TupleType,N.ndarray]:
00759         indices = [indices]
00760     if len(N.shape(a)) == 1:
00761         cols = N.resize(a,[a.shape[0],1])
00762     else:
00763         cols = N.take(a,indices,axis)
00764     return cols
00765 
00766 
00767  def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
00768     """
00769 Averages data in collapsecol, keeping all unique items in keepcols
00770 (using unique, which keeps unique LISTS of column numbers), retaining
00771 the unique sets of values in keepcols, the mean for each.  If stderror or
00772 N of the mean are desired, set either or both parameters to 1.
00773 
00774 Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
00775 Returns: unique 'conditions' specified by the contents of columns specified
00776          by keepcols, abutted with the mean(s) of column(s) specified by
00777          collapsecols
00778 """
00779     def acollmean (inarray):
00780         return N.sum(N.ravel(inarray))
00781 
00782     if type(keepcols) not in [ListType,TupleType,N.ndarray]:
00783         keepcols = [keepcols]
00784     if type(collapsecols) not in [ListType,TupleType,N.ndarray]:
00785         collapsecols = [collapsecols]
00786 
00787     if cfcn == None:
00788         cfcn = acollmean
00789     if keepcols == []:
00790         avgcol = acolex(a,collapsecols)
00791         means = N.sum(avgcol)/float(len(avgcol))
00792         if fcn1<>None:
00793             try:
00794                 test = fcn1(avgcol)
00795             except:
00796                 test = N.array(['N/A']*len(means))
00797             means = aabut(means,test)
00798         if fcn2<>None:
00799             try:
00800                 test = fcn2(avgcol)
00801             except:
00802                 test = N.array(['N/A']*len(means))
00803             means = aabut(means,test)
00804         return means
00805     else:
00806         if type(keepcols) not in [ListType,TupleType,N.ndarray]:
00807             keepcols = [keepcols]
00808         values = colex(a,keepcols)   # so that "item" can be appended (below)
00809         uniques = unique(values)  # get a LIST, so .sort keeps rows intact
00810         uniques.sort()
00811         newlist = []
00812         for item in uniques:
00813             if type(item) not in [ListType,TupleType,N.ndarray]:
00814                 item =[item]
00815             tmprows = alinexand(a,keepcols,item)
00816             for col in collapsecols:
00817                 avgcol = acolex(tmprows,col)
00818                 item.append(acollmean(avgcol))
00819                 if fcn1<>None:
00820                     try:
00821                         test = fcn1(avgcol)
00822                     except:
00823                         test = 'N/A'
00824                     item.append(test)
00825                 if fcn2<>None:
00826                     try:
00827                         test = fcn2(avgcol)
00828                     except:
00829                         test = 'N/A'
00830                     item.append(test)
00831                 newlist.append(item)
00832         try:
00833             new_a = N.array(newlist)
00834         except TypeError:
00835             new_a = N.array(newlist,'O')
00836         return new_a
00837 
00838 
00839  def adm (a,criterion):
00840     """
00841 Returns rows from the passed list of lists that meet the criteria in
00842 the passed criterion expression (a string as a function of x).
00843 
00844 Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
00845 """
00846     function = 'filter(lambda x: '+criterion+',a)'
00847     lines = eval(function)
00848     try:
00849         lines = N.array(lines)
00850     except:
00851         lines = N.array(lines,dtype='O')
00852     return lines
00853 
00854 
00855  def isstring(x):
00856     if type(x)==StringType:
00857         return 1
00858     else:
00859         return 0
00860 
00861 
00862  def alinexand (a,columnlist,valuelist):
00863     """
00864 Returns the rows of an array where col (from columnlist) = val
00865 (from valuelist).  One value is required for each column in columnlist.
00866 
00867 Usage:   alinexand (a,columnlist,valuelist)
00868 Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
00869 """
00870     if type(columnlist) not in [ListType,TupleType,N.ndarray]:
00871         columnlist = [columnlist]
00872     if type(valuelist) not in [ListType,TupleType,N.ndarray]:
00873         valuelist = [valuelist]
00874     criterion = ''
00875     for i in range(len(columnlist)):
00876         if type(valuelist[i])==StringType:
00877             critval = '\'' + valuelist[i] + '\''
00878         else:
00879             critval = str(valuelist[i])
00880         criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and'
00881     criterion = criterion[0:-3]         # remove the "and" after the last crit
00882     return adm(a,criterion)
00883 
00884 
00885  def alinexor (a,columnlist,valuelist):
00886     """
00887 Returns the rows of an array where col (from columnlist) = val (from
00888 valuelist).  One value is required for each column in columnlist.
00889 The exception is if either columnlist or valuelist has only 1 value,
00890 in which case that item will be expanded to match the length of the
00891 other list.
00892 
00893 Usage:   alinexor (a,columnlist,valuelist)
00894 Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
00895 """
00896     if type(columnlist) not in [ListType,TupleType,N.ndarray]:
00897         columnlist = [columnlist]
00898     if type(valuelist) not in [ListType,TupleType,N.ndarray]:
00899         valuelist = [valuelist]
00900     criterion = ''
00901     if len(columnlist) == 1 and len(valuelist) > 1:
00902         columnlist = columnlist*len(valuelist)
00903     elif len(valuelist) == 1 and len(columnlist) > 1:
00904         valuelist = valuelist*len(columnlist)
00905     for i in range(len(columnlist)):
00906         if type(valuelist[i])==StringType:
00907             critval = '\'' + valuelist[i] + '\''
00908         else:
00909             critval = str(valuelist[i])
00910         criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or'
00911     criterion = criterion[0:-2]         # remove the "or" after the last crit
00912     return adm(a,criterion)
00913 
00914 
00915  def areplace (a,oldval,newval):
00916     """
00917 Replaces all occurrences of oldval with newval in array a.
00918 
00919 Usage:   areplace(a,oldval,newval)
00920 """
00921     return N.where(a==oldval,newval,a)
00922 
00923 
00924  def arecode (a,listmap,col='all'):
00925     """
00926 Remaps the values in an array to a new set of values (useful when
00927 you need to recode data from (e.g.) strings to numbers as most stats
00928 packages require.  Can work on SINGLE columns, or 'all' columns at once.
00929 @@@BROKEN 2007-11-26
00930 
00931 Usage:   arecode (a,listmap,col='all')
00932 Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
00933 """
00934     ashape = a.shape
00935     if col == 'all':
00936         work = a.ravel()
00937     else:
00938         work = acolex(a,col)
00939         work = work.ravel()
00940     for pair in listmap:
00941         if type(pair[1]) == StringType or work.dtype.char=='O' or a.dtype.char=='O':
00942             work = N.array(work,dtype='O')
00943             a = N.array(a,dtype='O')
00944             for i in range(len(work)):
00945                 if work[i]==pair[0]:
00946                     work[i] = pair[1]
00947             if col == 'all':
00948                 return N.reshape(work,ashape)
00949             else:
00950                 return N.concatenate([a[:,0:col],work[:,N.newaxis],a[:,col+1:]],1)
00951         else:   # must be a non-Object type array and replacement
00952             work = N.where(work==pair[0],pair[1],work)
00953             return N.concatenate([a[:,0:col],work[:,N.newaxis],a[:,col+1:]],1)
00954 
00955 
00956  def arowcompare(row1, row2):
00957     """
00958 Compares two rows from an array, regardless of whether it is an
00959 array of numbers or of python objects (which requires the cmp function).
00960 @@@PURPOSE? 2007-11-26
00961 
00962 Usage:   arowcompare(row1,row2)
00963 Returns: an array of equal length containing 1s where the two rows had
00964          identical elements and 0 otherwise
00965 """
00966     return 
00967     if row1.dtype.char=='O' or row2.dtype=='O':
00968         cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2)))) # cmp fcn gives -1,0,1
00969     else:
00970         cmpvect = N.equal(row1,row2)
00971     return cmpvect
00972 
00973 
00974  def arowsame(row1, row2):
00975     """
00976 Compares two rows from an array, regardless of whether it is an
00977 array of numbers or of python objects (which requires the cmp function).
00978 
00979 Usage:   arowsame(row1,row2)
00980 Returns: 1 if the two rows are identical, 0 otherwise.
00981 """
00982     cmpval = N.alltrue(arowcompare(row1,row2))
00983     return cmpval
00984 
00985 
00986  def asortrows(a,axis=0):
00987     """
00988 Sorts an array "by rows".  This differs from the Numeric.sort() function,
00989 which sorts elements WITHIN the given axis.  Instead, this function keeps
00990 the elements along the given axis intact, but shifts them 'up or down'
00991 relative to one another.
00992 
00993 Usage:   asortrows(a,axis=0)
00994 Returns: sorted version of a
00995 """
00996     return N.sort(a,axis=axis,kind='mergesort')
00997 
00998 
00999  def aunique(inarray):
01000     """
01001 Returns unique items in the FIRST dimension of the passed array. Only
01002 works on arrays NOT including string items.
01003 
01004 Usage:   aunique (inarray)
01005 """
01006     uniques = N.array([inarray[0]])
01007     if len(uniques.shape) == 1:            # IF IT'S A 1D ARRAY
01008         for item in inarray[1:]:
01009             if N.add.reduce(N.equal(uniques,item).ravel()) == 0:
01010                 try:
01011                     uniques = N.concatenate([uniques,N.array[N.newaxis,:]])
01012                 except TypeError:
01013                     uniques = N.concatenate([uniques,N.array([item])])
01014     else:                                  # IT MUST BE A 2+D ARRAY
01015         if inarray.dtype.char != 'O':  # not an Object array
01016             for item in inarray[1:]:
01017                 if not N.sum(N.alltrue(N.equal(uniques,item),1)):
01018                     try:
01019                         uniques = N.concatenate( [uniques,item[N.newaxis,:]] )
01020                     except TypeError:    # the item to add isn't a list
01021                         uniques = N.concatenate([uniques,N.array([item])])
01022                 else:
01023                     pass  # this item is already in the uniques array
01024         else:   # must be an Object array, alltrue/equal functions don't work
01025             for item in inarray[1:]:
01026                 newflag = 1
01027                 for unq in uniques:  # NOTE: cmp --> 0=same, -1=<, 1=>
01028                     test = N.sum(abs(N.array(map(cmp,item,unq))))
01029                     if test == 0:   # if item identical to any 1 row in uniques
01030                         newflag = 0 # then not a novel item to add
01031                         break
01032                 if newflag == 1:
01033                     try:
01034                         uniques = N.concatenate( [uniques,item[N.newaxis,:]] )
01035                     except TypeError:    # the item to add isn't a list
01036                         uniques = N.concatenate([uniques,N.array([item])])
01037     return uniques
01038 
01039 
01040  def aduplicates(inarray):
01041     """
01042 Returns duplicate items in the FIRST dimension of the passed array. Only
01043 works on arrays NOT including string items.
01044 
01045 Usage:   aunique (inarray)
01046 """
01047     inarray = N.array(inarray)
01048     if len(inarray.shape) == 1:            # IF IT'S A 1D ARRAY
01049         dups = []
01050         inarray = inarray.tolist()
01051         for i in range(len(inarray)):
01052             if inarray[i] in inarray[i+1:]:
01053                 dups.append(inarray[i])
01054         dups = aunique(dups)
01055     else:                                  # IT MUST BE A 2+D ARRAY
01056         dups = []
01057         aslist = inarray.tolist()
01058         for i in range(len(aslist)):
01059             if aslist[i] in aslist[i+1:]:
01060                 dups.append(aslist[i])
01061         dups = unique(dups)
01062         dups = N.array(dups)
01063     return dups
01064 
01065 except ImportError:    # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs
01066  pass


wiimote
Author(s): Andreas Paepcke, Melonee Wise
autogenerated on Mon Oct 6 2014 01:06:37