$search
00001 # Copyright (c) 1999-2007 Gary Strangman; All Rights Reserved. 00002 # 00003 # Permission is hereby granted, free of charge, to any person obtaining a copy 00004 # of this software and associated documentation files (the "Software"), to deal 00005 # in the Software without restriction, including without limitation the rights 00006 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 00007 # copies of the Software, and to permit persons to whom the Software is 00008 # furnished to do so, subject to the following conditions: 00009 # 00010 # The above copyright notice and this permission notice shall be included in 00011 # all copies or substantial portions of the Software. 00012 # 00013 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 00014 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00015 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 00016 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00017 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 00018 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 00019 # THE SOFTWARE. 00020 # 00021 # Comments and/or additions are welcome (send e-mail to: 00022 # strang@nmr.mgh.harvard.edu). 00023 # 00024 """ 00025 pstat.py module 00026 00027 ################################################# 00028 ####### Written by: Gary Strangman ########### 00029 ####### Last modified: Dec 18, 2007 ########### 00030 ################################################# 00031 00032 This module provides some useful list and array manipulation routines 00033 modeled after those found in the |Stat package by Gary Perlman, plus a 00034 number of other useful list/file manipulation functions. 
The list-based 00035 functions include: 00036 00037 abut (source,*args) 00038 simpleabut (source, addon) 00039 colex (listoflists,cnums) 00040 collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) 00041 dm (listoflists,criterion) 00042 flat (l) 00043 linexand (listoflists,columnlist,valuelist) 00044 linexor (listoflists,columnlist,valuelist) 00045 linedelimited (inlist,delimiter) 00046 lineincols (inlist,colsize) 00047 lineincustcols (inlist,colsizes) 00048 list2string (inlist) 00049 makelol(inlist) 00050 makestr(x) 00051 printcc (lst,extra=2) 00052 printincols (listoflists,colsize) 00053 pl (listoflists) 00054 printl(listoflists) 00055 replace (lst,oldval,newval) 00056 recode (inlist,listmap,cols='all') 00057 remap (listoflists,criterion) 00058 roundlist (inlist,num_digits_to_round_floats_to) 00059 sortby(listoflists,sortcols) 00060 unique (inlist) 00061 duplicates(inlist) 00062 writedelimited (listoflists, delimiter, file, writetype='w') 00063 00064 Some of these functions have alternate versions which are defined only if 00065 Numeric (NumPy) can be imported. These functions are generally named as 00066 above, with an 'a' prefix. 00067 00068 aabut (source, *args) 00069 acolex (a,indices,axis=1) 00070 acollapse (a,keepcols,collapsecols,sterr=0,ns=0) 00071 adm (a,criterion) 00072 alinexand (a,columnlist,valuelist) 00073 alinexor (a,columnlist,valuelist) 00074 areplace (a,oldval,newval) 00075 arecode (a,listmap,col='all') 00076 arowcompare (row1, row2) 00077 arowsame (row1, row2) 00078 asortrows(a,axis=0) 00079 aunique(inarray) 00080 aduplicates(inarray) 00081 00082 Currently, the code is all but completely un-optimized. In many cases, the 00083 array versions of functions amount simply to aliases to built-in array 00084 functions/methods. Their inclusion here is for function name consistency. 00085 """ 00086 00087 ## CHANGE LOG: 00088 ## ========== 00089 ## 07-11-26 ... edited to work with numpy 00090 ## 01-11-15 ... 
changed list2string() to accept a delimiter 00091 ## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1 00092 ## 01-05-31 ... added duplicates() and aduplicates() functions 00093 ## 00-12-28 ... license made GPL, docstring and import requirements 00094 ## 99-11-01 ... changed version to 0.3 00095 ## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py) 00096 ## 03/27/99 ... added areplace function, made replace fcn recursive 00097 ## 12/31/98 ... added writefc function for ouput to fixed column sizes 00098 ## 12/07/98 ... fixed import problem (failed on collapse() fcn) 00099 ## added __version__ variable (now 0.2) 00100 ## 12/05/98 ... updated doc-strings 00101 ## added features to collapse() function 00102 ## added flat() function for lists 00103 ## fixed a broken asortrows() 00104 ## 11/16/98 ... fixed minor bug in aput for 1D arrays 00105 ## 00106 ## 11/08/98 ... fixed aput to output large arrays correctly 00107 00108 import stats # required 3rd party module 00109 import string, copy 00110 from types import * 00111 00112 __version__ = 0.4 00113 00114 ###=========================== LIST FUNCTIONS ========================== 00115 ### 00116 ### Here are the list functions, DEFINED FOR ALL SYSTEMS. 00117 ### Array functions (for NumPy-enabled computers) appear below. 00118 ### 00119 00120 def abut (source,*args): 00121 """ 00122 Like the |Stat abut command. It concatenates two lists side-by-side 00123 and returns the result. '2D' lists are also accomodated for either argument 00124 (source or addon). CAUTION: If one list is shorter, it will be repeated 00125 until it is as long as the longest list. If this behavior is not desired, 00126 use pstat.simpleabut(). 
00127 00128 Usage: abut(source, args) where args=any # of lists 00129 Returns: a list of lists as long as the LONGEST list past, source on the 00130 'left', lists in <args> attached consecutively on the 'right' 00131 """ 00132 00133 if type(source) not in [ListType,TupleType]: 00134 source = [source] 00135 for addon in args: 00136 if type(addon) not in [ListType,TupleType]: 00137 addon = [addon] 00138 if len(addon) < len(source): # is source list longer? 00139 if len(source) % len(addon) == 0: # are they integer multiples? 00140 repeats = len(source)/len(addon) # repeat addon n times 00141 origadd = copy.deepcopy(addon) 00142 for i in range(repeats-1): 00143 addon = addon + origadd 00144 else: 00145 repeats = len(source)/len(addon)+1 # repeat addon x times, 00146 origadd = copy.deepcopy(addon) # x is NOT an integer 00147 for i in range(repeats-1): 00148 addon = addon + origadd 00149 addon = addon[0:len(source)] 00150 elif len(source) < len(addon): # is addon list longer? 00151 if len(addon) % len(source) == 0: # are they integer multiples? 00152 repeats = len(addon)/len(source) # repeat source n times 00153 origsour = copy.deepcopy(source) 00154 for i in range(repeats-1): 00155 source = source + origsour 00156 else: 00157 repeats = len(addon)/len(source)+1 # repeat source x times, 00158 origsour = copy.deepcopy(source) # x is NOT an integer 00159 for i in range(repeats-1): 00160 source = source + origsour 00161 source = source[0:len(addon)] 00162 00163 source = simpleabut(source,addon) 00164 return source 00165 00166 00167 def simpleabut (source, addon): 00168 """ 00169 Concatenates two lists as columns and returns the result. '2D' lists 00170 are also accomodated for either argument (source or addon). This DOES NOT 00171 repeat either list to make the 2 lists of equal length. Beware of list pairs 00172 with different lengths ... the resulting list will be the length of the 00173 FIRST list passed. 
00174 00175 Usage: simpleabut(source,addon) where source, addon=list (or list-of-lists) 00176 Returns: a list of lists as long as source, with source on the 'left' and 00177 addon on the 'right' 00178 """ 00179 if type(source) not in [ListType,TupleType]: 00180 source = [source] 00181 if type(addon) not in [ListType,TupleType]: 00182 addon = [addon] 00183 minlen = min(len(source),len(addon)) 00184 list = copy.deepcopy(source) # start abut process 00185 if type(source[0]) not in [ListType,TupleType]: 00186 if type(addon[0]) not in [ListType,TupleType]: 00187 for i in range(minlen): 00188 list[i] = [source[i]] + [addon[i]] # source/addon = column 00189 else: 00190 for i in range(minlen): 00191 list[i] = [source[i]] + addon[i] # addon=list-of-lists 00192 else: 00193 if type(addon[0]) not in [ListType,TupleType]: 00194 for i in range(minlen): 00195 list[i] = source[i] + [addon[i]] # source=list-of-lists 00196 else: 00197 for i in range(minlen): 00198 list[i] = source[i] + addon[i] # source/addon = list-of-lists 00199 source = list 00200 return source 00201 00202 00203 def colex (listoflists,cnums): 00204 """ 00205 Extracts from listoflists the columns specified in the list 'cnums' 00206 (cnums can be an integer, a sequence of integers, or a string-expression that 00207 corresponds to a slice operation on the variable x ... e.g., 'x[3:]' will colex 00208 columns 3 onward from the listoflists). 00209 00210 Usage: colex (listoflists,cnums) 00211 Returns: a list-of-lists corresponding to the columns from listoflists 00212 specified by cnums, in the order the column numbers appear in cnums 00213 """ 00214 global index 00215 column = 0 00216 if type(cnums) in [ListType,TupleType]: # if multiple columns to get 00217 index = cnums[0] 00218 column = map(lambda x: x[index], listoflists) 00219 for col in cnums[1:]: 00220 index = col 00221 column = abut(column,map(lambda x: x[index], listoflists)) 00222 elif type(cnums) == StringType: # if an 'x[3:]' type expr. 
00223 evalstring = 'map(lambda x: x'+cnums+', listoflists)' 00224 column = eval(evalstring) 00225 else: # else it's just 1 col to get 00226 index = cnums 00227 column = map(lambda x: x[index], listoflists) 00228 return column 00229 00230 00231 def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None): 00232 """ 00233 Averages data in collapsecol, keeping all unique items in keepcols 00234 (using unique, which keeps unique LISTS of column numbers), retaining the 00235 unique sets of values in keepcols, the mean for each. Setting fcn1 00236 and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len) 00237 will append those results (e.g., the sterr, N) after each calculated mean. 00238 cfcn is the collapse function to apply (defaults to mean, defined here in the 00239 pstat module to avoid circular imports with stats.py, but harmonicmean or 00240 others could be passed). 00241 00242 Usage: collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) 00243 Returns: a list of lists with all unique permutations of entries appearing in 00244 columns ("conditions") specified by keepcols, abutted with the result of 00245 cfcn (if cfcn=None, defaults to the mean) of each column specified by 00246 collapsecols. 
00247 """ 00248 def collmean (inlist): 00249 s = 0 00250 for item in inlist: 00251 s = s + item 00252 return s/float(len(inlist)) 00253 00254 if type(keepcols) not in [ListType,TupleType]: 00255 keepcols = [keepcols] 00256 if type(collapsecols) not in [ListType,TupleType]: 00257 collapsecols = [collapsecols] 00258 if cfcn == None: 00259 cfcn = collmean 00260 if keepcols == []: 00261 means = [0]*len(collapsecols) 00262 for i in range(len(collapsecols)): 00263 avgcol = colex(listoflists,collapsecols[i]) 00264 means[i] = cfcn(avgcol) 00265 if fcn1: 00266 try: 00267 test = fcn1(avgcol) 00268 except: 00269 test = 'N/A' 00270 means[i] = [means[i], test] 00271 if fcn2: 00272 try: 00273 test = fcn2(avgcol) 00274 except: 00275 test = 'N/A' 00276 try: 00277 means[i] = means[i] + [len(avgcol)] 00278 except TypeError: 00279 means[i] = [means[i],len(avgcol)] 00280 return means 00281 else: 00282 values = colex(listoflists,keepcols) 00283 uniques = unique(values) 00284 uniques.sort() 00285 newlist = [] 00286 if type(keepcols) not in [ListType,TupleType]: keepcols = [keepcols] 00287 for item in uniques: 00288 if type(item) not in [ListType,TupleType]: item =[item] 00289 tmprows = linexand(listoflists,keepcols,item) 00290 for col in collapsecols: 00291 avgcol = colex(tmprows,col) 00292 item.append(cfcn(avgcol)) 00293 if fcn1 <> None: 00294 try: 00295 test = fcn1(avgcol) 00296 except: 00297 test = 'N/A' 00298 item.append(test) 00299 if fcn2 <> None: 00300 try: 00301 test = fcn2(avgcol) 00302 except: 00303 test = 'N/A' 00304 item.append(test) 00305 newlist.append(item) 00306 return newlist 00307 00308 00309 def dm (listoflists,criterion): 00310 """ 00311 Returns rows from the passed list of lists that meet the criteria in 00312 the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9' 00313 will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows 00314 with column 2 equal to the string 'N'). 
00315 00316 Usage: dm (listoflists, criterion) 00317 Returns: rows from listoflists that meet the specified criterion. 00318 """ 00319 function = 'filter(lambda x: '+criterion+',listoflists)' 00320 lines = eval(function) 00321 return lines 00322 00323 00324 def flat(l): 00325 """ 00326 Returns the flattened version of a '2D' list. List-correlate to the a.ravel()() 00327 method of NumPy arrays. 00328 00329 Usage: flat(l) 00330 """ 00331 newl = [] 00332 for i in range(len(l)): 00333 for j in range(len(l[i])): 00334 newl.append(l[i][j]) 00335 return newl 00336 00337 00338 def linexand (listoflists,columnlist,valuelist): 00339 """ 00340 Returns the rows of a list of lists where col (from columnlist) = val 00341 (from valuelist) for EVERY pair of values (columnlist[i],valuelists[i]). 00342 len(columnlist) must equal len(valuelist). 00343 00344 Usage: linexand (listoflists,columnlist,valuelist) 00345 Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i 00346 """ 00347 if type(columnlist) not in [ListType,TupleType]: 00348 columnlist = [columnlist] 00349 if type(valuelist) not in [ListType,TupleType]: 00350 valuelist = [valuelist] 00351 criterion = '' 00352 for i in range(len(columnlist)): 00353 if type(valuelist[i])==StringType: 00354 critval = '\'' + valuelist[i] + '\'' 00355 else: 00356 critval = str(valuelist[i]) 00357 criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and' 00358 criterion = criterion[0:-3] # remove the "and" after the last crit 00359 function = 'filter(lambda x: '+criterion+',listoflists)' 00360 lines = eval(function) 00361 return lines 00362 00363 00364 def linexor (listoflists,columnlist,valuelist): 00365 """ 00366 Returns the rows of a list of lists where col (from columnlist) = val 00367 (from valuelist) for ANY pair of values (colunmlist[i],valuelist[i[). 00368 One value is required for each column in columnlist. 
If only one value 00369 exists for columnlist but multiple values appear in valuelist, the 00370 valuelist values are all assumed to pertain to the same column. 00371 00372 Usage: linexor (listoflists,columnlist,valuelist) 00373 Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i 00374 """ 00375 if type(columnlist) not in [ListType,TupleType]: 00376 columnlist = [columnlist] 00377 if type(valuelist) not in [ListType,TupleType]: 00378 valuelist = [valuelist] 00379 criterion = '' 00380 if len(columnlist) == 1 and len(valuelist) > 1: 00381 columnlist = columnlist*len(valuelist) 00382 for i in range(len(columnlist)): # build an exec string 00383 if type(valuelist[i])==StringType: 00384 critval = '\'' + valuelist[i] + '\'' 00385 else: 00386 critval = str(valuelist[i]) 00387 criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or' 00388 criterion = criterion[0:-2] # remove the "or" after the last crit 00389 function = 'filter(lambda x: '+criterion+',listoflists)' 00390 lines = eval(function) 00391 return lines 00392 00393 00394 def linedelimited (inlist,delimiter): 00395 """ 00396 Returns a string composed of elements in inlist, with each element 00397 separated by 'delimiter.' Used by function writedelimited. Use '\t' 00398 for tab-delimiting. 00399 00400 Usage: linedelimited (inlist,delimiter) 00401 """ 00402 outstr = '' 00403 for item in inlist: 00404 if type(item) <> StringType: 00405 item = str(item) 00406 outstr = outstr + item + delimiter 00407 outstr = outstr[0:-1] 00408 return outstr 00409 00410 00411 def lineincols (inlist,colsize): 00412 """ 00413 Returns a string composed of elements in inlist, with each element 00414 right-aligned in columns of (fixed) colsize. 
00415 00416 Usage: lineincols (inlist,colsize) where colsize is an integer 00417 """ 00418 outstr = '' 00419 for item in inlist: 00420 if type(item) <> StringType: 00421 item = str(item) 00422 size = len(item) 00423 if size <= colsize: 00424 for i in range(colsize-size): 00425 outstr = outstr + ' ' 00426 outstr = outstr + item 00427 else: 00428 outstr = outstr + item[0:colsize+1] 00429 return outstr 00430 00431 00432 def lineincustcols (inlist,colsizes): 00433 """ 00434 Returns a string composed of elements in inlist, with each element 00435 right-aligned in a column of width specified by a sequence colsizes. The 00436 length of colsizes must be greater than or equal to the number of columns 00437 in inlist. 00438 00439 Usage: lineincustcols (inlist,colsizes) 00440 Returns: formatted string created from inlist 00441 """ 00442 outstr = '' 00443 for i in range(len(inlist)): 00444 if type(inlist[i]) <> StringType: 00445 item = str(inlist[i]) 00446 else: 00447 item = inlist[i] 00448 size = len(item) 00449 if size <= colsizes[i]: 00450 for j in range(colsizes[i]-size): 00451 outstr = outstr + ' ' 00452 outstr = outstr + item 00453 else: 00454 outstr = outstr + item[0:colsizes[i]+1] 00455 return outstr 00456 00457 00458 def list2string (inlist,delimit=' '): 00459 """ 00460 Converts a 1D list to a single long string for file output, using 00461 the string.join function. 00462 00463 Usage: list2string (inlist,delimit=' ') 00464 Returns: the string created from inlist 00465 """ 00466 stringlist = map(makestr,inlist) 00467 return string.join(stringlist,delimit) 00468 00469 00470 def makelol(inlist): 00471 """ 00472 Converts a 1D list to a 2D list (i.e., a list-of-lists). Useful when you 00473 want to use put() to write a 1D list one item per line in the file. 00474 00475 Usage: makelol(inlist) 00476 Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc. 
00477 """ 00478 x = [] 00479 for item in inlist: 00480 x.append([item]) 00481 return x 00482 00483 00484 def makestr (x): 00485 if type(x) <> StringType: 00486 x = str(x) 00487 return x 00488 00489 00490 def printcc (lst,extra=2): 00491 """ 00492 Prints a list of lists in columns, customized by the max size of items 00493 within the columns (max size of items in col, plus 'extra' number of spaces). 00494 Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines, 00495 respectively. 00496 00497 Usage: printcc (lst,extra=2) 00498 Returns: None 00499 """ 00500 if type(lst[0]) not in [ListType,TupleType]: 00501 lst = [lst] 00502 rowstokill = [] 00503 list2print = copy.deepcopy(lst) 00504 for i in range(len(lst)): 00505 if lst[i] == ['\n'] or lst[i]=='\n' or lst[i]=='dashes' or lst[i]=='' or lst[i]==['']: 00506 rowstokill = rowstokill + [i] 00507 rowstokill.reverse() # delete blank rows from the end 00508 for row in rowstokill: 00509 del list2print[row] 00510 maxsize = [0]*len(list2print[0]) 00511 for col in range(len(list2print[0])): 00512 items = colex(list2print,col) 00513 items = map(makestr,items) 00514 maxsize[col] = max(map(len,items)) + extra 00515 for row in lst: 00516 if row == ['\n'] or row == '\n' or row == '' or row == ['']: 00517 print 00518 elif row == ['dashes'] or row == 'dashes': 00519 dashes = [0]*len(maxsize) 00520 for j in range(len(maxsize)): 00521 dashes[j] = '-'*(maxsize[j]-2) 00522 print lineincustcols(dashes,maxsize) 00523 else: 00524 print lineincustcols(row,maxsize) 00525 return None 00526 00527 00528 def printincols (listoflists,colsize): 00529 """ 00530 Prints a list of lists in columns of (fixed) colsize width, where 00531 colsize is an integer. 00532 00533 Usage: printincols (listoflists,colsize) 00534 Returns: None 00535 """ 00536 for row in listoflists: 00537 print lineincols(row,colsize) 00538 return None 00539 00540 00541 def pl (listoflists): 00542 """ 00543 Prints a list of lists, 1 list (row) at a time. 
00544 00545 Usage: pl(listoflists) 00546 Returns: None 00547 """ 00548 for row in listoflists: 00549 if row[-1] == '\n': 00550 print row, 00551 else: 00552 print row 00553 return None 00554 00555 00556 def printl(listoflists): 00557 """Alias for pl.""" 00558 pl(listoflists) 00559 return 00560 00561 00562 def replace (inlst,oldval,newval): 00563 """ 00564 Replaces all occurrences of 'oldval' with 'newval', recursively. 00565 00566 Usage: replace (inlst,oldval,newval) 00567 """ 00568 lst = inlst*1 00569 for i in range(len(lst)): 00570 if type(lst[i]) not in [ListType,TupleType]: 00571 if lst[i]==oldval: lst[i]=newval 00572 else: 00573 lst[i] = replace(lst[i],oldval,newval) 00574 return lst 00575 00576 00577 def recode (inlist,listmap,cols=None): 00578 """ 00579 Changes the values in a list to a new set of values (useful when 00580 you need to recode data from (e.g.) strings to numbers. cols defaults 00581 to None (meaning all columns are recoded). 00582 00583 Usage: recode (inlist,listmap,cols=None) cols=recode cols, listmap=2D list 00584 Returns: inlist with the appropriate values replaced with new ones 00585 """ 00586 lst = copy.deepcopy(inlist) 00587 if cols != None: 00588 if type(cols) not in [ListType,TupleType]: 00589 cols = [cols] 00590 for col in cols: 00591 for row in range(len(lst)): 00592 try: 00593 idx = colex(listmap,0).index(lst[row][col]) 00594 lst[row][col] = listmap[idx][1] 00595 except ValueError: 00596 pass 00597 else: 00598 for row in range(len(lst)): 00599 for col in range(len(lst)): 00600 try: 00601 idx = colex(listmap,0).index(lst[row][col]) 00602 lst[row][col] = listmap[idx][1] 00603 except ValueError: 00604 pass 00605 return lst 00606 00607 00608 def remap (listoflists,criterion): 00609 """ 00610 Remaps values in a given column of a 2D list (listoflists). This requires 00611 a criterion as a function of 'x' so that the result of the following is 00612 returned ... map(lambda x: 'criterion',listoflists). 
00613 00614 Usage: remap(listoflists,criterion) criterion=string 00615 Returns: remapped version of listoflists 00616 """ 00617 function = 'map(lambda x: '+criterion+',listoflists)' 00618 lines = eval(function) 00619 return lines 00620 00621 00622 def roundlist (inlist,digits): 00623 """ 00624 Goes through each element in a 1D or 2D inlist, and applies the following 00625 function to all elements of FloatType ... round(element,digits). 00626 00627 Usage: roundlist(inlist,digits) 00628 Returns: list with rounded floats 00629 """ 00630 if type(inlist[0]) in [IntType, FloatType]: 00631 inlist = [inlist] 00632 l = inlist*1 00633 for i in range(len(l)): 00634 for j in range(len(l[i])): 00635 if type(l[i][j])==FloatType: 00636 l[i][j] = round(l[i][j],digits) 00637 return l 00638 00639 00640 def sortby(listoflists,sortcols): 00641 """ 00642 Sorts a list of lists on the column(s) specified in the sequence 00643 sortcols. 00644 00645 Usage: sortby(listoflists,sortcols) 00646 Returns: sorted list, unchanged column ordering 00647 """ 00648 newlist = abut(colex(listoflists,sortcols),listoflists) 00649 newlist.sort() 00650 try: 00651 numcols = len(sortcols) 00652 except TypeError: 00653 numcols = 1 00654 crit = '[' + str(numcols) + ':]' 00655 newlist = colex(newlist,crit) 00656 return newlist 00657 00658 00659 def unique (inlist): 00660 """ 00661 Returns all unique items in the passed list. If the a list-of-lists 00662 is passed, unique LISTS are found (i.e., items in the first dimension are 00663 compared). 00664 00665 Usage: unique (inlist) 00666 Returns: the unique elements (or rows) in inlist 00667 """ 00668 uniques = [] 00669 for item in inlist: 00670 if item not in uniques: 00671 uniques.append(item) 00672 return uniques 00673 00674 def duplicates(inlist): 00675 """ 00676 Returns duplicate items in the FIRST dimension of the passed list. 
00677 00678 Usage: duplicates (inlist) 00679 """ 00680 dups = [] 00681 for i in range(len(inlist)): 00682 if inlist[i] in inlist[i+1:]: 00683 dups.append(inlist[i]) 00684 return dups 00685 00686 00687 def nonrepeats(inlist): 00688 """ 00689 Returns items that are NOT duplicated in the first dim of the passed list. 00690 00691 Usage: nonrepeats (inlist) 00692 """ 00693 nonrepeats = [] 00694 for i in range(len(inlist)): 00695 if inlist.count(inlist[i]) == 1: 00696 nonrepeats.append(inlist[i]) 00697 return nonrepeats 00698 00699 00700 #=================== PSTAT ARRAY FUNCTIONS ===================== 00701 #=================== PSTAT ARRAY FUNCTIONS ===================== 00702 #=================== PSTAT ARRAY FUNCTIONS ===================== 00703 #=================== PSTAT ARRAY FUNCTIONS ===================== 00704 #=================== PSTAT ARRAY FUNCTIONS ===================== 00705 #=================== PSTAT ARRAY FUNCTIONS ===================== 00706 #=================== PSTAT ARRAY FUNCTIONS ===================== 00707 #=================== PSTAT ARRAY FUNCTIONS ===================== 00708 #=================== PSTAT ARRAY FUNCTIONS ===================== 00709 #=================== PSTAT ARRAY FUNCTIONS ===================== 00710 #=================== PSTAT ARRAY FUNCTIONS ===================== 00711 #=================== PSTAT ARRAY FUNCTIONS ===================== 00712 #=================== PSTAT ARRAY FUNCTIONS ===================== 00713 #=================== PSTAT ARRAY FUNCTIONS ===================== 00714 #=================== PSTAT ARRAY FUNCTIONS ===================== 00715 #=================== PSTAT ARRAY FUNCTIONS ===================== 00716 00717 try: # DEFINE THESE *ONLY* IF numpy IS AVAILABLE 00718 import numpy as N 00719 00720 def aabut (source, *args): 00721 """ 00722 Like the |Stat abut command. It concatenates two arrays column-wise 00723 and returns the result. CAUTION: If one array is shorter, it will be 00724 repeated until it is as long as the other. 
00725 00726 Usage: aabut (source, args) where args=any # of arrays 00727 Returns: an array as long as the LONGEST array past, source appearing on the 00728 'left', arrays in <args> attached on the 'right'. 00729 """ 00730 if len(source.shape)==1: 00731 width = 1 00732 source = N.resize(source,[source.shape[0],width]) 00733 else: 00734 width = source.shape[1] 00735 for addon in args: 00736 if len(addon.shape)==1: 00737 width = 1 00738 addon = N.resize(addon,[source.shape[0],width]) 00739 else: 00740 width = source.shape[1] 00741 if len(addon) < len(source): 00742 addon = N.resize(addon,[source.shape[0],addon.shape[1]]) 00743 elif len(source) < len(addon): 00744 source = N.resize(source,[addon.shape[0],source.shape[1]]) 00745 source = N.concatenate((source,addon),1) 00746 return source 00747 00748 00749 def acolex (a,indices,axis=1): 00750 """ 00751 Extracts specified indices (a list) from passed array, along passed 00752 axis (column extraction is default). BEWARE: A 1D array is presumed to be a 00753 column-array (and that the whole array will be returned as a column). 00754 00755 Usage: acolex (a,indices,axis=1) 00756 Returns: the columns of a specified by indices 00757 """ 00758 if type(indices) not in [ListType,TupleType,N.ndarray]: 00759 indices = [indices] 00760 if len(N.shape(a)) == 1: 00761 cols = N.resize(a,[a.shape[0],1]) 00762 else: 00763 cols = N.take(a,indices,axis) 00764 return cols 00765 00766 00767 def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None): 00768 """ 00769 Averages data in collapsecol, keeping all unique items in keepcols 00770 (using unique, which keeps unique LISTS of column numbers), retaining 00771 the unique sets of values in keepcols, the mean for each. If stderror or 00772 N of the mean are desired, set either or both parameters to 1. 
00773 00774 Usage: acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) 00775 Returns: unique 'conditions' specified by the contents of columns specified 00776 by keepcols, abutted with the mean(s) of column(s) specified by 00777 collapsecols 00778 """ 00779 def acollmean (inarray): 00780 return N.sum(N.ravel(inarray)) 00781 00782 if type(keepcols) not in [ListType,TupleType,N.ndarray]: 00783 keepcols = [keepcols] 00784 if type(collapsecols) not in [ListType,TupleType,N.ndarray]: 00785 collapsecols = [collapsecols] 00786 00787 if cfcn == None: 00788 cfcn = acollmean 00789 if keepcols == []: 00790 avgcol = acolex(a,collapsecols) 00791 means = N.sum(avgcol)/float(len(avgcol)) 00792 if fcn1<>None: 00793 try: 00794 test = fcn1(avgcol) 00795 except: 00796 test = N.array(['N/A']*len(means)) 00797 means = aabut(means,test) 00798 if fcn2<>None: 00799 try: 00800 test = fcn2(avgcol) 00801 except: 00802 test = N.array(['N/A']*len(means)) 00803 means = aabut(means,test) 00804 return means 00805 else: 00806 if type(keepcols) not in [ListType,TupleType,N.ndarray]: 00807 keepcols = [keepcols] 00808 values = colex(a,keepcols) # so that "item" can be appended (below) 00809 uniques = unique(values) # get a LIST, so .sort keeps rows intact 00810 uniques.sort() 00811 newlist = [] 00812 for item in uniques: 00813 if type(item) not in [ListType,TupleType,N.ndarray]: 00814 item =[item] 00815 tmprows = alinexand(a,keepcols,item) 00816 for col in collapsecols: 00817 avgcol = acolex(tmprows,col) 00818 item.append(acollmean(avgcol)) 00819 if fcn1<>None: 00820 try: 00821 test = fcn1(avgcol) 00822 except: 00823 test = 'N/A' 00824 item.append(test) 00825 if fcn2<>None: 00826 try: 00827 test = fcn2(avgcol) 00828 except: 00829 test = 'N/A' 00830 item.append(test) 00831 newlist.append(item) 00832 try: 00833 new_a = N.array(newlist) 00834 except TypeError: 00835 new_a = N.array(newlist,'O') 00836 return new_a 00837 00838 00839 def adm (a,criterion): 00840 """ 00841 Returns rows from 
def isstring(x):
    """
    Tests whether x is a string.

    Usage:   isstring(x)
    Returns: 1 if x is a str instance, 0 otherwise
    """
    return int(isinstance(x, str))


def _acritvalue(value):
    """
    Renders value as Python source text for use inside an adm() criterion.
    Strings are emitted via repr() so embedded quotes/backslashes stay valid;
    floats via repr() so no digits are lost; everything else via str().
    """
    if isinstance(value, str):
        return repr(str(value))
    if isinstance(value, float):
        return repr(float(value))
    return str(value)


def alinexand(a, columnlist, valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val
    (from valuelist).  One value is required for each column in columnlist.
    An empty columnlist places no constraint, so every row is returned.

    NOTE: the test is assembled as a text expression that adm() passes to
    eval(), so columnlist/valuelist must come from a trusted source.

    Usage:   alinexand (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
    """
    if not isinstance(columnlist, (list, tuple, N.ndarray)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple, N.ndarray)):
        valuelist = [valuelist]
    terms = ['x[' + str(col) + ']==' + _acritvalue(valuelist[i])
             for i, col in enumerate(columnlist)]
    criterion = ' and '.join(terms) or 'True'
    return adm(a, criterion)


def alinexor(a, columnlist, valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val (from
    valuelist).  One value is required for each column in columnlist.
    The exception is if either columnlist or valuelist has only 1 value,
    in which case that item will be expanded to match the length of the
    other list.  An empty columnlist can match nothing, so no rows are
    returned.

    NOTE: the test is assembled as a text expression that adm() passes to
    eval(), so columnlist/valuelist must come from a trusted source.

    Usage:   alinexor (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
    """
    if not isinstance(columnlist, (list, tuple, N.ndarray)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple, N.ndarray)):
        valuelist = [valuelist]
    # list(...) first: multiplying an ndarray would scale its element
    # instead of repeating it
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = list(columnlist) * len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = list(valuelist) * len(columnlist)
    terms = ['x[' + str(col) + ']==' + _acritvalue(valuelist[i])
             for i, col in enumerate(columnlist)]
    criterion = ' or '.join(terms) or 'False'
    return adm(a, criterion)


def areplace(a, oldval, newval):
    """
    Replaces all occurrences of oldval with newval in array a.

    Usage:   areplace(a,oldval,newval)
    Returns: a new array; a itself is not modified
    """
    return N.where(a == oldval, newval, a)


def arecode(a, listmap, col='all'):
    """
    Remaps the values in an array to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers as most stats
    packages require).  Can work on SINGLE columns, or 'all' columns at once.
    Every pair in listmap is applied, and each element is recoded at most
    once (by the first pair whose old value it equals), so a map such as
    [(1,2),(2,3)] does not chain 1 -> 2 -> 3.

    Usage:   arecode (a,listmap,col='all')
    Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
    """
    a = N.asarray(a)
    ashape = a.shape
    if col == 'all':
        work = a.ravel()
    else:
        work = acolex(a, col).ravel()

    # Object storage is needed whenever strings are involved, either as
    # replacement values or as the existing array contents.
    needs_object = work.dtype.kind in 'OSU'
    for pair in listmap:
        if isinstance(pair[1], str):
            needs_object = True

    if needs_object:
        a = N.array(a, dtype='O')
        result = N.array(work, dtype='O')
        for i, value in enumerate(list(result)):
            for pair in listmap:
                if value == pair[0]:
                    result[i] = pair[1]
                    break
    else:
        result = work
        pending = N.ones(work.shape, dtype=bool)   # elements not yet recoded
        for pair in listmap:
            hits = N.logical_and(work == pair[0], pending)
            # N.where (not item assignment) so the result can be upcast
            result = N.where(hits, pair[1], result)
            pending = N.logical_and(pending, N.logical_not(hits))

    if col == 'all':
        return N.reshape(result, ashape)
    return N.concatenate([a[:, 0:col], result[:, N.newaxis], a[:, col+1:]], 1)


def arowcompare(row1, row2):
    """
    Compares two rows from an array element by element, regardless of
    whether it is an array of numbers or of python objects.

    Usage:   arowcompare(row1,row2)
    Returns: a boolean array of equal length, true where the two rows had
             identical elements and false otherwise
    """
    row1 = N.asarray(row1)
    row2 = N.asarray(row2)
    if row1.dtype.char == 'O' or row2.dtype.char == 'O':
        # compare python objects one pair at a time
        return N.array([x == y for x, y in zip(row1, row2)], dtype=bool)
    return N.asarray(row1 == row2)


def arowsame(row1, row2):
    """
    Compares two rows from an array, regardless of whether it is an
    array of numbers or of python objects.

    Usage:   arowsame(row1,row2)
    Returns: true if the two rows are identical, false otherwise.
    """
    return N.all(arowcompare(row1, row2))


def asortrows(a, axis=0):
    """
    Sorts an array "by rows".  This differs from the numpy sort() function,
    which sorts elements WITHIN the given axis.  Instead, this function keeps
    the elements along the given axis intact, but shifts them 'up or down'
    relative to one another.  Rows are ordered by their first element, ties
    broken by the second element, and so on.  Arrays with fewer than two
    dimensions are simply sorted.

    Usage:   asortrows(a,axis=0)
    Returns: sorted version of a
    """
    a = N.asarray(a)
    if a.ndim < 2 or a.size == 0:
        return N.sort(a, axis=axis, kind='mergesort')
    swapped = N.swapaxes(a, axis, 0)            # units to reorder come first
    keys = swapped.reshape(swapped.shape[0], -1)
    # lexsort treats its LAST key as the primary one, hence the reversal
    order = N.lexsort(keys.T[::-1])
    return N.swapaxes(swapped[order], 0, axis)


def aunique(inarray):
    """
    Returns unique items in the FIRST dimension of the passed array, in
    order of first appearance.  Items are compared with ==, so two items
    are the same only if all of their elements are equal.  An empty input
    gives an empty result.

    Usage:   aunique (inarray)
    """
    inarray = N.atleast_1d(N.asarray(inarray))
    kept = []                       # indices of first occurrences
    for i in range(len(inarray)):
        item = inarray[i]
        for j in kept:
            if N.all(N.asarray(item == inarray[j])):
                break               # already represented in the result
        else:
            kept.append(i)
    return inarray[N.array(kept, dtype=int)]


def aduplicates(inarray):
    """
    Returns duplicate items in the FIRST dimension of the passed array,
    each reported once, in order of first appearance.  An empty array is
    returned when nothing is duplicated.

    Usage:   aduplicates (inarray)
    """
    inarray = N.atleast_1d(N.array(inarray))
    aslist = inarray.tolist()
    repeated = [i for i, item in enumerate(aslist) if item in aslist[i+1:]]
    return aunique(inarray[N.array(repeated, dtype=int)])