pstat.py
Go to the documentation of this file.
1 # Copyright (c) 1999-2007 Gary Strangman; All Rights Reserved.
2 #
3 # Permission is hereby granted, free of charge, to any person obtaining a copy
4 # of this software and associated documentation files (the "Software"), to deal
5 # in the Software without restriction, including without limitation the rights
6 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 # copies of the Software, and to permit persons to whom the Software is
8 # furnished to do so, subject to the following conditions:
9 #
10 # The above copyright notice and this permission notice shall be included in
11 # all copies or substantial portions of the Software.
12 #
13 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 # THE SOFTWARE.
20 #
21 # Comments and/or additions are welcome (send e-mail to:
22 # strang@nmr.mgh.harvard.edu).
23 #
24 """
25 pstat.py module
26 
27 #################################################
28 ####### Written by: Gary Strangman ###########
29 ####### Last modified: Dec 18, 2007 ###########
30 #################################################
31 
32 This module provides some useful list and array manipulation routines
33 modeled after those found in the |Stat package by Gary Perlman, plus a
34 number of other useful list/file manipulation functions. The list-based
35 functions include:
36 
37  abut (source,*args)
38  simpleabut (source, addon)
39  colex (listoflists,cnums)
40  collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
41  dm (listoflists,criterion)
42  flat (l)
43  linexand (listoflists,columnlist,valuelist)
44  linexor (listoflists,columnlist,valuelist)
45  linedelimited (inlist,delimiter)
46  lineincols (inlist,colsize)
47  lineincustcols (inlist,colsizes)
48  list2string (inlist)
49  makelol(inlist)
50  makestr(x)
51  printcc (lst,extra=2)
52  printincols (listoflists,colsize)
53  pl (listoflists)
54  printl(listoflists)
55  replace (lst,oldval,newval)
56  recode (inlist,listmap,cols=None)
57  remap (listoflists,criterion)
58  roundlist (inlist,num_digits_to_round_floats_to)
59  sortby(listoflists,sortcols)
60  unique (inlist)
61  duplicates(inlist)
62  writedelimited (listoflists, delimiter, file, writetype='w')
63 
64 Some of these functions have alternate versions which are defined only if
65 Numeric (NumPy) can be imported. These functions are generally named as
66 above, with an 'a' prefix.
67 
68  aabut (source, *args)
69  acolex (a,indices,axis=1)
70  acollapse (a,keepcols,collapsecols,sterr=0,ns=0)
71  adm (a,criterion)
72  alinexand (a,columnlist,valuelist)
73  alinexor (a,columnlist,valuelist)
74  areplace (a,oldval,newval)
75  arecode (a,listmap,col='all')
76  arowcompare (row1, row2)
77  arowsame (row1, row2)
78  asortrows(a,axis=0)
79  aunique(inarray)
80  aduplicates(inarray)
81 
82 Currently, the code is all but completely un-optimized. In many cases, the
83 array versions of functions amount simply to aliases to built-in array
84 functions/methods. Their inclusion here is for function name consistency.
85 """
86 
87 ## CHANGE LOG:
88 ## ==========
89 ## 07-11-26 ... edited to work with numpy
90 ## 01-11-15 ... changed list2string() to accept a delimiter
91 ## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
92 ## 01-05-31 ... added duplicates() and aduplicates() functions
93 ## 00-12-28 ... license made GPL, docstring and import requirements
94 ## 99-11-01 ... changed version to 0.3
95 ## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
96 ## 03/27/99 ... added areplace function, made replace fcn recursive
97 ## 12/31/98 ... added writefc function for output to fixed column sizes
98 ## 12/07/98 ... fixed import problem (failed on collapse() fcn)
99 ## added __version__ variable (now 0.2)
100 ## 12/05/98 ... updated doc-strings
101 ## added features to collapse() function
102 ## added flat() function for lists
103 ## fixed a broken asortrows()
104 ## 11/16/98 ... fixed minor bug in aput for 1D arrays
105 
107 
108 import stats # required 3rd party module
109 import string, copy
110 from types import *
111 
112 __version__ = 0.4
113 
114 ###=========================== LIST FUNCTIONS ==========================
115 ###
116 ### Here are the list functions, DEFINED FOR ALL SYSTEMS.
117 ### Array functions (for NumPy-enabled computers) appear below.
118 ###
119 
def abut(source, *args):
    """
Like the |Stat abut command.  Concatenates lists side-by-side (as columns)
and returns the result.  '2D' lists are also accommodated for any argument.
CAUTION: If one list is shorter, it is repeated (cycled) until it is as long
as the longest list.  If this behavior is not desired, use pstat.simpleabut().

Usage:   abut(source, args)   where args=any # of lists
Returns: a list of lists as long as the LONGEST list passed, source on the
         'left', lists in <args> attached consecutively on the 'right'.
         With no <args>, source is returned un-abutted (wrapped in a list
         if it was not already a list or tuple).
Raises:  ZeroDivisionError if an empty sequence would have to be repeated
         to match a non-empty one.
"""
    if not isinstance(source, (list, tuple)):
        source = [source]
    for addon in args:
        if not isinstance(addon, (list, tuple)):
            addon = [addon]
        # Stretch whichever side is shorter so both have the same length.
        if len(addon) < len(source):
            addon = _repeat_to_length(addon, len(source))
        elif len(source) < len(addon):
            source = _repeat_to_length(source, len(addon))
        source = simpleabut(source, addon)
    return source


def _repeat_to_length(seq, length):
    """Cycle seq (a non-empty list or tuple) until it has exactly length items."""
    # Ceiling division done with floor division so the repeat count is always
    # an integer, regardless of how '/' behaves for ints.
    repeats = -(-length // len(seq))
    original = copy.deepcopy(seq)       # repeated segments are independent copies
    for _ in range(repeats - 1):
        seq = seq + original
    return seq[0:length]                # trim the overshoot of a partial repeat
165 
166 
def simpleabut(source, addon):
    """
Concatenates two lists as columns and returns the result.  '2D' lists
are also accommodated for either argument (source or addon).  This DOES NOT
repeat either list to make the 2 lists of equal length.  Beware of list pairs
with different lengths ... the resulting list will be the length of the
FIRST list passed; rows of source beyond the end of addon are returned
(as deep copies) without anything attached.

Whether an argument is '2D' is decided by looking at its first element only.

Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
Returns: a new list as long as source, with source on the 'left' and
         addon on the 'right'.  An empty source yields [].
"""
    if not isinstance(source, (list, tuple)):
        source = [source]
    if not isinstance(addon, (list, tuple)):
        addon = [addon]
    # Work on a list copy so tuples are accepted and the caller's data is
    # never modified.
    result = list(copy.deepcopy(source))
    if len(source) == 0 or len(addon) == 0:
        return result                   # nothing to attach
    source_is_2d = isinstance(source[0], (list, tuple))
    addon_is_2d = isinstance(addon[0], (list, tuple))
    for i in range(min(len(source), len(addon))):
        if source_is_2d:
            left = list(source[i])
        else:
            left = [source[i]]
        if addon_is_2d:
            right = list(addon[i])
        else:
            right = [addon[i]]
        result[i] = left + right
    return result
201 
202 
def colex(listoflists, cnums):
    """
Extracts from listoflists the columns specified by 'cnums'.  cnums can be
an integer, a sequence of integers, or a string holding an index/slice
expression that is applied to each row ... e.g., '[3:]' will colex columns
3 onward from the listoflists.

Usage:   colex (listoflists,cnums)
Returns: for a single column (an integer, or a one-element sequence) a flat
         list of that column's values; for several columns a list-of-lists
         with the columns in the order they appear in cnums (built with
         abut()); for a string, whatever the expression yields for each row.
"""
    if isinstance(cnums, (list, tuple)):        # multiple columns to get
        column = [row[cnums[0]] for row in listoflists]
        for col in cnums[1:]:
            column = abut(column, [row[col] for row in listoflists])
    elif isinstance(cnums, str):                # a '[3:]' type expression
        # NOTE(review): cnums is evaluated as Python code; never pass
        # untrusted text here.
        extract = eval('lambda x: x' + cnums)
        column = [extract(row) for row in listoflists]
    else:                                       # just 1 column to get
        column = [row[cnums] for row in listoflists]
    return column
229 
230 
def collapse(listoflists, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
    """
Averages data in collapsecols, keeping all unique items in keepcols
(using unique, which keeps unique LISTS of column values).  Setting fcn1
and/or fcn2 to a function rather than None (e.g., stats.sterr, len)
appends those results after each collapsed value; if such a function
raises, 'N/A' is appended instead.  cfcn is the collapse function to apply
(defaults to the arithmetic mean, defined here to avoid circular imports
with stats.py, but harmonicmean or others could be passed).

Usage:   collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
Returns: if keepcols is empty, a list with one entry per column in
         collapsecols: the cfcn value alone, or [value, fcn1 result,
         fcn2 result] when extra functions are given.
         Otherwise a list of lists with all unique combinations of entries
         appearing in the keepcols columns ("conditions"), sorted, each
         abutted with cfcn (and fcn1/fcn2 results) for every column in
         collapsecols.
"""
    def collmean(inlist):
        s = 0
        for item in inlist:
            s = s + item
        return s / float(len(inlist))

    def extra_stat(fcn, column):
        # fcn1/fcn2 are best-effort extras: a failure is reported as 'N/A'
        # in the output instead of aborting the whole collapse.
        try:
            return fcn(column)
        except Exception:
            return 'N/A'

    if not isinstance(keepcols, (list, tuple)):
        keepcols = [keepcols]
    if not isinstance(collapsecols, (list, tuple)):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = collmean

    if len(keepcols) == 0:
        means = []
        for col in collapsecols:
            avgcol = colex(listoflists, col)
            entry = [cfcn(avgcol)]
            if fcn1 is not None:
                entry.append(extra_stat(fcn1, avgcol))
            if fcn2 is not None:
                # Append fcn2's own result (it used to be dropped in favor
                # of len(avgcol)).
                entry.append(extra_stat(fcn2, avgcol))
            if len(entry) == 1:
                means.append(entry[0])      # bare value when no extras
            else:
                means.append(entry)
        return means

    values = colex(listoflists, keepcols)
    uniques = unique(values)
    uniques.sort()
    newlist = []
    for item in uniques:
        if not isinstance(item, (list, tuple)):
            item = [item]
        # Select the rows for this condition BEFORE item grows with results.
        tmprows = linexand(listoflists, keepcols, item)
        for col in collapsecols:
            avgcol = colex(tmprows, col)
            item.append(cfcn(avgcol))
            if fcn1 is not None:
                item.append(extra_stat(fcn1, avgcol))
            if fcn2 is not None:
                item.append(extra_stat(fcn2, avgcol))
        newlist.append(item)
    return newlist
307 
308 
def dm(listoflists, criterion):
    """
Returns rows from the passed list of lists that meet the criteria in
the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9'
will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows
with column 2 equal to the string 'N').

Usage:   dm (listoflists, criterion)
Returns: a list of the rows from listoflists that meet the criterion.
"""
    # NOTE(review): criterion is evaluated as Python code; never pass
    # untrusted text here.
    keep = eval('lambda x: ' + criterion)
    return [row for row in listoflists if keep(row)]
322 
323 
def flat(l):
    """
Flattens a '2D' list by one level: the items of every row are strung
together, in order, into a single new list.  List-correlate to the
a.ravel() method of NumPy arrays.

Usage:   flat(l)
Returns: a new flat list
"""
    flattened = []
    for row_index in range(len(l)):
        row = l[row_index]
        flattened.extend([row[j] for j in range(len(row))])
    return flattened
336 
337 
def linexand(listoflists, columnlist, valuelist):
    """
Returns the rows of a list of lists where col (from columnlist) = val
(from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
len(columnlist) must equal len(valuelist); an IndexError is raised if
valuelist is shorter.  Values are compared directly with ==, so strings
containing quotes and floats are matched exactly.

Usage:   linexand (listoflists,columnlist,valuelist)
Returns: a list of the rows of listoflists where columnlist[i]=valuelist[i]
         for ALL i (every row if columnlist is empty)
"""
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    matches = []
    for row in listoflists:
        for col, val in pairs:
            if not (row[col] == val):
                break                   # one failed criterion rejects the row
        else:
            matches.append(row)
    return matches
362 
363 
def linexor(listoflists, columnlist, valuelist):
    """
Returns the rows of a list of lists where col (from columnlist) = val
(from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
One value is required for each column in columnlist.  If only one column
is given but multiple values appear in valuelist, the valuelist values are
all assumed to pertain to that same column.  Values are compared directly
with ==, so strings containing quotes and floats are matched exactly.

Usage:   linexor (listoflists,columnlist,valuelist)
Returns: a list of the rows of listoflists where columnlist[i]=valuelist[i]
         for ANY i (no rows if columnlist is empty)
"""
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = list(columnlist) * len(valuelist)
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    matches = []
    for row in listoflists:
        for col, val in pairs:
            if row[col] == val:
                matches.append(row)
                break                   # one satisfied criterion is enough
    return matches
392 
393 
def linedelimited(inlist, delimiter):
    """
Returns a string composed of the elements in inlist (non-strings are
converted with str()), with consecutive elements separated by 'delimiter',
which may be any string, including multi-character or empty ones.  Used by
function writedelimited.  Use '\\t' for tab-delimiting.

Usage:   linedelimited (inlist,delimiter)
Returns: the delimited string ('' for an empty inlist)
"""
    pieces = []
    for item in inlist:
        if not isinstance(item, str):
            item = str(item)
        pieces.append(item)
    # join() puts the delimiter only BETWEEN items, so nothing has to be
    # trimmed afterwards (trimming one character was wrong for delimiters
    # that are not exactly one character long).
    return delimiter.join(pieces)
409 
410 
def lineincols(inlist, colsize):
    """
Returns a string composed of the elements in inlist (non-strings are
converted with str()), with each element right-aligned in a column of
(fixed) width colsize.  Elements longer than colsize are truncated to their
first colsize characters so every column keeps the same width.

Usage:   lineincols (inlist,colsize)   where colsize is an integer
Returns: the formatted string
"""
    cells = []
    for item in inlist:
        if not isinstance(item, str):
            item = str(item)
        if len(item) <= colsize:
            cells.append(item.rjust(colsize))
        else:
            cells.append(item[0:colsize])   # exactly colsize characters
    return ''.join(cells)
430 
431 
def lineincustcols(inlist, colsizes):
    """
Returns a string composed of the elements in inlist (non-strings are
converted with str()), with element i right-aligned in a column of width
colsizes[i].  The length of colsizes must be greater than or equal to the
number of items in inlist (IndexError otherwise).  Elements longer than
their column are truncated to the column width.

Usage:   lineincustcols (inlist,colsizes)
Returns: formatted string created from inlist
"""
    cells = []
    for i in range(len(inlist)):
        item = inlist[i]
        if not isinstance(item, str):
            item = str(item)
        width = colsizes[i]
        if len(item) <= width:
            cells.append(item.rjust(width))
        else:
            cells.append(item[0:width])     # exactly width characters
    return ''.join(cells)
456 
457 
def list2string(inlist, delimit=' '):
    """
Converts a 1D list to a single long string for file output: every item
that is not already a string is converted with str() and the pieces are
joined with 'delimit' between them.

Usage:   list2string (inlist,delimit=' ')
Returns: the string created from inlist ('' for an empty list)
"""
    pieces = []
    for item in inlist:
        if not isinstance(item, str):
            item = str(item)
        pieces.append(item)
    return delimit.join(pieces)
468 
469 
def makelol(inlist):
    """
Turns a 1D list into a 2D list (a list-of-lists) by wrapping each item in
its own one-element list.  Useful when you want to use put() to write a 1D
list one item per line in the file.

Usage:   makelol(inlist)
Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
"""
    return [[element] for element in inlist]
482 
483 
def makestr(x):
    """
Returns x unchanged if it is already a string, otherwise str(x).

Usage:   makestr(x)
"""
    if not isinstance(x, str):
        x = str(x)
    return x
488 
489 
def printcc(lst, extra=2):
    """
Prints a list of lists in columns, customized by the max size of items
within the columns (max size of items in col, plus 'extra' number of spaces).
Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
respectively.  Such marker rows (given either bare or wrapped in a
one-element list, as are '' and ['']) are ignored when the column widths are
computed.  The number of columns is taken from the first data row.  A 1D
list (first element not a list/tuple) is printed as a single row.

Usage:   printcc (lst,extra=2)
Returns: None
"""
    blank_rows = (['\n'], '\n', '', [''])
    dash_rows = (['dashes'], 'dashes')

    if len(lst) == 0:
        return None                     # nothing to print
    if not isinstance(lst[0], (list, tuple)):
        lst = [lst]

    # Only real data rows take part in the width computation.
    data_rows = [row for row in lst
                 if row not in blank_rows and row not in dash_rows]
    maxsize = []
    if data_rows:
        for col in range(len(data_rows[0])):
            widths = [len(makestr(row[col])) for row in data_rows]
            maxsize.append(max(widths) + extra)

    for row in lst:
        if row in blank_rows:
            print('')
        elif row in dash_rows:
            # Underline each column with as many dashes as its widest item,
            # i.e. the column width minus the 'extra' padding.
            dashes = ['-' * (width - extra) for width in maxsize]
            print(lineincustcols(dashes, maxsize))
        else:
            print(lineincustcols(row, maxsize))
    return None
526 
527 
def printincols(listoflists, colsize):
    """
Prints each row of a list of lists on its own line, formatted by
lineincols() into right-aligned columns of (fixed) width colsize, where
colsize is an integer.

Usage:   printincols (listoflists,colsize)
Returns: None
"""
    for current_row in listoflists:
        formatted = lineincols(current_row, colsize)
        print(formatted)
    return None
539 
540 
def pl(listoflists):
    """
Prints a list of lists, 1 list (row) at a time.  A row whose last item is
'\\n' (e.g. a string that already ends in a newline) is written without an
additional newline; every other row, including an empty one, is followed by
a newline.

Usage:   pl(listoflists)
Returns: None
"""
    import sys      # local import: only needed for un-terminated writes

    for row in listoflists:
        if isinstance(row, str):
            text = row
        else:
            text = str(row)
        # The length guard keeps empty rows from raising IndexError.
        if len(row) > 0 and row[-1] == '\n':
            sys.stdout.write(text)
        else:
            sys.stdout.write(text + '\n')
    return None
554 
555 
def printl(listoflists):
    """
Alias for pl: prints a list of lists, 1 list (row) at a time.

Usage:   printl(listoflists)
Returns: None
"""
    pl(listoflists)
    return None
560 
561 
def replace(inlst, oldval, newval):
    """
Replaces all occurrences of 'oldval' with 'newval', recursively descending
into nested lists and tuples.  The input is never modified: a new sequence
is built (a tuple where the input, or a nested item, was a tuple; a list
otherwise).

Usage:   replace (inlst,oldval,newval)
Returns: a copy of inlst with the replacements made
"""
    result = []
    for item in inlst:
        if isinstance(item, (list, tuple)):
            result.append(replace(item, oldval, newval))
        elif item == oldval:
            result.append(newval)
        else:
            result.append(item)
    if isinstance(inlst, tuple):
        return tuple(result)    # tuples cannot be edited in place; rebuild
    return result
575 
576 
def recode(inlist, listmap, cols=None):
    """
Changes the values in a 2D list to a new set of values (useful when
you need to recode data from (e.g.) strings to numbers).  Each row of
listmap is an [oldvalue, newvalue] pair; a cell equal to an oldvalue is
replaced by the matching newvalue (the first matching pair wins), all other
cells are left alone.  cols defaults to None, meaning every column of every
row is recoded.

Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
Returns: a deep copy of inlist with the appropriate values replaced
"""
    lst = copy.deepcopy(inlist)
    oldvals = [pair[0] for pair in listmap]     # built once, not per cell

    def recode_cell(row, col):
        try:
            idx = oldvals.index(lst[row][col])
        except ValueError:
            return                              # value not in the map: keep it
        lst[row][col] = listmap[idx][1]

    if cols is not None:
        if not isinstance(cols, (list, tuple)):
            cols = [cols]
        for col in cols:
            for row in range(len(lst)):
                recode_cell(row, col)
    else:
        for row in range(len(lst)):
            # Walk the columns of THIS row (not the number of rows), so
            # non-square data is recoded completely.
            for col in range(len(lst[row])):
                recode_cell(row, col)
    return lst
606 
607 
def remap(listoflists, criterion):
    """
Remaps the rows of a 2D list (listoflists).  'criterion' is a string
holding an expression in terms of 'x' (one row); the result is the list of
that expression evaluated for every row, i.e. the equivalent of
[<criterion> for x in listoflists].

Usage:   remap(listoflists,criterion)    criterion=string
Returns: remapped version of listoflists (a list, one entry per row)
"""
    # NOTE(review): criterion is evaluated as Python code; never pass
    # untrusted text here.
    transform = eval('lambda x: ' + criterion)
    return [transform(row) for row in listoflists]
620 
621 
def roundlist(inlist, digits):
    """
Goes through each element in a 1D or 2D inlist, and applies
round(element,digits) to all elements that are floats; everything else is
passed through untouched.  The input is not modified.

A 1D list (recognized by its first element being an int or a float) is
treated as a single row, so the result is always 2D.  Rows that are not
lists/tuples (e.g. strings) are copied to the output unchanged.

Usage:   roundlist(inlist,digits)
Returns: a new list of rows with rounded floats ([] for an empty inlist)
"""
    if len(inlist) == 0:
        return []
    if isinstance(inlist[0], (int, float)):
        inlist = [inlist]
    rounded = []
    for row in inlist:
        if not isinstance(row, (list, tuple)):
            rounded.append(row)
            continue
        # Build a fresh row so the caller's data is left as it was.
        new_row = []
        for value in row:
            if isinstance(value, float):
                value = round(value, digits)
            new_row.append(value)
        if isinstance(row, tuple):
            new_row = tuple(new_row)
        rounded.append(new_row)
    return rounded
638 
639 
def sortby(listoflists, sortcols):
    """
Sorts a list of lists on the column(s) given in sortcols.  The sort
column(s) are abutted in front of every row, the widened rows are sorted,
and the leading key column(s) are sliced off again.

Usage:   sortby(listoflists,sortcols)
Returns: sorted list, unchanged column ordering
"""
    keyed_rows = abut(colex(listoflists, sortcols), listoflists)
    keyed_rows.sort()
    # A single column number has no len(); it contributes exactly one key.
    try:
        nkeys = len(sortcols)
    except TypeError:
        nkeys = 1
    strip_keys = '[' + str(nkeys) + ':]'
    return colex(keyed_rows, strip_keys)
657 
658 
def unique(inlist):
    """
Collects the distinct items of the passed list, keeping the order of first
appearance.  If a list-of-lists is passed, unique LISTS are found (i.e.,
items in the first dimension are compared with ==).

Usage:   unique (inlist)
Returns: the unique elements (or rows) in inlist
"""
    seen = []
    for candidate in inlist:
        if candidate in seen:
            continue
        seen.append(candidate)
    return seen
673 
def duplicates(inlist):
    """
Returns the items in the FIRST dimension of the passed list that occur
again later in the list.  An item occurring k times is therefore reported
k-1 times.

Usage:   duplicates (inlist)
"""
    return [item for position, item in enumerate(inlist)
            if item in inlist[position + 1:]]
685 
686 
def nonrepeats(inlist):
    """
Returns the items that occur exactly once in the first dim of the passed
list, in their original order.

Usage:   nonrepeats (inlist)
"""
    return [item for item in inlist if inlist.count(item) == 1]
698 
699 
700 #=================== PSTAT ARRAY FUNCTIONS =====================
701 #=================== PSTAT ARRAY FUNCTIONS =====================
702 #=================== PSTAT ARRAY FUNCTIONS =====================
703 #=================== PSTAT ARRAY FUNCTIONS =====================
704 #=================== PSTAT ARRAY FUNCTIONS =====================
705 #=================== PSTAT ARRAY FUNCTIONS =====================
706 #=================== PSTAT ARRAY FUNCTIONS =====================
707 #=================== PSTAT ARRAY FUNCTIONS =====================
708 #=================== PSTAT ARRAY FUNCTIONS =====================
709 #=================== PSTAT ARRAY FUNCTIONS =====================
710 #=================== PSTAT ARRAY FUNCTIONS =====================
711 #=================== PSTAT ARRAY FUNCTIONS =====================
712 #=================== PSTAT ARRAY FUNCTIONS =====================
713 #=================== PSTAT ARRAY FUNCTIONS =====================
714 #=================== PSTAT ARRAY FUNCTIONS =====================
715 #=================== PSTAT ARRAY FUNCTIONS =====================
716 
717 try: # DEFINE THESE *ONLY* IF numpy IS AVAILABLE
718  import numpy as N
719 
def aabut(source, *args):
    """
Like the |Stat abut command.  It concatenates arrays column-wise
and returns the result.  1D arrays are treated as single columns.
CAUTION: If one array is shorter (has fewer rows), its rows are repeated
(cycled) until it is as long as the other.

Usage:   aabut (source, args)    where args=any # of arrays
Returns: a 2D array as long as the LONGEST array passed, source appearing
         on the 'left', arrays in <args> attached on the 'right'.
"""
    source = N.asarray(source)
    if len(source.shape) == 1:
        source = N.resize(source, [source.shape[0], 1])
    for addon in args:
        addon = N.asarray(addon)
        if len(addon.shape) == 1:
            # Turn the 1D addon into a column of ITS OWN length first, so a
            # longer addon is not cut down to the length of source.
            addon = N.resize(addon, [addon.shape[0], 1])
        if len(addon) < len(source):
            addon = N.resize(addon, [source.shape[0], addon.shape[1]])
        elif len(source) < len(addon):
            source = N.resize(source, [addon.shape[0], source.shape[1]])
        source = N.concatenate((source, addon), 1)
    return source
747 
748 
def acolex(a, indices, axis=1):
    """
Extracts specified indices (a single index or a list of them) from the
passed array, along the passed axis (column extraction is default), using
N.take.  BEWARE: A 1D array is presumed to be a column-array; indices is
then ignored and the whole array is returned as a single column.

Usage:   acolex (a,indices,axis=1)
Returns: the columns of a specified by indices (always 2D for 2D input)
"""
    if not isinstance(indices, (list, tuple, N.ndarray)):
        indices = [indices]
    shape = N.shape(a)
    if len(shape) == 1:
        cols = N.resize(a, [shape[0], 1])
    else:
        cols = N.take(a, indices, axis)
    return cols
765 
766 
def acollapse(a, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
    """
Averages data in collapsecols, keeping all unique items in keepcols
(using unique, which keeps unique LISTS of column values).  cfcn is the
collapse function applied to each column of collapsecols (defaults to the
arithmetic mean).  If fcn1 and/or fcn2 are given (e.g., a standard-error
function, len), their results are attached after each collapsed value; if
such a function raises, 'N/A' is used instead.

Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
Returns: if keepcols is empty, a 1D array with cfcn of each collapse column
         (abutted column-wise with the fcn1/fcn2 results, if requested);
         otherwise the unique 'conditions' found in the keepcols columns,
         sorted, each abutted with the collapsed value(s) of the column(s)
         specified by collapsecols (an object array if the rows cannot be
         stored in a regular array).
"""
    def acollmean(inarray):
        flat = N.ravel(inarray)
        return N.sum(flat) / float(len(flat))

    def extra_stat(fcn, column, fallback):
        # fcn1/fcn2 are best-effort extras: a failure is reported as
        # 'N/A' in the output instead of aborting the whole collapse.
        try:
            return fcn(column)
        except Exception:
            return fallback

    if isinstance(keepcols, N.ndarray):
        keepcols = keepcols.tolist()
    if isinstance(collapsecols, N.ndarray):
        collapsecols = collapsecols.tolist()
    if not isinstance(keepcols, (list, tuple)):
        keepcols = [keepcols]
    if not isinstance(collapsecols, (list, tuple)):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = acollmean

    if len(keepcols) == 0:
        avgcol = acolex(a, collapsecols)
        # One collapsed value per column (not one grand total).
        means = N.array([cfcn(avgcol[:, i]) for i in range(avgcol.shape[1])])
        not_available = N.array(['N/A'] * len(means))
        for fcn in (fcn1, fcn2):
            if fcn is not None:
                test = extra_stat(fcn, avgcol, not_available)
                means = aabut(means, N.atleast_1d(N.asarray(test)))
        return means

    values = colex(a, keepcols)     # plain lists, so "item" can be appended to
    uniques = unique(values)        # a LIST, so .sort keeps rows intact
    uniques.sort()
    newlist = []
    for item in uniques:
        if isinstance(item, (tuple, N.ndarray)):
            item = list(item)
        elif not isinstance(item, list):
            item = [item]
        # Select the rows for this condition BEFORE item grows with results.
        tmprows = alinexand(a, keepcols, item)
        for col in collapsecols:
            avgcol = acolex(tmprows, col)
            item.append(cfcn(avgcol))
            if fcn1 is not None:
                item.append(extra_stat(fcn1, avgcol, 'N/A'))
            if fcn2 is not None:
                item.append(extra_stat(fcn2, avgcol, 'N/A'))
        newlist.append(item)
    try:
        new_a = N.array(newlist)
    except (TypeError, ValueError):
        new_a = N.array(newlist, 'O')
    return new_a
837 
838 
def adm(a, criterion):
    """
Returns rows from the passed array that meet the criteria in the passed
criterion expression (a string as a function of x, where x is one row).

Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
Returns: an array of the matching rows (an object array if the rows
         cannot be stored in a regular array)
"""
    # NOTE(review): criterion is evaluated as Python code; never pass
    # untrusted text here.
    keep = eval('lambda x: ' + criterion)
    lines = [row for row in a if keep(row)]
    try:
        lines = N.array(lines)
    except Exception:
        lines = N.array(lines, dtype='O')
    return lines
853 
854 
def isstring(x):
    """
Tells whether x is a string.

Usage:   isstring(x)
Returns: 1 if x is a string, 0 otherwise
"""
    if isinstance(x, str):
        return 1
    return 0
860 
861 
def alinexand(a, columnlist, valuelist):
    """
Returns the rows of an array where col (from columnlist) = val
(from valuelist) for EVERY pair (columnlist[i],valuelist[i]).  One value is
required for each column in columnlist (IndexError otherwise).  Values are
compared directly with ==, so floats and strings are matched exactly.

Usage:   alinexand (a,columnlist,valuelist)
Returns: an array of the rows of a where columnlist[i]=valuelist[i] for
         ALL i (an object array if the rows cannot be stored in a regular
         array)
"""
    if not isinstance(columnlist, (list, tuple, N.ndarray)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple, N.ndarray)):
        valuelist = [valuelist]
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    rows = []
    for row in a:
        for col, val in pairs:
            if not (row[col] == val):
                break                   # one failed criterion rejects the row
        else:
            rows.append(row)
    try:
        return N.array(rows)
    except Exception:
        return N.array(rows, dtype='O')
883 
884 
def alinexor(a, columnlist, valuelist):
    """
Returns the rows of an array where col (from columnlist) = val (from
valuelist) for ANY pair (columnlist[i],valuelist[i]).  One value is
required for each column in columnlist.  The exception is if either
columnlist or valuelist has only 1 value, in which case that item will be
expanded to match the length of the other list.  Values are compared
directly with ==, so floats and strings are matched exactly.

Usage:   alinexor (a,columnlist,valuelist)
Returns: an array of the rows of a where columnlist[i]=valuelist[i] for
         ANY i (an object array if the rows cannot be stored in a regular
         array)
"""
    if not isinstance(columnlist, (list, tuple, N.ndarray)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple, N.ndarray)):
        valuelist = [valuelist]
    # Expand on plain lists: '*' on an ndarray would multiply its values
    # instead of repeating them.
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = list(columnlist) * len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = list(valuelist) * len(columnlist)
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    rows = []
    for row in a:
        for col, val in pairs:
            if row[col] == val:
                rows.append(row)
                break                   # one satisfied criterion is enough
    try:
        return N.array(rows)
    except Exception:
        return N.array(rows, dtype='O')
913 
914 
def areplace(a, oldval, newval):
    """
Builds a new array from a in which every element equal to oldval is
swapped for newval; all other elements are carried over as they are.

Usage:   areplace(a,oldval,newval)
Returns: the array produced by N.where
"""
    matches = (a == oldval)
    replaced = N.where(matches, newval, a)
    return replaced
922 
923 
def arecode(a, listmap, col='all'):
    """
Remaps the values in an array to a new set of values (useful when
you need to recode data from (e.g.) strings to numbers as most stats
packages require).  Can work on a SINGLE column of a 2D array, or on 'all'
elements at once.  Each row of listmap is an [oldvalue, newvalue] pair;
every pair is applied to the ORIGINAL values, so a replacement value is
never recoded again by a later pair, and the first pair wins if an old
value is listed more than once.

If a or any new value is non-numeric (strings/objects), the result is an
object array; otherwise NumPy chooses a numeric type able to hold the
new values.

Usage:   arecode (a,listmap,col='all')
Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
"""
    a = N.asarray(a)
    recode_all = isinstance(col, str) and col == 'all'
    if recode_all:
        original = a.ravel()
    else:
        original = a[:, col]

    use_objects = a.dtype.kind in 'OSU'
    for pair in listmap:
        if isinstance(pair[1], str):
            use_objects = True

    if use_objects:
        work = N.array(original, dtype='O')
        a = N.array(a, dtype='O')
    else:
        work = N.array(original)

    # Reverse order + masks computed on the original values: the FIRST
    # matching pair is written last and therefore wins.
    for pair in reversed(listmap):
        if use_objects:
            mask = N.array([bool(item == pair[0]) for item in original],
                           dtype=bool)
            work[mask] = pair[1]
        else:
            work = N.where(original == pair[0], pair[1], work)

    if recode_all:
        return N.reshape(work, a.shape)
    # Splice the recoded column back between its untouched neighbors.
    return N.concatenate([a[:, 0:col], work[:, N.newaxis], a[:, col + 1:]], 1)
954 
955 
def arowcompare(row1, row2):
    """
    Compares two rows from an array element by element, regardless of
    whether it is an array of numbers or of python objects (the latter are
    compared one pair at a time with python's == operator).

    Usage:   arowcompare(row1,row2)
    Returns: a boolean array of equal length, True where the two rows had
             identical elements and False otherwise
    """
    # NOTE: a bare 'return' used to sit here, which made the function
    # always hand back None instead of the documented comparison vector.
    if row1.dtype.char == 'O' or row2.dtype.char == 'O':
        cmpvect = N.array([x == y for x, y in zip(row1, row2)], dtype=bool)
    else:
        cmpvect = N.equal(row1, row2)
    return cmpvect
972 
973 
def arowsame(row1, row2):
    """
    Compares two rows from an array, regardless of whether it is an
    array of numbers or of python objects.  The element-wise result of
    arowcompare(row1,row2) is reduced to a single truth value.

    Usage:   arowsame(row1,row2)
    Returns: 1 (True) if the two rows are identical, 0 (False) otherwise.
    """
    # N.all is the supported spelling of N.alltrue (removed in NumPy 2.0)
    return N.all(arowcompare(row1, row2))
984 
985 
def asortrows(a, axis=0):
    """
    Sorts an array "by rows".  This differs from the numpy sort() function,
    which sorts elements WITHIN the given axis.  Instead, this function keeps
    each slice taken along the given axis intact, but shifts the slices
    'up or down' relative to one another.  Slices are ordered
    lexicographically (first element, then second, ...) and ties keep
    their original relative order.

    Usage:   asortrows(a,axis=0)
    Returns: sorted version of a (a new array; a itself is not changed)
    """
    a = N.asarray(a)
    # Bring the requested axis to the front so that "rows" are a[0], a[1], ...
    moved = N.swapaxes(a, axis, 0)
    rows = moved.tolist()
    # Nested python lists compare lexicographically and sorted() is stable,
    # so ordering the row INDICES reorders whole rows without mixing them.
    order = sorted(range(len(rows)), key=rows.__getitem__)
    reordered = moved[N.array(order, dtype=int)]
    return N.swapaxes(reordered, 0, axis)
997 
998 
def aunique(inarray):
    """
    Returns unique items in the FIRST dimension of the passed array, in
    order of first appearance.  For a 1D array the items are its elements;
    for a 2+D array they are the sub-arrays inarray[0], inarray[1], ...
    which count as equal only if ALL their elements are equal.  Lists are
    accepted as well, and empty input gives an empty array.

    Usage:   aunique (inarray)
    Returns: a new array holding the first occurrence of each distinct item
    """
    inarray = N.asarray(inarray)
    if inarray.size == 0:              # nothing to compare
        return inarray.copy()

    if inarray.dtype.char == 'O':
        # Object array: compare as (nested) python lists using ==
        rows = inarray.tolist()
        kept = []
        for idx, row in enumerate(rows):
            if not any(row == rows[k] for k in kept):
                kept.append(idx)
    else:
        # Flatten every item to one row so a single vectorized comparison
        # against all items kept so far works for any number of dimensions.
        flat = inarray.reshape(len(inarray), -1)
        kept = [0]
        for idx in range(1, len(flat)):
            if not N.all(flat[kept] == flat[idx], axis=1).any():
                kept.append(idx)

    # Index the original array so dtype and item shape are preserved
    return inarray[N.array(kept, dtype=int)]
1038 
1039 
def aduplicates(inarray):
    """
    Returns duplicate items in the FIRST dimension of the passed array,
    i.e. every item that occurs more than once, listed a single time and
    in order of first appearance.  For a 2+D array the items are the
    sub-arrays inarray[0], inarray[1], ...  An array without duplicates
    gives an empty array.

    Usage:   aduplicates (inarray)
    Returns: an array of the duplicated items (same dtype as the input)
    """
    inarray = N.array(inarray)
    # (nested) python lists compare element-wise with ==, which serves 1D
    # and 2+D input alike
    items = inarray.tolist()
    dups = []
    for i, item in enumerate(items):
        if item in items[i+1:] and item not in dups:
            dups.append(item)
    return N.array(dups, dtype=inarray.dtype)
1064 
1065 except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs
1066  pass
def printl(listoflists)
Definition: pstat.py:556
def colex(listoflists, cnums)
Definition: pstat.py:203
def duplicates(inlist)
Definition: pstat.py:674
def linexor(listoflists, columnlist, valuelist)
Definition: pstat.py:364
def nonrepeats(inlist)
Definition: pstat.py:687
def remap(listoflists, criterion)
Definition: pstat.py:608
def abut(source, args)
=========================== LIST FUNCTIONS ==========================
Definition: pstat.py:120
def pl(listoflists)
Definition: pstat.py:541
def makestr(x)
Definition: pstat.py:484
def roundlist(inlist, digits)
Definition: pstat.py:622
def printcc(lst, extra=2)
Definition: pstat.py:490
def recode(inlist, listmap, cols=None)
Definition: pstat.py:577
def collapse(listoflists, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None)
Definition: pstat.py:231
def dm(listoflists, criterion)
Definition: pstat.py:309
def list2string(inlist, delimit=' ')
Definition: pstat.py:458
def makelol(inlist)
Definition: pstat.py:470
def flat(l)
Definition: pstat.py:324
def replace(inlst, oldval, newval)
Definition: pstat.py:562
def simpleabut(source, addon)
Definition: pstat.py:167
def lineincols(inlist, colsize)
Definition: pstat.py:411
def linedelimited(inlist, delimiter)
Definition: pstat.py:394
def linexand(listoflists, columnlist, valuelist)
Definition: pstat.py:338
def sortby(listoflists, sortcols)
Definition: pstat.py:640
def printincols(listoflists, colsize)
Definition: pstat.py:528
def lineincustcols(inlist, colsizes)
Definition: pstat.py:432
def unique(inlist)
Definition: pstat.py:659


wiimote
Author(s): Andreas Paepcke, Melonee Wise, Mark Horn
autogenerated on Mon Jun 10 2019 13:42:43