success_classifier.py
import roslib; roslib.load_manifest('hai_sandbox')
import rospy
import glob
import hrl_lib.rutils as ru
import hrl_pr2_lib.devices as hpr2
import hai_sandbox.bag_processor as bp
import hrl_lib.util as ut
import visualization_msgs.msg as vm
import numpy as np
import hrl_lib.viz as viz
import hai_sandbox.dimreduce as dimreduce
import sensor_msgs.msg as sm
import scipy.spatial as sp
import pdb
import scipy.stats as kde
import pr2_msgs.msg as pm
import threading


##
# Takes in a folder of bag files and outputs a dictionary organized by
# contact segment, then topic, holding lists of {'left', 'right', 't'} records.
#
def success_failure_classification_preprocess(folder_name):
    data_dict = None
    for bag_name in glob.glob('%s/*.bag' % folder_name):
        print 'Loading bag %s' % bag_name
        topics_dict = ru.bag_sel(bag_name, 
                                    ['/imitate_behavior_marker',
                                    '/pressure/l_gripper_motor',
                                    '/pressure/r_gripper_motor',
                                    '/accelerometer/l_gripper_motor', 
                                    '/accelerometer/r_gripper_motor',
                                    '/joint_states', 
                                    '/l_cart/command_pose', 
                                    '/r_cart/command_pose'])

        ##break each mat into segments based on states
        #/imitate_behavior_marker
        print 'There are %d marker messages.' % len(topics_dict['/imitate_behavior_marker']['msg'])
        time_segments = [[]]
        for marker_msg in topics_dict['/imitate_behavior_marker']['msg']:
            if len(time_segments[-1]) >= 2:
                time_segments.append([])
            time_segments[-1].append(marker_msg.header.stamp.to_time())

        if data_dict == None:
            data_dict = {}
            #organize by segment, then by type, then by what that type will store
            for i in range(len(time_segments)):
                data_dict[i] = {}

        ###pressure mat with times
        ##/pressure/l_gripper_motor
        ##/pressure/r_gripper_motor
        for ptop in ['/pressure/l_gripper_motor', '/pressure/r_gripper_motor']:
            p = topics_dict[ptop]
            psegs = bp.segment_msgs(time_segments, p['msg'])
            print '>> Separated records of %s (%s) into %d segments' % (ptop, bag_name, len(psegs))
            #convert segments into mats
            for i, pseg in enumerate(psegs):
                #each col is an example in left_f and right_f
                left_f, right_f, ptimes = hpr2.pressure_state_to_mat(pseg)
                if not data_dict[i].has_key(ptop):
                    data_dict[i][ptop] = []
                data_dict[i][ptop].append({'left': left_f, 'right': right_f, 't': ptimes})

        for ptop in ['/pressure/l_gripper_motor', '/pressure/r_gripper_motor']:
            for k in data_dict.keys():
                print '>>', k, ptop, len(data_dict[k][ptop])
    return data_dict

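#Illustrative sketch (hypothetical helper, not part of the original
#pipeline): the dictionary returned above is indexed as
#data_dict[segment][topic] => list of per-trial {'left', 'right', 't'}
#records, which can be walked like this.
def _print_data_dict_summary(data_dict, topic='/pressure/l_gripper_motor'):
    for segment in data_dict.keys():
        for trial_idx, rec in enumerate(data_dict[segment][topic]):
            print 'segment %d trial %d: left %s right %s, %d stamps' % \
                    (segment, trial_idx, str(rec['left'].shape),
                     str(rec['right'].shape), len(rec['t']))
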
def select_time(data, time_rec, time_start, time_end):
    #pdb.set_trace()
    return data[:, np.where(np.multiply(time_rec >= time_start, time_rec < time_end))[0]]

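#Quick illustration of select_time on toy values (not from the data
#above): columns stamped t = [0, 1, 2, 3, 4]; the window [1, 3) keeps
#columns 1 and 2.
def _select_time_example():
    data = np.matrix(np.arange(10).reshape((2, 5)))
    time_rec = np.array([0., 1., 2., 3., 4.])
    chunk = select_time(data, time_rec, 1., 3.)
    print chunk.shape  # prints (2, 2)
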
def break_record_matrices_into_chunks(data_dict, segmented_matrices, \
        minimal_trial_lengths, chunk_times, topic):
    #For each state
    data_sets = {}
    for state in range(len(data_dict.keys())):
        data_sets[state] = {}
        time_starts = np.arange(0, minimal_trial_lengths[state], chunk_times[state])
        time_ends   = np.arange(chunk_times[state], minimal_trial_lengths[state] + chunk_times[state], chunk_times[state])

        #Go over all the records
        for record_number in range(len(data_dict[state][topic])):
            time_rec = segmented_matrices[record_number]['t'][state]
            time_rec = time_rec - time_rec[0]
            #Break them up into little chunks
            for tidx in range(len(time_starts)):
                if not data_sets[state].has_key(tidx):
                    data_sets[state][tidx] = {}
                    data_sets[state][tidx]['data'] = []
                    data_sets[state][tidx]['time'] = [time_starts[tidx], time_ends[tidx]]

                data_chunk = select_time(segmented_matrices[record_number]['mat'][state], time_rec, 
                                            time_starts[tidx], time_ends[tidx])
                data_sets[state][tidx]['data'].append(data_chunk)
    return data_sets

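#The chunk grid used above, on toy numbers (illustration only): a 3.0 s
#trial cut into 1.0 s chunks yields starts [0, 1, 2] and ends [1, 2, 3];
#each (start, end) pair is handed to select_time to slice out one chunk
#per trial record.
def _chunk_grid_example(trial_length=3.0, chunk_time=1.0):
    time_starts = np.arange(0, trial_length, chunk_time)
    time_ends   = np.arange(chunk_time, trial_length + chunk_time, chunk_time)
    for ts, te in zip(time_starts, time_ends):
        print 'chunk [%.1f, %.1f)' % (ts, te)
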
class TimeSeriesVectorizer:

    def __init__(self):
        self.channels = {}
        self.buffer_lengths = {}

    def register_listener(self, vectorize_func, msg_type, topic, buffer_length_secs):
        self.buffer_lengths[topic] = buffer_length_secs

        def listener_func(msg):
            amat = vectorize_func(msg)
            t    = np.matrix([msg.header.stamp.to_time()])
            got_lock = False
            if self.channels[topic][0] == None:
                self.channels[topic] = [amat, t, threading.RLock()]
            else:
                lock = self.channels[topic][2]
                lock.acquire()
                got_lock = True
                new_record = [np.column_stack((self.channels[topic][0], amat)), 
                              np.column_stack((self.channels[topic][1], t)),
                              lock]
                self.channels[topic] = new_record

            lock = self.channels[topic][2]
            if not got_lock:
                lock.acquire()
            #keep only messages from the last buffer_length_secs seconds
            n_seconds_ago = t[0,0] - buffer_length_secs
            records_in_range = (np.where(self.channels[topic][1] >= n_seconds_ago)[1]).A1
            self.channels[topic][0] = self.channels[topic][0][:, records_in_range]
            self.channels[topic][1] = self.channels[topic][1][:, records_in_range]
            lock.release()

        self.channels[topic] = [None]
        rospy.Subscriber(topic, msg_type, listener_func)

    def get_n_steps(self, topic, timestart, nsteps, wait=True):
        if self.channels[topic][0] != None:
            if timestart < self.channels[topic][1][0,0]:
                # [0,0] is the earliest recorded time, [0,-1] the latest
                raise RuntimeError('timestart is earlier than the oldest buffered sample')

        r = rospy.Rate(100)
        selected = None
        while selected == None:
            if self.channels[topic][0] == None:
                r.sleep()
                continue

            lock = self.channels[topic][2]
            lock.acquire()
            record_idxs = (np.where(self.channels[topic][1] > timestart)[1]).A1
            records_from_time = self.channels[topic][0][:, record_idxs]
            selected = records_from_time[:, :nsteps]
            lock.release()
            if selected.shape[1] < nsteps:
                if not wait:
                    return None
                else:
                    selected = None
                    r.sleep()
            else:
                return selected

    def get_range(self, topic, time_start, time_end):
        times = self.channels[topic][1]
        #boolean masks on np.matrix must use np.multiply ('*' would be a
        #matrix product); index with the column indices from np.where
        in_range = (np.where(np.multiply(times > time_start, times <= time_end))[1]).A1
        selected = self.channels[topic][0][:, in_range]
        return selected

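#Hypothetical usage sketch for TimeSeriesVectorizer (node name assumed;
#needs a running ROS master and a publisher on the pressure topic):
#register a vectorizing callback, then block until 10 columns arrive.
def _vectorizer_usage_sketch():
    rospy.init_node('vectorizer_example')
    vectorizer = TimeSeriesVectorizer()
    def pressure_vectorizer(pmsg):
        return np.row_stack((np.matrix(pmsg.l_finger_tip).T,
                             np.matrix(pmsg.r_finger_tip).T))
    vectorizer.register_listener(pressure_vectorizer, pm.PressureState,
                                 '/pressure/l_gripper_motor', 15.)
    window = vectorizer.get_n_steps('/pressure/l_gripper_motor',
                                    rospy.get_rostime().to_time(), 10)
    print 'window shape', window.shape  # 44 x 10 (22 cells per fingertip)
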
#class SuccessProbabilityEstimator:
class TestOnlineClassification:

    def __init__(self):
        rospy.init_node('success_classifier')
        self.vectorizer = TimeSeriesVectorizer()
        def pressure_vectorizer(pmsg):
            return np.row_stack((np.matrix((pmsg.l_finger_tip)).T, np.matrix((pmsg.r_finger_tip)).T))
        self.vectorizer.register_listener(pressure_vectorizer, pm.PressureState, '/pressure/l_gripper_motor', 15.)

    def start_classifying(self):
        segment_lengths = [10, 10, 10]
        n_segments = len(segment_lengths)
        for segment_idx in range(n_segments):
            n_steps = segment_lengths[segment_idx]
            selected = self.vectorizer.get_n_steps('/pressure/l_gripper_motor', \
                    rospy.get_rostime().to_time(), n_steps)
            print 'selected.shape', selected.shape
            print selected
        print 'done!'


class TimeSeriesClassifier:

    def __init__(self):
        rospy.init_node('time_series_classifier')
        self.vectorizer = TimeSeriesVectorizer()
        def pressure_vectorizer(pmsg):
            return np.row_stack((np.matrix((pmsg.l_finger_tip)).T, np.matrix((pmsg.r_finger_tip)).T))
        self.vectorizer.register_listener(pressure_vectorizer, pm.PressureState, \
                '/pressure/l_gripper_motor', 15.)

    #TEST this when you have robot time!
    def run(self):
        for state in range(len(self.models['models'])):
            for chunk_idx in range(len(self.models['models'][state])):
                xmat = self.fetch_data(state, chunk_idx)
                #probability() requires a successp flag; True (report the
                #success-class posterior) is assumed here
                print self.probability(xmat, state, chunk_idx, successp=True)

    def fetch_data(self, state, chunk_idx):
        chunk_params = self.models['chunk_params']
        n_steps = chunk_params['chunk_dim'][state][chunk_idx]
        x_mat = self.vectorizer.get_n_steps('/pressure/l_gripper_motor', \
                     rospy.get_rostime().to_time(), n_steps)
        return x_mat

    def probability(self, x_mat, state, chunk_idx, successp):
        models = self.models['models']
        chunk_params = self.models['chunk_params']

        #Subtract out the mean
        x_vec = np.reshape(x_mat, (x_mat.shape[0] * x_mat.shape[1], 1)) - models[state][chunk_idx]['mean']
        #project
        projected_x = np.array((models[state][chunk_idx]['project'].T * x_vec).T)

        succ_prob = models[state][chunk_idx]['kde'][0].evaluate(np.array(projected_x))
        fail_prob = models[state][chunk_idx]['kde'][1].evaluate(np.array(projected_x))
        succ_total = np.sum(models[state][chunk_idx]['labels']) / float(models[state][chunk_idx]['labels'].shape[1])
        fail_total = 1 - succ_total

        if successp:
            prob = (succ_prob * succ_total) / ((fail_prob*fail_total) + (succ_total*succ_prob))
        else:
            prob = (fail_prob * fail_total) / ((fail_prob*fail_total) + (succ_total*succ_prob))

        n_steps = chunk_params['chunk_dim'][state][chunk_idx]
        print 'frame size %d state %d chunk %d prob %.3f' % (n_steps, state, chunk_idx, prob)
        return prob

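    #The posterior above is just Bayes' rule with the class-conditional
    #KDE densities. On toy numbers (assumed, for intuition): p(x|succ)=.3,
    #p(x|fail)=.1 with a 0.5/0.5 prior gives .15 / (.15 + .05) = 0.75.
    @staticmethod
    def _posterior_sketch(succ_prob=.3, fail_prob=.1, succ_total=.5):
        fail_total = 1 - succ_total
        return (succ_prob * succ_total) / \
               ((fail_prob * fail_total) + (succ_prob * succ_total))
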
    def save_models(self, name='timeseries_pca_model.pkl'):
        print 'saving models'
        ut.save_pickle(self.models, name)

    def load_models(self, name='timeseries_pca_model.pkl'):
        print 'loading models'
        self.models = ut.load_pickle(name)
        models = self.models['models']
        for state in range(len(models.keys())):
            for chunk_idx in range(len(models[state])):
                reduced_data = models[state][chunk_idx]['reduced']

                #labels = np.column_stack((np.matrix(np.ones((1, num_pos))), np.matrix(np.zeros((1, num_neg)))))
                success_data = reduced_data[:, (np.where(models[state][chunk_idx]['labels'] > 0))[1].A1]
                failure_data = reduced_data[:, (np.where(models[state][chunk_idx]['labels'] == 0))[1].A1]

                models[state][chunk_idx]['kde'] = [kde.gaussian_kde(np.array(success_data)), 
                                                   kde.gaussian_kde(np.array(failure_data))]
                #models[state][chunk_idx]['tree'] = sp.KDTree(np.array(reduced_data.T))

    def create_model(self, succ_pickle, fail_pickle):
        print 'creating model...'
        topic = '/pressure/l_gripper_motor'
        SEGMENT_LENGTH = 1.0
        VARIANCE_KEEP = .7

        # load in pickles
        print 'loading pickles'
        successes = ut.load_pickle(succ_pickle)
        failures = ut.load_pickle(fail_pickle)

        #chop data set into little chunks
        # data_sets[state][chunk_idx]['data', 'time'][chunk_record]
        print 'preprocess pickles'
        success_data_sets, failure_data_sets, chunk_params = self.preprocess_pickles(successes, \
                failures, topic, SEGMENT_LENGTH)

        # turn each set of records into a matrix
        combined_sets = {}
        for dset_name, datasets in zip(['success', 'failure'], [success_data_sets, failure_data_sets]):
            #merge the two matrices from mat_set
            mat_set = self.create_matrix_from_chunked_datasets(datasets)
            for state in range(len(datasets.keys())):
                if not combined_sets.has_key(state):
                    combined_sets[state] = {}
                for chunk_idx in range(len(datasets[state])):
                    if not combined_sets[state].has_key(chunk_idx):
                        combined_sets[state][chunk_idx] = {}
                    combined_sets[state][chunk_idx][dset_name] = mat_set[state][chunk_idx]['data']
                    combined_sets[state][chunk_idx]['time'] = mat_set[state][chunk_idx]['time']

        # run PCA over the entire set
        models = {}
        for state in range(len(combined_sets.keys())):
            models[state] = []
            for chunk_idx in range(len(combined_sets[state])):
                print 'building model for state', state, 'chunk idx', chunk_idx
                data_chunk = np.column_stack((combined_sets[state][chunk_idx]['success'], \
                                              combined_sets[state][chunk_idx]['failure']))
                num_pos = combined_sets[state][chunk_idx]['success'].shape[1]
                num_neg = combined_sets[state][chunk_idx]['failure'].shape[1]
                labels = np.column_stack((np.matrix(np.ones((1, num_pos))), np.matrix(np.zeros((1, num_neg)))))

                projection_basis, dmean = dimreduce.pca_vectors(data_chunk, VARIANCE_KEEP)
                print 'pca_basis: number of dimensions', projection_basis.shape[1]
                reduced_data = projection_basis.T * (data_chunk-dmean)
                models[state].append({'time':    combined_sets[state][chunk_idx]['time'],
                                      'project': projection_basis,
                                      'reduced': reduced_data,
                                      'labels':  labels,
                                      'mean':    dmean,
                                      'data':    data_chunk
                                      #'tree':    sp.KDTree(np.array(reduced_data.T))
                                      })

        self.models = {'models': models,
                       'chunk_params': chunk_params}

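    #Minimal numpy stand-in for the dimreduce.pca_vectors call above
    #(assumed interface: returns a basis whose columns span the top
    #principal components, plus the column mean); a sketch to make the
    #projection step concrete, not this project's implementation.
    @staticmethod
    def _pca_sketch(data_chunk, variance_keep=.7):
        dmean = np.mean(data_chunk, axis=1)
        centered = data_chunk - dmean
        u, s, _vt = np.linalg.svd(np.array(centered), full_matrices=False)
        var_frac = np.cumsum(s**2) / np.sum(s**2)
        n_dims = int(np.searchsorted(var_frac, variance_keep)) + 1
        projection_basis = np.matrix(u[:, :n_dims])
        reduced_data = projection_basis.T * centered
        return projection_basis, dmean, reduced_data
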
    def create_matrix_from_chunked_datasets(self, datasets):
        mat_set = {}
        for state in range(len(datasets.keys())):
            mat_set[state] = {}
            for chunk_idx in range(len(datasets[state])):
                records_l = []
                for chunk_record in range(len(datasets[state][chunk_idx]['data'])):
                    a = datasets[state][chunk_idx]['data'][chunk_record]
                    records_l.append(np.reshape(a, (a.shape[0]*a.shape[1], 1)))

                mat_set[state][chunk_idx] = {}
                mat_set[state][chunk_idx]['data'] = np.column_stack(records_l)
                mat_set[state][chunk_idx]['time'] = datasets[state][chunk_idx]['time']
        return mat_set

    def preprocess_pickles(self, successes, failures, topic, segment_length):
        #Break matrices into segments based on state
        # list of list of 44xN matrices
        success_matrices_segmented_by_state = construct_list_of_segmented_matrices_from_trial_recording(successes, topic)
        failure_matrices_segmented_by_state = construct_list_of_segmented_matrices_from_trial_recording(failures, topic)

        #Calculate how long each chunk needs to be
        success_trial_durations = find_trial_durations(successes, topic) # trial_durations[state][trial number]
        failure_trial_durations = find_trial_durations(failures, topic) # trial_durations[state][trial number]

        #np.min(a, b) would treat b as an axis argument, so use the builtin
        #min to compare the success and failure minima
        minimal_trial_lengths = [min(np.min(success_trial_durations[state]), np.min(failure_trial_durations[state])) \
                for state in range(len(success_trial_durations.keys()))]
        chunk_times = [length/np.floor(length/segment_length) for length in minimal_trial_lengths]

        #make little chunks out of the matrices of pressure readings
        success_data_sets = break_record_matrices_into_chunks(successes, success_matrices_segmented_by_state, \
                minimal_trial_lengths, chunk_times, topic)
        failure_data_sets = break_record_matrices_into_chunks(failures, failure_matrices_segmented_by_state, \
                minimal_trial_lengths, chunk_times, topic)

        #Make sure the little chunks are of the same dimension across positive and negative examples
        chunk_dim = {}
        for state in range(len(successes.keys())):
            chunk_dim[state] = {}
            for chunk_idx in range(len(success_data_sets[state])):
                #figure minimum chunk lengths in array size
                chunk_length_successes = [success_data_sets[state][chunk_idx]['data'][chunk_record].shape[1] \
                        for chunk_record in range(len(success_data_sets[state][chunk_idx]['data']))]
                chunk_length_failures  = [failure_data_sets[state][chunk_idx]['data'][chunk_record].shape[1] \
                        for chunk_record in range(len(failure_data_sets[state][chunk_idx]['data']))]
                shortest_chunk_length = np.min([np.min(chunk_length_successes), np.min(chunk_length_failures)])
                chunk_dim[state][chunk_idx] = shortest_chunk_length

        #shorten data to shortest_chunk_length
        for state in range(len(successes.keys())):
            for chunk_idx in range(len(success_data_sets[state])):
                for dataset_idx, data_sets in enumerate([success_data_sets, failure_data_sets]):
                    for chunk_record in range(len(data_sets[state][chunk_idx]['data'])):
                        shortest_chunk_length = chunk_dim[state][chunk_idx]
                        data_sets[state][chunk_idx]['data'][chunk_record] = \
                                data_sets[state][chunk_idx]['data'][chunk_record][:,:shortest_chunk_length]

        chunk_params = {'chunk_dim': chunk_dim,
                        'trial_lengths': minimal_trial_lengths,
                        'chunk_times': chunk_times,
                        'topic': topic}
        return success_data_sets, failure_data_sets, chunk_params

    def preprocess_individual_pickle(self, apickle):
        data = ut.load_pickle(apickle)
        models = self.models['models']
        chunk_params = self.models['chunk_params']
        #break pickle into chunks given the model:
        #chunks of equivalent time, chunks of equivalent dimensions
        data_segmented = construct_list_of_segmented_matrices_from_trial_recording(data, chunk_params['topic'])

        # 1) break into time chunks
        chunked_data = break_record_matrices_into_chunks(data, data_segmented, \
                chunk_params['trial_lengths'], chunk_params['chunk_times'], chunk_params['topic'])

        # 2) shorten into appropriate dimensions
        for state in range(len(models)):
            for chunk_idx in range(len(models[state])):
                for chunk_record in range(len(chunked_data[state][chunk_idx]['data'])):
                    chunk_length = chunk_params['chunk_dim'][state][chunk_idx]
                    chunked_data[state][chunk_idx]['data'][chunk_record] =\
                            chunked_data[state][chunk_idx]['data'][chunk_record][:, :chunk_length]

        return chunked_data

    def classify_pickle(self, apickle):
        # a pickle can have multiple records...
        chunked_data = self.preprocess_individual_pickle(apickle)
        models = self.models['models']

        total_ex = models[0][0]['labels'].shape[1]
        pos_ex = np.sum(models[0][0]['labels'])
        neg_ex = total_ex - pos_ex
        prior_pos = pos_ex / float(total_ex)
        prior_neg = neg_ex / float(total_ex)

        results = {}
        for record_idx in range(len(chunked_data[0][0]['data'])):
            for state in range(len(models)):
                if not results.has_key(state):
                    results[state] = {}
                for chunk_idx in range(len(models[state])):
                    if not results[state].has_key(chunk_idx):
                        results[state][chunk_idx] = []

                    x_mat = chunked_data[state][chunk_idx]['data'][record_idx]
                    x_vec = np.reshape(x_mat, (x_mat.shape[0] * x_mat.shape[1], 1))
                    projected_x = np.array((models[state][chunk_idx]['project'].T * x_vec).T)

                    # p(succ | x) = p(x | succ) * p(succ) / normalizer
                    succ_prob = models[state][chunk_idx]['kde'][0].evaluate(np.array(projected_x))
                    fail_prob = models[state][chunk_idx]['kde'][1].evaluate(np.array(projected_x))
                    succ_total = np.sum(models[state][chunk_idx]['labels']) / float(models[state][chunk_idx]['labels'].shape[1])
                    fail_total = 1 - succ_total

                    prob = (succ_prob * succ_total) / ((fail_prob*fail_total) + (succ_total*succ_prob))
                    if np.isnan(prob):
                        prob = 0.

                    results[state][chunk_idx].append(prob > .5)
                    print 'record idx %d state %d chunk %d label prob %.2f success? %d' % (record_idx, state, chunk_idx, prob, prob > .5)
            print '============================='

        for state in range(len(models)):
            for chunk_idx in range(len(models[state])):
                correct = np.sum(results[state][chunk_idx])
                all_val = float(len(results[state][chunk_idx]))
                print 'state %d chunk %d results %.3f (%d records)' % (state, chunk_idx, correct/all_val, all_val)


def zero_out_time_in_trials(data_dict, topic):
    times_dict = {} # times_dict[state] => list of time vectors, zeroed at each trial's start
    for state in range(len(data_dict.keys())):
        times_dict[state] = []
        for record_number in range(len(data_dict[state][topic])):
            time_start = data_dict[0][topic][record_number]['t'][0]
            times_dict[state].append(data_dict[state][topic][record_number]['t'] - time_start)
    return times_dict

def find_trial_durations(data_dict, topic):
    times_dict = {} # times_dict[state] => list of durations
    for state in range(len(data_dict.keys())):
        times_dict[state] = []
        for record_number in range(len(data_dict[state][topic])):
            time_start = data_dict[state][topic][record_number]['t'][0]
            time_end = data_dict[state][topic][record_number]['t'][-1]
            times_dict[state].append(time_end - time_start)
    return times_dict

def construct_pressure_marker_message(data_dict, topic, base_color=np.matrix([1.,0, 0, 1.]).T):
    STATE_SEPARATION_DIST = .4
    points_ll = []
    colors_ll = []
    pressures_l = []

    #Record the duration of each trial
    times_dict = zero_out_time_in_trials(data_dict, topic)
    #Use the durations to figure out offsets
    state_time_offsets = {}
    state_time_offsets[-1] = {'duration':0, 'offset':0}
    for state in range(len(times_dict.keys())):
        durations = [state_times[-1] - state_times[0] for state_times in times_dict[state]]
        duration_state = np.max(durations)
        state_time_offsets[state] = {'duration': duration_state,
                                     'offset': state_time_offsets[state-1]['offset'] + state_time_offsets[state-1]['duration'] + STATE_SEPARATION_DIST}
        print 'state', state, 'offset', state_time_offsets[state]['offset'], 'duration', state_time_offsets[state]['duration']

    #Create corrected timelines
    times_m_list = []
    for record_number in range(len(data_dict[0][topic])):
        #For each state figure out the time offset & store in times_l
        times_l = []
        for i in range(len(data_dict.keys())):
            curr_times = data_dict[i][topic][record_number]['t']
            curr_times = curr_times - curr_times[0]
            times_l.append(np.matrix(curr_times + state_time_offsets[i]['offset']))
        #Stack times_l to form times_m
        times_m = np.column_stack(times_l)
        times_m_list.append(times_m)

    print 'constructing segmented matrices...'
    pressure_mats = []
    #each entry of the segmented list is {'mat': per-state 44xN mats, 't': times};
    #stack the per-state matrices side by side for this trial
    for lp in construct_list_of_segmented_matrices_from_trial_recording(data_dict, topic):
        p = np.column_stack(lp['mat'])
        p = p - p[:,0]
        pressure_mats.append(p)

    print 'creating colored points...'
    pressures, all_points, colors_mat = create_colored_3d_points_from_matrices(pressure_mats, times_m_list)

    print 'creating pointcloud message'
    point_cloud = ru.np_to_colored_pointcloud(all_points, np.matrix(pressures), 'pressure_viz')
    return point_cloud

class DisplayDataWithRviz:

    def __init__(self):
        rospy.init_node('display_pressure_with_rviz')
        self.succ_marker = rospy.Publisher('succ_marker', vm.Marker)
        self.fail_marker = rospy.Publisher('fail_marker', vm.Marker)

        self.succ_pc_pub = rospy.Publisher('succ_pc', sm.PointCloud)
        self.fail_pc_pub = rospy.Publisher('fail_pc', sm.PointCloud)

    def display(self, succ_pickle, fail_pickle):
        # load in pickles
        print 'loading...'
        successes = ut.load_pickle(succ_pickle)
        failures = ut.load_pickle(fail_pickle)

        print 'Enter the topic number:'
        for i, k in enumerate(successes[0].keys()):
            print i, k
        topic = successes[0].keys()[int(raw_input())]

        red = np.matrix([1.,0, 0, 1.]).T
        green = np.matrix([0.,1., 0, 1.]).T
        print 'construct_pressure_marker_message(successes, topic, green)'
        succ_pc = construct_pressure_marker_message(successes, topic, green)
        print 'construct_pressure_marker_message(failures, topic, red)'
        fail_pc = construct_pressure_marker_message(failures, topic, red)
        print 'publishing...'
        r = rospy.Rate(10)
        while not rospy.is_shutdown():
            self.succ_pc_pub.publish(succ_pc)
            self.fail_pc_pub.publish(fail_pc)
            r.sleep()

##
# Create matrices split based on state
#
# @param data_dict [state number] [topic] [trial number] ['t' 'left' 'right']
# @param topic string
# @return segmented_matrices[record_number]['t'][state_number]   =>  1xN array
#                           [record_number]['mat'][state_number] => 44xN mat
def construct_list_of_segmented_matrices_from_trial_recording(data_dict, topic):
    segmented_matrices = []
    for record_number in range(len(data_dict[0][topic])):
        segmented_matrix = []
        trecs = []
        for state in range(len(data_dict.keys())):
            segmented_matrix.append(np.row_stack((data_dict[state][topic][record_number]['left'], 
                                                  data_dict[state][topic][record_number]['right'])))
            trecs.append(data_dict[state][topic][record_number]['t'])
        segmented_matrices.append({'mat': segmented_matrix,
                                   't': trecs})
    return segmented_matrices

def create_colored_3d_points_from_matrices(matrices, index_list):
    points3d_l = []
    colors_ll = []
    mat_l = []
    X_MULTIPLIER = 1/15.

    for i, mat in enumerate(matrices):
        X, Y = np.meshgrid(range(mat.shape[0]), range(mat.shape[1]))
        x_size = mat.shape[0] * X_MULTIPLIER
        X = np.matrix(X * X_MULTIPLIER) + x_size * i + (i * x_size / 3.)
        Y = (np.matrix(np.ones((mat.shape[0], 1))) * index_list[i]).T
        Z = np.matrix(np.zeros(mat.shape)).T

        points = np.row_stack((X.reshape(1, X.shape[0] * X.shape[1]),
                               Y.reshape(1, Y.shape[0] * Y.shape[1]),
                               Z.reshape(1, Z.shape[0] * Z.shape[1])))
        colors = np.matrix(np.zeros((4, mat.shape[0]*mat.shape[1])))
        mat_l.append(mat.T.reshape((1,mat.shape[1] * mat.shape[0])))
        points3d_l.append(points)
        colors_ll.append(colors)

    all_mats = np.column_stack(mat_l)
    all_points = np.column_stack(points3d_l)
    all_colors = np.column_stack(colors_ll)
    return all_mats, all_points, all_colors

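#Layout intuition for the function above, on toy sizes (illustration
#only): each 44xN pressure mat becomes a flat "sheet" of 3D points,
#offset along X per trial, with Y carrying the corrected timestamps.
def _colored_points_example():
    mats  = [np.matrix(np.random.rand(44, 10)), np.matrix(np.random.rand(44, 8))]
    times = [np.matrix(np.arange(10.)), np.matrix(np.arange(8.))]
    pressures, points, colors = create_colored_3d_points_from_matrices(mats, times)
    print points.shape  # 3 x (44*10 + 44*8)
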
#data_dict [state number] [topic] [trial number] ['t' 'left' 'right']
def average_reading_over_trials(data_dict, topic):
    #Construct list of list of matrices indexing [state][trial number] => contact information for both fingers
    contact_info = {}
    for state in data_dict.keys():
        contact_info[state] = []
        for trial in data_dict[state][topic]:
            contact_info[state].append(np.row_stack((trial['left'], trial['right'])))

    ret_dict = {}
    #shorten the trials to be the length of the shortest trial
    for state in contact_info.keys():
        shortest_length = np.min([trial.shape[1] for trial in contact_info[state]])
        trimmed_mats    = [trial[:,:shortest_length] for trial in contact_info[state]]
        #stack the trimmed trials along a third axis and average that axis
        avg_reading     = np.matrix(np.sum(np.concatenate([np.reshape(np.array(trial), (trial.shape[0], trial.shape[1], 1)) for trial in trimmed_mats], 2), 2) / len(trimmed_mats))
        #integer division: div_point splits the stacked left/right rows
        div_point = avg_reading.shape[0] // 2
        assert div_point == 22  # 22 pressure cells per fingertip
        ret_dict[state] = {topic: [{'t':     data_dict[state][topic][0]['t'][:shortest_length],
                                    'left':  avg_reading[:div_point,:],
                                    'right': avg_reading[div_point:,:]}] }
    return ret_dict

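#The dense averaging line above, unpacked on toy data (illustration
#only): stack each trimmed 44xN trial along a third axis, then average
#that axis (np.mean here equals the np.sum(...) / len(...) used above).
def _average_trials_example():
    trials = [np.matrix(np.random.rand(44, 5)) for _ in range(3)]
    stacked = np.concatenate([np.reshape(np.array(t), (44, 5, 1)) for t in trials], 2)
    avg = np.matrix(np.mean(stacked, 2))
    print avg.shape  # (44, 5)
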
def subtract_records(recorda, recordb, topic):
    ret_dict = {}
    for state in recorda.keys():
        shortest_length = min(recorda[state][topic][0]['left'].shape[1], recordb[state][topic][0]['left'].shape[1])
        ret_dict[state] = {topic: [{
            't':     recorda[state][topic][0]['t'][:shortest_length],
            'left':  np.abs(recorda[state][topic][0]['left'][:,:shortest_length] - recordb[state][topic][0]['left'][:,:shortest_length]),
            'right': np.abs(recorda[state][topic][0]['right'][:,:shortest_length] - recordb[state][topic][0]['right'][:,:shortest_length])
            }]}
    return ret_dict


#Debug this!
class DiffDisplay:

    def __init__(self):
        rospy.init_node('diff_display')
        self.fail_pc_pub = rospy.Publisher('diff_fail_pc', sm.PointCloud)
        self.succ_pc_pub = rospy.Publisher('diff_succ_pc', sm.PointCloud)
        self.diff_pc_pub = rospy.Publisher('diff_pc', sm.PointCloud)

    def display(self, succ_pickle, fail_pickle):
        # load in pickles
        print 'loading...'
        successes = ut.load_pickle(succ_pickle)
        failures = ut.load_pickle(fail_pickle)

        topics = ['/pressure/l_gripper_motor', '/pressure/r_gripper_motor']
        topic = topics[0]

        red = np.matrix([1., 0, 0, 1.]).T
        green = np.matrix([0., 1., 0, 1.]).T
        blue = np.matrix([0., 0, 1., 1.]).T

        #data_dict [state number] [topic] [trial number] ['t' 'left' 'right']
        succ_avg = average_reading_over_trials(successes, topic)
        fail_avg = average_reading_over_trials(failures, topic)
        diff_avg = subtract_records(succ_avg, fail_avg, topic)

        #construct_pressure_marker_message returns a single PointCloud,
        #so only point clouds are published (the original also unpacked a
        #Marker, which the function no longer returns)
        succ_pc = construct_pressure_marker_message(succ_avg, topic, green)
        fail_pc = construct_pressure_marker_message(fail_avg, topic, red)
        diff_pc = construct_pressure_marker_message(diff_avg, topic, blue)

        r = rospy.Rate(10)
        print 'publishing...'
        while not rospy.is_shutdown():
            self.succ_pc_pub.publish(succ_pc)
            self.fail_pc_pub.publish(fail_pc)
            self.diff_pc_pub.publish(diff_pc)
            r.sleep()

if __name__ == '__main__':
    import sys

    if 'preprocess' == sys.argv[1]:
        print 'Loading success bags..'
        succ_dict = success_failure_classification_preprocess(sys.argv[2])
        print 'Saving success dict.'
        ut.save_pickle(succ_dict, '%s/success_data.pkl' % sys.argv[2])

        print 'Loading failure bags..'
        fail_dict = success_failure_classification_preprocess(sys.argv[3])
        print 'Saving failure dict.'
        ut.save_pickle(fail_dict, '%s/failure_data.pkl' % sys.argv[3])

        print 'Done!'

    if 'learn' == sys.argv[1]:
        classifier = TimeSeriesClassifier()
        classifier.create_model(sys.argv[2], sys.argv[3])
        classifier.save_models()

    if 'test' == sys.argv[1]:
        classifier = TimeSeriesClassifier()
        classifier.load_models()
        classifier.classify_pickle(sys.argv[2])

    #Debug this to display failure cases
    if 'display' == sys.argv[1]:
        d = DisplayDataWithRviz()
        #data_dict [state number] [topic] [trial number] ['t' 'left' 'right']
        d.display(sys.argv[2], sys.argv[3])

    #Debug this so that it displays the average value, and the diff between the averages.
    if 'diff' == sys.argv[1]:
        d = DiffDisplay()
        #data_dict [state number] [topic] [trial number] ['t' 'left' 'right']
        d.display(sys.argv[2], sys.argv[3])

    if 'run' == sys.argv[1]:
        #t = TestOnlineClassification()
        #t.start_classifying()
        t = TimeSeriesClassifier()
        t.load_models()
        t.run()



#class VectorTimeSeriesClassifier:
#
#    def __init__(self):
#        self.data = None
#        self.times = None
#        self.start_time = None
#
#    def add_data(self, t, mat):
#        if self.data == None:
#            self.data = mat
#            self.times = [t]
#        else:
#            self.data = np.column_stack((self.data, mat))
#            self.times.append(t)
#
#    def start_collecting_data(self):
#        self.start_time = rospy.get_rostime().to_time()
#
#    def get_data(self, start_time_in_segment):
#        #option 2
#        #instead of getting n data points back, can we interpolate?
#        #given start time, end time, and number of points needed
#        #we can interpolate to get those n points
#
#        #option 1
#        np.array(self.times) - rospy.get_rostime().to_time()
#        current_time = rospy.get_rostime().to_time() - self.start_time
#        start_time_in_segment

#need to keep track of time since start
#need to put data points into windows of data

#we want data in consecutive matrices of the same size as what the
#classifier expects, assuming we're still using the same format

#a)
#collect messages into data
#break up chunks of data but keep it as one large matrix

#b)
#consume data chunks as long as there is enough
#get a message with a time after when we should expect messages
#spit out a chunk when there are enough data points

### how do we combine this event-based classifier with RL?
## This seems better:
# might get into deadlocks?
# can we run this in one process?
#     rl agent executes a state
#       while classifier listens for messages
#     rl agent checks for state estimate
#
# rl agent executes a state
# waits for estimate...
# takes state estimate, then acts

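#A minimal sketch of option (b) above (illustration only; chunk width
#assumed): buffer incoming column vectors and emit a fixed-width chunk
#whenever enough samples have accumulated, keeping the remainder for
#the next chunk.
def _chunk_consumer_sketch(column_stream, chunk_width=10):
    buffered = None
    for col in column_stream:
        if buffered is None:
            buffered = col
        else:
            buffered = np.column_stack((buffered, col))
        if buffered.shape[1] >= chunk_width:
            chunk = buffered[:, :chunk_width]
            buffered = buffered[:, chunk_width:]
            yield chunk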

