from classifier import classifier

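# NOTE: written against the legacy SWIG-based OpenCV python bindings
# (cvCreateMat, CvDTree, CV_ROW_SAMPLE); not compatible with the cv2 API.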
import opencv as cv
import hrl_lib.util as ut
import numpy as np
from hrl_lib.util import getTime
import os

import processor

import ransac


class boosted_tree_classifier(classifier):
    '''
    Classifier that labels scan points as surface or clutter using an
    OpenCV decision tree (a single CvDTree stands in for boosted trees
    for now; see the warning in train()).
    '''

    cv_classifier = None

    def create_train_datastructures(self):
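        # first pass: load the stored feature files of all training datasets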
        self.processor.scan_dataset = self.processor.scans_database.get_dataset(0)

        training_set_size = 0

        data = []

        while self.processor.scan_dataset != False:
            if self.processor.scan_dataset.is_training_set:

                filename = self.processor.get_features_filename(True)
                print 'loading', filename
                dict = ut.load_pickle(filename)

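                # balance the two classes: compute how many more surface than
                # clutter samples there are (or vice versa), then oversample
                # the minority class with random duplicates below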
                difference = np.sum(dict['labels'] == processor.LABEL_SURFACE) - np.sum(dict['labels'] == processor.LABEL_CLUTTER)

                if difference > 0:
                    clutter_features = (dict['features'])[np.nonzero(dict['labels'] == processor.LABEL_CLUTTER)]
                    if len(clutter_features) > 0:
                        dict['set_size'] += difference
                        dict['features'] = np.vstack((dict['features'], clutter_features[np.random.randint(0,len(clutter_features),size=difference)]))
                        dict['labels'] = np.hstack((dict['labels'], np.ones(difference) * processor.LABEL_CLUTTER))
                elif difference < 0:
                    surface_features = (dict['features'])[np.nonzero(dict['labels'] == processor.LABEL_SURFACE)]
                    if len(surface_features) > 0:
                        difference = -difference
                        dict['set_size'] += difference
                        dict['features'] = np.vstack((dict['features'], surface_features[np.random.randint(0,len(surface_features),size=difference)]))
                        dict['labels'] = np.hstack((dict['labels'], np.ones(difference) * processor.LABEL_SURFACE))

                training_set_size += dict['set_size']
                data.append(dict)

            self.processor.scan_dataset = self.processor.scans_database.get_next_dataset()

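        # rewind to the first dataset and copy all features into cvMat structures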
        self.processor.scan_dataset = self.processor.scans_database.get_dataset(0)
        current_training_set_index = 0

        feature_vector_length = len(self.processor.features.get_indexvector(self.features))
        print getTime(), feature_vector_length
        print getTime(), '#training set size ', training_set_size

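        # cap on the number of training samples; larger sets are randomly subsampled below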
        max_training_size = 1800000

        if training_set_size <= max_training_size:
            train_data = cv.cvCreateMat(training_set_size,feature_vector_length,cv.CV_32FC1)
            train_labels = cv.cvCreateMat(training_set_size,1,cv.CV_32FC1)

            for dict in data:
                for index in range(dict['set_size']):

                    if dict['labels'][index] == processor.LABEL_SURFACE or dict['labels'][index] == processor.LABEL_CLUTTER:
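                        # keep only the feature dimensions enabled for this classifier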
                        fv = (dict['features'][index])[self.processor.features.get_indexvector(self.features)]
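                        # copy the feature vector into the cvMat row element by element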
                        for fv_index, fv_value in enumerate(fv):
                            train_data[current_training_set_index][fv_index] = fv_value
                        train_labels[current_training_set_index] = dict['labels'][index]

                        current_training_set_index = current_training_set_index + 1

                        if current_training_set_index % 16384 == 0:
                            print getTime(), 'reading features:', current_training_set_index, 'of', training_set_size, '(',(float(current_training_set_index)/float(training_set_size)*100.0),'%)'

        else:
            print getTime(), 'more than',max_training_size,'features, sample from them...'

            all_data = []
            all_labels = []
            for dict in data:
                for index in range(dict['set_size']):
                    if dict['labels'][index] == processor.LABEL_SURFACE or dict['labels'][index] == processor.LABEL_CLUTTER:
                        fv = (dict['features'][index])[self.processor.features.get_indexvector(self.features)]
                        all_data += [fv]
                        all_labels += [dict['labels'][index]]

                        current_training_set_index = current_training_set_index + 1
                        if current_training_set_index % 16384 == 0:
                            print getTime(), 'reading features:', current_training_set_index, 'of', training_set_size, '(',(float(current_training_set_index)/float(training_set_size)*100.0),'%)'

            del data
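            # draw a uniform random subset of max_training_size samples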
            import random
            indices = np.array(random.sample(xrange(len(all_labels)),max_training_size))
            all_data = np.asarray(all_data)
            all_labels = np.asarray(all_labels)

            all_data = all_data[indices]
            all_labels = all_labels[indices]

            train_data = cv.cvCreateMat(max_training_size,feature_vector_length,cv.CV_32FC1)
            train_labels = cv.cvCreateMat(max_training_size,1,cv.CV_32FC1)

            for index in range(max_training_size):
                for fv_index, fv_value in enumerate(all_data[index]):
                    train_data[index][fv_index] = fv_value
                train_labels[index] = all_labels[index]
                if index % 16384 == 0:
                    print getTime(), 'setting features:', (float(index)/float(max_training_size))

        print getTime(), 'start training Classifier'

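        # variable-type mask for the tree: all feature columns are numerical,
        # the response variable (the extra last entry) is categorical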
        type_mask = cv.cvCreateMat(1, feature_vector_length+1, cv.CV_8UC1)
        cv.cvSet(type_mask, cv.CV_VAR_NUMERICAL, 0)
        type_mask[feature_vector_length] = cv.CV_VAR_CATEGORICAL

        return (train_data, train_labels, type_mask)


    def train(self):
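        # a single decision tree (CvDTree) stands in for boosted trees here
        # because its load/save works; see the warning printed below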
        self.cv_classifier = cv.CvDTree()
        train_datastructures = self.create_train_datastructures()

        (train_data, train_labels, type_mask) = train_datastructures
        print 'WARNING! use CvDTree (single decision trees) for now as load/save works!'
        print getTime(), self.cv_classifier.train(train_data, cv.CV_ROW_SAMPLE, train_labels, None, None, type_mask)

        print getTime(), 'training finished'


    def release_train_datastructures(self, train_datastructures):
        if train_datastructures is not None:
            (train_data, train_labels, type_mask) = train_datastructures
            cv.cvReleaseMat(train_data)
            cv.cvReleaseMat(train_labels)
            cv.cvReleaseMat(type_mask)


    def test(self, feature_data=None):
        print getTime(), 'test on:', self.processor.scan_dataset.id

        if feature_data is None:
            filename = self.processor.get_features_filename()
            print 'loading', filename
            dict = ut.load_pickle(filename)
        else:
            dict = feature_data

        current_set_size = dict['set_size']
        feature_vector_length = len(self.processor.features.get_indexvector(self.features))
        print getTime(), feature_vector_length
        labels = np.array(np.zeros(len(self.processor.map_polys)))
        print 'test: length of labels vector:', len(labels)
        test = cv.cvCreateMat(1,feature_vector_length,cv.CV_32FC1)

        if current_set_size == 0:
            print getTime(), 'ERROR: test dataset is empty!'
            return labels, 1, 1, 1

        count = 0
        for index in dict['point_indices']:
            fv = (dict['features'][count])[self.processor.features.get_indexvector(self.features)]

            for fv_index, fv_value in enumerate(fv):
                test[fv_index] = fv_value

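            # predict() returns a tree node; its .value holds the predicted label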
            label = self.cv_classifier.predict(test)
            labels[index] = label.value

            if count % 4096 == 0:
                print getTime(), 'testing:', count, 'of', current_set_size, '(',(float(count)/float(current_set_size)*100.0),'%)'

            count += 1

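        # keep features and labels around for test_postprocess()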
        self.test_feature_dict = dict
        self.test_labels = labels

        return labels, self.test_results(dict, labels)


    def test_postprocess(self):
        labels = self.postprocess(self.test_labels)
        return labels, self.test_results(self.test_feature_dict, labels)

    def postprocess(self, labels):
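        # RANSAC-fit a plane through the points labeled as surface and
        # relabel all surface points that are not inliers as clutter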
        debug = False
        model = ransac.PlaneLeastSquaresModel(debug)
        data_idx = np.where(np.asarray(labels) == processor.LABEL_SURFACE)[0]
        data = np.asarray(self.processor.pts3d_bound).T[data_idx]
        n, _ = np.shape(data)
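        # fewer RANSAC iterations suffice for small point sets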
        if n < 5000:
            k = 700
        else:
            k = 2000

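        # plane hypotheses from 3 points, k iterations, 0.04 inlier distance
        # threshold, and at least len(data_idx)/2.5 supporting points required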
        ransac_fit, ransac_data = ransac.ransac(data, model,
                                                3, k, 0.04, len(data_idx)/2.5,
                                                debug=debug, return_all=True)
        print 'ransac: model', ransac_fit
        print 'ransac:', ransac_data
        print 'len inlier', len(ransac_data['inliers']), 'shape pts', np.shape(self.processor.pts3d_bound)

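        # boolean mask over all points: surface points that are not RANSAC
        # inliers get relabeled as clutter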
        fancy = np.zeros(len(np.asarray(labels))).astype(bool)
        fancy[data_idx] = True
        fancy[data_idx[ransac_data['inliers']]] = False
        labels[fancy] = processor.LABEL_CLUTTER

        return labels

    def save(self):
        classifier_filename = self.get_filename()

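        # make sure the file exists before the OpenCV classifier writes to it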
        if not os.path.isfile(classifier_filename):
            open(classifier_filename, 'w').close()
        self.cv_classifier.save(classifier_filename)


    def load(self):
        self.cv_classifier = cv.CvDTree()
        print getTime(), 'loading Classifier', self.features
        self.cv_classifier.load(self.get_filename())