00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 '''
00031 # Location of classifier to load or save is found by: classifier.get_filename(self):
00032 # Loc = self.processor.config.path+'/classifier_'+self.features+'_'+
00033 # self.processor.feature_type+'_k'+str(self.processor.feature_neighborhood)+
00034 # '_r'+str(self.processor.feature_radius)+'.XML'
00035 '''
00036
00037 from classifier import classifier
00038
00039 import opencv as cv
00040 import util as ut
00041 import numpy as np
00042 import os
00043
00044 import processor
00045
00046 import ransac
00047 import random
00048
00049
class boosted_tree_classifier(classifier) :
    '''
    Point classifier built on the legacy OpenCV Python bindings.

    Despite the class name, train() currently fits a single decision tree
    (cv.CvDTree) rather than a boosted ensemble -- see the WARNING printed
    there: single trees are used because their load/save works.

    Feature dictionaries loaded from disk via ut.load_pickle are expected to
    carry at least the keys 'features', 'labels', 'set_size' and, for
    testing, 'point_indices' (inferred from usage below -- confirm against
    the code that pickles them).
    '''

    # The underlying OpenCV classifier (cv.CvDTree); created by train() or load().
    cv_classifier = None







    def create_train_datastructures(self):
        '''Collect the features of all training datasets into OpenCV matrices.

        Walks every dataset in the scans database, loads its pickled feature
        dictionary, balances the SURFACE/CLUTTER classes by randomly
        duplicating samples of the minority class, and copies the selected
        feature components element-wise into cv matrices.

        Returns:
            (train_data, train_labels, type_mask) suitable for
            CvDTree.train(): train_data is a (training_set_size x
            feature_vector_length) CV_32FC1 matrix, train_labels a
            (training_set_size x 1) CV_32FC1 matrix, and type_mask a
            (1 x feature_vector_length+1) CV_8UC1 matrix marking every input
            variable as numerical and the response as categorical.

        Side effects: resets and iterates self.processor.scan_dataset.
        '''
        # Rewind to the first dataset; iteration ends when the database
        # returns False (apparently the end-of-database sentinel -- see the
        # while condition below).
        self.processor.scan_dataset = self.processor.scans_database.get_dataset(0)

        # Total number of feature vectors over all training sets (after
        # class balancing).
        training_set_size = 0

        data = []

        # Pass 1: load each training dataset's feature dict and balance classes.
        while False != self.processor.scan_dataset:
            if self.processor.scan_dataset.is_training_set:

                filename = self.processor.get_features_filename(True)
                print 'loading', filename
                # NOTE(review): 'dict' shadows the builtin; kept as-is.
                dict = ut.load_pickle(filename)

                # Class imbalance: positive means more SURFACE than CLUTTER
                # samples, negative means the opposite.
                difference = np.sum(dict['labels'] == processor.LABEL_SURFACE) - np.sum(dict['labels'] == processor.LABEL_CLUTTER)




                # Oversample the minority class by appending randomly drawn
                # (with replacement) duplicates until both classes have the
                # same count.  'set_size' is grown to match.
                if difference > 0:
                    clutter_features = (dict['features'])[np.nonzero(dict['labels'] == processor.LABEL_CLUTTER)]
                    if len(clutter_features) > 0:
                        dict['set_size'] += difference
                        dict['features'] = np.vstack((dict['features'], clutter_features[np.random.randint(0,len(clutter_features),size=difference)]))
                        dict['labels'] = np.hstack((dict['labels'], np.ones(difference) * processor.LABEL_CLUTTER))
                elif difference < 0:
                    surface_features = (dict['features'])[np.nonzero(dict['labels'] == processor.LABEL_SURFACE)]
                    if len(surface_features) > 0:
                        difference = -difference
                        dict['set_size'] += difference
                        dict['features'] = np.vstack((dict['features'], surface_features[np.random.randint(0,len(surface_features),size=difference)]))
                        dict['labels'] = np.hstack((dict['labels'], np.ones(difference) * processor.LABEL_SURFACE))

                training_set_size += dict['set_size']
                data.append(dict)

            self.processor.scan_dataset = self.processor.scans_database.get_next_dataset()



        # Rewind the database again; pass 2 fills the cv matrices.
        self.processor.scan_dataset = self.processor.scans_database.get_dataset(0)
        current_training_set_index = 0

        # Number of feature components selected for this classifier's
        # feature subset (self.features).
        feature_vector_length = len(self.processor.features.get_indexvector(self.features))
        print ut.getTime(), feature_vector_length

        print ut.getTime(), '#training set size ', training_set_size

        # NOTE(review): unused (and misspelled); presumably a leftover cap
        # on the training-set size that was never applied.
        max_traning_size = 1800000

        # One row per sample, one column per selected feature component.
        train_data = cv.cvCreateMat(training_set_size,feature_vector_length,cv.CV_32FC1)
        train_labels = cv.cvCreateMat(training_set_size,1,cv.CV_32FC1)

        for dict in data:
            for index in range(dict['set_size']):
                # Only SURFACE/CLUTTER samples are used for training; any
                # other label values in the pickle are skipped.
                if dict['labels'][index] == processor.LABEL_SURFACE or dict['labels'][index]== processor.LABEL_CLUTTER:



                    # Select the feature components for this classifier's
                    # feature subset.
                    fv = (dict['features'][index])[self.processor.features.get_indexvector(self.features)]


                    # Element-wise copy into the cv matrix (the legacy
                    # bindings have no bulk assignment for numpy arrays).
                    for fv_index, fv_value in enumerate(fv):
                        train_data[current_training_set_index][fv_index] = fv_value
                    train_labels[current_training_set_index] = dict['labels'][index]




                    current_training_set_index = current_training_set_index + 1


                    # Progress output every 16384 samples.
                    if current_training_set_index % 16384 == 0:
                        print ut.getTime(), 'reading features:', current_training_set_index, 'of', training_set_size, '(',(float(current_training_set_index)/float(training_set_size)*100.0),'%)'



        # NOTE(review): if any sample was skipped by the label filter above,
        # the tail rows of train_data/train_labels stay at their initial
        # (uninitialized) contents -- confirm all pickled labels are
        # SURFACE/CLUTTER.

        print ut.getTime(), 'start training Classifier'

        # Variable-type mask for CvDTree.train(): all input columns
        # numerical, last entry (the response variable) categorical.
        type_mask = cv.cvCreateMat(1, feature_vector_length+1, cv.CV_8UC1)
        cv.cvSet( type_mask, cv.CV_VAR_NUMERICAL, 0)
        type_mask[feature_vector_length] = cv.CV_VAR_CATEGORICAL

        return (train_data, train_labels, type_mask)



    def train(self):
        '''Train self.cv_classifier on all training datasets in the database.

        Builds the training matrices via create_train_datastructures() and
        fits a fresh cv.CvDTree in place (self.cv_classifier is replaced).
        '''
        # A single decision tree is used instead of a boosted ensemble;
        # see the WARNING print below (tree load/save is what works in
        # these bindings).
        self.cv_classifier = cv.CvDTree()
        train_datastructures = self.create_train_datastructures()

        (train_data, train_labels, type_mask) = train_datastructures
        print 'WARNING! use CvDTree (single decision trees) for now as load/save works!'
        # CV_ROW_SAMPLE: each row of train_data is one sample.
        print ut.getTime(), self.cv_classifier.train(train_data, cv.CV_ROW_SAMPLE, train_labels, None, None, type_mask )

        print ut.getTime(), 'traning finished'




    def release_train_datastructures(self, train_datastructures):
        '''Free the OpenCV matrices returned by create_train_datastructures().

        Safe to call with None (does nothing in that case).
        '''
        if None != train_datastructures:
            (train_data, train_labels, type_mask) = train_datastructures
            cv.cvReleaseMat(train_data)
            cv.cvReleaseMat(train_labels)
            cv.cvReleaseMat(type_mask)


    def test(self, feature_data = None):
        '''Classify every feature vector of the current test dataset.

        Args:
            feature_data: optional pre-loaded feature dictionary; when None
                the features pickle for the current scan_dataset is loaded
                from disk.

        Returns:
            (labels, <whatever self.test_results(dict, labels) returns>),
            where labels is a numpy vector with one entry per element of
            self.processor.map_polys; entries at dict['point_indices'] get
            the predicted label, all others stay 0.

        Side effects: stores the feature dict and labels on
        self.test_feature_dict / self.test_labels for test_postprocess().
        '''
        print ut.getTime(), 'test on:', self.processor.scan_dataset.id

        if feature_data == None:
            filename = self.processor.get_features_filename()
            print 'loading', filename
            # NOTE(review): 'dict' shadows the builtin; kept as-is.
            dict = ut.load_pickle(filename)
        else:
            dict = feature_data


        current_set_size = dict['set_size']
        feature_vector_length = len(self.processor.features.get_indexvector(self.features))
        print ut.getTime(), feature_vector_length
        # One label slot per map polygon; unclassified points remain 0.
        labels = np.array(np.zeros(len(self.processor.map_polys)))
        print 'test: length of labels vector:', len(labels)
        # Reusable 1-row cv matrix holding the feature vector under test.
        test = cv.cvCreateMat(1,feature_vector_length,cv.CV_32FC1)

        if current_set_size == 0:
            print ut.getTime(), 'ERROR: test dataset is empty!'
            # NOTE(review): this 4-tuple's arity differs from the normal
            # return below (labels + test_results(...)); callers that unpack
            # must tolerate both shapes -- verify against call sites.
            return labels, 1, 1, 1

        count = 0
        # dict['point_indices'][count] maps the count-th feature vector to
        # its position in the full map_polys-sized label vector.
        for index in dict['point_indices']:
            fv = (dict['features'][count])[self.processor.features.get_indexvector(self.features)]

            # Element-wise copy into the cv matrix (no bulk assignment in
            # the legacy bindings).
            for fv_index, fv_value in enumerate(fv):
                test[fv_index] = fv_value

            # predict() returns a CvDTreeNode-like result; .value is the
            # predicted (categorical) label.
            label = self.cv_classifier.predict(test)

            labels[index] = label.value

            # Progress output every 4096 samples.
            if count % 4096 == 0:
                print ut.getTime(), 'testing:', count, 'of', current_set_size, '(',(float(count)/float(current_set_size)*100.0),'%)'

            count += 1


        # Keep the data around so test_postprocess() can re-score.
        self.test_feature_dict = dict
        self.test_labels = labels

        return labels, self.test_results(dict, labels)



    def test_postprocess(self):
        '''Re-score the last test() run after RANSAC plane postprocessing.'''
        labels = self.postprocess(self.test_labels)
        return labels, self.test_results(self.test_feature_dict, labels)

    def postprocess(self, labels):
        '''Refine SURFACE labels with a RANSAC plane fit.

        Fits a plane to all points currently labelled SURFACE; points that
        were labelled SURFACE but are not inliers of the fitted plane are
        relabelled CLUTTER.

        Args:
            labels: label vector aligned with self.processor.pts3d_bound.
                Mutated in place and also returned.

        Returns:
            The (mutated) labels vector.
        '''
        debug = False
        model = ransac.PlaneLeastSquaresModel(debug)
        # Indices (into the full label vector) of points labelled SURFACE.
        data_idx = np.where(np.asarray(labels) == processor.LABEL_SURFACE)[0]
        # pts3d_bound is transposed so rows are points.
        data = np.asarray(self.processor.pts3d_bound).T[data_idx]
        n, _ = np.shape(data)
        # Fewer RANSAC iterations for small point sets (thresholds 700/2000
        # at n=5000 appear empirically chosen -- no derivation visible here).
        if n < 5000:
            k = 700
        else:
            k = 2000

        # ransac(data, model, n_fit=3 points per candidate plane, k
        # iterations, 0.04 inlier threshold, require len(data_idx)/2.5
        # inliers to accept; return_all also yields the inlier index set).
        ransac_fit, ransac_data = ransac.ransac(data,model,
                                        3, k, 0.04, len(data_idx)/2.5,
                                        debug=debug,return_all=True)
        print 'ransac: model',ransac_fit
        print 'ransac:',ransac_data
        print 'len inlier',len(ransac_data['inliers']),'shape pts',np.shape(self.processor.pts3d_bound)

        # Boolean mask over the full label vector: True for SURFACE points
        # that are NOT plane inliers; those get relabelled CLUTTER.
        fancy = np.zeros(len(np.asarray(labels))).astype(bool)
        fancy[data_idx] = True
        fancy[data_idx[ransac_data['inliers']]] = False
        labels[fancy] = processor.LABEL_CLUTTER

        return labels

    def save(self):
        '''Persist self.cv_classifier to the path from self.get_filename().'''
        classifier_filename = self.get_filename()

        # Create an empty file first if none exists -- apparently required
        # by the legacy cv save; NOTE(review): the open() handle is never
        # closed.
        if False == os.path.isfile(classifier_filename):
            open(classifier_filename,'w')
        self.cv_classifier.save(classifier_filename)


    def load(self):
        '''Load a previously saved classifier from self.get_filename().'''
        self.cv_classifier = cv.CvDTree()
        print ut.getTime(), 'loading Classifier',self.features
        self.cv_classifier.load(self.get_filename())

00318