import os
import sys

# Make the directory containing this file searchable first, so that the
# bundled svm module (svm.py) is found by the imports below.
sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path

from svm import *
from svm import __all__ as svm_all


__all__ = ['evaluations', 'svm_load_model', 'svm_predict', 'svm_read_problem',
           'svm_save_model', 'svm_train'] + svm_all

def svm_read_problem(data_file_name):
    """
    svm_read_problem(data_file_name) -> [y, x]

    Read LIBSVM-format data from data_file_name and return labels y
    and data instances x.
    """
    prob_y = []
    prob_x = []
    for line in open(data_file_name):
        line = line.split(None, 1)
        # In case an instance has no features, pad with an empty feature string.
        if len(line) == 1:
            line += ['']
        label, features = line
        xi = {}
        for e in features.split():
            ind, val = e.split(":")
            xi[int(ind)] = float(val)
        prob_y += [float(label)]
        prob_x += [xi]
    return (prob_y, prob_x)

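# Usage sketch (illustrative; assumes a data file in LIBSVM format, e.g. the
# 'heart_scale' file shipped with the LIBSVM distribution, in the current
# directory):
#
#   y, x = svm_read_problem('heart_scale')
#   # y is a list of labels; x is a list of {feature_index: value} dicts,
#   # e.g. an instance might look like {1: 0.5, 3: -1.0}.
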
def svm_load_model(model_file_name):
    """
    svm_load_model(model_file_name) -> model

    Load a LIBSVM model from model_file_name and return.
    """
    model = libsvm.svm_load_model(model_file_name.encode())
    if not model:
        print("can't open model file %s" % model_file_name)
        return None
    model = toPyModel(model)
    return model

def svm_save_model(model_file_name, model):
    """
    svm_save_model(model_file_name, model) -> None

    Save a LIBSVM model to the file model_file_name.
    """
    libsvm.svm_save_model(model_file_name.encode(), model)

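# Usage sketch for saving and reloading a model (illustrative; the file name
# 'heart_scale.model' is hypothetical):
#
#   m = svm_train(y, x, '-c 4')
#   svm_save_model('heart_scale.model', m)
#   m = svm_load_model('heart_scale.model')
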
def evaluations(ty, pv):
    """
    evaluations(ty, pv) -> (ACC, MSE, SCC)

    Calculate accuracy, mean squared error and squared correlation coefficient
    using the true values (ty) and predicted values (pv).
    """
    if len(ty) != len(pv):
        raise ValueError("len(ty) must be equal to len(pv)")
    total_correct = total_error = 0
    sumv = sumy = sumvv = sumyy = sumvy = 0
    for v, y in zip(pv, ty):
        if y == v:
            total_correct += 1
        total_error += (v-y)*(v-y)
        sumv += v
        sumy += y
        sumvv += v*v
        sumyy += y*y
        sumvy += v*y
    l = len(ty)
    ACC = 100.0*total_correct/l
    MSE = total_error/l
    try:
        SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
    except ZeroDivisionError:
        # SCC is undefined when either ty or pv has zero variance.
        SCC = float('nan')
    return (ACC, MSE, SCC)

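# Worked example (illustrative): for ty = [1, 1, -1] and pv = [1, -1, -1],
# two of the three predictions are correct, so ACC = 100*2/3, and
# MSE = ((1-1)**2 + (-1-1)**2 + (-1-(-1))**2)/3 = 4/3.
#
#   ACC, MSE, SCC = evaluations([1, 1, -1], [1, -1, -1])
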
def svm_train(arg1, arg2=None, arg3=None):
    """
    svm_train(y, x [, options]) -> model | ACC | MSE
    svm_train(prob [, options]) -> model | ACC | MSE
    svm_train(prob, param) -> model | ACC | MSE

    Train an SVM model from data (y, x) or an svm_problem prob using
    'options' or an svm_parameter param.
    If '-v' is specified in 'options' (i.e., cross validation),
    either accuracy (ACC) or mean-squared error (MSE) is returned.
    options:
        -s svm_type : set type of SVM (default 0)
            0 -- C-SVC (multi-class classification)
            1 -- nu-SVC (multi-class classification)
            2 -- one-class SVM
            3 -- epsilon-SVR (regression)
            4 -- nu-SVR (regression)
        -t kernel_type : set type of kernel function (default 2)
            0 -- linear: u'*v
            1 -- polynomial: (gamma*u'*v + coef0)^degree
            2 -- radial basis function: exp(-gamma*|u-v|^2)
            3 -- sigmoid: tanh(gamma*u'*v + coef0)
            4 -- precomputed kernel (kernel values in training_set_file)
        -d degree : set degree in kernel function (default 3)
        -g gamma : set gamma in kernel function (default 1/num_features)
        -r coef0 : set coef0 in kernel function (default 0)
        -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
        -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
        -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
        -m cachesize : set cache memory size in MB (default 100)
        -e epsilon : set tolerance of termination criterion (default 0.001)
        -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
        -b probability_estimates : whether to train an SVC or SVR model for probability estimates, 0 or 1 (default 0)
        -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
        -v n : n-fold cross validation mode
        -q : quiet mode (no outputs)
    """
    prob, param = None, None
    if isinstance(arg1, (list, tuple)):
        assert isinstance(arg2, (list, tuple))
        y, x, options = arg1, arg2, arg3
        param = svm_parameter(options)
        prob = svm_problem(y, x, isKernel=(param.kernel_type == PRECOMPUTED))
    elif isinstance(arg1, svm_problem):
        prob = arg1
        if isinstance(arg2, svm_parameter):
            param = arg2
        else:
            param = svm_parameter(arg2)
    if prob is None or param is None:
        raise TypeError("Wrong types for the arguments")

    if param.kernel_type == PRECOMPUTED:
        for xi in prob.x_space:
            idx, val = xi[0].index, xi[0].value
            if idx != 0:
                raise ValueError('Wrong input format: first column must be 0:sample_serial_number')
            if val <= 0 or val > prob.n:
                raise ValueError('Wrong input format: sample_serial_number out of range')

    if param.gamma == 0 and prob.n > 0:
        param.gamma = 1.0 / prob.n
    libsvm.svm_set_print_string_function(param.print_func)
    err_msg = libsvm.svm_check_parameter(prob, param)
    if err_msg:
        raise ValueError('Error: %s' % err_msg)

    if param.cross_validation:
        l, nr_fold = prob.l, param.nr_fold
        target = (c_double * l)()
        libsvm.svm_cross_validation(prob, param, nr_fold, target)
        ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
        if param.svm_type in [EPSILON_SVR, NU_SVR]:
            print("Cross Validation Mean squared error = %g" % MSE)
            print("Cross Validation Squared correlation coefficient = %g" % SCC)
            return MSE
        else:
            print("Cross Validation Accuracy = %g%%" % ACC)
            return ACC
    else:
        m = libsvm.svm_train(prob, param)
        m = toPyModel(m)

        # Keep a reference to the training data so that it is not freed
        # while the returned model still points into it.
        m.x_space = prob.x_space
        return m

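# Usage sketch (illustrative; assumes (y, x) were loaded with svm_read_problem):
#
#   m = svm_train(y, x, '-c 4')               # C-SVC with the default RBF kernel
#   acc = svm_train(y, x, '-c 4 -v 5')        # 5-fold cross validation accuracy
#   prob = svm_problem(y, x)
#   param = svm_parameter('-s 3 -c 1 -p 0.1') # epsilon-SVR
#   m = svm_train(prob, param)
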
def svm_predict(y, x, m, options=""):
    """
    svm_predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)

    Predict data (y, x) with the SVM model m.
    options:
        -b probability_estimates: whether to predict probability estimates,
            0 or 1 (default 0); for one-class SVM only 0 is supported.
        -q : quiet mode (no outputs).

    The return tuple contains
    p_labels: a list of predicted labels
    p_acc: a tuple including accuracy (for classification), mean-squared
        error, and squared correlation coefficient (for regression).
    p_vals: a list of decision values or probability estimates (if '-b 1'
        is specified). If k is the number of classes, for decision values,
        each element includes results of predicting k(k-1)/2 binary-class
        SVMs. For probabilities, each element contains k values indicating
        the probability that the testing instance is in each class.
        Note that the order of classes here is the same as the 'model.label'
        field in the model structure.
    """

    def info(s):
        print(s)

    predict_probability = 0
    argv = options.split()
    i = 0
    while i < len(argv):
        if argv[i] == '-b':
            i += 1
            predict_probability = int(argv[i])
        elif argv[i] == '-q':
            info = print_null
        else:
            raise ValueError("Wrong options")
        i += 1

    svm_type = m.get_svm_type()
    is_prob_model = m.is_probability_model()
    nr_class = m.get_nr_class()
    pred_labels = []
    pred_values = []

    if predict_probability:
        if not is_prob_model:
            raise ValueError("Model does not support probability estimates")

        if svm_type in [NU_SVR, EPSILON_SVR]:
            info("Prob. model for test data: target value = predicted value + z,\n"
                 "z: Laplace distribution e^(-|z|/sigma)/(2sigma), sigma=%g" % m.get_svr_probability())
            nr_class = 0

        prob_estimates = (c_double * nr_class)()
        for xi in x:
            xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
            label = libsvm.svm_predict_probability(m, xi, prob_estimates)
            values = prob_estimates[:nr_class]
            pred_labels += [label]
            pred_values += [values]
    else:
        if is_prob_model:
            info("Model supports probability estimates, but disabled in prediction.")
        if svm_type in (ONE_CLASS, EPSILON_SVR, NU_SVR):
            # One decision value per instance for one-class and regression models.
            nr_classifier = 1
        else:
            nr_classifier = nr_class*(nr_class-1)//2
        dec_values = (c_double * nr_classifier)()
        for xi in x:
            xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
            label = libsvm.svm_predict_values(m, xi, dec_values)
            if nr_class == 1:
                values = [1]
            else:
                values = dec_values[:nr_classifier]
            pred_labels += [label]
            pred_values += [values]

    ACC, MSE, SCC = evaluations(y, pred_labels)
    l = len(y)
    if svm_type in [EPSILON_SVR, NU_SVR]:
        info("Mean squared error = %g (regression)" % MSE)
        info("Squared correlation coefficient = %g (regression)" % SCC)
    else:
        info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))

    return pred_labels, (ACC, MSE, SCC), pred_values
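
# Usage sketch (illustrative; assumes a trained model m and test data (y, x)):
#
#   p_labels, p_acc, p_vals = svm_predict(y, x, m)
#   # p_acc == (accuracy, mean squared error, squared correlation coefficient)
#   # If m was trained with '-b 1', probability estimates can be requested:
#   p_labels, p_acc, p_vals = svm_predict(y, x, m, '-b 1')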