import os
import sys

# Make the directory containing this file searchable first, so that the
# bundled svm module (svm.py) is found by the imports below.
sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path

from svm import *
from svm import __all__ as svm_all


__all__ = ['evaluations', 'svm_load_model', 'svm_predict', 'svm_read_problem',
           'svm_save_model', 'svm_train'] + svm_all

def svm_read_problem(data_file_name):
    """
    svm_read_problem(data_file_name) -> [y, x]

    Read LIBSVM-format data from data_file_name and return labels y
    and data instances x.
    """
    prob_y = []
    prob_x = []
    for line in open(data_file_name):
        line = line.split(None, 1)
        # In case an instance has no features, pad with an empty feature string.
        if len(line) == 1:
            line += ['']
        label, features = line
        xi = {}
        for e in features.split():
            ind, val = e.split(":")
            xi[int(ind)] = float(val)
        prob_y += [float(label)]
        prob_x += [xi]
    return (prob_y, prob_x)

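# Usage sketch (illustrative; assumes a data file in LIBSVM format, e.g. the
# 'heart_scale' file shipped with the LIBSVM distribution, in the current
# directory):
#
#   y, x = svm_read_problem('heart_scale')
#   # y is a list of labels; x is a list of {feature_index: value} dicts,
#   # e.g. an instance might look like {1: 0.5, 3: -1.0}.
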
def svm_load_model(model_file_name):
    """
    svm_load_model(model_file_name) -> model

    Load a LIBSVM model from model_file_name and return.
    """
    model = libsvm.svm_load_model(model_file_name.encode())
    if not model:
        print("can't open model file %s" % model_file_name)
        return None
    model = toPyModel(model)
    return model

def svm_save_model(model_file_name, model):
    """
    svm_save_model(model_file_name, model) -> None

    Save a LIBSVM model to the file model_file_name.
    """
    libsvm.svm_save_model(model_file_name.encode(), model)

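# Usage sketch for saving and reloading a model (illustrative; the file name
# 'heart_scale.model' is hypothetical):
#
#   m = svm_train(y, x, '-c 4')
#   svm_save_model('heart_scale.model', m)
#   m = svm_load_model('heart_scale.model')
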
def evaluations(ty, pv):
    """
    evaluations(ty, pv) -> (ACC, MSE, SCC)

    Calculate accuracy, mean squared error and squared correlation coefficient
    using the true values (ty) and predicted values (pv).
    """
    if len(ty) != len(pv):
        raise ValueError("len(ty) must be equal to len(pv)")
    total_correct = total_error = 0
    sumv = sumy = sumvv = sumyy = sumvy = 0
    for v, y in zip(pv, ty):
        if y == v:
            total_correct += 1
        total_error += (v-y)*(v-y)
        sumv += v
        sumy += y
        sumvv += v*v
        sumyy += y*y
        sumvy += v*y
    l = len(ty)
    ACC = 100.0*total_correct/l
    MSE = total_error/l
    try:
        SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
    except ZeroDivisionError:
        # SCC is undefined when either ty or pv has zero variance.
        SCC = float('nan')
    return (ACC, MSE, SCC)

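# Worked example (illustrative): for ty = [1, 1, -1] and pv = [1, -1, -1],
# two of the three predictions are correct, so ACC = 100*2/3, and
# MSE = ((1-1)**2 + (-1-1)**2 + (-1-(-1))**2)/3 = 4/3.
#
#   ACC, MSE, SCC = evaluations([1, 1, -1], [1, -1, -1])
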
def svm_train(arg1, arg2=None, arg3=None):
    """
    svm_train(y, x [, options]) -> model | ACC | MSE
    svm_train(prob [, options]) -> model | ACC | MSE
    svm_train(prob, param) -> model | ACC | MSE

    Train an SVM model from data (y, x) or an svm_problem prob using
    'options' or an svm_parameter param.
    If '-v' is specified in 'options' (i.e., cross validation),
    either accuracy (ACC) or mean-squared error (MSE) is returned.
    options:
        -s svm_type : set type of SVM (default 0)
            0 -- C-SVC (multi-class classification)
            1 -- nu-SVC (multi-class classification)
            2 -- one-class SVM
            3 -- epsilon-SVR (regression)
            4 -- nu-SVR (regression)
        -t kernel_type : set type of kernel function (default 2)
            0 -- linear: u'*v
            1 -- polynomial: (gamma*u'*v + coef0)^degree
            2 -- radial basis function: exp(-gamma*|u-v|^2)
            3 -- sigmoid: tanh(gamma*u'*v + coef0)
            4 -- precomputed kernel (kernel values in training_set_file)
        -d degree : set degree in kernel function (default 3)
        -g gamma : set gamma in kernel function (default 1/num_features)
        -r coef0 : set coef0 in kernel function (default 0)
        -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
        -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
        -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
        -m cachesize : set cache memory size in MB (default 100)
        -e epsilon : set tolerance of termination criterion (default 0.001)
        -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
        -b probability_estimates : whether to train an SVC or SVR model for probability estimates, 0 or 1 (default 0)
        -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
        -v n : n-fold cross validation mode
        -q : quiet mode (no outputs)
    """
    prob, param = None, None
    if isinstance(arg1, (list, tuple)):
        assert isinstance(arg2, (list, tuple))
        y, x, options = arg1, arg2, arg3
        param = svm_parameter(options)
        prob = svm_problem(y, x, isKernel=(param.kernel_type == PRECOMPUTED))
    elif isinstance(arg1, svm_problem):
        prob = arg1
        if isinstance(arg2, svm_parameter):
            param = arg2
        else:
            param = svm_parameter(arg2)
    if prob is None or param is None:
        raise TypeError("Wrong types for the arguments")

    if param.kernel_type == PRECOMPUTED:
        for xi in prob.x_space:
            idx, val = xi[0].index, xi[0].value
            if idx != 0:
                raise ValueError('Wrong input format: first column must be 0:sample_serial_number')
            if val <= 0 or val > prob.n:
                raise ValueError('Wrong input format: sample_serial_number out of range')

    if param.gamma == 0 and prob.n > 0:
        param.gamma = 1.0 / prob.n
    libsvm.svm_set_print_string_function(param.print_func)
    err_msg = libsvm.svm_check_parameter(prob, param)
    if err_msg:
        raise ValueError('Error: %s' % err_msg)

    if param.cross_validation:
        l, nr_fold = prob.l, param.nr_fold
        target = (c_double * l)()
        libsvm.svm_cross_validation(prob, param, nr_fold, target)
        ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
        if param.svm_type in [EPSILON_SVR, NU_SVR]:
            print("Cross Validation Mean squared error = %g" % MSE)
            print("Cross Validation Squared correlation coefficient = %g" % SCC)
            return MSE
        else:
            print("Cross Validation Accuracy = %g%%" % ACC)
            return ACC
    else:
        m = libsvm.svm_train(prob, param)
        m = toPyModel(m)

        # Keep a reference to the training data so that it is not freed
        # while the returned model still points into it.
        m.x_space = prob.x_space
        return m

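# Usage sketch (illustrative; assumes (y, x) were loaded with svm_read_problem):
#
#   m = svm_train(y, x, '-c 4')               # C-SVC with the default RBF kernel
#   acc = svm_train(y, x, '-c 4 -v 5')        # 5-fold cross validation accuracy
#   prob = svm_problem(y, x)
#   param = svm_parameter('-s 3 -c 1 -p 0.1') # epsilon-SVR
#   m = svm_train(prob, param)
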
def svm_predict(y, x, m, options=""):
    """
    svm_predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)

    Predict data (y, x) with the SVM model m.
    options:
        -b probability_estimates: whether to predict probability estimates,
            0 or 1 (default 0); for one-class SVM only 0 is supported.
        -q : quiet mode (no outputs).

    The return tuple contains
    p_labels: a list of predicted labels
    p_acc: a tuple including accuracy (for classification), mean-squared
        error, and squared correlation coefficient (for regression).
    p_vals: a list of decision values or probability estimates (if '-b 1'
        is specified). If k is the number of classes, for decision values,
        each element includes results of predicting k(k-1)/2 binary-class
        SVMs. For probabilities, each element contains k values indicating
        the probability that the testing instance is in each class.
        Note that the order of classes here is the same as the 'model.label'
        field in the model structure.
    """

    def info(s):
        print(s)

    predict_probability = 0
    argv = options.split()
    i = 0
    while i < len(argv):
        if argv[i] == '-b':
            i += 1
            predict_probability = int(argv[i])
        elif argv[i] == '-q':
            info = print_null
        else:
            raise ValueError("Wrong options")
        i += 1

    svm_type = m.get_svm_type()
    is_prob_model = m.is_probability_model()
    nr_class = m.get_nr_class()
    pred_labels = []
    pred_values = []

    if predict_probability:
        if not is_prob_model:
            raise ValueError("Model does not support probability estimates")

        if svm_type in [NU_SVR, EPSILON_SVR]:
            info("Prob. model for test data: target value = predicted value + z,\n"
                 "z: Laplace distribution e^(-|z|/sigma)/(2sigma), sigma=%g" % m.get_svr_probability())
            nr_class = 0

        prob_estimates = (c_double * nr_class)()
        for xi in x:
            xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
            label = libsvm.svm_predict_probability(m, xi, prob_estimates)
            values = prob_estimates[:nr_class]
            pred_labels += [label]
            pred_values += [values]
    else:
        if is_prob_model:
            info("Model supports probability estimates, but disabled in prediction.")
        if svm_type in (ONE_CLASS, EPSILON_SVR, NU_SVR):
            # One decision value per instance for one-class and regression models.
            nr_classifier = 1
        else:
            nr_classifier = nr_class*(nr_class-1)//2
        dec_values = (c_double * nr_classifier)()
        for xi in x:
            xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
            label = libsvm.svm_predict_values(m, xi, dec_values)
            if nr_class == 1:
                values = [1]
            else:
                values = dec_values[:nr_classifier]
            pred_labels += [label]
            pred_values += [values]

    ACC, MSE, SCC = evaluations(y, pred_labels)
    l = len(y)
    if svm_type in [EPSILON_SVR, NU_SVR]:
        info("Mean squared error = %g (regression)" % MSE)
        info("Squared correlation coefficient = %g (regression)" % SCC)
    else:
        info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))

    return pred_labels, (ACC, MSE, SCC), pred_values
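
# Usage sketch (illustrative; assumes a trained model m and test data (y, x)):
#
#   p_labels, p_acc, p_vals = svm_predict(y, x, m)
#   # p_acc == (accuracy, mean squared error, squared correlation coefficient)
#   # If m was trained with '-b 1', probability estimates can be requested:
#   p_labels, p_acc, p_vals = svm_predict(y, x, m, '-b 1')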