7 svm_read_problem(data_file_name) -> [y, x] 9 Read LIBSVM-format data from data_file_name and return labels y 10 and data instances x. 14 for line
in open(data_file_name):
15 line = line.split(
None, 1)
17 if len(line) == 1: line += [
'']
18 label, features = line
20 for e
in features.split():
21 ind, val = e.split(
":")
22 xi[int(ind)] = float(val)
23 prob_y += [float(label)]
25 return (prob_y, prob_x)
29 svm_load_model(model_file_name) -> model 31 Load a LIBSVM model from model_file_name and return it. 33 model = libsvm.svm_load_model(model_file_name.encode())
35 print(
"can't open model file %s" % model_file_name)
42 svm_save_model(model_file_name, model) -> None 44 Save a LIBSVM model to the file model_file_name. 46 libsvm.svm_save_model(model_file_name.encode(), model)
50 evaluations(ty, pv) -> (ACC, MSE, SCC) 52 Calculate accuracy, mean squared error and squared correlation coefficient 53 using the true values (ty) and predicted values (pv). 55 if len(ty) != len(pv):
56 raise ValueError(
"len(ty) must equal to len(pv)")
57 total_correct = total_error = 0
58 sumv = sumy = sumvv = sumyy = sumvy = 0
59 for v, y
in zip(pv, ty):
62 total_error += (v-y)*(v-y)
69 ACC = 100.0*total_correct/l
72 SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
75 return (ACC, MSE, SCC)
79 svm_train(y, x [, 'options']) -> model | ACC | MSE 80 svm_train(prob [, 'options']) -> model | ACC | MSE 81 svm_train(prob, param) -> model | ACC | MSE 83 Train an SVM model from data (y, x) or an svm_problem prob using 84 'options' or an svm_parameter param. 85 If '-v' is specified in 'options' (i.e., cross validation) 86 either accuracy (ACC) or mean-squared error (MSE) is returned. 88 -s svm_type : set type of SVM (default 0) 89 0 -- C-SVC (multi-class classification) 90 1 -- nu-SVC (multi-class classification) 91 2 -- one-class SVM 92 3 -- epsilon-SVR (regression) 93 4 -- nu-SVR (regression) 94 -t kernel_type : set type of kernel function (default 2) 95 0 -- linear: u'*v 96 1 -- polynomial: (gamma*u'*v + coef0)^degree 97 2 -- radial basis function: exp(-gamma*|u-v|^2) 98 3 -- sigmoid: tanh(gamma*u'*v + coef0) 99 4 -- precomputed kernel (kernel values in training_set_file) 100 -d degree : set degree in kernel function (default 3) 101 -g gamma : set gamma in kernel function (default 1/num_features) 102 -r coef0 : set coef0 in kernel function (default 0) 103 -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1) 104 -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5) 105 -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1) 106 -m cachesize : set cache memory size in MB (default 100) 107 -e epsilon : set tolerance of termination criterion (default 0.001) 108 -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1) 109 -b probability_estimates : whether to train an SVC or SVR model for probability estimates, 0 or 1 (default 0) 110 -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1) 111 -v n: n-fold cross validation mode 112 -q : quiet mode (no outputs) 114 prob, param =
None,
None 115 if isinstance(arg1, (list, tuple)):
116 assert isinstance(arg2, (list, tuple))
117 y, x, options = arg1, arg2, arg3
119 prob =
svm_problem(y, x, isKernel=(param.kernel_type == PRECOMPUTED))
120 elif isinstance(arg1, svm_problem):
122 if isinstance(arg2, svm_parameter):
126 if prob ==
None or param ==
None:
127 raise TypeError(
"Wrong types for the arguments")
129 if param.kernel_type == PRECOMPUTED:
130 for xi
in prob.x_space:
131 idx, val = xi[0].index, xi[0].value
133 raise ValueError(
'Wrong input format: first column must be 0:sample_serial_number')
134 if val <= 0
or val > prob.n:
135 raise ValueError(
'Wrong input format: sample_serial_number out of range')
137 if param.gamma == 0
and prob.n > 0:
138 param.gamma = 1.0 / prob.n
139 libsvm.svm_set_print_string_function(param.print_func)
140 err_msg = libsvm.svm_check_parameter(prob, param)
142 raise ValueError(
'Error: %s' % err_msg)
144 if param.cross_validation:
145 l, nr_fold = prob.l, param.nr_fold
146 target = (c_double * l)()
147 libsvm.svm_cross_validation(prob, param, nr_fold, target)
148 ACC, MSE, SCC =
evaluations(prob.y[:l], target[:l])
149 if param.svm_type
in [EPSILON_SVR, NU_SVR]:
150 print(
"Cross Validation Mean squared error = %g" % MSE)
151 print(
"Cross Validation Squared correlation coefficient = %g" % SCC)
154 print(
"Cross Validation Accuracy = %g%%" % ACC)
157 m = libsvm.svm_train(prob, param)
161 m.x_space = prob.x_space
166 svm_predict(y, x, m [, "options"]) -> (p_labels, p_acc, p_vals) 168 Predict data (y, x) with the SVM model m. 170 -b probability_estimates: whether to predict probability estimates, 171 0 or 1 (default 0); for one-class SVM only 0 is supported. 172 -q : quiet mode (no outputs). 174 The return tuple contains 175 p_labels: a list of predicted labels 176 p_acc: a tuple including accuracy (for classification), mean-squared 177 error, and squared correlation coefficient (for regression). 178 p_vals: a list of decision values or probability estimates (if '-b 1' 179 is specified). If k is the number of classes, for decision values, 180 each element includes results of predicting k(k-1)/2 binary-class 181 SVMs. For probabilities, each element contains k values indicating 182 the probability that the testing instance is in each class. 183 Note that the order of classes here is the same as 'model.label' 184 field in the model structure. 190 predict_probability = 0
191 argv = options.split()
196 predict_probability = int(argv[i])
197 elif argv[i] ==
'-q':
200 raise ValueError(
"Wrong options")
203 svm_type = m.get_svm_type()
204 is_prob_model = m.is_probability_model()
205 nr_class = m.get_nr_class()
209 if predict_probability:
210 if not is_prob_model:
211 raise ValueError(
"Model does not support probabiliy estimates")
213 if svm_type
in [NU_SVR, EPSILON_SVR]:
214 info(
"Prob. model for test data: target value = predicted value + z,\n" 215 "z: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g" % m.get_svr_probability());
218 prob_estimates = (c_double * nr_class)()
221 label = libsvm.svm_predict_probability(m, xi, prob_estimates)
222 values = prob_estimates[:nr_class]
223 pred_labels += [label]
224 pred_values += [values]
227 info(
"Model supports probability estimates, but disabled in predicton.")
228 if svm_type
in (ONE_CLASS, EPSILON_SVR, NU_SVC):
231 nr_classifier = nr_class*(nr_class-1)//2
232 dec_values = (c_double * nr_classifier)()
235 label = libsvm.svm_predict_values(m, xi, dec_values)
239 values = dec_values[:nr_classifier]
240 pred_labels += [label]
241 pred_values += [values]
245 if svm_type
in [EPSILON_SVR, NU_SVR]:
246 info(
"Mean squared error = %g (regression)" % MSE)
247 info(
"Squared correlation coefficient = %g (regression)" % SCC)
249 info(
"Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))
251 return pred_labels, (ACC, MSE, SCC), pred_values
def gen_svm_nodearray(xi, feature_max=None, isKernel=None)
def svm_train(arg1, arg2=None, arg3=None)
def svm_save_model(model_file_name, model)
def svm_load_model(model_file_name)
def svm_read_problem(data_file_name)
def svm_predict(y, x, m, options="")
int(* info)(const char *fmt,...)