svmutil.py
#!/usr/bin/env python

from svm import *

def svm_read_problem(data_file_name):
    """
    svm_read_problem(data_file_name) -> [y, x]

    Read LIBSVM-format data from data_file_name and return labels y
    and data instances x.
    """
    prob_y = []
    prob_x = []
    for line in open(data_file_name):
        line = line.split(None, 1)
        # In case of an instance with all zero features
        if len(line) == 1: line += ['']
        label, features = line
        xi = {}
        for e in features.split():
            ind, val = e.split(":")
            xi[int(ind)] = float(val)
        prob_y += [float(label)]
        prob_x += [xi]
    return (prob_y, prob_x)

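# The sketch below is illustrative only and not part of the original LIBSVM
# distribution. The helper _example_read_problem is hypothetical: it writes two
# lines in LIBSVM format ("label index:value ...") to a temporary file and
# parses them back with svm_read_problem.
def _example_read_problem():
    import os, tempfile
    with tempfile.NamedTemporaryFile('w', suffix='.svm', delete=False) as f:
        f.write("+1 1:0.5 3:1\n")
        f.write("-1 2:-0.2\n")
        path = f.name
    try:
        y, x = svm_read_problem(path)
        # y == [1.0, -1.0]
        # x == [{1: 0.5, 3: 1.0}, {2: -0.2}]
        return y, x
    finally:
        os.remove(path)
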
def svm_load_model(model_file_name):
    """
    svm_load_model(model_file_name) -> model

    Load a LIBSVM model from model_file_name and return.
    """
    model = libsvm.svm_load_model(model_file_name.encode())
    if not model:
        print("can't open model file %s" % model_file_name)
        return None
    model = toPyModel(model)
    return model

def svm_save_model(model_file_name, model):
    """
    svm_save_model(model_file_name, model) -> None

    Save a LIBSVM model to the file model_file_name.
    """
    libsvm.svm_save_model(model_file_name.encode(), model)

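# Hedged round-trip sketch (not part of the original file): given a model m
# returned by svm_train below, save it to disk and load it back. The helper
# name and the default path 'svm.model' are assumptions for illustration.
def _example_model_io(m, path='svm.model'):
    svm_save_model(path, m)      # writes the model in LIBSVM's text format
    return svm_load_model(path)  # returns an equivalent Python-side model
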
def evaluations(ty, pv):
    """
    evaluations(ty, pv) -> (ACC, MSE, SCC)

    Calculate accuracy, mean squared error and squared correlation coefficient
    using the true values (ty) and predicted values (pv).
    """
    if len(ty) != len(pv):
        raise ValueError("len(ty) must be equal to len(pv)")
    total_correct = total_error = 0
    sumv = sumy = sumvv = sumyy = sumvy = 0
    for v, y in zip(pv, ty):
        if y == v:
            total_correct += 1
        total_error += (v-y)*(v-y)
        sumv += v
        sumy += y
        sumvv += v*v
        sumyy += y*y
        sumvy += v*y
    l = len(ty)
    ACC = 100.0*total_correct/l
    MSE = total_error/l
    try:
        SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
    except ZeroDivisionError:
        SCC = float('nan')
    return (ACC, MSE, SCC)

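# A small self-contained illustration of evaluations(); the toy vectors and the
# helper name are hypothetical and only show what the returned tuple looks like.
def _example_evaluations():
    ty = [1.0, 2.0, 3.0]   # true values
    pv = [1.0, 2.5, 2.5]   # predicted values
    ACC, MSE, SCC = evaluations(ty, pv)
    # ACC ~ 33.33 (one exact match out of three)
    # MSE ~ 0.1667, SCC == 0.75
    return ACC, MSE, SCC
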
def svm_train(arg1, arg2=None, arg3=None):
    """
    svm_train(y, x [, 'options']) -> model | ACC | MSE
    svm_train(prob [, 'options']) -> model | ACC | MSE
    svm_train(prob, param) -> model | ACC | MSE

    Train an SVM model from data (y, x) or an svm_problem prob using
    'options' or an svm_parameter param.
    If '-v' is specified in 'options' (i.e., cross validation),
    either accuracy (ACC) or mean-squared error (MSE) is returned.
    'options':
        -s svm_type : set type of SVM (default 0)
            0 -- C-SVC (multi-class classification)
            1 -- nu-SVC (multi-class classification)
            2 -- one-class SVM
            3 -- epsilon-SVR (regression)
            4 -- nu-SVR (regression)
        -t kernel_type : set type of kernel function (default 2)
            0 -- linear: u'*v
            1 -- polynomial: (gamma*u'*v + coef0)^degree
            2 -- radial basis function: exp(-gamma*|u-v|^2)
            3 -- sigmoid: tanh(gamma*u'*v + coef0)
            4 -- precomputed kernel (kernel values in training_set_file)
        -d degree : set degree in kernel function (default 3)
        -g gamma : set gamma in kernel function (default 1/num_features)
        -r coef0 : set coef0 in kernel function (default 0)
        -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
        -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
        -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
        -m cachesize : set cache memory size in MB (default 100)
        -e epsilon : set tolerance of termination criterion (default 0.001)
        -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
        -b probability_estimates : whether to train an SVC or SVR model for probability estimates, 0 or 1 (default 0)
        -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
        -v n : n-fold cross validation mode
        -q : quiet mode (no outputs)
    """
    prob, param = None, None
    if isinstance(arg1, (list, tuple)):
        assert isinstance(arg2, (list, tuple))
        y, x, options = arg1, arg2, arg3
        param = svm_parameter(options)
        prob = svm_problem(y, x, isKernel=(param.kernel_type == PRECOMPUTED))
    elif isinstance(arg1, svm_problem):
        prob = arg1
        if isinstance(arg2, svm_parameter):
            param = arg2
        else:
            param = svm_parameter(arg2)
    if prob is None or param is None:
        raise TypeError("Wrong types for the arguments")

    if param.kernel_type == PRECOMPUTED:
        for xi in prob.x_space:
            idx, val = xi[0].index, xi[0].value
            if idx != 0:
                raise ValueError('Wrong input format: first column must be 0:sample_serial_number')
            if val <= 0 or val > prob.n:
                raise ValueError('Wrong input format: sample_serial_number out of range')

    if param.gamma == 0 and prob.n > 0:
        param.gamma = 1.0 / prob.n
    libsvm.svm_set_print_string_function(param.print_func)
    err_msg = libsvm.svm_check_parameter(prob, param)
    if err_msg:
        raise ValueError('Error: %s' % err_msg)

    if param.cross_validation:
        l, nr_fold = prob.l, param.nr_fold
        target = (c_double * l)()
        libsvm.svm_cross_validation(prob, param, nr_fold, target)
        ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
        if param.svm_type in [EPSILON_SVR, NU_SVR]:
            print("Cross Validation Mean squared error = %g" % MSE)
            print("Cross Validation Squared correlation coefficient = %g" % SCC)
            return MSE
        else:
            print("Cross Validation Accuracy = %g%%" % ACC)
            return ACC
    else:
        m = libsvm.svm_train(prob, param)
        m = toPyModel(m)

        # Keep a reference to prob's data so the SVs pointed to by m remain
        # valid even if prob itself is destroyed.
        m.x_space = prob.x_space
        return m

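# Hedged sketch of the calling conventions documented above. The toy data and
# the helper name are assumptions; '-t 0 -c 4 -q' trains a quiet linear C-SVC.
def _example_train_conventions():
    y = [1, -1, 1, -1]
    x = [{1: 1.0, 2: 1.0}, {1: -1.0, 2: -1.0},
         {1: 0.8, 2: 1.2}, {1: -0.9, 2: -1.1}]
    m1 = svm_train(y, x, '-t 0 -c 4 -q')   # (y, x, 'options')
    prob = svm_problem(y, x)
    param = svm_parameter('-t 0 -c 4 -q')
    m2 = svm_train(prob, param)            # (prob, param)
    return m1, m2
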
def svm_predict(y, x, m, options=""):
    """
    svm_predict(y, x, m [, "options"]) -> (p_labels, p_acc, p_vals)

    Predict data (y, x) with the SVM model m.
    "options":
        -b probability_estimates: whether to predict probability estimates,
            0 or 1 (default 0); for one-class SVM only 0 is supported.
        -q : quiet mode (no outputs).

    The return tuple contains
    p_labels: a list of predicted labels
    p_acc: a tuple including accuracy (for classification), mean-squared
        error, and squared correlation coefficient (for regression).
    p_vals: a list of decision values or probability estimates (if '-b 1'
        is specified). If k is the number of classes, for decision values,
        each element includes results of predicting k(k-1)/2 binary-class
        SVMs. For probabilities, each element contains k values indicating
        the probability that the testing instance is in each class.
        Note that the order of classes here is the same as the 'model.label'
        field in the model structure.
    """

    def info(s):
        print(s)

    predict_probability = 0
    argv = options.split()
    i = 0
    while i < len(argv):
        if argv[i] == '-b':
            i += 1
            predict_probability = int(argv[i])
        elif argv[i] == '-q':
            info = print_null
        else:
            raise ValueError("Wrong options")
        i += 1

    svm_type = m.get_svm_type()
    is_prob_model = m.is_probability_model()
    nr_class = m.get_nr_class()
    pred_labels = []
    pred_values = []

    if predict_probability:
        if not is_prob_model:
            raise ValueError("Model does not support probability estimates")

        if svm_type in [NU_SVR, EPSILON_SVR]:
            info("Prob. model for test data: target value = predicted value + z,\n"
                 "z: Laplace distribution e^(-|z|/sigma)/(2sigma), sigma=%g" % m.get_svr_probability())
            nr_class = 0

        prob_estimates = (c_double * nr_class)()
        for xi in x:
            xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
            label = libsvm.svm_predict_probability(m, xi, prob_estimates)
            values = prob_estimates[:nr_class]
            pred_labels += [label]
            pred_values += [values]
    else:
        if is_prob_model:
            info("Model supports probability estimates, but disabled in prediction.")
        # One decision value for one-class and regression models; k(k-1)/2 otherwise.
        if svm_type in (ONE_CLASS, EPSILON_SVR, NU_SVR):
            nr_classifier = 1
        else:
            nr_classifier = nr_class*(nr_class-1)//2
        dec_values = (c_double * nr_classifier)()
        for xi in x:
            xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
            label = libsvm.svm_predict_values(m, xi, dec_values)
            if nr_class == 1:
                values = [1]
            else:
                values = dec_values[:nr_classifier]
            pred_labels += [label]
            pred_values += [values]

    ACC, MSE, SCC = evaluations(y, pred_labels)
    l = len(y)
    if svm_type in [EPSILON_SVR, NU_SVR]:
        info("Mean squared error = %g (regression)" % MSE)
        info("Squared correlation coefficient = %g (regression)" % SCC)
    else:
        info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))

    return pred_labels, (ACC, MSE, SCC), pred_values

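# End-to-end sketch tying the helpers above together; _example_workflow is
# hypothetical and assumes data_file_name points to a LIBSVM-format file.
# Training and predicting on the same data is for illustration only.
def _example_workflow(data_file_name):
    y, x = svm_read_problem(data_file_name)
    m = svm_train(y, x, '-c 1 -g 0.1')
    p_labels, p_acc, p_vals = svm_predict(y, x, m)
    return p_acc  # (ACC, MSE, SCC)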