svm.java
Go to the documentation of this file.
1 
2 
3 
4 
5 package libsvm;
6 import java.io.*;
7 import java.util.*;
8 
9 //
10 // Kernel Cache
11 //
12 // l is the number of total data items
13 // size is the cache size limit in bytes
14 //
15 class Cache {
16  private final int l;
17  private long size;
18  private final class head_t
19  {
20  head_t prev, next; // a cicular list
21  float[] data;
22  int len; // data[0,len) is cached in this entry
23  }
24  private final head_t[] head;
25  private head_t lru_head;
26 
27  Cache(int l_, long size_)
28  {
29  l = l_;
30  size = size_;
31  head = new head_t[l];
32  for(int i=0;i<l;i++) head[i] = new head_t();
33  size /= 4;
34  size -= l * (16/4); // sizeof(head_t) == 16
35  size = Math.max(size, 2* (long) l); // cache must be large enough for two columns
36  lru_head = new head_t();
37  lru_head.next = lru_head.prev = lru_head;
38  }
39 
40  private void lru_delete(head_t h)
41  {
42  // delete from current location
43  h.prev.next = h.next;
44  h.next.prev = h.prev;
45  }
46 
47  private void lru_insert(head_t h)
48  {
49  // insert to last position
50  h.next = lru_head;
51  h.prev = lru_head.prev;
52  h.prev.next = h;
53  h.next.prev = h;
54  }
55 
56  // request data [0,len)
57  // return some position p where [p,len) need to be filled
58  // (p >= len if nothing needs to be filled)
59  // java: simulate pointer using single-element array
60  int get_data(int index, float[][] data, int len)
61  {
62  head_t h = head[index];
63  if(h.len > 0) lru_delete(h);
64  int more = len - h.len;
65 
66  if(more > 0)
67  {
68  // free old space
69  while(size < more)
70  {
71  head_t old = lru_head.next;
72  lru_delete(old);
73  size += old.len;
74  old.data = null;
75  old.len = 0;
76  }
77 
78  // allocate new space
79  float[] new_data = new float[len];
80  if(h.data != null) System.arraycopy(h.data,0,new_data,0,h.len);
81  h.data = new_data;
82  size -= more;
83  do {int _=h.len; h.len=len; len=_;} while(false);
84  }
85 
86  lru_insert(h);
87  data[0] = h.data;
88  return len;
89  }
90 
91  void swap_index(int i, int j)
92  {
93  if(i==j) return;
94 
95  if(head[i].len > 0) lru_delete(head[i]);
96  if(head[j].len > 0) lru_delete(head[j]);
97  do {float[] _=head[i].data; head[i].data=head[j].data; head[j].data=_;} while(false);
98  do {int _=head[i].len; head[i].len=head[j].len; head[j].len=_;} while(false);
99  if(head[i].len > 0) lru_insert(head[i]);
100  if(head[j].len > 0) lru_insert(head[j]);
101 
102  if(i>j) do {int _=i; i=j; j=_;} while(false);
103  for(head_t h = lru_head.next; h!=lru_head; h=h.next)
104  {
105  if(h.len > i)
106  {
107  if(h.len > j)
108  do {float _=h.data[i]; h.data[i]=h.data[j]; h.data[j]=_;} while(false);
109  else
110  {
111  // give up
112  lru_delete(h);
113  size += h.len;
114  h.data = null;
115  h.len = 0;
116  }
117  }
118  }
119  }
120 }
121 
122 //
123 // Kernel evaluation
124 //
125 // the static method k_function is for doing single kernel evaluation
126 // the constructor of Kernel prepares to calculate the l*l kernel matrix
127 // the member function get_Q is for getting one column from the Q Matrix
128 //
129 abstract class QMatrix {
130  abstract float[] get_Q(int column, int len);
131  abstract double[] get_QD();
132  abstract void swap_index(int i, int j);
133 };
134 
135 abstract class Kernel extends QMatrix {
136  private svm_node[][] x;
137  private final double[] x_square;
138 
139  // svm_parameter
140  private final int kernel_type;
141  private final int degree;
142  private final double gamma;
143  private final double coef0;
144 
145  abstract float[] get_Q(int column, int len);
146  abstract double[] get_QD();
147 
148  void swap_index(int i, int j)
149  {
150  do {svm_node[] _=x[i]; x[i]=x[j]; x[j]=_;} while(false);
151  if(x_square != null) do {double _=x_square[i]; x_square[i]=x_square[j]; x_square[j]=_;} while(false);
152  }
153 
154  private static double powi(double base, int times)
155  {
156  double tmp = base, ret = 1.0;
157 
158  for(int t=times; t>0; t/=2)
159  {
160  if(t%2==1) ret*=tmp;
161  tmp = tmp * tmp;
162  }
163  return ret;
164  }
165 
166  double kernel_function(int i, int j)
167  {
168  switch(kernel_type)
169  {
170  case svm_parameter.LINEAR:
171  return dot(x[i],x[j]);
172  case svm_parameter.POLY:
173  return powi(gamma*dot(x[i],x[j])+coef0,degree);
174  case svm_parameter.RBF:
175  return Math.exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
176  case svm_parameter.SIGMOID:
177  return Math.tanh(gamma*dot(x[i],x[j])+coef0);
179  return x[i][(int)(x[j][0].value)].value;
180  default:
181  return 0; // java
182  }
183  }
184 
185  Kernel(int l, svm_node[][] x_, svm_parameter param)
186  {
187  this.kernel_type = param.kernel_type;
188  this.degree = param.degree;
189  this.gamma = param.gamma;
190  this.coef0 = param.coef0;
191 
192  x = (svm_node[][])x_.clone();
193 
194  if(kernel_type == svm_parameter.RBF)
195  {
196  x_square = new double[l];
197  for(int i=0;i<l;i++)
198  x_square[i] = dot(x[i],x[i]);
199  }
200  else x_square = null;
201  }
202 
203  static double dot(svm_node[] x, svm_node[] y)
204  {
205  double sum = 0;
206  int xlen = x.length;
207  int ylen = y.length;
208  int i = 0;
209  int j = 0;
210  while(i < xlen && j < ylen)
211  {
212  if(x[i].index == y[j].index)
213  sum += x[i++].value * y[j++].value;
214  else
215  {
216  if(x[i].index > y[j].index)
217  ++j;
218  else
219  ++i;
220  }
221  }
222  return sum;
223  }
224 
225  static double k_function(svm_node[] x, svm_node[] y,
226  svm_parameter param)
227  {
228  switch(param.kernel_type)
229  {
230  case svm_parameter.LINEAR:
231  return dot(x,y);
232  case svm_parameter.POLY:
233  return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
234  case svm_parameter.RBF:
235  {
236  double sum = 0;
237  int xlen = x.length;
238  int ylen = y.length;
239  int i = 0;
240  int j = 0;
241  while(i < xlen && j < ylen)
242  {
243  if(x[i].index == y[j].index)
244  {
245  double d = x[i++].value - y[j++].value;
246  sum += d*d;
247  }
248  else if(x[i].index > y[j].index)
249  {
250  sum += y[j].value * y[j].value;
251  ++j;
252  }
253  else
254  {
255  sum += x[i].value * x[i].value;
256  ++i;
257  }
258  }
259 
260  while(i < xlen)
261  {
262  sum += x[i].value * x[i].value;
263  ++i;
264  }
265 
266  while(j < ylen)
267  {
268  sum += y[j].value * y[j].value;
269  ++j;
270  }
271 
272  return Math.exp(-param.gamma*sum);
273  }
274  case svm_parameter.SIGMOID:
275  return Math.tanh(param.gamma*dot(x,y)+param.coef0);
277  return x[(int)(y[0].value)].value;
278  default:
279  return 0; // java
280  }
281  }
282 }
283 
284 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
285 // Solves:
286 //
287 // min 0.5(\alpha^T Q \alpha) + p^T \alpha
288 //
289 // y^T \alpha = \delta
290 // y_i = +1 or -1
291 // 0 <= alpha_i <= Cp for y_i = 1
292 // 0 <= alpha_i <= Cn for y_i = -1
293 //
294 // Given:
295 //
296 // Q, p, y, Cp, Cn, and an initial feasible point \alpha
297 // l is the size of vectors and matrices
298 // eps is the stopping tolerance
299 //
300 // solution will be put in \alpha, objective value will be put in obj
301 //
302 class Solver {
303  int active_size;
304  byte[] y;
305  double[] G; // gradient of objective function
306  static final byte LOWER_BOUND = 0;
307  static final byte UPPER_BOUND = 1;
308  static final byte FREE = 2;
309  byte[] alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
310  double[] alpha;
311  QMatrix Q;
312  double[] QD;
313  double eps;
314  double Cp,Cn;
315  double[] p;
316  int[] active_set;
317  double[] G_bar; // gradient, if we treat free variables as 0
318  int l;
319  boolean unshrink; // XXX
320 
321  static final double INF = java.lang.Double.POSITIVE_INFINITY;
322 
323  double get_C(int i)
324  {
325  return (y[i] > 0)? Cp : Cn;
326  }
327  void update_alpha_status(int i)
328  {
329  if(alpha[i] >= get_C(i))
330  alpha_status[i] = UPPER_BOUND;
331  else if(alpha[i] <= 0)
332  alpha_status[i] = LOWER_BOUND;
333  else alpha_status[i] = FREE;
334  }
335  boolean is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
336  boolean is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
337  boolean is_free(int i) { return alpha_status[i] == FREE; }
338 
339  // java: information about solution except alpha,
340  // because we cannot return multiple values otherwise...
341  static class SolutionInfo {
342  double obj;
343  double rho;
344  double upper_bound_p;
345  double upper_bound_n;
346  double r; // for Solver_NU
347  }
348 
349  void swap_index(int i, int j)
350  {
351  Q.swap_index(i,j);
352  do {byte _=y[i]; y[i]=y[j]; y[j]=_;} while(false);
353  do {double _=G[i]; G[i]=G[j]; G[j]=_;} while(false);
354  do {byte _=alpha_status[i]; alpha_status[i]=alpha_status[j]; alpha_status[j]=_;} while(false);
355  do {double _=alpha[i]; alpha[i]=alpha[j]; alpha[j]=_;} while(false);
356  do {double _=p[i]; p[i]=p[j]; p[j]=_;} while(false);
357  do {int _=active_set[i]; active_set[i]=active_set[j]; active_set[j]=_;} while(false);
358  do {double _=G_bar[i]; G_bar[i]=G_bar[j]; G_bar[j]=_;} while(false);
359  }
360 
361  void reconstruct_gradient()
362  {
363  // reconstruct inactive elements of G from G_bar and free variables
364 
365  if(active_size == l) return;
366 
367  int i,j;
368  int nr_free = 0;
369 
370  for(j=active_size;j<l;j++)
371  G[j] = G_bar[j] + p[j];
372 
373  for(j=0;j<active_size;j++)
374  if(is_free(j))
375  nr_free++;
376 
377  if(2*nr_free < active_size)
378  svm.info("\nWARNING: using -h 0 may be faster\n");
379 
380  if (nr_free*l > 2*active_size*(l-active_size))
381  {
382  for(i=active_size;i<l;i++)
383  {
384  float[] Q_i = Q.get_Q(i,active_size);
385  for(j=0;j<active_size;j++)
386  if(is_free(j))
387  G[i] += alpha[j] * Q_i[j];
388  }
389  }
390  else
391  {
392  for(i=0;i<active_size;i++)
393  if(is_free(i))
394  {
395  float[] Q_i = Q.get_Q(i,l);
396  double alpha_i = alpha[i];
397  for(j=active_size;j<l;j++)
398  G[j] += alpha_i * Q_i[j];
399  }
400  }
401  }
402 
403  void Solve(int l, QMatrix Q, double[] p_, byte[] y_,
404  double[] alpha_, double Cp, double Cn, double eps, SolutionInfo si, int shrinking)
405  {
406  this.l = l;
407  this.Q = Q;
408  QD = Q.get_QD();
409  p = (double[])p_.clone();
410  y = (byte[])y_.clone();
411  alpha = (double[])alpha_.clone();
412  this.Cp = Cp;
413  this.Cn = Cn;
414  this.eps = eps;
415  this.unshrink = false;
416 
417  // initialize alpha_status
418  {
419  alpha_status = new byte[l];
420  for(int i=0;i<l;i++)
421  update_alpha_status(i);
422  }
423 
424  // initialize active set (for shrinking)
425  {
426  active_set = new int[l];
427  for(int i=0;i<l;i++)
428  active_set[i] = i;
429  active_size = l;
430  }
431 
432  // initialize gradient
433  {
434  G = new double[l];
435  G_bar = new double[l];
436  int i;
437  for(i=0;i<l;i++)
438  {
439  G[i] = p[i];
440  G_bar[i] = 0;
441  }
442  for(i=0;i<l;i++)
443  if(!is_lower_bound(i))
444  {
445  float[] Q_i = Q.get_Q(i,l);
446  double alpha_i = alpha[i];
447  int j;
448  for(j=0;j<l;j++)
449  G[j] += alpha_i*Q_i[j];
450  if(is_upper_bound(i))
451  for(j=0;j<l;j++)
452  G_bar[j] += get_C(i) * Q_i[j];
453  }
454  }
455 
456  // optimization step
457 
458  int iter = 0;
459  int max_iter = Math.max(10000000, l>Integer.MAX_VALUE/100 ? Integer.MAX_VALUE : 100*l);
460  int counter = Math.min(l,1000)+1;
461  int[] working_set = new int[2];
462 
463  while(iter < max_iter)
464  {
465  // show progress and do shrinking
466 
467  if(--counter == 0)
468  {
469  counter = Math.min(l,1000);
470  if(shrinking!=0) do_shrinking();
471  svm.info(".");
472  }
473 
474  if(select_working_set(working_set)!=0)
475  {
476  // reconstruct the whole gradient
477  reconstruct_gradient();
478  // reset active set size and check
479  active_size = l;
480  svm.info("*");
481  if(select_working_set(working_set)!=0)
482  break;
483  else
484  counter = 1; // do shrinking next iteration
485  }
486 
487  int i = working_set[0];
488  int j = working_set[1];
489 
490  ++iter;
491 
492  // update alpha[i] and alpha[j], handle bounds carefully
493 
494  float[] Q_i = Q.get_Q(i,active_size);
495  float[] Q_j = Q.get_Q(j,active_size);
496 
497  double C_i = get_C(i);
498  double C_j = get_C(j);
499 
500  double old_alpha_i = alpha[i];
501  double old_alpha_j = alpha[j];
502 
503  if(y[i]!=y[j])
504  {
505  double quad_coef = QD[i]+QD[j]+2*Q_i[j];
506  if (quad_coef <= 0)
507  quad_coef = 1e-12;
508  double delta = (-G[i]-G[j])/quad_coef;
509  double diff = alpha[i] - alpha[j];
510  alpha[i] += delta;
511  alpha[j] += delta;
512 
513  if(diff > 0)
514  {
515  if(alpha[j] < 0)
516  {
517  alpha[j] = 0;
518  alpha[i] = diff;
519  }
520  }
521  else
522  {
523  if(alpha[i] < 0)
524  {
525  alpha[i] = 0;
526  alpha[j] = -diff;
527  }
528  }
529  if(diff > C_i - C_j)
530  {
531  if(alpha[i] > C_i)
532  {
533  alpha[i] = C_i;
534  alpha[j] = C_i - diff;
535  }
536  }
537  else
538  {
539  if(alpha[j] > C_j)
540  {
541  alpha[j] = C_j;
542  alpha[i] = C_j + diff;
543  }
544  }
545  }
546  else
547  {
548  double quad_coef = QD[i]+QD[j]-2*Q_i[j];
549  if (quad_coef <= 0)
550  quad_coef = 1e-12;
551  double delta = (G[i]-G[j])/quad_coef;
552  double sum = alpha[i] + alpha[j];
553  alpha[i] -= delta;
554  alpha[j] += delta;
555 
556  if(sum > C_i)
557  {
558  if(alpha[i] > C_i)
559  {
560  alpha[i] = C_i;
561  alpha[j] = sum - C_i;
562  }
563  }
564  else
565  {
566  if(alpha[j] < 0)
567  {
568  alpha[j] = 0;
569  alpha[i] = sum;
570  }
571  }
572  if(sum > C_j)
573  {
574  if(alpha[j] > C_j)
575  {
576  alpha[j] = C_j;
577  alpha[i] = sum - C_j;
578  }
579  }
580  else
581  {
582  if(alpha[i] < 0)
583  {
584  alpha[i] = 0;
585  alpha[j] = sum;
586  }
587  }
588  }
589 
590  // update G
591 
592  double delta_alpha_i = alpha[i] - old_alpha_i;
593  double delta_alpha_j = alpha[j] - old_alpha_j;
594 
595  for(int k=0;k<active_size;k++)
596  {
597  G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
598  }
599 
600  // update alpha_status and G_bar
601 
602  {
603  boolean ui = is_upper_bound(i);
604  boolean uj = is_upper_bound(j);
605  update_alpha_status(i);
606  update_alpha_status(j);
607  int k;
608  if(ui != is_upper_bound(i))
609  {
610  Q_i = Q.get_Q(i,l);
611  if(ui)
612  for(k=0;k<l;k++)
613  G_bar[k] -= C_i * Q_i[k];
614  else
615  for(k=0;k<l;k++)
616  G_bar[k] += C_i * Q_i[k];
617  }
618 
619  if(uj != is_upper_bound(j))
620  {
621  Q_j = Q.get_Q(j,l);
622  if(uj)
623  for(k=0;k<l;k++)
624  G_bar[k] -= C_j * Q_j[k];
625  else
626  for(k=0;k<l;k++)
627  G_bar[k] += C_j * Q_j[k];
628  }
629  }
630 
631  }
632 
633  if(iter >= max_iter)
634  {
635  if(active_size < l)
636  {
637  // reconstruct the whole gradient to calculate objective value
638  reconstruct_gradient();
639  active_size = l;
640  svm.info("*");
641  }
642  System.err.print("\nWARNING: reaching max number of iterations\n");
643  }
644 
645  // calculate rho
646 
647  si.rho = calculate_rho();
648 
649  // calculate objective value
650  {
651  double v = 0;
652  int i;
653  for(i=0;i<l;i++)
654  v += alpha[i] * (G[i] + p[i]);
655 
656  si.obj = v/2;
657  }
658 
659  // put back the solution
660  {
661  for(int i=0;i<l;i++)
662  alpha_[active_set[i]] = alpha[i];
663  }
664 
665  si.upper_bound_p = Cp;
666  si.upper_bound_n = Cn;
667 
668  svm.info("\noptimization finished, #iter = "+iter+"\n");
669  }
670 
671  // return 1 if already optimal, return 0 otherwise
672  int select_working_set(int[] working_set)
673  {
674  // return i,j such that
675  // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
676  // j: mimimizes the decrease of obj value
677  // (if quadratic coefficeint <= 0, replace it with tau)
678  // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
679 
680  double Gmax = -INF;
681  double Gmax2 = -INF;
682  int Gmax_idx = -1;
683  int Gmin_idx = -1;
684  double obj_diff_min = INF;
685 
686  for(int t=0;t<active_size;t++)
687  if(y[t]==+1)
688  {
689  if(!is_upper_bound(t))
690  if(-G[t] >= Gmax)
691  {
692  Gmax = -G[t];
693  Gmax_idx = t;
694  }
695  }
696  else
697  {
698  if(!is_lower_bound(t))
699  if(G[t] >= Gmax)
700  {
701  Gmax = G[t];
702  Gmax_idx = t;
703  }
704  }
705 
706  int i = Gmax_idx;
707  float[] Q_i = null;
708  if(i != -1) // null Q_i not accessed: Gmax=-INF if i=-1
709  Q_i = Q.get_Q(i,active_size);
710 
711  for(int j=0;j<active_size;j++)
712  {
713  if(y[j]==+1)
714  {
715  if (!is_lower_bound(j))
716  {
717  double grad_diff=Gmax+G[j];
718  if (G[j] >= Gmax2)
719  Gmax2 = G[j];
720  if (grad_diff > 0)
721  {
722  double obj_diff;
723  double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
724  if (quad_coef > 0)
725  obj_diff = -(grad_diff*grad_diff)/quad_coef;
726  else
727  obj_diff = -(grad_diff*grad_diff)/1e-12;
728 
729  if (obj_diff <= obj_diff_min)
730  {
731  Gmin_idx=j;
732  obj_diff_min = obj_diff;
733  }
734  }
735  }
736  }
737  else
738  {
739  if (!is_upper_bound(j))
740  {
741  double grad_diff= Gmax-G[j];
742  if (-G[j] >= Gmax2)
743  Gmax2 = -G[j];
744  if (grad_diff > 0)
745  {
746  double obj_diff;
747  double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
748  if (quad_coef > 0)
749  obj_diff = -(grad_diff*grad_diff)/quad_coef;
750  else
751  obj_diff = -(grad_diff*grad_diff)/1e-12;
752 
753  if (obj_diff <= obj_diff_min)
754  {
755  Gmin_idx=j;
756  obj_diff_min = obj_diff;
757  }
758  }
759  }
760  }
761  }
762 
763  if(Gmax+Gmax2 < eps)
764  return 1;
765 
766  working_set[0] = Gmax_idx;
767  working_set[1] = Gmin_idx;
768  return 0;
769  }
770 
771  private boolean be_shrunk(int i, double Gmax1, double Gmax2)
772  {
773  if(is_upper_bound(i))
774  {
775  if(y[i]==+1)
776  return(-G[i] > Gmax1);
777  else
778  return(-G[i] > Gmax2);
779  }
780  else if(is_lower_bound(i))
781  {
782  if(y[i]==+1)
783  return(G[i] > Gmax2);
784  else
785  return(G[i] > Gmax1);
786  }
787  else
788  return(false);
789  }
790 
791  void do_shrinking()
792  {
793  int i;
794  double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
795  double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }
796 
797  // find maximal violating pair first
798  for(i=0;i<active_size;i++)
799  {
800  if(y[i]==+1)
801  {
802  if(!is_upper_bound(i))
803  {
804  if(-G[i] >= Gmax1)
805  Gmax1 = -G[i];
806  }
807  if(!is_lower_bound(i))
808  {
809  if(G[i] >= Gmax2)
810  Gmax2 = G[i];
811  }
812  }
813  else
814  {
815  if(!is_upper_bound(i))
816  {
817  if(-G[i] >= Gmax2)
818  Gmax2 = -G[i];
819  }
820  if(!is_lower_bound(i))
821  {
822  if(G[i] >= Gmax1)
823  Gmax1 = G[i];
824  }
825  }
826  }
827 
828  if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
829  {
830  unshrink = true;
831  reconstruct_gradient();
832  active_size = l;
833  }
834 
835  for(i=0;i<active_size;i++)
836  if (be_shrunk(i, Gmax1, Gmax2))
837  {
838  active_size--;
839  while (active_size > i)
840  {
841  if (!be_shrunk(active_size, Gmax1, Gmax2))
842  {
843  swap_index(i,active_size);
844  break;
845  }
846  active_size--;
847  }
848  }
849  }
850 
851  double calculate_rho()
852  {
853  double r;
854  int nr_free = 0;
855  double ub = INF, lb = -INF, sum_free = 0;
856  for(int i=0;i<active_size;i++)
857  {
858  double yG = y[i]*G[i];
859 
860  if(is_lower_bound(i))
861  {
862  if(y[i] > 0)
863  ub = Math.min(ub,yG);
864  else
865  lb = Math.max(lb,yG);
866  }
867  else if(is_upper_bound(i))
868  {
869  if(y[i] < 0)
870  ub = Math.min(ub,yG);
871  else
872  lb = Math.max(lb,yG);
873  }
874  else
875  {
876  ++nr_free;
877  sum_free += yG;
878  }
879  }
880 
881  if(nr_free>0)
882  r = sum_free/nr_free;
883  else
884  r = (ub+lb)/2;
885 
886  return r;
887  }
888 
889 }
890 
891 //
892 // Solver for nu-svm classification and regression
893 //
894 // additional constraint: e^T \alpha = constant
895 //
896 final class Solver_NU extends Solver
897 {
898  private SolutionInfo si;
899 
900  void Solve(int l, QMatrix Q, double[] p, byte[] y,
901  double[] alpha, double Cp, double Cn, double eps,
902  SolutionInfo si, int shrinking)
903  {
904  this.si = si;
905  super.Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
906  }
907 
908  // return 1 if already optimal, return 0 otherwise
909  int select_working_set(int[] working_set)
910  {
911  // return i,j such that y_i = y_j and
912  // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
913  // j: minimizes the decrease of obj value
914  // (if quadratic coefficeint <= 0, replace it with tau)
915  // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
916 
917  double Gmaxp = -INF;
918  double Gmaxp2 = -INF;
919  int Gmaxp_idx = -1;
920 
921  double Gmaxn = -INF;
922  double Gmaxn2 = -INF;
923  int Gmaxn_idx = -1;
924 
925  int Gmin_idx = -1;
926  double obj_diff_min = INF;
927 
928  for(int t=0;t<active_size;t++)
929  if(y[t]==+1)
930  {
931  if(!is_upper_bound(t))
932  if(-G[t] >= Gmaxp)
933  {
934  Gmaxp = -G[t];
935  Gmaxp_idx = t;
936  }
937  }
938  else
939  {
940  if(!is_lower_bound(t))
941  if(G[t] >= Gmaxn)
942  {
943  Gmaxn = G[t];
944  Gmaxn_idx = t;
945  }
946  }
947 
948  int ip = Gmaxp_idx;
949  int in = Gmaxn_idx;
950  float[] Q_ip = null;
951  float[] Q_in = null;
952  if(ip != -1) // null Q_ip not accessed: Gmaxp=-INF if ip=-1
953  Q_ip = Q.get_Q(ip,active_size);
954  if(in != -1)
955  Q_in = Q.get_Q(in,active_size);
956 
957  for(int j=0;j<active_size;j++)
958  {
959  if(y[j]==+1)
960  {
961  if (!is_lower_bound(j))
962  {
963  double grad_diff=Gmaxp+G[j];
964  if (G[j] >= Gmaxp2)
965  Gmaxp2 = G[j];
966  if (grad_diff > 0)
967  {
968  double obj_diff;
969  double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
970  if (quad_coef > 0)
971  obj_diff = -(grad_diff*grad_diff)/quad_coef;
972  else
973  obj_diff = -(grad_diff*grad_diff)/1e-12;
974 
975  if (obj_diff <= obj_diff_min)
976  {
977  Gmin_idx=j;
978  obj_diff_min = obj_diff;
979  }
980  }
981  }
982  }
983  else
984  {
985  if (!is_upper_bound(j))
986  {
987  double grad_diff=Gmaxn-G[j];
988  if (-G[j] >= Gmaxn2)
989  Gmaxn2 = -G[j];
990  if (grad_diff > 0)
991  {
992  double obj_diff;
993  double quad_coef = QD[in]+QD[j]-2*Q_in[j];
994  if (quad_coef > 0)
995  obj_diff = -(grad_diff*grad_diff)/quad_coef;
996  else
997  obj_diff = -(grad_diff*grad_diff)/1e-12;
998 
999  if (obj_diff <= obj_diff_min)
1000  {
1001  Gmin_idx=j;
1002  obj_diff_min = obj_diff;
1003  }
1004  }
1005  }
1006  }
1007  }
1008 
1009  if(Math.max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
1010  return 1;
1011 
1012  if(y[Gmin_idx] == +1)
1013  working_set[0] = Gmaxp_idx;
1014  else
1015  working_set[0] = Gmaxn_idx;
1016  working_set[1] = Gmin_idx;
1017 
1018  return 0;
1019  }
1020 
1021  private boolean be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1022  {
1023  if(is_upper_bound(i))
1024  {
1025  if(y[i]==+1)
1026  return(-G[i] > Gmax1);
1027  else
1028  return(-G[i] > Gmax4);
1029  }
1030  else if(is_lower_bound(i))
1031  {
1032  if(y[i]==+1)
1033  return(G[i] > Gmax2);
1034  else
1035  return(G[i] > Gmax3);
1036  }
1037  else
1038  return(false);
1039  }
1040 
1041  void do_shrinking()
1042  {
1043  double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
1044  double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
1045  double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
1046  double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
1047 
1048  // find maximal violating pair first
1049  int i;
1050  for(i=0;i<active_size;i++)
1051  {
1052  if(!is_upper_bound(i))
1053  {
1054  if(y[i]==+1)
1055  {
1056  if(-G[i] > Gmax1) Gmax1 = -G[i];
1057  }
1058  else if(-G[i] > Gmax4) Gmax4 = -G[i];
1059  }
1060  if(!is_lower_bound(i))
1061  {
1062  if(y[i]==+1)
1063  {
1064  if(G[i] > Gmax2) Gmax2 = G[i];
1065  }
1066  else if(G[i] > Gmax3) Gmax3 = G[i];
1067  }
1068  }
1069 
1070  if(unshrink == false && Math.max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
1071  {
1072  unshrink = true;
1073  reconstruct_gradient();
1074  active_size = l;
1075  }
1076 
1077  for(i=0;i<active_size;i++)
1078  if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1079  {
1080  active_size--;
1081  while (active_size > i)
1082  {
1083  if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1084  {
1085  swap_index(i,active_size);
1086  break;
1087  }
1088  active_size--;
1089  }
1090  }
1091  }
1092 
1093  double calculate_rho()
1094  {
1095  int nr_free1 = 0,nr_free2 = 0;
1096  double ub1 = INF, ub2 = INF;
1097  double lb1 = -INF, lb2 = -INF;
1098  double sum_free1 = 0, sum_free2 = 0;
1099 
1100  for(int i=0;i<active_size;i++)
1101  {
1102  if(y[i]==+1)
1103  {
1104  if(is_lower_bound(i))
1105  ub1 = Math.min(ub1,G[i]);
1106  else if(is_upper_bound(i))
1107  lb1 = Math.max(lb1,G[i]);
1108  else
1109  {
1110  ++nr_free1;
1111  sum_free1 += G[i];
1112  }
1113  }
1114  else
1115  {
1116  if(is_lower_bound(i))
1117  ub2 = Math.min(ub2,G[i]);
1118  else if(is_upper_bound(i))
1119  lb2 = Math.max(lb2,G[i]);
1120  else
1121  {
1122  ++nr_free2;
1123  sum_free2 += G[i];
1124  }
1125  }
1126  }
1127 
1128  double r1,r2;
1129  if(nr_free1 > 0)
1130  r1 = sum_free1/nr_free1;
1131  else
1132  r1 = (ub1+lb1)/2;
1133 
1134  if(nr_free2 > 0)
1135  r2 = sum_free2/nr_free2;
1136  else
1137  r2 = (ub2+lb2)/2;
1138 
1139  si.r = (r1+r2)/2;
1140  return (r1-r2)/2;
1141  }
1142 }
1143 
1144 //
1145 // Q matrices for various formulations
1146 //
1147 class SVC_Q extends Kernel
1148 {
1149  private final byte[] y;
1150  private final Cache cache;
1151  private final double[] QD;
1152 
1153  SVC_Q(svm_problem prob, svm_parameter param, byte[] y_)
1154  {
1155  super(prob.l, prob.x, param);
1156  y = (byte[])y_.clone();
1157  cache = new Cache(prob.l,(long)(param.cache_size*(1<<20)));
1158  QD = new double[prob.l];
1159  for(int i=0;i<prob.l;i++)
1160  QD[i] = kernel_function(i,i);
1161  }
1162 
1163  float[] get_Q(int i, int len)
1164  {
1165  float[][] data = new float[1][];
1166  int start, j;
1167  if((start = cache.get_data(i,data,len)) < len)
1168  {
1169  for(j=start;j<len;j++)
1170  data[0][j] = (float)(y[i]*y[j]*kernel_function(i,j));
1171  }
1172  return data[0];
1173  }
1174 
1175  double[] get_QD()
1176  {
1177  return QD;
1178  }
1179 
1180  void swap_index(int i, int j)
1181  {
1182  cache.swap_index(i,j);
1183  super.swap_index(i,j);
1184  do {byte _=y[i]; y[i]=y[j]; y[j]=_;} while(false);
1185  do {double _=QD[i]; QD[i]=QD[j]; QD[j]=_;} while(false);
1186  }
1187 }
1188 
1189 class ONE_CLASS_Q extends Kernel
1190 {
1191  private final Cache cache;
1192  private final double[] QD;
1193 
1194  ONE_CLASS_Q(svm_problem prob, svm_parameter param)
1195  {
1196  super(prob.l, prob.x, param);
1197  cache = new Cache(prob.l,(long)(param.cache_size*(1<<20)));
1198  QD = new double[prob.l];
1199  for(int i=0;i<prob.l;i++)
1200  QD[i] = kernel_function(i,i);
1201  }
1202 
1203  float[] get_Q(int i, int len)
1204  {
1205  float[][] data = new float[1][];
1206  int start, j;
1207  if((start = cache.get_data(i,data,len)) < len)
1208  {
1209  for(j=start;j<len;j++)
1210  data[0][j] = (float)kernel_function(i,j);
1211  }
1212  return data[0];
1213  }
1214 
1215  double[] get_QD()
1216  {
1217  return QD;
1218  }
1219 
1220  void swap_index(int i, int j)
1221  {
1222  cache.swap_index(i,j);
1223  super.swap_index(i,j);
1224  do {double _=QD[i]; QD[i]=QD[j]; QD[j]=_;} while(false);
1225  }
1226 }
1227 
1228 class SVR_Q extends Kernel
1229 {
1230  private final int l;
1231  private final Cache cache;
1232  private final byte[] sign;
1233  private final int[] index;
1234  private int next_buffer;
1235  private float[][] buffer;
1236  private final double[] QD;
1237 
1239  {
1240  super(prob.l, prob.x, param);
1241  l = prob.l;
1242  cache = new Cache(l,(long)(param.cache_size*(1<<20)));
1243  QD = new double[2*l];
1244  sign = new byte[2*l];
1245  index = new int[2*l];
1246  for(int k=0;k<l;k++)
1247  {
1248  sign[k] = 1;
1249  sign[k+l] = -1;
1250  index[k] = k;
1251  index[k+l] = k;
1252  QD[k] = kernel_function(k,k);
1253  QD[k+l] = QD[k];
1254  }
1255  buffer = new float[2][2*l];
1256  next_buffer = 0;
1257  }
1258 
1259  void swap_index(int i, int j)
1260  {
1261  do {byte _=sign[i]; sign[i]=sign[j]; sign[j]=_;} while(false);
1262  do {int _=index[i]; index[i]=index[j]; index[j]=_;} while(false);
1263  do {double _=QD[i]; QD[i]=QD[j]; QD[j]=_;} while(false);
1264  }
1265 
1266  float[] get_Q(int i, int len)
1267  {
1268  float[][] data = new float[1][];
1269  int j, real_i = index[i];
1270  if(cache.get_data(real_i,data,l) < l)
1271  {
1272  for(j=0;j<l;j++)
1273  data[0][j] = (float)kernel_function(real_i,j);
1274  }
1275 
1276  // reorder and copy
1277  float buf[] = buffer[next_buffer];
1278  next_buffer = 1 - next_buffer;
1279  byte si = sign[i];
1280  for(j=0;j<len;j++)
1281  buf[j] = (float) si * sign[j] * data[0][index[j]];
1282  return buf;
1283  }
1284 
1285  double[] get_QD()
1286  {
1287  return QD;
1288  }
1289 }
1290 
1291 public class svm {
1292  //
1293  // construct and solve various formulations
1294  //
1295  public static final int LIBSVM_VERSION=314;
1296  public static final Random rand = new Random();
1297 
1298  private static svm_print_interface svm_print_stdout = new svm_print_interface()
1299  {
1300  public void print(String s)
1301  {
1302  System.out.print(s);
1303  System.out.flush();
1304  }
1305  };
1306 
1307  private static svm_print_interface svm_print_string = svm_print_stdout;
1308 
1309  static void info(String s)
1310  {
1311  svm_print_string.print(s);
1312  }
1313 
1315  double[] alpha, Solver.SolutionInfo si,
1316  double Cp, double Cn)
1317  {
1318  int l = prob.l;
1319  double[] minus_ones = new double[l];
1320  byte[] y = new byte[l];
1321 
1322  int i;
1323 
1324  for(i=0;i<l;i++)
1325  {
1326  alpha[i] = 0;
1327  minus_ones[i] = -1;
1328  if(prob.y[i] > 0) y[i] = +1; else y[i] = -1;
1329  }
1330 
1331  Solver s = new Solver();
1332  s.Solve(l, new SVC_Q(prob,param,y), minus_ones, y,
1333  alpha, Cp, Cn, param.eps, si, param.shrinking);
1334 
1335  double sum_alpha=0;
1336  for(i=0;i<l;i++)
1337  sum_alpha += alpha[i];
1338 
1339  if (Cp==Cn)
1340  svm.info("nu = "+sum_alpha/(Cp*prob.l)+"\n");
1341 
1342  for(i=0;i<l;i++)
1343  alpha[i] *= y[i];
1344  }
1345 
1347  double[] alpha, Solver.SolutionInfo si)
1348  {
1349  int i;
1350  int l = prob.l;
1351  double nu = param.nu;
1352 
1353  byte[] y = new byte[l];
1354 
1355  for(i=0;i<l;i++)
1356  if(prob.y[i]>0)
1357  y[i] = +1;
1358  else
1359  y[i] = -1;
1360 
1361  double sum_pos = nu*l/2;
1362  double sum_neg = nu*l/2;
1363 
1364  for(i=0;i<l;i++)
1365  if(y[i] == +1)
1366  {
1367  alpha[i] = Math.min(1.0,sum_pos);
1368  sum_pos -= alpha[i];
1369  }
1370  else
1371  {
1372  alpha[i] = Math.min(1.0,sum_neg);
1373  sum_neg -= alpha[i];
1374  }
1375 
1376  double[] zeros = new double[l];
1377 
1378  for(i=0;i<l;i++)
1379  zeros[i] = 0;
1380 
1381  Solver_NU s = new Solver_NU();
1382  s.Solve(l, new SVC_Q(prob,param,y), zeros, y,
1383  alpha, 1.0, 1.0, param.eps, si, param.shrinking);
1384  double r = si.r;
1385 
1386  svm.info("C = "+1/r+"\n");
1387 
1388  for(i=0;i<l;i++)
1389  alpha[i] *= y[i]/r;
1390 
1391  si.rho /= r;
1392  si.obj /= (r*r);
1393  si.upper_bound_p = 1/r;
1394  si.upper_bound_n = 1/r;
1395  }
1396 
1398  double[] alpha, Solver.SolutionInfo si)
1399  {
1400  int l = prob.l;
1401  double[] zeros = new double[l];
1402  byte[] ones = new byte[l];
1403  int i;
1404 
1405  int n = (int)(param.nu*prob.l); // # of alpha's at upper bound
1406 
1407  for(i=0;i<n;i++)
1408  alpha[i] = 1;
1409  if(n<prob.l)
1410  alpha[n] = param.nu * prob.l - n;
1411  for(i=n+1;i<l;i++)
1412  alpha[i] = 0;
1413 
1414  for(i=0;i<l;i++)
1415  {
1416  zeros[i] = 0;
1417  ones[i] = 1;
1418  }
1419 
1420  Solver s = new Solver();
1421  s.Solve(l, new ONE_CLASS_Q(prob,param), zeros, ones,
1422  alpha, 1.0, 1.0, param.eps, si, param.shrinking);
1423  }
1424 
1426  double[] alpha, Solver.SolutionInfo si)
1427  {
1428  int l = prob.l;
1429  double[] alpha2 = new double[2*l];
1430  double[] linear_term = new double[2*l];
1431  byte[] y = new byte[2*l];
1432  int i;
1433 
1434  for(i=0;i<l;i++)
1435  {
1436  alpha2[i] = 0;
1437  linear_term[i] = param.p - prob.y[i];
1438  y[i] = 1;
1439 
1440  alpha2[i+l] = 0;
1441  linear_term[i+l] = param.p + prob.y[i];
1442  y[i+l] = -1;
1443  }
1444 
1445  Solver s = new Solver();
1446  s.Solve(2*l, new SVR_Q(prob,param), linear_term, y,
1447  alpha2, param.C, param.C, param.eps, si, param.shrinking);
1448 
1449  double sum_alpha = 0;
1450  for(i=0;i<l;i++)
1451  {
1452  alpha[i] = alpha2[i] - alpha2[i+l];
1453  sum_alpha += Math.abs(alpha[i]);
1454  }
1455  svm.info("nu = "+sum_alpha/(param.C*l)+"\n");
1456  }
1457 
1459  double[] alpha, Solver.SolutionInfo si)
1460  {
1461  int l = prob.l;
1462  double C = param.C;
1463  double[] alpha2 = new double[2*l];
1464  double[] linear_term = new double[2*l];
1465  byte[] y = new byte[2*l];
1466  int i;
1467 
1468  double sum = C * param.nu * l / 2;
1469  for(i=0;i<l;i++)
1470  {
1471  alpha2[i] = alpha2[i+l] = Math.min(sum,C);
1472  sum -= alpha2[i];
1473 
1474  linear_term[i] = - prob.y[i];
1475  y[i] = 1;
1476 
1477  linear_term[i+l] = prob.y[i];
1478  y[i+l] = -1;
1479  }
1480 
1481  Solver_NU s = new Solver_NU();
1482  s.Solve(2*l, new SVR_Q(prob,param), linear_term, y,
1483  alpha2, C, C, param.eps, si, param.shrinking);
1484 
1485  svm.info("epsilon = "+(-si.r)+"\n");
1486 
1487  for(i=0;i<l;i++)
1488  alpha[i] = alpha2[i] - alpha2[i+l];
1489  }
1490 
1491  //
1492  // decision_function
1493  //
1494  static class decision_function
1495  {
1496  double[] alpha;
1497  double rho;
1498  };
1499 
1500  static decision_function svm_train_one(
1502  double Cp, double Cn)
1503  {
1504  double[] alpha = new double[prob.l];
1505  Solver.SolutionInfo si = new Solver.SolutionInfo();
1506  switch(param.svm_type)
1507  {
1508  case svm_parameter.C_SVC:
1509  solve_c_svc(prob,param,alpha,si,Cp,Cn);
1510  break;
1511  case svm_parameter.NU_SVC:
1512  solve_nu_svc(prob,param,alpha,si);
1513  break;
1514  case svm_parameter.ONE_CLASS:
1515  solve_one_class(prob,param,alpha,si);
1516  break;
1518  solve_epsilon_svr(prob,param,alpha,si);
1519  break;
1520  case svm_parameter.NU_SVR:
1521  solve_nu_svr(prob,param,alpha,si);
1522  break;
1523  }
1524 
1525  svm.info("obj = "+si.obj+", rho = "+si.rho+"\n");
1526 
1527  // output SVs
1528 
1529  int nSV = 0;
1530  int nBSV = 0;
1531  for(int i=0;i<prob.l;i++)
1532  {
1533  if(Math.abs(alpha[i]) > 0)
1534  {
1535  ++nSV;
1536  if(prob.y[i] > 0)
1537  {
1538  if(Math.abs(alpha[i]) >= si.upper_bound_p)
1539  ++nBSV;
1540  }
1541  else
1542  {
1543  if(Math.abs(alpha[i]) >= si.upper_bound_n)
1544  ++nBSV;
1545  }
1546  }
1547  }
1548 
1549  svm.info("nSV = "+nSV+", nBSV = "+nBSV+"\n");
1550 
1551  decision_function f = new decision_function();
1552  f.alpha = alpha;
1553  f.rho = si.rho;
1554  return f;
1555  }
1556 
1557  // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
1558  private static void sigmoid_train(int l, double[] dec_values, double[] labels,
1559  double[] probAB)
1560  {
1561  double A, B;
1562  double prior1=0, prior0 = 0;
1563  int i;
1564 
1565  for (i=0;i<l;i++)
1566  if (labels[i] > 0) prior1+=1;
1567  else prior0+=1;
1568 
1569  int max_iter=100; // Maximal number of iterations
1570  double min_step=1e-10; // Minimal step taken in line search
1571  double sigma=1e-12; // For numerically strict PD of Hessian
1572  double eps=1e-5;
1573  double hiTarget=(prior1+1.0)/(prior1+2.0);
1574  double loTarget=1/(prior0+2.0);
1575  double[] t= new double[l];
1576  double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1577  double newA,newB,newf,d1,d2;
1578  int iter;
1579 
1580  // Initial Point and Initial Fun Value
1581  A=0.0; B=Math.log((prior0+1.0)/(prior1+1.0));
1582  double fval = 0.0;
1583 
1584  for (i=0;i<l;i++)
1585  {
1586  if (labels[i]>0) t[i]=hiTarget;
1587  else t[i]=loTarget;
1588  fApB = dec_values[i]*A+B;
1589  if (fApB>=0)
1590  fval += t[i]*fApB + Math.log(1+Math.exp(-fApB));
1591  else
1592  fval += (t[i] - 1)*fApB +Math.log(1+Math.exp(fApB));
1593  }
1594  for (iter=0;iter<max_iter;iter++)
1595  {
1596  // Update Gradient and Hessian (use H' = H + sigma I)
1597  h11=sigma; // numerically ensures strict PD
1598  h22=sigma;
1599  h21=0.0;g1=0.0;g2=0.0;
1600  for (i=0;i<l;i++)
1601  {
1602  fApB = dec_values[i]*A+B;
1603  if (fApB >= 0)
1604  {
1605  p=Math.exp(-fApB)/(1.0+Math.exp(-fApB));
1606  q=1.0/(1.0+Math.exp(-fApB));
1607  }
1608  else
1609  {
1610  p=1.0/(1.0+Math.exp(fApB));
1611  q=Math.exp(fApB)/(1.0+Math.exp(fApB));
1612  }
1613  d2=p*q;
1614  h11+=dec_values[i]*dec_values[i]*d2;
1615  h22+=d2;
1616  h21+=dec_values[i]*d2;
1617  d1=t[i]-p;
1618  g1+=dec_values[i]*d1;
1619  g2+=d1;
1620  }
1621 
1622  // Stopping Criteria
1623  if (Math.abs(g1)<eps && Math.abs(g2)<eps)
1624  break;
1625 
1626  // Finding Newton direction: -inv(H') * g
1627  det=h11*h22-h21*h21;
1628  dA=-(h22*g1 - h21 * g2) / det;
1629  dB=-(-h21*g1+ h11 * g2) / det;
1630  gd=g1*dA+g2*dB;
1631 
1632 
1633  stepsize = 1; // Line Search
1634  while (stepsize >= min_step)
1635  {
1636  newA = A + stepsize * dA;
1637  newB = B + stepsize * dB;
1638 
1639  // New function value
1640  newf = 0.0;
1641  for (i=0;i<l;i++)
1642  {
1643  fApB = dec_values[i]*newA+newB;
1644  if (fApB >= 0)
1645  newf += t[i]*fApB + Math.log(1+Math.exp(-fApB));
1646  else
1647  newf += (t[i] - 1)*fApB +Math.log(1+Math.exp(fApB));
1648  }
1649  // Check sufficient decrease
1650  if (newf<fval+0.0001*stepsize*gd)
1651  {
1652  A=newA;B=newB;fval=newf;
1653  break;
1654  }
1655  else
1656  stepsize = stepsize / 2.0;
1657  }
1658 
1659  if (stepsize < min_step)
1660  {
1661  svm.info("Line search fails in two-class probability estimates\n");
1662  break;
1663  }
1664  }
1665 
1666  if (iter>=max_iter)
1667  svm.info("Reaching maximal iterations in two-class probability estimates\n");
1668  probAB[0]=A;probAB[1]=B;
1669  }
1670 
1671  private static double sigmoid_predict(double decision_value, double A, double B)
1672  {
1673  double fApB = decision_value*A+B;
1674  if (fApB >= 0)
1675  return Math.exp(-fApB)/(1.0+Math.exp(-fApB));
1676  else
1677  return 1.0/(1+Math.exp(fApB)) ;
1678  }
1679 
1680  // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1681  private static void multiclass_probability(int k, double[][] r, double[] p)
1682  {
1683  int t,j;
1684  int iter = 0, max_iter=Math.max(100,k);
1685  double[][] Q=new double[k][k];
1686  double[] Qp=new double[k];
1687  double pQp, eps=0.005/k;
1688 
1689  for (t=0;t<k;t++)
1690  {
1691  p[t]=1.0/k; // Valid if k = 1
1692  Q[t][t]=0;
1693  for (j=0;j<t;j++)
1694  {
1695  Q[t][t]+=r[j][t]*r[j][t];
1696  Q[t][j]=Q[j][t];
1697  }
1698  for (j=t+1;j<k;j++)
1699  {
1700  Q[t][t]+=r[j][t]*r[j][t];
1701  Q[t][j]=-r[j][t]*r[t][j];
1702  }
1703  }
1704  for (iter=0;iter<max_iter;iter++)
1705  {
1706  // stopping condition, recalculate QP,pQP for numerical accuracy
1707  pQp=0;
1708  for (t=0;t<k;t++)
1709  {
1710  Qp[t]=0;
1711  for (j=0;j<k;j++)
1712  Qp[t]+=Q[t][j]*p[j];
1713  pQp+=p[t]*Qp[t];
1714  }
1715  double max_error=0;
1716  for (t=0;t<k;t++)
1717  {
1718  double error=Math.abs(Qp[t]-pQp);
1719  if (error>max_error)
1720  max_error=error;
1721  }
1722  if (max_error<eps) break;
1723 
1724  for (t=0;t<k;t++)
1725  {
1726  double diff=(-Qp[t]+pQp)/Q[t][t];
1727  p[t]+=diff;
1728  pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1729  for (j=0;j<k;j++)
1730  {
1731  Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1732  p[j]/=(1+diff);
1733  }
1734  }
1735  }
1736  if (iter>=max_iter)
1737  svm.info("Exceeds max_iter in multiclass_prob\n");
1738  }
1739 
1740  // Cross-validation decision values for probability estimates
1741  private static void svm_binary_svc_probability(svm_problem prob, svm_parameter param, double Cp, double Cn, double[] probAB)
1742  {
1743  int i;
1744  int nr_fold = 5;
1745  int[] perm = new int[prob.l];
1746  double[] dec_values = new double[prob.l];
1747 
1748  // random shuffle
1749  for(i=0;i<prob.l;i++) perm[i]=i;
1750  for(i=0;i<prob.l;i++)
1751  {
1752  int j = i+rand.nextInt(prob.l-i);
1753  do {int _=perm[i]; perm[i]=perm[j]; perm[j]=_;} while(false);
1754  }
1755  for(i=0;i<nr_fold;i++)
1756  {
1757  int begin = i*prob.l/nr_fold;
1758  int end = (i+1)*prob.l/nr_fold;
1759  int j,k;
1760  svm_problem subprob = new svm_problem();
1761 
1762  subprob.l = prob.l-(end-begin);
1763  subprob.x = new svm_node[subprob.l][];
1764  subprob.y = new double[subprob.l];
1765 
1766  k=0;
1767  for(j=0;j<begin;j++)
1768  {
1769  subprob.x[k] = prob.x[perm[j]];
1770  subprob.y[k] = prob.y[perm[j]];
1771  ++k;
1772  }
1773  for(j=end;j<prob.l;j++)
1774  {
1775  subprob.x[k] = prob.x[perm[j]];
1776  subprob.y[k] = prob.y[perm[j]];
1777  ++k;
1778  }
1779  int p_count=0,n_count=0;
1780  for(j=0;j<k;j++)
1781  if(subprob.y[j]>0)
1782  p_count++;
1783  else
1784  n_count++;
1785 
1786  if(p_count==0 && n_count==0)
1787  for(j=begin;j<end;j++)
1788  dec_values[perm[j]] = 0;
1789  else if(p_count > 0 && n_count == 0)
1790  for(j=begin;j<end;j++)
1791  dec_values[perm[j]] = 1;
1792  else if(p_count == 0 && n_count > 0)
1793  for(j=begin;j<end;j++)
1794  dec_values[perm[j]] = -1;
1795  else
1796  {
1797  svm_parameter subparam = (svm_parameter)param.clone();
1798  subparam.probability=0;
1799  subparam.C=1.0;
1800  subparam.nr_weight=2;
1801  subparam.weight_label = new int[2];
1802  subparam.weight = new double[2];
1803  subparam.weight_label[0]=+1;
1804  subparam.weight_label[1]=-1;
1805  subparam.weight[0]=Cp;
1806  subparam.weight[1]=Cn;
1807  svm_model submodel = svm_train(subprob,subparam);
1808  for(j=begin;j<end;j++)
1809  {
1810  double[] dec_value=new double[1];
1811  svm_predict_values(submodel,prob.x[perm[j]],dec_value);
1812  dec_values[perm[j]]=dec_value[0];
1813  // ensure +1 -1 order; reason not using CV subroutine
1814  dec_values[perm[j]] *= submodel.label[0];
1815  }
1816  }
1817  }
1818  sigmoid_train(prob.l,dec_values,prob.y,probAB);
1819  }
1820 
1821  // Return parameter of a Laplace distribution
1822  private static double svm_svr_probability(svm_problem prob, svm_parameter param)
1823  {
1824  int i;
1825  int nr_fold = 5;
1826  double[] ymv = new double[prob.l];
1827  double mae = 0;
1828 
1829  svm_parameter newparam = (svm_parameter)param.clone();
1830  newparam.probability = 0;
1831  svm_cross_validation(prob,newparam,nr_fold,ymv);
1832  for(i=0;i<prob.l;i++)
1833  {
1834  ymv[i]=prob.y[i]-ymv[i];
1835  mae += Math.abs(ymv[i]);
1836  }
1837  mae /= prob.l;
1838  double std=Math.sqrt(2*mae*mae);
1839  int count=0;
1840  mae=0;
1841  for(i=0;i<prob.l;i++)
1842  if (Math.abs(ymv[i]) > 5*std)
1843  count=count+1;
1844  else
1845  mae+=Math.abs(ymv[i]);
1846  mae /= (prob.l-count);
1847  svm.info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma="+mae+"\n");
1848  return mae;
1849  }
1850 
1851  // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
1852  // perm, length l, must be allocated before calling this subroutine
1853  private static void svm_group_classes(svm_problem prob, int[] nr_class_ret, int[][] label_ret, int[][] start_ret, int[][] count_ret, int[] perm)
1854  {
1855  int l = prob.l;
1856  int max_nr_class = 16;
1857  int nr_class = 0;
1858  int[] label = new int[max_nr_class];
1859  int[] count = new int[max_nr_class];
1860  int[] data_label = new int[l];
1861  int i;
1862 
1863  for(i=0;i<l;i++)
1864  {
1865  int this_label = (int)(prob.y[i]);
1866  int j;
1867  for(j=0;j<nr_class;j++)
1868  {
1869  if(this_label == label[j])
1870  {
1871  ++count[j];
1872  break;
1873  }
1874  }
1875  data_label[i] = j;
1876  if(j == nr_class)
1877  {
1878  if(nr_class == max_nr_class)
1879  {
1880  max_nr_class *= 2;
1881  int[] new_data = new int[max_nr_class];
1882  System.arraycopy(label,0,new_data,0,label.length);
1883  label = new_data;
1884  new_data = new int[max_nr_class];
1885  System.arraycopy(count,0,new_data,0,count.length);
1886  count = new_data;
1887  }
1888  label[nr_class] = this_label;
1889  count[nr_class] = 1;
1890  ++nr_class;
1891  }
1892  }
1893 
1894  int[] start = new int[nr_class];
1895  start[0] = 0;
1896  for(i=1;i<nr_class;i++)
1897  start[i] = start[i-1]+count[i-1];
1898  for(i=0;i<l;i++)
1899  {
1900  perm[start[data_label[i]]] = i;
1901  ++start[data_label[i]];
1902  }
1903  start[0] = 0;
1904  for(i=1;i<nr_class;i++)
1905  start[i] = start[i-1]+count[i-1];
1906 
1907  nr_class_ret[0] = nr_class;
1908  label_ret[0] = label;
1909  start_ret[0] = start;
1910  count_ret[0] = count;
1911  }
1912 
1913  //
1914  // Interface functions
1915  //
1916  public static svm_model svm_train(svm_problem prob, svm_parameter param)
1917  {
1918  svm_model model = new svm_model();
1919  model.param = param;
1920 
1921  if(param.svm_type == svm_parameter.ONE_CLASS ||
1922  param.svm_type == svm_parameter.EPSILON_SVR ||
1923  param.svm_type == svm_parameter.NU_SVR)
1924  {
1925  // regression or one-class-svm
1926  model.nr_class = 2;
1927  model.label = null;
1928  model.nSV = null;
1929  model.probA = null; model.probB = null;
1930  model.sv_coef = new double[1][];
1931 
1932  if(param.probability == 1 &&
1933  (param.svm_type == svm_parameter.EPSILON_SVR ||
1934  param.svm_type == svm_parameter.NU_SVR))
1935  {
1936  model.probA = new double[1];
1937  model.probA[0] = svm_svr_probability(prob,param);
1938  }
1939 
1940  decision_function f = svm_train_one(prob,param,0,0);
1941  model.rho = new double[1];
1942  model.rho[0] = f.rho;
1943 
1944  int nSV = 0;
1945  int i;
1946  for(i=0;i<prob.l;i++)
1947  if(Math.abs(f.alpha[i]) > 0) ++nSV;
1948  model.l = nSV;
1949  model.SV = new svm_node[nSV][];
1950  model.sv_coef[0] = new double[nSV];
1951  model.sv_indices = new int[nSV];
1952  int j = 0;
1953  for(i=0;i<prob.l;i++)
1954  if(Math.abs(f.alpha[i]) > 0)
1955  {
1956  model.SV[j] = prob.x[i];
1957  model.sv_coef[0][j] = f.alpha[i];
1958  model.sv_indices[j] = i+1;
1959  ++j;
1960  }
1961  }
1962  else
1963  {
1964  // classification
1965  int l = prob.l;
1966  int[] tmp_nr_class = new int[1];
1967  int[][] tmp_label = new int[1][];
1968  int[][] tmp_start = new int[1][];
1969  int[][] tmp_count = new int[1][];
1970  int[] perm = new int[l];
1971 
1972  // group training data of the same class
1973  svm_group_classes(prob,tmp_nr_class,tmp_label,tmp_start,tmp_count,perm);
1974  int nr_class = tmp_nr_class[0];
1975  int[] label = tmp_label[0];
1976  int[] start = tmp_start[0];
1977  int[] count = tmp_count[0];
1978 
1979  if(nr_class == 1)
1980  svm.info("WARNING: training data in only one class. See README for details.\n");
1981 
1982  svm_node[][] x = new svm_node[l][];
1983  int i;
1984  for(i=0;i<l;i++)
1985  x[i] = prob.x[perm[i]];
1986 
1987  // calculate weighted C
1988 
1989  double[] weighted_C = new double[nr_class];
1990  for(i=0;i<nr_class;i++)
1991  weighted_C[i] = param.C;
1992  for(i=0;i<param.nr_weight;i++)
1993  {
1994  int j;
1995  for(j=0;j<nr_class;j++)
1996  if(param.weight_label[i] == label[j])
1997  break;
1998  if(j == nr_class)
1999  System.err.print("WARNING: class label "+param.weight_label[i]+" specified in weight is not found\n");
2000  else
2001  weighted_C[j] *= param.weight[i];
2002  }
2003 
2004  // train k*(k-1)/2 models
2005 
2006  boolean[] nonzero = new boolean[l];
2007  for(i=0;i<l;i++)
2008  nonzero[i] = false;
2009  decision_function[] f = new decision_function[nr_class*(nr_class-1)/2];
2010 
2011  double[] probA=null,probB=null;
2012  if (param.probability == 1)
2013  {
2014  probA=new double[nr_class*(nr_class-1)/2];
2015  probB=new double[nr_class*(nr_class-1)/2];
2016  }
2017 
2018  int p = 0;
2019  for(i=0;i<nr_class;i++)
2020  for(int j=i+1;j<nr_class;j++)
2021  {
2022  svm_problem sub_prob = new svm_problem();
2023  int si = start[i], sj = start[j];
2024  int ci = count[i], cj = count[j];
2025  sub_prob.l = ci+cj;
2026  sub_prob.x = new svm_node[sub_prob.l][];
2027  sub_prob.y = new double[sub_prob.l];
2028  int k;
2029  for(k=0;k<ci;k++)
2030  {
2031  sub_prob.x[k] = x[si+k];
2032  sub_prob.y[k] = +1;
2033  }
2034  for(k=0;k<cj;k++)
2035  {
2036  sub_prob.x[ci+k] = x[sj+k];
2037  sub_prob.y[ci+k] = -1;
2038  }
2039 
2040  if(param.probability == 1)
2041  {
2042  double[] probAB=new double[2];
2043  svm_binary_svc_probability(sub_prob,param,weighted_C[i],weighted_C[j],probAB);
2044  probA[p]=probAB[0];
2045  probB[p]=probAB[1];
2046  }
2047 
2048  f[p] = svm_train_one(sub_prob,param,weighted_C[i],weighted_C[j]);
2049  for(k=0;k<ci;k++)
2050  if(!nonzero[si+k] && Math.abs(f[p].alpha[k]) > 0)
2051  nonzero[si+k] = true;
2052  for(k=0;k<cj;k++)
2053  if(!nonzero[sj+k] && Math.abs(f[p].alpha[ci+k]) > 0)
2054  nonzero[sj+k] = true;
2055  ++p;
2056  }
2057 
2058  // build output
2059 
2060  model.nr_class = nr_class;
2061 
2062  model.label = new int[nr_class];
2063  for(i=0;i<nr_class;i++)
2064  model.label[i] = label[i];
2065 
2066  model.rho = new double[nr_class*(nr_class-1)/2];
2067  for(i=0;i<nr_class*(nr_class-1)/2;i++)
2068  model.rho[i] = f[i].rho;
2069 
2070  if(param.probability == 1)
2071  {
2072  model.probA = new double[nr_class*(nr_class-1)/2];
2073  model.probB = new double[nr_class*(nr_class-1)/2];
2074  for(i=0;i<nr_class*(nr_class-1)/2;i++)
2075  {
2076  model.probA[i] = probA[i];
2077  model.probB[i] = probB[i];
2078  }
2079  }
2080  else
2081  {
2082  model.probA=null;
2083  model.probB=null;
2084  }
2085 
2086  int nnz = 0;
2087  int[] nz_count = new int[nr_class];
2088  model.nSV = new int[nr_class];
2089  for(i=0;i<nr_class;i++)
2090  {
2091  int nSV = 0;
2092  for(int j=0;j<count[i];j++)
2093  if(nonzero[start[i]+j])
2094  {
2095  ++nSV;
2096  ++nnz;
2097  }
2098  model.nSV[i] = nSV;
2099  nz_count[i] = nSV;
2100  }
2101 
2102  svm.info("Total nSV = "+nnz+"\n");
2103 
2104  model.l = nnz;
2105  model.SV = new svm_node[nnz][];
2106  model.sv_indices = new int[nnz];
2107  p = 0;
2108  for(i=0;i<l;i++)
2109  if(nonzero[i])
2110  {
2111  model.SV[p] = x[i];
2112  model.sv_indices[p++] = perm[i] + 1;
2113  }
2114 
2115  int[] nz_start = new int[nr_class];
2116  nz_start[0] = 0;
2117  for(i=1;i<nr_class;i++)
2118  nz_start[i] = nz_start[i-1]+nz_count[i-1];
2119 
2120  model.sv_coef = new double[nr_class-1][];
2121  for(i=0;i<nr_class-1;i++)
2122  model.sv_coef[i] = new double[nnz];
2123 
2124  p = 0;
2125  for(i=0;i<nr_class;i++)
2126  for(int j=i+1;j<nr_class;j++)
2127  {
2128  // classifier (i,j): coefficients with
2129  // i are in sv_coef[j-1][nz_start[i]...],
2130  // j are in sv_coef[i][nz_start[j]...]
2131 
2132  int si = start[i];
2133  int sj = start[j];
2134  int ci = count[i];
2135  int cj = count[j];
2136 
2137  int q = nz_start[i];
2138  int k;
2139  for(k=0;k<ci;k++)
2140  if(nonzero[si+k])
2141  model.sv_coef[j-1][q++] = f[p].alpha[k];
2142  q = nz_start[j];
2143  for(k=0;k<cj;k++)
2144  if(nonzero[sj+k])
2145  model.sv_coef[i][q++] = f[p].alpha[ci+k];
2146  ++p;
2147  }
2148  }
2149  return model;
2150  }
2151 
2152  // Stratified cross validation
2153  public static void svm_cross_validation(svm_problem prob, svm_parameter param, int nr_fold, double[] target)
2154  {
2155  int i;
2156  int[] fold_start = new int[nr_fold+1];
2157  int l = prob.l;
2158  int[] perm = new int[l];
2159 
2160  // stratified cv may not give leave-one-out rate
2161  // Each class to l folds -> some folds may have zero elements
2162  if((param.svm_type == svm_parameter.C_SVC ||
2163  param.svm_type == svm_parameter.NU_SVC) && nr_fold < l)
2164  {
2165  int[] tmp_nr_class = new int[1];
2166  int[][] tmp_label = new int[1][];
2167  int[][] tmp_start = new int[1][];
2168  int[][] tmp_count = new int[1][];
2169 
2170  svm_group_classes(prob,tmp_nr_class,tmp_label,tmp_start,tmp_count,perm);
2171 
2172  int nr_class = tmp_nr_class[0];
2173  int[] start = tmp_start[0];
2174  int[] count = tmp_count[0];
2175 
2176  // random shuffle and then data grouped by fold using the array perm
2177  int[] fold_count = new int[nr_fold];
2178  int c;
2179  int[] index = new int[l];
2180  for(i=0;i<l;i++)
2181  index[i]=perm[i];
2182  for (c=0; c<nr_class; c++)
2183  for(i=0;i<count[c];i++)
2184  {
2185  int j = i+rand.nextInt(count[c]-i);
2186  do {int _=index[start[c]+j]; index[start[c]+j]=index[start[c]+i]; index[start[c]+i]=_;} while(false);
2187  }
2188  for(i=0;i<nr_fold;i++)
2189  {
2190  fold_count[i] = 0;
2191  for (c=0; c<nr_class;c++)
2192  fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2193  }
2194  fold_start[0]=0;
2195  for (i=1;i<=nr_fold;i++)
2196  fold_start[i] = fold_start[i-1]+fold_count[i-1];
2197  for (c=0; c<nr_class;c++)
2198  for(i=0;i<nr_fold;i++)
2199  {
2200  int begin = start[c]+i*count[c]/nr_fold;
2201  int end = start[c]+(i+1)*count[c]/nr_fold;
2202  for(int j=begin;j<end;j++)
2203  {
2204  perm[fold_start[i]] = index[j];
2205  fold_start[i]++;
2206  }
2207  }
2208  fold_start[0]=0;
2209  for (i=1;i<=nr_fold;i++)
2210  fold_start[i] = fold_start[i-1]+fold_count[i-1];
2211  }
2212  else
2213  {
2214  for(i=0;i<l;i++) perm[i]=i;
2215  for(i=0;i<l;i++)
2216  {
2217  int j = i+rand.nextInt(l-i);
2218  do {int _=perm[i]; perm[i]=perm[j]; perm[j]=_;} while(false);
2219  }
2220  for(i=0;i<=nr_fold;i++)
2221  fold_start[i]=i*l/nr_fold;
2222  }
2223 
2224  for(i=0;i<nr_fold;i++)
2225  {
2226  int begin = fold_start[i];
2227  int end = fold_start[i+1];
2228  int j,k;
2229  svm_problem subprob = new svm_problem();
2230 
2231  subprob.l = l-(end-begin);
2232  subprob.x = new svm_node[subprob.l][];
2233  subprob.y = new double[subprob.l];
2234 
2235  k=0;
2236  for(j=0;j<begin;j++)
2237  {
2238  subprob.x[k] = prob.x[perm[j]];
2239  subprob.y[k] = prob.y[perm[j]];
2240  ++k;
2241  }
2242  for(j=end;j<l;j++)
2243  {
2244  subprob.x[k] = prob.x[perm[j]];
2245  subprob.y[k] = prob.y[perm[j]];
2246  ++k;
2247  }
2248  svm_model submodel = svm_train(subprob,param);
2249  if(param.probability==1 &&
2250  (param.svm_type == svm_parameter.C_SVC ||
2251  param.svm_type == svm_parameter.NU_SVC))
2252  {
2253  double[] prob_estimates= new double[svm_get_nr_class(submodel)];
2254  for(j=begin;j<end;j++)
2255  target[perm[j]] = svm_predict_probability(submodel,prob.x[perm[j]],prob_estimates);
2256  }
2257  else
2258  for(j=begin;j<end;j++)
2259  target[perm[j]] = svm_predict(submodel,prob.x[perm[j]]);
2260  }
2261  }
2262 
2263  public static int svm_get_svm_type(svm_model model)
2264  {
2265  return model.param.svm_type;
2266  }
2267 
2268  public static int svm_get_nr_class(svm_model model)
2269  {
2270  return model.nr_class;
2271  }
2272 
2273  public static void svm_get_labels(svm_model model, int[] label)
2274  {
2275  if (model.label != null)
2276  for(int i=0;i<model.nr_class;i++)
2277  label[i] = model.label[i];
2278  }
2279 
2280  public static void svm_get_sv_indices(svm_model model, int[] indices)
2281  {
2282  if (model.sv_indices != null)
2283  for(int i=0;i<model.l;i++)
2284  indices[i] = model.sv_indices[i];
2285  }
2286 
2287  public static int svm_get_nr_sv(svm_model model)
2288  {
2289  return model.l;
2290  }
2291 
2293  {
2295  model.probA!=null)
2296  return model.probA[0];
2297  else
2298  {
2299  System.err.print("Model doesn't contain information for SVR probability inference\n");
2300  return 0;
2301  }
2302  }
2303 
2304  public static double svm_predict_values(svm_model model, svm_node[] x, double[] dec_values)
2305  {
2306  int i;
2307  if(model.param.svm_type == svm_parameter.ONE_CLASS ||
2310  {
2311  double[] sv_coef = model.sv_coef[0];
2312  double sum = 0;
2313  for(i=0;i<model.l;i++)
2314  sum += sv_coef[i] * Kernel.k_function(x,model.SV[i],model.param);
2315  sum -= model.rho[0];
2316  dec_values[0] = sum;
2317 
2318  if(model.param.svm_type == svm_parameter.ONE_CLASS)
2319  return (sum>0)?1:-1;
2320  else
2321  return sum;
2322  }
2323  else
2324  {
2325  int nr_class = model.nr_class;
2326  int l = model.l;
2327 
2328  double[] kvalue = new double[l];
2329  for(i=0;i<l;i++)
2330  kvalue[i] = Kernel.k_function(x,model.SV[i],model.param);
2331 
2332  int[] start = new int[nr_class];
2333  start[0] = 0;
2334  for(i=1;i<nr_class;i++)
2335  start[i] = start[i-1]+model.nSV[i-1];
2336 
2337  int[] vote = new int[nr_class];
2338  for(i=0;i<nr_class;i++)
2339  vote[i] = 0;
2340 
2341  int p=0;
2342  for(i=0;i<nr_class;i++)
2343  for(int j=i+1;j<nr_class;j++)
2344  {
2345  double sum = 0;
2346  int si = start[i];
2347  int sj = start[j];
2348  int ci = model.nSV[i];
2349  int cj = model.nSV[j];
2350 
2351  int k;
2352  double[] coef1 = model.sv_coef[j-1];
2353  double[] coef2 = model.sv_coef[i];
2354  for(k=0;k<ci;k++)
2355  sum += coef1[si+k] * kvalue[si+k];
2356  for(k=0;k<cj;k++)
2357  sum += coef2[sj+k] * kvalue[sj+k];
2358  sum -= model.rho[p];
2359  dec_values[p] = sum;
2360 
2361  if(dec_values[p] > 0)
2362  ++vote[i];
2363  else
2364  ++vote[j];
2365  p++;
2366  }
2367 
2368  int vote_max_idx = 0;
2369  for(i=1;i<nr_class;i++)
2370  if(vote[i] > vote[vote_max_idx])
2371  vote_max_idx = i;
2372 
2373  return model.label[vote_max_idx];
2374  }
2375  }
2376 
2377  public static double svm_predict(svm_model model, svm_node[] x)
2378  {
2379  int nr_class = model.nr_class;
2380  double[] dec_values;
2381  if(model.param.svm_type == svm_parameter.ONE_CLASS ||
2384  dec_values = new double[1];
2385  else
2386  dec_values = new double[nr_class*(nr_class-1)/2];
2387  double pred_result = svm_predict_values(model, x, dec_values);
2388  return pred_result;
2389  }
2390 
2391  public static double svm_predict_probability(svm_model model, svm_node[] x, double[] prob_estimates)
2392  {
2393  if ((model.param.svm_type == svm_parameter.C_SVC || model.param.svm_type == svm_parameter.NU_SVC) &&
2394  model.probA!=null && model.probB!=null)
2395  {
2396  int i;
2397  int nr_class = model.nr_class;
2398  double[] dec_values = new double[nr_class*(nr_class-1)/2];
2399  svm_predict_values(model, x, dec_values);
2400 
2401  double min_prob=1e-7;
2402  double[][] pairwise_prob=new double[nr_class][nr_class];
2403 
2404  int k=0;
2405  for(i=0;i<nr_class;i++)
2406  for(int j=i+1;j<nr_class;j++)
2407  {
2408  pairwise_prob[i][j]=Math.min(Math.max(sigmoid_predict(dec_values[k],model.probA[k],model.probB[k]),min_prob),1-min_prob);
2409  pairwise_prob[j][i]=1-pairwise_prob[i][j];
2410  k++;
2411  }
2412  multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2413 
2414  int prob_max_idx = 0;
2415  for(i=1;i<nr_class;i++)
2416  if(prob_estimates[i] > prob_estimates[prob_max_idx])
2417  prob_max_idx = i;
2418  return model.label[prob_max_idx];
2419  }
2420  else
2421  return svm_predict(model, x);
2422  }
2423 
2424  static final String svm_type_table[] =
2425  {
2426  "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",
2427  };
2428 
2429  static final String kernel_type_table[]=
2430  {
2431  "linear","polynomial","rbf","sigmoid","precomputed"
2432  };
2433 
2434  public static void svm_save_model(String model_file_name, svm_model model) throws IOException
2435  {
2436  DataOutputStream fp = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(model_file_name)));
2437 
2438  svm_parameter param = model.param;
2439 
2440  fp.writeBytes("svm_type "+svm_type_table[param.svm_type]+"\n");
2441  fp.writeBytes("kernel_type "+kernel_type_table[param.kernel_type]+"\n");
2442 
2443  if(param.kernel_type == svm_parameter.POLY)
2444  fp.writeBytes("degree "+param.degree+"\n");
2445 
2446  if(param.kernel_type == svm_parameter.POLY ||
2447  param.kernel_type == svm_parameter.RBF ||
2449  fp.writeBytes("gamma "+param.gamma+"\n");
2450 
2451  if(param.kernel_type == svm_parameter.POLY ||
2453  fp.writeBytes("coef0 "+param.coef0+"\n");
2454 
2455  int nr_class = model.nr_class;
2456  int l = model.l;
2457  fp.writeBytes("nr_class "+nr_class+"\n");
2458  fp.writeBytes("total_sv "+l+"\n");
2459 
2460  {
2461  fp.writeBytes("rho");
2462  for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2463  fp.writeBytes(" "+model.rho[i]);
2464  fp.writeBytes("\n");
2465  }
2466 
2467  if(model.label != null)
2468  {
2469  fp.writeBytes("label");
2470  for(int i=0;i<nr_class;i++)
2471  fp.writeBytes(" "+model.label[i]);
2472  fp.writeBytes("\n");
2473  }
2474 
2475  if(model.probA != null) // regression has probA only
2476  {
2477  fp.writeBytes("probA");
2478  for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2479  fp.writeBytes(" "+model.probA[i]);
2480  fp.writeBytes("\n");
2481  }
2482  if(model.probB != null)
2483  {
2484  fp.writeBytes("probB");
2485  for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2486  fp.writeBytes(" "+model.probB[i]);
2487  fp.writeBytes("\n");
2488  }
2489 
2490  if(model.nSV != null)
2491  {
2492  fp.writeBytes("nr_sv");
2493  for(int i=0;i<nr_class;i++)
2494  fp.writeBytes(" "+model.nSV[i]);
2495  fp.writeBytes("\n");
2496  }
2497 
2498  fp.writeBytes("SV\n");
2499  double[][] sv_coef = model.sv_coef;
2500  svm_node[][] SV = model.SV;
2501 
2502  for(int i=0;i<l;i++)
2503  {
2504  for(int j=0;j<nr_class-1;j++)
2505  fp.writeBytes(sv_coef[j][i]+" ");
2506 
2507  svm_node[] p = SV[i];
2509  fp.writeBytes("0:"+(int)(p[0].value));
2510  else
2511  for(int j=0;j<p.length;j++)
2512  fp.writeBytes(p[j].index+":"+p[j].value+" ");
2513  fp.writeBytes("\n");
2514  }
2515 
2516  fp.close();
2517  }
2518 
2519  private static double atof(String s)
2520  {
2521  return Double.valueOf(s).doubleValue();
2522  }
2523 
2524  private static int atoi(String s)
2525  {
2526  return Integer.parseInt(s);
2527  }
2528 
2529  public static svm_model svm_load_model(String model_file_name) throws IOException
2530  {
2531  return svm_load_model(new BufferedReader(new FileReader(model_file_name)));
2532  }
2533 
2534  public static svm_model svm_load_model(BufferedReader fp) throws IOException
2535  {
2536  // read parameters
2537 
2538  svm_model model = new svm_model();
2539  svm_parameter param = new svm_parameter();
2540  model.param = param;
2541  model.rho = null;
2542  model.probA = null;
2543  model.probB = null;
2544  model.label = null;
2545  model.nSV = null;
2546 
2547  while(true)
2548  {
2549  String cmd = fp.readLine();
2550  String arg = cmd.substring(cmd.indexOf(' ')+1);
2551 
2552  if(cmd.startsWith("svm_type"))
2553  {
2554  int i;
2555  for(i=0;i<svm_type_table.length;i++)
2556  {
2557  if(arg.indexOf(svm_type_table[i])!=-1)
2558  {
2559  param.svm_type=i;
2560  break;
2561  }
2562  }
2563  if(i == svm_type_table.length)
2564  {
2565  System.err.print("unknown svm type.\n");
2566  return null;
2567  }
2568  }
2569  else if(cmd.startsWith("kernel_type"))
2570  {
2571  int i;
2572  for(i=0;i<kernel_type_table.length;i++)
2573  {
2574  if(arg.indexOf(kernel_type_table[i])!=-1)
2575  {
2576  param.kernel_type=i;
2577  break;
2578  }
2579  }
2580  if(i == kernel_type_table.length)
2581  {
2582  System.err.print("unknown kernel function.\n");
2583  return null;
2584  }
2585  }
2586  else if(cmd.startsWith("degree"))
2587  param.degree = atoi(arg);
2588  else if(cmd.startsWith("gamma"))
2589  param.gamma = atof(arg);
2590  else if(cmd.startsWith("coef0"))
2591  param.coef0 = atof(arg);
2592  else if(cmd.startsWith("nr_class"))
2593  model.nr_class = atoi(arg);
2594  else if(cmd.startsWith("total_sv"))
2595  model.l = atoi(arg);
2596  else if(cmd.startsWith("rho"))
2597  {
2598  int n = model.nr_class * (model.nr_class-1)/2;
2599  model.rho = new double[n];
2600  StringTokenizer st = new StringTokenizer(arg);
2601  for(int i=0;i<n;i++)
2602  model.rho[i] = atof(st.nextToken());
2603  }
2604  else if(cmd.startsWith("label"))
2605  {
2606  int n = model.nr_class;
2607  model.label = new int[n];
2608  StringTokenizer st = new StringTokenizer(arg);
2609  for(int i=0;i<n;i++)
2610  model.label[i] = atoi(st.nextToken());
2611  }
2612  else if(cmd.startsWith("probA"))
2613  {
2614  int n = model.nr_class*(model.nr_class-1)/2;
2615  model.probA = new double[n];
2616  StringTokenizer st = new StringTokenizer(arg);
2617  for(int i=0;i<n;i++)
2618  model.probA[i] = atof(st.nextToken());
2619  }
2620  else if(cmd.startsWith("probB"))
2621  {
2622  int n = model.nr_class*(model.nr_class-1)/2;
2623  model.probB = new double[n];
2624  StringTokenizer st = new StringTokenizer(arg);
2625  for(int i=0;i<n;i++)
2626  model.probB[i] = atof(st.nextToken());
2627  }
2628  else if(cmd.startsWith("nr_sv"))
2629  {
2630  int n = model.nr_class;
2631  model.nSV = new int[n];
2632  StringTokenizer st = new StringTokenizer(arg);
2633  for(int i=0;i<n;i++)
2634  model.nSV[i] = atoi(st.nextToken());
2635  }
2636  else if(cmd.startsWith("SV"))
2637  {
2638  break;
2639  }
2640  else
2641  {
2642  System.err.print("unknown text in model file: ["+cmd+"]\n");
2643  return null;
2644  }
2645  }
2646 
2647  // read sv_coef and SV
2648 
2649  int m = model.nr_class - 1;
2650  int l = model.l;
2651  model.sv_coef = new double[m][l];
2652  model.SV = new svm_node[l][];
2653 
2654  for(int i=0;i<l;i++)
2655  {
2656  String line = fp.readLine();
2657  StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
2658 
2659  for(int k=0;k<m;k++)
2660  model.sv_coef[k][i] = atof(st.nextToken());
2661  int n = st.countTokens()/2;
2662  model.SV[i] = new svm_node[n];
2663  for(int j=0;j<n;j++)
2664  {
2665  model.SV[i][j] = new svm_node();
2666  model.SV[i][j].index = atoi(st.nextToken());
2667  model.SV[i][j].value = atof(st.nextToken());
2668  }
2669  }
2670 
2671  fp.close();
2672  return model;
2673  }
2674 
2675  public static String svm_check_parameter(svm_problem prob, svm_parameter param)
2676  {
2677  // svm_type
2678 
2679  int svm_type = param.svm_type;
2680  if(svm_type != svm_parameter.C_SVC &&
2681  svm_type != svm_parameter.NU_SVC &&
2682  svm_type != svm_parameter.ONE_CLASS &&
2683  svm_type != svm_parameter.EPSILON_SVR &&
2684  svm_type != svm_parameter.NU_SVR)
2685  return "unknown svm type";
2686 
2687  // kernel_type, degree
2688 
2689  int kernel_type = param.kernel_type;
2690  if(kernel_type != svm_parameter.LINEAR &&
2691  kernel_type != svm_parameter.POLY &&
2692  kernel_type != svm_parameter.RBF &&
2693  kernel_type != svm_parameter.SIGMOID &&
2694  kernel_type != svm_parameter.PRECOMPUTED)
2695  return "unknown kernel type";
2696 
2697  if(param.gamma < 0)
2698  return "gamma < 0";
2699 
2700  if(param.degree < 0)
2701  return "degree of polynomial kernel < 0";
2702 
2703  // cache_size,eps,C,nu,p,shrinking
2704 
2705  if(param.cache_size <= 0)
2706  return "cache_size <= 0";
2707 
2708  if(param.eps <= 0)
2709  return "eps <= 0";
2710 
2711  if(svm_type == svm_parameter.C_SVC ||
2712  svm_type == svm_parameter.EPSILON_SVR ||
2713  svm_type == svm_parameter.NU_SVR)
2714  if(param.C <= 0)
2715  return "C <= 0";
2716 
2717  if(svm_type == svm_parameter.NU_SVC ||
2718  svm_type == svm_parameter.ONE_CLASS ||
2719  svm_type == svm_parameter.NU_SVR)
2720  if(param.nu <= 0 || param.nu > 1)
2721  return "nu <= 0 or nu > 1";
2722 
2723  if(svm_type == svm_parameter.EPSILON_SVR)
2724  if(param.p < 0)
2725  return "p < 0";
2726 
2727  if(param.shrinking != 0 &&
2728  param.shrinking != 1)
2729  return "shrinking != 0 and shrinking != 1";
2730 
2731  if(param.probability != 0 &&
2732  param.probability != 1)
2733  return "probability != 0 and probability != 1";
2734 
2735  if(param.probability == 1 &&
2736  svm_type == svm_parameter.ONE_CLASS)
2737  return "one-class SVM probability output not supported yet";
2738 
2739  // check whether nu-svc is feasible
2740 
2741  if(svm_type == svm_parameter.NU_SVC)
2742  {
2743  int l = prob.l;
2744  int max_nr_class = 16;
2745  int nr_class = 0;
2746  int[] label = new int[max_nr_class];
2747  int[] count = new int[max_nr_class];
2748 
2749  int i;
2750  for(i=0;i<l;i++)
2751  {
2752  int this_label = (int)prob.y[i];
2753  int j;
2754  for(j=0;j<nr_class;j++)
2755  if(this_label == label[j])
2756  {
2757  ++count[j];
2758  break;
2759  }
2760 
2761  if(j == nr_class)
2762  {
2763  if(nr_class == max_nr_class)
2764  {
2765  max_nr_class *= 2;
2766  int[] new_data = new int[max_nr_class];
2767  System.arraycopy(label,0,new_data,0,label.length);
2768  label = new_data;
2769 
2770  new_data = new int[max_nr_class];
2771  System.arraycopy(count,0,new_data,0,count.length);
2772  count = new_data;
2773  }
2774  label[nr_class] = this_label;
2775  count[nr_class] = 1;
2776  ++nr_class;
2777  }
2778  }
2779 
2780  for(i=0;i<nr_class;i++)
2781  {
2782  int n1 = count[i];
2783  for(int j=i+1;j<nr_class;j++)
2784  {
2785  int n2 = count[j];
2786  if(param.nu*(n1+n2)/2 > Math.min(n1,n2))
2787  return "specified nu is infeasible";
2788  }
2789  }
2790  }
2791 
2792  return null;
2793  }
2794 
2796  {
2797  if (((model.param.svm_type == svm_parameter.C_SVC || model.param.svm_type == svm_parameter.NU_SVC) &&
2798  model.probA!=null && model.probB!=null) ||
2800  model.probA!=null))
2801  return 1;
2802  else
2803  return 0;
2804  }
2805 
2807  {
2808  if (print_func == null)
2809  svm_print_string = svm_print_stdout;
2810  else
2811  svm_print_string = print_func;
2812  }
2813 }
static final int C_SVC
d
static void solve_one_class(svm_problem prob, svm_parameter param, double[] alpha, Solver.SolutionInfo si)
Definition: svm.java:1397
static void solve_c_svc(svm_problem prob, svm_parameter param, double[] alpha, Solver.SolutionInfo si, double Cp, double Cn)
Definition: svm.java:1314
static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
Definition: svm.cpp:2044
static void solve_one_class(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: svm.cpp:1555
svm_parameter param
Definition: svm_model.java:7
static double svm_svr_probability(svm_problem prob, svm_parameter param)
Definition: svm.java:1822
int * nSV
Definition: svm.h:67
string cmd
Definition: easy.py:48
static svm_model svm_load_model(BufferedReader fp)
Definition: svm.java:2534
int nr_fold
Definition: svmtrain.c:64
struct svm_problem prob
Definition: svmtrain.c:60
ROSCPP_DECL void start()
static void solve_epsilon_svr(svm_problem prob, svm_parameter param, double[] alpha, Solver.SolutionInfo si)
Definition: svm.java:1425
static int svm_get_svm_type(svm_model model)
Definition: svm.java:2263
static void svm_get_labels(svm_model model, int[] label)
Definition: svm.java:2273
static final int NU_SVR
struct svm_parameter param
Definition: svmtrain.c:59
Definition: svm.py:1
def svm_train(arg1, arg2=None, arg3=None)
Definition: svmutil.py:77
static void svm_binary_svc_probability(svm_problem prob, svm_parameter param, double Cp, double Cn, double[] probAB)
Definition: svm.java:1741
HBITMAP buffer
ONE_CLASS_Q(const svm_problem &prob, const svm_parameter &param)
Definition: svm.cpp:1343
static double sigmoid_predict(double decision_value, double A, double B)
Definition: svm.cpp:1848
XmlRpcServer s
static void solve_epsilon_svr(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: svm.cpp:1587
static svm_model svm_train(svm_problem prob, svm_parameter param)
Definition: svm.java:1916
static void sigmoid_train(int l, double[] dec_values, double[] labels, double[] probAB)
Definition: svm.java:1558
int l
Definition: svm.h:56
static void svm_get_sv_indices(svm_model model, int[] indices)
Definition: svm.java:2280
static const char * kernel_type_table[]
Definition: svm.cpp:2646
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
Definition: svm.cpp:2352
struct svm_node ** SV
Definition: svm.h:57
double * probB
Definition: svm.h:61
def svm_load_model(model_file_name)
Definition: svmutil.py:27
static void svm_group_classes(svm_problem prob, int[] nr_class_ret, int[][] label_ret, int[][] start_ret, int[][] count_ret, int[] perm)
Definition: svm.java:1853
static double atof(String s)
Definition: svm.java:2519
static final int LINEAR
svm_node[][] SV
Definition: svm_model.java:10
static void sigmoid_train(int l, const double *dec_values, const double *labels, double &A, double &B)
Definition: svm.cpp:1730
ROSCONSOLE_DECL void print(FilterBase *filter, void *logger, Level level, const char *file, int line, const char *function, const char *fmt,...) ROSCONSOLE_PRINTF_ATTRIBUTE(7
double * probA
Definition: svm.h:60
int nr_class
Definition: svm.h:55
static double svm_predict_probability(svm_model model, svm_node[] x, double[] prob_estimates)
Definition: svm.java:2391
#define LIBSVM_VERSION
Definition: svm.h:4
double value
Definition: svm_node.java:5
static int svm_check_probability_model(svm_model model)
Definition: svm.java:2795
static double svm_get_svr_probability(svm_model model)
Definition: svm.java:2292
static void multiclass_probability(int k, double[][] r, double[] p)
Definition: svm.java:1681
def svm_predict(y, x, m, options="")
Definition: svmutil.py:164
Solver()
Definition: svm.cpp:409
struct svm_node * x
Definition: svm-predict.c:12
static void svm_set_print_string_function(svm_print_interface print_func)
Definition: svm.java:2806
static final int RBF
int svm_get_nr_class(const svm_model *model)
Definition: svm.cpp:2475
Kernel(int l, svm_node *const *x, const svm_parameter &param)
Definition: svm.cpp:266
static int svm_get_nr_class(svm_model model)
Definition: svm.java:2268
static void solve_nu_svr(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: svm.cpp:1625
double[][] sv_coef
Definition: svm_model.java:11
#define INF
Definition: svm.cpp:48
static void(* svm_print_string)(const char *)
Definition: svm.cpp:57
static void svm_binary_svc_probability(const svm_problem *prob, const svm_parameter *param, double Cp, double Cn, double &probA, double &probB)
Definition: svm.cpp:1923
f
Definition: easy.py:54
static final int SIGMOID
static String svm_check_parameter(svm_problem prob, svm_parameter param)
Definition: svm.java:2675
int(* info)(const char *fmt,...)
Definition: svmpredict.c:18
static double powi(double base, int times)
Definition: svm.cpp:37
static void solve_nu_svr(svm_problem prob, svm_parameter param, double[] alpha, Solver.SolutionInfo si)
Definition: svm.java:1458
double[] probA
Definition: svm_model.java:13
static int svm_get_nr_sv(svm_model model)
Definition: svm.java:2287
int * label
Definition: svm.h:66
double svm_predict_values(const svm_model *model, const svm_node *x, double *dec_values)
Definition: svm.cpp:2511
double svm_predict_probability(const svm_model *model, const svm_node *x, double *prob_estimates)
Definition: svm.cpp:2602
struct svm_parameter param
Definition: svm.h:54
static double svm_svr_probability(const svm_problem *prob, const svm_parameter *param)
Definition: svm.cpp:2010
SVR_Q(const svm_problem &prob, const svm_parameter &param)
Definition: svm.cpp:1389
double * rho
Definition: svm.h:59
static double sigmoid_predict(double decision_value, double A, double B)
Definition: svm.java:1671
static void svm_save_model(String model_file_name, svm_model model)
Definition: svm.java:2434
static svm_model svm_load_model(String model_file_name)
Definition: svm.java:2529
double ** sv_coef
Definition: svm.h:58
char * line
Definition: svm-scale.c:21
double[] probB
Definition: svm_model.java:14
static decision_function svm_train_one(const svm_problem *prob, const svm_parameter *param, double Cp, double Cn)
Definition: svm.cpp:1672
svm_node[][] x
Definition: svm_problem.java:6
SVC_Q(const svm_problem &prob, const svm_parameter &param, const schar *y_)
Definition: svm.cpp:1293
static void multiclass_probability(int k, double **r, double *p)
Definition: svm.cpp:1859
static double svm_predict(svm_model model, svm_node[] x)
Definition: svm.java:2377
static void solve_nu_svc(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: svm.cpp:1500
static const char * svm_type_table[]
Definition: svm.cpp:2641
c
Definition: easy.py:61
static final int PRECOMPUTED
struct svm_model * model
Definition: svmtrain.c:61
static void solve_c_svc(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si, double Cp, double Cn)
Definition: svm.cpp:1464
static void solve_nu_svc(svm_problem prob, svm_parameter param, double[] alpha, Solver.SolutionInfo si)
Definition: svm.java:1346
static final int ONE_CLASS
static double svm_predict_values(svm_model model, svm_node[] x, double[] dec_values)
Definition: svm.java:2304
static int atoi(String s)
Definition: svm.java:2524
static final int POLY
static final int EPSILON_SVR
static void svm_cross_validation(svm_problem prob, svm_parameter param, int nr_fold, double[] target)
Definition: svm.java:2153
Solver_NU()
Definition: svm.cpp:1036
Cache(int l, long int size)
Definition: svm.cpp:105
static final int NU_SVC


ml_classifiers
Author(s): Scott Niekum , Joshua Whitley
autogenerated on Tue May 14 2019 02:28:35