svm.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2000-2014 Chih-Chung Chang and Chih-Jen Lin
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  *
16  * 3. Neither name of copyright holders nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
25  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <math.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <ctype.h>
38 #include <float.h>
39 #include <string.h>
40 #include <stdarg.h>
41 #include <limits.h>
42 #include <locale.h>
43 #include <assert.h>
44 #include "svm.h"
46 typedef float Qfloat;
47 typedef signed char schar;
48 #ifndef min
49 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
50 #endif
51 #ifndef max
52 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
53 #endif
54 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
55 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
56 {
57  dst = new T[n];
58  memcpy((void *)dst,(void *)src,sizeof(T)*n);
59 }
60 static inline double powi(double base, int times)
61 {
62  double tmp = base, ret = 1.0;
63 
64  for(int t=times; t>0; t/=2)
65  {
66  if(t%2==1) ret*=tmp;
67  tmp = tmp * tmp;
68  }
69  return ret;
70 }
71 #define INF HUGE_VAL
72 #define TAU 1e-12
73 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
74 
75 static void print_string_stdout(const char *s)
76 {
77  fputs(s,stdout);
78  fflush(stdout);
79 }
80 static void (*svm_print_string) (const char *) = &print_string_stdout;
81 #if 0
82 static void info(const char *fmt,...)
83 {
84  char buf[BUFSIZ];
85  va_list ap;
86  va_start(ap,fmt);
87  vsprintf(buf,fmt,ap);
88  va_end(ap);
89  (*svm_print_string)(buf);
90 }
91 #else
92 static void info(const char *fmt,...) {}
93 #endif
94 
95 //
96 // Kernel Cache
97 //
98 // l is the number of total data items
99 // size is the cache size limit in bytes
100 //
101 class Cache
102 {
103 public:
104  Cache(int l,long int size);
105  ~Cache();
106 
107  // request data [0,len)
108  // return some position p where [p,len) need to be filled
109  // (p >= len if nothing needs to be filled)
110  int get_data(const int index, Qfloat **data, int len);
111  void swap_index(int i, int j);
112 private:
113  int l;
114  long int size;
115  struct head_t
116  {
117  head_t *prev, *next; // a circular list
119  int len; // data[0,len) is cached in this entry
120  };
121 
124  void lru_delete(head_t *h);
125  void lru_insert(head_t *h);
126 };
127 
128 Cache::Cache(int l_,long int size_):l(l_),size(size_)
129 {
130  head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
131  size /= sizeof(Qfloat);
132  size -= l * sizeof(head_t) / sizeof(Qfloat);
133  size = max(size, 2 * (long int) l); // cache must be large enough for two columns
135 }
136 
138 {
139  for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
140  free(h->data);
141  free(head);
142 }
143 
145 {
146  // delete from current location
147  h->prev->next = h->next;
148  h->next->prev = h->prev;
149 }
150 
152 {
153  // insert to last position
154  h->next = &lru_head;
155  h->prev = lru_head.prev;
156  h->prev->next = h;
157  h->next->prev = h;
158 }
159 
160 int Cache::get_data(const int index, Qfloat **data, int len)
161 {
162  head_t *h = &head[index];
163  if(h->len) lru_delete(h);
164  int more = len - h->len;
165 
166  if(more > 0)
167  {
168  // free old space
169  while(size < more)
170  {
171  head_t *old = lru_head.next;
172  lru_delete(old);
173  free(old->data);
174  size += old->len;
175  old->data = 0;
176  old->len = 0;
177  }
178 
179  // allocate new space
180  h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
181  size -= more;
182  swap(h->len,len);
183  }
184 
185  lru_insert(h);
186  *data = h->data;
187  return len;
188 }
189 
190 void Cache::swap_index(int i, int j)
191 {
192  if(i==j) return;
193 
194  if(head[i].len) lru_delete(&head[i]);
195  if(head[j].len) lru_delete(&head[j]);
196  swap(head[i].data,head[j].data);
197  swap(head[i].len,head[j].len);
198  if(head[i].len) lru_insert(&head[i]);
199  if(head[j].len) lru_insert(&head[j]);
200 
201  if(i>j) swap(i,j);
202  for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
203  {
204  if(h->len > i)
205  {
206  if(h->len > j)
207  swap(h->data[i],h->data[j]);
208  else
209  {
210  // give up
211  lru_delete(h);
212  free(h->data);
213  size += h->len;
214  h->data = 0;
215  h->len = 0;
216  }
217  }
218  }
219 }
220 
221 //
222 // Kernel evaluation
223 //
224 // the static method k_function is for doing single kernel evaluation
225 // the constructor of Kernel prepares to calculate the l*l kernel matrix
226 // the member function get_Q is for getting one column from the Q Matrix
227 //
228 class QMatrix {
229 public:
230  virtual Qfloat *get_Q(int column, int len) const = 0;
231  virtual double *get_QD() const = 0;
232  virtual void swap_index(int i, int j) const = 0;
233  virtual ~QMatrix() {}
234 };
235 
236 class Kernel: public QMatrix {
237 public:
238  Kernel(int l, svm_node * const * x, const svm_parameter& param);
239  virtual ~Kernel();
240 
241  static double k_function(const svm_node *x, const svm_node *y,
242  const svm_parameter& param);
243 
244  virtual Qfloat *get_Q(int column, int len) const = 0;
245  virtual double *get_QD() const = 0;
246  virtual void swap_index(int i, int j) const // no so const...
247  {
248  swap(x[i],x[j]);
249  if(x_square) swap(x_square[i],x_square[j]);
250  }
251 protected:
252 
253  double (Kernel::*kernel_function)(int i, int j) const;
254 
255 private:
256  const svm_node **x;
257  double *x_square;
258 
259  // svm_parameter
260  const int kernel_type;
261  const int degree;
262  const double gamma;
263  const double coef0;
264 
265  static double dot(const svm_node *px, const svm_node *py);
266  double kernel_linear(int i, int j) const
267  {
268  return dot(x[i],x[j]);
269  }
270  double kernel_poly(int i, int j) const
271  {
272  return powi(gamma*dot(x[i],x[j])+coef0,degree);
273  }
274  double kernel_rbf(int i, int j) const
275  {
276  return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
277  }
278  double kernel_sigmoid(int i, int j) const
279  {
280  return tanh(gamma*dot(x[i],x[j])+coef0);
281  }
282  double kernel_precomputed(int i, int j) const
283  {
284  return x[i][(int)(x[j][0].value)].value;
285  }
286 };
287 
288 
289 double k_function(const svm_node* x, const svm_node* y, const svm_parameter& param)
290 {
291  return Kernel::k_function(x, y, param);
292 }
293 
294 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
295  :kernel_type(param.kernel_type), degree(param.degree),
296  gamma(param.gamma), coef0(param.coef0)
297 {
298  switch(kernel_type)
299  {
300  case LINEAR:
302  break;
303  case POLY:
305  break;
306  case RBF:
308  break;
309  case SIGMOID:
311  break;
312  case PRECOMPUTED:
314  break;
315  }
316 
317  clone(x,x_,l);
318 
319  if(kernel_type == RBF)
320  {
321  x_square = new double[l];
322  for(int i=0;i<l;i++)
323  x_square[i] = dot(x[i],x[i]);
324  }
325  else
326  x_square = 0;
327 }
328 
330 {
331  delete[] x;
332  delete[] x_square;
333 }
334 
335 double Kernel::dot(const svm_node *px, const svm_node *py)
336 {
337  double sum = 0;
338  while(px->index != -1 && py->index != -1)
339  {
340  if(px->index == py->index)
341  {
342  sum += px->value * py->value;
343  ++px;
344  ++py;
345  }
346  else
347  {
348  if(px->index > py->index)
349  ++py;
350  else
351  ++px;
352  }
353  }
354  return sum;
355 }
356 
357 double Kernel::k_function(const svm_node *x, const svm_node *y,
358  const svm_parameter& param)
359 {
360  switch(param.kernel_type)
361  {
362  case LINEAR:
363  return dot(x,y);
364  case POLY:
365  return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
366  case RBF:
367  {
368  double sum = 0;
369  while(x->index != -1 && y->index !=-1)
370  {
371  if(x->index == y->index)
372  {
373  double d = x->value - y->value;
374  sum += d*d;
375  ++x;
376  ++y;
377  }
378  else
379  {
380  if(x->index > y->index)
381  {
382  sum += y->value * y->value;
383  ++y;
384  }
385  else
386  {
387  sum += x->value * x->value;
388  ++x;
389  }
390  }
391  }
392 
393  while(x->index != -1)
394  {
395  sum += x->value * x->value;
396  ++x;
397  }
398 
399  while(y->index != -1)
400  {
401  sum += y->value * y->value;
402  ++y;
403  }
404 
405  return exp(-param.gamma*sum);
406  }
407  case SIGMOID:
408  return tanh(param.gamma*dot(x,y)+param.coef0);
409  case PRECOMPUTED: //x: test (validation), y: SV
410  return x[(int)(y->value)].value;
411  default:
412  return 0; // Unreachable
413  }
414 }
415 
416 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
417 // Solves:
418 //
419 // min 0.5(\alpha^T Q \alpha) + p^T \alpha
420 //
421 // y^T \alpha = \delta
422 // y_i = +1 or -1
423 // 0 <= alpha_i <= Cp for y_i = 1
424 // 0 <= alpha_i <= Cn for y_i = -1
425 //
426 // Given:
427 //
428 // Q, p, y, Cp, Cn, and an initial feasible point \alpha
429 // l is the size of vectors and matrices
430 // eps is the stopping tolerance
431 //
432 // solution will be put in \alpha, objective value will be put in obj
433 //
434 class Solver {
435 public:
436  Solver() {};
437  virtual ~Solver() {};
438 
439  struct SolutionInfo {
440  double obj;
441  double rho;
442  double *upper_bound;
443  double r; // for Solver_NU
444  };
445 
446  void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
447  double *alpha_, const double* C_, double eps,
448  SolutionInfo* si, int shrinking);
449 protected:
452  double *G; // gradient of objective function
454  char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
455  double *alpha;
456  const QMatrix *Q;
457  const double *QD;
458  double eps;
459  double Cp,Cn;
460  double *C;
461  double *p;
463  double *G_bar; // gradient, if we treat free variables as 0
464  int l;
465  bool unshrink; // XXX
466 
467  double get_C(int i)
468  {
469  return C[i];
470  }
472  {
473  if(alpha[i] >= get_C(i))
475  else if(alpha[i] <= 0)
477  else alpha_status[i] = FREE;
478  }
479  bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
480  bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
481  bool is_free(int i) { return alpha_status[i] == FREE; }
482  void swap_index(int i, int j);
483  void reconstruct_gradient();
484  virtual int select_working_set(int &i, int &j);
485  virtual double calculate_rho();
486  virtual void do_shrinking();
487 private:
488  bool be_shrunk(int i, double Gmax1, double Gmax2);
489 };
490 
491 void Solver::swap_index(int i, int j)
492 {
493  Q->swap_index(i,j);
494  swap(y[i],y[j]);
495  swap(G[i],G[j]);
497  swap(alpha[i],alpha[j]);
498  swap(p[i],p[j]);
499  swap(active_set[i],active_set[j]);
500  swap(G_bar[i],G_bar[j]);
501  swap(C[i],C[j]);
502 }
503 
505 {
506  // reconstruct inactive elements of G from G_bar and free variables
507 
508  if(active_size == l) return;
509 
510  int i,j;
511  int nr_free = 0;
512 
513  for(j=active_size;j<l;j++)
514  G[j] = G_bar[j] + p[j];
515 
516  for(j=0;j<active_size;j++)
517  if(is_free(j))
518  nr_free++;
519 
520  if(2*nr_free < active_size)
521  info("\nWARNING: using -h 0 may be faster\n");
522 
523  if (nr_free*l > 2*active_size*(l-active_size))
524  {
525  for(i=active_size;i<l;i++)
526  {
527  const Qfloat *Q_i = Q->get_Q(i,active_size);
528  for(j=0;j<active_size;j++)
529  if(is_free(j))
530  G[i] += alpha[j] * Q_i[j];
531  }
532  }
533  else
534  {
535  for(i=0;i<active_size;i++)
536  if(is_free(i))
537  {
538  const Qfloat *Q_i = Q->get_Q(i,l);
539  double alpha_i = alpha[i];
540  for(j=active_size;j<l;j++)
541  G[j] += alpha_i * Q_i[j];
542  }
543  }
544 }
545 
546 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
547  double *alpha_, const double* C_, double eps,
548  SolutionInfo* si, int shrinking)
549 {
550  this->l = l;
551  this->Q = &Q;
552  QD=Q.get_QD();
553  clone(p, p_,l);
554  clone(y, y_,l);
555  clone(alpha,alpha_,l);
556  clone(C,C_,l);
557  this->eps = eps;
558  unshrink = false;
559 
560  // initialize alpha_status
561  {
562  alpha_status = new char[l];
563  for(int i=0;i<l;i++)
565  }
566 
567  // initialize active set (for shrinking)
568  {
569  active_set = new int[l];
570  for(int i=0;i<l;i++)
571  active_set[i] = i;
572  active_size = l;
573  }
574 
575  // initialize gradient
576  {
577  G = new double[l];
578  G_bar = new double[l];
579  int i;
580  for(i=0;i<l;i++)
581  {
582  G[i] = p[i];
583  G_bar[i] = 0;
584  }
585  for(i=0;i<l;i++)
586  if(!is_lower_bound(i))
587  {
588  const Qfloat *Q_i = Q.get_Q(i,l);
589  double alpha_i = alpha[i];
590  int j;
591  for(j=0;j<l;j++)
592  G[j] += alpha_i*Q_i[j];
593  if(is_upper_bound(i))
594  for(j=0;j<l;j++)
595  G_bar[j] += get_C(i) * Q_i[j];
596  }
597  }
598 
599  // optimization step
600 
601  int iter = 0;
602  int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
603  int counter = min(l,1000)+1;
604 
605  while(iter < max_iter)
606  {
607  // show progress and do shrinking
608 
609  if(--counter == 0)
610  {
611  counter = min(l,1000);
612  if(shrinking) do_shrinking();
613  info(".");
614  }
615 
616  int i,j;
617  if(select_working_set(i,j)!=0)
618  {
619  // reconstruct the whole gradient
621  // reset active set size and check
622  active_size = l;
623  info("*");
624  if(select_working_set(i,j)!=0)
625  break;
626  else
627  counter = 1; // do shrinking next iteration
628  }
629 
630  ++iter;
631 
632  // update alpha[i] and alpha[j], handle bounds carefully
633 
634  const Qfloat *Q_i = Q.get_Q(i,active_size);
635  const Qfloat *Q_j = Q.get_Q(j,active_size);
636 
637  double C_i = get_C(i);
638  double C_j = get_C(j);
639 
640  double old_alpha_i = alpha[i];
641  double old_alpha_j = alpha[j];
642 
643  if(y[i]!=y[j])
644  {
645  double quad_coef = QD[i]+QD[j]+2*Q_i[j];
646  if (quad_coef <= 0)
647  quad_coef = TAU;
648  double delta = (-G[i]-G[j])/quad_coef;
649  double diff = alpha[i] - alpha[j];
650  alpha[i] += delta;
651  alpha[j] += delta;
652 
653  if(diff > 0)
654  {
655  if(alpha[j] < 0)
656  {
657  alpha[j] = 0;
658  alpha[i] = diff;
659  }
660  }
661  else
662  {
663  if(alpha[i] < 0)
664  {
665  alpha[i] = 0;
666  alpha[j] = -diff;
667  }
668  }
669  if(diff > C_i - C_j)
670  {
671  if(alpha[i] > C_i)
672  {
673  alpha[i] = C_i;
674  alpha[j] = C_i - diff;
675  }
676  }
677  else
678  {
679  if(alpha[j] > C_j)
680  {
681  alpha[j] = C_j;
682  alpha[i] = C_j + diff;
683  }
684  }
685  }
686  else
687  {
688  double quad_coef = QD[i]+QD[j]-2*Q_i[j];
689  if (quad_coef <= 0)
690  quad_coef = TAU;
691  double delta = (G[i]-G[j])/quad_coef;
692  double sum = alpha[i] + alpha[j];
693  alpha[i] -= delta;
694  alpha[j] += delta;
695 
696  if(sum > C_i)
697  {
698  if(alpha[i] > C_i)
699  {
700  alpha[i] = C_i;
701  alpha[j] = sum - C_i;
702  }
703  }
704  else
705  {
706  if(alpha[j] < 0)
707  {
708  alpha[j] = 0;
709  alpha[i] = sum;
710  }
711  }
712  if(sum > C_j)
713  {
714  if(alpha[j] > C_j)
715  {
716  alpha[j] = C_j;
717  alpha[i] = sum - C_j;
718  }
719  }
720  else
721  {
722  if(alpha[i] < 0)
723  {
724  alpha[i] = 0;
725  alpha[j] = sum;
726  }
727  }
728  }
729 
730  // update G
731 
732  double delta_alpha_i = alpha[i] - old_alpha_i;
733  double delta_alpha_j = alpha[j] - old_alpha_j;
734 
735  for(int k=0;k<active_size;k++)
736  {
737  G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
738  }
739 
740  // update alpha_status and G_bar
741 
742  {
743  bool ui = is_upper_bound(i);
744  bool uj = is_upper_bound(j);
747  int k;
748  if(ui != is_upper_bound(i))
749  {
750  Q_i = Q.get_Q(i,l);
751  if(ui)
752  for(k=0;k<l;k++)
753  G_bar[k] -= C_i * Q_i[k];
754  else
755  for(k=0;k<l;k++)
756  G_bar[k] += C_i * Q_i[k];
757  }
758 
759  if(uj != is_upper_bound(j))
760  {
761  Q_j = Q.get_Q(j,l);
762  if(uj)
763  for(k=0;k<l;k++)
764  G_bar[k] -= C_j * Q_j[k];
765  else
766  for(k=0;k<l;k++)
767  G_bar[k] += C_j * Q_j[k];
768  }
769  }
770  }
771 
772  if(iter >= max_iter)
773  {
774  if(active_size < l)
775  {
776  // reconstruct the whole gradient to calculate objective value
778  active_size = l;
779  info("*");
780  }
781  fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
782  }
783 
784  // calculate rho
785 
786  si->rho = calculate_rho();
787 
788  // calculate objective value
789  {
790  double v = 0;
791  int i;
792  for(i=0;i<l;i++)
793  v += alpha[i] * (G[i] + p[i]);
794 
795  si->obj = v/2;
796  }
797 
798  // put back the solution
799  {
800  for(int i=0;i<l;i++)
801  alpha_[active_set[i]] = alpha[i];
802  }
803 
804  // juggle everything back
805  /*{
806  for(int i=0;i<l;i++)
807  while(active_set[i] != i)
808  swap_index(i,active_set[i]);
809  // or Q.swap_index(i,active_set[i]);
810  }*/
811 
812  for(int i=0;i<l;i++)
813  si->upper_bound[i] = C[i];
814 
815  info("\noptimization finished, #iter = %d\n",iter);
816 
817  delete[] p;
818  delete[] y;
819  delete[] C;
820  delete[] alpha;
821  delete[] alpha_status;
822  delete[] active_set;
823  delete[] G;
824  delete[] G_bar;
825 }
826 
827 // return 1 if already optimal, return 0 otherwise
828 int Solver::select_working_set(int &out_i, int &out_j)
829 {
830  // return i,j such that
831  // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
832  // j: minimizes the decrease of obj value
833  // (if quadratic coefficeint <= 0, replace it with tau)
834  // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
835 
836  double Gmax = -INF;
837  double Gmax2 = -INF;
838  int Gmax_idx = -1;
839  int Gmin_idx = -1;
840  double obj_diff_min = INF;
841 
842  for(int t=0;t<active_size;t++)
843  if(y[t]==+1)
844  {
845  if(!is_upper_bound(t))
846  if(-G[t] >= Gmax)
847  {
848  Gmax = -G[t];
849  Gmax_idx = t;
850  }
851  }
852  else
853  {
854  if(!is_lower_bound(t))
855  if(G[t] >= Gmax)
856  {
857  Gmax = G[t];
858  Gmax_idx = t;
859  }
860  }
861 
862  int i = Gmax_idx;
863  const Qfloat *Q_i = NULL;
864  if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
865  Q_i = Q->get_Q(i,active_size);
866 
867  for(int j=0;j<active_size;j++)
868  {
869  if(y[j]==+1)
870  {
871  if (!is_lower_bound(j))
872  {
873  double grad_diff=Gmax+G[j];
874  if (G[j] >= Gmax2)
875  Gmax2 = G[j];
876  if (grad_diff > 0)
877  {
878  double obj_diff;
879  double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
880  if (quad_coef > 0)
881  obj_diff = -(grad_diff*grad_diff)/quad_coef;
882  else
883  obj_diff = -(grad_diff*grad_diff)/TAU;
884 
885  if (obj_diff <= obj_diff_min)
886  {
887  Gmin_idx=j;
888  obj_diff_min = obj_diff;
889  }
890  }
891  }
892  }
893  else
894  {
895  if (!is_upper_bound(j))
896  {
897  double grad_diff= Gmax-G[j];
898  if (-G[j] >= Gmax2)
899  Gmax2 = -G[j];
900  if (grad_diff > 0)
901  {
902  double obj_diff;
903  double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
904  if (quad_coef > 0)
905  obj_diff = -(grad_diff*grad_diff)/quad_coef;
906  else
907  obj_diff = -(grad_diff*grad_diff)/TAU;
908 
909  if (obj_diff <= obj_diff_min)
910  {
911  Gmin_idx=j;
912  obj_diff_min = obj_diff;
913  }
914  }
915  }
916  }
917  }
918 
919  if(Gmax+Gmax2 < eps)
920  return 1;
921 
922  out_i = Gmax_idx;
923  out_j = Gmin_idx;
924  return 0;
925 }
926 
927 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
928 {
929  if(is_upper_bound(i))
930  {
931  if(y[i]==+1)
932  return(-G[i] > Gmax1);
933  else
934  return(-G[i] > Gmax2);
935  }
936  else if(is_lower_bound(i))
937  {
938  if(y[i]==+1)
939  return(G[i] > Gmax2);
940  else
941  return(G[i] > Gmax1);
942  }
943  else
944  return(false);
945 }
946 
948 {
949  int i;
950  double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
951  double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }
952 
953  // find maximal violating pair first
954  for(i=0;i<active_size;i++)
955  {
956  if(y[i]==+1)
957  {
958  if(!is_upper_bound(i))
959  {
960  if(-G[i] >= Gmax1)
961  Gmax1 = -G[i];
962  }
963  if(!is_lower_bound(i))
964  {
965  if(G[i] >= Gmax2)
966  Gmax2 = G[i];
967  }
968  }
969  else
970  {
971  if(!is_upper_bound(i))
972  {
973  if(-G[i] >= Gmax2)
974  Gmax2 = -G[i];
975  }
976  if(!is_lower_bound(i))
977  {
978  if(G[i] >= Gmax1)
979  Gmax1 = G[i];
980  }
981  }
982  }
983 
984  if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
985  {
986  unshrink = true;
988  active_size = l;
989  info("*");
990  }
991 
992  for(i=0;i<active_size;i++)
993  if (be_shrunk(i, Gmax1, Gmax2))
994  {
995  active_size--;
996  while (active_size > i)
997  {
998  if (!be_shrunk(active_size, Gmax1, Gmax2))
999  {
1001  break;
1002  }
1003  active_size--;
1004  }
1005  }
1006 }
1007 
1009 {
1010  double r;
1011  int nr_free = 0;
1012  double ub = INF, lb = -INF, sum_free = 0;
1013  for(int i=0;i<active_size;i++)
1014  {
1015  double yG = y[i]*G[i];
1016 
1017  if(is_upper_bound(i))
1018  {
1019  if(y[i]==-1)
1020  ub = min(ub,yG);
1021  else
1022  lb = max(lb,yG);
1023  }
1024  else if(is_lower_bound(i))
1025  {
1026  if(y[i]==+1)
1027  ub = min(ub,yG);
1028  else
1029  lb = max(lb,yG);
1030  }
1031  else
1032  {
1033  ++nr_free;
1034  sum_free += yG;
1035  }
1036  }
1037 
1038  if(nr_free>0)
1039  r = sum_free/nr_free;
1040  else
1041  r = (ub+lb)/2;
1042 
1043  return r;
1044 }
1045 
1046 //
1047 // Solver for nu-svm classification and regression
1048 //
1049 // additional constraint: e^T \alpha = constant
1050 //
1051 class Solver_NU : public Solver
1052 {
1053 public:
1055  void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
1056  double *alpha, double* C_, double eps,
1057  SolutionInfo* si, int shrinking)
1058  {
1059  this->si = si;
1060  Solver::Solve(l,Q,p,y,alpha,C_,eps,si,shrinking);
1061  }
1062 private:
1063  SolutionInfo *si;
1064  int select_working_set(int &i, int &j);
1065  double calculate_rho();
1066  bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
1067  void do_shrinking();
1068 };
1069 
1070 // return 1 if already optimal, return 0 otherwise
1071 int Solver_NU::select_working_set(int &out_i, int &out_j)
1072 {
1073  // return i,j such that y_i = y_j and
1074  // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
1075  // j: minimizes the decrease of obj value
1076  // (if quadratic coefficeint <= 0, replace it with tau)
1077  // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
1078 
1079  double Gmaxp = -INF;
1080  double Gmaxp2 = -INF;
1081  int Gmaxp_idx = -1;
1082 
1083  double Gmaxn = -INF;
1084  double Gmaxn2 = -INF;
1085  int Gmaxn_idx = -1;
1086 
1087  int Gmin_idx = -1;
1088  double obj_diff_min = INF;
1089 
1090  for(int t=0;t<active_size;t++)
1091  if(y[t]==+1)
1092  {
1093  if(!is_upper_bound(t))
1094  if(-G[t] >= Gmaxp)
1095  {
1096  Gmaxp = -G[t];
1097  Gmaxp_idx = t;
1098  }
1099  }
1100  else
1101  {
1102  if(!is_lower_bound(t))
1103  if(G[t] >= Gmaxn)
1104  {
1105  Gmaxn = G[t];
1106  Gmaxn_idx = t;
1107  }
1108  }
1109 
1110  int ip = Gmaxp_idx;
1111  int in = Gmaxn_idx;
1112  const Qfloat *Q_ip = NULL;
1113  const Qfloat *Q_in = NULL;
1114  if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1115  Q_ip = Q->get_Q(ip,active_size);
1116  if(in != -1)
1117  Q_in = Q->get_Q(in,active_size);
1118 
1119  for(int j=0;j<active_size;j++)
1120  {
1121  if(y[j]==+1)
1122  {
1123  if (!is_lower_bound(j))
1124  {
1125  double grad_diff=Gmaxp+G[j];
1126  if (G[j] >= Gmaxp2)
1127  Gmaxp2 = G[j];
1128  if (grad_diff > 0)
1129  {
1130  double obj_diff;
1131  double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
1132  if (quad_coef > 0)
1133  obj_diff = -(grad_diff*grad_diff)/quad_coef;
1134  else
1135  obj_diff = -(grad_diff*grad_diff)/TAU;
1136 
1137  if (obj_diff <= obj_diff_min)
1138  {
1139  Gmin_idx=j;
1140  obj_diff_min = obj_diff;
1141  }
1142  }
1143  }
1144  }
1145  else
1146  {
1147  if (!is_upper_bound(j))
1148  {
1149  double grad_diff=Gmaxn-G[j];
1150  if (-G[j] >= Gmaxn2)
1151  Gmaxn2 = -G[j];
1152  if (grad_diff > 0)
1153  {
1154  double obj_diff;
1155  double quad_coef = QD[in]+QD[j]-2*Q_in[j];
1156  if (quad_coef > 0)
1157  obj_diff = -(grad_diff*grad_diff)/quad_coef;
1158  else
1159  obj_diff = -(grad_diff*grad_diff)/TAU;
1160 
1161  if (obj_diff <= obj_diff_min)
1162  {
1163  Gmin_idx=j;
1164  obj_diff_min = obj_diff;
1165  }
1166  }
1167  }
1168  }
1169  }
1170 
1171  if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
1172  return 1;
1173 
1174  if (y[Gmin_idx] == +1)
1175  out_i = Gmaxp_idx;
1176  else
1177  out_i = Gmaxn_idx;
1178  out_j = Gmin_idx;
1179 
1180  return 0;
1181 }
1182 
1183 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1184 {
1185  if(is_upper_bound(i))
1186  {
1187  if(y[i]==+1)
1188  return(-G[i] > Gmax1);
1189  else
1190  return(-G[i] > Gmax4);
1191  }
1192  else if(is_lower_bound(i))
1193  {
1194  if(y[i]==+1)
1195  return(G[i] > Gmax2);
1196  else
1197  return(G[i] > Gmax3);
1198  }
1199  else
1200  return(false);
1201 }
1202 
1204 {
1205  double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
1206  double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
1207  double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
1208  double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
1209 
1210  // find maximal violating pair first
1211  int i;
1212  for(i=0;i<active_size;i++)
1213  {
1214  if(!is_upper_bound(i))
1215  {
1216  if(y[i]==+1)
1217  {
1218  if(-G[i] > Gmax1) Gmax1 = -G[i];
1219  }
1220  else if(-G[i] > Gmax4) Gmax4 = -G[i];
1221  }
1222  if(!is_lower_bound(i))
1223  {
1224  if(y[i]==+1)
1225  {
1226  if(G[i] > Gmax2) Gmax2 = G[i];
1227  }
1228  else if(G[i] > Gmax3) Gmax3 = G[i];
1229  }
1230  }
1231 
1232  if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
1233  {
1234  unshrink = true;
1236  active_size = l;
1237  }
1238 
1239  for(i=0;i<active_size;i++)
1240  if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1241  {
1242  active_size--;
1243  while (active_size > i)
1244  {
1245  if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1246  {
1248  break;
1249  }
1250  active_size--;
1251  }
1252  }
1253 }
1254 
1256 {
1257  int nr_free1 = 0,nr_free2 = 0;
1258  double ub1 = INF, ub2 = INF;
1259  double lb1 = -INF, lb2 = -INF;
1260  double sum_free1 = 0, sum_free2 = 0;
1261 
1262  for(int i=0;i<active_size;i++)
1263  {
1264  if(y[i]==+1)
1265  {
1266  if(is_upper_bound(i))
1267  lb1 = max(lb1,G[i]);
1268  else if(is_lower_bound(i))
1269  ub1 = min(ub1,G[i]);
1270  else
1271  {
1272  ++nr_free1;
1273  sum_free1 += G[i];
1274  }
1275  }
1276  else
1277  {
1278  if(is_upper_bound(i))
1279  lb2 = max(lb2,G[i]);
1280  else if(is_lower_bound(i))
1281  ub2 = min(ub2,G[i]);
1282  else
1283  {
1284  ++nr_free2;
1285  sum_free2 += G[i];
1286  }
1287  }
1288  }
1289 
1290  double r1,r2;
1291  if(nr_free1 > 0)
1292  r1 = sum_free1/nr_free1;
1293  else
1294  r1 = (ub1+lb1)/2;
1295 
1296  if(nr_free2 > 0)
1297  r2 = sum_free2/nr_free2;
1298  else
1299  r2 = (ub2+lb2)/2;
1300 
1301  si->r = (r1+r2)/2;
1302  return (r1-r2)/2;
1303 }
1304 
1305 //
1306 // Q matrices for various formulations
1307 //
1308 class SVC_Q: public Kernel
1309 {
1310 public:
1311  SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1312  :Kernel(prob.l, prob.x, param)
1313  {
1314  clone(y,y_,prob.l);
1315  cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1316  QD = new double[prob.l];
1317  for(int i=0;i<prob.l;i++)
1318  QD[i] = (this->*kernel_function)(i,i);
1319  }
1320 
1321  Qfloat *get_Q(int i, int len) const
1322  {
1323  Qfloat *data;
1324  int start, j;
1325  if((start = cache->get_data(i,&data,len)) < len)
1326  {
1327  for(j=start;j<len;j++)
1328  data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
1329  }
1330  return data;
1331  }
1332 
1333  double *get_QD() const
1334  {
1335  return QD;
1336  }
1337 
1338  void swap_index(int i, int j) const
1339  {
1340  cache->swap_index(i,j);
1341  Kernel::swap_index(i,j);
1342  swap(y[i],y[j]);
1343  swap(QD[i],QD[j]);
1344  }
1345 
1347  {
1348  delete[] y;
1349  delete cache;
1350  delete[] QD;
1351  }
1352 private:
1355  double *QD;
1356 };
1357 
1358 class ONE_CLASS_Q: public Kernel
1359 {
1360 public:
1361  ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1362  :Kernel(prob.l, prob.x, param)
1363  {
1364  cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1365  QD = new double[prob.l];
1366  for(int i=0;i<prob.l;i++)
1367  QD[i] = (this->*kernel_function)(i,i);
1368  }
1369 
1370  Qfloat *get_Q(int i, int len) const
1371  {
1372  Qfloat *data;
1373  int start, j;
1374  if((start = cache->get_data(i,&data,len)) < len)
1375  {
1376  for(j=start;j<len;j++)
1377  data[j] = (Qfloat)(this->*kernel_function)(i,j);
1378  }
1379  return data;
1380  }
1381 
1382  double *get_QD() const
1383  {
1384  return QD;
1385  }
1386 
1387  void swap_index(int i, int j) const
1388  {
1389  cache->swap_index(i,j);
1390  Kernel::swap_index(i,j);
1391  swap(QD[i],QD[j]);
1392  }
1393 
1395  {
1396  delete cache;
1397  delete[] QD;
1398  }
1399 private:
1401  double *QD;
1402 };
1403 
1404 class SVR_Q: public Kernel
1405 {
1406 public:
1407  SVR_Q(const svm_problem& prob, const svm_parameter& param)
1408  :Kernel(prob.l, prob.x, param)
1409  {
1410  l = prob.l;
1411  cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
1412  QD = new double[2*l];
1413  sign = new schar[2*l];
1414  index = new int[2*l];
1415  for(int k=0;k<l;k++)
1416  {
1417  sign[k] = 1;
1418  sign[k+l] = -1;
1419  index[k] = k;
1420  index[k+l] = k;
1421  QD[k] = (this->*kernel_function)(k,k);
1422  QD[k+l] = QD[k];
1423  }
1424  buffer[0] = new Qfloat[2*l];
1425  buffer[1] = new Qfloat[2*l];
1426  next_buffer = 0;
1427  }
1428 
1429  void swap_index(int i, int j) const
1430  {
1431  swap(sign[i],sign[j]);
1432  swap(index[i],index[j]);
1433  swap(QD[i],QD[j]);
1434  }
1435 
1436  Qfloat *get_Q(int i, int len) const
1437  {
1438  Qfloat *data;
1439  int j, real_i = index[i];
1440  if(cache->get_data(real_i,&data,l) < l)
1441  {
1442  for(j=0;j<l;j++)
1443  data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
1444  }
1445 
1446  // reorder and copy
1447  Qfloat *buf = buffer[next_buffer];
1448  next_buffer = 1 - next_buffer;
1449  schar si = sign[i];
1450  for(j=0;j<len;j++)
1451  buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1452  return buf;
1453  }
1454 
1455  double *get_QD() const
1456  {
1457  return QD;
1458  }
1459 
1461  {
1462  delete cache;
1463  delete[] sign;
1464  delete[] index;
1465  delete[] buffer[0];
1466  delete[] buffer[1];
1467  delete[] QD;
1468  }
1469 private:
1470  int l;
1473  int *index;
1474  mutable int next_buffer;
1476  double *QD;
1477 };
1478 #include <iostream>
1479 //
1480 // construct and solve various formulations
1481 //
1482 static void solve_c_svc(
1483  const svm_problem *prob, const svm_parameter* param,
1484  double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
1485 {
1486  int l = prob->l;
1487  double *minus_ones = new double[l];
1488  schar *y = new schar[l];
1489  double *C = new double[l];
1490 
1491  int i;
1492 
1493  for(i=0;i<l;i++)
1494  {
1495  alpha[i] = 0;
1496  minus_ones[i] = -1;
1497  if(prob->y[i] > 0)
1498  {
1499  y[i] = +1;
1500  C[i] = prob->W[i]*Cp;
1501  }
1502  else
1503  {
1504  y[i] = -1;
1505  C[i] = prob->W[i]*Cn;
1506  }
1507  //std::cout << C[i] << " ";
1508  }
1509  //std::cout << std::endl;
1510 
1511  Solver s;
1512  s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1513  alpha, C, param->eps, si, param->shrinking);
1514 
1515  /*
1516  double sum_alpha=0;
1517  for(i=0;i<l;i++)
1518  sum_alpha += alpha[i];
1519  if (Cp==Cn)
1520  info("nu = %f\n", sum_alpha/(Cp*prob->l));
1521  */
1522 
1523  for(i=0;i<l;i++)
1524  alpha[i] *= y[i];
1525 
1526  delete[] C;
1527  delete[] minus_ones;
1528  delete[] y;
1529 }
1530 
1531 static void solve_nu_svc(
1532  const svm_problem *prob, const svm_parameter *param,
1533  double *alpha, Solver::SolutionInfo* si)
1534 {
1535  int i;
1536  int l = prob->l;
1537  double nu = param->nu;
1538 
1539  schar *y = new schar[l];
1540  double *C = new double[l];
1541 
1542  for(i=0;i<l;i++)
1543  {
1544  if(prob->y[i]>0)
1545  y[i] = +1;
1546  else
1547  y[i] = -1;
1548  C[i] = prob->W[i];
1549  }
1550 
1551  double nu_l = 0;
1552  for(i=0;i<l;i++) nu_l += nu*C[i];
1553  double sum_pos = nu_l/2;
1554  double sum_neg = nu_l/2;
1555 
1556  for(i=0;i<l;i++)
1557  if(y[i] == +1)
1558  {
1559  alpha[i] = min(C[i],sum_pos);
1560  sum_pos -= alpha[i];
1561  }
1562  else
1563  {
1564  alpha[i] = min(C[i],sum_neg);
1565  sum_neg -= alpha[i];
1566  }
1567 
1568  double *zeros = new double[l];
1569 
1570  for(i=0;i<l;i++)
1571  zeros[i] = 0;
1572 
1573  Solver_NU s;
1574  s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1575  alpha, C, param->eps, si, param->shrinking);
1576  double r = si->r;
1577 
1578  info("C = %f\n",1/r);
1579 
1580  for(i=0;i<l;i++)
1581  {
1582  alpha[i] *= y[i]/r;
1583  si->upper_bound[i] /= r;
1584  }
1585 
1586  si->rho /= r;
1587  si->obj /= (r*r);
1588 
1589  delete[] C;
1590  delete[] y;
1591  delete[] zeros;
1592 }
1593 
1594 static void solve_one_class(
1595  const svm_problem *prob, const svm_parameter *param,
1596  double *alpha, Solver::SolutionInfo* si)
1597 {
1598  int l = prob->l;
1599  double *zeros = new double[l];
1600  schar *ones = new schar[l];
1601  double *C = new double[l];
1602  int i;
1603 
1604  double nu_l = 0;
1605 
1606  for(i=0;i<l;i++)
1607  {
1608  C[i] = prob->W[i];
1609  nu_l += C[i] * param->nu;
1610  }
1611 
1612  i = 0;
1613  while(nu_l > 0)
1614  {
1615  alpha[i] = min(C[i],nu_l);
1616  nu_l -= alpha[i];
1617  ++i;
1618  }
1619  for(;i<l;i++)
1620  alpha[i] = 0;
1621 
1622  for(i=0;i<l;i++)
1623  {
1624  zeros[i] = 0;
1625  ones[i] = 1;
1626  }
1627 
1628  Solver s;
1629  s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1630  alpha, C, param->eps, si, param->shrinking);
1631 
1632  delete[] C;
1633  delete[] zeros;
1634  delete[] ones;
1635 }
1636 
1637 static void solve_epsilon_svr(
1638  const svm_problem *prob, const svm_parameter *param,
1639  double *alpha, Solver::SolutionInfo* si)
1640 {
1641  int l = prob->l;
1642  double *alpha2 = new double[2*l];
1643  double *linear_term = new double[2*l];
1644  double *C = new double[2*l];
1645  schar *y = new schar[2*l];
1646  int i;
1647 
1648  for(i=0;i<l;i++)
1649  {
1650  alpha2[i] = 0;
1651  linear_term[i] = param->p - prob->y[i];
1652  y[i] = 1;
1653  C[i] = prob->W[i]*param->C;
1654 
1655  alpha2[i+l] = 0;
1656  linear_term[i+l] = param->p + prob->y[i];
1657  y[i+l] = -1;
1658  C[i+l] = prob->W[i]*param->C;
1659  }
1660 
1661  Solver s;
1662  s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1663  alpha2, C, param->eps, si, param->shrinking);
1664  double sum_alpha = 0;
1665  for(i=0;i<l;i++)
1666  {
1667  alpha[i] = alpha2[i] - alpha2[i+l];
1668  sum_alpha += fabs(alpha[i]);
1669  }
1670  //info("nu = %f\n",sum_alpha/(param->C*l));
1671  delete[] alpha2;
1672  delete[] linear_term;
1673  delete[] C;
1674  delete[] y;
1675 }
1676 
1677 static void solve_nu_svr(
1678  const svm_problem *prob, const svm_parameter *param,
1679  double *alpha, Solver::SolutionInfo* si)
1680 {
1681  int l = prob->l;
1682  double *C = new double[2*l];
1683  double *alpha2 = new double[2*l];
1684  double *linear_term = new double[2*l];
1685  schar *y = new schar[2*l];
1686  int i;
1687 
1688  double sum = 0;
1689  for(i=0;i<l;i++)
1690  {
1691  C[i] = C[i+l] = prob->W[i]*param->C;
1692  sum += C[i] * param->nu;
1693  }
1694  sum /= 2;
1695 
1696  for(i=0;i<l;i++)
1697  {
1698  alpha2[i] = alpha2[i+l] = min(sum,C[i]);
1699  sum -= alpha2[i];
1700 
1701  linear_term[i] = - prob->y[i];
1702  y[i] = 1;
1703 
1704  linear_term[i+l] = prob->y[i];
1705  y[i+l] = -1;
1706  }
1707 
1708  Solver_NU s;
1709  s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1710  alpha2, C, param->eps, si, param->shrinking);
1711 
1712  info("epsilon = %f\n",-si->r);
1713 
1714  for(i=0;i<l;i++)
1715  alpha[i] = alpha2[i] - alpha2[i+l];
1716 
1717  delete[] alpha2;
1718  delete[] linear_term;
1719  delete[] C;
1720  delete[] y;
1721 }
1722 
1723 //
1724 // decision_function
1725 //
1727 {
1728  double *alpha;
1729  double rho;
1730 };
1731 
1733  const svm_problem *prob, const svm_parameter *param,
1734  double Cp, double Cn)
1735 {
1736  double *alpha = Malloc(double,prob->l);
1738  switch(param->svm_type)
1739  {
1740  case C_SVC:
1741  si.upper_bound = Malloc(double,prob->l);
1742  solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1743  break;
1744  case NU_SVC:
1745  si.upper_bound = Malloc(double,prob->l);
1746  solve_nu_svc(prob,param,alpha,&si);
1747  break;
1748  case ONE_CLASS:
1749  si.upper_bound = Malloc(double,prob->l);
1750  solve_one_class(prob,param,alpha,&si);
1751  break;
1752  case EPSILON_SVR:
1753  si.upper_bound = Malloc(double,2*prob->l);
1754  solve_epsilon_svr(prob,param,alpha,&si);
1755  break;
1756  case NU_SVR:
1757  si.upper_bound = Malloc(double,2*prob->l);
1758  solve_nu_svr(prob,param,alpha,&si);
1759  break;
1760  }
1761 
1762  info("obj = %f, rho = %f\n",si.obj,si.rho);
1763 
1764  // output SVs
1765 
1766  int nSV = 0;
1767  int nBSV = 0;
1768  for(int i=0;i<prob->l;i++)
1769  {
1770  if(fabs(alpha[i]) > 0)
1771  {
1772  ++nSV;
1773  if(prob->y[i] > 0)
1774  {
1775  if(fabs(alpha[i]) >= si.upper_bound[i])
1776  ++nBSV;
1777  }
1778  else
1779  {
1780  if(fabs(alpha[i]) >= si.upper_bound[i])
1781  ++nBSV;
1782  }
1783  }
1784  }
1785 
1786  free(si.upper_bound);
1787 
1788  info("nSV = %d, nBSV = %d\n",nSV,nBSV);
1789 
1791  f.alpha = alpha;
1792  f.rho = si.rho;
1793  return f;
1794 }
1795 
1796 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
1797 static void sigmoid_train(
1798  int l, const double *dec_values, const double *labels,
1799  double& A, double& B)
1800 {
1801  double prior1=0, prior0 = 0;
1802  int i;
1803 
1804  for (i=0;i<l;i++)
1805  if (labels[i] > 0) prior1+=1;
1806  else prior0+=1;
1807 
1808  int max_iter=100; // Maximal number of iterations
1809  double min_step=1e-10; // Minimal step taken in line search
1810  double sigma=1e-12; // For numerically strict PD of Hessian
1811  double eps=1e-5;
1812  double hiTarget=(prior1+1.0)/(prior1+2.0);
1813  double loTarget=1/(prior0+2.0);
1814  double *t=Malloc(double,l);
1815  double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1816  double newA,newB,newf,d1,d2;
1817  int iter;
1818 
1819  // Initial Point and Initial Fun Value
1820  A=0.0; B=log((prior0+1.0)/(prior1+1.0));
1821  double fval = 0.0;
1822 
1823  for (i=0;i<l;i++)
1824  {
1825  if (labels[i]>0) t[i]=hiTarget;
1826  else t[i]=loTarget;
1827  fApB = dec_values[i]*A+B;
1828  if (fApB>=0)
1829  fval += t[i]*fApB + log(1+exp(-fApB));
1830  else
1831  fval += (t[i] - 1)*fApB +log(1+exp(fApB));
1832  }
1833  for (iter=0;iter<max_iter;iter++)
1834  {
1835  // Update Gradient and Hessian (use H' = H + sigma I)
1836  h11=sigma; // numerically ensures strict PD
1837  h22=sigma;
1838  h21=0.0;g1=0.0;g2=0.0;
1839  for (i=0;i<l;i++)
1840  {
1841  fApB = dec_values[i]*A+B;
1842  if (fApB >= 0)
1843  {
1844  p=exp(-fApB)/(1.0+exp(-fApB));
1845  q=1.0/(1.0+exp(-fApB));
1846  }
1847  else
1848  {
1849  p=1.0/(1.0+exp(fApB));
1850  q=exp(fApB)/(1.0+exp(fApB));
1851  }
1852  d2=p*q;
1853  h11+=dec_values[i]*dec_values[i]*d2;
1854  h22+=d2;
1855  h21+=dec_values[i]*d2;
1856  d1=t[i]-p;
1857  g1+=dec_values[i]*d1;
1858  g2+=d1;
1859  }
1860 
1861  // Stopping Criteria
1862  if (fabs(g1)<eps && fabs(g2)<eps)
1863  break;
1864 
1865  // Finding Newton direction: -inv(H') * g
1866  det=h11*h22-h21*h21;
1867  dA=-(h22*g1 - h21 * g2) / det;
1868  dB=-(-h21*g1+ h11 * g2) / det;
1869  gd=g1*dA+g2*dB;
1870 
1871 
1872  stepsize = 1; // Line Search
1873  while (stepsize >= min_step)
1874  {
1875  newA = A + stepsize * dA;
1876  newB = B + stepsize * dB;
1877 
1878  // New function value
1879  newf = 0.0;
1880  for (i=0;i<l;i++)
1881  {
1882  fApB = dec_values[i]*newA+newB;
1883  if (fApB >= 0)
1884  newf += t[i]*fApB + log(1+exp(-fApB));
1885  else
1886  newf += (t[i] - 1)*fApB +log(1+exp(fApB));
1887  }
1888  // Check sufficient decrease
1889  if (newf<fval+0.0001*stepsize*gd)
1890  {
1891  A=newA;B=newB;fval=newf;
1892  break;
1893  }
1894  else
1895  stepsize = stepsize / 2.0;
1896  }
1897 
1898  if (stepsize < min_step)
1899  {
1900  info("Line search fails in two-class probability estimates\n");
1901  break;
1902  }
1903  }
1904 
1905  if (iter>=max_iter)
1906  info("Reaching maximal iterations in two-class probability estimates\n");
1907  free(t);
1908 }
1909 
1910 static double sigmoid_predict(double decision_value, double A, double B)
1911 {
1912  double fApB = decision_value*A+B;
1913  // 1-p used later; avoid catastrophic cancellation
1914  if (fApB >= 0)
1915  return exp(-fApB)/(1.0+exp(-fApB));
1916  else
1917  return 1.0/(1+exp(fApB)) ;
1918 }
1919 
1920 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1921 static void multiclass_probability(int k, double **r, double *p)
1922 {
1923  int t,j;
1924  int iter = 0, max_iter=max(100,k);
1925  double **Q=Malloc(double *,k);
1926  double *Qp=Malloc(double,k);
1927  double pQp, eps=0.005/k;
1928 
1929  for (t=0;t<k;t++)
1930  {
1931  p[t]=1.0/k; // Valid if k = 1
1932  Q[t]=Malloc(double,k);
1933  Q[t][t]=0;
1934  for (j=0;j<t;j++)
1935  {
1936  Q[t][t]+=r[j][t]*r[j][t];
1937  Q[t][j]=Q[j][t];
1938  }
1939  for (j=t+1;j<k;j++)
1940  {
1941  Q[t][t]+=r[j][t]*r[j][t];
1942  Q[t][j]=-r[j][t]*r[t][j];
1943  }
1944  }
1945  for (iter=0;iter<max_iter;iter++)
1946  {
1947  // stopping condition, recalculate QP,pQP for numerical accuracy
1948  pQp=0;
1949  for (t=0;t<k;t++)
1950  {
1951  Qp[t]=0;
1952  for (j=0;j<k;j++)
1953  Qp[t]+=Q[t][j]*p[j];
1954  pQp+=p[t]*Qp[t];
1955  }
1956  double max_error=0;
1957  for (t=0;t<k;t++)
1958  {
1959  double error=fabs(Qp[t]-pQp);
1960  if (error>max_error)
1961  max_error=error;
1962  }
1963  if (max_error<eps) break;
1964 
1965  for (t=0;t<k;t++)
1966  {
1967  double diff=(-Qp[t]+pQp)/Q[t][t];
1968  p[t]+=diff;
1969  pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1970  for (j=0;j<k;j++)
1971  {
1972  Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1973  p[j]/=(1+diff);
1974  }
1975  }
1976  }
1977  if (iter>=max_iter)
1978  info("Exceeds max_iter in multiclass_prob\n");
1979  for(t=0;t<k;t++) free(Q[t]);
1980  free(Q);
1981  free(Qp);
1982 }
1983 
1984 // Cross-validation decision values for probability estimates
1986  const svm_problem *prob, const svm_parameter *param,
1987  double Cp, double Cn, double& probA, double& probB)
1988 {
1989  int i;
1990  int nr_fold = 5;
1991  int *perm = Malloc(int,prob->l);
1992  double *dec_values = Malloc(double,prob->l);
1993 
1994  // random shuffle
1995  for(i=0;i<prob->l;i++) perm[i]=i;
1996  for(i=0;i<prob->l;i++)
1997  {
1998  int j = i+rand()%(prob->l-i);
1999  swap(perm[i],perm[j]);
2000  }
2001  for(i=0;i<nr_fold;i++)
2002  {
2003  int begin = i*prob->l/nr_fold;
2004  int end = (i+1)*prob->l/nr_fold;
2005  int j,k;
2006  struct svm_problem subprob;
2007 
2008  subprob.l = prob->l-(end-begin);
2009  subprob.x = Malloc(struct svm_node*,subprob.l);
2010  subprob.y = Malloc(double,subprob.l);
2011  subprob.W = Malloc(double,subprob.l);
2012 
2013  k=0;
2014  for(j=0;j<begin;j++)
2015  {
2016  subprob.x[k] = prob->x[perm[j]];
2017  subprob.y[k] = prob->y[perm[j]];
2018  subprob.W[k] = prob->W[perm[j]];
2019  ++k;
2020  }
2021  for(j=end;j<prob->l;j++)
2022  {
2023  subprob.x[k] = prob->x[perm[j]];
2024  subprob.y[k] = prob->y[perm[j]];
2025  subprob.W[k] = prob->W[perm[j]];
2026  ++k;
2027  }
2028  int p_count=0,n_count=0;
2029  for(j=0;j<k;j++)
2030  if(subprob.y[j]>0)
2031  p_count++;
2032  else
2033  n_count++;
2034 
2035  if(p_count==0 && n_count==0)
2036  for(j=begin;j<end;j++)
2037  dec_values[perm[j]] = 0;
2038  else if(p_count > 0 && n_count == 0)
2039  for(j=begin;j<end;j++)
2040  dec_values[perm[j]] = 1;
2041  else if(p_count == 0 && n_count > 0)
2042  for(j=begin;j<end;j++)
2043  dec_values[perm[j]] = -1;
2044  else
2045  {
2046  svm_parameter subparam = *param;
2047  subparam.probability=0;
2048  subparam.C=1.0;
2049  subparam.nr_weight=2;
2050  subparam.weight_label = Malloc(int,2);
2051  subparam.weight = Malloc(double,2);
2052  subparam.weight_label[0]=+1;
2053  subparam.weight_label[1]=-1;
2054  subparam.weight[0]=Cp;
2055  subparam.weight[1]=Cn;
2056  struct svm_model *submodel = svm_train(&subprob,&subparam);
2057  for(j=begin;j<end;j++)
2058  {
2059  svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
2060  // ensure +1 -1 order; reason not using CV subroutine
2061  dec_values[perm[j]] *= submodel->label[0];
2062  }
2063  svm_free_and_destroy_model(&submodel);
2064  svm_destroy_param(&subparam);
2065  }
2066  free(subprob.x);
2067  free(subprob.y);
2068  free(subprob.W);
2069  }
2070  sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
2071  free(dec_values);
2072  free(perm);
2073 }
2074 
2075 // Return parameter of a Laplace distribution
2076 static double svm_svr_probability(
2077  const svm_problem *prob, const svm_parameter *param)
2078 {
2079  int i;
2080  int nr_fold = 5;
2081  double *ymv = Malloc(double,prob->l);
2082  double mae = 0;
2083 
2084  svm_parameter newparam = *param;
2085  newparam.probability = 0;
2086  svm_cross_validation(prob,&newparam,nr_fold,ymv);
2087  for(i=0;i<prob->l;i++)
2088  {
2089  ymv[i]=prob->y[i]-ymv[i];
2090  mae += fabs(ymv[i]);
2091  }
2092  mae /= prob->l;
2093  double std=sqrt(2*mae*mae);
2094  int count=0;
2095  mae=0;
2096  for(i=0;i<prob->l;i++)
2097  if (fabs(ymv[i]) > 5*std)
2098  count=count+1;
2099  else
2100  mae+=fabs(ymv[i]);
2101  mae /= (prob->l-count);
2102  info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2103  free(ymv);
2104  return mae;
2105 }
2106 
2107 
2108 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2109 // perm, length l, must be allocated before calling this subroutine
2110 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2111 {
2112  int l = prob->l;
2113  int max_nr_class = 16;
2114  int nr_class = 0;
2115  int *label = Malloc(int,max_nr_class);
2116  int *count = Malloc(int,max_nr_class);
2117  int *data_label = Malloc(int,l);
2118  int i;
2119 
2120  for(i=0;i<l;i++)
2121  {
2122  int this_label = (int)prob->y[i];
2123  int j;
2124  for(j=0;j<nr_class;j++)
2125  {
2126  if(this_label == label[j])
2127  {
2128  ++count[j];
2129  break;
2130  }
2131  }
2132  data_label[i] = j;
2133  if(j == nr_class)
2134  {
2135  if(nr_class == max_nr_class)
2136  {
2137  max_nr_class *= 2;
2138  label = (int *)realloc(label,max_nr_class*sizeof(int));
2139  count = (int *)realloc(count,max_nr_class*sizeof(int));
2140  }
2141  label[nr_class] = this_label;
2142  count[nr_class] = 1;
2143  ++nr_class;
2144  }
2145  }
2146 
2147  int *start = Malloc(int,nr_class);
2148  start[0] = 0;
2149  for(i=1;i<nr_class;i++)
2150  start[i] = start[i-1]+count[i-1];
2151  for(i=0;i<l;i++)
2152  {
2153  perm[start[data_label[i]]] = i;
2154  ++start[data_label[i]];
2155  }
2156  start[0] = 0;
2157  for(i=1;i<nr_class;i++)
2158  start[i] = start[i-1]+count[i-1];
2159 
2160  *nr_class_ret = nr_class;
2161  *label_ret = label;
2162  *start_ret = start;
2163  *count_ret = count;
2164  free(data_label);
2165 }
2166 
2167 //
2168 // Remove zero weighed data as libsvm and some liblinear solvers require C > 0.
2169 //
2170 static void remove_zero_weight(svm_problem *newprob, const svm_problem *prob)
2171 {
2172  int i;
2173  int l = 0;
2174  for(i=0;i<prob->l;i++)
2175  if(prob->W[i] > 0) l++;
2176  *newprob = *prob;
2177  newprob->l = l;
2178  newprob->x = Malloc(svm_node*,l);
2179  newprob->y = Malloc(double,l);
2180  newprob->W = Malloc(double,l);
2181 
2182  int j = 0;
2183  for(i=0;i<prob->l;i++)
2184  if(prob->W[i] > 0)
2185  {
2186  newprob->x[j] = prob->x[i];
2187  newprob->y[j] = prob->y[i];
2188  newprob->W[j] = prob->W[i];
2189  j++;
2190  }
2191 }
2192 
2193 //
2194 // Interface functions
2195 //
2197 {
2198  svm_problem newprob;
2199  remove_zero_weight(&newprob, prob);
2200  prob = &newprob;
2201 
2202  svm_model *model = Malloc(svm_model,1);
2203  model->param = *param;
2204  model->free_sv = 0; // XXX
2205 
2206  if(param->svm_type == ONE_CLASS ||
2207  param->svm_type == EPSILON_SVR ||
2208  param->svm_type == NU_SVR)
2209  {
2210  // regression or one-class-svm
2211  model->nr_class = 2;
2212  model->label = NULL;
2213  model->nSV = NULL;
2214  model->probA = NULL; model->probB = NULL;
2215  model->sv_coef = Malloc(double *,1);
2216 
2217  if(param->probability &&
2218  (param->svm_type == EPSILON_SVR ||
2219  param->svm_type == NU_SVR))
2220  {
2221  model->probA = Malloc(double,1);
2222  model->probA[0] = svm_svr_probability(prob,param);
2223  }
2224 
2225  decision_function f = svm_train_one(prob,param,0,0);
2226  model->rho = Malloc(double,1);
2227  model->rho[0] = f.rho;
2228 
2229  int nSV = 0;
2230  int i;
2231  for(i=0;i<prob->l;i++)
2232  if(fabs(f.alpha[i]) > 0) ++nSV;
2233  model->l = nSV;
2234  model->SV = Malloc(svm_node *,nSV);
2235  model->sv_coef[0] = Malloc(double,nSV);
2236  model->sv_indices = Malloc(int,nSV);
2237  int j = 0;
2238  for(i=0;i<prob->l;i++)
2239  if(fabs(f.alpha[i]) > 0)
2240  {
2241  model->SV[j] = prob->x[i];
2242  model->sv_coef[0][j] = f.alpha[i];
2243  model->sv_indices[j] = i+1;
2244  ++j;
2245  }
2246 
2247  free(f.alpha);
2248  }
2249  else
2250  {
2251  // classification
2252  int l = prob->l;
2253  int nr_class;
2254  int *label = NULL;
2255  int *start = NULL;
2256  int *count = NULL;
2257  int *perm = Malloc(int,l);
2258 
2259  // group training data of the same class
2260  svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2261  if(nr_class == 1)
2262  info("WARNING: training data in only one class. See README for details.\n");
2263 
2264  svm_node **x = Malloc(svm_node *,l);
2265  double *W;
2266  W = Malloc(double,l);
2267 
2268  int i;
2269  for(i=0;i<l;i++)
2270  {
2271  x[i] = prob->x[perm[i]];
2272  W[i] = prob->W[perm[i]];
2273  }
2274 
2275  // calculate weighted C
2276 
2277  double *weighted_C = Malloc(double, nr_class);
2278  for(i=0;i<nr_class;i++)
2279  weighted_C[i] = param->C;
2280  for(i=0;i<param->nr_weight;i++)
2281  {
2282  int j;
2283  for(j=0;j<nr_class;j++)
2284  if(param->weight_label[i] == label[j])
2285  break;
2286  if(j == nr_class)
2287  fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2288  else
2289  weighted_C[j] *= param->weight[i];
2290  }
2291 
2292  // train k*(k-1)/2 models
2293 
2294  bool *nonzero = Malloc(bool,l);
2295  for(i=0;i<l;i++)
2296  nonzero[i] = false;
2298 
2299  double *probA=NULL,*probB=NULL;
2300  if (param->probability)
2301  {
2302  probA=Malloc(double,nr_class*(nr_class-1)/2);
2303  probB=Malloc(double,nr_class*(nr_class-1)/2);
2304  }
2305 
2306  int p = 0;
2307  for(i=0;i<nr_class;i++)
2308  for(int j=i+1;j<nr_class;j++)
2309  {
2310  svm_problem sub_prob;
2311  int si = start[i], sj = start[j];
2312  int ci = count[i], cj = count[j];
2313  sub_prob.l = ci+cj;
2314  sub_prob.x = Malloc(svm_node *,sub_prob.l);
2315  sub_prob.y = Malloc(double,sub_prob.l);
2316  sub_prob.W = Malloc(double,sub_prob.l);
2317  int k;
2318  for(k=0;k<ci;k++)
2319  {
2320  sub_prob.x[k] = x[si+k];
2321  sub_prob.y[k] = +1;
2322  sub_prob.W[k] = W[si+k];
2323  }
2324  for(k=0;k<cj;k++)
2325  {
2326  sub_prob.x[ci+k] = x[sj+k];
2327  sub_prob.y[ci+k] = -1;
2328  sub_prob.W[ci+k] = W[sj+k];
2329  }
2330 
2331  if(param->probability)
2332  svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
2333 
2334  f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
2335  for(k=0;k<ci;k++)
2336  if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
2337  nonzero[si+k] = true;
2338  for(k=0;k<cj;k++)
2339  if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
2340  nonzero[sj+k] = true;
2341  free(sub_prob.x);
2342  free(sub_prob.y);
2343  free(sub_prob.W);
2344  ++p;
2345  }
2346 
2347  // build output
2348 
2349  model->nr_class = nr_class;
2350 
2351  model->label = Malloc(int,nr_class);
2352  for(i=0;i<nr_class;i++)
2353  model->label[i] = label[i];
2354 
2355  model->rho = Malloc(double,nr_class*(nr_class-1)/2);
2356  for(i=0;i<nr_class*(nr_class-1)/2;i++)
2357  model->rho[i] = f[i].rho;
2358 
2359  if(param->probability)
2360  {
2361  model->probA = Malloc(double,nr_class*(nr_class-1)/2);
2362  model->probB = Malloc(double,nr_class*(nr_class-1)/2);
2363  for(i=0;i<nr_class*(nr_class-1)/2;i++)
2364  {
2365  model->probA[i] = probA[i];
2366  model->probB[i] = probB[i];
2367  }
2368  }
2369  else
2370  {
2371  model->probA=NULL;
2372  model->probB=NULL;
2373  }
2374 
2375  int total_sv = 0;
2376  int *nz_count = Malloc(int,nr_class);
2377  model->nSV = Malloc(int,nr_class);
2378  for(i=0;i<nr_class;i++)
2379  {
2380  int nSV = 0;
2381  for(int j=0;j<count[i];j++)
2382  if(nonzero[start[i]+j])
2383  {
2384  ++nSV;
2385  ++total_sv;
2386  }
2387  model->nSV[i] = nSV;
2388  nz_count[i] = nSV;
2389  }
2390 
2391  info("Total nSV = %d\n",total_sv);
2392 
2393  model->l = total_sv;
2394  model->SV = Malloc(svm_node *,total_sv);
2395  model->sv_indices = Malloc(int,total_sv);
2396  p = 0;
2397  for(i=0;i<l;i++)
2398  if(nonzero[i])
2399  {
2400  model->SV[p] = x[i];
2401  model->sv_indices[p++] = perm[i] + 1;
2402  }
2403 
2404  int *nz_start = Malloc(int,nr_class);
2405  nz_start[0] = 0;
2406  for(i=1;i<nr_class;i++)
2407  nz_start[i] = nz_start[i-1]+nz_count[i-1];
2408 
2409  model->sv_coef = Malloc(double *,nr_class-1);
2410  for(i=0;i<nr_class-1;i++)
2411  model->sv_coef[i] = Malloc(double,total_sv);
2412 
2413  p = 0;
2414  for(i=0;i<nr_class;i++)
2415  for(int j=i+1;j<nr_class;j++)
2416  {
2417  // classifier (i,j): coefficients with
2418  // i are in sv_coef[j-1][nz_start[i]...],
2419  // j are in sv_coef[i][nz_start[j]...]
2420 
2421  int si = start[i];
2422  int sj = start[j];
2423  int ci = count[i];
2424  int cj = count[j];
2425 
2426  int q = nz_start[i];
2427  int k;
2428  for(k=0;k<ci;k++)
2429  if(nonzero[si+k])
2430  model->sv_coef[j-1][q++] = f[p].alpha[k];
2431  q = nz_start[j];
2432  for(k=0;k<cj;k++)
2433  if(nonzero[sj+k])
2434  model->sv_coef[i][q++] = f[p].alpha[ci+k];
2435  ++p;
2436  }
2437 
2438  free(label);
2439  free(probA);
2440  free(probB);
2441  free(count);
2442  free(perm);
2443  free(start);
2444  free(W);
2445  free(x);
2446  free(weighted_C);
2447  free(nonzero);
2448  for(i=0;i<nr_class*(nr_class-1)/2;i++)
2449  free(f[i].alpha);
2450  free(f);
2451  free(nz_count);
2452  free(nz_start);
2453  }
2454  free(newprob.x);
2455  free(newprob.y);
2456  free(newprob.W);
2457  return model;
2458 }
2459 
2460 // Stratified cross validation
2461 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
2462 {
2463  int i;
2464  int *fold_start = Malloc(int,nr_fold+1);
2465  int l = prob->l;
2466  int *perm = Malloc(int,l);
2467  int nr_class;
2468 
2469  // stratified cv may not give leave-one-out rate
2470  // Each class to l folds -> some folds may have zero elements
2471  if((param->svm_type == C_SVC ||
2472  param->svm_type == NU_SVC) && nr_fold < l)
2473  {
2474  int *start = NULL;
2475  int *label = NULL;
2476  int *count = NULL;
2477  svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2478 
2479  // random shuffle and then data grouped by fold using the array perm
2480  int *fold_count = Malloc(int,nr_fold);
2481  int c;
2482  int *index = Malloc(int,l);
2483  for(i=0;i<l;i++)
2484  index[i]=perm[i];
2485  for (c=0; c<nr_class; c++)
2486  for(i=0;i<count[c];i++)
2487  {
2488  int j = i+rand()%(count[c]-i);
2489  swap(index[start[c]+j],index[start[c]+i]);
2490  }
2491  for(i=0;i<nr_fold;i++)
2492  {
2493  fold_count[i] = 0;
2494  for (c=0; c<nr_class;c++)
2495  fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2496  }
2497  fold_start[0]=0;
2498  for (i=1;i<=nr_fold;i++)
2499  fold_start[i] = fold_start[i-1]+fold_count[i-1];
2500  for (c=0; c<nr_class;c++)
2501  for(i=0;i<nr_fold;i++)
2502  {
2503  int begin = start[c]+i*count[c]/nr_fold;
2504  int end = start[c]+(i+1)*count[c]/nr_fold;
2505  for(int j=begin;j<end;j++)
2506  {
2507  perm[fold_start[i]] = index[j];
2508  fold_start[i]++;
2509  }
2510  }
2511  fold_start[0]=0;
2512  for (i=1;i<=nr_fold;i++)
2513  fold_start[i] = fold_start[i-1]+fold_count[i-1];
2514  free(start);
2515  free(label);
2516  free(count);
2517  free(index);
2518  free(fold_count);
2519  }
2520  else
2521  {
2522  for(i=0;i<l;i++) perm[i]=i;
2523  for(i=0;i<l;i++)
2524  {
2525  int j = i+rand()%(l-i);
2526  swap(perm[i],perm[j]);
2527  }
2528  for(i=0;i<=nr_fold;i++)
2529  fold_start[i]=i*l/nr_fold;
2530  }
2531 
2532  for(i=0;i<nr_fold;i++)
2533  {
2534  int begin = fold_start[i];
2535  int end = fold_start[i+1];
2536  int j,k;
2537  struct svm_problem subprob;
2538 
2539  subprob.l = l-(end-begin);
2540  subprob.x = Malloc(struct svm_node*,subprob.l);
2541  subprob.y = Malloc(double,subprob.l);
2542 
2543  subprob.W = Malloc(double,subprob.l);
2544  k=0;
2545  for(j=0;j<begin;j++)
2546  {
2547  subprob.x[k] = prob->x[perm[j]];
2548  subprob.y[k] = prob->y[perm[j]];
2549  subprob.W[k] = prob->W[perm[j]];
2550  ++k;
2551  }
2552  for(j=end;j<l;j++)
2553  {
2554  subprob.x[k] = prob->x[perm[j]];
2555  subprob.y[k] = prob->y[perm[j]];
2556  subprob.W[k] = prob->W[perm[j]];
2557  ++k;
2558  }
2559  struct svm_model *submodel = svm_train(&subprob,param);
2560  if(param->probability &&
2561  (param->svm_type == C_SVC || param->svm_type == NU_SVC))
2562  {
2563  double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
2564  for(j=begin;j<end;j++)
2565  target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
2566  free(prob_estimates);
2567  }
2568  else
2569  for(j=begin;j<end;j++)
2570  target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
2571  svm_free_and_destroy_model(&submodel);
2572  free(subprob.x);
2573  free(subprob.y);
2574  free(subprob.W);
2575  }
2576  free(fold_start);
2577  free(perm);
2578 }
2579 
2580 
2581 int svm_get_svm_type(const svm_model *model)
2582 {
2583  return model->param.svm_type;
2584 }
2585 
2586 int svm_get_nr_class(const svm_model *model)
2587 {
2588  return model->nr_class;
2589 }
2590 
2591 void svm_get_labels(const svm_model *model, int* label)
2592 {
2593  if (model->label != NULL)
2594  for(int i=0;i<model->nr_class;i++)
2595  label[i] = model->label[i];
2596 }
2597 
2598 void svm_get_sv_indices(const svm_model *model, int* indices)
2599 {
2600  if (model->sv_indices != NULL)
2601  for(int i=0;i<model->l;i++)
2602  indices[i] = model->sv_indices[i];
2603 }
2604 
2605 int svm_get_nr_sv(const svm_model *model)
2606 {
2607  return model->l;
2608 }
2609 
2611 {
2612  if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2613  model->probA!=NULL)
2614  return model->probA[0];
2615  else
2616  {
2617  fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
2618  return 0;
2619  }
2620 }
2621 
2622 double svm_hyper_w_normsqr_twoclass(const struct svm_model* model)
2623 {
2624  assert(model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC);
2625  int i, j;
2626  // int nr_class = model->nr_class;
2627  // assert(nr_class == 2);
2628  int l = model->l;
2629 
2630  double sum = 0;
2631  double *coef = model->sv_coef[0];
2632 
2633  for(i=0;i<l;++i)
2634  for(j=0;j<l;++j)
2635  {
2636  double kv = Kernel::k_function(model->SV[i],model->SV[j],model->param);
2637  sum += kv * coef[i] * coef[j];
2638  }
2639 
2640  return sum;
2641 }
2642 
2643 double svm_predict_values_twoclass(const struct svm_model* model, const struct svm_node* x)
2644 {
2645 
2646  assert(model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC);
2647  int i;
2648  // int nr_class = model->nr_class;
2649  // assert(nr_class == 2);
2650  int l = model->l;
2651 
2652  double *kvalue = Malloc(double,l);
2653  for(i=0;i<l;i++)
2654  kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
2655 
2656 
2657  double sum = 0;
2658  double *coef = model->sv_coef[0];
2659  for(i=0;i<l;++i)
2660  sum += coef[i] * kvalue[i];
2661  sum -= model->rho[0];
2662 
2663  free(kvalue);
2664 
2665  return sum * model->label[0];
2666 }
2667 
2668 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
2669 {
2670  int i;
2671  if(model->param.svm_type == ONE_CLASS ||
2672  model->param.svm_type == EPSILON_SVR ||
2673  model->param.svm_type == NU_SVR)
2674  {
2675  double *sv_coef = model->sv_coef[0];
2676  double sum = 0;
2677  for(i=0;i<model->l;i++)
2678  sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
2679  sum -= model->rho[0];
2680  *dec_values = sum;
2681 
2682  if(model->param.svm_type == ONE_CLASS)
2683  return (sum>0)?1:-1;
2684  else
2685  return sum;
2686  }
2687  else
2688  {
2689  int nr_class = model->nr_class;
2690  int l = model->l;
2691 
2692  double *kvalue = Malloc(double,l);
2693  for(i=0;i<l;i++)
2694  kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
2695 
2696  int *start = Malloc(int,nr_class);
2697  start[0] = 0;
2698  for(i=1;i<nr_class;i++)
2699  start[i] = start[i-1]+model->nSV[i-1];
2700 
2701  int *vote = Malloc(int,nr_class);
2702  for(i=0;i<nr_class;i++)
2703  vote[i] = 0;
2704 
2705  int p=0;
2706  for(i=0;i<nr_class;i++)
2707  for(int j=i+1;j<nr_class;j++)
2708  {
2709  double sum = 0;
2710  int si = start[i];
2711  int sj = start[j];
2712  int ci = model->nSV[i];
2713  int cj = model->nSV[j];
2714 
2715  int k;
2716  double *coef1 = model->sv_coef[j-1];
2717  double *coef2 = model->sv_coef[i];
2718  for(k=0;k<ci;k++)
2719  sum += coef1[si+k] * kvalue[si+k];
2720  for(k=0;k<cj;k++)
2721  sum += coef2[sj+k] * kvalue[sj+k];
2722  sum -= model->rho[p];
2723  dec_values[p] = sum;
2724 
2725  if(dec_values[p] > 0)
2726  ++vote[i];
2727  else
2728  ++vote[j];
2729  p++;
2730  }
2731 
2732  int vote_max_idx = 0;
2733  for(i=1;i<nr_class;i++)
2734  if(vote[i] > vote[vote_max_idx])
2735  vote_max_idx = i;
2736 
2737  free(kvalue);
2738  free(start);
2739  free(vote);
2740  return model->label[vote_max_idx];
2741  }
2742 }
2743 
2744 double svm_predict(const svm_model *model, const svm_node *x)
2745 {
2746  int nr_class = model->nr_class;
2747  double *dec_values;
2748  if(model->param.svm_type == ONE_CLASS ||
2749  model->param.svm_type == EPSILON_SVR ||
2750  model->param.svm_type == NU_SVR)
2751  dec_values = Malloc(double, 1);
2752  else
2753  dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2754  double pred_result = svm_predict_values(model, x, dec_values);
2755  free(dec_values);
2756  return pred_result;
2757 }
2758 
2760  const svm_model *model, const svm_node *x, double *prob_estimates)
2761 {
2762  if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
2763  model->probA!=NULL && model->probB!=NULL)
2764  {
2765  int i;
2766  int nr_class = model->nr_class;
2767  double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2768  svm_predict_values(model, x, dec_values);
2769 
2770  double min_prob=1e-7;
2771  double **pairwise_prob=Malloc(double *,nr_class);
2772  for(i=0;i<nr_class;i++)
2773  pairwise_prob[i]=Malloc(double,nr_class);
2774  int k=0;
2775  for(i=0;i<nr_class;i++)
2776  for(int j=i+1;j<nr_class;j++)
2777  {
2778  pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
2779  pairwise_prob[j][i]=1-pairwise_prob[i][j];
2780  k++;
2781  }
2782  multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2783 
2784  int prob_max_idx = 0;
2785  for(i=1;i<nr_class;i++)
2786  if(prob_estimates[i] > prob_estimates[prob_max_idx])
2787  prob_max_idx = i;
2788  for(i=0;i<nr_class;i++)
2789  free(pairwise_prob[i]);
2790  free(dec_values);
2791  free(pairwise_prob);
2792  return model->label[prob_max_idx];
2793  }
2794  else
2795  return svm_predict(model, x);
2796 }
2797 
2798 static const char *svm_type_table[] =
2799  {
2800  "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
2801  };
2802 
2803 static const char *kernel_type_table[]=
2804  {
2805  "linear","polynomial","rbf","sigmoid","precomputed",NULL
2806  };
2807 
2808 int svm_save_model(const char *model_file_name, const svm_model *model)
2809 {
2810  FILE *fp = fopen(model_file_name,"w");
2811  if(fp==NULL) return -1;
2812 
2813  char *old_locale = strdup(setlocale(LC_ALL, NULL));
2814  setlocale(LC_ALL, "C");
2815 
2816  const svm_parameter& param = model->param;
2817 
2818  fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2819  fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2820 
2821  if(param.kernel_type == POLY)
2822  fprintf(fp,"degree %d\n", param.degree);
2823 
2825  fprintf(fp,"gamma %g\n", param.gamma);
2826 
2828  fprintf(fp,"coef0 %g\n", param.coef0);
2829 
2830  int nr_class = model->nr_class;
2831  int l = model->l;
2832  fprintf(fp, "nr_class %d\n", nr_class);
2833  fprintf(fp, "total_sv %d\n",l);
2834 
2835  {
2836  fprintf(fp, "rho");
2837  for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2838  fprintf(fp," %g",model->rho[i]);
2839  fprintf(fp, "\n");
2840  }
2841 
2842  if(model->label)
2843  {
2844  fprintf(fp, "label");
2845  for(int i=0;i<nr_class;i++)
2846  fprintf(fp," %d",model->label[i]);
2847  fprintf(fp, "\n");
2848  }
2849 
2850  if(model->probA) // regression has probA only
2851  {
2852  fprintf(fp, "probA");
2853  for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2854  fprintf(fp," %g",model->probA[i]);
2855  fprintf(fp, "\n");
2856  }
2857  if(model->probB)
2858  {
2859  fprintf(fp, "probB");
2860  for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2861  fprintf(fp," %g",model->probB[i]);
2862  fprintf(fp, "\n");
2863  }
2864 
2865  if(model->nSV)
2866  {
2867  fprintf(fp, "nr_sv");
2868  for(int i=0;i<nr_class;i++)
2869  fprintf(fp," %d",model->nSV[i]);
2870  fprintf(fp, "\n");
2871  }
2872 
2873  fprintf(fp, "SV\n");
2874  const double * const *sv_coef = model->sv_coef;
2875  const svm_node * const *SV = model->SV;
2876 
2877  for(int i=0;i<l;i++)
2878  {
2879  for(int j=0;j<nr_class-1;j++)
2880  fprintf(fp, "%.16g ",sv_coef[j][i]);
2881 
2882  const svm_node *p = SV[i];
2883 
2885  fprintf(fp,"0:%d ",(int)(p->value));
2886  else
2887  while(p->index != -1)
2888  {
2889  fprintf(fp,"%d:%.8g ",p->index,p->value);
2890  p++;
2891  }
2892  fprintf(fp, "\n");
2893  }
2894 
2895  setlocale(LC_ALL, old_locale);
2896  free(old_locale);
2897 
2898  if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2899  else return 0;
2900 }
2901 
2902 static char *line = NULL;
2903 static int max_line_len;
2904 
2905 static char* readline(FILE *input)
2906 {
2907  int len;
2908 
2909  if(fgets(line,max_line_len,input) == NULL)
2910  return NULL;
2911 
2912  while(strrchr(line,'\n') == NULL)
2913  {
2914  max_line_len *= 2;
2915  line = (char *) realloc(line,max_line_len);
2916  len = (int) strlen(line);
2917  if(fgets(line+len,max_line_len-len,input) == NULL)
2918  break;
2919  }
2920  return line;
2921 }
2922 
2923 svm_model *svm_load_model(const char *model_file_name)
2924 {
2925  FILE *fp = fopen(model_file_name,"rb");
2926  if(fp==NULL) return NULL;
2927 
2928  char *old_locale = strdup(setlocale(LC_ALL, NULL));
2929  setlocale(LC_ALL, "C");
2930 
2931  // read parameters
2932 
2933  svm_model *model = Malloc(svm_model,1);
2934  svm_parameter& param = model->param;
2935  model->rho = NULL;
2936  model->probA = NULL;
2937  model->probB = NULL;
2938  model->label = NULL;
2939  model->nSV = NULL;
2940 
2941  char cmd[81];
2942  while(1)
2943  {
2944  fscanf(fp,"%80s",cmd);
2945 
2946  if(strcmp(cmd,"svm_type")==0)
2947  {
2948  fscanf(fp,"%80s",cmd);
2949  int i;
2950  for(i=0;svm_type_table[i];i++)
2951  {
2952  if(strcmp(svm_type_table[i],cmd)==0)
2953  {
2954  param.svm_type=i;
2955  break;
2956  }
2957  }
2958  if(svm_type_table[i] == NULL)
2959  {
2960  fprintf(stderr,"unknown svm type.\n");
2961 
2962  setlocale(LC_ALL, old_locale);
2963  free(old_locale);
2964  free(model->rho);
2965  free(model->label);
2966  free(model->nSV);
2967  free(model);
2968  return NULL;
2969  }
2970  }
2971  else if(strcmp(cmd,"kernel_type")==0)
2972  {
2973  fscanf(fp,"%80s",cmd);
2974  int i;
2975  for(i=0;kernel_type_table[i];i++)
2976  {
2977  if(strcmp(kernel_type_table[i],cmd)==0)
2978  {
2979  param.kernel_type=i;
2980  break;
2981  }
2982  }
2983  if(kernel_type_table[i] == NULL)
2984  {
2985  fprintf(stderr,"unknown kernel function.\n");
2986 
2987  setlocale(LC_ALL, old_locale);
2988  free(old_locale);
2989  free(model->rho);
2990  free(model->label);
2991  free(model->nSV);
2992  free(model);
2993  return NULL;
2994  }
2995  }
2996  else if(strcmp(cmd,"degree")==0)
2997  fscanf(fp,"%d",&param.degree);
2998  else if(strcmp(cmd,"gamma")==0)
2999  fscanf(fp,"%lf",&param.gamma);
3000  else if(strcmp(cmd,"coef0")==0)
3001  fscanf(fp,"%lf",&param.coef0);
3002  else if(strcmp(cmd,"nr_class")==0)
3003  fscanf(fp,"%d",&model->nr_class);
3004  else if(strcmp(cmd,"total_sv")==0)
3005  fscanf(fp,"%d",&model->l);
3006  else if(strcmp(cmd,"rho")==0)
3007  {
3008  int n = model->nr_class * (model->nr_class-1)/2;
3009  model->rho = Malloc(double,n);
3010  for(int i=0;i<n;i++)
3011  fscanf(fp,"%lf",&model->rho[i]);
3012  }
3013  else if(strcmp(cmd,"label")==0)
3014  {
3015  int n = model->nr_class;
3016  model->label = Malloc(int,n);
3017  for(int i=0;i<n;i++)
3018  fscanf(fp,"%d",&model->label[i]);
3019  }
3020  else if(strcmp(cmd,"probA")==0)
3021  {
3022  int n = model->nr_class * (model->nr_class-1)/2;
3023  model->probA = Malloc(double,n);
3024  for(int i=0;i<n;i++)
3025  fscanf(fp,"%lf",&model->probA[i]);
3026  }
3027  else if(strcmp(cmd,"probB")==0)
3028  {
3029  int n = model->nr_class * (model->nr_class-1)/2;
3030  model->probB = Malloc(double,n);
3031  for(int i=0;i<n;i++)
3032  fscanf(fp,"%lf",&model->probB[i]);
3033  }
3034  else if(strcmp(cmd,"nr_sv")==0)
3035  {
3036  int n = model->nr_class;
3037  model->nSV = Malloc(int,n);
3038  for(int i=0;i<n;i++)
3039  fscanf(fp,"%d",&model->nSV[i]);
3040  }
3041  else if(strcmp(cmd,"SV")==0)
3042  {
3043  while(1)
3044  {
3045  int c = getc(fp);
3046  if(c==EOF || c=='\n') break;
3047  }
3048  break;
3049  }
3050  else
3051  {
3052  fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
3053 
3054  setlocale(LC_ALL, old_locale);
3055  free(old_locale);
3056  free(model->rho);
3057  free(model->label);
3058  free(model->nSV);
3059  free(model);
3060  return NULL;
3061  }
3062  }
3063 
3064  // read sv_coef and SV
3065 
3066  int elements = 0;
3067  long pos = ftell(fp);
3068 
3069  max_line_len = 1024;
3070  line = Malloc(char,max_line_len);
3071  char *p,*endptr,*idx,*val;
3072 
3073  while(readline(fp)!=NULL)
3074  {
3075  p = strtok(line,":");
3076  while(1)
3077  {
3078  p = strtok(NULL,":");
3079  if(p == NULL)
3080  break;
3081  ++elements;
3082  }
3083  }
3084  elements += model->l;
3085 
3086  fseek(fp,pos,SEEK_SET);
3087 
3088  int m = model->nr_class - 1;
3089  int l = model->l;
3090  model->sv_coef = Malloc(double *,m);
3091  int i;
3092  for(i=0;i<m;i++)
3093  model->sv_coef[i] = Malloc(double,l);
3094  model->SV = Malloc(svm_node*,l);
3095  svm_node *x_space = NULL;
3096  if(l>0) x_space = Malloc(svm_node,elements);
3097 
3098  int j=0;
3099  for(i=0;i<l;i++)
3100  {
3101  readline(fp);
3102  model->SV[i] = &x_space[j];
3103 
3104  p = strtok(line, " \t");
3105  model->sv_coef[0][i] = strtod(p,&endptr);
3106  for(int k=1;k<m;k++)
3107  {
3108  p = strtok(NULL, " \t");
3109  model->sv_coef[k][i] = strtod(p,&endptr);
3110  }
3111 
3112  while(1)
3113  {
3114  idx = strtok(NULL, ":");
3115  val = strtok(NULL, " \t");
3116 
3117  if(val == NULL)
3118  break;
3119  x_space[j].index = (int) strtol(idx,&endptr,10);
3120  x_space[j].value = strtod(val,&endptr);
3121 
3122  ++j;
3123  }
3124  x_space[j++].index = -1;
3125  }
3126  free(line);
3127 
3128  setlocale(LC_ALL, old_locale);
3129  free(old_locale);
3130 
3131  if (ferror(fp) != 0 || fclose(fp) != 0)
3132  return NULL;
3133 
3134  model->free_sv = 1; // XXX
3135  return model;
3136 }
3137 
3139 {
3140  if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
3141  free((void *)(model_ptr->SV[0]));
3142  if(model_ptr->sv_coef)
3143  {
3144  for(int i=0;i<model_ptr->nr_class-1;i++)
3145  free(model_ptr->sv_coef[i]);
3146  }
3147 
3148  free(model_ptr->SV);
3149  model_ptr->SV = NULL;
3150 
3151  free(model_ptr->sv_coef);
3152  model_ptr->sv_coef = NULL;
3153 
3154  free(model_ptr->rho);
3155  model_ptr->rho = NULL;
3156 
3157  free(model_ptr->label);
3158  model_ptr->label= NULL;
3159 
3160  free(model_ptr->probA);
3161  model_ptr->probA = NULL;
3162 
3163  free(model_ptr->probB);
3164  model_ptr->probB= NULL;
3165 
3166  free(model_ptr->nSV);
3167  model_ptr->nSV = NULL;
3168 }
3169 
3171 {
3172  if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
3173  {
3174  svm_free_model_content(*model_ptr_ptr);
3175  free(*model_ptr_ptr);
3176  *model_ptr_ptr = NULL;
3177  }
3178 }
3179 
3181 {
3182  free(param->weight_label);
3183  free(param->weight);
3184 }
3185 
3186 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
3187 {
3188  // svm_type
3189 
3190  int svm_type = param->svm_type;
3191  if(svm_type != C_SVC &&
3192  svm_type != NU_SVC &&
3193  svm_type != ONE_CLASS &&
3194  svm_type != EPSILON_SVR &&
3195  svm_type != NU_SVR)
3196  return "unknown svm type";
3197 
3198  // kernel_type, degree
3199 
3200  int kernel_type = param->kernel_type;
3201  if(kernel_type != LINEAR &&
3202  kernel_type != POLY &&
3203  kernel_type != RBF &&
3204  kernel_type != SIGMOID &&
3205  kernel_type != PRECOMPUTED)
3206  return "unknown kernel type";
3207 
3208  if(param->gamma < 0)
3209  return "gamma < 0";
3210 
3211  if(param->degree < 0)
3212  return "degree of polynomial kernel < 0";
3213 
3214  // cache_size,eps,C,nu,p,shrinking
3215 
3216  if(param->cache_size <= 0)
3217  return "cache_size <= 0";
3218 
3219  if(param->eps <= 0)
3220  return "eps <= 0";
3221 
3222  if(svm_type == C_SVC ||
3223  svm_type == EPSILON_SVR ||
3224  svm_type == NU_SVR)
3225  if(param->C <= 0)
3226  return "C <= 0";
3227 
3228  if(svm_type == NU_SVC ||
3229  svm_type == ONE_CLASS ||
3230  svm_type == NU_SVR)
3231  if(param->nu <= 0 || param->nu > 1)
3232  return "nu <= 0 or nu > 1";
3233 
3234  if(svm_type == EPSILON_SVR)
3235  if(param->p < 0)
3236  return "p < 0";
3237 
3238  if(param->shrinking != 0 &&
3239  param->shrinking != 1)
3240  return "shrinking != 0 and shrinking != 1";
3241 
3242  if(param->probability != 0 &&
3243  param->probability != 1)
3244  return "probability != 0 and probability != 1";
3245 
3246  if(param->probability == 1 &&
3247  svm_type == ONE_CLASS)
3248  return "one-class SVM probability output not supported yet";
3249 
3250 
3251  // check whether nu-svc is feasible
3252 
3253  if(svm_type == NU_SVC)
3254  {
3255  int l = prob->l;
3256  int max_nr_class = 16;
3257  int nr_class = 0;
3258  int *label = Malloc(int,max_nr_class);
3259  double *count = Malloc(double,max_nr_class);
3260 
3261  int i;
3262  for(i=0;i<l;i++)
3263  {
3264  int this_label = (int)prob->y[i];
3265  int j;
3266  for(j=0;j<nr_class;j++)
3267  if(this_label == label[j])
3268  {
3269  count[j] += prob->W[i];
3270  break;
3271  }
3272  if(j == nr_class)
3273  {
3274  if(nr_class == max_nr_class)
3275  {
3276  max_nr_class *= 2;
3277  label = (int *)realloc(label,max_nr_class*sizeof(int));
3278  count = (double *)realloc(count,max_nr_class*sizeof(double));
3279  }
3280  label[nr_class] = this_label;
3281  count[nr_class] = prob->W[i];
3282  ++nr_class;
3283  }
3284  }
3285 
3286  for(i=0;i<nr_class;i++)
3287  {
3288  double n1 = count[i];
3289  for(int j=i+1;j<nr_class;j++)
3290  {
3291  double n2 = count[j];
3292  if(param->nu*(n1+n2)/2 > min(n1,n2))
3293  {
3294  free(label);
3295  free(count);
3296  return "specified nu is infeasible";
3297  }
3298  }
3299  }
3300  free(label);
3301  free(count);
3302  }
3303 
3304  return NULL;
3305 }
3306 
3308 {
3309  return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3310  model->probA!=NULL && model->probB!=NULL) ||
3311  ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3312  model->probA!=NULL);
3313 }
3314 
3315 void svm_set_print_string_function(void (*print_func)(const char *))
3316 {
3317  if(print_func == NULL)
3319  else
3320  svm_print_string = print_func;
3321 }
svm_check_probability_model
int svm_check_probability_model(const svm_model *model)
Definition: svm.cpp:3307
Cache::Cache
Cache(int l, long int size)
Definition: svm.cpp:128
svm_problem::y
double * y
Definition: svm.h:56
Solver::is_upper_bound
bool is_upper_bound(int i)
Definition: svm.cpp:479
Solver::is_free
bool is_free(int i)
Definition: svm.cpp:481
Solver::QD
const double * QD
Definition: svm.cpp:457
svm_get_labels
void svm_get_labels(const svm_model *model, int *label)
Definition: svm.cpp:2591
Solver::update_alpha_status
void update_alpha_status(int i)
Definition: svm.cpp:471
QMatrix::get_Q
virtual Qfloat * get_Q(int column, int len) const =0
Solver::calculate_rho
virtual double calculate_rho()
Definition: svm.cpp:1008
QMatrix::~QMatrix
virtual ~QMatrix()
Definition: svm.cpp:233
svm_predict_values_twoclass
double svm_predict_values_twoclass(const struct svm_model *model, const struct svm_node *x)
Definition: svm.cpp:2643
Solver::LOWER_BOUND
@ LOWER_BOUND
Definition: svm.cpp:453
Kernel::kernel_type
const int kernel_type
Definition: svm.cpp:260
svm_model::rho
double * rho
Definition: svm.h:95
print_string_stdout
static void print_string_stdout(const char *s)
Definition: svm.cpp:75
solve_c_svc
static void solve_c_svc(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si, double Cp, double Cn)
Definition: svm.cpp:1482
svm_group_classes
static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
Definition: svm.cpp:2110
Cache::size
long int size
Definition: svm.cpp:114
Solver::SolutionInfo::r
double r
Definition: svm.cpp:443
svm_model::nSV
int * nSV
Definition: svm.h:104
Solver::get_C
double get_C(int i)
Definition: svm.cpp:467
svm_parameter::weight_label
int * weight_label
Definition: svm.h:77
svm_parameter::kernel_type
int kernel_type
Definition: svm.h:67
solve_nu_svc
static void solve_nu_svc(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: svm.cpp:1531
SVR_Q::SVR_Q
SVR_Q(const svm_problem &prob, const svm_parameter &param)
Definition: svm.cpp:1407
Solver::Cp
double Cp
Definition: svm.cpp:459
svm_parameter::eps
double eps
Definition: svm.h:74
svm_model::sv_indices
int * sv_indices
Definition: svm.h:98
svm_model::l
int l
Definition: svm.h:92
RBF
@ RBF
Definition: svm.h:62
Cache::head_t::len
int len
Definition: svm.cpp:119
Kernel::get_Q
virtual Qfloat * get_Q(int column, int len) const =0
svm_node::index
int index
Definition: svm.h:49
Solver::l
int l
Definition: svm.cpp:464
Kernel::swap_index
virtual void swap_index(int i, int j) const
Definition: svm.cpp:246
Solver::Cn
double Cn
Definition: svm.cpp:459
Solver::C
double * C
Definition: svm.cpp:460
Kernel::~Kernel
virtual ~Kernel()
Definition: svm.cpp:329
svm_svr_probability
static double svm_svr_probability(const svm_problem *prob, const svm_parameter *param)
Definition: svm.cpp:2076
SVR_Q::l
int l
Definition: svm.cpp:1470
svm_set_print_string_function
void svm_set_print_string_function(void(*print_func)(const char *))
Definition: svm.cpp:3315
Solver::y
schar * y
Definition: svm.cpp:451
Solver_NU::do_shrinking
void do_shrinking()
Definition: svm.cpp:1203
svm_get_sv_indices
void svm_get_sv_indices(const svm_model *model, int *indices)
Definition: svm.cpp:2598
svm_problem::l
int l
Definition: svm.h:55
svm_parameter::C
double C
Definition: svm.h:75
svm_model::probA
double * probA
Definition: svm.h:96
NU_SVC
@ NU_SVC
Definition: svm.h:61
svm_get_nr_class
int svm_get_nr_class(const svm_model *model)
Definition: svm.cpp:2586
Cache::head_t::next
head_t * next
Definition: svm.cpp:117
multiclass_probability
static void multiclass_probability(int k, double **r, double *p)
Definition: svm.cpp:1921
Solver_NU::Solver_NU
Solver_NU()
Definition: svm.cpp:1054
swap
static void swap(T &x, T &y)
Definition: svm.cpp:54
SVC_Q::SVC_Q
SVC_Q(const svm_problem &prob, const svm_parameter &param, const schar *y_)
Definition: svm.cpp:1311
Solver::is_lower_bound
bool is_lower_bound(int i)
Definition: svm.cpp:480
svm_node
Definition: svm.h:47
max_line_len
static int max_line_len
Definition: svm.cpp:2903
TAU
#define TAU
Definition: svm.cpp:72
sigmoid_predict
static double sigmoid_predict(double decision_value, double A, double B)
Definition: svm.cpp:1910
svm_load_model
svm_model * svm_load_model(const char *model_file_name)
Definition: svm.cpp:2923
Cache::head_t::prev
head_t * prev
Definition: svm.cpp:117
EPSILON_SVR
@ EPSILON_SVR
Definition: svm.h:61
Solver_NU::calculate_rho
double calculate_rho()
Definition: svm.cpp:1255
decision_function::alpha
double * alpha
Definition: svm.cpp:1728
max
static T max(T x, T y)
Definition: svm.cpp:52
svm_parameter::cache_size
double cache_size
Definition: svm.h:73
SVC_Q::y
schar * y
Definition: svm.cpp:1353
Solver::SolutionInfo::rho
double rho
Definition: svm.cpp:441
PRECOMPUTED
@ PRECOMPUTED
Definition: svm.h:62
svm_model::label
int * label
Definition: svm.h:103
SVR_Q
Definition: svm.cpp:1404
label
string label
Definition: test_half_space_convex.cpp:143
svm_model::SV
struct svm_node ** SV
Definition: svm.h:93
svm_parameter::coef0
double coef0
Definition: svm.h:70
Kernel::k_function
static double k_function(const svm_node *x, const svm_node *y, const svm_parameter &param)
Definition: svm.cpp:357
libsvm_version
int libsvm_version
Definition: svm.cpp:45
QMatrix
Definition: svm.cpp:228
svm_problem
Definition: svm.h:53
SIGMOID
@ SIGMOID
Definition: svm.h:62
clone
static void clone(T *&dst, S *src, int n)
Definition: svm.cpp:55
Kernel::dot
static double dot(const svm_node *px, const svm_node *py)
Definition: svm.cpp:335
svm_predict
double svm_predict(const svm_model *model, const svm_node *x)
Definition: svm.cpp:2744
Solver::Solve
void Solve(int l, const QMatrix &Q, const double *p_, const schar *y_, double *alpha_, const double *C_, double eps, SolutionInfo *si, int shrinking)
Definition: svm.cpp:546
Solver_NU::Solve
void Solve(int l, const QMatrix &Q, const double *p, const schar *y, double *alpha, double *C_, double eps, SolutionInfo *si, int shrinking)
Definition: svm.cpp:1055
ONE_CLASS_Q
Definition: svm.cpp:1358
Kernel::x
const svm_node ** x
Definition: svm.cpp:256
Solver_NU
Definition: svm.cpp:1051
SVC_Q::get_QD
double * get_QD() const
Definition: svm.cpp:1333
Solver::unshrink
bool unshrink
Definition: svm.cpp:465
POLY
@ POLY
Definition: svm.h:62
Solver::SolutionInfo::obj
double obj
Definition: svm.cpp:440
svm.h
svm_model::sv_coef
double ** sv_coef
Definition: svm.h:94
Kernel::get_QD
virtual double * get_QD() const =0
svm_parameter::probability
int probability
Definition: svm.h:82
svm_free_and_destroy_model
void svm_free_and_destroy_model(svm_model **model_ptr_ptr)
Definition: svm.cpp:3170
svm_parameter
Definition: svm.h:64
svm_model::nr_class
int nr_class
Definition: svm.h:91
SVC_Q
Definition: svm.cpp:1308
svm_parameter::gamma
double gamma
Definition: svm.h:69
Solver::~Solver
virtual ~Solver()
Definition: svm.cpp:437
SVR_Q::~SVR_Q
~SVR_Q()
Definition: svm.cpp:1460
Solver::SolutionInfo
Definition: svm.cpp:439
svm_type_table
static const char * svm_type_table[]
Definition: svm.cpp:2798
svm_train_one
static decision_function svm_train_one(const svm_problem *prob, const svm_parameter *param, double Cp, double Cn)
Definition: svm.cpp:1732
Solver::be_shrunk
bool be_shrunk(int i, double Gmax1, double Gmax2)
Definition: svm.cpp:927
svm_cross_validation
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
Definition: svm.cpp:2461
svm_parameter::nr_weight
int nr_weight
Definition: svm.h:76
Cache::lru_head
head_t lru_head
Definition: svm.cpp:123
svm_model::param
struct svm_parameter param
Definition: svm.h:90
svm_check_parameter
const char * svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
Definition: svm.cpp:3186
svm_train
svm_model * svm_train(const svm_problem *prob, const svm_parameter *param)
Definition: svm.cpp:2196
Solver::UPPER_BOUND
@ UPPER_BOUND
Definition: svm.cpp:453
Solver::reconstruct_gradient
void reconstruct_gradient()
Definition: svm.cpp:504
SVC_Q::~SVC_Q
~SVC_Q()
Definition: svm.cpp:1346
svm_parameter::nu
double nu
Definition: svm.h:79
Solver::select_working_set
virtual int select_working_set(int &i, int &j)
Definition: svm.cpp:828
svm_predict_values
double svm_predict_values(const svm_model *model, const svm_node *x, double *dec_values)
Definition: svm.cpp:2668
Kernel::degree
const int degree
Definition: svm.cpp:261
svm_binary_svc_probability
static void svm_binary_svc_probability(const svm_problem *prob, const svm_parameter *param, double Cp, double Cn, double &probA, double &probB)
Definition: svm.cpp:1985
SVR_Q::cache
Cache * cache
Definition: svm.cpp:1471
Solver_NU::select_working_set
int select_working_set(int &i, int &j)
Definition: svm.cpp:1071
Cache::~Cache
~Cache()
Definition: svm.cpp:137
svm_save_model
int svm_save_model(const char *model_file_name, const svm_model *model)
Definition: svm.cpp:2808
Solver
Definition: svm.cpp:434
Kernel::gamma
const double gamma
Definition: svm.cpp:262
svm_parameter::degree
int degree
Definition: svm.h:68
Solver::active_size
int active_size
Definition: svm.cpp:450
Solver::Solver
Solver()
Definition: svm.cpp:436
svm_print_string
static void(* svm_print_string)(const char *)
Definition: svm.cpp:80
Kernel::kernel_precomputed
double kernel_precomputed(int i, int j) const
Definition: svm.cpp:282
LINEAR
@ LINEAR
Definition: svm.h:62
Cache::head
head_t * head
Definition: svm.cpp:122
Malloc
#define Malloc(type, n)
Definition: svm.cpp:73
NU_SVR
@ NU_SVR
Definition: svm.h:61
svm_get_svm_type
int svm_get_svm_type(const svm_model *model)
Definition: svm.cpp:2581
svm_problem::W
double * W
Definition: svm.h:58
ONE_CLASS_Q::get_Q
Qfloat * get_Q(int i, int len) const
Definition: svm.cpp:1370
svm_problem::x
struct svm_node ** x
Definition: svm.h:57
k_function
double k_function(const svm_node *x, const svm_node *y, const svm_parameter &param)
Definition: svm.cpp:289
sigmoid_train
static void sigmoid_train(int l, const double *dec_values, const double *labels, double &A, double &B)
Definition: svm.cpp:1797
r
S r
Definition: test_sphere_box.cpp:171
Kernel
Definition: svm.cpp:236
Kernel::kernel_rbf
double kernel_rbf(int i, int j) const
Definition: svm.cpp:274
svm_hyper_w_normsqr_twoclass
double svm_hyper_w_normsqr_twoclass(const struct svm_model *model)
Definition: svm.cpp:2622
ONE_CLASS_Q::~ONE_CLASS_Q
~ONE_CLASS_Q()
Definition: svm.cpp:1394
C_SVC
@ C_SVC
Definition: svm.h:61
Solver::p
double * p
Definition: svm.cpp:461
svm_parameter::svm_type
int svm_type
Definition: svm.h:66
svm_node::value
double value
Definition: svm.h:50
Solver::swap_index
void swap_index(int i, int j)
Definition: svm.cpp:491
Solver::alpha
double * alpha
Definition: svm.cpp:455
decision_function::rho
double rho
Definition: svm.cpp:1729
Cache::head_t::data
Qfloat * data
Definition: svm.cpp:118
Solver::SolutionInfo::upper_bound
double * upper_bound
Definition: svm.cpp:442
Qfloat
float Qfloat
Definition: svm.cpp:46
SVR_Q::next_buffer
int next_buffer
Definition: svm.cpp:1474
Kernel::kernel_function
double(Kernel::* kernel_function)(int i, int j) const
Definition: svm.cpp:253
Solver::eps
double eps
Definition: svm.cpp:458
Cache::lru_insert
void lru_insert(head_t *h)
Definition: svm.cpp:151
solve_epsilon_svr
static void solve_epsilon_svr(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: svm.cpp:1637
Kernel::Kernel
Kernel(int l, svm_node *const *x, const svm_parameter &param)
Definition: svm.cpp:294
SVR_Q::QD
double * QD
Definition: svm.cpp:1476
Solver::G_bar
double * G_bar
Definition: svm.cpp:463
svm_parameter::weight
double * weight
Definition: svm.h:78
solve_one_class
static void solve_one_class(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: svm.cpp:1594
svm_predict_probability
double svm_predict_probability(const svm_model *model, const svm_node *x, double *prob_estimates)
Definition: svm.cpp:2759
Cache::swap_index
void swap_index(int i, int j)
Definition: svm.cpp:190
ONE_CLASS
@ ONE_CLASS
Definition: svm.h:61
SVR_Q::sign
schar * sign
Definition: svm.cpp:1472
ONE_CLASS_Q::cache
Cache * cache
Definition: svm.cpp:1400
ONE_CLASS_Q::swap_index
void swap_index(int i, int j) const
Definition: svm.cpp:1387
svm_parameter::p
double p
Definition: svm.h:80
Cache::lru_delete
void lru_delete(head_t *h)
Definition: svm.cpp:144
svm_model::free_sv
int free_sv
Definition: svm.h:107
schar
signed char schar
Definition: svm.cpp:47
INF
#define INF
Definition: svm.cpp:71
SVC_Q::get_Q
Qfloat * get_Q(int i, int len) const
Definition: svm.cpp:1321
svm_model
Definition: svm.h:88
QMatrix::swap_index
virtual void swap_index(int i, int j) const =0
svm_free_model_content
void svm_free_model_content(svm_model *model_ptr)
Definition: svm.cpp:3138
SVC_Q::cache
Cache * cache
Definition: svm.cpp:1354
min
static T min(T x, T y)
Definition: svm.cpp:49
SVR_Q::buffer
Qfloat * buffer[2]
Definition: svm.cpp:1475
svm_parameter::shrinking
int shrinking
Definition: svm.h:81
svm_get_svr_probability
double svm_get_svr_probability(const svm_model *model)
Definition: svm.cpp:2610
SVC_Q::swap_index
void swap_index(int i, int j) const
Definition: svm.cpp:1338
decision_function
Definition: svm.cpp:1726
Kernel::coef0
const double coef0
Definition: svm.cpp:263
readline
static char * readline(FILE *input)
Definition: svm.cpp:2905
Solver::FREE
@ FREE
Definition: svm.cpp:453
Solver::alpha_status
char * alpha_status
Definition: svm.cpp:454
info
static void info(const char *fmt,...)
Definition: svm.cpp:92
Cache::head_t
Definition: svm.cpp:115
Solver_NU::be_shrunk
bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
Definition: svm.cpp:1183
ONE_CLASS_Q::QD
double * QD
Definition: svm.cpp:1401
svm_destroy_param
void svm_destroy_param(svm_parameter *param)
Definition: svm.cpp:3180
line
static char * line
Definition: svm.cpp:2902
Kernel::x_square
double * x_square
Definition: svm.cpp:257
Kernel::kernel_poly
double kernel_poly(int i, int j) const
Definition: svm.cpp:270
svm_get_nr_sv
int svm_get_nr_sv(const svm_model *model)
Definition: svm.cpp:2605
Solver::active_set
int * active_set
Definition: svm.cpp:462
powi
static double powi(double base, int times)
Definition: svm.cpp:60
SVR_Q::get_QD
double * get_QD() const
Definition: svm.cpp:1455
svm_model::probB
double * probB
Definition: svm.h:97
Kernel::kernel_sigmoid
double kernel_sigmoid(int i, int j) const
Definition: svm.cpp:278
SVR_Q::swap_index
void swap_index(int i, int j) const
Definition: svm.cpp:1429
Cache::l
int l
Definition: svm.cpp:113
Cache
Definition: svm.cpp:101
SVR_Q::get_Q
Qfloat * get_Q(int i, int len) const
Definition: svm.cpp:1436
Solver::Q
const QMatrix * Q
Definition: svm.cpp:456
kernel_type_table
static const char * kernel_type_table[]
Definition: svm.cpp:2803
LIBSVM_VERSION
#define LIBSVM_VERSION
Definition: svm.h:39
ONE_CLASS_Q::ONE_CLASS_Q
ONE_CLASS_Q(const svm_problem &prob, const svm_parameter &param)
Definition: svm.cpp:1361
solve_nu_svr
static void solve_nu_svr(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: svm.cpp:1677
SVC_Q::QD
double * QD
Definition: svm.cpp:1355
SVR_Q::index
int * index
Definition: svm.cpp:1473
Kernel::kernel_linear
double kernel_linear(int i, int j) const
Definition: svm.cpp:266
ONE_CLASS_Q::get_QD
double * get_QD() const
Definition: svm.cpp:1382
Solver::G
double * G
Definition: svm.cpp:452
remove_zero_weight
static void remove_zero_weight(svm_problem *newprob, const svm_problem *prob)
Definition: svm.cpp:2170
Solver_NU::si
SolutionInfo * si
Definition: svm.cpp:1063
QMatrix::get_QD
virtual double * get_QD() const =0
Cache::get_data
int get_data(const int index, Qfloat **data, int len)
Definition: svm.cpp:160
Solver::do_shrinking
virtual void do_shrinking()
Definition: svm.cpp:947


fcl
Author(s):
autogenerated on Tue Dec 5 2023 03:40:49