00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <ctype.h>
00005 #include <float.h>
00006 #include <string.h>
00007 #include <stdarg.h>
00008 #include <limits.h>
00009 #include <locale.h>
00010 #include "svm.h"
00011 int libsvm_version = LIBSVM_VERSION;
00012 typedef float Qfloat;
00013 typedef signed char schar;
00014 #ifndef min
00015 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
00016 #endif
00017 #ifndef max
00018 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
00019 #endif
00020 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00021 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
00022 {
00023 dst = new T[n];
00024 memcpy((void *)dst,(void *)src,sizeof(T)*n);
00025 }
00026 static inline double powi(double base, int times)
00027 {
00028 double tmp = base, ret = 1.0;
00029
00030 for(int t=times; t>0; t/=2)
00031 {
00032 if(t%2==1) ret*=tmp;
00033 tmp = tmp * tmp;
00034 }
00035 return ret;
00036 }
00037 #define INF HUGE_VAL
00038 #define TAU 1e-12
00039 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00040
00041 static void print_string_stdout(const char *s)
00042 {
00043 fputs(s,stdout);
00044 fflush(stdout);
00045 }
00046 static void (*svm_print_string) (const char *) = &print_string_stdout;
00047 #if 1
00048 static void info(const char *fmt,...)
00049 {
00050 char buf[BUFSIZ];
00051 va_list ap;
00052 va_start(ap,fmt);
00053 vsprintf(buf,fmt,ap);
00054 va_end(ap);
00055 (*svm_print_string)(buf);
00056 }
00057 #else
00058 static void info(const char *fmt,...) {}
00059 #endif
00060
00061
00062
00063
00064
00065
00066
00067 class Cache
00068 {
00069 public:
00070 Cache(int l,long int size);
00071 ~Cache();
00072
00073
00074
00075
00076 int get_data(const int index, Qfloat **data, int len);
00077 void swap_index(int i, int j);
00078 private:
00079 int l;
00080 long int size;
00081 struct head_t
00082 {
00083 head_t *prev, *next;
00084 Qfloat *data;
00085 int len;
00086 };
00087
00088 head_t *head;
00089 head_t lru_head;
00090 void lru_delete(head_t *h);
00091 void lru_insert(head_t *h);
00092 };
00093
00094 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00095 {
00096 head = (head_t *)calloc(l,sizeof(head_t));
00097 size /= sizeof(Qfloat);
00098 size -= l * sizeof(head_t) / sizeof(Qfloat);
00099 size = max(size, 2 * (long int) l);
00100 lru_head.next = lru_head.prev = &lru_head;
00101 }
00102
00103 Cache::~Cache()
00104 {
00105 for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00106 free(h->data);
00107 free(head);
00108 }
00109
00110 void Cache::lru_delete(head_t *h)
00111 {
00112
00113 h->prev->next = h->next;
00114 h->next->prev = h->prev;
00115 }
00116
00117 void Cache::lru_insert(head_t *h)
00118 {
00119
00120 h->next = &lru_head;
00121 h->prev = lru_head.prev;
00122 h->prev->next = h;
00123 h->next->prev = h;
00124 }
00125
00126 int Cache::get_data(const int index, Qfloat **data, int len)
00127 {
00128 head_t *h = &head[index];
00129 if(h->len) lru_delete(h);
00130 int more = len - h->len;
00131
00132 if(more > 0)
00133 {
00134
00135 while(size < more)
00136 {
00137 head_t *old = lru_head.next;
00138 lru_delete(old);
00139 free(old->data);
00140 size += old->len;
00141 old->data = 0;
00142 old->len = 0;
00143 }
00144
00145
00146 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00147 size -= more;
00148 swap(h->len,len);
00149 }
00150
00151 lru_insert(h);
00152 *data = h->data;
00153 return len;
00154 }
00155
00156 void Cache::swap_index(int i, int j)
00157 {
00158 if(i==j) return;
00159
00160 if(head[i].len) lru_delete(&head[i]);
00161 if(head[j].len) lru_delete(&head[j]);
00162 swap(head[i].data,head[j].data);
00163 swap(head[i].len,head[j].len);
00164 if(head[i].len) lru_insert(&head[i]);
00165 if(head[j].len) lru_insert(&head[j]);
00166
00167 if(i>j) swap(i,j);
00168 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00169 {
00170 if(h->len > i)
00171 {
00172 if(h->len > j)
00173 swap(h->data[i],h->data[j]);
00174 else
00175 {
00176
00177 lru_delete(h);
00178 free(h->data);
00179 size += h->len;
00180 h->data = 0;
00181 h->len = 0;
00182 }
00183 }
00184 }
00185 }
00186
00187
00188
00189
00190
00191
00192
00193
00194 class QMatrix {
00195 public:
00196 virtual Qfloat *get_Q(int column, int len) const = 0;
00197 virtual double *get_QD() const = 0;
00198 virtual void swap_index(int i, int j) const = 0;
00199 virtual ~QMatrix() {}
00200 };
00201
00202 class Kernel: public QMatrix {
00203 public:
00204 Kernel(int l, svm_node * const * x, const svm_parameter& param);
00205 virtual ~Kernel();
00206
00207 static double k_function(const svm_node *x, const svm_node *y,
00208 const svm_parameter& param);
00209 virtual Qfloat *get_Q(int column, int len) const = 0;
00210 virtual double *get_QD() const = 0;
00211 virtual void swap_index(int i, int j) const
00212 {
00213 swap(x[i],x[j]);
00214 if(x_square) swap(x_square[i],x_square[j]);
00215 }
00216 protected:
00217
00218 double (Kernel::*kernel_function)(int i, int j) const;
00219
00220 private:
00221 const svm_node **x;
00222 double *x_square;
00223
00224
00225 const int kernel_type;
00226 const int degree;
00227 const double gamma;
00228 const double coef0;
00229
00230 static double dot(const svm_node *px, const svm_node *py);
00231 double kernel_linear(int i, int j) const
00232 {
00233 return dot(x[i],x[j]);
00234 }
00235 double kernel_poly(int i, int j) const
00236 {
00237 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00238 }
00239 double kernel_rbf(int i, int j) const
00240 {
00241 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00242 }
00243 double kernel_sigmoid(int i, int j) const
00244 {
00245 return tanh(gamma*dot(x[i],x[j])+coef0);
00246 }
00247 double kernel_precomputed(int i, int j) const
00248 {
00249 return x[i][(int)(x[j][0].value)].value;
00250 }
00251 };
00252
00253 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00254 :kernel_type(param.kernel_type), degree(param.degree),
00255 gamma(param.gamma), coef0(param.coef0)
00256 {
00257 switch(kernel_type)
00258 {
00259 case LINEAR:
00260 kernel_function = &Kernel::kernel_linear;
00261 break;
00262 case POLY:
00263 kernel_function = &Kernel::kernel_poly;
00264 break;
00265 case RBF:
00266 kernel_function = &Kernel::kernel_rbf;
00267 break;
00268 case SIGMOID:
00269 kernel_function = &Kernel::kernel_sigmoid;
00270 break;
00271 case PRECOMPUTED:
00272 kernel_function = &Kernel::kernel_precomputed;
00273 break;
00274 }
00275
00276 clone(x,x_,l);
00277
00278 if(kernel_type == RBF)
00279 {
00280 x_square = new double[l];
00281 for(int i=0;i<l;i++)
00282 x_square[i] = dot(x[i],x[i]);
00283 }
00284 else
00285 x_square = 0;
00286 }
00287
00288 Kernel::~Kernel()
00289 {
00290 delete[] x;
00291 delete[] x_square;
00292 }
00293
00294 double Kernel::dot(const svm_node *px, const svm_node *py)
00295 {
00296 double sum = 0;
00297 while(px->index != -1 && py->index != -1)
00298 {
00299 if(px->index == py->index)
00300 {
00301 sum += px->value * py->value;
00302 ++px;
00303 ++py;
00304 }
00305 else
00306 {
00307 if(px->index > py->index)
00308 ++py;
00309 else
00310 ++px;
00311 }
00312 }
00313 return sum;
00314 }
00315
00316 double Kernel::k_function(const svm_node *x, const svm_node *y,
00317 const svm_parameter& param)
00318 {
00319 switch(param.kernel_type)
00320 {
00321 case LINEAR:
00322 return dot(x,y);
00323 case POLY:
00324 return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00325 case RBF:
00326 {
00327 double sum = 0;
00328 while(x->index != -1 && y->index !=-1)
00329 {
00330 if(x->index == y->index)
00331 {
00332 double d = x->value - y->value;
00333 sum += d*d;
00334 ++x;
00335 ++y;
00336 }
00337 else
00338 {
00339 if(x->index > y->index)
00340 {
00341 sum += y->value * y->value;
00342 ++y;
00343 }
00344 else
00345 {
00346 sum += x->value * x->value;
00347 ++x;
00348 }
00349 }
00350 }
00351
00352 while(x->index != -1)
00353 {
00354 sum += x->value * x->value;
00355 ++x;
00356 }
00357
00358 while(y->index != -1)
00359 {
00360 sum += y->value * y->value;
00361 ++y;
00362 }
00363
00364 return exp(-param.gamma*sum);
00365 }
00366 case SIGMOID:
00367 return tanh(param.gamma*dot(x,y)+param.coef0);
00368 case PRECOMPUTED:
00369 return x[(int)(y->value)].value;
00370 default:
00371 return 0;
00372 }
00373 }
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393 class Solver {
00394 public:
00395 Solver() {};
00396 virtual ~Solver() {};
00397
00398 struct SolutionInfo {
00399 double obj;
00400 double rho;
00401 double upper_bound_p;
00402 double upper_bound_n;
00403 double r;
00404 };
00405
00406 void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00407 double *alpha_, double Cp, double Cn, double eps,
00408 SolutionInfo* si, int shrinking);
00409 protected:
00410 int active_size;
00411 schar *y;
00412 double *G;
00413 enum { LOWER_BOUND, UPPER_BOUND, FREE };
00414 char *alpha_status;
00415 double *alpha;
00416 const QMatrix *Q;
00417 const double *QD;
00418 double eps;
00419 double Cp,Cn;
00420 double *p;
00421 int *active_set;
00422 double *G_bar;
00423 int l;
00424 bool unshrink;
00425
00426 double get_C(int i)
00427 {
00428 return (y[i] > 0)? Cp : Cn;
00429 }
00430 void update_alpha_status(int i)
00431 {
00432 if(alpha[i] >= get_C(i))
00433 alpha_status[i] = UPPER_BOUND;
00434 else if(alpha[i] <= 0)
00435 alpha_status[i] = LOWER_BOUND;
00436 else alpha_status[i] = FREE;
00437 }
00438 bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00439 bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00440 bool is_free(int i) { return alpha_status[i] == FREE; }
00441 void swap_index(int i, int j);
00442 void reconstruct_gradient();
00443 virtual int select_working_set(int &i, int &j);
00444 virtual double calculate_rho();
00445 virtual void do_shrinking();
00446 private:
00447 bool be_shrunk(int i, double Gmax1, double Gmax2);
00448 };
00449
00450 void Solver::swap_index(int i, int j)
00451 {
00452 Q->swap_index(i,j);
00453 swap(y[i],y[j]);
00454 swap(G[i],G[j]);
00455 swap(alpha_status[i],alpha_status[j]);
00456 swap(alpha[i],alpha[j]);
00457 swap(p[i],p[j]);
00458 swap(active_set[i],active_set[j]);
00459 swap(G_bar[i],G_bar[j]);
00460 }
00461
00462 void Solver::reconstruct_gradient()
00463 {
00464
00465
00466 if(active_size == l) return;
00467
00468 int i,j;
00469 int nr_free = 0;
00470
00471 for(j=active_size;j<l;j++)
00472 G[j] = G_bar[j] + p[j];
00473
00474 for(j=0;j<active_size;j++)
00475 if(is_free(j))
00476 nr_free++;
00477
00478 if(2*nr_free < active_size)
00479 info("\nWARNING: using -h 0 may be faster\n");
00480
00481 if (nr_free*l > 2*active_size*(l-active_size))
00482 {
00483 for(i=active_size;i<l;i++)
00484 {
00485 const Qfloat *Q_i = Q->get_Q(i,active_size);
00486 for(j=0;j<active_size;j++)
00487 if(is_free(j))
00488 G[i] += alpha[j] * Q_i[j];
00489 }
00490 }
00491 else
00492 {
00493 for(i=0;i<active_size;i++)
00494 if(is_free(i))
00495 {
00496 const Qfloat *Q_i = Q->get_Q(i,l);
00497 double alpha_i = alpha[i];
00498 for(j=active_size;j<l;j++)
00499 G[j] += alpha_i * Q_i[j];
00500 }
00501 }
00502 }
00503
00504 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00505 double *alpha_, double Cp, double Cn, double eps,
00506 SolutionInfo* si, int shrinking)
00507 {
00508 this->l = l;
00509 this->Q = &Q;
00510 QD=Q.get_QD();
00511 clone(p, p_,l);
00512 clone(y, y_,l);
00513 clone(alpha,alpha_,l);
00514 this->Cp = Cp;
00515 this->Cn = Cn;
00516 this->eps = eps;
00517 unshrink = false;
00518
00519
00520 {
00521 alpha_status = new char[l];
00522 for(int i=0;i<l;i++)
00523 update_alpha_status(i);
00524 }
00525
00526
00527 {
00528 active_set = new int[l];
00529 for(int i=0;i<l;i++)
00530 active_set[i] = i;
00531 active_size = l;
00532 }
00533
00534
00535 {
00536 G = new double[l];
00537 G_bar = new double[l];
00538 int i;
00539 for(i=0;i<l;i++)
00540 {
00541 G[i] = p[i];
00542 G_bar[i] = 0;
00543 }
00544 for(i=0;i<l;i++)
00545 if(!is_lower_bound(i))
00546 {
00547 const Qfloat *Q_i = Q.get_Q(i,l);
00548 double alpha_i = alpha[i];
00549 int j;
00550 for(j=0;j<l;j++)
00551 G[j] += alpha_i*Q_i[j];
00552 if(is_upper_bound(i))
00553 for(j=0;j<l;j++)
00554 G_bar[j] += get_C(i) * Q_i[j];
00555 }
00556 }
00557
00558
00559
00560 int iter = 0;
00561 int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
00562 int counter = min(l,1000)+1;
00563
00564 while(iter < max_iter)
00565 {
00566
00567
00568 if(--counter == 0)
00569 {
00570 counter = min(l,1000);
00571 if(shrinking) do_shrinking();
00572
00573 }
00574
00575 int i,j;
00576 if(select_working_set(i,j)!=0)
00577 {
00578
00579 reconstruct_gradient();
00580
00581 active_size = l;
00582
00583 if(select_working_set(i,j)!=0)
00584 break;
00585 else
00586 counter = 1;
00587 }
00588
00589 ++iter;
00590
00591
00592
00593 const Qfloat *Q_i = Q.get_Q(i,active_size);
00594 const Qfloat *Q_j = Q.get_Q(j,active_size);
00595
00596 double C_i = get_C(i);
00597 double C_j = get_C(j);
00598
00599 double old_alpha_i = alpha[i];
00600 double old_alpha_j = alpha[j];
00601
00602 if(y[i]!=y[j])
00603 {
00604 double quad_coef = QD[i]+QD[j]+2*Q_i[j];
00605 if (quad_coef <= 0)
00606 quad_coef = TAU;
00607 double delta = (-G[i]-G[j])/quad_coef;
00608 double diff = alpha[i] - alpha[j];
00609 alpha[i] += delta;
00610 alpha[j] += delta;
00611
00612 if(diff > 0)
00613 {
00614 if(alpha[j] < 0)
00615 {
00616 alpha[j] = 0;
00617 alpha[i] = diff;
00618 }
00619 }
00620 else
00621 {
00622 if(alpha[i] < 0)
00623 {
00624 alpha[i] = 0;
00625 alpha[j] = -diff;
00626 }
00627 }
00628 if(diff > C_i - C_j)
00629 {
00630 if(alpha[i] > C_i)
00631 {
00632 alpha[i] = C_i;
00633 alpha[j] = C_i - diff;
00634 }
00635 }
00636 else
00637 {
00638 if(alpha[j] > C_j)
00639 {
00640 alpha[j] = C_j;
00641 alpha[i] = C_j + diff;
00642 }
00643 }
00644 }
00645 else
00646 {
00647 double quad_coef = QD[i]+QD[j]-2*Q_i[j];
00648 if (quad_coef <= 0)
00649 quad_coef = TAU;
00650 double delta = (G[i]-G[j])/quad_coef;
00651 double sum = alpha[i] + alpha[j];
00652 alpha[i] -= delta;
00653 alpha[j] += delta;
00654
00655 if(sum > C_i)
00656 {
00657 if(alpha[i] > C_i)
00658 {
00659 alpha[i] = C_i;
00660 alpha[j] = sum - C_i;
00661 }
00662 }
00663 else
00664 {
00665 if(alpha[j] < 0)
00666 {
00667 alpha[j] = 0;
00668 alpha[i] = sum;
00669 }
00670 }
00671 if(sum > C_j)
00672 {
00673 if(alpha[j] > C_j)
00674 {
00675 alpha[j] = C_j;
00676 alpha[i] = sum - C_j;
00677 }
00678 }
00679 else
00680 {
00681 if(alpha[i] < 0)
00682 {
00683 alpha[i] = 0;
00684 alpha[j] = sum;
00685 }
00686 }
00687 }
00688
00689
00690
00691 double delta_alpha_i = alpha[i] - old_alpha_i;
00692 double delta_alpha_j = alpha[j] - old_alpha_j;
00693
00694 for(int k=0;k<active_size;k++)
00695 {
00696 G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00697 }
00698
00699
00700
00701 {
00702 bool ui = is_upper_bound(i);
00703 bool uj = is_upper_bound(j);
00704 update_alpha_status(i);
00705 update_alpha_status(j);
00706 int k;
00707 if(ui != is_upper_bound(i))
00708 {
00709 Q_i = Q.get_Q(i,l);
00710 if(ui)
00711 for(k=0;k<l;k++)
00712 G_bar[k] -= C_i * Q_i[k];
00713 else
00714 for(k=0;k<l;k++)
00715 G_bar[k] += C_i * Q_i[k];
00716 }
00717
00718 if(uj != is_upper_bound(j))
00719 {
00720 Q_j = Q.get_Q(j,l);
00721 if(uj)
00722 for(k=0;k<l;k++)
00723 G_bar[k] -= C_j * Q_j[k];
00724 else
00725 for(k=0;k<l;k++)
00726 G_bar[k] += C_j * Q_j[k];
00727 }
00728 }
00729 }
00730
00731 if(iter >= max_iter)
00732 {
00733 if(active_size < l)
00734 {
00735
00736 reconstruct_gradient();
00737 active_size = l;
00738
00739 }
00740 fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
00741 }
00742
00743
00744
00745 si->rho = calculate_rho();
00746
00747
00748 {
00749 double v = 0;
00750 int i;
00751 for(i=0;i<l;i++)
00752 v += alpha[i] * (G[i] + p[i]);
00753
00754 si->obj = v/2;
00755 }
00756
00757
00758 {
00759 for(int i=0;i<l;i++)
00760 alpha_[active_set[i]] = alpha[i];
00761 }
00762
00763
00764
00765
00766
00767
00768
00769
00770
00771 si->upper_bound_p = Cp;
00772 si->upper_bound_n = Cn;
00773
00774
00775
00776 delete[] p;
00777 delete[] y;
00778 delete[] alpha;
00779 delete[] alpha_status;
00780 delete[] active_set;
00781 delete[] G;
00782 delete[] G_bar;
00783 }
00784
00785
00786 int Solver::select_working_set(int &out_i, int &out_j)
00787 {
00788
00789
00790
00791
00792
00793
00794 double Gmax = -INF;
00795 double Gmax2 = -INF;
00796 int Gmax_idx = -1;
00797 int Gmin_idx = -1;
00798 double obj_diff_min = INF;
00799
00800 for(int t=0;t<active_size;t++)
00801 if(y[t]==+1)
00802 {
00803 if(!is_upper_bound(t))
00804 if(-G[t] >= Gmax)
00805 {
00806 Gmax = -G[t];
00807 Gmax_idx = t;
00808 }
00809 }
00810 else
00811 {
00812 if(!is_lower_bound(t))
00813 if(G[t] >= Gmax)
00814 {
00815 Gmax = G[t];
00816 Gmax_idx = t;
00817 }
00818 }
00819
00820 int i = Gmax_idx;
00821 const Qfloat *Q_i = NULL;
00822 if(i != -1)
00823 Q_i = Q->get_Q(i,active_size);
00824
00825 for(int j=0;j<active_size;j++)
00826 {
00827 if(y[j]==+1)
00828 {
00829 if (!is_lower_bound(j))
00830 {
00831 double grad_diff=Gmax+G[j];
00832 if (G[j] >= Gmax2)
00833 Gmax2 = G[j];
00834 if (grad_diff > 0)
00835 {
00836 double obj_diff;
00837 double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
00838 if (quad_coef > 0)
00839 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00840 else
00841 obj_diff = -(grad_diff*grad_diff)/TAU;
00842
00843 if (obj_diff <= obj_diff_min)
00844 {
00845 Gmin_idx=j;
00846 obj_diff_min = obj_diff;
00847 }
00848 }
00849 }
00850 }
00851 else
00852 {
00853 if (!is_upper_bound(j))
00854 {
00855 double grad_diff= Gmax-G[j];
00856 if (-G[j] >= Gmax2)
00857 Gmax2 = -G[j];
00858 if (grad_diff > 0)
00859 {
00860 double obj_diff;
00861 double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
00862 if (quad_coef > 0)
00863 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00864 else
00865 obj_diff = -(grad_diff*grad_diff)/TAU;
00866
00867 if (obj_diff <= obj_diff_min)
00868 {
00869 Gmin_idx=j;
00870 obj_diff_min = obj_diff;
00871 }
00872 }
00873 }
00874 }
00875 }
00876
00877 if(Gmax+Gmax2 < eps)
00878 return 1;
00879
00880 out_i = Gmax_idx;
00881 out_j = Gmin_idx;
00882 return 0;
00883 }
00884
00885 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
00886 {
00887 if(is_upper_bound(i))
00888 {
00889 if(y[i]==+1)
00890 return(-G[i] > Gmax1);
00891 else
00892 return(-G[i] > Gmax2);
00893 }
00894 else if(is_lower_bound(i))
00895 {
00896 if(y[i]==+1)
00897 return(G[i] > Gmax2);
00898 else
00899 return(G[i] > Gmax1);
00900 }
00901 else
00902 return(false);
00903 }
00904
00905 void Solver::do_shrinking()
00906 {
00907 int i;
00908 double Gmax1 = -INF;
00909 double Gmax2 = -INF;
00910
00911
00912 for(i=0;i<active_size;i++)
00913 {
00914 if(y[i]==+1)
00915 {
00916 if(!is_upper_bound(i))
00917 {
00918 if(-G[i] >= Gmax1)
00919 Gmax1 = -G[i];
00920 }
00921 if(!is_lower_bound(i))
00922 {
00923 if(G[i] >= Gmax2)
00924 Gmax2 = G[i];
00925 }
00926 }
00927 else
00928 {
00929 if(!is_upper_bound(i))
00930 {
00931 if(-G[i] >= Gmax2)
00932 Gmax2 = -G[i];
00933 }
00934 if(!is_lower_bound(i))
00935 {
00936 if(G[i] >= Gmax1)
00937 Gmax1 = G[i];
00938 }
00939 }
00940 }
00941
00942 if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
00943 {
00944 unshrink = true;
00945 reconstruct_gradient();
00946 active_size = l;
00947
00948 }
00949
00950 for(i=0;i<active_size;i++)
00951 if (be_shrunk(i, Gmax1, Gmax2))
00952 {
00953 active_size--;
00954 while (active_size > i)
00955 {
00956 if (!be_shrunk(active_size, Gmax1, Gmax2))
00957 {
00958 swap_index(i,active_size);
00959 break;
00960 }
00961 active_size--;
00962 }
00963 }
00964 }
00965
00966 double Solver::calculate_rho()
00967 {
00968 double r;
00969 int nr_free = 0;
00970 double ub = INF, lb = -INF, sum_free = 0;
00971 for(int i=0;i<active_size;i++)
00972 {
00973 double yG = y[i]*G[i];
00974
00975 if(is_upper_bound(i))
00976 {
00977 if(y[i]==-1)
00978 ub = min(ub,yG);
00979 else
00980 lb = max(lb,yG);
00981 }
00982 else if(is_lower_bound(i))
00983 {
00984 if(y[i]==+1)
00985 ub = min(ub,yG);
00986 else
00987 lb = max(lb,yG);
00988 }
00989 else
00990 {
00991 ++nr_free;
00992 sum_free += yG;
00993 }
00994 }
00995
00996 if(nr_free>0)
00997 r = sum_free/nr_free;
00998 else
00999 r = (ub+lb)/2;
01000
01001 return r;
01002 }
01003
01004
01005
01006
01007
01008
01009 class Solver_NU: public Solver
01010 {
01011 public:
01012 Solver_NU() {}
01013 void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
01014 double *alpha, double Cp, double Cn, double eps,
01015 SolutionInfo* si, int shrinking)
01016 {
01017 this->si = si;
01018 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
01019 }
01020 private:
01021 SolutionInfo *si;
01022 int select_working_set(int &i, int &j);
01023 double calculate_rho();
01024 bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
01025 void do_shrinking();
01026 };
01027
01028
01029 int Solver_NU::select_working_set(int &out_i, int &out_j)
01030 {
01031
01032
01033
01034
01035
01036
01037 double Gmaxp = -INF;
01038 double Gmaxp2 = -INF;
01039 int Gmaxp_idx = -1;
01040
01041 double Gmaxn = -INF;
01042 double Gmaxn2 = -INF;
01043 int Gmaxn_idx = -1;
01044
01045 int Gmin_idx = -1;
01046 double obj_diff_min = INF;
01047
01048 for(int t=0;t<active_size;t++)
01049 if(y[t]==+1)
01050 {
01051 if(!is_upper_bound(t))
01052 if(-G[t] >= Gmaxp)
01053 {
01054 Gmaxp = -G[t];
01055 Gmaxp_idx = t;
01056 }
01057 }
01058 else
01059 {
01060 if(!is_lower_bound(t))
01061 if(G[t] >= Gmaxn)
01062 {
01063 Gmaxn = G[t];
01064 Gmaxn_idx = t;
01065 }
01066 }
01067
01068 int ip = Gmaxp_idx;
01069 int in = Gmaxn_idx;
01070 const Qfloat *Q_ip = NULL;
01071 const Qfloat *Q_in = NULL;
01072 if(ip != -1)
01073 Q_ip = Q->get_Q(ip,active_size);
01074 if(in != -1)
01075 Q_in = Q->get_Q(in,active_size);
01076
01077 for(int j=0;j<active_size;j++)
01078 {
01079 if(y[j]==+1)
01080 {
01081 if (!is_lower_bound(j))
01082 {
01083 double grad_diff=Gmaxp+G[j];
01084 if (G[j] >= Gmaxp2)
01085 Gmaxp2 = G[j];
01086 if (grad_diff > 0)
01087 {
01088 double obj_diff;
01089 double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
01090 if (quad_coef > 0)
01091 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01092 else
01093 obj_diff = -(grad_diff*grad_diff)/TAU;
01094
01095 if (obj_diff <= obj_diff_min)
01096 {
01097 Gmin_idx=j;
01098 obj_diff_min = obj_diff;
01099 }
01100 }
01101 }
01102 }
01103 else
01104 {
01105 if (!is_upper_bound(j))
01106 {
01107 double grad_diff=Gmaxn-G[j];
01108 if (-G[j] >= Gmaxn2)
01109 Gmaxn2 = -G[j];
01110 if (grad_diff > 0)
01111 {
01112 double obj_diff;
01113 double quad_coef = QD[in]+QD[j]-2*Q_in[j];
01114 if (quad_coef > 0)
01115 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01116 else
01117 obj_diff = -(grad_diff*grad_diff)/TAU;
01118
01119 if (obj_diff <= obj_diff_min)
01120 {
01121 Gmin_idx=j;
01122 obj_diff_min = obj_diff;
01123 }
01124 }
01125 }
01126 }
01127 }
01128
01129 if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
01130 return 1;
01131
01132 if (y[Gmin_idx] == +1)
01133 out_i = Gmaxp_idx;
01134 else
01135 out_i = Gmaxn_idx;
01136 out_j = Gmin_idx;
01137
01138 return 0;
01139 }
01140
01141 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01142 {
01143 if(is_upper_bound(i))
01144 {
01145 if(y[i]==+1)
01146 return(-G[i] > Gmax1);
01147 else
01148 return(-G[i] > Gmax4);
01149 }
01150 else if(is_lower_bound(i))
01151 {
01152 if(y[i]==+1)
01153 return(G[i] > Gmax2);
01154 else
01155 return(G[i] > Gmax3);
01156 }
01157 else
01158 return(false);
01159 }
01160
01161 void Solver_NU::do_shrinking()
01162 {
01163 double Gmax1 = -INF;
01164 double Gmax2 = -INF;
01165 double Gmax3 = -INF;
01166 double Gmax4 = -INF;
01167
01168
01169 int i;
01170 for(i=0;i<active_size;i++)
01171 {
01172 if(!is_upper_bound(i))
01173 {
01174 if(y[i]==+1)
01175 {
01176 if(-G[i] > Gmax1) Gmax1 = -G[i];
01177 }
01178 else if(-G[i] > Gmax4) Gmax4 = -G[i];
01179 }
01180 if(!is_lower_bound(i))
01181 {
01182 if(y[i]==+1)
01183 {
01184 if(G[i] > Gmax2) Gmax2 = G[i];
01185 }
01186 else if(G[i] > Gmax3) Gmax3 = G[i];
01187 }
01188 }
01189
01190 if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
01191 {
01192 unshrink = true;
01193 reconstruct_gradient();
01194 active_size = l;
01195 }
01196
01197 for(i=0;i<active_size;i++)
01198 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
01199 {
01200 active_size--;
01201 while (active_size > i)
01202 {
01203 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01204 {
01205 swap_index(i,active_size);
01206 break;
01207 }
01208 active_size--;
01209 }
01210 }
01211 }
01212
01213 double Solver_NU::calculate_rho()
01214 {
01215 int nr_free1 = 0,nr_free2 = 0;
01216 double ub1 = INF, ub2 = INF;
01217 double lb1 = -INF, lb2 = -INF;
01218 double sum_free1 = 0, sum_free2 = 0;
01219
01220 for(int i=0;i<active_size;i++)
01221 {
01222 if(y[i]==+1)
01223 {
01224 if(is_upper_bound(i))
01225 lb1 = max(lb1,G[i]);
01226 else if(is_lower_bound(i))
01227 ub1 = min(ub1,G[i]);
01228 else
01229 {
01230 ++nr_free1;
01231 sum_free1 += G[i];
01232 }
01233 }
01234 else
01235 {
01236 if(is_upper_bound(i))
01237 lb2 = max(lb2,G[i]);
01238 else if(is_lower_bound(i))
01239 ub2 = min(ub2,G[i]);
01240 else
01241 {
01242 ++nr_free2;
01243 sum_free2 += G[i];
01244 }
01245 }
01246 }
01247
01248 double r1,r2;
01249 if(nr_free1 > 0)
01250 r1 = sum_free1/nr_free1;
01251 else
01252 r1 = (ub1+lb1)/2;
01253
01254 if(nr_free2 > 0)
01255 r2 = sum_free2/nr_free2;
01256 else
01257 r2 = (ub2+lb2)/2;
01258
01259 si->r = (r1+r2)/2;
01260 return (r1-r2)/2;
01261 }
01262
01263
01264
01265
01266 class SVC_Q: public Kernel
01267 {
01268 public:
01269 SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01270 :Kernel(prob.l, prob.x, param)
01271 {
01272 clone(y,y_,prob.l);
01273 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01274 QD = new double[prob.l];
01275 for(int i=0;i<prob.l;i++)
01276 QD[i] = (this->*kernel_function)(i,i);
01277 }
01278
01279 Qfloat *get_Q(int i, int len) const
01280 {
01281 Qfloat *data;
01282 int start, j;
01283 if((start = cache->get_data(i,&data,len)) < len)
01284 {
01285 for(j=start;j<len;j++)
01286 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01287 }
01288 return data;
01289 }
01290
01291 double *get_QD() const
01292 {
01293 return QD;
01294 }
01295
01296 void swap_index(int i, int j) const
01297 {
01298 cache->swap_index(i,j);
01299 Kernel::swap_index(i,j);
01300 swap(y[i],y[j]);
01301 swap(QD[i],QD[j]);
01302 }
01303
01304 ~SVC_Q()
01305 {
01306 delete[] y;
01307 delete cache;
01308 delete[] QD;
01309 }
01310 private:
01311 schar *y;
01312 Cache *cache;
01313 double *QD;
01314 };
01315
01316 class ONE_CLASS_Q: public Kernel
01317 {
01318 public:
01319 ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01320 :Kernel(prob.l, prob.x, param)
01321 {
01322 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01323 QD = new double[prob.l];
01324 for(int i=0;i<prob.l;i++)
01325 QD[i] = (this->*kernel_function)(i,i);
01326 }
01327
01328 Qfloat *get_Q(int i, int len) const
01329 {
01330 Qfloat *data;
01331 int start, j;
01332 if((start = cache->get_data(i,&data,len)) < len)
01333 {
01334 for(j=start;j<len;j++)
01335 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01336 }
01337 return data;
01338 }
01339
01340 double *get_QD() const
01341 {
01342 return QD;
01343 }
01344
01345 void swap_index(int i, int j) const
01346 {
01347 cache->swap_index(i,j);
01348 Kernel::swap_index(i,j);
01349 swap(QD[i],QD[j]);
01350 }
01351
01352 ~ONE_CLASS_Q()
01353 {
01354 delete cache;
01355 delete[] QD;
01356 }
01357 private:
01358 Cache *cache;
01359 double *QD;
01360 };
01361
01362 class SVR_Q: public Kernel
01363 {
01364 public:
01365 SVR_Q(const svm_problem& prob, const svm_parameter& param)
01366 :Kernel(prob.l, prob.x, param)
01367 {
01368 l = prob.l;
01369 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01370 QD = new double[2*l];
01371 sign = new schar[2*l];
01372 index = new int[2*l];
01373 for(int k=0;k<l;k++)
01374 {
01375 sign[k] = 1;
01376 sign[k+l] = -1;
01377 index[k] = k;
01378 index[k+l] = k;
01379 QD[k] = (this->*kernel_function)(k,k);
01380 QD[k+l] = QD[k];
01381 }
01382 buffer[0] = new Qfloat[2*l];
01383 buffer[1] = new Qfloat[2*l];
01384 next_buffer = 0;
01385 }
01386
01387 void swap_index(int i, int j) const
01388 {
01389 swap(sign[i],sign[j]);
01390 swap(index[i],index[j]);
01391 swap(QD[i],QD[j]);
01392 }
01393
01394 Qfloat *get_Q(int i, int len) const
01395 {
01396 Qfloat *data;
01397 int j, real_i = index[i];
01398 if(cache->get_data(real_i,&data,l) < l)
01399 {
01400 for(j=0;j<l;j++)
01401 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01402 }
01403
01404
01405 Qfloat *buf = buffer[next_buffer];
01406 next_buffer = 1 - next_buffer;
01407 schar si = sign[i];
01408 for(j=0;j<len;j++)
01409 buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
01410 return buf;
01411 }
01412
01413 double *get_QD() const
01414 {
01415 return QD;
01416 }
01417
01418 ~SVR_Q()
01419 {
01420 delete cache;
01421 delete[] sign;
01422 delete[] index;
01423 delete[] buffer[0];
01424 delete[] buffer[1];
01425 delete[] QD;
01426 }
01427 private:
01428 int l;
01429 Cache *cache;
01430 schar *sign;
01431 int *index;
01432 mutable int next_buffer;
01433 Qfloat *buffer[2];
01434 double *QD;
01435 };
01436
01437
01438
01439
01440 static void solve_c_svc(
01441 const svm_problem *prob, const svm_parameter* param,
01442 double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01443 {
01444 int l = prob->l;
01445 double *minus_ones = new double[l];
01446 schar *y = new schar[l];
01447
01448 int i;
01449
01450 for(i=0;i<l;i++)
01451 {
01452 alpha[i] = 0;
01453 minus_ones[i] = -1;
01454 if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
01455 }
01456
01457 Solver s;
01458 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01459 alpha, Cp, Cn, param->eps, si, param->shrinking);
01460
01461 double sum_alpha=0;
01462 for(i=0;i<l;i++)
01463 sum_alpha += alpha[i];
01464
01465
01466
01467
01468 for(i=0;i<l;i++)
01469 alpha[i] *= y[i];
01470
01471 delete[] minus_ones;
01472 delete[] y;
01473 }
01474
01475 static void solve_nu_svc(
01476 const svm_problem *prob, const svm_parameter *param,
01477 double *alpha, Solver::SolutionInfo* si)
01478 {
01479 int i;
01480 int l = prob->l;
01481 double nu = param->nu;
01482
01483 schar *y = new schar[l];
01484
01485 for(i=0;i<l;i++)
01486 if(prob->y[i]>0)
01487 y[i] = +1;
01488 else
01489 y[i] = -1;
01490
01491 double sum_pos = nu*l/2;
01492 double sum_neg = nu*l/2;
01493
01494 for(i=0;i<l;i++)
01495 if(y[i] == +1)
01496 {
01497 alpha[i] = min(1.0,sum_pos);
01498 sum_pos -= alpha[i];
01499 }
01500 else
01501 {
01502 alpha[i] = min(1.0,sum_neg);
01503 sum_neg -= alpha[i];
01504 }
01505
01506 double *zeros = new double[l];
01507
01508 for(i=0;i<l;i++)
01509 zeros[i] = 0;
01510
01511 Solver_NU s;
01512 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01513 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01514 double r = si->r;
01515
01516
01517
01518 for(i=0;i<l;i++)
01519 alpha[i] *= y[i]/r;
01520
01521 si->rho /= r;
01522 si->obj /= (r*r);
01523 si->upper_bound_p = 1/r;
01524 si->upper_bound_n = 1/r;
01525
01526 delete[] y;
01527 delete[] zeros;
01528 }
01529
01530 static void solve_one_class(
01531 const svm_problem *prob, const svm_parameter *param,
01532 double *alpha, Solver::SolutionInfo* si)
01533 {
01534 int l = prob->l;
01535 double *zeros = new double[l];
01536 schar *ones = new schar[l];
01537 int i;
01538
01539 int n = (int)(param->nu*prob->l);
01540
01541 for(i=0;i<n;i++)
01542 alpha[i] = 1;
01543 if(n<prob->l)
01544 alpha[n] = param->nu * prob->l - n;
01545 for(i=n+1;i<l;i++)
01546 alpha[i] = 0;
01547
01548 for(i=0;i<l;i++)
01549 {
01550 zeros[i] = 0;
01551 ones[i] = 1;
01552 }
01553
01554 Solver s;
01555 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01556 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01557
01558 delete[] zeros;
01559 delete[] ones;
01560 }
01561
01562 static void solve_epsilon_svr(
01563 const svm_problem *prob, const svm_parameter *param,
01564 double *alpha, Solver::SolutionInfo* si)
01565 {
01566 int l = prob->l;
01567 double *alpha2 = new double[2*l];
01568 double *linear_term = new double[2*l];
01569 schar *y = new schar[2*l];
01570 int i;
01571
01572 for(i=0;i<l;i++)
01573 {
01574 alpha2[i] = 0;
01575 linear_term[i] = param->p - prob->y[i];
01576 y[i] = 1;
01577
01578 alpha2[i+l] = 0;
01579 linear_term[i+l] = param->p + prob->y[i];
01580 y[i+l] = -1;
01581 }
01582
01583 Solver s;
01584 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01585 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01586
01587 double sum_alpha = 0;
01588 for(i=0;i<l;i++)
01589 {
01590 alpha[i] = alpha2[i] - alpha2[i+l];
01591 sum_alpha += fabs(alpha[i]);
01592 }
01593
01594
01595 delete[] alpha2;
01596 delete[] linear_term;
01597 delete[] y;
01598 }
01599
01600 static void solve_nu_svr(
01601 const svm_problem *prob, const svm_parameter *param,
01602 double *alpha, Solver::SolutionInfo* si)
01603 {
01604 int l = prob->l;
01605 double C = param->C;
01606 double *alpha2 = new double[2*l];
01607 double *linear_term = new double[2*l];
01608 schar *y = new schar[2*l];
01609 int i;
01610
01611 double sum = C * param->nu * l / 2;
01612 for(i=0;i<l;i++)
01613 {
01614 alpha2[i] = alpha2[i+l] = min(sum,C);
01615 sum -= alpha2[i];
01616
01617 linear_term[i] = - prob->y[i];
01618 y[i] = 1;
01619
01620 linear_term[i+l] = prob->y[i];
01621 y[i+l] = -1;
01622 }
01623
01624 Solver_NU s;
01625 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01626 alpha2, C, C, param->eps, si, param->shrinking);
01627
01628
01629
01630 for(i=0;i<l;i++)
01631 alpha[i] = alpha2[i] - alpha2[i+l];
01632
01633 delete[] alpha2;
01634 delete[] linear_term;
01635 delete[] y;
01636 }
01637
01638
01639
01640
01641 struct decision_function
01642 {
01643 double *alpha;
01644 double rho;
01645 };
01646
01647 static decision_function svm_train_one(
01648 const svm_problem *prob, const svm_parameter *param,
01649 double Cp, double Cn)
01650 {
01651 double *alpha = Malloc(double,prob->l);
01652 Solver::SolutionInfo si;
01653 switch(param->svm_type)
01654 {
01655 case C_SVC:
01656 solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01657 break;
01658 case NU_SVC:
01659 solve_nu_svc(prob,param,alpha,&si);
01660 break;
01661 case ONE_CLASS:
01662 solve_one_class(prob,param,alpha,&si);
01663 break;
01664 case EPSILON_SVR:
01665 solve_epsilon_svr(prob,param,alpha,&si);
01666 break;
01667 case NU_SVR:
01668 solve_nu_svr(prob,param,alpha,&si);
01669 break;
01670 }
01671
01672
01673
01674
01675
01676 int nSV = 0;
01677 int nBSV = 0;
01678 for(int i=0;i<prob->l;i++)
01679 {
01680 if(fabs(alpha[i]) > 0)
01681 {
01682 ++nSV;
01683 if(prob->y[i] > 0)
01684 {
01685 if(fabs(alpha[i]) >= si.upper_bound_p)
01686 ++nBSV;
01687 }
01688 else
01689 {
01690 if(fabs(alpha[i]) >= si.upper_bound_n)
01691 ++nBSV;
01692 }
01693 }
01694 }
01695
01696
01697
01698 decision_function f;
01699 f.alpha = alpha;
01700 f.rho = si.rho;
01701 return f;
01702 }
01703
01704
01705 static void sigmoid_train(
01706 int l, const double *dec_values, const double *labels,
01707 double& A, double& B)
01708 {
01709 double prior1=0, prior0 = 0;
01710 int i;
01711
01712 for (i=0;i<l;i++)
01713 if (labels[i] > 0) prior1+=1;
01714 else prior0+=1;
01715
01716 int max_iter=100;
01717 double min_step=1e-10;
01718 double sigma=1e-12;
01719 double eps=1e-5;
01720 double hiTarget=(prior1+1.0)/(prior1+2.0);
01721 double loTarget=1/(prior0+2.0);
01722 double *t=Malloc(double,l);
01723 double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01724 double newA,newB,newf,d1,d2;
01725 int iter;
01726
01727
01728 A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01729 double fval = 0.0;
01730
01731 for (i=0;i<l;i++)
01732 {
01733 if (labels[i]>0) t[i]=hiTarget;
01734 else t[i]=loTarget;
01735 fApB = dec_values[i]*A+B;
01736 if (fApB>=0)
01737 fval += t[i]*fApB + log(1+exp(-fApB));
01738 else
01739 fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01740 }
01741 for (iter=0;iter<max_iter;iter++)
01742 {
01743
01744 h11=sigma;
01745 h22=sigma;
01746 h21=0.0;g1=0.0;g2=0.0;
01747 for (i=0;i<l;i++)
01748 {
01749 fApB = dec_values[i]*A+B;
01750 if (fApB >= 0)
01751 {
01752 p=exp(-fApB)/(1.0+exp(-fApB));
01753 q=1.0/(1.0+exp(-fApB));
01754 }
01755 else
01756 {
01757 p=1.0/(1.0+exp(fApB));
01758 q=exp(fApB)/(1.0+exp(fApB));
01759 }
01760 d2=p*q;
01761 h11+=dec_values[i]*dec_values[i]*d2;
01762 h22+=d2;
01763 h21+=dec_values[i]*d2;
01764 d1=t[i]-p;
01765 g1+=dec_values[i]*d1;
01766 g2+=d1;
01767 }
01768
01769
01770 if (fabs(g1)<eps && fabs(g2)<eps)
01771 break;
01772
01773
01774 det=h11*h22-h21*h21;
01775 dA=-(h22*g1 - h21 * g2) / det;
01776 dB=-(-h21*g1+ h11 * g2) / det;
01777 gd=g1*dA+g2*dB;
01778
01779
01780 stepsize = 1;
01781 while (stepsize >= min_step)
01782 {
01783 newA = A + stepsize * dA;
01784 newB = B + stepsize * dB;
01785
01786
01787 newf = 0.0;
01788 for (i=0;i<l;i++)
01789 {
01790 fApB = dec_values[i]*newA+newB;
01791 if (fApB >= 0)
01792 newf += t[i]*fApB + log(1+exp(-fApB));
01793 else
01794 newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01795 }
01796
01797 if (newf<fval+0.0001*stepsize*gd)
01798 {
01799 A=newA;B=newB;fval=newf;
01800 break;
01801 }
01802 else
01803 stepsize = stepsize / 2.0;
01804 }
01805
01806 if (stepsize < min_step)
01807 {
01808
01809 break;
01810 }
01811 }
01812
01813 if (iter>=max_iter)
01814 info("Reaching maximal iterations in two-class probability estimates\n");
01815 free(t);
01816 }
01817
01818 static double sigmoid_predict(double decision_value, double A, double B)
01819 {
01820 double fApB = decision_value*A+B;
01821
01822 if (fApB >= 0)
01823 return exp(-fApB)/(1.0+exp(-fApB));
01824 else
01825 return 1.0/(1+exp(fApB)) ;
01826 }
01827
01828
01829 static void multiclass_probability(int k, double **r, double *p)
01830 {
01831 int t,j;
01832 int iter = 0, max_iter=max(100,k);
01833 double **Q=Malloc(double *,k);
01834 double *Qp=Malloc(double,k);
01835 double pQp, eps=0.005/k;
01836
01837 for (t=0;t<k;t++)
01838 {
01839 p[t]=1.0/k;
01840 Q[t]=Malloc(double,k);
01841 Q[t][t]=0;
01842 for (j=0;j<t;j++)
01843 {
01844 Q[t][t]+=r[j][t]*r[j][t];
01845 Q[t][j]=Q[j][t];
01846 }
01847 for (j=t+1;j<k;j++)
01848 {
01849 Q[t][t]+=r[j][t]*r[j][t];
01850 Q[t][j]=-r[j][t]*r[t][j];
01851 }
01852 }
01853 for (iter=0;iter<max_iter;iter++)
01854 {
01855
01856 pQp=0;
01857 for (t=0;t<k;t++)
01858 {
01859 Qp[t]=0;
01860 for (j=0;j<k;j++)
01861 Qp[t]+=Q[t][j]*p[j];
01862 pQp+=p[t]*Qp[t];
01863 }
01864 double max_error=0;
01865 for (t=0;t<k;t++)
01866 {
01867 double error=fabs(Qp[t]-pQp);
01868 if (error>max_error)
01869 max_error=error;
01870 }
01871 if (max_error<eps) break;
01872
01873 for (t=0;t<k;t++)
01874 {
01875 double diff=(-Qp[t]+pQp)/Q[t][t];
01876 p[t]+=diff;
01877 pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01878 for (j=0;j<k;j++)
01879 {
01880 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01881 p[j]/=(1+diff);
01882 }
01883 }
01884 }
01885 if (iter>=max_iter)
01886 info("Exceeds max_iter in multiclass_prob\n");
01887 for(t=0;t<k;t++) free(Q[t]);
01888 free(Q);
01889 free(Qp);
01890 }
01891
01892
01893 static void svm_binary_svc_probability(
01894 const svm_problem *prob, const svm_parameter *param,
01895 double Cp, double Cn, double& probA, double& probB)
01896 {
01897 int i;
01898 int nr_fold = 5;
01899 int *perm = Malloc(int,prob->l);
01900 double *dec_values = Malloc(double,prob->l);
01901
01902
01903 for(i=0;i<prob->l;i++) perm[i]=i;
01904 for(i=0;i<prob->l;i++)
01905 {
01906 int j = i+rand()%(prob->l-i);
01907 swap(perm[i],perm[j]);
01908 }
01909 for(i=0;i<nr_fold;i++)
01910 {
01911 int begin = i*prob->l/nr_fold;
01912 int end = (i+1)*prob->l/nr_fold;
01913 int j,k;
01914 struct svm_problem subprob;
01915
01916 subprob.l = prob->l-(end-begin);
01917 subprob.x = Malloc(struct svm_node*,subprob.l);
01918 subprob.y = Malloc(double,subprob.l);
01919
01920 k=0;
01921 for(j=0;j<begin;j++)
01922 {
01923 subprob.x[k] = prob->x[perm[j]];
01924 subprob.y[k] = prob->y[perm[j]];
01925 ++k;
01926 }
01927 for(j=end;j<prob->l;j++)
01928 {
01929 subprob.x[k] = prob->x[perm[j]];
01930 subprob.y[k] = prob->y[perm[j]];
01931 ++k;
01932 }
01933 int p_count=0,n_count=0;
01934 for(j=0;j<k;j++)
01935 if(subprob.y[j]>0)
01936 p_count++;
01937 else
01938 n_count++;
01939
01940 if(p_count==0 && n_count==0)
01941 for(j=begin;j<end;j++)
01942 dec_values[perm[j]] = 0;
01943 else if(p_count > 0 && n_count == 0)
01944 for(j=begin;j<end;j++)
01945 dec_values[perm[j]] = 1;
01946 else if(p_count == 0 && n_count > 0)
01947 for(j=begin;j<end;j++)
01948 dec_values[perm[j]] = -1;
01949 else
01950 {
01951 svm_parameter subparam = *param;
01952 subparam.probability=0;
01953 subparam.C=1.0;
01954 subparam.nr_weight=2;
01955 subparam.weight_label = Malloc(int,2);
01956 subparam.weight = Malloc(double,2);
01957 subparam.weight_label[0]=+1;
01958 subparam.weight_label[1]=-1;
01959 subparam.weight[0]=Cp;
01960 subparam.weight[1]=Cn;
01961 struct svm_model *submodel = svm_train(&subprob,&subparam);
01962 for(j=begin;j<end;j++)
01963 {
01964 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
01965
01966 dec_values[perm[j]] *= submodel->label[0];
01967 }
01968 svm_free_and_destroy_model(&submodel);
01969 svm_destroy_param(&subparam);
01970 }
01971 free(subprob.x);
01972 free(subprob.y);
01973 }
01974 sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
01975 free(dec_values);
01976 free(perm);
01977 }
01978
01979
01980 static double svm_svr_probability(
01981 const svm_problem *prob, const svm_parameter *param)
01982 {
01983 int i;
01984 int nr_fold = 5;
01985 double *ymv = Malloc(double,prob->l);
01986 double mae = 0;
01987
01988 svm_parameter newparam = *param;
01989 newparam.probability = 0;
01990 svm_cross_validation(prob,&newparam,nr_fold,ymv);
01991 for(i=0;i<prob->l;i++)
01992 {
01993 ymv[i]=prob->y[i]-ymv[i];
01994 mae += fabs(ymv[i]);
01995 }
01996 mae /= prob->l;
01997 double std=sqrt(2*mae*mae);
01998 int count=0;
01999 mae=0;
02000 for(i=0;i<prob->l;i++)
02001 if (fabs(ymv[i]) > 5*std)
02002 count=count+1;
02003 else
02004 mae+=fabs(ymv[i]);
02005 mae /= (prob->l-count);
02006 info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
02007 free(ymv);
02008 return mae;
02009 }
02010
02011
02012
02013
02014 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02015 {
02016 int l = prob->l;
02017 int max_nr_class = 16;
02018 int nr_class = 0;
02019 int *label = Malloc(int,max_nr_class);
02020 int *count = Malloc(int,max_nr_class);
02021 int *data_label = Malloc(int,l);
02022 int i;
02023
02024 for(i=0;i<l;i++)
02025 {
02026 int this_label = (int)prob->y[i];
02027 int j;
02028 for(j=0;j<nr_class;j++)
02029 {
02030 if(this_label == label[j])
02031 {
02032 ++count[j];
02033 break;
02034 }
02035 }
02036 data_label[i] = j;
02037 if(j == nr_class)
02038 {
02039 if(nr_class == max_nr_class)
02040 {
02041 max_nr_class *= 2;
02042 label = (int *)realloc(label,max_nr_class*sizeof(int));
02043 count = (int *)realloc(count,max_nr_class*sizeof(int));
02044 }
02045 label[nr_class] = this_label;
02046 count[nr_class] = 1;
02047 ++nr_class;
02048 }
02049 }
02050
02051 int *start = Malloc(int,nr_class);
02052 start[0] = 0;
02053 for(i=1;i<nr_class;i++)
02054 start[i] = start[i-1]+count[i-1];
02055 for(i=0;i<l;i++)
02056 {
02057 perm[start[data_label[i]]] = i;
02058 ++start[data_label[i]];
02059 }
02060 start[0] = 0;
02061 for(i=1;i<nr_class;i++)
02062 start[i] = start[i-1]+count[i-1];
02063
02064 *nr_class_ret = nr_class;
02065 *label_ret = label;
02066 *start_ret = start;
02067 *count_ret = count;
02068 free(data_label);
02069 }
02070
02071
02072
02073
02074 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02075 {
02076 svm_model *model = Malloc(svm_model,1);
02077 model->param = *param;
02078 model->free_sv = 0;
02079
02080 if(param->svm_type == ONE_CLASS ||
02081 param->svm_type == EPSILON_SVR ||
02082 param->svm_type == NU_SVR)
02083 {
02084
02085 model->nr_class = 2;
02086 model->label = NULL;
02087 model->nSV = NULL;
02088 model->probA = NULL; model->probB = NULL;
02089 model->sv_coef = Malloc(double *,1);
02090
02091 if(param->probability &&
02092 (param->svm_type == EPSILON_SVR ||
02093 param->svm_type == NU_SVR))
02094 {
02095 model->probA = Malloc(double,1);
02096 model->probA[0] = svm_svr_probability(prob,param);
02097 }
02098
02099 decision_function f = svm_train_one(prob,param,0,0);
02100 model->rho = Malloc(double,1);
02101 model->rho[0] = f.rho;
02102
02103 int nSV = 0;
02104 int i;
02105 for(i=0;i<prob->l;i++)
02106 if(fabs(f.alpha[i]) > 0) ++nSV;
02107 model->l = nSV;
02108 model->SV = Malloc(svm_node *,nSV);
02109 model->sv_coef[0] = Malloc(double,nSV);
02110 model->sv_indices = Malloc(int,nSV);
02111 int j = 0;
02112 for(i=0;i<prob->l;i++)
02113 if(fabs(f.alpha[i]) > 0)
02114 {
02115 model->SV[j] = prob->x[i];
02116 model->sv_coef[0][j] = f.alpha[i];
02117 model->sv_indices[j] = i+1;
02118 ++j;
02119 }
02120
02121 free(f.alpha);
02122 }
02123 else
02124 {
02125
02126 int l = prob->l;
02127 int nr_class;
02128 int *label = NULL;
02129 int *start = NULL;
02130 int *count = NULL;
02131 int *perm = Malloc(int,l);
02132
02133
02134 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02135
02136
02137
02138 svm_node **x = Malloc(svm_node *,l);
02139 int i;
02140 for(i=0;i<l;i++)
02141 x[i] = prob->x[perm[i]];
02142
02143
02144
02145 double *weighted_C = Malloc(double, nr_class);
02146 for(i=0;i<nr_class;i++)
02147 weighted_C[i] = param->C;
02148 for(i=0;i<param->nr_weight;i++)
02149 {
02150 int j;
02151 for(j=0;j<nr_class;j++)
02152 if(param->weight_label[i] == label[j])
02153 break;
02154 if(j == nr_class)
02155 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
02156 else
02157 weighted_C[j] *= param->weight[i];
02158 }
02159
02160
02161
02162 bool *nonzero = Malloc(bool,l);
02163 for(i=0;i<l;i++)
02164 nonzero[i] = false;
02165 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02166
02167 double *probA=NULL,*probB=NULL;
02168 if (param->probability)
02169 {
02170 probA=Malloc(double,nr_class*(nr_class-1)/2);
02171 probB=Malloc(double,nr_class*(nr_class-1)/2);
02172 }
02173
02174 int p = 0;
02175 for(i=0;i<nr_class;i++)
02176 for(int j=i+1;j<nr_class;j++)
02177 {
02178 svm_problem sub_prob;
02179 int si = start[i], sj = start[j];
02180 int ci = count[i], cj = count[j];
02181 sub_prob.l = ci+cj;
02182 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02183 sub_prob.y = Malloc(double,sub_prob.l);
02184 int k;
02185 for(k=0;k<ci;k++)
02186 {
02187 sub_prob.x[k] = x[si+k];
02188 sub_prob.y[k] = +1;
02189 }
02190 for(k=0;k<cj;k++)
02191 {
02192 sub_prob.x[ci+k] = x[sj+k];
02193 sub_prob.y[ci+k] = -1;
02194 }
02195
02196 if(param->probability)
02197 svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02198
02199 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02200 for(k=0;k<ci;k++)
02201 if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02202 nonzero[si+k] = true;
02203 for(k=0;k<cj;k++)
02204 if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02205 nonzero[sj+k] = true;
02206 free(sub_prob.x);
02207 free(sub_prob.y);
02208 ++p;
02209 }
02210
02211
02212
02213 model->nr_class = nr_class;
02214
02215 model->label = Malloc(int,nr_class);
02216 for(i=0;i<nr_class;i++)
02217 model->label[i] = label[i];
02218
02219 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02220 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02221 model->rho[i] = f[i].rho;
02222
02223 if(param->probability)
02224 {
02225 model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02226 model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02227 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02228 {
02229 model->probA[i] = probA[i];
02230 model->probB[i] = probB[i];
02231 }
02232 }
02233 else
02234 {
02235 model->probA=NULL;
02236 model->probB=NULL;
02237 }
02238
02239 int total_sv = 0;
02240 int *nz_count = Malloc(int,nr_class);
02241 model->nSV = Malloc(int,nr_class);
02242 for(i=0;i<nr_class;i++)
02243 {
02244 int nSV = 0;
02245 for(int j=0;j<count[i];j++)
02246 if(nonzero[start[i]+j])
02247 {
02248 ++nSV;
02249 ++total_sv;
02250 }
02251 model->nSV[i] = nSV;
02252 nz_count[i] = nSV;
02253 }
02254
02255
02256
02257 model->l = total_sv;
02258 model->SV = Malloc(svm_node *,total_sv);
02259 model->sv_indices = Malloc(int,total_sv);
02260 p = 0;
02261 for(i=0;i<l;i++)
02262 if(nonzero[i])
02263 {
02264 model->SV[p] = x[i];
02265 model->sv_indices[p++] = perm[i] + 1;
02266 }
02267
02268 int *nz_start = Malloc(int,nr_class);
02269 nz_start[0] = 0;
02270 for(i=1;i<nr_class;i++)
02271 nz_start[i] = nz_start[i-1]+nz_count[i-1];
02272
02273 model->sv_coef = Malloc(double *,nr_class-1);
02274 for(i=0;i<nr_class-1;i++)
02275 model->sv_coef[i] = Malloc(double,total_sv);
02276
02277 p = 0;
02278 for(i=0;i<nr_class;i++)
02279 for(int j=i+1;j<nr_class;j++)
02280 {
02281
02282
02283
02284
02285 int si = start[i];
02286 int sj = start[j];
02287 int ci = count[i];
02288 int cj = count[j];
02289
02290 int q = nz_start[i];
02291 int k;
02292 for(k=0;k<ci;k++)
02293 if(nonzero[si+k])
02294 model->sv_coef[j-1][q++] = f[p].alpha[k];
02295 q = nz_start[j];
02296 for(k=0;k<cj;k++)
02297 if(nonzero[sj+k])
02298 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02299 ++p;
02300 }
02301
02302 free(label);
02303 free(probA);
02304 free(probB);
02305 free(count);
02306 free(perm);
02307 free(start);
02308 free(x);
02309 free(weighted_C);
02310 free(nonzero);
02311 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02312 free(f[i].alpha);
02313 free(f);
02314 free(nz_count);
02315 free(nz_start);
02316 }
02317 return model;
02318 }
02319
02320
02321 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02322 {
02323 int i;
02324 int *fold_start = Malloc(int,nr_fold+1);
02325 int l = prob->l;
02326 int *perm = Malloc(int,l);
02327 int nr_class;
02328
02329
02330
02331 if((param->svm_type == C_SVC ||
02332 param->svm_type == NU_SVC) && nr_fold < l)
02333 {
02334 int *start = NULL;
02335 int *label = NULL;
02336 int *count = NULL;
02337 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02338
02339
02340 int *fold_count = Malloc(int,nr_fold);
02341 int c;
02342 int *index = Malloc(int,l);
02343 for(i=0;i<l;i++)
02344 index[i]=perm[i];
02345 for (c=0; c<nr_class; c++)
02346 for(i=0;i<count[c];i++)
02347 {
02348 int j = i+rand()%(count[c]-i);
02349 swap(index[start[c]+j],index[start[c]+i]);
02350 }
02351 for(i=0;i<nr_fold;i++)
02352 {
02353 fold_count[i] = 0;
02354 for (c=0; c<nr_class;c++)
02355 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02356 }
02357 fold_start[0]=0;
02358 for (i=1;i<=nr_fold;i++)
02359 fold_start[i] = fold_start[i-1]+fold_count[i-1];
02360 for (c=0; c<nr_class;c++)
02361 for(i=0;i<nr_fold;i++)
02362 {
02363 int begin = start[c]+i*count[c]/nr_fold;
02364 int end = start[c]+(i+1)*count[c]/nr_fold;
02365 for(int j=begin;j<end;j++)
02366 {
02367 perm[fold_start[i]] = index[j];
02368 fold_start[i]++;
02369 }
02370 }
02371 fold_start[0]=0;
02372 for (i=1;i<=nr_fold;i++)
02373 fold_start[i] = fold_start[i-1]+fold_count[i-1];
02374 free(start);
02375 free(label);
02376 free(count);
02377 free(index);
02378 free(fold_count);
02379 }
02380 else
02381 {
02382 for(i=0;i<l;i++) perm[i]=i;
02383 for(i=0;i<l;i++)
02384 {
02385 int j = i+rand()%(l-i);
02386 swap(perm[i],perm[j]);
02387 }
02388 for(i=0;i<=nr_fold;i++)
02389 fold_start[i]=i*l/nr_fold;
02390 }
02391
02392 for(i=0;i<nr_fold;i++)
02393 {
02394 int begin = fold_start[i];
02395 int end = fold_start[i+1];
02396 int j,k;
02397 struct svm_problem subprob;
02398
02399 subprob.l = l-(end-begin);
02400 subprob.x = Malloc(struct svm_node*,subprob.l);
02401 subprob.y = Malloc(double,subprob.l);
02402
02403 k=0;
02404 for(j=0;j<begin;j++)
02405 {
02406 subprob.x[k] = prob->x[perm[j]];
02407 subprob.y[k] = prob->y[perm[j]];
02408 ++k;
02409 }
02410 for(j=end;j<l;j++)
02411 {
02412 subprob.x[k] = prob->x[perm[j]];
02413 subprob.y[k] = prob->y[perm[j]];
02414 ++k;
02415 }
02416 struct svm_model *submodel = svm_train(&subprob,param);
02417 if(param->probability &&
02418 (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02419 {
02420 double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02421 for(j=begin;j<end;j++)
02422 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02423 free(prob_estimates);
02424 }
02425 else
02426 for(j=begin;j<end;j++){
02427 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02428 }
02429 svm_free_and_destroy_model(&submodel);
02430 free(subprob.x);
02431 free(subprob.y);
02432 }
02433 free(fold_start);
02434 free(perm);
02435 }
02436
02437
02438 int svm_get_svm_type(const svm_model *model)
02439 {
02440 return model->param.svm_type;
02441 }
02442
02443 int svm_get_nr_class(const svm_model *model)
02444 {
02445 return model->nr_class;
02446 }
02447
02448 void svm_get_labels(const svm_model *model, int* label)
02449 {
02450 if (model->label != NULL)
02451 for(int i=0;i<model->nr_class;i++)
02452 label[i] = model->label[i];
02453 }
02454
02455 void svm_get_sv_indices(const svm_model *model, int* indices)
02456 {
02457 if (model->sv_indices != NULL)
02458 for(int i=0;i<model->l;i++)
02459 indices[i] = model->sv_indices[i];
02460 }
02461
02462 int svm_get_nr_sv(const svm_model *model)
02463 {
02464 return model->l;
02465 }
02466
02467 double svm_get_svr_probability(const svm_model *model)
02468 {
02469 if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02470 model->probA!=NULL)
02471 return model->probA[0];
02472 else
02473 {
02474 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
02475 return 0;
02476 }
02477 }
02478
02479 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02480 {
02481 int i;
02482 if(model->param.svm_type == ONE_CLASS ||
02483 model->param.svm_type == EPSILON_SVR ||
02484 model->param.svm_type == NU_SVR)
02485 {
02486 double *sv_coef = model->sv_coef[0];
02487 double sum = 0;
02488 for(i=0;i<model->l;i++)
02489 sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02490 sum -= model->rho[0];
02491 *dec_values = sum;
02492
02493 if(model->param.svm_type == ONE_CLASS)
02494 return (sum>0)?1:-1;
02495 else
02496 return sum;
02497 }
02498 else
02499 {
02500 int nr_class = model->nr_class;
02501 int l = model->l;
02502
02503 double *kvalue = Malloc(double,l);
02504 for(i=0;i<l;i++)
02505 kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02506
02507 int *start = Malloc(int,nr_class);
02508 start[0] = 0;
02509 for(i=1;i<nr_class;i++)
02510 start[i] = start[i-1]+model->nSV[i-1];
02511
02512 int *vote = Malloc(int,nr_class);
02513 for(i=0;i<nr_class;i++)
02514 vote[i] = 0;
02515
02516 int p=0;
02517 for(i=0;i<nr_class;i++)
02518 for(int j=i+1;j<nr_class;j++)
02519 {
02520 double sum = 0;
02521 int si = start[i];
02522 int sj = start[j];
02523 int ci = model->nSV[i];
02524 int cj = model->nSV[j];
02525
02526 int k;
02527 double *coef1 = model->sv_coef[j-1];
02528 double *coef2 = model->sv_coef[i];
02529 for(k=0;k<ci;k++)
02530 sum += coef1[si+k] * kvalue[si+k];
02531 for(k=0;k<cj;k++)
02532 sum += coef2[sj+k] * kvalue[sj+k];
02533 sum -= model->rho[p];
02534 dec_values[p] = sum;
02535
02536 if(dec_values[p] > 0)
02537 ++vote[i];
02538 else
02539 ++vote[j];
02540 p++;
02541 }
02542
02543 int vote_max_idx = 0;
02544 for(i=1;i<nr_class;i++)
02545 if(vote[i] > vote[vote_max_idx])
02546 vote_max_idx = i;
02547
02548 free(kvalue);
02549 free(start);
02550 free(vote);
02551 return model->label[vote_max_idx];
02552 }
02553 }
02554
02555 double svm_predict(const svm_model *model, const svm_node *x)
02556 {
02557 int nr_class = model->nr_class;
02558 double *dec_values;
02559 if(model->param.svm_type == ONE_CLASS ||
02560 model->param.svm_type == EPSILON_SVR ||
02561 model->param.svm_type == NU_SVR)
02562 dec_values = Malloc(double, 1);
02563 else
02564 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02565 double pred_result = svm_predict_values(model, x, dec_values);
02566 free(dec_values);
02567 return pred_result;
02568 }
02569
02570 double svm_predict_probability(
02571 const svm_model *model, const svm_node *x, double *prob_estimates)
02572 {
02573 if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02574 model->probA!=NULL && model->probB!=NULL)
02575 {
02576 int i;
02577 int nr_class = model->nr_class;
02578 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02579 svm_predict_values(model, x, dec_values);
02580
02581 double min_prob=1e-7;
02582 double **pairwise_prob=Malloc(double *,nr_class);
02583 for(i=0;i<nr_class;i++)
02584 pairwise_prob[i]=Malloc(double,nr_class);
02585 int k=0;
02586 for(i=0;i<nr_class;i++)
02587 for(int j=i+1;j<nr_class;j++)
02588 {
02589 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02590 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02591 k++;
02592 }
02593 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02594
02595 int prob_max_idx = 0;
02596 for(i=1;i<nr_class;i++)
02597 if(prob_estimates[i] > prob_estimates[prob_max_idx])
02598 prob_max_idx = i;
02599 for(i=0;i<nr_class;i++)
02600 free(pairwise_prob[i]);
02601 free(dec_values);
02602 free(pairwise_prob);
02603 return model->label[prob_max_idx];
02604 }
02605 else
02606 return svm_predict(model, x);
02607 }
02608
02609 static const char *svm_type_table[] =
02610 {
02611 "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02612 };
02613
02614 static const char *kernel_type_table[]=
02615 {
02616 "linear","polynomial","rbf","sigmoid","precomputed",NULL
02617 };
02618
02619 int svm_save_model(const char *model_file_name, const svm_model *model)
02620 {
02621 FILE *fp = fopen(model_file_name,"w");
02622 if(fp==NULL) return -1;
02623
02624 char *old_locale = strdup(setlocale(LC_ALL, NULL));
02625 setlocale(LC_ALL, "C");
02626
02627 const svm_parameter& param = model->param;
02628
02629 fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02630 fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02631
02632 if(param.kernel_type == POLY)
02633 fprintf(fp,"degree %d\n", param.degree);
02634
02635 if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02636 fprintf(fp,"gamma %g\n", param.gamma);
02637
02638 if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02639 fprintf(fp,"coef0 %g\n", param.coef0);
02640
02641 int nr_class = model->nr_class;
02642 int l = model->l;
02643 fprintf(fp, "nr_class %d\n", nr_class);
02644 fprintf(fp, "total_sv %d\n",l);
02645
02646 {
02647 fprintf(fp, "rho");
02648 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02649 fprintf(fp," %g",model->rho[i]);
02650 fprintf(fp, "\n");
02651 }
02652
02653 if(model->label)
02654 {
02655 fprintf(fp, "label");
02656 for(int i=0;i<nr_class;i++)
02657 fprintf(fp," %d",model->label[i]);
02658 fprintf(fp, "\n");
02659 }
02660
02661 if(model->probA)
02662 {
02663 fprintf(fp, "probA");
02664 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02665 fprintf(fp," %g",model->probA[i]);
02666 fprintf(fp, "\n");
02667 }
02668 if(model->probB)
02669 {
02670 fprintf(fp, "probB");
02671 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02672 fprintf(fp," %g",model->probB[i]);
02673 fprintf(fp, "\n");
02674 }
02675
02676 if(model->nSV)
02677 {
02678 fprintf(fp, "nr_sv");
02679 for(int i=0;i<nr_class;i++)
02680 fprintf(fp," %d",model->nSV[i]);
02681 fprintf(fp, "\n");
02682 }
02683
02684 fprintf(fp, "SV\n");
02685 const double * const *sv_coef = model->sv_coef;
02686 const svm_node * const *SV = model->SV;
02687
02688 for(int i=0;i<l;i++)
02689 {
02690 for(int j=0;j<nr_class-1;j++)
02691 fprintf(fp, "%.16g ",sv_coef[j][i]);
02692
02693 const svm_node *p = SV[i];
02694
02695 if(param.kernel_type == PRECOMPUTED)
02696 fprintf(fp,"0:%d ",(int)(p->value));
02697 else
02698 while(p->index != -1)
02699 {
02700 fprintf(fp,"%d:%.8g ",p->index,p->value);
02701 p++;
02702 }
02703 fprintf(fp, "\n");
02704 }
02705
02706 setlocale(LC_ALL, old_locale);
02707 free(old_locale);
02708
02709 if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02710 else return 0;
02711 }
02712
02713 static char *line = NULL;
02714 static int max_line_len;
02715
02716 static char* readline(FILE *input)
02717 {
02718 int len;
02719
02720 if(fgets(line,max_line_len,input) == NULL)
02721 return NULL;
02722
02723 while(strrchr(line,'\n') == NULL)
02724 {
02725 max_line_len *= 2;
02726 line = (char *) realloc(line,max_line_len);
02727 len = (int) strlen(line);
02728 if(fgets(line+len,max_line_len-len,input) == NULL)
02729 break;
02730 }
02731 return line;
02732 }
02733
02734 svm_model *svm_load_model(const char *model_file_name)
02735 {
02736 FILE *fp = fopen(model_file_name,"rb");
02737 if(fp==NULL) return NULL;
02738
02739 char *old_locale = strdup(setlocale(LC_ALL, NULL));
02740 setlocale(LC_ALL, "C");
02741
02742
02743
02744 svm_model *model = Malloc(svm_model,1);
02745 svm_parameter& param = model->param;
02746 model->rho = NULL;
02747 model->probA = NULL;
02748 model->probB = NULL;
02749 model->label = NULL;
02750 model->nSV = NULL;
02751
02752 char cmd[81];
02753 while(1)
02754 {
02755 fscanf(fp,"%80s",cmd);
02756
02757 if(strcmp(cmd,"svm_type")==0)
02758 {
02759 fscanf(fp,"%80s",cmd);
02760 int i;
02761 for(i=0;svm_type_table[i];i++)
02762 {
02763 if(strcmp(svm_type_table[i],cmd)==0)
02764 {
02765 param.svm_type=i;
02766 break;
02767 }
02768 }
02769 if(svm_type_table[i] == NULL)
02770 {
02771 fprintf(stderr,"unknown svm type.\n");
02772
02773 setlocale(LC_ALL, old_locale);
02774 free(old_locale);
02775 free(model->rho);
02776 free(model->label);
02777 free(model->nSV);
02778 free(model);
02779 return NULL;
02780 }
02781 }
02782 else if(strcmp(cmd,"kernel_type")==0)
02783 {
02784 fscanf(fp,"%80s",cmd);
02785 int i;
02786 for(i=0;kernel_type_table[i];i++)
02787 {
02788 if(strcmp(kernel_type_table[i],cmd)==0)
02789 {
02790 param.kernel_type=i;
02791 break;
02792 }
02793 }
02794 if(kernel_type_table[i] == NULL)
02795 {
02796 fprintf(stderr,"unknown kernel function.\n");
02797
02798 setlocale(LC_ALL, old_locale);
02799 free(old_locale);
02800 free(model->rho);
02801 free(model->label);
02802 free(model->nSV);
02803 free(model);
02804 return NULL;
02805 }
02806 }
02807 else if(strcmp(cmd,"degree")==0)
02808 fscanf(fp,"%d",¶m.degree);
02809 else if(strcmp(cmd,"gamma")==0)
02810 fscanf(fp,"%lf",¶m.gamma);
02811 else if(strcmp(cmd,"coef0")==0)
02812 fscanf(fp,"%lf",¶m.coef0);
02813 else if(strcmp(cmd,"nr_class")==0)
02814 fscanf(fp,"%d",&model->nr_class);
02815 else if(strcmp(cmd,"total_sv")==0)
02816 fscanf(fp,"%d",&model->l);
02817 else if(strcmp(cmd,"rho")==0)
02818 {
02819 int n = model->nr_class * (model->nr_class-1)/2;
02820 model->rho = Malloc(double,n);
02821 for(int i=0;i<n;i++)
02822 fscanf(fp,"%lf",&model->rho[i]);
02823 }
02824 else if(strcmp(cmd,"label")==0)
02825 {
02826 int n = model->nr_class;
02827 model->label = Malloc(int,n);
02828 for(int i=0;i<n;i++)
02829 fscanf(fp,"%d",&model->label[i]);
02830 }
02831 else if(strcmp(cmd,"probA")==0)
02832 {
02833 int n = model->nr_class * (model->nr_class-1)/2;
02834 model->probA = Malloc(double,n);
02835 for(int i=0;i<n;i++)
02836 fscanf(fp,"%lf",&model->probA[i]);
02837 }
02838 else if(strcmp(cmd,"probB")==0)
02839 {
02840 int n = model->nr_class * (model->nr_class-1)/2;
02841 model->probB = Malloc(double,n);
02842 for(int i=0;i<n;i++)
02843 fscanf(fp,"%lf",&model->probB[i]);
02844 }
02845 else if(strcmp(cmd,"nr_sv")==0)
02846 {
02847 int n = model->nr_class;
02848 model->nSV = Malloc(int,n);
02849 for(int i=0;i<n;i++)
02850 fscanf(fp,"%d",&model->nSV[i]);
02851 }
02852 else if(strcmp(cmd,"SV")==0)
02853 {
02854 while(1)
02855 {
02856 int c = getc(fp);
02857 if(c==EOF || c=='\n') break;
02858 }
02859 break;
02860 }
02861 else
02862 {
02863 fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02864
02865 setlocale(LC_ALL, old_locale);
02866 free(old_locale);
02867 free(model->rho);
02868 free(model->label);
02869 free(model->nSV);
02870 free(model);
02871 return NULL;
02872 }
02873 }
02874
02875
02876
02877 int elements = 0;
02878 long pos = ftell(fp);
02879
02880 max_line_len = 1024;
02881 line = Malloc(char,max_line_len);
02882 char *p,*endptr,*idx,*val;
02883
02884 while(readline(fp)!=NULL)
02885 {
02886 p = strtok(line,":");
02887 while(1)
02888 {
02889 p = strtok(NULL,":");
02890 if(p == NULL)
02891 break;
02892 ++elements;
02893 }
02894 }
02895 elements += model->l;
02896
02897 fseek(fp,pos,SEEK_SET);
02898
02899 int m = model->nr_class - 1;
02900 int l = model->l;
02901 model->sv_coef = Malloc(double *,m);
02902 int i;
02903 for(i=0;i<m;i++)
02904 model->sv_coef[i] = Malloc(double,l);
02905 model->SV = Malloc(svm_node*,l);
02906 svm_node *x_space = NULL;
02907 if(l>0) x_space = Malloc(svm_node,elements);
02908
02909 int j=0;
02910 for(i=0;i<l;i++)
02911 {
02912 readline(fp);
02913 model->SV[i] = &x_space[j];
02914
02915 p = strtok(line, " \t");
02916 model->sv_coef[0][i] = strtod(p,&endptr);
02917 for(int k=1;k<m;k++)
02918 {
02919 p = strtok(NULL, " \t");
02920 model->sv_coef[k][i] = strtod(p,&endptr);
02921 }
02922
02923 while(1)
02924 {
02925 idx = strtok(NULL, ":");
02926 val = strtok(NULL, " \t");
02927
02928 if(val == NULL)
02929 break;
02930 x_space[j].index = (int) strtol(idx,&endptr,10);
02931 x_space[j].value = strtod(val,&endptr);
02932
02933 ++j;
02934 }
02935 x_space[j++].index = -1;
02936 }
02937 free(line);
02938
02939 setlocale(LC_ALL, old_locale);
02940 free(old_locale);
02941
02942 if (ferror(fp) != 0 || fclose(fp) != 0)
02943 return NULL;
02944
02945 model->free_sv = 1;
02946 return model;
02947 }
02948
02949 void svm_free_model_content(svm_model* model_ptr)
02950 {
02951 if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
02952 free((void *)(model_ptr->SV[0]));
02953 if(model_ptr->sv_coef)
02954 {
02955 for(int i=0;i<model_ptr->nr_class-1;i++)
02956 free(model_ptr->sv_coef[i]);
02957 }
02958
02959 free(model_ptr->SV);
02960 model_ptr->SV = NULL;
02961
02962 free(model_ptr->sv_coef);
02963 model_ptr->sv_coef = NULL;
02964
02965 free(model_ptr->rho);
02966 model_ptr->rho = NULL;
02967
02968 free(model_ptr->label);
02969 model_ptr->label= NULL;
02970
02971 free(model_ptr->probA);
02972 model_ptr->probA = NULL;
02973
02974 free(model_ptr->probB);
02975 model_ptr->probB= NULL;
02976
02977 free(model_ptr->nSV);
02978 model_ptr->nSV = NULL;
02979 }
02980
02981 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
02982 {
02983 if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
02984 {
02985 svm_free_model_content(*model_ptr_ptr);
02986 free(*model_ptr_ptr);
02987 *model_ptr_ptr = NULL;
02988 }
02989 }
02990
02991 void svm_destroy_param(svm_parameter* param)
02992 {
02993 free(param->weight_label);
02994 free(param->weight);
02995 }
02996
02997 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
02998 {
02999
03000
03001 int svm_type = param->svm_type;
03002 if(svm_type != C_SVC &&
03003 svm_type != NU_SVC &&
03004 svm_type != ONE_CLASS &&
03005 svm_type != EPSILON_SVR &&
03006 svm_type != NU_SVR)
03007 return "unknown svm type";
03008
03009
03010
03011 int kernel_type = param->kernel_type;
03012 if(kernel_type != LINEAR &&
03013 kernel_type != POLY &&
03014 kernel_type != RBF &&
03015 kernel_type != SIGMOID &&
03016 kernel_type != PRECOMPUTED)
03017 return "unknown kernel type";
03018
03019 if(param->gamma < 0)
03020 return "gamma < 0";
03021
03022 if(param->degree < 0)
03023 return "degree of polynomial kernel < 0";
03024
03025
03026
03027 if(param->cache_size <= 0)
03028 return "cache_size <= 0";
03029
03030 if(param->eps <= 0)
03031 return "eps <= 0";
03032
03033 if(svm_type == C_SVC ||
03034 svm_type == EPSILON_SVR ||
03035 svm_type == NU_SVR)
03036 if(param->C <= 0)
03037 return "C <= 0";
03038
03039 if(svm_type == NU_SVC ||
03040 svm_type == ONE_CLASS ||
03041 svm_type == NU_SVR)
03042 if(param->nu <= 0 || param->nu > 1)
03043 return "nu <= 0 or nu > 1";
03044
03045 if(svm_type == EPSILON_SVR)
03046 if(param->p < 0)
03047 return "p < 0";
03048
03049 if(param->shrinking != 0 &&
03050 param->shrinking != 1)
03051 return "shrinking != 0 and shrinking != 1";
03052
03053 if(param->probability != 0 &&
03054 param->probability != 1)
03055 return "probability != 0 and probability != 1";
03056
03057 if(param->probability == 1 &&
03058 svm_type == ONE_CLASS)
03059 return "one-class SVM probability output not supported yet";
03060
03061
03062
03063
03064 if(svm_type == NU_SVC)
03065 {
03066 int l = prob->l;
03067 int max_nr_class = 16;
03068 int nr_class = 0;
03069 int *label = Malloc(int,max_nr_class);
03070 int *count = Malloc(int,max_nr_class);
03071
03072 int i;
03073 for(i=0;i<l;i++)
03074 {
03075 int this_label = (int)prob->y[i];
03076 int j;
03077 for(j=0;j<nr_class;j++)
03078 if(this_label == label[j])
03079 {
03080 ++count[j];
03081 break;
03082 }
03083 if(j == nr_class)
03084 {
03085 if(nr_class == max_nr_class)
03086 {
03087 max_nr_class *= 2;
03088 label = (int *)realloc(label,max_nr_class*sizeof(int));
03089 count = (int *)realloc(count,max_nr_class*sizeof(int));
03090 }
03091 label[nr_class] = this_label;
03092 count[nr_class] = 1;
03093 ++nr_class;
03094 }
03095 }
03096
03097 for(i=0;i<nr_class;i++)
03098 {
03099 int n1 = count[i];
03100 for(int j=i+1;j<nr_class;j++)
03101 {
03102 int n2 = count[j];
03103 if(param->nu*(n1+n2)/2 > min(n1,n2))
03104 {
03105 free(label);
03106 free(count);
03107 return "specified nu is infeasible";
03108 }
03109 }
03110 }
03111 free(label);
03112 free(count);
03113 }
03114
03115 return NULL;
03116 }
03117
03118 int svm_check_probability_model(const svm_model *model)
03119 {
03120 return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03121 model->probA!=NULL && model->probB!=NULL) ||
03122 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03123 model->probA!=NULL);
03124 }
03125
03126 void svm_set_print_string_function(void (*print_func)(const char *))
03127 {
03128 if(print_func == NULL)
03129 svm_print_string = &print_string_stdout;
03130 else
03131 svm_print_string = print_func;
03132 }