00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <ctype.h>
00005 #include <float.h>
00006 #include <string.h>
00007 #include <stdarg.h>
00008 #include "svm.h"
00009 typedef float Qfloat;
00010 typedef signed char schar;
00011 #ifndef min
00012 template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
00013 #endif
00014 #ifndef max
00015 template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
00016 #endif
00017 template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00018 template <class S, class T> inline void clone(T*& dst, S* src, int n)
00019 {
00020 dst = new T[n];
00021 memcpy((void *)dst,(void *)src,sizeof(T)*n);
00022 }
00023 inline double powi(double base, int times)
00024 {
00025 double tmp = base, ret = 1.0;
00026
00027 for(int t=times; t>0; t/=2)
00028 {
00029 if(t%2==1) ret*=tmp;
00030 tmp = tmp * tmp;
00031 }
00032 return ret;
00033 }
00034 #define INF HUGE_VAL
00035 #define TAU 1e-12
00036 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00037 #if 0
00038 void info(const char *fmt,...)
00039 {
00040 va_list ap;
00041 va_start(ap,fmt);
00042 vprintf(fmt,ap);
00043 va_end(ap);
00044 }
00045 void info_flush()
00046 {
00047 fflush(stdout);
00048 }
00049 #else
00050 void info(char *fmt,...) {}
00051 void info_flush() {}
00052 #endif
00053
00054
00055
00056
00057
00058
00059
00060 class Cache
00061 {
00062 public:
00063 Cache(int l,long int size);
00064 ~Cache();
00065
00066
00067
00068
00069 int get_data(const int index, Qfloat **data, int len);
00070 void swap_index(int i, int j);
00071 private:
00072 int l;
00073 long int size;
00074 struct head_t
00075 {
00076 head_t *prev, *next;
00077 Qfloat *data;
00078 int len;
00079 };
00080
00081 head_t *head;
00082 head_t lru_head;
00083 void lru_delete(head_t *h);
00084 void lru_insert(head_t *h);
00085 };
00086
00087 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00088 {
00089 head = (head_t *)calloc(l,sizeof(head_t));
00090 size /= sizeof(Qfloat);
00091 size -= l * sizeof(head_t) / sizeof(Qfloat);
00092 size = max(size, 2 * (long int) l);
00093 lru_head.next = lru_head.prev = &lru_head;
00094 }
00095
00096 Cache::~Cache()
00097 {
00098 for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00099 free(h->data);
00100 free(head);
00101 }
00102
00103 void Cache::lru_delete(head_t *h)
00104 {
00105
00106 h->prev->next = h->next;
00107 h->next->prev = h->prev;
00108 }
00109
00110 void Cache::lru_insert(head_t *h)
00111 {
00112
00113 h->next = &lru_head;
00114 h->prev = lru_head.prev;
00115 h->prev->next = h;
00116 h->next->prev = h;
00117 }
00118
00119 int Cache::get_data(const int index, Qfloat **data, int len)
00120 {
00121 head_t *h = &head[index];
00122 if(h->len) lru_delete(h);
00123 int more = len - h->len;
00124
00125 if(more > 0)
00126 {
00127
00128 while(size < more)
00129 {
00130 head_t *old = lru_head.next;
00131 lru_delete(old);
00132 free(old->data);
00133 size += old->len;
00134 old->data = 0;
00135 old->len = 0;
00136 }
00137
00138
00139 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00140 size -= more;
00141 swap(h->len,len);
00142 }
00143
00144 lru_insert(h);
00145 *data = h->data;
00146 return len;
00147 }
00148
00149 void Cache::swap_index(int i, int j)
00150 {
00151 if(i==j) return;
00152
00153 if(head[i].len) lru_delete(&head[i]);
00154 if(head[j].len) lru_delete(&head[j]);
00155 swap(head[i].data,head[j].data);
00156 swap(head[i].len,head[j].len);
00157 if(head[i].len) lru_insert(&head[i]);
00158 if(head[j].len) lru_insert(&head[j]);
00159
00160 if(i>j) swap(i,j);
00161 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00162 {
00163 if(h->len > i)
00164 {
00165 if(h->len > j)
00166 swap(h->data[i],h->data[j]);
00167 else
00168 {
00169
00170 lru_delete(h);
00171 free(h->data);
00172 size += h->len;
00173 h->data = 0;
00174 h->len = 0;
00175 }
00176 }
00177 }
00178 }
00179
00180
00181
00182
00183
00184
00185
00186
00187 class QMatrix {
00188 public:
00189 virtual Qfloat *get_Q(int column, int len) const = 0;
00190 virtual Qfloat *get_QD() const = 0;
00191 virtual void swap_index(int i, int j) const = 0;
00192 virtual ~QMatrix() {}
00193 };
00194
00195 class Kernel: public QMatrix {
00196 public:
00197 Kernel(int l, svm_node * const * x, const svm_parameter& param);
00198 virtual ~Kernel();
00199
00200 static double k_function(const svm_node *x, const svm_node *y,
00201 const svm_parameter& param);
00202 virtual Qfloat *get_Q(int column, int len) const = 0;
00203 virtual Qfloat *get_QD() const = 0;
00204 virtual void swap_index(int i, int j) const
00205 {
00206 swap(x[i],x[j]);
00207 if(x_square) swap(x_square[i],x_square[j]);
00208 }
00209 protected:
00210
00211 double (Kernel::*kernel_function)(int i, int j) const;
00212
00213 private:
00214 const svm_node **x;
00215 double *x_square;
00216
00217
00218 const int kernel_type;
00219 const int degree;
00220 const double gamma;
00221 const double coef0;
00222
00223 static double dot(const svm_node *px, const svm_node *py);
00224 double kernel_linear(int i, int j) const
00225 {
00226 return dot(x[i],x[j]);
00227 }
00228 double kernel_poly(int i, int j) const
00229 {
00230 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00231 }
00232 double kernel_rbf(int i, int j) const
00233 {
00234 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00235 }
00236 double kernel_sigmoid(int i, int j) const
00237 {
00238 return tanh(gamma*dot(x[i],x[j])+coef0);
00239 }
00240 double kernel_precomputed(int i, int j) const
00241 {
00242 return x[i][(int)(x[j][0].value)].value;
00243 }
00244 };
00245
00246 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00247 :kernel_type(param.kernel_type), degree(param.degree),
00248 gamma(param.gamma), coef0(param.coef0)
00249 {
00250 switch(kernel_type)
00251 {
00252 case LINEAR:
00253 kernel_function = &Kernel::kernel_linear;
00254 break;
00255 case POLY:
00256 kernel_function = &Kernel::kernel_poly;
00257 break;
00258 case RBF:
00259 kernel_function = &Kernel::kernel_rbf;
00260 break;
00261 case SIGMOID:
00262 kernel_function = &Kernel::kernel_sigmoid;
00263 break;
00264 case PRECOMPUTED:
00265 kernel_function = &Kernel::kernel_precomputed;
00266 break;
00267 }
00268
00269 clone(x,x_,l);
00270
00271 if(kernel_type == RBF)
00272 {
00273 x_square = new double[l];
00274 for(int i=0;i<l;i++)
00275 x_square[i] = dot(x[i],x[i]);
00276 }
00277 else
00278 x_square = 0;
00279 }
00280
00281 Kernel::~Kernel()
00282 {
00283 delete[] x;
00284 delete[] x_square;
00285 }
00286
00287 double Kernel::dot(const svm_node *px, const svm_node *py)
00288 {
00289 double sum = 0;
00290 while(px->index != -1 && py->index != -1)
00291 {
00292 if(px->index == py->index)
00293 {
00294 sum += px->value * py->value;
00295 ++px;
00296 ++py;
00297 }
00298 else
00299 {
00300 if(px->index > py->index)
00301 ++py;
00302 else
00303 ++px;
00304 }
00305 }
00306 return sum;
00307 }
00308
00309 double Kernel::k_function(const svm_node *x, const svm_node *y,
00310 const svm_parameter& param)
00311 {
00312 switch(param.kernel_type)
00313 {
00314 case LINEAR:
00315 return dot(x,y);
00316 case POLY:
00317 return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00318 case RBF:
00319 {
00320 double sum = 0;
00321 while(x->index != -1 && y->index !=-1)
00322 {
00323 if(x->index == y->index)
00324 {
00325 double d = x->value - y->value;
00326 sum += d*d;
00327 ++x;
00328 ++y;
00329 }
00330 else
00331 {
00332 if(x->index > y->index)
00333 {
00334 sum += y->value * y->value;
00335 ++y;
00336 }
00337 else
00338 {
00339 sum += x->value * x->value;
00340 ++x;
00341 }
00342 }
00343 }
00344
00345 while(x->index != -1)
00346 {
00347 sum += x->value * x->value;
00348 ++x;
00349 }
00350
00351 while(y->index != -1)
00352 {
00353 sum += y->value * y->value;
00354 ++y;
00355 }
00356
00357 return exp(-param.gamma*sum);
00358 }
00359 case SIGMOID:
00360 return tanh(param.gamma*dot(x,y)+param.coef0);
00361 case PRECOMPUTED:
00362 return x[(int)(y->value)].value;
00363 default:
00364 return 0;
00365 }
00366 }
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386 class Solver {
00387 public:
00388 Solver() {};
00389 virtual ~Solver() {};
00390
00391 struct SolutionInfo {
00392 double obj;
00393 double rho;
00394 double upper_bound_p;
00395 double upper_bound_n;
00396 double r;
00397 };
00398
00399 void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00400 double *alpha_, double Cp, double Cn, double eps,
00401 SolutionInfo* si, int shrinking);
00402 protected:
00403 int active_size;
00404 schar *y;
00405 double *G;
00406 enum { LOWER_BOUND, UPPER_BOUND, FREE };
00407 char *alpha_status;
00408 double *alpha;
00409 const QMatrix *Q;
00410 const Qfloat *QD;
00411 double eps;
00412 double Cp,Cn;
00413 double *p;
00414 int *active_set;
00415 double *G_bar;
00416 int l;
00417 bool unshrinked;
00418
00419 double get_C(int i)
00420 {
00421 return (y[i] > 0)? Cp : Cn;
00422 }
00423 void update_alpha_status(int i)
00424 {
00425 if(alpha[i] >= get_C(i))
00426 alpha_status[i] = UPPER_BOUND;
00427 else if(alpha[i] <= 0)
00428 alpha_status[i] = LOWER_BOUND;
00429 else alpha_status[i] = FREE;
00430 }
00431 bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00432 bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00433 bool is_free(int i) { return alpha_status[i] == FREE; }
00434 void swap_index(int i, int j);
00435 void reconstruct_gradient();
00436 virtual int select_working_set(int &i, int &j);
00437 virtual double calculate_rho();
00438 virtual void do_shrinking();
00439 private:
00440 bool be_shrunken(int i, double Gmax1, double Gmax2);
00441 };
00442
00443 void Solver::swap_index(int i, int j)
00444 {
00445 Q->swap_index(i,j);
00446 swap(y[i],y[j]);
00447 swap(G[i],G[j]);
00448 swap(alpha_status[i],alpha_status[j]);
00449 swap(alpha[i],alpha[j]);
00450 swap(p[i],p[j]);
00451 swap(active_set[i],active_set[j]);
00452 swap(G_bar[i],G_bar[j]);
00453 }
00454
00455 void Solver::reconstruct_gradient()
00456 {
00457
00458
00459 if(active_size == l) return;
00460
00461 int i;
00462 for(i=active_size;i<l;i++)
00463 G[i] = G_bar[i] + p[i];
00464
00465 for(i=0;i<active_size;i++)
00466 if(is_free(i))
00467 {
00468 const Qfloat *Q_i = Q->get_Q(i,l);
00469 double alpha_i = alpha[i];
00470 for(int j=active_size;j<l;j++)
00471 G[j] += alpha_i * Q_i[j];
00472 }
00473 }
00474
00475 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00476 double *alpha_, double Cp, double Cn, double eps,
00477 SolutionInfo* si, int shrinking)
00478 {
00479 this->l = l;
00480 this->Q = &Q;
00481 QD=Q.get_QD();
00482 clone(p, p_,l);
00483 clone(y, y_,l);
00484 clone(alpha,alpha_,l);
00485 this->Cp = Cp;
00486 this->Cn = Cn;
00487 this->eps = eps;
00488 unshrinked = false;
00489
00490
00491 {
00492 alpha_status = new char[l];
00493 for(int i=0;i<l;i++)
00494 update_alpha_status(i);
00495 }
00496
00497
00498 {
00499 active_set = new int[l];
00500 for(int i=0;i<l;i++)
00501 active_set[i] = i;
00502 active_size = l;
00503 }
00504
00505
00506 {
00507 G = new double[l];
00508 G_bar = new double[l];
00509 int i;
00510 for(i=0;i<l;i++)
00511 {
00512 G[i] = p[i];
00513 G_bar[i] = 0;
00514 }
00515 for(i=0;i<l;i++)
00516 if(!is_lower_bound(i))
00517 {
00518 const Qfloat *Q_i = Q.get_Q(i,l);
00519 double alpha_i = alpha[i];
00520 int j;
00521 for(j=0;j<l;j++)
00522 G[j] += alpha_i*Q_i[j];
00523 if(is_upper_bound(i))
00524 for(j=0;j<l;j++)
00525 G_bar[j] += get_C(i) * Q_i[j];
00526 }
00527 }
00528
00529
00530
00531 int iter = 0;
00532 int counter = min(l,1000)+1;
00533
00534 while(1)
00535 {
00536
00537
00538 if(--counter == 0)
00539 {
00540 counter = min(l,1000);
00541 if(shrinking) do_shrinking();
00542 info("."); info_flush();
00543 }
00544
00545 int i,j;
00546 if(select_working_set(i,j)!=0)
00547 {
00548
00549 reconstruct_gradient();
00550
00551 active_size = l;
00552 info("*"); info_flush();
00553 if(select_working_set(i,j)!=0)
00554 break;
00555 else
00556 counter = 1;
00557 }
00558
00559 ++iter;
00560
00561
00562
00563 const Qfloat *Q_i = Q.get_Q(i,active_size);
00564 const Qfloat *Q_j = Q.get_Q(j,active_size);
00565
00566 double C_i = get_C(i);
00567 double C_j = get_C(j);
00568
00569 double old_alpha_i = alpha[i];
00570 double old_alpha_j = alpha[j];
00571
00572 if(y[i]!=y[j])
00573 {
00574 double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
00575 if (quad_coef <= 0)
00576 quad_coef = TAU;
00577 double delta = (-G[i]-G[j])/quad_coef;
00578 double diff = alpha[i] - alpha[j];
00579 alpha[i] += delta;
00580 alpha[j] += delta;
00581
00582 if(diff > 0)
00583 {
00584 if(alpha[j] < 0)
00585 {
00586 alpha[j] = 0;
00587 alpha[i] = diff;
00588 }
00589 }
00590 else
00591 {
00592 if(alpha[i] < 0)
00593 {
00594 alpha[i] = 0;
00595 alpha[j] = -diff;
00596 }
00597 }
00598 if(diff > C_i - C_j)
00599 {
00600 if(alpha[i] > C_i)
00601 {
00602 alpha[i] = C_i;
00603 alpha[j] = C_i - diff;
00604 }
00605 }
00606 else
00607 {
00608 if(alpha[j] > C_j)
00609 {
00610 alpha[j] = C_j;
00611 alpha[i] = C_j + diff;
00612 }
00613 }
00614 }
00615 else
00616 {
00617 double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
00618 if (quad_coef <= 0)
00619 quad_coef = TAU;
00620 double delta = (G[i]-G[j])/quad_coef;
00621 double sum = alpha[i] + alpha[j];
00622 alpha[i] -= delta;
00623 alpha[j] += delta;
00624
00625 if(sum > C_i)
00626 {
00627 if(alpha[i] > C_i)
00628 {
00629 alpha[i] = C_i;
00630 alpha[j] = sum - C_i;
00631 }
00632 }
00633 else
00634 {
00635 if(alpha[j] < 0)
00636 {
00637 alpha[j] = 0;
00638 alpha[i] = sum;
00639 }
00640 }
00641 if(sum > C_j)
00642 {
00643 if(alpha[j] > C_j)
00644 {
00645 alpha[j] = C_j;
00646 alpha[i] = sum - C_j;
00647 }
00648 }
00649 else
00650 {
00651 if(alpha[i] < 0)
00652 {
00653 alpha[i] = 0;
00654 alpha[j] = sum;
00655 }
00656 }
00657 }
00658
00659
00660
00661 double delta_alpha_i = alpha[i] - old_alpha_i;
00662 double delta_alpha_j = alpha[j] - old_alpha_j;
00663
00664 for(int k=0;k<active_size;k++)
00665 {
00666 G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00667 }
00668
00669
00670
00671 {
00672 bool ui = is_upper_bound(i);
00673 bool uj = is_upper_bound(j);
00674 update_alpha_status(i);
00675 update_alpha_status(j);
00676 int k;
00677 if(ui != is_upper_bound(i))
00678 {
00679 Q_i = Q.get_Q(i,l);
00680 if(ui)
00681 for(k=0;k<l;k++)
00682 G_bar[k] -= C_i * Q_i[k];
00683 else
00684 for(k=0;k<l;k++)
00685 G_bar[k] += C_i * Q_i[k];
00686 }
00687
00688 if(uj != is_upper_bound(j))
00689 {
00690 Q_j = Q.get_Q(j,l);
00691 if(uj)
00692 for(k=0;k<l;k++)
00693 G_bar[k] -= C_j * Q_j[k];
00694 else
00695 for(k=0;k<l;k++)
00696 G_bar[k] += C_j * Q_j[k];
00697 }
00698 }
00699 }
00700
00701
00702
00703 si->rho = calculate_rho();
00704
00705
00706 {
00707 double v = 0;
00708 int i;
00709 for(i=0;i<l;i++)
00710 v += alpha[i] * (G[i] + p[i]);
00711
00712 si->obj = v/2;
00713 }
00714
00715
00716 {
00717 for(int i=0;i<l;i++)
00718 alpha_[active_set[i]] = alpha[i];
00719 }
00720
00721
00722
00723
00724
00725
00726
00727
00728
00729 si->upper_bound_p = Cp;
00730 si->upper_bound_n = Cn;
00731
00732 info("\noptimization finished, #iter = %d\n",iter);
00733
00734 delete[] p;
00735 delete[] y;
00736 delete[] alpha;
00737 delete[] alpha_status;
00738 delete[] active_set;
00739 delete[] G;
00740 delete[] G_bar;
00741 }
00742
00743
00744 int Solver::select_working_set(int &out_i, int &out_j)
00745 {
00746
00747
00748
00749
00750
00751
00752 double Gmax = -INF;
00753 double Gmax2 = -INF;
00754 int Gmax_idx = -1;
00755 int Gmin_idx = -1;
00756 double obj_diff_min = INF;
00757
00758 for(int t=0;t<active_size;t++)
00759 if(y[t]==+1)
00760 {
00761 if(!is_upper_bound(t))
00762 if(-G[t] >= Gmax)
00763 {
00764 Gmax = -G[t];
00765 Gmax_idx = t;
00766 }
00767 }
00768 else
00769 {
00770 if(!is_lower_bound(t))
00771 if(G[t] >= Gmax)
00772 {
00773 Gmax = G[t];
00774 Gmax_idx = t;
00775 }
00776 }
00777
00778 int i = Gmax_idx;
00779 const Qfloat *Q_i = NULL;
00780 if(i != -1)
00781 Q_i = Q->get_Q(i,active_size);
00782
00783 for(int j=0;j<active_size;j++)
00784 {
00785 if(y[j]==+1)
00786 {
00787 if (!is_lower_bound(j))
00788 {
00789 double grad_diff=Gmax+G[j];
00790 if (G[j] >= Gmax2)
00791 Gmax2 = G[j];
00792 if (grad_diff > 0)
00793 {
00794 double obj_diff;
00795 double quad_coef=Q_i[i]+QD[j]-2*y[i]*Q_i[j];
00796 if (quad_coef > 0)
00797 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00798 else
00799 obj_diff = -(grad_diff*grad_diff)/TAU;
00800
00801 if (obj_diff <= obj_diff_min)
00802 {
00803 Gmin_idx=j;
00804 obj_diff_min = obj_diff;
00805 }
00806 }
00807 }
00808 }
00809 else
00810 {
00811 if (!is_upper_bound(j))
00812 {
00813 double grad_diff= Gmax-G[j];
00814 if (-G[j] >= Gmax2)
00815 Gmax2 = -G[j];
00816 if (grad_diff > 0)
00817 {
00818 double obj_diff;
00819 double quad_coef=Q_i[i]+QD[j]+2*y[i]*Q_i[j];
00820 if (quad_coef > 0)
00821 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00822 else
00823 obj_diff = -(grad_diff*grad_diff)/TAU;
00824
00825 if (obj_diff <= obj_diff_min)
00826 {
00827 Gmin_idx=j;
00828 obj_diff_min = obj_diff;
00829 }
00830 }
00831 }
00832 }
00833 }
00834
00835 if(Gmax+Gmax2 < eps)
00836 return 1;
00837
00838 out_i = Gmax_idx;
00839 out_j = Gmin_idx;
00840 return 0;
00841 }
00842
00843 bool Solver::be_shrunken(int i, double Gmax1, double Gmax2)
00844 {
00845 if(is_upper_bound(i))
00846 {
00847 if(y[i]==+1)
00848 return(-G[i] > Gmax1);
00849 else
00850 return(-G[i] > Gmax2);
00851 }
00852 else if(is_lower_bound(i))
00853 {
00854 if(y[i]==+1)
00855 return(G[i] > Gmax2);
00856 else
00857 return(G[i] > Gmax1);
00858 }
00859 else
00860 return(false);
00861 }
00862
00863 void Solver::do_shrinking()
00864 {
00865 int i;
00866 double Gmax1 = -INF;
00867 double Gmax2 = -INF;
00868
00869
00870 for(i=0;i<active_size;i++)
00871 {
00872 if(y[i]==+1)
00873 {
00874 if(!is_upper_bound(i))
00875 {
00876 if(-G[i] >= Gmax1)
00877 Gmax1 = -G[i];
00878 }
00879 if(!is_lower_bound(i))
00880 {
00881 if(G[i] >= Gmax2)
00882 Gmax2 = G[i];
00883 }
00884 }
00885 else
00886 {
00887 if(!is_upper_bound(i))
00888 {
00889 if(-G[i] >= Gmax2)
00890 Gmax2 = -G[i];
00891 }
00892 if(!is_lower_bound(i))
00893 {
00894 if(G[i] >= Gmax1)
00895 Gmax1 = G[i];
00896 }
00897 }
00898 }
00899
00900
00901
00902 for(i=0;i<active_size;i++)
00903 if (be_shrunken(i, Gmax1, Gmax2))
00904 {
00905 active_size--;
00906 while (active_size > i)
00907 {
00908 if (!be_shrunken(active_size, Gmax1, Gmax2))
00909 {
00910 swap_index(i,active_size);
00911 break;
00912 }
00913 active_size--;
00914 }
00915 }
00916
00917
00918
00919 if(unshrinked || Gmax1 + Gmax2 > eps*10) return;
00920
00921 unshrinked = true;
00922 reconstruct_gradient();
00923
00924 for(i=l-1;i>=active_size;i--)
00925 if (!be_shrunken(i, Gmax1, Gmax2))
00926 {
00927 while (active_size < i)
00928 {
00929 if (be_shrunken(active_size, Gmax1, Gmax2))
00930 {
00931 swap_index(i,active_size);
00932 break;
00933 }
00934 active_size++;
00935 }
00936 active_size++;
00937 }
00938 }
00939
00940 double Solver::calculate_rho()
00941 {
00942 double r;
00943 int nr_free = 0;
00944 double ub = INF, lb = -INF, sum_free = 0;
00945 for(int i=0;i<active_size;i++)
00946 {
00947 double yG = y[i]*G[i];
00948
00949 if(is_upper_bound(i))
00950 {
00951 if(y[i]==-1)
00952 ub = min(ub,yG);
00953 else
00954 lb = max(lb,yG);
00955 }
00956 else if(is_lower_bound(i))
00957 {
00958 if(y[i]==+1)
00959 ub = min(ub,yG);
00960 else
00961 lb = max(lb,yG);
00962 }
00963 else
00964 {
00965 ++nr_free;
00966 sum_free += yG;
00967 }
00968 }
00969
00970 if(nr_free>0)
00971 r = sum_free/nr_free;
00972 else
00973 r = (ub+lb)/2;
00974
00975 return r;
00976 }
00977
00978
00979
00980
00981
00982
00983 class Solver_NU : public Solver
00984 {
00985 public:
00986 Solver_NU() {}
00987 void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
00988 double *alpha, double Cp, double Cn, double eps,
00989 SolutionInfo* si, int shrinking)
00990 {
00991 this->si = si;
00992 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
00993 }
00994 private:
00995 SolutionInfo *si;
00996 int select_working_set(int &i, int &j);
00997 double calculate_rho();
00998 bool be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
00999 void do_shrinking();
01000 };
01001
01002
01003 int Solver_NU::select_working_set(int &out_i, int &out_j)
01004 {
01005
01006
01007
01008
01009
01010
01011 double Gmaxp = -INF;
01012 double Gmaxp2 = -INF;
01013 int Gmaxp_idx = -1;
01014
01015 double Gmaxn = -INF;
01016 double Gmaxn2 = -INF;
01017 int Gmaxn_idx = -1;
01018
01019 int Gmin_idx = -1;
01020 double obj_diff_min = INF;
01021
01022 for(int t=0;t<active_size;t++)
01023 if(y[t]==+1)
01024 {
01025 if(!is_upper_bound(t))
01026 if(-G[t] >= Gmaxp)
01027 {
01028 Gmaxp = -G[t];
01029 Gmaxp_idx = t;
01030 }
01031 }
01032 else
01033 {
01034 if(!is_lower_bound(t))
01035 if(G[t] >= Gmaxn)
01036 {
01037 Gmaxn = G[t];
01038 Gmaxn_idx = t;
01039 }
01040 }
01041
01042 int ip = Gmaxp_idx;
01043 int in = Gmaxn_idx;
01044 const Qfloat *Q_ip = NULL;
01045 const Qfloat *Q_in = NULL;
01046 if(ip != -1)
01047 Q_ip = Q->get_Q(ip,active_size);
01048 if(in != -1)
01049 Q_in = Q->get_Q(in,active_size);
01050
01051 for(int j=0;j<active_size;j++)
01052 {
01053 if(y[j]==+1)
01054 {
01055 if (!is_lower_bound(j))
01056 {
01057 double grad_diff=Gmaxp+G[j];
01058 if (G[j] >= Gmaxp2)
01059 Gmaxp2 = G[j];
01060 if (grad_diff > 0)
01061 {
01062 double obj_diff;
01063 double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
01064 if (quad_coef > 0)
01065 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01066 else
01067 obj_diff = -(grad_diff*grad_diff)/TAU;
01068
01069 if (obj_diff <= obj_diff_min)
01070 {
01071 Gmin_idx=j;
01072 obj_diff_min = obj_diff;
01073 }
01074 }
01075 }
01076 }
01077 else
01078 {
01079 if (!is_upper_bound(j))
01080 {
01081 double grad_diff=Gmaxn-G[j];
01082 if (-G[j] >= Gmaxn2)
01083 Gmaxn2 = -G[j];
01084 if (grad_diff > 0)
01085 {
01086 double obj_diff;
01087 double quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
01088 if (quad_coef > 0)
01089 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01090 else
01091 obj_diff = -(grad_diff*grad_diff)/TAU;
01092
01093 if (obj_diff <= obj_diff_min)
01094 {
01095 Gmin_idx=j;
01096 obj_diff_min = obj_diff;
01097 }
01098 }
01099 }
01100 }
01101 }
01102
01103 if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
01104 return 1;
01105
01106 if (y[Gmin_idx] == +1)
01107 out_i = Gmaxp_idx;
01108 else
01109 out_i = Gmaxn_idx;
01110 out_j = Gmin_idx;
01111
01112 return 0;
01113 }
01114
01115 bool Solver_NU::be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01116 {
01117 if(is_upper_bound(i))
01118 {
01119 if(y[i]==+1)
01120 return(-G[i] > Gmax1);
01121 else
01122 return(-G[i] > Gmax4);
01123 }
01124 else if(is_lower_bound(i))
01125 {
01126 if(y[i]==+1)
01127 return(G[i] > Gmax2);
01128 else
01129 return(G[i] > Gmax3);
01130 }
01131 else
01132 return(false);
01133 }
01134
01135 void Solver_NU::do_shrinking()
01136 {
01137 double Gmax1 = -INF;
01138 double Gmax2 = -INF;
01139 double Gmax3 = -INF;
01140 double Gmax4 = -INF;
01141
01142
01143 int i;
01144 for(i=0;i<active_size;i++)
01145 {
01146 if(!is_upper_bound(i))
01147 {
01148 if(y[i]==+1)
01149 {
01150 if(-G[i] > Gmax1) Gmax1 = -G[i];
01151 }
01152 else if(-G[i] > Gmax4) Gmax4 = -G[i];
01153 }
01154 if(!is_lower_bound(i))
01155 {
01156 if(y[i]==+1)
01157 {
01158 if(G[i] > Gmax2) Gmax2 = G[i];
01159 }
01160 else if(G[i] > Gmax3) Gmax3 = G[i];
01161 }
01162 }
01163
01164
01165
01166 for(i=0;i<active_size;i++)
01167 if (be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
01168 {
01169 active_size--;
01170 while (active_size > i)
01171 {
01172 if (!be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01173 {
01174 swap_index(i,active_size);
01175 break;
01176 }
01177 active_size--;
01178 }
01179 }
01180
01181
01182
01183 if(unshrinked || max(Gmax1+Gmax2,Gmax3+Gmax4) > eps*10) return;
01184
01185 unshrinked = true;
01186 reconstruct_gradient();
01187
01188 for(i=l-1;i>=active_size;i--)
01189 if (!be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
01190 {
01191 while (active_size < i)
01192 {
01193 if (be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01194 {
01195 swap_index(i,active_size);
01196 break;
01197 }
01198 active_size++;
01199 }
01200 active_size++;
01201 }
01202 }
01203
01204 double Solver_NU::calculate_rho()
01205 {
01206 int nr_free1 = 0,nr_free2 = 0;
01207 double ub1 = INF, ub2 = INF;
01208 double lb1 = -INF, lb2 = -INF;
01209 double sum_free1 = 0, sum_free2 = 0;
01210
01211 for(int i=0;i<active_size;i++)
01212 {
01213 if(y[i]==+1)
01214 {
01215 if(is_upper_bound(i))
01216 lb1 = max(lb1,G[i]);
01217 else if(is_lower_bound(i))
01218 ub1 = min(ub1,G[i]);
01219 else
01220 {
01221 ++nr_free1;
01222 sum_free1 += G[i];
01223 }
01224 }
01225 else
01226 {
01227 if(is_upper_bound(i))
01228 lb2 = max(lb2,G[i]);
01229 else if(is_lower_bound(i))
01230 ub2 = min(ub2,G[i]);
01231 else
01232 {
01233 ++nr_free2;
01234 sum_free2 += G[i];
01235 }
01236 }
01237 }
01238
01239 double r1,r2;
01240 if(nr_free1 > 0)
01241 r1 = sum_free1/nr_free1;
01242 else
01243 r1 = (ub1+lb1)/2;
01244
01245 if(nr_free2 > 0)
01246 r2 = sum_free2/nr_free2;
01247 else
01248 r2 = (ub2+lb2)/2;
01249
01250 si->r = (r1+r2)/2;
01251 return (r1-r2)/2;
01252 }
01253
01254
01255
01256
01257 class SVC_Q: public Kernel
01258 {
01259 public:
01260 SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01261 :Kernel(prob.l, prob.x, param)
01262 {
01263 clone(y,y_,prob.l);
01264 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01265 QD = new Qfloat[prob.l];
01266 for(int i=0;i<prob.l;i++)
01267 QD[i]= (Qfloat)(this->*kernel_function)(i,i);
01268 }
01269
01270 Qfloat *get_Q(int i, int len) const
01271 {
01272 Qfloat *data;
01273 int start;
01274 if((start = cache->get_data(i,&data,len)) < len)
01275 {
01276 for(int j=start;j<len;j++)
01277 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01278 }
01279 return data;
01280 }
01281
01282 Qfloat *get_QD() const
01283 {
01284 return QD;
01285 }
01286
01287 void swap_index(int i, int j) const
01288 {
01289 cache->swap_index(i,j);
01290 Kernel::swap_index(i,j);
01291 swap(y[i],y[j]);
01292 swap(QD[i],QD[j]);
01293 }
01294
01295 ~SVC_Q()
01296 {
01297 delete[] y;
01298 delete cache;
01299 delete[] QD;
01300 }
01301 private:
01302 schar *y;
01303 Cache *cache;
01304 Qfloat *QD;
01305 };
01306
01307 class ONE_CLASS_Q: public Kernel
01308 {
01309 public:
01310 ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01311 :Kernel(prob.l, prob.x, param)
01312 {
01313 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01314 QD = new Qfloat[prob.l];
01315 for(int i=0;i<prob.l;i++)
01316 QD[i]= (Qfloat)(this->*kernel_function)(i,i);
01317 }
01318
01319 Qfloat *get_Q(int i, int len) const
01320 {
01321 Qfloat *data;
01322 int start;
01323 if((start = cache->get_data(i,&data,len)) < len)
01324 {
01325 for(int j=start;j<len;j++)
01326 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01327 }
01328 return data;
01329 }
01330
01331 Qfloat *get_QD() const
01332 {
01333 return QD;
01334 }
01335
01336 void swap_index(int i, int j) const
01337 {
01338 cache->swap_index(i,j);
01339 Kernel::swap_index(i,j);
01340 swap(QD[i],QD[j]);
01341 }
01342
01343 ~ONE_CLASS_Q()
01344 {
01345 delete cache;
01346 delete[] QD;
01347 }
01348 private:
01349 Cache *cache;
01350 Qfloat *QD;
01351 };
01352
01353 class SVR_Q: public Kernel
01354 {
01355 public:
01356 SVR_Q(const svm_problem& prob, const svm_parameter& param)
01357 :Kernel(prob.l, prob.x, param)
01358 {
01359 l = prob.l;
01360 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01361 QD = new Qfloat[2*l];
01362 sign = new schar[2*l];
01363 index = new int[2*l];
01364 for(int k=0;k<l;k++)
01365 {
01366 sign[k] = 1;
01367 sign[k+l] = -1;
01368 index[k] = k;
01369 index[k+l] = k;
01370 QD[k]= (Qfloat)(this->*kernel_function)(k,k);
01371 QD[k+l]=QD[k];
01372 }
01373 buffer[0] = new Qfloat[2*l];
01374 buffer[1] = new Qfloat[2*l];
01375 next_buffer = 0;
01376 }
01377
01378 void swap_index(int i, int j) const
01379 {
01380 swap(sign[i],sign[j]);
01381 swap(index[i],index[j]);
01382 swap(QD[i],QD[j]);
01383 }
01384
01385 Qfloat *get_Q(int i, int len) const
01386 {
01387 Qfloat *data;
01388 int real_i = index[i];
01389 if(cache->get_data(real_i,&data,l) < l)
01390 {
01391 for(int j=0;j<l;j++)
01392 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01393 }
01394
01395
01396 Qfloat *buf = buffer[next_buffer];
01397 next_buffer = 1 - next_buffer;
01398 schar si = sign[i];
01399 for(int j=0;j<len;j++)
01400 buf[j] = si * sign[j] * data[index[j]];
01401 return buf;
01402 }
01403
01404 Qfloat *get_QD() const
01405 {
01406 return QD;
01407 }
01408
01409 ~SVR_Q()
01410 {
01411 delete cache;
01412 delete[] sign;
01413 delete[] index;
01414 delete[] buffer[0];
01415 delete[] buffer[1];
01416 delete[] QD;
01417 }
01418 private:
01419 int l;
01420 Cache *cache;
01421 schar *sign;
01422 int *index;
01423 mutable int next_buffer;
01424 Qfloat *buffer[2];
01425 Qfloat *QD;
01426 };
01427
01428
01429
01430
01431 static void solve_c_svc(
01432 const svm_problem *prob, const svm_parameter* param,
01433 double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01434 {
01435 int l = prob->l;
01436 double *minus_ones = new double[l];
01437 schar *y = new schar[l];
01438
01439 int i;
01440
01441 for(i=0;i<l;i++)
01442 {
01443 alpha[i] = 0;
01444 minus_ones[i] = -1;
01445 if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
01446 }
01447
01448 Solver s;
01449 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01450 alpha, Cp, Cn, param->eps, si, param->shrinking);
01451
01452 double sum_alpha=0;
01453 for(i=0;i<l;i++)
01454 sum_alpha += alpha[i];
01455
01456 if (Cp==Cn)
01457 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01458
01459 for(i=0;i<l;i++)
01460 alpha[i] *= y[i];
01461
01462 delete[] minus_ones;
01463 delete[] y;
01464 }
01465
01466 static void solve_nu_svc(
01467 const svm_problem *prob, const svm_parameter *param,
01468 double *alpha, Solver::SolutionInfo* si)
01469 {
01470 int i;
01471 int l = prob->l;
01472 double nu = param->nu;
01473
01474 schar *y = new schar[l];
01475
01476 for(i=0;i<l;i++)
01477 if(prob->y[i]>0)
01478 y[i] = +1;
01479 else
01480 y[i] = -1;
01481
01482 double sum_pos = nu*l/2;
01483 double sum_neg = nu*l/2;
01484
01485 for(i=0;i<l;i++)
01486 if(y[i] == +1)
01487 {
01488 alpha[i] = min(1.0,sum_pos);
01489 sum_pos -= alpha[i];
01490 }
01491 else
01492 {
01493 alpha[i] = min(1.0,sum_neg);
01494 sum_neg -= alpha[i];
01495 }
01496
01497 double *zeros = new double[l];
01498
01499 for(i=0;i<l;i++)
01500 zeros[i] = 0;
01501
01502 Solver_NU s;
01503 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01504 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01505 double r = si->r;
01506
01507 info("C = %f\n",1/r);
01508
01509 for(i=0;i<l;i++)
01510 alpha[i] *= y[i]/r;
01511
01512 si->rho /= r;
01513 si->obj /= (r*r);
01514 si->upper_bound_p = 1/r;
01515 si->upper_bound_n = 1/r;
01516
01517 delete[] y;
01518 delete[] zeros;
01519 }
01520
01521 static void solve_one_class(
01522 const svm_problem *prob, const svm_parameter *param,
01523 double *alpha, Solver::SolutionInfo* si)
01524 {
01525 int l = prob->l;
01526 double *zeros = new double[l];
01527 schar *ones = new schar[l];
01528 int i;
01529
01530 int n = (int)(param->nu*prob->l);
01531
01532 for(i=0;i<n;i++)
01533 alpha[i] = 1;
01534 if(n<prob->l)
01535 alpha[n] = param->nu * prob->l - n;
01536 for(i=n+1;i<l;i++)
01537 alpha[i] = 0;
01538
01539 for(i=0;i<l;i++)
01540 {
01541 zeros[i] = 0;
01542 ones[i] = 1;
01543 }
01544
01545 Solver s;
01546 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01547 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01548
01549 delete[] zeros;
01550 delete[] ones;
01551 }
01552
01553 static void solve_epsilon_svr(
01554 const svm_problem *prob, const svm_parameter *param,
01555 double *alpha, Solver::SolutionInfo* si)
01556 {
01557 int l = prob->l;
01558 double *alpha2 = new double[2*l];
01559 double *linear_term = new double[2*l];
01560 schar *y = new schar[2*l];
01561 int i;
01562
01563 for(i=0;i<l;i++)
01564 {
01565 alpha2[i] = 0;
01566 linear_term[i] = param->p - prob->y[i];
01567 y[i] = 1;
01568
01569 alpha2[i+l] = 0;
01570 linear_term[i+l] = param->p + prob->y[i];
01571 y[i+l] = -1;
01572 }
01573
01574 Solver s;
01575 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01576 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01577
01578 double sum_alpha = 0;
01579 for(i=0;i<l;i++)
01580 {
01581 alpha[i] = alpha2[i] - alpha2[i+l];
01582 sum_alpha += fabs(alpha[i]);
01583 }
01584 info("nu = %f\n",sum_alpha/(param->C*l));
01585
01586 delete[] alpha2;
01587 delete[] linear_term;
01588 delete[] y;
01589 }
01590
01591 static void solve_nu_svr(
01592 const svm_problem *prob, const svm_parameter *param,
01593 double *alpha, Solver::SolutionInfo* si)
01594 {
01595 int l = prob->l;
01596 double C = param->C;
01597 double *alpha2 = new double[2*l];
01598 double *linear_term = new double[2*l];
01599 schar *y = new schar[2*l];
01600 int i;
01601
01602 double sum = C * param->nu * l / 2;
01603 for(i=0;i<l;i++)
01604 {
01605 alpha2[i] = alpha2[i+l] = min(sum,C);
01606 sum -= alpha2[i];
01607
01608 linear_term[i] = - prob->y[i];
01609 y[i] = 1;
01610
01611 linear_term[i+l] = prob->y[i];
01612 y[i+l] = -1;
01613 }
01614
01615 Solver_NU s;
01616 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01617 alpha2, C, C, param->eps, si, param->shrinking);
01618
01619 info("epsilon = %f\n",-si->r);
01620
01621 for(i=0;i<l;i++)
01622 alpha[i] = alpha2[i] - alpha2[i+l];
01623
01624 delete[] alpha2;
01625 delete[] linear_term;
01626 delete[] y;
01627 }
01628
01629
01630
01631
01632 struct decision_function
01633 {
01634 double *alpha;
01635 double rho;
01636 };
01637
01638 decision_function svm_train_one(
01639 const svm_problem *prob, const svm_parameter *param,
01640 double Cp, double Cn)
01641 {
01642 double *alpha = Malloc(double,prob->l);
01643 Solver::SolutionInfo si;
01644 switch(param->svm_type)
01645 {
01646 case C_SVC:
01647 solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01648 break;
01649 case NU_SVC:
01650 solve_nu_svc(prob,param,alpha,&si);
01651 break;
01652 case ONE_CLASS:
01653 solve_one_class(prob,param,alpha,&si);
01654 break;
01655 case EPSILON_SVR:
01656 solve_epsilon_svr(prob,param,alpha,&si);
01657 break;
01658 case NU_SVR:
01659 solve_nu_svr(prob,param,alpha,&si);
01660 break;
01661 }
01662
01663 info("obj = %f, rho = %f\n",si.obj,si.rho);
01664
01665
01666
01667 int nSV = 0;
01668 int nBSV = 0;
01669 for(int i=0;i<prob->l;i++)
01670 {
01671 if(fabs(alpha[i]) > 0)
01672 {
01673 ++nSV;
01674 if(prob->y[i] > 0)
01675 {
01676 if(fabs(alpha[i]) >= si.upper_bound_p)
01677 ++nBSV;
01678 }
01679 else
01680 {
01681 if(fabs(alpha[i]) >= si.upper_bound_n)
01682 ++nBSV;
01683 }
01684 }
01685 }
01686
01687 info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01688
01689 decision_function f;
01690 f.alpha = alpha;
01691 f.rho = si.rho;
01692 return f;
01693 }
01694
01695
01696
01697
01698 struct svm_model
01699 {
01700 svm_parameter param;
01701 int nr_class;
01702 int l;
01703 svm_node **SV;
01704 double **sv_coef;
01705 double *rho;
01706 double *probA;
01707 double *probB;
01708
01709
01710
01711 int *label;
01712 int *nSV;
01713
01714
01715 int free_sv;
01716
01717 };
01718
01719
01720 void sigmoid_train(
01721 int l, const double *dec_values, const double *labels,
01722 double& A, double& B)
01723 {
01724 double prior1=0, prior0 = 0;
01725 int i;
01726
01727 for (i=0;i<l;i++)
01728 if (labels[i] > 0) prior1+=1;
01729 else prior0+=1;
01730
01731 int max_iter=100;
01732 double min_step=1e-10;
01733 double sigma=1e-12;
01734 double eps=1e-5;
01735 double hiTarget=(prior1+1.0)/(prior1+2.0);
01736 double loTarget=1/(prior0+2.0);
01737 double *t=Malloc(double,l);
01738 double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01739 double newA,newB,newf,d1,d2;
01740 int iter;
01741
01742
01743 A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01744 double fval = 0.0;
01745
01746 for (i=0;i<l;i++)
01747 {
01748 if (labels[i]>0) t[i]=hiTarget;
01749 else t[i]=loTarget;
01750 fApB = dec_values[i]*A+B;
01751 if (fApB>=0)
01752 fval += t[i]*fApB + log(1+exp(-fApB));
01753 else
01754 fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01755 }
01756 for (iter=0;iter<max_iter;iter++)
01757 {
01758
01759 h11=sigma;
01760 h22=sigma;
01761 h21=0.0;g1=0.0;g2=0.0;
01762 for (i=0;i<l;i++)
01763 {
01764 fApB = dec_values[i]*A+B;
01765 if (fApB >= 0)
01766 {
01767 p=exp(-fApB)/(1.0+exp(-fApB));
01768 q=1.0/(1.0+exp(-fApB));
01769 }
01770 else
01771 {
01772 p=1.0/(1.0+exp(fApB));
01773 q=exp(fApB)/(1.0+exp(fApB));
01774 }
01775 d2=p*q;
01776 h11+=dec_values[i]*dec_values[i]*d2;
01777 h22+=d2;
01778 h21+=dec_values[i]*d2;
01779 d1=t[i]-p;
01780 g1+=dec_values[i]*d1;
01781 g2+=d1;
01782 }
01783
01784
01785 if (fabs(g1)<eps && fabs(g2)<eps)
01786 break;
01787
01788
01789 det=h11*h22-h21*h21;
01790 dA=-(h22*g1 - h21 * g2) / det;
01791 dB=-(-h21*g1+ h11 * g2) / det;
01792 gd=g1*dA+g2*dB;
01793
01794
01795 stepsize = 1;
01796 while (stepsize >= min_step)
01797 {
01798 newA = A + stepsize * dA;
01799 newB = B + stepsize * dB;
01800
01801
01802 newf = 0.0;
01803 for (i=0;i<l;i++)
01804 {
01805 fApB = dec_values[i]*newA+newB;
01806 if (fApB >= 0)
01807 newf += t[i]*fApB + log(1+exp(-fApB));
01808 else
01809 newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01810 }
01811
01812 if (newf<fval+0.0001*stepsize*gd)
01813 {
01814 A=newA;B=newB;fval=newf;
01815 break;
01816 }
01817 else
01818 stepsize = stepsize / 2.0;
01819 }
01820
01821 if (stepsize < min_step)
01822 {
01823 info("Line search fails in two-class probability estimates\n");
01824 break;
01825 }
01826 }
01827
01828 if (iter>=max_iter)
01829 info("Reaching maximal iterations in two-class probability estimates\n");
01830 free(t);
01831 }
01832
01833 double sigmoid_predict(double decision_value, double A, double B)
01834 {
01835 double fApB = decision_value*A+B;
01836 if (fApB >= 0)
01837 return exp(-fApB)/(1.0+exp(-fApB));
01838 else
01839 return 1.0/(1+exp(fApB)) ;
01840 }
01841
01842
01843 void multiclass_probability(int k, double **r, double *p)
01844 {
01845 int t,j;
01846 int iter = 0, max_iter=max(100,k);
01847 double **Q=Malloc(double *,k);
01848 double *Qp=Malloc(double,k);
01849 double pQp, eps=0.005/k;
01850
01851 for (t=0;t<k;t++)
01852 {
01853 p[t]=1.0/k;
01854 Q[t]=Malloc(double,k);
01855 Q[t][t]=0;
01856 for (j=0;j<t;j++)
01857 {
01858 Q[t][t]+=r[j][t]*r[j][t];
01859 Q[t][j]=Q[j][t];
01860 }
01861 for (j=t+1;j<k;j++)
01862 {
01863 Q[t][t]+=r[j][t]*r[j][t];
01864 Q[t][j]=-r[j][t]*r[t][j];
01865 }
01866 }
01867 for (iter=0;iter<max_iter;iter++)
01868 {
01869
01870 pQp=0;
01871 for (t=0;t<k;t++)
01872 {
01873 Qp[t]=0;
01874 for (j=0;j<k;j++)
01875 Qp[t]+=Q[t][j]*p[j];
01876 pQp+=p[t]*Qp[t];
01877 }
01878 double max_error=0;
01879 for (t=0;t<k;t++)
01880 {
01881 double error=fabs(Qp[t]-pQp);
01882 if (error>max_error)
01883 max_error=error;
01884 }
01885 if (max_error<eps) break;
01886
01887 for (t=0;t<k;t++)
01888 {
01889 double diff=(-Qp[t]+pQp)/Q[t][t];
01890 p[t]+=diff;
01891 pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01892 for (j=0;j<k;j++)
01893 {
01894 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01895 p[j]/=(1+diff);
01896 }
01897 }
01898 }
01899 if (iter>=max_iter)
01900 info("Exceeds max_iter in multiclass_prob\n");
01901 for(t=0;t<k;t++) free(Q[t]);
01902 free(Q);
01903 free(Qp);
01904 }
01905
01906
01907 void svm_binary_svc_probability(
01908 const svm_problem *prob, const svm_parameter *param,
01909 double Cp, double Cn, double& probA, double& probB)
01910 {
01911 int i;
01912 int nr_fold = 5;
01913 int *perm = Malloc(int,prob->l);
01914 double *dec_values = Malloc(double,prob->l);
01915
01916
01917 for(i=0;i<prob->l;i++) perm[i]=i;
01918 for(i=0;i<prob->l;i++)
01919 {
01920 int j = i+rand()%(prob->l-i);
01921 swap(perm[i],perm[j]);
01922 }
01923 for(i=0;i<nr_fold;i++)
01924 {
01925 int begin = i*prob->l/nr_fold;
01926 int end = (i+1)*prob->l/nr_fold;
01927 int j,k;
01928 struct svm_problem subprob;
01929
01930 subprob.l = prob->l-(end-begin);
01931 subprob.x = Malloc(struct svm_node*,subprob.l);
01932 subprob.y = Malloc(double,subprob.l);
01933
01934 k=0;
01935 for(j=0;j<begin;j++)
01936 {
01937 subprob.x[k] = prob->x[perm[j]];
01938 subprob.y[k] = prob->y[perm[j]];
01939 ++k;
01940 }
01941 for(j=end;j<prob->l;j++)
01942 {
01943 subprob.x[k] = prob->x[perm[j]];
01944 subprob.y[k] = prob->y[perm[j]];
01945 ++k;
01946 }
01947 int p_count=0,n_count=0;
01948 for(j=0;j<k;j++)
01949 if(subprob.y[j]>0)
01950 p_count++;
01951 else
01952 n_count++;
01953
01954 if(p_count==0 && n_count==0)
01955 for(j=begin;j<end;j++)
01956 dec_values[perm[j]] = 0;
01957 else if(p_count > 0 && n_count == 0)
01958 for(j=begin;j<end;j++)
01959 dec_values[perm[j]] = 1;
01960 else if(p_count == 0 && n_count > 0)
01961 for(j=begin;j<end;j++)
01962 dec_values[perm[j]] = -1;
01963 else
01964 {
01965 svm_parameter subparam = *param;
01966 subparam.probability=0;
01967 subparam.C=1.0;
01968 subparam.nr_weight=2;
01969 subparam.weight_label = Malloc(int,2);
01970 subparam.weight = Malloc(double,2);
01971 subparam.weight_label[0]=+1;
01972 subparam.weight_label[1]=-1;
01973 subparam.weight[0]=Cp;
01974 subparam.weight[1]=Cn;
01975 struct svm_model *submodel = svm_train(&subprob,&subparam);
01976 for(j=begin;j<end;j++)
01977 {
01978 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
01979
01980 dec_values[perm[j]] *= submodel->label[0];
01981 }
01982 svm_destroy_model(submodel);
01983 svm_destroy_param(&subparam);
01984 }
01985 free(subprob.x);
01986 free(subprob.y);
01987 }
01988 sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
01989 free(dec_values);
01990 free(perm);
01991 }
01992
01993
01994 double svm_svr_probability(
01995 const svm_problem *prob, const svm_parameter *param)
01996 {
01997 int i;
01998 int nr_fold = 5;
01999 double *ymv = Malloc(double,prob->l);
02000 double mae = 0;
02001
02002 svm_parameter newparam = *param;
02003 newparam.probability = 0;
02004 svm_cross_validation(prob,&newparam,nr_fold,ymv);
02005 for(i=0;i<prob->l;i++)
02006 {
02007 ymv[i]=prob->y[i]-ymv[i];
02008 mae += fabs(ymv[i]);
02009 }
02010 mae /= prob->l;
02011 double std=sqrt(2*mae*mae);
02012 int count=0;
02013 mae=0;
02014 for(i=0;i<prob->l;i++)
02015 if (fabs(ymv[i]) > 5*std)
02016 count=count+1;
02017 else
02018 mae+=fabs(ymv[i]);
02019 mae /= (prob->l-count);
02020 info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
02021 free(ymv);
02022 return mae;
02023 }
02024
02025
02026
02027
02028 void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02029 {
02030 int l = prob->l;
02031 int max_nr_class = 16;
02032 int nr_class = 0;
02033 int *label = Malloc(int,max_nr_class);
02034 int *count = Malloc(int,max_nr_class);
02035 int *data_label = Malloc(int,l);
02036 int i;
02037
02038 for(i=0;i<l;i++)
02039 {
02040 int this_label = (int)prob->y[i];
02041 int j;
02042 for(j=0;j<nr_class;j++)
02043 {
02044 if(this_label == label[j])
02045 {
02046 ++count[j];
02047 break;
02048 }
02049 }
02050 data_label[i] = j;
02051 if(j == nr_class)
02052 {
02053 if(nr_class == max_nr_class)
02054 {
02055 max_nr_class *= 2;
02056 label = (int *)realloc(label,max_nr_class*sizeof(int));
02057 count = (int *)realloc(count,max_nr_class*sizeof(int));
02058 }
02059 label[nr_class] = this_label;
02060 count[nr_class] = 1;
02061 ++nr_class;
02062 }
02063 }
02064
02065 int *start = Malloc(int,nr_class);
02066 start[0] = 0;
02067 for(i=1;i<nr_class;i++)
02068 start[i] = start[i-1]+count[i-1];
02069 for(i=0;i<l;i++)
02070 {
02071 perm[start[data_label[i]]] = i;
02072 ++start[data_label[i]];
02073 }
02074 start[0] = 0;
02075 for(i=1;i<nr_class;i++)
02076 start[i] = start[i-1]+count[i-1];
02077
02078 *nr_class_ret = nr_class;
02079 *label_ret = label;
02080 *start_ret = start;
02081 *count_ret = count;
02082 free(data_label);
02083 }
02084
02085
02086
02087
02088 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02089 {
02090 svm_model *model = Malloc(svm_model,1);
02091 model->param = *param;
02092 model->free_sv = 0;
02093
02094 if(param->svm_type == ONE_CLASS ||
02095 param->svm_type == EPSILON_SVR ||
02096 param->svm_type == NU_SVR)
02097 {
02098
02099 model->nr_class = 2;
02100 model->label = NULL;
02101 model->nSV = NULL;
02102 model->probA = NULL; model->probB = NULL;
02103 model->sv_coef = Malloc(double *,1);
02104
02105 if(param->probability &&
02106 (param->svm_type == EPSILON_SVR ||
02107 param->svm_type == NU_SVR))
02108 {
02109 model->probA = Malloc(double,1);
02110 model->probA[0] = svm_svr_probability(prob,param);
02111 }
02112
02113 decision_function f = svm_train_one(prob,param,0,0);
02114 model->rho = Malloc(double,1);
02115 model->rho[0] = f.rho;
02116
02117 int nSV = 0;
02118 int i;
02119 for(i=0;i<prob->l;i++)
02120 if(fabs(f.alpha[i]) > 0) ++nSV;
02121 model->l = nSV;
02122 model->SV = Malloc(svm_node *,nSV);
02123 model->sv_coef[0] = Malloc(double,nSV);
02124 int j = 0;
02125 for(i=0;i<prob->l;i++)
02126 if(fabs(f.alpha[i]) > 0)
02127 {
02128 model->SV[j] = prob->x[i];
02129 model->sv_coef[0][j] = f.alpha[i];
02130 ++j;
02131 }
02132
02133 free(f.alpha);
02134 }
02135 else
02136 {
02137
02138 int l = prob->l;
02139 int nr_class;
02140 int *label = NULL;
02141 int *start = NULL;
02142 int *count = NULL;
02143 int *perm = Malloc(int,l);
02144
02145
02146 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02147 svm_node **x = Malloc(svm_node *,l);
02148 int i;
02149 for(i=0;i<l;i++)
02150 x[i] = prob->x[perm[i]];
02151
02152
02153
02154 double *weighted_C = Malloc(double, nr_class);
02155 for(i=0;i<nr_class;i++)
02156 weighted_C[i] = param->C;
02157 for(i=0;i<param->nr_weight;i++)
02158 {
02159 int j;
02160 for(j=0;j<nr_class;j++)
02161 if(param->weight_label[i] == label[j])
02162 break;
02163 if(j == nr_class)
02164 fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
02165 else
02166 weighted_C[j] *= param->weight[i];
02167 }
02168
02169
02170
02171 bool *nonzero = Malloc(bool,l);
02172 for(i=0;i<l;i++)
02173 nonzero[i] = false;
02174 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02175
02176 double *probA=NULL,*probB=NULL;
02177 if (param->probability)
02178 {
02179 probA=Malloc(double,nr_class*(nr_class-1)/2);
02180 probB=Malloc(double,nr_class*(nr_class-1)/2);
02181 }
02182
02183 int p = 0;
02184 for(i=0;i<nr_class;i++)
02185 for(int j=i+1;j<nr_class;j++)
02186 {
02187 svm_problem sub_prob;
02188 int si = start[i], sj = start[j];
02189 int ci = count[i], cj = count[j];
02190 sub_prob.l = ci+cj;
02191 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02192 sub_prob.y = Malloc(double,sub_prob.l);
02193 int k;
02194 for(k=0;k<ci;k++)
02195 {
02196 sub_prob.x[k] = x[si+k];
02197 sub_prob.y[k] = +1;
02198 }
02199 for(k=0;k<cj;k++)
02200 {
02201 sub_prob.x[ci+k] = x[sj+k];
02202 sub_prob.y[ci+k] = -1;
02203 }
02204
02205 if(param->probability)
02206 svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02207
02208 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02209 for(k=0;k<ci;k++)
02210 if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02211 nonzero[si+k] = true;
02212 for(k=0;k<cj;k++)
02213 if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02214 nonzero[sj+k] = true;
02215 free(sub_prob.x);
02216 free(sub_prob.y);
02217 ++p;
02218 }
02219
02220
02221
02222 model->nr_class = nr_class;
02223
02224 model->label = Malloc(int,nr_class);
02225 for(i=0;i<nr_class;i++)
02226 model->label[i] = label[i];
02227
02228 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02229 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02230 model->rho[i] = f[i].rho;
02231
02232 if(param->probability)
02233 {
02234 model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02235 model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02236 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02237 {
02238 model->probA[i] = probA[i];
02239 model->probB[i] = probB[i];
02240 }
02241 }
02242 else
02243 {
02244 model->probA=NULL;
02245 model->probB=NULL;
02246 }
02247
02248 int total_sv = 0;
02249 int *nz_count = Malloc(int,nr_class);
02250 model->nSV = Malloc(int,nr_class);
02251 for(i=0;i<nr_class;i++)
02252 {
02253 int nSV = 0;
02254 for(int j=0;j<count[i];j++)
02255 if(nonzero[start[i]+j])
02256 {
02257 ++nSV;
02258 ++total_sv;
02259 }
02260 model->nSV[i] = nSV;
02261 nz_count[i] = nSV;
02262 }
02263
02264 info("Total nSV = %d\n",total_sv);
02265
02266 model->l = total_sv;
02267 model->SV = Malloc(svm_node *,total_sv);
02268 p = 0;
02269 for(i=0;i<l;i++)
02270 if(nonzero[i]) model->SV[p++] = x[i];
02271
02272 int *nz_start = Malloc(int,nr_class);
02273 nz_start[0] = 0;
02274 for(i=1;i<nr_class;i++)
02275 nz_start[i] = nz_start[i-1]+nz_count[i-1];
02276
02277 model->sv_coef = Malloc(double *,nr_class-1);
02278 for(i=0;i<nr_class-1;i++)
02279 model->sv_coef[i] = Malloc(double,total_sv);
02280
02281 p = 0;
02282 for(i=0;i<nr_class;i++)
02283 for(int j=i+1;j<nr_class;j++)
02284 {
02285
02286
02287
02288
02289 int si = start[i];
02290 int sj = start[j];
02291 int ci = count[i];
02292 int cj = count[j];
02293
02294 int q = nz_start[i];
02295 int k;
02296 for(k=0;k<ci;k++)
02297 if(nonzero[si+k])
02298 model->sv_coef[j-1][q++] = f[p].alpha[k];
02299 q = nz_start[j];
02300 for(k=0;k<cj;k++)
02301 if(nonzero[sj+k])
02302 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02303 ++p;
02304 }
02305
02306 free(label);
02307 free(probA);
02308 free(probB);
02309 free(count);
02310 free(perm);
02311 free(start);
02312 free(x);
02313 free(weighted_C);
02314 free(nonzero);
02315 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02316 free(f[i].alpha);
02317 free(f);
02318 free(nz_count);
02319 free(nz_start);
02320 }
02321 return model;
02322 }
02323
02324
02325 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02326 {
02327 int i;
02328 int *fold_start = Malloc(int,nr_fold+1);
02329 int l = prob->l;
02330 int *perm = Malloc(int,l);
02331 int nr_class;
02332
02333
02334
02335 if((param->svm_type == C_SVC ||
02336 param->svm_type == NU_SVC) && nr_fold < l)
02337 {
02338 int *start = NULL;
02339 int *label = NULL;
02340 int *count = NULL;
02341 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02342
02343
02344 int *fold_count = Malloc(int,nr_fold);
02345 int c;
02346 int *index = Malloc(int,l);
02347 for(i=0;i<l;i++)
02348 index[i]=perm[i];
02349 for (c=0; c<nr_class; c++)
02350 for(i=0;i<count[c];i++)
02351 {
02352 int j = i+rand()%(count[c]-i);
02353 swap(index[start[c]+j],index[start[c]+i]);
02354 }
02355 for(i=0;i<nr_fold;i++)
02356 {
02357 fold_count[i] = 0;
02358 for (c=0; c<nr_class;c++)
02359 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02360 }
02361 fold_start[0]=0;
02362 for (i=1;i<=nr_fold;i++)
02363 fold_start[i] = fold_start[i-1]+fold_count[i-1];
02364 for (c=0; c<nr_class;c++)
02365 for(i=0;i<nr_fold;i++)
02366 {
02367 int begin = start[c]+i*count[c]/nr_fold;
02368 int end = start[c]+(i+1)*count[c]/nr_fold;
02369 for(int j=begin;j<end;j++)
02370 {
02371 perm[fold_start[i]] = index[j];
02372 fold_start[i]++;
02373 }
02374 }
02375 fold_start[0]=0;
02376 for (i=1;i<=nr_fold;i++)
02377 fold_start[i] = fold_start[i-1]+fold_count[i-1];
02378 free(start);
02379 free(label);
02380 free(count);
02381 free(index);
02382 free(fold_count);
02383 }
02384 else
02385 {
02386 for(i=0;i<l;i++) perm[i]=i;
02387 for(i=0;i<l;i++)
02388 {
02389 int j = i+rand()%(l-i);
02390 swap(perm[i],perm[j]);
02391 }
02392 for(i=0;i<=nr_fold;i++)
02393 fold_start[i]=i*l/nr_fold;
02394 }
02395
02396 for(i=0;i<nr_fold;i++)
02397 {
02398 int begin = fold_start[i];
02399 int end = fold_start[i+1];
02400 int j,k;
02401 struct svm_problem subprob;
02402
02403 subprob.l = l-(end-begin);
02404 subprob.x = Malloc(struct svm_node*,subprob.l);
02405 subprob.y = Malloc(double,subprob.l);
02406
02407 k=0;
02408 for(j=0;j<begin;j++)
02409 {
02410 subprob.x[k] = prob->x[perm[j]];
02411 subprob.y[k] = prob->y[perm[j]];
02412 ++k;
02413 }
02414 for(j=end;j<l;j++)
02415 {
02416 subprob.x[k] = prob->x[perm[j]];
02417 subprob.y[k] = prob->y[perm[j]];
02418 ++k;
02419 }
02420 struct svm_model *submodel = svm_train(&subprob,param);
02421 if(param->probability &&
02422 (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02423 {
02424 double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02425 for(j=begin;j<end;j++)
02426 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02427 free(prob_estimates);
02428 }
02429 else
02430 for(j=begin;j<end;j++)
02431 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02432 svm_destroy_model(submodel);
02433 free(subprob.x);
02434 free(subprob.y);
02435 }
02436 free(fold_start);
02437 free(perm);
02438 }
02439
02440
02441 int svm_get_svm_type(const svm_model *model)
02442 {
02443 return model->param.svm_type;
02444 }
02445
02446 int svm_get_nr_class(const svm_model *model)
02447 {
02448 return model->nr_class;
02449 }
02450
02451 void svm_get_labels(const svm_model *model, int* label)
02452 {
02453 if (model->label != NULL)
02454 for(int i=0;i<model->nr_class;i++)
02455 label[i] = model->label[i];
02456 }
02457
02458 double svm_get_svr_probability(const svm_model *model)
02459 {
02460 if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02461 model->probA!=NULL)
02462 return model->probA[0];
02463 else
02464 {
02465 info("Model doesn't contain information for SVR probability inference\n");
02466 return 0;
02467 }
02468 }
02469
02470 void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02471 {
02472 if(model->param.svm_type == ONE_CLASS ||
02473 model->param.svm_type == EPSILON_SVR ||
02474 model->param.svm_type == NU_SVR)
02475 {
02476 double *sv_coef = model->sv_coef[0];
02477 double sum = 0;
02478 for(int i=0;i<model->l;i++)
02479 sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02480 sum -= model->rho[0];
02481 *dec_values = sum;
02482 }
02483 else
02484 {
02485 int i;
02486 int nr_class = model->nr_class;
02487 int l = model->l;
02488
02489 double *kvalue = Malloc(double,l);
02490 for(i=0;i<l;i++)
02491 kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02492
02493 int *start = Malloc(int,nr_class);
02494 start[0] = 0;
02495 for(i=1;i<nr_class;i++)
02496 start[i] = start[i-1]+model->nSV[i-1];
02497
02498 int p=0;
02499 for(i=0;i<nr_class;i++)
02500 for(int j=i+1;j<nr_class;j++)
02501 {
02502 double sum = 0;
02503 int si = start[i];
02504 int sj = start[j];
02505 int ci = model->nSV[i];
02506 int cj = model->nSV[j];
02507
02508 int k;
02509 double *coef1 = model->sv_coef[j-1];
02510 double *coef2 = model->sv_coef[i];
02511 for(k=0;k<ci;k++)
02512 sum += coef1[si+k] * kvalue[si+k];
02513 for(k=0;k<cj;k++)
02514 sum += coef2[sj+k] * kvalue[sj+k];
02515 sum -= model->rho[p];
02516 dec_values[p] = sum;
02517 p++;
02518 }
02519
02520 free(kvalue);
02521 free(start);
02522 }
02523 }
02524
02525 double svm_predict(const svm_model *model, const svm_node *x)
02526 {
02527 if(model->param.svm_type == ONE_CLASS ||
02528 model->param.svm_type == EPSILON_SVR ||
02529 model->param.svm_type == NU_SVR)
02530 {
02531 double res;
02532 svm_predict_values(model, x, &res);
02533
02534 if(model->param.svm_type == ONE_CLASS)
02535 return (res>0)?1:-1;
02536 else
02537 return res;
02538 }
02539 else
02540 {
02541 int i;
02542 int nr_class = model->nr_class;
02543 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02544 svm_predict_values(model, x, dec_values);
02545
02546 int *vote = Malloc(int,nr_class);
02547 for(i=0;i<nr_class;i++)
02548 vote[i] = 0;
02549 int pos=0;
02550 for(i=0;i<nr_class;i++)
02551 for(int j=i+1;j<nr_class;j++)
02552 {
02553 if(dec_values[pos++] > 0)
02554 ++vote[i];
02555 else
02556 ++vote[j];
02557 }
02558
02559 int vote_max_idx = 0;
02560 for(i=1;i<nr_class;i++)
02561 if(vote[i] > vote[vote_max_idx])
02562 vote_max_idx = i;
02563 free(vote);
02564 free(dec_values);
02565 return model->label[vote_max_idx];
02566 }
02567 }
02568
02569 double svm_predict_probability(
02570 const svm_model *model, const svm_node *x, double *prob_estimates)
02571 {
02572 if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02573 model->probA!=NULL && model->probB!=NULL)
02574 {
02575 int i;
02576 int nr_class = model->nr_class;
02577 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02578 svm_predict_values(model, x, dec_values);
02579
02580 double min_prob=1e-7;
02581 double **pairwise_prob=Malloc(double *,nr_class);
02582 for(i=0;i<nr_class;i++)
02583 pairwise_prob[i]=Malloc(double,nr_class);
02584 int k=0;
02585 for(i=0;i<nr_class;i++)
02586 for(int j=i+1;j<nr_class;j++)
02587 {
02588 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02589 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02590 k++;
02591 }
02592 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02593
02594 int prob_max_idx = 0;
02595 for(i=1;i<nr_class;i++)
02596 if(prob_estimates[i] > prob_estimates[prob_max_idx])
02597 prob_max_idx = i;
02598 for(i=0;i<nr_class;i++)
02599 free(pairwise_prob[i]);
02600 free(dec_values);
02601 free(pairwise_prob);
02602 return model->label[prob_max_idx];
02603 }
02604 else
02605 return svm_predict(model, x);
02606 }
02607
02608 const char *svm_type_table[] =
02609 {
02610 "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02611 };
02612
02613 const char *kernel_type_table[]=
02614 {
02615 "linear","polynomial","rbf","sigmoid","precomputed",NULL
02616 };
02617
02618 int svm_save_model(const char *model_file_name, const svm_model *model)
02619 {
02620 FILE *fp = fopen(model_file_name,"w");
02621 if(fp==NULL) return -1;
02622
02623 const svm_parameter& param = model->param;
02624
02625 fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02626 fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02627
02628 if(param.kernel_type == POLY)
02629 fprintf(fp,"degree %d\n", param.degree);
02630
02631 if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02632 fprintf(fp,"gamma %g\n", param.gamma);
02633
02634 if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02635 fprintf(fp,"coef0 %g\n", param.coef0);
02636
02637 int nr_class = model->nr_class;
02638 int l = model->l;
02639 fprintf(fp, "nr_class %d\n", nr_class);
02640 fprintf(fp, "total_sv %d\n",l);
02641
02642 {
02643 fprintf(fp, "rho");
02644 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02645 fprintf(fp," %g",model->rho[i]);
02646 fprintf(fp, "\n");
02647 }
02648
02649 if(model->label)
02650 {
02651 fprintf(fp, "label");
02652 for(int i=0;i<nr_class;i++)
02653 fprintf(fp," %d",model->label[i]);
02654 fprintf(fp, "\n");
02655 }
02656
02657 if(model->probA)
02658 {
02659 fprintf(fp, "probA");
02660 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02661 fprintf(fp," %g",model->probA[i]);
02662 fprintf(fp, "\n");
02663 }
02664 if(model->probB)
02665 {
02666 fprintf(fp, "probB");
02667 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02668 fprintf(fp," %g",model->probB[i]);
02669 fprintf(fp, "\n");
02670 }
02671
02672 if(model->nSV)
02673 {
02674 fprintf(fp, "nr_sv");
02675 for(int i=0;i<nr_class;i++)
02676 fprintf(fp," %d",model->nSV[i]);
02677 fprintf(fp, "\n");
02678 }
02679
02680 fprintf(fp, "SV\n");
02681 const double * const *sv_coef = model->sv_coef;
02682 const svm_node * const *SV = model->SV;
02683
02684 for(int i=0;i<l;i++)
02685 {
02686 for(int j=0;j<nr_class-1;j++)
02687 fprintf(fp, "%.16g ",sv_coef[j][i]);
02688
02689 const svm_node *p = SV[i];
02690
02691 if(param.kernel_type == PRECOMPUTED)
02692 fprintf(fp,"0:%d ",(int)(p->value));
02693 else
02694 while(p->index != -1)
02695 {
02696 fprintf(fp,"%d:%.8g ",p->index,p->value);
02697 p++;
02698 }
02699 fprintf(fp, "\n");
02700 }
02701 if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02702 else return 0;
02703 }
02704
02705 svm_model *svm_load_model(const char *model_file_name)
02706 {
02707 FILE *fp = fopen(model_file_name,"r");
02708 if(fp==NULL) return NULL;
02709
02710
02711
02712 svm_model *model = Malloc(svm_model,1);
02713 svm_parameter& param = model->param;
02714 model->rho = NULL;
02715 model->probA = NULL;
02716 model->probB = NULL;
02717 model->label = NULL;
02718 model->nSV = NULL;
02719
02720 char cmd[81];
02721 while(1)
02722 {
02723 fscanf(fp,"%80s",cmd);
02724
02725 if(strcmp(cmd,"svm_type")==0)
02726 {
02727 fscanf(fp,"%80s",cmd);
02728 int i;
02729 for(i=0;svm_type_table[i];i++)
02730 {
02731 if(strcmp(svm_type_table[i],cmd)==0)
02732 {
02733 param.svm_type=i;
02734 break;
02735 }
02736 }
02737 if(svm_type_table[i] == NULL)
02738 {
02739 fprintf(stderr,"unknown svm type.\n");
02740 free(model->rho);
02741 free(model->label);
02742 free(model->nSV);
02743 free(model);
02744 return NULL;
02745 }
02746 }
02747 else if(strcmp(cmd,"kernel_type")==0)
02748 {
02749 fscanf(fp,"%80s",cmd);
02750 int i;
02751 for(i=0;kernel_type_table[i];i++)
02752 {
02753 if(strcmp(kernel_type_table[i],cmd)==0)
02754 {
02755 param.kernel_type=i;
02756 break;
02757 }
02758 }
02759 if(kernel_type_table[i] == NULL)
02760 {
02761 fprintf(stderr,"unknown kernel function.\n");
02762 free(model->rho);
02763 free(model->label);
02764 free(model->nSV);
02765 free(model);
02766 return NULL;
02767 }
02768 }
02769 else if(strcmp(cmd,"degree")==0)
02770 fscanf(fp,"%d",¶m.degree);
02771 else if(strcmp(cmd,"gamma")==0)
02772 fscanf(fp,"%lf",¶m.gamma);
02773 else if(strcmp(cmd,"coef0")==0)
02774 fscanf(fp,"%lf",¶m.coef0);
02775 else if(strcmp(cmd,"nr_class")==0)
02776 fscanf(fp,"%d",&model->nr_class);
02777 else if(strcmp(cmd,"total_sv")==0)
02778 fscanf(fp,"%d",&model->l);
02779 else if(strcmp(cmd,"rho")==0)
02780 {
02781 int n = model->nr_class * (model->nr_class-1)/2;
02782 model->rho = Malloc(double,n);
02783 for(int i=0;i<n;i++)
02784 fscanf(fp,"%lf",&model->rho[i]);
02785 }
02786 else if(strcmp(cmd,"label")==0)
02787 {
02788 int n = model->nr_class;
02789 model->label = Malloc(int,n);
02790 for(int i=0;i<n;i++)
02791 fscanf(fp,"%d",&model->label[i]);
02792 }
02793 else if(strcmp(cmd,"probA")==0)
02794 {
02795 int n = model->nr_class * (model->nr_class-1)/2;
02796 model->probA = Malloc(double,n);
02797 for(int i=0;i<n;i++)
02798 fscanf(fp,"%lf",&model->probA[i]);
02799 }
02800 else if(strcmp(cmd,"probB")==0)
02801 {
02802 int n = model->nr_class * (model->nr_class-1)/2;
02803 model->probB = Malloc(double,n);
02804 for(int i=0;i<n;i++)
02805 fscanf(fp,"%lf",&model->probB[i]);
02806 }
02807 else if(strcmp(cmd,"nr_sv")==0)
02808 {
02809 int n = model->nr_class;
02810 model->nSV = Malloc(int,n);
02811 for(int i=0;i<n;i++)
02812 fscanf(fp,"%d",&model->nSV[i]);
02813 }
02814 else if(strcmp(cmd,"SV")==0)
02815 {
02816 while(1)
02817 {
02818 int c = getc(fp);
02819 if(c==EOF || c=='\n') break;
02820 }
02821 break;
02822 }
02823 else
02824 {
02825 fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02826 free(model->rho);
02827 free(model->label);
02828 free(model->nSV);
02829 free(model);
02830 return NULL;
02831 }
02832 }
02833
02834
02835
02836 int elements = 0;
02837 long pos = ftell(fp);
02838
02839 while(1)
02840 {
02841 int c = fgetc(fp);
02842 switch(c)
02843 {
02844 case '\n':
02845
02846 case ':':
02847 ++elements;
02848 break;
02849 case EOF:
02850 goto out;
02851 default:
02852 ;
02853 }
02854 }
02855 out:
02856 fseek(fp,pos,SEEK_SET);
02857
02858 int m = model->nr_class - 1;
02859 int l = model->l;
02860 model->sv_coef = Malloc(double *,m);
02861 int i;
02862 for(i=0;i<m;i++)
02863 model->sv_coef[i] = Malloc(double,l);
02864 model->SV = Malloc(svm_node*,l);
02865 svm_node *x_space=NULL;
02866 if(l>0) x_space = Malloc(svm_node,elements);
02867
02868 int j=0;
02869 for(i=0;i<l;i++)
02870 {
02871 model->SV[i] = &x_space[j];
02872 for(int k=0;k<m;k++)
02873 fscanf(fp,"%lf",&model->sv_coef[k][i]);
02874 while(1)
02875 {
02876 int c;
02877 do {
02878 c = getc(fp);
02879 if(c=='\n') goto out2;
02880 } while(isspace(c));
02881 ungetc(c,fp);
02882 fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value));
02883 ++j;
02884 }
02885 out2:
02886 x_space[j++].index = -1;
02887 }
02888 if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
02889
02890 model->free_sv = 1;
02891 return model;
02892 }
02893
02894 void svm_destroy_model(svm_model* model)
02895 {
02896 if(model->free_sv && model->l > 0)
02897 free((void *)(model->SV[0]));
02898 for(int i=0;i<model->nr_class-1;i++)
02899 free(model->sv_coef[i]);
02900 free(model->SV);
02901 free(model->sv_coef);
02902 free(model->rho);
02903 free(model->label);
02904 free(model->probA);
02905 free(model->probB);
02906 free(model->nSV);
02907 free(model);
02908 }
02909
02910 void svm_destroy_param(svm_parameter* param)
02911 {
02912 free(param->weight_label);
02913 free(param->weight);
02914 }
02915
02916 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
02917 {
02918
02919
02920 int svm_type = param->svm_type;
02921 if(svm_type != C_SVC &&
02922 svm_type != NU_SVC &&
02923 svm_type != ONE_CLASS &&
02924 svm_type != EPSILON_SVR &&
02925 svm_type != NU_SVR)
02926 return "unknown svm type";
02927
02928
02929
02930 int kernel_type = param->kernel_type;
02931 if(kernel_type != LINEAR &&
02932 kernel_type != POLY &&
02933 kernel_type != RBF &&
02934 kernel_type != SIGMOID &&
02935 kernel_type != PRECOMPUTED)
02936 return "unknown kernel type";
02937
02938 if(param->degree < 0)
02939 return "degree of polynomial kernel < 0";
02940
02941
02942
02943 if(param->cache_size <= 0)
02944 return "cache_size <= 0";
02945
02946 if(param->eps <= 0)
02947 return "eps <= 0";
02948
02949 if(svm_type == C_SVC ||
02950 svm_type == EPSILON_SVR ||
02951 svm_type == NU_SVR)
02952 if(param->C <= 0)
02953 return "C <= 0";
02954
02955 if(svm_type == NU_SVC ||
02956 svm_type == ONE_CLASS ||
02957 svm_type == NU_SVR)
02958 if(param->nu <= 0 || param->nu > 1)
02959 return "nu <= 0 or nu > 1";
02960
02961 if(svm_type == EPSILON_SVR)
02962 if(param->p < 0)
02963 return "p < 0";
02964
02965 if(param->shrinking != 0 &&
02966 param->shrinking != 1)
02967 return "shrinking != 0 and shrinking != 1";
02968
02969 if(param->probability != 0 &&
02970 param->probability != 1)
02971 return "probability != 0 and probability != 1";
02972
02973 if(param->probability == 1 &&
02974 svm_type == ONE_CLASS)
02975 return "one-class SVM probability output not supported yet";
02976
02977
02978
02979
02980 if(svm_type == NU_SVC)
02981 {
02982 int l = prob->l;
02983 int max_nr_class = 16;
02984 int nr_class = 0;
02985 int *label = Malloc(int,max_nr_class);
02986 int *count = Malloc(int,max_nr_class);
02987
02988 int i;
02989 for(i=0;i<l;i++)
02990 {
02991 int this_label = (int)prob->y[i];
02992 int j;
02993 for(j=0;j<nr_class;j++)
02994 if(this_label == label[j])
02995 {
02996 ++count[j];
02997 break;
02998 }
02999 if(j == nr_class)
03000 {
03001 if(nr_class == max_nr_class)
03002 {
03003 max_nr_class *= 2;
03004 label = (int *)realloc(label,max_nr_class*sizeof(int));
03005 count = (int *)realloc(count,max_nr_class*sizeof(int));
03006 }
03007 label[nr_class] = this_label;
03008 count[nr_class] = 1;
03009 ++nr_class;
03010 }
03011 }
03012
03013 for(i=0;i<nr_class;i++)
03014 {
03015 int n1 = count[i];
03016 for(int j=i+1;j<nr_class;j++)
03017 {
03018 int n2 = count[j];
03019 if(param->nu*(n1+n2)/2 > min(n1,n2))
03020 {
03021 free(label);
03022 free(count);
03023 return "specified nu is infeasible";
03024 }
03025 }
03026 }
03027 free(label);
03028 free(count);
03029 }
03030
03031 return NULL;
03032 }
03033
03034 int svm_check_probability_model(const svm_model *model)
03035 {
03036 return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03037 model->probA!=NULL && model->probB!=NULL) ||
03038 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03039 model->probA!=NULL);
03040 }