00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <ctype.h>
00005 #include <float.h>
00006 #include <string.h>
00007 #include <stdarg.h>
00008 #include <limits.h>
00009 #include <locale.h>
00010 #include "svm.h"
00011 int libsvm_version = LIBSVM_VERSION;
00012 typedef float Qfloat;
00013 typedef signed char schar;
00014 #ifndef min
00015 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
00016 #endif
00017 #ifndef max
00018 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
00019 #endif
00020 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00021 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
00022 {
00023 dst = new T[n];
00024 memcpy((void *)dst,(void *)src,sizeof(T)*n);
00025 }
00026 static inline double powi(double base, int times)
00027 {
00028 double tmp = base, ret = 1.0;
00029
00030 for(int t=times; t>0; t/=2)
00031 {
00032 if(t%2==1) ret*=tmp;
00033 tmp = tmp * tmp;
00034 }
00035 return ret;
00036 }
00037 #define INF HUGE_VAL
00038 #define TAU 1e-12
00039 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00040
00041 static void print_string_stdout(const char *s)
00042 {
00043 fputs(s,stdout);
00044 fflush(stdout);
00045 }
00046 static void (*svm_print_string) (const char *) = &print_string_stdout;
00047 #if 1
00048 static void info(const char *fmt,...)
00049 {
00050 char buf[BUFSIZ];
00051 va_list ap;
00052 va_start(ap,fmt);
00053 vsprintf(buf,fmt,ap);
00054 va_end(ap);
00055 (*svm_print_string)(buf);
00056 }
00057 #else
00058 static void info(const char *fmt,...) {}
00059 #endif
00060
00061
00062
00063
00064
00065
00066
00067 class Cache
00068 {
00069 public:
00070 Cache(int l,long int size);
00071 ~Cache();
00072
00073
00074
00075
00076 int get_data(const int index, Qfloat **data, int len);
00077 void swap_index(int i, int j);
00078 private:
00079 int l;
00080 long int size;
00081 struct head_t
00082 {
00083 head_t *prev, *next;
00084 Qfloat *data;
00085 int len;
00086 };
00087
00088 head_t *head;
00089 head_t lru_head;
00090 void lru_delete(head_t *h);
00091 void lru_insert(head_t *h);
00092 };
00093
00094 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00095 {
00096 head = (head_t *)calloc(l,sizeof(head_t));
00097 size /= sizeof(Qfloat);
00098 size -= l * sizeof(head_t) / sizeof(Qfloat);
00099 size = max(size, 2 * (long int) l);
00100 lru_head.next = lru_head.prev = &lru_head;
00101 }
00102
00103 Cache::~Cache()
00104 {
00105 for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00106 free(h->data);
00107 free(head);
00108 }
00109
00110 void Cache::lru_delete(head_t *h)
00111 {
00112
00113 h->prev->next = h->next;
00114 h->next->prev = h->prev;
00115 }
00116
00117 void Cache::lru_insert(head_t *h)
00118 {
00119
00120 h->next = &lru_head;
00121 h->prev = lru_head.prev;
00122 h->prev->next = h;
00123 h->next->prev = h;
00124 }
00125
00126 int Cache::get_data(const int index, Qfloat **data, int len)
00127 {
00128 head_t *h = &head[index];
00129 if(h->len) lru_delete(h);
00130 int more = len - h->len;
00131
00132 if(more > 0)
00133 {
00134
00135 while(size < more)
00136 {
00137 head_t *old = lru_head.next;
00138 lru_delete(old);
00139 free(old->data);
00140 size += old->len;
00141 old->data = 0;
00142 old->len = 0;
00143 }
00144
00145
00146 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00147 size -= more;
00148 swap(h->len,len);
00149 }
00150
00151 lru_insert(h);
00152 *data = h->data;
00153 return len;
00154 }
00155
00156 void Cache::swap_index(int i, int j)
00157 {
00158 if(i==j) return;
00159
00160 if(head[i].len) lru_delete(&head[i]);
00161 if(head[j].len) lru_delete(&head[j]);
00162 swap(head[i].data,head[j].data);
00163 swap(head[i].len,head[j].len);
00164 if(head[i].len) lru_insert(&head[i]);
00165 if(head[j].len) lru_insert(&head[j]);
00166
00167 if(i>j) swap(i,j);
00168 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00169 {
00170 if(h->len > i)
00171 {
00172 if(h->len > j)
00173 swap(h->data[i],h->data[j]);
00174 else
00175 {
00176
00177 lru_delete(h);
00178 free(h->data);
00179 size += h->len;
00180 h->data = 0;
00181 h->len = 0;
00182 }
00183 }
00184 }
00185 }
00186
00187
00188
00189
00190
00191
00192
00193
00194 class QMatrix {
00195 public:
00196 virtual Qfloat *get_Q(int column, int len) const = 0;
00197 virtual double *get_QD() const = 0;
00198 virtual void swap_index(int i, int j) const = 0;
00199 virtual ~QMatrix() {}
00200 };
00201
00202 class Kernel: public QMatrix {
00203 public:
00204 Kernel(int l, svm_node * const * x, const svm_parameter& param);
00205 virtual ~Kernel();
00206
00207 static double k_function(const svm_node *x, const svm_node *y,
00208 const svm_parameter& param);
00209 virtual Qfloat *get_Q(int column, int len) const = 0;
00210 virtual double *get_QD() const = 0;
00211 virtual void swap_index(int i, int j) const
00212 {
00213 swap(x[i],x[j]);
00214 if(x_square) swap(x_square[i],x_square[j]);
00215 }
00216 protected:
00217
00218 double (Kernel::*kernel_function)(int i, int j) const;
00219
00220 private:
00221 const svm_node **x;
00222 double *x_square;
00223
00224
00225 const int kernel_type;
00226 const int degree;
00227 const double gamma;
00228 const double coef0;
00229
00230 static double dot(const svm_node *px, const svm_node *py);
00231 double kernel_linear(int i, int j) const
00232 {
00233 return dot(x[i],x[j]);
00234 }
00235 double kernel_poly(int i, int j) const
00236 {
00237 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00238 }
00239 double kernel_rbf(int i, int j) const
00240 {
00241 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00242 }
00243 double kernel_sigmoid(int i, int j) const
00244 {
00245 return tanh(gamma*dot(x[i],x[j])+coef0);
00246 }
00247 double kernel_precomputed(int i, int j) const
00248 {
00249 return x[i][(int)(x[j][0].value)].value;
00250 }
00251 };
00252
00253 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00254 :kernel_type(param.kernel_type), degree(param.degree),
00255 gamma(param.gamma), coef0(param.coef0)
00256 {
00257 switch(kernel_type)
00258 {
00259 case LINEAR:
00260 kernel_function = &Kernel::kernel_linear;
00261 break;
00262 case POLY:
00263 kernel_function = &Kernel::kernel_poly;
00264 break;
00265 case RBF:
00266 kernel_function = &Kernel::kernel_rbf;
00267 break;
00268 case SIGMOID:
00269 kernel_function = &Kernel::kernel_sigmoid;
00270 break;
00271 case PRECOMPUTED:
00272 kernel_function = &Kernel::kernel_precomputed;
00273 break;
00274 }
00275
00276 clone(x,x_,l);
00277
00278 if(kernel_type == RBF)
00279 {
00280 x_square = new double[l];
00281 for(int i=0;i<l;i++)
00282 x_square[i] = dot(x[i],x[i]);
00283 }
00284 else
00285 x_square = 0;
00286 }
00287
00288 Kernel::~Kernel()
00289 {
00290 delete[] x;
00291 delete[] x_square;
00292 }
00293
00294 double Kernel::dot(const svm_node *px, const svm_node *py)
00295 {
00296 double sum = 0;
00297 while(px->index != -1 && py->index != -1)
00298 {
00299 if(px->index == py->index)
00300 {
00301 sum += px->value * py->value;
00302 ++px;
00303 ++py;
00304 }
00305 else
00306 {
00307 if(px->index > py->index)
00308 ++py;
00309 else
00310 ++px;
00311 }
00312 }
00313 return sum;
00314 }
00315
00316 double Kernel::k_function(const svm_node *x, const svm_node *y,
00317 const svm_parameter& param)
00318 {
00319 switch(param.kernel_type)
00320 {
00321 case LINEAR:
00322 return dot(x,y);
00323 case POLY:
00324 return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00325 case RBF:
00326 {
00327 double sum = 0;
00328 while(x->index != -1 && y->index !=-1)
00329 {
00330 if(x->index == y->index)
00331 {
00332 double d = x->value - y->value;
00333 sum += d*d;
00334 ++x;
00335 ++y;
00336 }
00337 else
00338 {
00339 if(x->index > y->index)
00340 {
00341 sum += y->value * y->value;
00342 ++y;
00343 }
00344 else
00345 {
00346 sum += x->value * x->value;
00347 ++x;
00348 }
00349 }
00350 }
00351
00352 while(x->index != -1)
00353 {
00354 sum += x->value * x->value;
00355 ++x;
00356 }
00357
00358 while(y->index != -1)
00359 {
00360 sum += y->value * y->value;
00361 ++y;
00362 }
00363
00364 return exp(-param.gamma*sum);
00365 }
00366 case SIGMOID:
00367 return tanh(param.gamma*dot(x,y)+param.coef0);
00368 case PRECOMPUTED:
00369 return x[(int)(y->value)].value;
00370 default:
00371 return 0;
00372 }
00373 }
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393 class Solver {
00394 public:
00395 Solver() {};
00396 virtual ~Solver() {};
00397
00398 struct SolutionInfo {
00399 double obj;
00400 double rho;
00401 double upper_bound_p;
00402 double upper_bound_n;
00403 double r;
00404 };
00405
00406 void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00407 double *alpha_, double Cp, double Cn, double eps,
00408 SolutionInfo* si, int shrinking);
00409 protected:
00410 int active_size;
00411 schar *y;
00412 double *G;
00413 enum { LOWER_BOUND, UPPER_BOUND, FREE };
00414 char *alpha_status;
00415 double *alpha;
00416 const QMatrix *Q;
00417 const double *QD;
00418 double eps;
00419 double Cp,Cn;
00420 double *p;
00421 int *active_set;
00422 double *G_bar;
00423 int l;
00424 bool unshrink;
00425
00426 double get_C(int i)
00427 {
00428 return (y[i] > 0)? Cp : Cn;
00429 }
00430 void update_alpha_status(int i)
00431 {
00432 if(alpha[i] >= get_C(i))
00433 alpha_status[i] = UPPER_BOUND;
00434 else if(alpha[i] <= 0)
00435 alpha_status[i] = LOWER_BOUND;
00436 else alpha_status[i] = FREE;
00437 }
00438 bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00439 bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00440 bool is_free(int i) { return alpha_status[i] == FREE; }
00441 void swap_index(int i, int j);
00442 void reconstruct_gradient();
00443 virtual int select_working_set(int &i, int &j);
00444 virtual double calculate_rho();
00445 virtual void do_shrinking();
00446 private:
00447 bool be_shrunk(int i, double Gmax1, double Gmax2);
00448 };
00449
00450 void Solver::swap_index(int i, int j)
00451 {
00452 Q->swap_index(i,j);
00453 swap(y[i],y[j]);
00454 swap(G[i],G[j]);
00455 swap(alpha_status[i],alpha_status[j]);
00456 swap(alpha[i],alpha[j]);
00457 swap(p[i],p[j]);
00458 swap(active_set[i],active_set[j]);
00459 swap(G_bar[i],G_bar[j]);
00460 }
00461
00462 void Solver::reconstruct_gradient()
00463 {
00464
00465
00466 if(active_size == l) return;
00467
00468 int i,j;
00469 int nr_free = 0;
00470
00471 for(j=active_size;j<l;j++)
00472 G[j] = G_bar[j] + p[j];
00473
00474 for(j=0;j<active_size;j++)
00475 if(is_free(j))
00476 nr_free++;
00477
00478 if(2*nr_free < active_size)
00479 info("\nWARNING: using -h 0 may be faster\n");
00480
00481 if (nr_free*l > 2*active_size*(l-active_size))
00482 {
00483 for(i=active_size;i<l;i++)
00484 {
00485 const Qfloat *Q_i = Q->get_Q(i,active_size);
00486 for(j=0;j<active_size;j++)
00487 if(is_free(j))
00488 G[i] += alpha[j] * Q_i[j];
00489 }
00490 }
00491 else
00492 {
00493 for(i=0;i<active_size;i++)
00494 if(is_free(i))
00495 {
00496 const Qfloat *Q_i = Q->get_Q(i,l);
00497 double alpha_i = alpha[i];
00498 for(j=active_size;j<l;j++)
00499 G[j] += alpha_i * Q_i[j];
00500 }
00501 }
00502 }
00503
00504 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00505 double *alpha_, double Cp, double Cn, double eps,
00506 SolutionInfo* si, int shrinking)
00507 {
00508 this->l = l;
00509 this->Q = &Q;
00510 QD=Q.get_QD();
00511 clone(p, p_,l);
00512 clone(y, y_,l);
00513 clone(alpha,alpha_,l);
00514 this->Cp = Cp;
00515 this->Cn = Cn;
00516 this->eps = eps;
00517 unshrink = false;
00518
00519
00520 {
00521 alpha_status = new char[l];
00522 for(int i=0;i<l;i++)
00523 update_alpha_status(i);
00524 }
00525
00526
00527 {
00528 active_set = new int[l];
00529 for(int i=0;i<l;i++)
00530 active_set[i] = i;
00531 active_size = l;
00532 }
00533
00534
00535 {
00536 G = new double[l];
00537 G_bar = new double[l];
00538 int i;
00539 for(i=0;i<l;i++)
00540 {
00541 G[i] = p[i];
00542 G_bar[i] = 0;
00543 }
00544 for(i=0;i<l;i++)
00545 if(!is_lower_bound(i))
00546 {
00547 const Qfloat *Q_i = Q.get_Q(i,l);
00548 double alpha_i = alpha[i];
00549 int j;
00550 for(j=0;j<l;j++)
00551 G[j] += alpha_i*Q_i[j];
00552 if(is_upper_bound(i))
00553 for(j=0;j<l;j++)
00554 G_bar[j] += get_C(i) * Q_i[j];
00555 }
00556 }
00557
00558
00559
00560 int iter = 0;
00561 int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
00562 int counter = min(l,1000)+1;
00563
00564 while(iter < max_iter)
00565 {
00566
00567
00568 if(--counter == 0)
00569 {
00570 counter = min(l,1000);
00571 if(shrinking) do_shrinking();
00572 info(".");
00573 }
00574
00575 int i,j;
00576 if(select_working_set(i,j)!=0)
00577 {
00578
00579 reconstruct_gradient();
00580
00581 active_size = l;
00582 info("*");
00583 if(select_working_set(i,j)!=0)
00584 break;
00585 else
00586 counter = 1;
00587 }
00588
00589 ++iter;
00590
00591
00592
00593 const Qfloat *Q_i = Q.get_Q(i,active_size);
00594 const Qfloat *Q_j = Q.get_Q(j,active_size);
00595
00596 double C_i = get_C(i);
00597 double C_j = get_C(j);
00598
00599 double old_alpha_i = alpha[i];
00600 double old_alpha_j = alpha[j];
00601
00602 if(y[i]!=y[j])
00603 {
00604 double quad_coef = QD[i]+QD[j]+2*Q_i[j];
00605 if (quad_coef <= 0)
00606 quad_coef = TAU;
00607 double delta = (-G[i]-G[j])/quad_coef;
00608 double diff = alpha[i] - alpha[j];
00609 alpha[i] += delta;
00610 alpha[j] += delta;
00611
00612 if(diff > 0)
00613 {
00614 if(alpha[j] < 0)
00615 {
00616 alpha[j] = 0;
00617 alpha[i] = diff;
00618 }
00619 }
00620 else
00621 {
00622 if(alpha[i] < 0)
00623 {
00624 alpha[i] = 0;
00625 alpha[j] = -diff;
00626 }
00627 }
00628 if(diff > C_i - C_j)
00629 {
00630 if(alpha[i] > C_i)
00631 {
00632 alpha[i] = C_i;
00633 alpha[j] = C_i - diff;
00634 }
00635 }
00636 else
00637 {
00638 if(alpha[j] > C_j)
00639 {
00640 alpha[j] = C_j;
00641 alpha[i] = C_j + diff;
00642 }
00643 }
00644 }
00645 else
00646 {
00647 double quad_coef = QD[i]+QD[j]-2*Q_i[j];
00648 if (quad_coef <= 0)
00649 quad_coef = TAU;
00650 double delta = (G[i]-G[j])/quad_coef;
00651 double sum = alpha[i] + alpha[j];
00652 alpha[i] -= delta;
00653 alpha[j] += delta;
00654
00655 if(sum > C_i)
00656 {
00657 if(alpha[i] > C_i)
00658 {
00659 alpha[i] = C_i;
00660 alpha[j] = sum - C_i;
00661 }
00662 }
00663 else
00664 {
00665 if(alpha[j] < 0)
00666 {
00667 alpha[j] = 0;
00668 alpha[i] = sum;
00669 }
00670 }
00671 if(sum > C_j)
00672 {
00673 if(alpha[j] > C_j)
00674 {
00675 alpha[j] = C_j;
00676 alpha[i] = sum - C_j;
00677 }
00678 }
00679 else
00680 {
00681 if(alpha[i] < 0)
00682 {
00683 alpha[i] = 0;
00684 alpha[j] = sum;
00685 }
00686 }
00687 }
00688
00689
00690
00691 double delta_alpha_i = alpha[i] - old_alpha_i;
00692 double delta_alpha_j = alpha[j] - old_alpha_j;
00693
00694 for(int k=0;k<active_size;k++)
00695 {
00696 G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00697 }
00698
00699
00700
00701 {
00702 bool ui = is_upper_bound(i);
00703 bool uj = is_upper_bound(j);
00704 update_alpha_status(i);
00705 update_alpha_status(j);
00706 int k;
00707 if(ui != is_upper_bound(i))
00708 {
00709 Q_i = Q.get_Q(i,l);
00710 if(ui)
00711 for(k=0;k<l;k++)
00712 G_bar[k] -= C_i * Q_i[k];
00713 else
00714 for(k=0;k<l;k++)
00715 G_bar[k] += C_i * Q_i[k];
00716 }
00717
00718 if(uj != is_upper_bound(j))
00719 {
00720 Q_j = Q.get_Q(j,l);
00721 if(uj)
00722 for(k=0;k<l;k++)
00723 G_bar[k] -= C_j * Q_j[k];
00724 else
00725 for(k=0;k<l;k++)
00726 G_bar[k] += C_j * Q_j[k];
00727 }
00728 }
00729 }
00730
00731 if(iter >= max_iter)
00732 {
00733 if(active_size < l)
00734 {
00735
00736 reconstruct_gradient();
00737 active_size = l;
00738 info("*");
00739 }
00740 fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
00741 }
00742
00743
00744
00745 si->rho = calculate_rho();
00746
00747
00748 {
00749 double v = 0;
00750 int i;
00751 for(i=0;i<l;i++)
00752 v += alpha[i] * (G[i] + p[i]);
00753
00754 si->obj = v/2;
00755 }
00756
00757
00758 {
00759 for(int i=0;i<l;i++)
00760 alpha_[active_set[i]] = alpha[i];
00761 }
00762
00763
00764
00765
00766
00767
00768
00769
00770
00771 si->upper_bound_p = Cp;
00772 si->upper_bound_n = Cn;
00773
00774 info("\noptimization finished, #iter = %d\n",iter);
00775
00776 delete[] p;
00777 delete[] y;
00778 delete[] alpha;
00779 delete[] alpha_status;
00780 delete[] active_set;
00781 delete[] G;
00782 delete[] G_bar;
00783 }
00784
00785
00786 int Solver::select_working_set(int &out_i, int &out_j)
00787 {
00788
00789
00790
00791
00792
00793
00794 double Gmax = -INF;
00795 double Gmax2 = -INF;
00796 int Gmax_idx = -1;
00797 int Gmin_idx = -1;
00798 double obj_diff_min = INF;
00799
00800 for(int t=0;t<active_size;t++)
00801 if(y[t]==+1)
00802 {
00803 if(!is_upper_bound(t))
00804 if(-G[t] >= Gmax)
00805 {
00806 Gmax = -G[t];
00807 Gmax_idx = t;
00808 }
00809 }
00810 else
00811 {
00812 if(!is_lower_bound(t))
00813 if(G[t] >= Gmax)
00814 {
00815 Gmax = G[t];
00816 Gmax_idx = t;
00817 }
00818 }
00819
00820 int i = Gmax_idx;
00821 const Qfloat *Q_i = NULL;
00822 if(i != -1)
00823 Q_i = Q->get_Q(i,active_size);
00824
00825 for(int j=0;j<active_size;j++)
00826 {
00827 if(y[j]==+1)
00828 {
00829 if (!is_lower_bound(j))
00830 {
00831 double grad_diff=Gmax+G[j];
00832 if (G[j] >= Gmax2)
00833 Gmax2 = G[j];
00834 if (grad_diff > 0)
00835 {
00836 double obj_diff;
00837 double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
00838 if (quad_coef > 0)
00839 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00840 else
00841 obj_diff = -(grad_diff*grad_diff)/TAU;
00842
00843 if (obj_diff <= obj_diff_min)
00844 {
00845 Gmin_idx=j;
00846 obj_diff_min = obj_diff;
00847 }
00848 }
00849 }
00850 }
00851 else
00852 {
00853 if (!is_upper_bound(j))
00854 {
00855 double grad_diff= Gmax-G[j];
00856 if (-G[j] >= Gmax2)
00857 Gmax2 = -G[j];
00858 if (grad_diff > 0)
00859 {
00860 double obj_diff;
00861 double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
00862 if (quad_coef > 0)
00863 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00864 else
00865 obj_diff = -(grad_diff*grad_diff)/TAU;
00866
00867 if (obj_diff <= obj_diff_min)
00868 {
00869 Gmin_idx=j;
00870 obj_diff_min = obj_diff;
00871 }
00872 }
00873 }
00874 }
00875 }
00876
00877 if(Gmax+Gmax2 < eps || Gmin_idx == -1)
00878 return 1;
00879
00880 out_i = Gmax_idx;
00881 out_j = Gmin_idx;
00882 return 0;
00883 }
00884
00885 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
00886 {
00887 if(is_upper_bound(i))
00888 {
00889 if(y[i]==+1)
00890 return(-G[i] > Gmax1);
00891 else
00892 return(-G[i] > Gmax2);
00893 }
00894 else if(is_lower_bound(i))
00895 {
00896 if(y[i]==+1)
00897 return(G[i] > Gmax2);
00898 else
00899 return(G[i] > Gmax1);
00900 }
00901 else
00902 return(false);
00903 }
00904
00905 void Solver::do_shrinking()
00906 {
00907 int i;
00908 double Gmax1 = -INF;
00909 double Gmax2 = -INF;
00910
00911
00912 for(i=0;i<active_size;i++)
00913 {
00914 if(y[i]==+1)
00915 {
00916 if(!is_upper_bound(i))
00917 {
00918 if(-G[i] >= Gmax1)
00919 Gmax1 = -G[i];
00920 }
00921 if(!is_lower_bound(i))
00922 {
00923 if(G[i] >= Gmax2)
00924 Gmax2 = G[i];
00925 }
00926 }
00927 else
00928 {
00929 if(!is_upper_bound(i))
00930 {
00931 if(-G[i] >= Gmax2)
00932 Gmax2 = -G[i];
00933 }
00934 if(!is_lower_bound(i))
00935 {
00936 if(G[i] >= Gmax1)
00937 Gmax1 = G[i];
00938 }
00939 }
00940 }
00941
00942 if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
00943 {
00944 unshrink = true;
00945 reconstruct_gradient();
00946 active_size = l;
00947 info("*");
00948 }
00949
00950 for(i=0;i<active_size;i++)
00951 if (be_shrunk(i, Gmax1, Gmax2))
00952 {
00953 active_size--;
00954 while (active_size > i)
00955 {
00956 if (!be_shrunk(active_size, Gmax1, Gmax2))
00957 {
00958 swap_index(i,active_size);
00959 break;
00960 }
00961 active_size--;
00962 }
00963 }
00964 }
00965
00966 double Solver::calculate_rho()
00967 {
00968 double r;
00969 int nr_free = 0;
00970 double ub = INF, lb = -INF, sum_free = 0;
00971 for(int i=0;i<active_size;i++)
00972 {
00973 double yG = y[i]*G[i];
00974
00975 if(is_upper_bound(i))
00976 {
00977 if(y[i]==-1)
00978 ub = min(ub,yG);
00979 else
00980 lb = max(lb,yG);
00981 }
00982 else if(is_lower_bound(i))
00983 {
00984 if(y[i]==+1)
00985 ub = min(ub,yG);
00986 else
00987 lb = max(lb,yG);
00988 }
00989 else
00990 {
00991 ++nr_free;
00992 sum_free += yG;
00993 }
00994 }
00995
00996 if(nr_free>0)
00997 r = sum_free/nr_free;
00998 else
00999 r = (ub+lb)/2;
01000
01001 return r;
01002 }
01003
01004
01005
01006
01007
01008
01009 class Solver_NU: public Solver
01010 {
01011 public:
01012 Solver_NU() {}
01013 void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
01014 double *alpha, double Cp, double Cn, double eps,
01015 SolutionInfo* si, int shrinking)
01016 {
01017 this->si = si;
01018 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
01019 }
01020 private:
01021 SolutionInfo *si;
01022 int select_working_set(int &i, int &j);
01023 double calculate_rho();
01024 bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
01025 void do_shrinking();
01026 };
01027
01028
01029 int Solver_NU::select_working_set(int &out_i, int &out_j)
01030 {
01031
01032
01033
01034
01035
01036
01037 double Gmaxp = -INF;
01038 double Gmaxp2 = -INF;
01039 int Gmaxp_idx = -1;
01040
01041 double Gmaxn = -INF;
01042 double Gmaxn2 = -INF;
01043 int Gmaxn_idx = -1;
01044
01045 int Gmin_idx = -1;
01046 double obj_diff_min = INF;
01047
01048 for(int t=0;t<active_size;t++)
01049 if(y[t]==+1)
01050 {
01051 if(!is_upper_bound(t))
01052 if(-G[t] >= Gmaxp)
01053 {
01054 Gmaxp = -G[t];
01055 Gmaxp_idx = t;
01056 }
01057 }
01058 else
01059 {
01060 if(!is_lower_bound(t))
01061 if(G[t] >= Gmaxn)
01062 {
01063 Gmaxn = G[t];
01064 Gmaxn_idx = t;
01065 }
01066 }
01067
01068 int ip = Gmaxp_idx;
01069 int in = Gmaxn_idx;
01070 const Qfloat *Q_ip = NULL;
01071 const Qfloat *Q_in = NULL;
01072 if(ip != -1)
01073 Q_ip = Q->get_Q(ip,active_size);
01074 if(in != -1)
01075 Q_in = Q->get_Q(in,active_size);
01076
01077 for(int j=0;j<active_size;j++)
01078 {
01079 if(y[j]==+1)
01080 {
01081 if (!is_lower_bound(j))
01082 {
01083 double grad_diff=Gmaxp+G[j];
01084 if (G[j] >= Gmaxp2)
01085 Gmaxp2 = G[j];
01086 if (grad_diff > 0)
01087 {
01088 double obj_diff;
01089 double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
01090 if (quad_coef > 0)
01091 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01092 else
01093 obj_diff = -(grad_diff*grad_diff)/TAU;
01094
01095 if (obj_diff <= obj_diff_min)
01096 {
01097 Gmin_idx=j;
01098 obj_diff_min = obj_diff;
01099 }
01100 }
01101 }
01102 }
01103 else
01104 {
01105 if (!is_upper_bound(j))
01106 {
01107 double grad_diff=Gmaxn-G[j];
01108 if (-G[j] >= Gmaxn2)
01109 Gmaxn2 = -G[j];
01110 if (grad_diff > 0)
01111 {
01112 double obj_diff;
01113 double quad_coef = QD[in]+QD[j]-2*Q_in[j];
01114 if (quad_coef > 0)
01115 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01116 else
01117 obj_diff = -(grad_diff*grad_diff)/TAU;
01118
01119 if (obj_diff <= obj_diff_min)
01120 {
01121 Gmin_idx=j;
01122 obj_diff_min = obj_diff;
01123 }
01124 }
01125 }
01126 }
01127 }
01128
01129 if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps || Gmin_idx == -1)
01130 return 1;
01131
01132 if (y[Gmin_idx] == +1)
01133 out_i = Gmaxp_idx;
01134 else
01135 out_i = Gmaxn_idx;
01136 out_j = Gmin_idx;
01137
01138 return 0;
01139 }
01140
01141 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01142 {
01143 if(is_upper_bound(i))
01144 {
01145 if(y[i]==+1)
01146 return(-G[i] > Gmax1);
01147 else
01148 return(-G[i] > Gmax4);
01149 }
01150 else if(is_lower_bound(i))
01151 {
01152 if(y[i]==+1)
01153 return(G[i] > Gmax2);
01154 else
01155 return(G[i] > Gmax3);
01156 }
01157 else
01158 return(false);
01159 }
01160
01161 void Solver_NU::do_shrinking()
01162 {
01163 double Gmax1 = -INF;
01164 double Gmax2 = -INF;
01165 double Gmax3 = -INF;
01166 double Gmax4 = -INF;
01167
01168
01169 int i;
01170 for(i=0;i<active_size;i++)
01171 {
01172 if(!is_upper_bound(i))
01173 {
01174 if(y[i]==+1)
01175 {
01176 if(-G[i] > Gmax1) Gmax1 = -G[i];
01177 }
01178 else if(-G[i] > Gmax4) Gmax4 = -G[i];
01179 }
01180 if(!is_lower_bound(i))
01181 {
01182 if(y[i]==+1)
01183 {
01184 if(G[i] > Gmax2) Gmax2 = G[i];
01185 }
01186 else if(G[i] > Gmax3) Gmax3 = G[i];
01187 }
01188 }
01189
01190 if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
01191 {
01192 unshrink = true;
01193 reconstruct_gradient();
01194 active_size = l;
01195 }
01196
01197 for(i=0;i<active_size;i++)
01198 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
01199 {
01200 active_size--;
01201 while (active_size > i)
01202 {
01203 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01204 {
01205 swap_index(i,active_size);
01206 break;
01207 }
01208 active_size--;
01209 }
01210 }
01211 }
01212
01213 double Solver_NU::calculate_rho()
01214 {
01215 int nr_free1 = 0,nr_free2 = 0;
01216 double ub1 = INF, ub2 = INF;
01217 double lb1 = -INF, lb2 = -INF;
01218 double sum_free1 = 0, sum_free2 = 0;
01219
01220 for(int i=0;i<active_size;i++)
01221 {
01222 if(y[i]==+1)
01223 {
01224 if(is_upper_bound(i))
01225 lb1 = max(lb1,G[i]);
01226 else if(is_lower_bound(i))
01227 ub1 = min(ub1,G[i]);
01228 else
01229 {
01230 ++nr_free1;
01231 sum_free1 += G[i];
01232 }
01233 }
01234 else
01235 {
01236 if(is_upper_bound(i))
01237 lb2 = max(lb2,G[i]);
01238 else if(is_lower_bound(i))
01239 ub2 = min(ub2,G[i]);
01240 else
01241 {
01242 ++nr_free2;
01243 sum_free2 += G[i];
01244 }
01245 }
01246 }
01247
01248 double r1,r2;
01249 if(nr_free1 > 0)
01250 r1 = sum_free1/nr_free1;
01251 else
01252 r1 = (ub1+lb1)/2;
01253
01254 if(nr_free2 > 0)
01255 r2 = sum_free2/nr_free2;
01256 else
01257 r2 = (ub2+lb2)/2;
01258
01259 si->r = (r1+r2)/2;
01260 return (r1-r2)/2;
01261 }
01262
01263
01264
01265
01266 class SVC_Q: public Kernel
01267 {
01268 public:
01269 SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01270 :Kernel(prob.l, prob.x, param)
01271 {
01272 clone(y,y_,prob.l);
01273 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01274 QD = new double[prob.l];
01275 for(int i=0;i<prob.l;i++)
01276 QD[i] = (this->*kernel_function)(i,i);
01277 }
01278
01279 Qfloat *get_Q(int i, int len) const
01280 {
01281 Qfloat *data;
01282 int start, j;
01283 if((start = cache->get_data(i,&data,len)) < len)
01284 {
01285 for(j=start;j<len;j++)
01286 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01287 }
01288 return data;
01289 }
01290
01291 double *get_QD() const
01292 {
01293 return QD;
01294 }
01295
01296 void swap_index(int i, int j) const
01297 {
01298 cache->swap_index(i,j);
01299 Kernel::swap_index(i,j);
01300 swap(y[i],y[j]);
01301 swap(QD[i],QD[j]);
01302 }
01303
01304 ~SVC_Q()
01305 {
01306 delete[] y;
01307 delete cache;
01308 delete[] QD;
01309 }
01310 private:
01311 schar *y;
01312 Cache *cache;
01313 double *QD;
01314 };
01315
01316 class ONE_CLASS_Q: public Kernel
01317 {
01318 public:
01319 ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01320 :Kernel(prob.l, prob.x, param)
01321 {
01322 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01323 QD = new double[prob.l];
01324 for(int i=0;i<prob.l;i++)
01325 QD[i] = (this->*kernel_function)(i,i);
01326 }
01327
01328 Qfloat *get_Q(int i, int len) const
01329 {
01330 Qfloat *data;
01331 int start, j;
01332 if((start = cache->get_data(i,&data,len)) < len)
01333 {
01334 for(j=start;j<len;j++)
01335 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01336 }
01337 return data;
01338 }
01339
01340 double *get_QD() const
01341 {
01342 return QD;
01343 }
01344
01345 void swap_index(int i, int j) const
01346 {
01347 cache->swap_index(i,j);
01348 Kernel::swap_index(i,j);
01349 swap(QD[i],QD[j]);
01350 }
01351
01352 ~ONE_CLASS_Q()
01353 {
01354 delete cache;
01355 delete[] QD;
01356 }
01357 private:
01358 Cache *cache;
01359 double *QD;
01360 };
01361
01362 class SVR_Q: public Kernel
01363 {
01364 public:
01365 SVR_Q(const svm_problem& prob, const svm_parameter& param)
01366 :Kernel(prob.l, prob.x, param)
01367 {
01368 l = prob.l;
01369 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01370 QD = new double[2*l];
01371 sign = new schar[2*l];
01372 index = new int[2*l];
01373 for(int k=0;k<l;k++)
01374 {
01375 sign[k] = 1;
01376 sign[k+l] = -1;
01377 index[k] = k;
01378 index[k+l] = k;
01379 QD[k] = (this->*kernel_function)(k,k);
01380 QD[k+l] = QD[k];
01381 }
01382 buffer[0] = new Qfloat[2*l];
01383 buffer[1] = new Qfloat[2*l];
01384 next_buffer = 0;
01385 }
01386
01387 void swap_index(int i, int j) const
01388 {
01389 swap(sign[i],sign[j]);
01390 swap(index[i],index[j]);
01391 swap(QD[i],QD[j]);
01392 }
01393
01394 Qfloat *get_Q(int i, int len) const
01395 {
01396 Qfloat *data;
01397 int j, real_i = index[i];
01398 if(cache->get_data(real_i,&data,l) < l)
01399 {
01400 for(j=0;j<l;j++)
01401 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01402 }
01403
01404
01405 Qfloat *buf = buffer[next_buffer];
01406 next_buffer = 1 - next_buffer;
01407 schar si = sign[i];
01408 for(j=0;j<len;j++)
01409 buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
01410 return buf;
01411 }
01412
01413 double *get_QD() const
01414 {
01415 return QD;
01416 }
01417
01418 ~SVR_Q()
01419 {
01420 delete cache;
01421 delete[] sign;
01422 delete[] index;
01423 delete[] buffer[0];
01424 delete[] buffer[1];
01425 delete[] QD;
01426 }
01427 private:
01428 int l;
01429 Cache *cache;
01430 schar *sign;
01431 int *index;
01432 mutable int next_buffer;
01433 Qfloat *buffer[2];
01434 double *QD;
01435 };
01436
01437
01438
01439
01440 static void solve_c_svc(
01441 const svm_problem *prob, const svm_parameter* param,
01442 double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01443 {
01444 int l = prob->l;
01445 double *minus_ones = new double[l];
01446 schar *y = new schar[l];
01447
01448 int i;
01449
01450 for(i=0;i<l;i++)
01451 {
01452 alpha[i] = 0;
01453 minus_ones[i] = -1;
01454 if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
01455 }
01456
01457 Solver s;
01458 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01459 alpha, Cp, Cn, param->eps, si, param->shrinking);
01460
01461 double sum_alpha=0;
01462 for(i=0;i<l;i++)
01463 sum_alpha += alpha[i];
01464
01465 if (Cp==Cn)
01466 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01467
01468 for(i=0;i<l;i++)
01469 alpha[i] *= y[i];
01470
01471 delete[] minus_ones;
01472 delete[] y;
01473 }
01474
01475 static void solve_nu_svc(
01476 const svm_problem *prob, const svm_parameter *param,
01477 double *alpha, Solver::SolutionInfo* si)
01478 {
01479 int i;
01480 int l = prob->l;
01481 double nu = param->nu;
01482
01483 schar *y = new schar[l];
01484
01485 for(i=0;i<l;i++)
01486 if(prob->y[i]>0)
01487 y[i] = +1;
01488 else
01489 y[i] = -1;
01490
01491 double sum_pos = nu*l/2;
01492 double sum_neg = nu*l/2;
01493
01494 for(i=0;i<l;i++)
01495 if(y[i] == +1)
01496 {
01497 alpha[i] = min(1.0,sum_pos);
01498 sum_pos -= alpha[i];
01499 }
01500 else
01501 {
01502 alpha[i] = min(1.0,sum_neg);
01503 sum_neg -= alpha[i];
01504 }
01505
01506 double *zeros = new double[l];
01507
01508 for(i=0;i<l;i++)
01509 zeros[i] = 0;
01510
01511 Solver_NU s;
01512 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01513 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01514 double r = si->r;
01515
01516 info("C = %f\n",1/r);
01517
01518 for(i=0;i<l;i++)
01519 alpha[i] *= y[i]/r;
01520
01521 si->rho /= r;
01522 si->obj /= (r*r);
01523 si->upper_bound_p = 1/r;
01524 si->upper_bound_n = 1/r;
01525
01526 delete[] y;
01527 delete[] zeros;
01528 }
01529
01530 static void solve_one_class(
01531 const svm_problem *prob, const svm_parameter *param,
01532 double *alpha, Solver::SolutionInfo* si)
01533 {
01534 int l = prob->l;
01535 double *zeros = new double[l];
01536 schar *ones = new schar[l];
01537 int i;
01538
01539 int n = (int)(param->nu*prob->l);
01540
01541 for(i=0;i<n;i++)
01542 alpha[i] = 1;
01543 if(n<prob->l)
01544 alpha[n] = param->nu * prob->l - n;
01545 for(i=n+1;i<l;i++)
01546 alpha[i] = 0;
01547
01548 for(i=0;i<l;i++)
01549 {
01550 zeros[i] = 0;
01551 ones[i] = 1;
01552 }
01553
01554 Solver s;
01555 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01556 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01557
01558 delete[] zeros;
01559 delete[] ones;
01560 }
01561
01562 static void solve_epsilon_svr(
01563 const svm_problem *prob, const svm_parameter *param,
01564 double *alpha, Solver::SolutionInfo* si)
01565 {
01566 int l = prob->l;
01567 double *alpha2 = new double[2*l];
01568 double *linear_term = new double[2*l];
01569 schar *y = new schar[2*l];
01570 int i;
01571
01572 for(i=0;i<l;i++)
01573 {
01574 alpha2[i] = 0;
01575 linear_term[i] = param->p - prob->y[i];
01576 y[i] = 1;
01577
01578 alpha2[i+l] = 0;
01579 linear_term[i+l] = param->p + prob->y[i];
01580 y[i+l] = -1;
01581 }
01582
01583 Solver s;
01584 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01585 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01586
01587 double sum_alpha = 0;
01588 for(i=0;i<l;i++)
01589 {
01590 alpha[i] = alpha2[i] - alpha2[i+l];
01591 sum_alpha += fabs(alpha[i]);
01592 }
01593 info("nu = %f\n",sum_alpha/(param->C*l));
01594
01595 delete[] alpha2;
01596 delete[] linear_term;
01597 delete[] y;
01598 }
01599
01600 static void solve_nu_svr(
01601 const svm_problem *prob, const svm_parameter *param,
01602 double *alpha, Solver::SolutionInfo* si)
01603 {
01604 int l = prob->l;
01605 double C = param->C;
01606 double *alpha2 = new double[2*l];
01607 double *linear_term = new double[2*l];
01608 schar *y = new schar[2*l];
01609 int i;
01610
01611 double sum = C * param->nu * l / 2;
01612 for(i=0;i<l;i++)
01613 {
01614 alpha2[i] = alpha2[i+l] = min(sum,C);
01615 sum -= alpha2[i];
01616
01617 linear_term[i] = - prob->y[i];
01618 y[i] = 1;
01619
01620 linear_term[i+l] = prob->y[i];
01621 y[i+l] = -1;
01622 }
01623
01624 Solver_NU s;
01625 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01626 alpha2, C, C, param->eps, si, param->shrinking);
01627
01628 info("epsilon = %f\n",-si->r);
01629
01630 for(i=0;i<l;i++)
01631 alpha[i] = alpha2[i] - alpha2[i+l];
01632
01633 delete[] alpha2;
01634 delete[] linear_term;
01635 delete[] y;
01636 }
01637
01638
01639
01640
01641 struct decision_function
01642 {
01643 double *alpha;
01644 double rho;
01645 };
01646
01647 static decision_function svm_train_one(
01648 const svm_problem *prob, const svm_parameter *param,
01649 double Cp, double Cn)
01650 {
01651 double *alpha = Malloc(double,prob->l);
01652 Solver::SolutionInfo si;
01653 switch(param->svm_type)
01654 {
01655 case C_SVC:
01656 solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01657 break;
01658 case NU_SVC:
01659 solve_nu_svc(prob,param,alpha,&si);
01660 break;
01661 case ONE_CLASS:
01662 solve_one_class(prob,param,alpha,&si);
01663 break;
01664 case EPSILON_SVR:
01665 solve_epsilon_svr(prob,param,alpha,&si);
01666 break;
01667 case NU_SVR:
01668 solve_nu_svr(prob,param,alpha,&si);
01669 break;
01670 }
01671
01672 info("obj = %f, rho = %f\n",si.obj,si.rho);
01673
01674
01675
01676 int nSV = 0;
01677 int nBSV = 0;
01678 for(int i=0;i<prob->l;i++)
01679 {
01680 if(fabs(alpha[i]) > 0)
01681 {
01682 ++nSV;
01683 if(prob->y[i] > 0)
01684 {
01685 if(fabs(alpha[i]) >= si.upper_bound_p)
01686 ++nBSV;
01687 }
01688 else
01689 {
01690 if(fabs(alpha[i]) >= si.upper_bound_n)
01691 ++nBSV;
01692 }
01693 }
01694 }
01695
01696 info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01697
01698 decision_function f;
01699 f.alpha = alpha;
01700 f.rho = si.rho;
01701 return f;
01702 }
01703
01704
01705 static void sigmoid_train(
01706 int l, const double *dec_values, const double *labels,
01707 double& A, double& B)
01708 {
01709 double prior1=0, prior0 = 0;
01710 int i;
01711
01712 for (i=0;i<l;i++)
01713 if (labels[i] > 0) prior1+=1;
01714 else prior0+=1;
01715
01716 int max_iter=100;
01717 double min_step=1e-10;
01718 double sigma=1e-12;
01719 double eps=1e-5;
01720 double hiTarget=(prior1+1.0)/(prior1+2.0);
01721 double loTarget=1/(prior0+2.0);
01722 double *t=Malloc(double,l);
01723 double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01724 double newA,newB,newf,d1,d2;
01725 int iter;
01726
01727
01728 A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01729 double fval = 0.0;
01730
01731 for (i=0;i<l;i++)
01732 {
01733 if (labels[i]>0) t[i]=hiTarget;
01734 else t[i]=loTarget;
01735 fApB = dec_values[i]*A+B;
01736 if (fApB>=0)
01737 fval += t[i]*fApB + log(1+exp(-fApB));
01738 else
01739 fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01740 }
01741 for (iter=0;iter<max_iter;iter++)
01742 {
01743
01744 h11=sigma;
01745 h22=sigma;
01746 h21=0.0;g1=0.0;g2=0.0;
01747 for (i=0;i<l;i++)
01748 {
01749 fApB = dec_values[i]*A+B;
01750 if (fApB >= 0)
01751 {
01752 p=exp(-fApB)/(1.0+exp(-fApB));
01753 q=1.0/(1.0+exp(-fApB));
01754 }
01755 else
01756 {
01757 p=1.0/(1.0+exp(fApB));
01758 q=exp(fApB)/(1.0+exp(fApB));
01759 }
01760 d2=p*q;
01761 h11+=dec_values[i]*dec_values[i]*d2;
01762 h22+=d2;
01763 h21+=dec_values[i]*d2;
01764 d1=t[i]-p;
01765 g1+=dec_values[i]*d1;
01766 g2+=d1;
01767 }
01768
01769
01770 if (fabs(g1)<eps && fabs(g2)<eps)
01771 break;
01772
01773
01774 det=h11*h22-h21*h21;
01775 dA=-(h22*g1 - h21 * g2) / det;
01776 dB=-(-h21*g1+ h11 * g2) / det;
01777 gd=g1*dA+g2*dB;
01778
01779
01780 stepsize = 1;
01781 while (stepsize >= min_step)
01782 {
01783 newA = A + stepsize * dA;
01784 newB = B + stepsize * dB;
01785
01786
01787 newf = 0.0;
01788 for (i=0;i<l;i++)
01789 {
01790 fApB = dec_values[i]*newA+newB;
01791 if (fApB >= 0)
01792 newf += t[i]*fApB + log(1+exp(-fApB));
01793 else
01794 newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01795 }
01796
01797 if (newf<fval+0.0001*stepsize*gd)
01798 {
01799 A=newA;B=newB;fval=newf;
01800 break;
01801 }
01802 else
01803 stepsize = stepsize / 2.0;
01804 }
01805
01806 if (stepsize < min_step)
01807 {
01808 info("Line search fails in two-class probability estimates\n");
01809 break;
01810 }
01811 }
01812
01813 if (iter>=max_iter)
01814 info("Reaching maximal iterations in two-class probability estimates\n");
01815 free(t);
01816 }
01817
01818 static double sigmoid_predict(double decision_value, double A, double B)
01819 {
01820 double fApB = decision_value*A+B;
01821
01822 if (fApB >= 0)
01823 return exp(-fApB)/(1.0+exp(-fApB));
01824 else
01825 return 1.0/(1+exp(fApB)) ;
01826 }
01827
01828
01829 static void multiclass_probability(int k, double **r, double *p)
01830 {
01831 int t,j;
01832 int iter = 0, max_iter=max(100,k);
01833 double **Q=Malloc(double *,k);
01834 double *Qp=Malloc(double,k);
01835 double pQp, eps=0.005/k;
01836
01837 for (t=0;t<k;t++)
01838 {
01839 p[t]=1.0/k;
01840 Q[t]=Malloc(double,k);
01841 Q[t][t]=0;
01842 for (j=0;j<t;j++)
01843 {
01844 Q[t][t]+=r[j][t]*r[j][t];
01845 Q[t][j]=Q[j][t];
01846 }
01847 for (j=t+1;j<k;j++)
01848 {
01849 Q[t][t]+=r[j][t]*r[j][t];
01850 Q[t][j]=-r[j][t]*r[t][j];
01851 }
01852 }
01853 for (iter=0;iter<max_iter;iter++)
01854 {
01855
01856 pQp=0;
01857 for (t=0;t<k;t++)
01858 {
01859 Qp[t]=0;
01860 for (j=0;j<k;j++)
01861 Qp[t]+=Q[t][j]*p[j];
01862 pQp+=p[t]*Qp[t];
01863 }
01864 double max_error=0;
01865 for (t=0;t<k;t++)
01866 {
01867 double error=fabs(Qp[t]-pQp);
01868 if (error>max_error)
01869 max_error=error;
01870 }
01871 if (max_error<eps) break;
01872
01873 for (t=0;t<k;t++)
01874 {
01875 double diff=(-Qp[t]+pQp)/Q[t][t];
01876 p[t]+=diff;
01877 pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01878 for (j=0;j<k;j++)
01879 {
01880 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01881 p[j]/=(1+diff);
01882 }
01883 }
01884 }
01885 if (iter>=max_iter)
01886 info("Exceeds max_iter in multiclass_prob\n");
01887 for(t=0;t<k;t++) free(Q[t]);
01888 free(Q);
01889 free(Qp);
01890 }
01891
01892
01893 static void svm_binary_svc_probability(
01894 const svm_problem *prob, const svm_parameter *param,
01895 double Cp, double Cn, double& probA, double& probB)
01896 {
01897 int i;
01898 int nr_fold = 5;
01899 int *perm = Malloc(int,prob->l);
01900 double *dec_values = Malloc(double,prob->l);
01901
01902
01903 for(i=0;i<prob->l;i++) perm[i]=i;
01904 for(i=0;i<prob->l;i++)
01905 {
01906 int j = i+rand()%(prob->l-i);
01907 swap(perm[i],perm[j]);
01908 }
01909 for(i=0;i<nr_fold;i++)
01910 {
01911 int begin = i*prob->l/nr_fold;
01912 int end = (i+1)*prob->l/nr_fold;
01913 int j,k;
01914 struct svm_problem subprob;
01915
01916 subprob.l = prob->l-(end-begin);
01917 subprob.x = Malloc(struct svm_node*,subprob.l);
01918 subprob.y = Malloc(double,subprob.l);
01919
01920 k=0;
01921 for(j=0;j<begin;j++)
01922 {
01923 subprob.x[k] = prob->x[perm[j]];
01924 subprob.y[k] = prob->y[perm[j]];
01925 ++k;
01926 }
01927 for(j=end;j<prob->l;j++)
01928 {
01929 subprob.x[k] = prob->x[perm[j]];
01930 subprob.y[k] = prob->y[perm[j]];
01931 ++k;
01932 }
01933 int p_count=0,n_count=0;
01934 for(j=0;j<k;j++)
01935 if(subprob.y[j]>0)
01936 p_count++;
01937 else
01938 n_count++;
01939
01940 if(p_count==0 && n_count==0)
01941 for(j=begin;j<end;j++)
01942 dec_values[perm[j]] = 0;
01943 else if(p_count > 0 && n_count == 0)
01944 for(j=begin;j<end;j++)
01945 dec_values[perm[j]] = 1;
01946 else if(p_count == 0 && n_count > 0)
01947 for(j=begin;j<end;j++)
01948 dec_values[perm[j]] = -1;
01949 else
01950 {
01951 svm_parameter subparam = *param;
01952 subparam.probability=0;
01953 subparam.C=1.0;
01954 subparam.nr_weight=2;
01955 subparam.weight_label = Malloc(int,2);
01956 subparam.weight = Malloc(double,2);
01957 subparam.weight_label[0]=+1;
01958 subparam.weight_label[1]=-1;
01959 subparam.weight[0]=Cp;
01960 subparam.weight[1]=Cn;
01961 struct svm_model *submodel = svm_train(&subprob,&subparam);
01962 for(j=begin;j<end;j++)
01963 {
01964 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
01965
01966 dec_values[perm[j]] *= submodel->label[0];
01967 }
01968 svm_free_and_destroy_model(&submodel);
01969 svm_destroy_param(&subparam);
01970 }
01971 free(subprob.x);
01972 free(subprob.y);
01973 }
01974 sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
01975 free(dec_values);
01976 free(perm);
01977 }
01978
01979
01980 static double svm_svr_probability(
01981 const svm_problem *prob, const svm_parameter *param)
01982 {
01983 int i;
01984 int nr_fold = 5;
01985 double *ymv = Malloc(double,prob->l);
01986 double mae = 0;
01987
01988 svm_parameter newparam = *param;
01989 newparam.probability = 0;
01990 svm_cross_validation(prob,&newparam,nr_fold,ymv);
01991 for(i=0;i<prob->l;i++)
01992 {
01993 ymv[i]=prob->y[i]-ymv[i];
01994 mae += fabs(ymv[i]);
01995 }
01996 mae /= prob->l;
01997 double std=sqrt(2*mae*mae);
01998 int count=0;
01999 mae=0;
02000 for(i=0;i<prob->l;i++)
02001 if (fabs(ymv[i]) > 5*std)
02002 count=count+1;
02003 else
02004 mae+=fabs(ymv[i]);
02005 mae /= (prob->l-count);
02006 info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
02007 free(ymv);
02008 return mae;
02009 }
02010
02011
02012
02013
02014 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02015 {
02016 int l = prob->l;
02017 int max_nr_class = 16;
02018 int nr_class = 0;
02019 int *label = Malloc(int,max_nr_class);
02020 int *count = Malloc(int,max_nr_class);
02021 int *data_label = Malloc(int,l);
02022 int i;
02023
02024 for(i=0;i<l;i++)
02025 {
02026 int this_label = (int)prob->y[i];
02027 int j;
02028 for(j=0;j<nr_class;j++)
02029 {
02030 if(this_label == label[j])
02031 {
02032 ++count[j];
02033 break;
02034 }
02035 }
02036 data_label[i] = j;
02037 if(j == nr_class)
02038 {
02039 if(nr_class == max_nr_class)
02040 {
02041 max_nr_class *= 2;
02042 label = (int *)realloc(label,max_nr_class*sizeof(int));
02043 count = (int *)realloc(count,max_nr_class*sizeof(int));
02044 }
02045 label[nr_class] = this_label;
02046 count[nr_class] = 1;
02047 ++nr_class;
02048 }
02049 }
02050
02051
02052
02053
02054
02055
02056 if (nr_class == 2 && label[0] == -1 && label[1] == 1)
02057 {
02058 swap(label[0],label[1]);
02059 swap(count[0],count[1]);
02060 for(i=0;i<l;i++)
02061 {
02062 if(data_label[i] == 0)
02063 data_label[i] = 1;
02064 else
02065 data_label[i] = 0;
02066 }
02067 }
02068
02069 int *start = Malloc(int,nr_class);
02070 start[0] = 0;
02071 for(i=1;i<nr_class;i++)
02072 start[i] = start[i-1]+count[i-1];
02073 for(i=0;i<l;i++)
02074 {
02075 perm[start[data_label[i]]] = i;
02076 ++start[data_label[i]];
02077 }
02078 start[0] = 0;
02079 for(i=1;i<nr_class;i++)
02080 start[i] = start[i-1]+count[i-1];
02081
02082 *nr_class_ret = nr_class;
02083 *label_ret = label;
02084 *start_ret = start;
02085 *count_ret = count;
02086 free(data_label);
02087 }
02088
02089
02090
02091
02092 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02093 {
02094 svm_model *model = Malloc(svm_model,1);
02095 model->param = *param;
02096 model->free_sv = 0;
02097
02098 if(param->svm_type == ONE_CLASS ||
02099 param->svm_type == EPSILON_SVR ||
02100 param->svm_type == NU_SVR)
02101 {
02102
02103 model->nr_class = 2;
02104 model->label = NULL;
02105 model->nSV = NULL;
02106 model->probA = NULL; model->probB = NULL;
02107 model->sv_coef = Malloc(double *,1);
02108
02109 if(param->probability &&
02110 (param->svm_type == EPSILON_SVR ||
02111 param->svm_type == NU_SVR))
02112 {
02113 model->probA = Malloc(double,1);
02114 model->probA[0] = svm_svr_probability(prob,param);
02115 }
02116
02117 decision_function f = svm_train_one(prob,param,0,0);
02118 model->rho = Malloc(double,1);
02119 model->rho[0] = f.rho;
02120
02121 int nSV = 0;
02122 int i;
02123 for(i=0;i<prob->l;i++)
02124 if(fabs(f.alpha[i]) > 0) ++nSV;
02125 model->l = nSV;
02126 model->SV = Malloc(svm_node *,nSV);
02127 model->sv_coef[0] = Malloc(double,nSV);
02128 model->sv_indices = Malloc(int,nSV);
02129 int j = 0;
02130 for(i=0;i<prob->l;i++)
02131 if(fabs(f.alpha[i]) > 0)
02132 {
02133 model->SV[j] = prob->x[i];
02134 model->sv_coef[0][j] = f.alpha[i];
02135 model->sv_indices[j] = i+1;
02136 ++j;
02137 }
02138
02139 free(f.alpha);
02140 }
02141 else
02142 {
02143
02144 int l = prob->l;
02145 int nr_class;
02146 int *label = NULL;
02147 int *start = NULL;
02148 int *count = NULL;
02149 int *perm = Malloc(int,l);
02150
02151
02152 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02153 if(nr_class == 1)
02154 info("WARNING: training data in only one class. See README for details.\n");
02155
02156 svm_node **x = Malloc(svm_node *,l);
02157 int i;
02158 for(i=0;i<l;i++)
02159 x[i] = prob->x[perm[i]];
02160
02161
02162
02163 double *weighted_C = Malloc(double, nr_class);
02164 for(i=0;i<nr_class;i++)
02165 weighted_C[i] = param->C;
02166 for(i=0;i<param->nr_weight;i++)
02167 {
02168 int j;
02169 for(j=0;j<nr_class;j++)
02170 if(param->weight_label[i] == label[j])
02171 break;
02172 if(j == nr_class)
02173 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
02174 else
02175 weighted_C[j] *= param->weight[i];
02176 }
02177
02178
02179
02180 bool *nonzero = Malloc(bool,l);
02181 for(i=0;i<l;i++)
02182 nonzero[i] = false;
02183 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02184
02185 double *probA=NULL,*probB=NULL;
02186 if (param->probability)
02187 {
02188 probA=Malloc(double,nr_class*(nr_class-1)/2);
02189 probB=Malloc(double,nr_class*(nr_class-1)/2);
02190 }
02191
02192 int p = 0;
02193 for(i=0;i<nr_class;i++)
02194 for(int j=i+1;j<nr_class;j++)
02195 {
02196 svm_problem sub_prob;
02197 int si = start[i], sj = start[j];
02198 int ci = count[i], cj = count[j];
02199 sub_prob.l = ci+cj;
02200 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02201 sub_prob.y = Malloc(double,sub_prob.l);
02202 int k;
02203 for(k=0;k<ci;k++)
02204 {
02205 sub_prob.x[k] = x[si+k];
02206 sub_prob.y[k] = +1;
02207 }
02208 for(k=0;k<cj;k++)
02209 {
02210 sub_prob.x[ci+k] = x[sj+k];
02211 sub_prob.y[ci+k] = -1;
02212 }
02213
02214 if(param->probability)
02215 svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02216
02217 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02218 for(k=0;k<ci;k++)
02219 if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02220 nonzero[si+k] = true;
02221 for(k=0;k<cj;k++)
02222 if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02223 nonzero[sj+k] = true;
02224 free(sub_prob.x);
02225 free(sub_prob.y);
02226 ++p;
02227 }
02228
02229
02230
02231 model->nr_class = nr_class;
02232
02233 model->label = Malloc(int,nr_class);
02234 for(i=0;i<nr_class;i++)
02235 model->label[i] = label[i];
02236
02237 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02238 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02239 model->rho[i] = f[i].rho;
02240
02241 if(param->probability)
02242 {
02243 model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02244 model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02245 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02246 {
02247 model->probA[i] = probA[i];
02248 model->probB[i] = probB[i];
02249 }
02250 }
02251 else
02252 {
02253 model->probA=NULL;
02254 model->probB=NULL;
02255 }
02256
02257 int total_sv = 0;
02258 int *nz_count = Malloc(int,nr_class);
02259 model->nSV = Malloc(int,nr_class);
02260 for(i=0;i<nr_class;i++)
02261 {
02262 int nSV = 0;
02263 for(int j=0;j<count[i];j++)
02264 if(nonzero[start[i]+j])
02265 {
02266 ++nSV;
02267 ++total_sv;
02268 }
02269 model->nSV[i] = nSV;
02270 nz_count[i] = nSV;
02271 }
02272
02273 info("Total nSV = %d\n",total_sv);
02274
02275 model->l = total_sv;
02276 model->SV = Malloc(svm_node *,total_sv);
02277 model->sv_indices = Malloc(int,total_sv);
02278 p = 0;
02279 for(i=0;i<l;i++)
02280 if(nonzero[i])
02281 {
02282 model->SV[p] = x[i];
02283 model->sv_indices[p++] = perm[i] + 1;
02284 }
02285
02286 int *nz_start = Malloc(int,nr_class);
02287 nz_start[0] = 0;
02288 for(i=1;i<nr_class;i++)
02289 nz_start[i] = nz_start[i-1]+nz_count[i-1];
02290
02291 model->sv_coef = Malloc(double *,nr_class-1);
02292 for(i=0;i<nr_class-1;i++)
02293 model->sv_coef[i] = Malloc(double,total_sv);
02294
02295 p = 0;
02296 for(i=0;i<nr_class;i++)
02297 for(int j=i+1;j<nr_class;j++)
02298 {
02299
02300
02301
02302
02303 int si = start[i];
02304 int sj = start[j];
02305 int ci = count[i];
02306 int cj = count[j];
02307
02308 int q = nz_start[i];
02309 int k;
02310 for(k=0;k<ci;k++)
02311 if(nonzero[si+k])
02312 model->sv_coef[j-1][q++] = f[p].alpha[k];
02313 q = nz_start[j];
02314 for(k=0;k<cj;k++)
02315 if(nonzero[sj+k])
02316 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02317 ++p;
02318 }
02319
02320 free(label);
02321 free(probA);
02322 free(probB);
02323 free(count);
02324 free(perm);
02325 free(start);
02326 free(x);
02327 free(weighted_C);
02328 free(nonzero);
02329 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02330 free(f[i].alpha);
02331 free(f);
02332 free(nz_count);
02333 free(nz_start);
02334 }
02335 return model;
02336 }
02337
02338
02339 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02340 {
02341 int i;
02342 int *fold_start;
02343 int l = prob->l;
02344 int *perm = Malloc(int,l);
02345 int nr_class;
02346 if (nr_fold > l)
02347 {
02348 nr_fold = l;
02349 fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
02350 }
02351 fold_start = Malloc(int,nr_fold+1);
02352
02353
02354 if((param->svm_type == C_SVC ||
02355 param->svm_type == NU_SVC) && nr_fold < l)
02356 {
02357 int *start = NULL;
02358 int *label = NULL;
02359 int *count = NULL;
02360 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02361
02362
02363 int *fold_count = Malloc(int,nr_fold);
02364 int c;
02365 int *index = Malloc(int,l);
02366 for(i=0;i<l;i++)
02367 index[i]=perm[i];
02368 for (c=0; c<nr_class; c++)
02369 for(i=0;i<count[c];i++)
02370 {
02371 int j = i+rand()%(count[c]-i);
02372 swap(index[start[c]+j],index[start[c]+i]);
02373 }
02374 for(i=0;i<nr_fold;i++)
02375 {
02376 fold_count[i] = 0;
02377 for (c=0; c<nr_class;c++)
02378 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02379 }
02380 fold_start[0]=0;
02381 for (i=1;i<=nr_fold;i++)
02382 fold_start[i] = fold_start[i-1]+fold_count[i-1];
02383 for (c=0; c<nr_class;c++)
02384 for(i=0;i<nr_fold;i++)
02385 {
02386 int begin = start[c]+i*count[c]/nr_fold;
02387 int end = start[c]+(i+1)*count[c]/nr_fold;
02388 for(int j=begin;j<end;j++)
02389 {
02390 perm[fold_start[i]] = index[j];
02391 fold_start[i]++;
02392 }
02393 }
02394 fold_start[0]=0;
02395 for (i=1;i<=nr_fold;i++)
02396 fold_start[i] = fold_start[i-1]+fold_count[i-1];
02397 free(start);
02398 free(label);
02399 free(count);
02400 free(index);
02401 free(fold_count);
02402 }
02403 else
02404 {
02405 for(i=0;i<l;i++) perm[i]=i;
02406 for(i=0;i<l;i++)
02407 {
02408 int j = i+rand()%(l-i);
02409 swap(perm[i],perm[j]);
02410 }
02411 for(i=0;i<=nr_fold;i++)
02412 fold_start[i]=i*l/nr_fold;
02413 }
02414
02415 for(i=0;i<nr_fold;i++)
02416 {
02417 int begin = fold_start[i];
02418 int end = fold_start[i+1];
02419 int j,k;
02420 struct svm_problem subprob;
02421
02422 subprob.l = l-(end-begin);
02423 subprob.x = Malloc(struct svm_node*,subprob.l);
02424 subprob.y = Malloc(double,subprob.l);
02425
02426 k=0;
02427 for(j=0;j<begin;j++)
02428 {
02429 subprob.x[k] = prob->x[perm[j]];
02430 subprob.y[k] = prob->y[perm[j]];
02431 ++k;
02432 }
02433 for(j=end;j<l;j++)
02434 {
02435 subprob.x[k] = prob->x[perm[j]];
02436 subprob.y[k] = prob->y[perm[j]];
02437 ++k;
02438 }
02439 struct svm_model *submodel = svm_train(&subprob,param);
02440 if(param->probability &&
02441 (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02442 {
02443 double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02444 for(j=begin;j<end;j++)
02445 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02446 free(prob_estimates);
02447 }
02448 else
02449 for(j=begin;j<end;j++)
02450 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02451 svm_free_and_destroy_model(&submodel);
02452 free(subprob.x);
02453 free(subprob.y);
02454 }
02455 free(fold_start);
02456 free(perm);
02457 }
02458
02459
02460 int svm_get_svm_type(const svm_model *model)
02461 {
02462 return model->param.svm_type;
02463 }
02464
02465 int svm_get_nr_class(const svm_model *model)
02466 {
02467 return model->nr_class;
02468 }
02469
02470 void svm_get_labels(const svm_model *model, int* label)
02471 {
02472 if (model->label != NULL)
02473 for(int i=0;i<model->nr_class;i++)
02474 label[i] = model->label[i];
02475 }
02476
02477 void svm_get_sv_indices(const svm_model *model, int* indices)
02478 {
02479 if (model->sv_indices != NULL)
02480 for(int i=0;i<model->l;i++)
02481 indices[i] = model->sv_indices[i];
02482 }
02483
02484 int svm_get_nr_sv(const svm_model *model)
02485 {
02486 return model->l;
02487 }
02488
02489 double svm_get_svr_probability(const svm_model *model)
02490 {
02491 if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02492 model->probA!=NULL)
02493 return model->probA[0];
02494 else
02495 {
02496 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
02497 return 0;
02498 }
02499 }
02500
02501 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02502 {
02503 int i;
02504 if(model->param.svm_type == ONE_CLASS ||
02505 model->param.svm_type == EPSILON_SVR ||
02506 model->param.svm_type == NU_SVR)
02507 {
02508 double *sv_coef = model->sv_coef[0];
02509 double sum = 0;
02510 for(i=0;i<model->l;i++)
02511 sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02512 sum -= model->rho[0];
02513 *dec_values = sum;
02514
02515 if(model->param.svm_type == ONE_CLASS)
02516 return (sum>0)?1:-1;
02517 else
02518 return sum;
02519 }
02520 else
02521 {
02522 int nr_class = model->nr_class;
02523 int l = model->l;
02524
02525 double *kvalue = Malloc(double,l);
02526 for(i=0;i<l;i++)
02527 kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02528
02529 int *start = Malloc(int,nr_class);
02530 start[0] = 0;
02531 for(i=1;i<nr_class;i++)
02532 start[i] = start[i-1]+model->nSV[i-1];
02533
02534 int *vote = Malloc(int,nr_class);
02535 for(i=0;i<nr_class;i++)
02536 vote[i] = 0;
02537
02538 int p=0;
02539 for(i=0;i<nr_class;i++)
02540 for(int j=i+1;j<nr_class;j++)
02541 {
02542 double sum = 0;
02543 int si = start[i];
02544 int sj = start[j];
02545 int ci = model->nSV[i];
02546 int cj = model->nSV[j];
02547
02548 int k;
02549 double *coef1 = model->sv_coef[j-1];
02550 double *coef2 = model->sv_coef[i];
02551 for(k=0;k<ci;k++)
02552 sum += coef1[si+k] * kvalue[si+k];
02553 for(k=0;k<cj;k++)
02554 sum += coef2[sj+k] * kvalue[sj+k];
02555 sum -= model->rho[p];
02556 dec_values[p] = sum;
02557
02558 if(dec_values[p] > 0)
02559 ++vote[i];
02560 else
02561 ++vote[j];
02562 p++;
02563 }
02564
02565 int vote_max_idx = 0;
02566 for(i=1;i<nr_class;i++)
02567 if(vote[i] > vote[vote_max_idx])
02568 vote_max_idx = i;
02569
02570 free(kvalue);
02571 free(start);
02572 free(vote);
02573 return model->label[vote_max_idx];
02574 }
02575 }
02576
02577 double svm_predict(const svm_model *model, const svm_node *x)
02578 {
02579 int nr_class = model->nr_class;
02580 double *dec_values;
02581 if(model->param.svm_type == ONE_CLASS ||
02582 model->param.svm_type == EPSILON_SVR ||
02583 model->param.svm_type == NU_SVR)
02584 dec_values = Malloc(double, 1);
02585 else
02586 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02587 double pred_result = svm_predict_values(model, x, dec_values);
02588 free(dec_values);
02589 return pred_result;
02590 }
02591
02592 double svm_predict_probability(
02593 const svm_model *model, const svm_node *x, double *prob_estimates)
02594 {
02595 if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02596 model->probA!=NULL && model->probB!=NULL)
02597 {
02598 int i;
02599 int nr_class = model->nr_class;
02600 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02601 svm_predict_values(model, x, dec_values);
02602
02603 double min_prob=1e-7;
02604 double **pairwise_prob=Malloc(double *,nr_class);
02605 for(i=0;i<nr_class;i++)
02606 pairwise_prob[i]=Malloc(double,nr_class);
02607 int k=0;
02608 for(i=0;i<nr_class;i++)
02609 for(int j=i+1;j<nr_class;j++)
02610 {
02611 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02612 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02613 k++;
02614 }
02615 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02616
02617 int prob_max_idx = 0;
02618 for(i=1;i<nr_class;i++)
02619 if(prob_estimates[i] > prob_estimates[prob_max_idx])
02620 prob_max_idx = i;
02621 for(i=0;i<nr_class;i++)
02622 free(pairwise_prob[i]);
02623 free(dec_values);
02624 free(pairwise_prob);
02625 return model->label[prob_max_idx];
02626 }
02627 else
02628 return svm_predict(model, x);
02629 }
02630
02631 static const char *svm_type_table[] =
02632 {
02633 "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02634 };
02635
02636 static const char *kernel_type_table[]=
02637 {
02638 "linear","polynomial","rbf","sigmoid","precomputed",NULL
02639 };
02640
02641 int svm_save_model(const char *model_file_name, const svm_model *model)
02642 {
02643 FILE *fp = fopen(model_file_name,"w");
02644 if(fp==NULL) return -1;
02645
02646 char *old_locale = setlocale(LC_ALL, NULL);
02647 if (old_locale) {
02648 old_locale = strdup(old_locale);
02649 }
02650 setlocale(LC_ALL, "C");
02651
02652 const svm_parameter& param = model->param;
02653
02654 fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02655 fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02656
02657 if(param.kernel_type == POLY)
02658 fprintf(fp,"degree %d\n", param.degree);
02659
02660 if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02661 fprintf(fp,"gamma %g\n", param.gamma);
02662
02663 if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02664 fprintf(fp,"coef0 %g\n", param.coef0);
02665
02666 int nr_class = model->nr_class;
02667 int l = model->l;
02668 fprintf(fp, "nr_class %d\n", nr_class);
02669 fprintf(fp, "total_sv %d\n",l);
02670
02671 {
02672 fprintf(fp, "rho");
02673 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02674 fprintf(fp," %g",model->rho[i]);
02675 fprintf(fp, "\n");
02676 }
02677
02678 if(model->label)
02679 {
02680 fprintf(fp, "label");
02681 for(int i=0;i<nr_class;i++)
02682 fprintf(fp," %d",model->label[i]);
02683 fprintf(fp, "\n");
02684 }
02685
02686 if(model->probA)
02687 {
02688 fprintf(fp, "probA");
02689 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02690 fprintf(fp," %g",model->probA[i]);
02691 fprintf(fp, "\n");
02692 }
02693 if(model->probB)
02694 {
02695 fprintf(fp, "probB");
02696 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02697 fprintf(fp," %g",model->probB[i]);
02698 fprintf(fp, "\n");
02699 }
02700
02701 if(model->nSV)
02702 {
02703 fprintf(fp, "nr_sv");
02704 for(int i=0;i<nr_class;i++)
02705 fprintf(fp," %d",model->nSV[i]);
02706 fprintf(fp, "\n");
02707 }
02708
02709 fprintf(fp, "SV\n");
02710 const double * const *sv_coef = model->sv_coef;
02711 const svm_node * const *SV = model->SV;
02712
02713 for(int i=0;i<l;i++)
02714 {
02715 for(int j=0;j<nr_class-1;j++)
02716 fprintf(fp, "%.16g ",sv_coef[j][i]);
02717
02718 const svm_node *p = SV[i];
02719
02720 if(param.kernel_type == PRECOMPUTED)
02721 fprintf(fp,"0:%d ",(int)(p->value));
02722 else
02723 while(p->index != -1)
02724 {
02725 fprintf(fp,"%d:%.8g ",p->index,p->value);
02726 p++;
02727 }
02728 fprintf(fp, "\n");
02729 }
02730
02731 setlocale(LC_ALL, old_locale);
02732 free(old_locale);
02733
02734 if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02735 else return 0;
02736 }
02737
02738 static char *line = NULL;
02739 static int max_line_len;
02740
02741 static char* readline(FILE *input)
02742 {
02743 int len;
02744
02745 if(fgets(line,max_line_len,input) == NULL)
02746 return NULL;
02747
02748 while(strrchr(line,'\n') == NULL)
02749 {
02750 max_line_len *= 2;
02751 line = (char *) realloc(line,max_line_len);
02752 len = (int) strlen(line);
02753 if(fgets(line+len,max_line_len-len,input) == NULL)
02754 break;
02755 }
02756 return line;
02757 }
02758
02759
02760
02761
02762
02763
02764
02765
02766 #define FSCANF(_stream, _format, _var) do{ if (fscanf(_stream, _format, _var) != 1) return false; }while(0)
02767 bool read_model_header(FILE *fp, svm_model* model)
02768 {
02769 svm_parameter& param = model->param;
02770 char cmd[81];
02771 while(1)
02772 {
02773 FSCANF(fp,"%80s",cmd);
02774
02775 if(strcmp(cmd,"svm_type")==0)
02776 {
02777 FSCANF(fp,"%80s",cmd);
02778 int i;
02779 for(i=0;svm_type_table[i];i++)
02780 {
02781 if(strcmp(svm_type_table[i],cmd)==0)
02782 {
02783 param.svm_type=i;
02784 break;
02785 }
02786 }
02787 if(svm_type_table[i] == NULL)
02788 {
02789 fprintf(stderr,"unknown svm type.\n");
02790 return false;
02791 }
02792 }
02793 else if(strcmp(cmd,"kernel_type")==0)
02794 {
02795 FSCANF(fp,"%80s",cmd);
02796 int i;
02797 for(i=0;kernel_type_table[i];i++)
02798 {
02799 if(strcmp(kernel_type_table[i],cmd)==0)
02800 {
02801 param.kernel_type=i;
02802 break;
02803 }
02804 }
02805 if(kernel_type_table[i] == NULL)
02806 {
02807 fprintf(stderr,"unknown kernel function.\n");
02808 return false;
02809 }
02810 }
02811 else if(strcmp(cmd,"degree")==0)
02812 FSCANF(fp,"%d",¶m.degree);
02813 else if(strcmp(cmd,"gamma")==0)
02814 FSCANF(fp,"%lf",¶m.gamma);
02815 else if(strcmp(cmd,"coef0")==0)
02816 FSCANF(fp,"%lf",¶m.coef0);
02817 else if(strcmp(cmd,"nr_class")==0)
02818 FSCANF(fp,"%d",&model->nr_class);
02819 else if(strcmp(cmd,"total_sv")==0)
02820 FSCANF(fp,"%d",&model->l);
02821 else if(strcmp(cmd,"rho")==0)
02822 {
02823 int n = model->nr_class * (model->nr_class-1)/2;
02824 model->rho = Malloc(double,n);
02825 for(int i=0;i<n;i++)
02826 FSCANF(fp,"%lf",&model->rho[i]);
02827 }
02828 else if(strcmp(cmd,"label")==0)
02829 {
02830 int n = model->nr_class;
02831 model->label = Malloc(int,n);
02832 for(int i=0;i<n;i++)
02833 FSCANF(fp,"%d",&model->label[i]);
02834 }
02835 else if(strcmp(cmd,"probA")==0)
02836 {
02837 int n = model->nr_class * (model->nr_class-1)/2;
02838 model->probA = Malloc(double,n);
02839 for(int i=0;i<n;i++)
02840 FSCANF(fp,"%lf",&model->probA[i]);
02841 }
02842 else if(strcmp(cmd,"probB")==0)
02843 {
02844 int n = model->nr_class * (model->nr_class-1)/2;
02845 model->probB = Malloc(double,n);
02846 for(int i=0;i<n;i++)
02847 FSCANF(fp,"%lf",&model->probB[i]);
02848 }
02849 else if(strcmp(cmd,"nr_sv")==0)
02850 {
02851 int n = model->nr_class;
02852 model->nSV = Malloc(int,n);
02853 for(int i=0;i<n;i++)
02854 FSCANF(fp,"%d",&model->nSV[i]);
02855 }
02856 else if(strcmp(cmd,"SV")==0)
02857 {
02858 while(1)
02859 {
02860 int c = getc(fp);
02861 if(c==EOF || c=='\n') break;
02862 }
02863 break;
02864 }
02865 else
02866 {
02867 fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02868 return false;
02869 }
02870 }
02871
02872 return true;
02873
02874 }
02875
02876 svm_model *svm_load_model(const char *model_file_name)
02877 {
02878 FILE *fp = fopen(model_file_name,"rb");
02879 if(fp==NULL) return NULL;
02880
02881 char *old_locale = setlocale(LC_ALL, NULL);
02882 if (old_locale) {
02883 old_locale = strdup(old_locale);
02884 }
02885 setlocale(LC_ALL, "C");
02886
02887
02888
02889 svm_model *model = Malloc(svm_model,1);
02890 model->rho = NULL;
02891 model->probA = NULL;
02892 model->probB = NULL;
02893 model->sv_indices = NULL;
02894 model->label = NULL;
02895 model->nSV = NULL;
02896
02897
02898 if (!read_model_header(fp, model))
02899 {
02900 fprintf(stderr, "ERROR: fscanf failed to read model\n");
02901 setlocale(LC_ALL, old_locale);
02902 free(old_locale);
02903 free(model->rho);
02904 free(model->label);
02905 free(model->nSV);
02906 free(model);
02907 return NULL;
02908 }
02909
02910
02911
02912 int elements = 0;
02913 long pos = ftell(fp);
02914
02915 max_line_len = 1024;
02916 line = Malloc(char,max_line_len);
02917 char *p,*endptr,*idx,*val;
02918
02919 while(readline(fp)!=NULL)
02920 {
02921 p = strtok(line,":");
02922 while(1)
02923 {
02924 p = strtok(NULL,":");
02925 if(p == NULL)
02926 break;
02927 ++elements;
02928 }
02929 }
02930 elements += model->l;
02931
02932 fseek(fp,pos,SEEK_SET);
02933
02934 int m = model->nr_class - 1;
02935 int l = model->l;
02936 model->sv_coef = Malloc(double *,m);
02937 int i;
02938 for(i=0;i<m;i++)
02939 model->sv_coef[i] = Malloc(double,l);
02940 model->SV = Malloc(svm_node*,l);
02941 svm_node *x_space = NULL;
02942 if(l>0) x_space = Malloc(svm_node,elements);
02943
02944 int j=0;
02945 for(i=0;i<l;i++)
02946 {
02947 readline(fp);
02948 model->SV[i] = &x_space[j];
02949
02950 p = strtok(line, " \t");
02951 model->sv_coef[0][i] = strtod(p,&endptr);
02952 for(int k=1;k<m;k++)
02953 {
02954 p = strtok(NULL, " \t");
02955 model->sv_coef[k][i] = strtod(p,&endptr);
02956 }
02957
02958 while(1)
02959 {
02960 idx = strtok(NULL, ":");
02961 val = strtok(NULL, " \t");
02962
02963 if(val == NULL)
02964 break;
02965 x_space[j].index = (int) strtol(idx,&endptr,10);
02966 x_space[j].value = strtod(val,&endptr);
02967
02968 ++j;
02969 }
02970 x_space[j++].index = -1;
02971 }
02972 free(line);
02973
02974 setlocale(LC_ALL, old_locale);
02975 free(old_locale);
02976
02977 if (ferror(fp) != 0 || fclose(fp) != 0)
02978 return NULL;
02979
02980 model->free_sv = 1;
02981 return model;
02982 }
02983
02984 void svm_free_model_content(svm_model* model_ptr)
02985 {
02986 if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
02987 free((void *)(model_ptr->SV[0]));
02988 if(model_ptr->sv_coef)
02989 {
02990 for(int i=0;i<model_ptr->nr_class-1;i++)
02991 free(model_ptr->sv_coef[i]);
02992 }
02993
02994 free(model_ptr->SV);
02995 model_ptr->SV = NULL;
02996
02997 free(model_ptr->sv_coef);
02998 model_ptr->sv_coef = NULL;
02999
03000 free(model_ptr->rho);
03001 model_ptr->rho = NULL;
03002
03003 free(model_ptr->label);
03004 model_ptr->label= NULL;
03005
03006 free(model_ptr->probA);
03007 model_ptr->probA = NULL;
03008
03009 free(model_ptr->probB);
03010 model_ptr->probB= NULL;
03011
03012 free(model_ptr->sv_indices);
03013 model_ptr->sv_indices = NULL;
03014
03015 free(model_ptr->nSV);
03016 model_ptr->nSV = NULL;
03017 }
03018
03019 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
03020 {
03021 if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
03022 {
03023 svm_free_model_content(*model_ptr_ptr);
03024 free(*model_ptr_ptr);
03025 *model_ptr_ptr = NULL;
03026 }
03027 }
03028
03029 void svm_destroy_param(svm_parameter* param)
03030 {
03031 free(param->weight_label);
03032 free(param->weight);
03033 }
03034
03035 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
03036 {
03037
03038
03039 int svm_type = param->svm_type;
03040 if(svm_type != C_SVC &&
03041 svm_type != NU_SVC &&
03042 svm_type != ONE_CLASS &&
03043 svm_type != EPSILON_SVR &&
03044 svm_type != NU_SVR)
03045 return "unknown svm type";
03046
03047
03048
03049 int kernel_type = param->kernel_type;
03050 if(kernel_type != LINEAR &&
03051 kernel_type != POLY &&
03052 kernel_type != RBF &&
03053 kernel_type != SIGMOID &&
03054 kernel_type != PRECOMPUTED)
03055 return "unknown kernel type";
03056
03057 if(param->gamma < 0)
03058 return "gamma < 0";
03059
03060 if(param->degree < 0)
03061 return "degree of polynomial kernel < 0";
03062
03063
03064
03065 if(param->cache_size <= 0)
03066 return "cache_size <= 0";
03067
03068 if(param->eps <= 0)
03069 return "eps <= 0";
03070
03071 if(svm_type == C_SVC ||
03072 svm_type == EPSILON_SVR ||
03073 svm_type == NU_SVR)
03074 if(param->C <= 0)
03075 return "C <= 0";
03076
03077 if(svm_type == NU_SVC ||
03078 svm_type == ONE_CLASS ||
03079 svm_type == NU_SVR)
03080 if(param->nu <= 0 || param->nu > 1)
03081 return "nu <= 0 or nu > 1";
03082
03083 if(svm_type == EPSILON_SVR)
03084 if(param->p < 0)
03085 return "p < 0";
03086
03087 if(param->shrinking != 0 &&
03088 param->shrinking != 1)
03089 return "shrinking != 0 and shrinking != 1";
03090
03091 if(param->probability != 0 &&
03092 param->probability != 1)
03093 return "probability != 0 and probability != 1";
03094
03095 if(param->probability == 1 &&
03096 svm_type == ONE_CLASS)
03097 return "one-class SVM probability output not supported yet";
03098
03099
03100
03101
03102 if(svm_type == NU_SVC)
03103 {
03104 int l = prob->l;
03105 int max_nr_class = 16;
03106 int nr_class = 0;
03107 int *label = Malloc(int,max_nr_class);
03108 int *count = Malloc(int,max_nr_class);
03109
03110 int i;
03111 for(i=0;i<l;i++)
03112 {
03113 int this_label = (int)prob->y[i];
03114 int j;
03115 for(j=0;j<nr_class;j++)
03116 if(this_label == label[j])
03117 {
03118 ++count[j];
03119 break;
03120 }
03121 if(j == nr_class)
03122 {
03123 if(nr_class == max_nr_class)
03124 {
03125 max_nr_class *= 2;
03126 label = (int *)realloc(label,max_nr_class*sizeof(int));
03127 count = (int *)realloc(count,max_nr_class*sizeof(int));
03128 }
03129 label[nr_class] = this_label;
03130 count[nr_class] = 1;
03131 ++nr_class;
03132 }
03133 }
03134
03135 for(i=0;i<nr_class;i++)
03136 {
03137 int n1 = count[i];
03138 for(int j=i+1;j<nr_class;j++)
03139 {
03140 int n2 = count[j];
03141 if(param->nu*(n1+n2)/2 > min(n1,n2))
03142 {
03143 free(label);
03144 free(count);
03145 return "specified nu is infeasible";
03146 }
03147 }
03148 }
03149 free(label);
03150 free(count);
03151 }
03152
03153 return NULL;
03154 }
03155
03156 int svm_check_probability_model(const svm_model *model)
03157 {
03158 return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03159 model->probA!=NULL && model->probB!=NULL) ||
03160 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03161 model->probA!=NULL);
03162 }
03163
03164 void svm_set_print_string_function(void (*print_func)(const char *))
03165 {
03166 if(print_func == NULL)
03167 svm_print_string = &print_string_stdout;
03168 else
03169 svm_print_string = print_func;
03170 }