00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <ctype.h>
00005 #include <float.h>
00006 #include <string.h>
00007 #include <stdarg.h>
00008 #include <limits.h>
00009 #include <locale.h>
00010 #include "svm.h"
00011 int libsvm_version = LIBSVM_VERSION;
00012 typedef float Qfloat;
00013 typedef signed char schar;
00014 #ifndef min
00015 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
00016 #endif
00017 #ifndef max
00018 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
00019 #endif
00020 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00021 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
00022 {
00023 dst = new T[n];
00024 memcpy((void *)dst,(void *)src,sizeof(T)*n);
00025 }
00026 static inline double powi(double base, int times)
00027 {
00028 double tmp = base, ret = 1.0;
00029
00030 for(int t=times; t>0; t/=2)
00031 {
00032 if(t%2==1) ret*=tmp;
00033 tmp = tmp * tmp;
00034 }
00035 return ret;
00036 }
00037 #define INF HUGE_VAL
00038 #define TAU 1e-12
00039 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00040
00041 static void print_string_stdout(const char *s)
00042 {
00043 fputs(s,stdout);
00044 fflush(stdout);
00045 }
00046 static void (*svm_print_string) (const char *) = &print_string_stdout;
00047 #if 1
00048 static void info(const char *fmt,...)
00049 {
00050 char buf[BUFSIZ];
00051 va_list ap;
00052 va_start(ap,fmt);
00053 vsprintf(buf,fmt,ap);
00054 va_end(ap);
00055 (*svm_print_string)(buf);
00056 }
00057 #else
00058 static void info(const char *fmt,...) {}
00059 #endif
00060
00061
00062
00063
00064
00065
00066
00067 class Cache
00068 {
00069 public:
00070 Cache(int l,long int size);
00071 ~Cache();
00072
00073
00074
00075
00076 int get_data(const int index, Qfloat **data, int len);
00077 void swap_index(int i, int j);
00078 private:
00079 int l;
00080 long int size;
00081 struct head_t
00082 {
00083 head_t *prev, *next;
00084 Qfloat *data;
00085 int len;
00086 };
00087
00088 head_t *head;
00089 head_t lru_head;
00090 void lru_delete(head_t *h);
00091 void lru_insert(head_t *h);
00092 };
00093
00094 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00095 {
00096 head = (head_t *)calloc(l,sizeof(head_t));
00097 size /= sizeof(Qfloat);
00098 size -= l * sizeof(head_t) / sizeof(Qfloat);
00099 size = max(size, 2 * (long int) l);
00100 lru_head.next = lru_head.prev = &lru_head;
00101 }
00102
00103 Cache::~Cache()
00104 {
00105 for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00106 free(h->data);
00107 free(head);
00108 }
00109
00110 void Cache::lru_delete(head_t *h)
00111 {
00112
00113 h->prev->next = h->next;
00114 h->next->prev = h->prev;
00115 }
00116
00117 void Cache::lru_insert(head_t *h)
00118 {
00119
00120 h->next = &lru_head;
00121 h->prev = lru_head.prev;
00122 h->prev->next = h;
00123 h->next->prev = h;
00124 }
00125
00126 int Cache::get_data(const int index, Qfloat **data, int len)
00127 {
00128 head_t *h = &head[index];
00129 if(h->len) lru_delete(h);
00130 int more = len - h->len;
00131
00132 if(more > 0)
00133 {
00134
00135 while(size < more)
00136 {
00137 head_t *old = lru_head.next;
00138 lru_delete(old);
00139 free(old->data);
00140 size += old->len;
00141 old->data = 0;
00142 old->len = 0;
00143 }
00144
00145
00146 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00147 size -= more;
00148 swap(h->len,len);
00149 }
00150
00151 lru_insert(h);
00152 *data = h->data;
00153 return len;
00154 }
00155
00156 void Cache::swap_index(int i, int j)
00157 {
00158 if(i==j) return;
00159
00160 if(head[i].len) lru_delete(&head[i]);
00161 if(head[j].len) lru_delete(&head[j]);
00162 swap(head[i].data,head[j].data);
00163 swap(head[i].len,head[j].len);
00164 if(head[i].len) lru_insert(&head[i]);
00165 if(head[j].len) lru_insert(&head[j]);
00166
00167 if(i>j) swap(i,j);
00168 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00169 {
00170 if(h->len > i)
00171 {
00172 if(h->len > j)
00173 swap(h->data[i],h->data[j]);
00174 else
00175 {
00176
00177 lru_delete(h);
00178 free(h->data);
00179 size += h->len;
00180 h->data = 0;
00181 h->len = 0;
00182 }
00183 }
00184 }
00185 }
00186
00187
00188
00189
00190
00191
00192
00193
00194 class QMatrix {
00195 public:
00196 virtual Qfloat *get_Q(int column, int len) const = 0;
00197 virtual double *get_QD() const = 0;
00198 virtual void swap_index(int i, int j) const = 0;
00199 virtual ~QMatrix() {}
00200 };
00201
00202 class Kernel: public QMatrix {
00203 public:
00204 Kernel(int l, svm_node * const * x, const svm_parameter& param);
00205 virtual ~Kernel();
00206
00207 static double k_function(const svm_node *x, const svm_node *y,
00208 const svm_parameter& param);
00209 virtual Qfloat *get_Q(int column, int len) const = 0;
00210 virtual double *get_QD() const = 0;
00211 virtual void swap_index(int i, int j) const
00212 {
00213 swap(x[i],x[j]);
00214 if(x_square) swap(x_square[i],x_square[j]);
00215 }
00216 protected:
00217
00218 double (Kernel::*kernel_function)(int i, int j) const;
00219
00220 private:
00221 const svm_node **x;
00222 double *x_square;
00223
00224
00225 const int kernel_type;
00226 const int degree;
00227 const double gamma;
00228 const double coef0;
00229
00230 static double dot(const svm_node *px, const svm_node *py);
00231 double kernel_linear(int i, int j) const
00232 {
00233 return dot(x[i],x[j]);
00234 }
00235 double kernel_poly(int i, int j) const
00236 {
00237 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00238 }
00239 double kernel_rbf(int i, int j) const
00240 {
00241 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00242 }
00243 double kernel_sigmoid(int i, int j) const
00244 {
00245 return tanh(gamma*dot(x[i],x[j])+coef0);
00246 }
00247 double kernel_precomputed(int i, int j) const
00248 {
00249 return x[i][(int)(x[j][0].value)].value;
00250 }
00251 };
00252
00253 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00254 :kernel_type(param.kernel_type), degree(param.degree),
00255 gamma(param.gamma), coef0(param.coef0)
00256 {
00257 switch(kernel_type)
00258 {
00259 case LINEAR:
00260 kernel_function = &Kernel::kernel_linear;
00261 break;
00262 case POLY:
00263 kernel_function = &Kernel::kernel_poly;
00264 break;
00265 case RBF:
00266 kernel_function = &Kernel::kernel_rbf;
00267 break;
00268 case SIGMOID:
00269 kernel_function = &Kernel::kernel_sigmoid;
00270 break;
00271 case PRECOMPUTED:
00272 kernel_function = &Kernel::kernel_precomputed;
00273 break;
00274 }
00275
00276 clone(x,x_,l);
00277
00278 if(kernel_type == RBF)
00279 {
00280 x_square = new double[l];
00281 for(int i=0;i<l;i++)
00282 x_square[i] = dot(x[i],x[i]);
00283 }
00284 else
00285 x_square = 0;
00286 }
00287
00288 Kernel::~Kernel()
00289 {
00290 delete[] x;
00291 delete[] x_square;
00292 }
00293
00294 double Kernel::dot(const svm_node *px, const svm_node *py)
00295 {
00296 double sum = 0;
00297 while(px->index != -1 && py->index != -1)
00298 {
00299 if(px->index == py->index)
00300 {
00301 sum += px->value * py->value;
00302 ++px;
00303 ++py;
00304 }
00305 else
00306 {
00307 if(px->index > py->index)
00308 ++py;
00309 else
00310 ++px;
00311 }
00312 }
00313 return sum;
00314 }
00315
00316 double Kernel::k_function(const svm_node *x, const svm_node *y,
00317 const svm_parameter& param)
00318 {
00319 switch(param.kernel_type)
00320 {
00321 case LINEAR:
00322 return dot(x,y);
00323 case POLY:
00324 return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00325 case RBF:
00326 {
00327 double sum = 0;
00328 while(x->index != -1 && y->index !=-1)
00329 {
00330 if(x->index == y->index)
00331 {
00332 double d = x->value - y->value;
00333 sum += d*d;
00334 ++x;
00335 ++y;
00336 }
00337 else
00338 {
00339 if(x->index > y->index)
00340 {
00341 sum += y->value * y->value;
00342 ++y;
00343 }
00344 else
00345 {
00346 sum += x->value * x->value;
00347 ++x;
00348 }
00349 }
00350 }
00351
00352 while(x->index != -1)
00353 {
00354 sum += x->value * x->value;
00355 ++x;
00356 }
00357
00358 while(y->index != -1)
00359 {
00360 sum += y->value * y->value;
00361 ++y;
00362 }
00363
00364 return exp(-param.gamma*sum);
00365 }
00366 case SIGMOID:
00367 return tanh(param.gamma*dot(x,y)+param.coef0);
00368 case PRECOMPUTED:
00369 return x[(int)(y->value)].value;
00370 default:
00371 return 0;
00372 }
00373 }
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393 class Solver {
00394 public:
00395 Solver() {};
00396 virtual ~Solver() {};
00397
00398 struct SolutionInfo {
00399 double obj;
00400 double rho;
00401 double upper_bound_p;
00402 double upper_bound_n;
00403 double r;
00404 };
00405
00406 void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00407 double *alpha_, double Cp, double Cn, double eps,
00408 SolutionInfo* si, int shrinking);
00409 protected:
00410 int active_size;
00411 schar *y;
00412 double *G;
00413 enum { LOWER_BOUND, UPPER_BOUND, FREE };
00414 char *alpha_status;
00415 double *alpha;
00416 const QMatrix *Q;
00417 const double *QD;
00418 double eps;
00419 double Cp,Cn;
00420 double *p;
00421 int *active_set;
00422 double *G_bar;
00423 int l;
00424 bool unshrink;
00425
00426 double get_C(int i)
00427 {
00428 return (y[i] > 0)? Cp : Cn;
00429 }
00430 void update_alpha_status(int i)
00431 {
00432 if(alpha[i] >= get_C(i))
00433 alpha_status[i] = UPPER_BOUND;
00434 else if(alpha[i] <= 0)
00435 alpha_status[i] = LOWER_BOUND;
00436 else alpha_status[i] = FREE;
00437 }
00438 bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00439 bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00440 bool is_free(int i) { return alpha_status[i] == FREE; }
00441 void swap_index(int i, int j);
00442 void reconstruct_gradient();
00443 virtual int select_working_set(int &i, int &j);
00444 virtual double calculate_rho();
00445 virtual void do_shrinking();
00446 private:
00447 bool be_shrunk(int i, double Gmax1, double Gmax2);
00448 };
00449
00450 void Solver::swap_index(int i, int j)
00451 {
00452 Q->swap_index(i,j);
00453 swap(y[i],y[j]);
00454 swap(G[i],G[j]);
00455 swap(alpha_status[i],alpha_status[j]);
00456 swap(alpha[i],alpha[j]);
00457 swap(p[i],p[j]);
00458 swap(active_set[i],active_set[j]);
00459 swap(G_bar[i],G_bar[j]);
00460 }
00461
00462 void Solver::reconstruct_gradient()
00463 {
00464
00465
00466 if(active_size == l) return;
00467
00468 int i,j;
00469 int nr_free = 0;
00470
00471 for(j=active_size;j<l;j++)
00472 G[j] = G_bar[j] + p[j];
00473
00474 for(j=0;j<active_size;j++)
00475 if(is_free(j))
00476 nr_free++;
00477
00478 if(2*nr_free < active_size)
00479 info("\nWARNING: using -h 0 may be faster\n");
00480
00481 if (nr_free*l > 2*active_size*(l-active_size))
00482 {
00483 for(i=active_size;i<l;i++)
00484 {
00485 const Qfloat *Q_i = Q->get_Q(i,active_size);
00486 for(j=0;j<active_size;j++)
00487 if(is_free(j))
00488 G[i] += alpha[j] * Q_i[j];
00489 }
00490 }
00491 else
00492 {
00493 for(i=0;i<active_size;i++)
00494 if(is_free(i))
00495 {
00496 const Qfloat *Q_i = Q->get_Q(i,l);
00497 double alpha_i = alpha[i];
00498 for(j=active_size;j<l;j++)
00499 G[j] += alpha_i * Q_i[j];
00500 }
00501 }
00502 }
00503
00504 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00505 double *alpha_, double Cp, double Cn, double eps,
00506 SolutionInfo* si, int shrinking)
00507 {
00508 this->l = l;
00509 this->Q = &Q;
00510 QD=Q.get_QD();
00511 clone(p, p_,l);
00512 clone(y, y_,l);
00513 clone(alpha,alpha_,l);
00514 this->Cp = Cp;
00515 this->Cn = Cn;
00516 this->eps = eps;
00517 unshrink = false;
00518
00519
00520 {
00521 alpha_status = new char[l];
00522 for(int i=0;i<l;i++)
00523 update_alpha_status(i);
00524 }
00525
00526
00527 {
00528 active_set = new int[l];
00529 for(int i=0;i<l;i++)
00530 active_set[i] = i;
00531 active_size = l;
00532 }
00533
00534
00535 {
00536 G = new double[l];
00537 G_bar = new double[l];
00538 int i;
00539 for(i=0;i<l;i++)
00540 {
00541 G[i] = p[i];
00542 G_bar[i] = 0;
00543 }
00544 for(i=0;i<l;i++)
00545 if(!is_lower_bound(i))
00546 {
00547 const Qfloat *Q_i = Q.get_Q(i,l);
00548 double alpha_i = alpha[i];
00549 int j;
00550 for(j=0;j<l;j++)
00551 G[j] += alpha_i*Q_i[j];
00552 if(is_upper_bound(i))
00553 for(j=0;j<l;j++)
00554 G_bar[j] += get_C(i) * Q_i[j];
00555 }
00556 }
00557
00558
00559
00560 int iter = 0;
00561 int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
00562 int counter = min(l,1000)+1;
00563
00564 while(iter < max_iter)
00565 {
00566
00567
00568 if(--counter == 0)
00569 {
00570 counter = min(l,1000);
00571 if(shrinking) do_shrinking();
00572 info(".");
00573 }
00574
00575 int i,j;
00576 if(select_working_set(i,j)!=0)
00577 {
00578
00579 reconstruct_gradient();
00580
00581 active_size = l;
00582 info("*");
00583 if(select_working_set(i,j)!=0)
00584 break;
00585 else
00586 counter = 1;
00587 }
00588
00589 ++iter;
00590
00591
00592
00593 const Qfloat *Q_i = Q.get_Q(i,active_size);
00594 const Qfloat *Q_j = Q.get_Q(j,active_size);
00595
00596 double C_i = get_C(i);
00597 double C_j = get_C(j);
00598
00599 double old_alpha_i = alpha[i];
00600 double old_alpha_j = alpha[j];
00601
00602 if(y[i]!=y[j])
00603 {
00604 double quad_coef = QD[i]+QD[j]+2*Q_i[j];
00605 if (quad_coef <= 0)
00606 quad_coef = TAU;
00607 double delta = (-G[i]-G[j])/quad_coef;
00608 double diff = alpha[i] - alpha[j];
00609 alpha[i] += delta;
00610 alpha[j] += delta;
00611
00612 if(diff > 0)
00613 {
00614 if(alpha[j] < 0)
00615 {
00616 alpha[j] = 0;
00617 alpha[i] = diff;
00618 }
00619 }
00620 else
00621 {
00622 if(alpha[i] < 0)
00623 {
00624 alpha[i] = 0;
00625 alpha[j] = -diff;
00626 }
00627 }
00628 if(diff > C_i - C_j)
00629 {
00630 if(alpha[i] > C_i)
00631 {
00632 alpha[i] = C_i;
00633 alpha[j] = C_i - diff;
00634 }
00635 }
00636 else
00637 {
00638 if(alpha[j] > C_j)
00639 {
00640 alpha[j] = C_j;
00641 alpha[i] = C_j + diff;
00642 }
00643 }
00644 }
00645 else
00646 {
00647 double quad_coef = QD[i]+QD[j]-2*Q_i[j];
00648 if (quad_coef <= 0)
00649 quad_coef = TAU;
00650 double delta = (G[i]-G[j])/quad_coef;
00651 double sum = alpha[i] + alpha[j];
00652 alpha[i] -= delta;
00653 alpha[j] += delta;
00654
00655 if(sum > C_i)
00656 {
00657 if(alpha[i] > C_i)
00658 {
00659 alpha[i] = C_i;
00660 alpha[j] = sum - C_i;
00661 }
00662 }
00663 else
00664 {
00665 if(alpha[j] < 0)
00666 {
00667 alpha[j] = 0;
00668 alpha[i] = sum;
00669 }
00670 }
00671 if(sum > C_j)
00672 {
00673 if(alpha[j] > C_j)
00674 {
00675 alpha[j] = C_j;
00676 alpha[i] = sum - C_j;
00677 }
00678 }
00679 else
00680 {
00681 if(alpha[i] < 0)
00682 {
00683 alpha[i] = 0;
00684 alpha[j] = sum;
00685 }
00686 }
00687 }
00688
00689
00690
00691 double delta_alpha_i = alpha[i] - old_alpha_i;
00692 double delta_alpha_j = alpha[j] - old_alpha_j;
00693
00694 for(int k=0;k<active_size;k++)
00695 {
00696 G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00697 }
00698
00699
00700
00701 {
00702 bool ui = is_upper_bound(i);
00703 bool uj = is_upper_bound(j);
00704 update_alpha_status(i);
00705 update_alpha_status(j);
00706 int k;
00707 if(ui != is_upper_bound(i))
00708 {
00709 Q_i = Q.get_Q(i,l);
00710 if(ui)
00711 for(k=0;k<l;k++)
00712 G_bar[k] -= C_i * Q_i[k];
00713 else
00714 for(k=0;k<l;k++)
00715 G_bar[k] += C_i * Q_i[k];
00716 }
00717
00718 if(uj != is_upper_bound(j))
00719 {
00720 Q_j = Q.get_Q(j,l);
00721 if(uj)
00722 for(k=0;k<l;k++)
00723 G_bar[k] -= C_j * Q_j[k];
00724 else
00725 for(k=0;k<l;k++)
00726 G_bar[k] += C_j * Q_j[k];
00727 }
00728 }
00729 }
00730
00731 if(iter >= max_iter)
00732 {
00733 if(active_size < l)
00734 {
00735
00736 reconstruct_gradient();
00737 active_size = l;
00738 info("*");
00739 }
00740 info("\nWARNING: reaching max number of iterations");
00741 }
00742
00743
00744
00745 si->rho = calculate_rho();
00746
00747
00748 {
00749 double v = 0;
00750 int i;
00751 for(i=0;i<l;i++)
00752 v += alpha[i] * (G[i] + p[i]);
00753
00754 si->obj = v/2;
00755 }
00756
00757
00758 {
00759 for(int i=0;i<l;i++)
00760 alpha_[active_set[i]] = alpha[i];
00761 }
00762
00763
00764
00765
00766
00767
00768
00769
00770
00771 si->upper_bound_p = Cp;
00772 si->upper_bound_n = Cn;
00773
00774 info("\noptimization finished, #iter = %d\n",iter);
00775
00776 delete[] p;
00777 delete[] y;
00778 delete[] alpha;
00779 delete[] alpha_status;
00780 delete[] active_set;
00781 delete[] G;
00782 delete[] G_bar;
00783 }
00784
00785
00786 int Solver::select_working_set(int &out_i, int &out_j)
00787 {
00788
00789
00790
00791
00792
00793
00794 double Gmax = -INF;
00795 double Gmax2 = -INF;
00796 int Gmax_idx = -1;
00797 int Gmin_idx = -1;
00798 double obj_diff_min = INF;
00799
00800 for(int t=0;t<active_size;t++)
00801 if(y[t]==+1)
00802 {
00803 if(!is_upper_bound(t))
00804 if(-G[t] >= Gmax)
00805 {
00806 Gmax = -G[t];
00807 Gmax_idx = t;
00808 }
00809 }
00810 else
00811 {
00812 if(!is_lower_bound(t))
00813 if(G[t] >= Gmax)
00814 {
00815 Gmax = G[t];
00816 Gmax_idx = t;
00817 }
00818 }
00819
00820 int i = Gmax_idx;
00821 const Qfloat *Q_i = NULL;
00822 if(i != -1)
00823 Q_i = Q->get_Q(i,active_size);
00824
00825 for(int j=0;j<active_size;j++)
00826 {
00827 if(y[j]==+1)
00828 {
00829 if (!is_lower_bound(j))
00830 {
00831 double grad_diff=Gmax+G[j];
00832 if (G[j] >= Gmax2)
00833 Gmax2 = G[j];
00834 if (grad_diff > 0)
00835 {
00836 double obj_diff;
00837 double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
00838 if (quad_coef > 0)
00839 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00840 else
00841 obj_diff = -(grad_diff*grad_diff)/TAU;
00842
00843 if (obj_diff <= obj_diff_min)
00844 {
00845 Gmin_idx=j;
00846 obj_diff_min = obj_diff;
00847 }
00848 }
00849 }
00850 }
00851 else
00852 {
00853 if (!is_upper_bound(j))
00854 {
00855 double grad_diff= Gmax-G[j];
00856 if (-G[j] >= Gmax2)
00857 Gmax2 = -G[j];
00858 if (grad_diff > 0)
00859 {
00860 double obj_diff;
00861 double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
00862 if (quad_coef > 0)
00863 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00864 else
00865 obj_diff = -(grad_diff*grad_diff)/TAU;
00866
00867 if (obj_diff <= obj_diff_min)
00868 {
00869 Gmin_idx=j;
00870 obj_diff_min = obj_diff;
00871 }
00872 }
00873 }
00874 }
00875 }
00876
00877 if(Gmax+Gmax2 < eps)
00878 return 1;
00879
00880 out_i = Gmax_idx;
00881 out_j = Gmin_idx;
00882 return 0;
00883 }
00884
00885 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
00886 {
00887 if(is_upper_bound(i))
00888 {
00889 if(y[i]==+1)
00890 return(-G[i] > Gmax1);
00891 else
00892 return(-G[i] > Gmax2);
00893 }
00894 else if(is_lower_bound(i))
00895 {
00896 if(y[i]==+1)
00897 return(G[i] > Gmax2);
00898 else
00899 return(G[i] > Gmax1);
00900 }
00901 else
00902 return(false);
00903 }
00904
00905 void Solver::do_shrinking()
00906 {
00907 int i;
00908 double Gmax1 = -INF;
00909 double Gmax2 = -INF;
00910
00911
00912 for(i=0;i<active_size;i++)
00913 {
00914 if(y[i]==+1)
00915 {
00916 if(!is_upper_bound(i))
00917 {
00918 if(-G[i] >= Gmax1)
00919 Gmax1 = -G[i];
00920 }
00921 if(!is_lower_bound(i))
00922 {
00923 if(G[i] >= Gmax2)
00924 Gmax2 = G[i];
00925 }
00926 }
00927 else
00928 {
00929 if(!is_upper_bound(i))
00930 {
00931 if(-G[i] >= Gmax2)
00932 Gmax2 = -G[i];
00933 }
00934 if(!is_lower_bound(i))
00935 {
00936 if(G[i] >= Gmax1)
00937 Gmax1 = G[i];
00938 }
00939 }
00940 }
00941
00942 if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
00943 {
00944 unshrink = true;
00945 reconstruct_gradient();
00946 active_size = l;
00947 info("*");
00948 }
00949
00950 for(i=0;i<active_size;i++)
00951 if (be_shrunk(i, Gmax1, Gmax2))
00952 {
00953 active_size--;
00954 while (active_size > i)
00955 {
00956 if (!be_shrunk(active_size, Gmax1, Gmax2))
00957 {
00958 swap_index(i,active_size);
00959 break;
00960 }
00961 active_size--;
00962 }
00963 }
00964 }
00965
00966 double Solver::calculate_rho()
00967 {
00968 double r;
00969 int nr_free = 0;
00970 double ub = INF, lb = -INF, sum_free = 0;
00971 for(int i=0;i<active_size;i++)
00972 {
00973 double yG = y[i]*G[i];
00974
00975 if(is_upper_bound(i))
00976 {
00977 if(y[i]==-1)
00978 ub = min(ub,yG);
00979 else
00980 lb = max(lb,yG);
00981 }
00982 else if(is_lower_bound(i))
00983 {
00984 if(y[i]==+1)
00985 ub = min(ub,yG);
00986 else
00987 lb = max(lb,yG);
00988 }
00989 else
00990 {
00991 ++nr_free;
00992 sum_free += yG;
00993 }
00994 }
00995
00996 if(nr_free>0)
00997 r = sum_free/nr_free;
00998 else
00999 r = (ub+lb)/2;
01000
01001 return r;
01002 }
01003
01004
01005
01006
01007
01008
01009 class Solver_NU : public Solver
01010 {
01011 public:
01012 Solver_NU() {}
01013 void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
01014 double *alpha, double Cp, double Cn, double eps,
01015 SolutionInfo* si, int shrinking)
01016 {
01017 this->si = si;
01018 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
01019 }
01020 private:
01021 SolutionInfo *si;
01022 int select_working_set(int &i, int &j);
01023 double calculate_rho();
01024 bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
01025 void do_shrinking();
01026 };
01027
01028
01029 int Solver_NU::select_working_set(int &out_i, int &out_j)
01030 {
01031
01032
01033
01034
01035
01036
01037 double Gmaxp = -INF;
01038 double Gmaxp2 = -INF;
01039 int Gmaxp_idx = -1;
01040
01041 double Gmaxn = -INF;
01042 double Gmaxn2 = -INF;
01043 int Gmaxn_idx = -1;
01044
01045 int Gmin_idx = -1;
01046 double obj_diff_min = INF;
01047
01048 for(int t=0;t<active_size;t++)
01049 if(y[t]==+1)
01050 {
01051 if(!is_upper_bound(t))
01052 if(-G[t] >= Gmaxp)
01053 {
01054 Gmaxp = -G[t];
01055 Gmaxp_idx = t;
01056 }
01057 }
01058 else
01059 {
01060 if(!is_lower_bound(t))
01061 if(G[t] >= Gmaxn)
01062 {
01063 Gmaxn = G[t];
01064 Gmaxn_idx = t;
01065 }
01066 }
01067
01068 int ip = Gmaxp_idx;
01069 int in = Gmaxn_idx;
01070 const Qfloat *Q_ip = NULL;
01071 const Qfloat *Q_in = NULL;
01072 if(ip != -1)
01073 Q_ip = Q->get_Q(ip,active_size);
01074 if(in != -1)
01075 Q_in = Q->get_Q(in,active_size);
01076
01077 for(int j=0;j<active_size;j++)
01078 {
01079 if(y[j]==+1)
01080 {
01081 if (!is_lower_bound(j))
01082 {
01083 double grad_diff=Gmaxp+G[j];
01084 if (G[j] >= Gmaxp2)
01085 Gmaxp2 = G[j];
01086 if (grad_diff > 0)
01087 {
01088 double obj_diff;
01089 double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
01090 if (quad_coef > 0)
01091 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01092 else
01093 obj_diff = -(grad_diff*grad_diff)/TAU;
01094
01095 if (obj_diff <= obj_diff_min)
01096 {
01097 Gmin_idx=j;
01098 obj_diff_min = obj_diff;
01099 }
01100 }
01101 }
01102 }
01103 else
01104 {
01105 if (!is_upper_bound(j))
01106 {
01107 double grad_diff=Gmaxn-G[j];
01108 if (-G[j] >= Gmaxn2)
01109 Gmaxn2 = -G[j];
01110 if (grad_diff > 0)
01111 {
01112 double obj_diff;
01113 double quad_coef = QD[in]+QD[j]-2*Q_in[j];
01114 if (quad_coef > 0)
01115 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01116 else
01117 obj_diff = -(grad_diff*grad_diff)/TAU;
01118
01119 if (obj_diff <= obj_diff_min)
01120 {
01121 Gmin_idx=j;
01122 obj_diff_min = obj_diff;
01123 }
01124 }
01125 }
01126 }
01127 }
01128
01129 if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
01130 return 1;
01131
01132 if (y[Gmin_idx] == +1)
01133 out_i = Gmaxp_idx;
01134 else
01135 out_i = Gmaxn_idx;
01136 out_j = Gmin_idx;
01137
01138 return 0;
01139 }
01140
01141 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01142 {
01143 if(is_upper_bound(i))
01144 {
01145 if(y[i]==+1)
01146 return(-G[i] > Gmax1);
01147 else
01148 return(-G[i] > Gmax4);
01149 }
01150 else if(is_lower_bound(i))
01151 {
01152 if(y[i]==+1)
01153 return(G[i] > Gmax2);
01154 else
01155 return(G[i] > Gmax3);
01156 }
01157 else
01158 return(false);
01159 }
01160
01161 void Solver_NU::do_shrinking()
01162 {
01163 double Gmax1 = -INF;
01164 double Gmax2 = -INF;
01165 double Gmax3 = -INF;
01166 double Gmax4 = -INF;
01167
01168
01169 int i;
01170 for(i=0;i<active_size;i++)
01171 {
01172 if(!is_upper_bound(i))
01173 {
01174 if(y[i]==+1)
01175 {
01176 if(-G[i] > Gmax1) Gmax1 = -G[i];
01177 }
01178 else if(-G[i] > Gmax4) Gmax4 = -G[i];
01179 }
01180 if(!is_lower_bound(i))
01181 {
01182 if(y[i]==+1)
01183 {
01184 if(G[i] > Gmax2) Gmax2 = G[i];
01185 }
01186 else if(G[i] > Gmax3) Gmax3 = G[i];
01187 }
01188 }
01189
01190 if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
01191 {
01192 unshrink = true;
01193 reconstruct_gradient();
01194 active_size = l;
01195 }
01196
01197 for(i=0;i<active_size;i++)
01198 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
01199 {
01200 active_size--;
01201 while (active_size > i)
01202 {
01203 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01204 {
01205 swap_index(i,active_size);
01206 break;
01207 }
01208 active_size--;
01209 }
01210 }
01211 }
01212
01213 double Solver_NU::calculate_rho()
01214 {
01215 int nr_free1 = 0,nr_free2 = 0;
01216 double ub1 = INF, ub2 = INF;
01217 double lb1 = -INF, lb2 = -INF;
01218 double sum_free1 = 0, sum_free2 = 0;
01219
01220 for(int i=0;i<active_size;i++)
01221 {
01222 if(y[i]==+1)
01223 {
01224 if(is_upper_bound(i))
01225 lb1 = max(lb1,G[i]);
01226 else if(is_lower_bound(i))
01227 ub1 = min(ub1,G[i]);
01228 else
01229 {
01230 ++nr_free1;
01231 sum_free1 += G[i];
01232 }
01233 }
01234 else
01235 {
01236 if(is_upper_bound(i))
01237 lb2 = max(lb2,G[i]);
01238 else if(is_lower_bound(i))
01239 ub2 = min(ub2,G[i]);
01240 else
01241 {
01242 ++nr_free2;
01243 sum_free2 += G[i];
01244 }
01245 }
01246 }
01247
01248 double r1,r2;
01249 if(nr_free1 > 0)
01250 r1 = sum_free1/nr_free1;
01251 else
01252 r1 = (ub1+lb1)/2;
01253
01254 if(nr_free2 > 0)
01255 r2 = sum_free2/nr_free2;
01256 else
01257 r2 = (ub2+lb2)/2;
01258
01259 si->r = (r1+r2)/2;
01260 return (r1-r2)/2;
01261 }
01262
01263
01264
01265
01266 class SVC_Q: public Kernel
01267 {
01268 public:
01269 SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01270 :Kernel(prob.l, prob.x, param)
01271 {
01272 clone(y,y_,prob.l);
01273 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01274 QD = new double[prob.l];
01275 for(int i=0;i<prob.l;i++)
01276 QD[i] = (this->*kernel_function)(i,i);
01277 }
01278
01279 Qfloat *get_Q(int i, int len) const
01280 {
01281 Qfloat *data;
01282 int start, j;
01283 if((start = cache->get_data(i,&data,len)) < len)
01284 {
01285 for(j=start;j<len;j++)
01286 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01287 }
01288 return data;
01289 }
01290
01291 double *get_QD() const
01292 {
01293 return QD;
01294 }
01295
01296 void swap_index(int i, int j) const
01297 {
01298 cache->swap_index(i,j);
01299 Kernel::swap_index(i,j);
01300 swap(y[i],y[j]);
01301 swap(QD[i],QD[j]);
01302 }
01303
01304 ~SVC_Q()
01305 {
01306 delete[] y;
01307 delete cache;
01308 delete[] QD;
01309 }
01310 private:
01311 schar *y;
01312 Cache *cache;
01313 double *QD;
01314 };
01315
01316 class ONE_CLASS_Q: public Kernel
01317 {
01318 public:
01319 ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01320 :Kernel(prob.l, prob.x, param)
01321 {
01322 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01323 QD = new double[prob.l];
01324 for(int i=0;i<prob.l;i++)
01325 QD[i] = (this->*kernel_function)(i,i);
01326 }
01327
01328 Qfloat *get_Q(int i, int len) const
01329 {
01330 Qfloat *data;
01331 int start, j;
01332 if((start = cache->get_data(i,&data,len)) < len)
01333 {
01334 for(j=start;j<len;j++)
01335 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01336 }
01337 return data;
01338 }
01339
01340 double *get_QD() const
01341 {
01342 return QD;
01343 }
01344
01345 void swap_index(int i, int j) const
01346 {
01347 cache->swap_index(i,j);
01348 Kernel::swap_index(i,j);
01349 swap(QD[i],QD[j]);
01350 }
01351
01352 ~ONE_CLASS_Q()
01353 {
01354 delete cache;
01355 delete[] QD;
01356 }
01357 private:
01358 Cache *cache;
01359 double *QD;
01360 };
01361
01362 class SVR_Q: public Kernel
01363 {
01364 public:
01365 SVR_Q(const svm_problem& prob, const svm_parameter& param)
01366 :Kernel(prob.l, prob.x, param)
01367 {
01368 l = prob.l;
01369 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01370 QD = new double[2*l];
01371 sign = new schar[2*l];
01372 index = new int[2*l];
01373 for(int k=0;k<l;k++)
01374 {
01375 sign[k] = 1;
01376 sign[k+l] = -1;
01377 index[k] = k;
01378 index[k+l] = k;
01379 QD[k] = (this->*kernel_function)(k,k);
01380 QD[k+l] = QD[k];
01381 }
01382 buffer[0] = new Qfloat[2*l];
01383 buffer[1] = new Qfloat[2*l];
01384 next_buffer = 0;
01385 }
01386
01387 void swap_index(int i, int j) const
01388 {
01389 swap(sign[i],sign[j]);
01390 swap(index[i],index[j]);
01391 swap(QD[i],QD[j]);
01392 }
01393
01394 Qfloat *get_Q(int i, int len) const
01395 {
01396 Qfloat *data;
01397 int j, real_i = index[i];
01398 if(cache->get_data(real_i,&data,l) < l)
01399 {
01400 for(j=0;j<l;j++)
01401 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01402 }
01403
01404
01405 Qfloat *buf = buffer[next_buffer];
01406 next_buffer = 1 - next_buffer;
01407 schar si = sign[i];
01408 for(j=0;j<len;j++)
01409 buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
01410 return buf;
01411 }
01412
01413 double *get_QD() const
01414 {
01415 return QD;
01416 }
01417
01418 ~SVR_Q()
01419 {
01420 delete cache;
01421 delete[] sign;
01422 delete[] index;
01423 delete[] buffer[0];
01424 delete[] buffer[1];
01425 delete[] QD;
01426 }
01427 private:
01428 int l;
01429 Cache *cache;
01430 schar *sign;
01431 int *index;
01432 mutable int next_buffer;
01433 Qfloat *buffer[2];
01434 double *QD;
01435 };
01436
01437
01438
01439
01440 static void solve_c_svc(
01441 const svm_problem *prob, const svm_parameter* param,
01442 double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01443 {
01444 int l = prob->l;
01445 double *minus_ones = new double[l];
01446 schar *y = new schar[l];
01447
01448 int i;
01449
01450 for(i=0;i<l;i++)
01451 {
01452 alpha[i] = 0;
01453 minus_ones[i] = -1;
01454 if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
01455 }
01456
01457 Solver s;
01458 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01459 alpha, Cp, Cn, param->eps, si, param->shrinking);
01460
01461 double sum_alpha=0;
01462 for(i=0;i<l;i++)
01463 sum_alpha += alpha[i];
01464
01465 if (Cp==Cn)
01466 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01467
01468 for(i=0;i<l;i++)
01469 alpha[i] *= y[i];
01470
01471 delete[] minus_ones;
01472 delete[] y;
01473 }
01474
01475 static void solve_nu_svc(
01476 const svm_problem *prob, const svm_parameter *param,
01477 double *alpha, Solver::SolutionInfo* si)
01478 {
01479 int i;
01480 int l = prob->l;
01481 double nu = param->nu;
01482
01483 schar *y = new schar[l];
01484
01485 for(i=0;i<l;i++)
01486 if(prob->y[i]>0)
01487 y[i] = +1;
01488 else
01489 y[i] = -1;
01490
01491 double sum_pos = nu*l/2;
01492 double sum_neg = nu*l/2;
01493
01494 for(i=0;i<l;i++)
01495 if(y[i] == +1)
01496 {
01497 alpha[i] = min(1.0,sum_pos);
01498 sum_pos -= alpha[i];
01499 }
01500 else
01501 {
01502 alpha[i] = min(1.0,sum_neg);
01503 sum_neg -= alpha[i];
01504 }
01505
01506 double *zeros = new double[l];
01507
01508 for(i=0;i<l;i++)
01509 zeros[i] = 0;
01510
01511 Solver_NU s;
01512 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01513 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01514 double r = si->r;
01515
01516 info("C = %f\n",1/r);
01517
01518 for(i=0;i<l;i++)
01519 alpha[i] *= y[i]/r;
01520
01521 si->rho /= r;
01522 si->obj /= (r*r);
01523 si->upper_bound_p = 1/r;
01524 si->upper_bound_n = 1/r;
01525
01526 delete[] y;
01527 delete[] zeros;
01528 }
01529
01530 static void solve_one_class(
01531 const svm_problem *prob, const svm_parameter *param,
01532 double *alpha, Solver::SolutionInfo* si)
01533 {
01534 int l = prob->l;
01535 double *zeros = new double[l];
01536 schar *ones = new schar[l];
01537 int i;
01538
01539 int n = (int)(param->nu*prob->l);
01540
01541 for(i=0;i<n;i++)
01542 alpha[i] = 1;
01543 if(n<prob->l)
01544 alpha[n] = param->nu * prob->l - n;
01545 for(i=n+1;i<l;i++)
01546 alpha[i] = 0;
01547
01548 for(i=0;i<l;i++)
01549 {
01550 zeros[i] = 0;
01551 ones[i] = 1;
01552 }
01553
01554 Solver s;
01555 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01556 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01557
01558 delete[] zeros;
01559 delete[] ones;
01560 }
01561
01562 static void solve_epsilon_svr(
01563 const svm_problem *prob, const svm_parameter *param,
01564 double *alpha, Solver::SolutionInfo* si)
01565 {
01566 int l = prob->l;
01567 double *alpha2 = new double[2*l];
01568 double *linear_term = new double[2*l];
01569 schar *y = new schar[2*l];
01570 int i;
01571
01572 for(i=0;i<l;i++)
01573 {
01574 alpha2[i] = 0;
01575 linear_term[i] = param->p - prob->y[i];
01576 y[i] = 1;
01577
01578 alpha2[i+l] = 0;
01579 linear_term[i+l] = param->p + prob->y[i];
01580 y[i+l] = -1;
01581 }
01582
01583 Solver s;
01584 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01585 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01586
01587 double sum_alpha = 0;
01588 for(i=0;i<l;i++)
01589 {
01590 alpha[i] = alpha2[i] - alpha2[i+l];
01591 sum_alpha += fabs(alpha[i]);
01592 }
01593 info("nu = %f\n",sum_alpha/(param->C*l));
01594
01595 delete[] alpha2;
01596 delete[] linear_term;
01597 delete[] y;
01598 }
01599
01600 static void solve_nu_svr(
01601 const svm_problem *prob, const svm_parameter *param,
01602 double *alpha, Solver::SolutionInfo* si)
01603 {
01604 int l = prob->l;
01605 double C = param->C;
01606 double *alpha2 = new double[2*l];
01607 double *linear_term = new double[2*l];
01608 schar *y = new schar[2*l];
01609 int i;
01610
01611 double sum = C * param->nu * l / 2;
01612 for(i=0;i<l;i++)
01613 {
01614 alpha2[i] = alpha2[i+l] = min(sum,C);
01615 sum -= alpha2[i];
01616
01617 linear_term[i] = - prob->y[i];
01618 y[i] = 1;
01619
01620 linear_term[i+l] = prob->y[i];
01621 y[i+l] = -1;
01622 }
01623
01624 Solver_NU s;
01625 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01626 alpha2, C, C, param->eps, si, param->shrinking);
01627
01628 info("epsilon = %f\n",-si->r);
01629
01630 for(i=0;i<l;i++)
01631 alpha[i] = alpha2[i] - alpha2[i+l];
01632
01633 delete[] alpha2;
01634 delete[] linear_term;
01635 delete[] y;
01636 }
01637
01638
01639
01640
01641 struct decision_function
01642 {
01643 double *alpha;
01644 double rho;
01645 };
01646
01647 static decision_function svm_train_one(
01648 const svm_problem *prob, const svm_parameter *param,
01649 double Cp, double Cn)
01650 {
01651 double *alpha = Malloc(double,prob->l);
01652 Solver::SolutionInfo si;
01653 switch(param->svm_type)
01654 {
01655 case C_SVC:
01656 solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01657 break;
01658 case NU_SVC:
01659 solve_nu_svc(prob,param,alpha,&si);
01660 break;
01661 case ONE_CLASS:
01662 solve_one_class(prob,param,alpha,&si);
01663 break;
01664 case EPSILON_SVR:
01665 solve_epsilon_svr(prob,param,alpha,&si);
01666 break;
01667 case NU_SVR:
01668 solve_nu_svr(prob,param,alpha,&si);
01669 break;
01670 }
01671
01672 info("obj = %f, rho = %f\n",si.obj,si.rho);
01673
01674
01675
01676 int nSV = 0;
01677 int nBSV = 0;
01678 for(int i=0;i<prob->l;i++)
01679 {
01680 if(fabs(alpha[i]) > 0)
01681 {
01682 ++nSV;
01683 if(prob->y[i] > 0)
01684 {
01685 if(fabs(alpha[i]) >= si.upper_bound_p)
01686 ++nBSV;
01687 }
01688 else
01689 {
01690 if(fabs(alpha[i]) >= si.upper_bound_n)
01691 ++nBSV;
01692 }
01693 }
01694 }
01695
01696 info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01697
01698 decision_function f;
01699 f.alpha = alpha;
01700 f.rho = si.rho;
01701 return f;
01702 }
01703
01704
01705 static void sigmoid_train(
01706 int l, const double *dec_values, const double *labels,
01707 double& A, double& B)
01708 {
01709 double prior1=0, prior0 = 0;
01710 int i;
01711
01712 for (i=0;i<l;i++)
01713 if (labels[i] > 0) prior1+=1;
01714 else prior0+=1;
01715
01716 int max_iter=100;
01717 double min_step=1e-10;
01718 double sigma=1e-12;
01719 double eps=1e-5;
01720 double hiTarget=(prior1+1.0)/(prior1+2.0);
01721 double loTarget=1/(prior0+2.0);
01722 double *t=Malloc(double,l);
01723 double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01724 double newA,newB,newf,d1,d2;
01725 int iter;
01726
01727
01728 A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01729 double fval = 0.0;
01730
01731 for (i=0;i<l;i++)
01732 {
01733 if (labels[i]>0) t[i]=hiTarget;
01734 else t[i]=loTarget;
01735 fApB = dec_values[i]*A+B;
01736 if (fApB>=0)
01737 fval += t[i]*fApB + log(1+exp(-fApB));
01738 else
01739 fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01740 }
01741 for (iter=0;iter<max_iter;iter++)
01742 {
01743
01744 h11=sigma;
01745 h22=sigma;
01746 h21=0.0;g1=0.0;g2=0.0;
01747 for (i=0;i<l;i++)
01748 {
01749 fApB = dec_values[i]*A+B;
01750 if (fApB >= 0)
01751 {
01752 p=exp(-fApB)/(1.0+exp(-fApB));
01753 q=1.0/(1.0+exp(-fApB));
01754 }
01755 else
01756 {
01757 p=1.0/(1.0+exp(fApB));
01758 q=exp(fApB)/(1.0+exp(fApB));
01759 }
01760 d2=p*q;
01761 h11+=dec_values[i]*dec_values[i]*d2;
01762 h22+=d2;
01763 h21+=dec_values[i]*d2;
01764 d1=t[i]-p;
01765 g1+=dec_values[i]*d1;
01766 g2+=d1;
01767 }
01768
01769
01770 if (fabs(g1)<eps && fabs(g2)<eps)
01771 break;
01772
01773
01774 det=h11*h22-h21*h21;
01775 dA=-(h22*g1 - h21 * g2) / det;
01776 dB=-(-h21*g1+ h11 * g2) / det;
01777 gd=g1*dA+g2*dB;
01778
01779
01780 stepsize = 1;
01781 while (stepsize >= min_step)
01782 {
01783 newA = A + stepsize * dA;
01784 newB = B + stepsize * dB;
01785
01786
01787 newf = 0.0;
01788 for (i=0;i<l;i++)
01789 {
01790 fApB = dec_values[i]*newA+newB;
01791 if (fApB >= 0)
01792 newf += t[i]*fApB + log(1+exp(-fApB));
01793 else
01794 newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01795 }
01796
01797 if (newf<fval+0.0001*stepsize*gd)
01798 {
01799 A=newA;B=newB;fval=newf;
01800 break;
01801 }
01802 else
01803 stepsize = stepsize / 2.0;
01804 }
01805
01806 if (stepsize < min_step)
01807 {
01808 info("Line search fails in two-class probability estimates\n");
01809 break;
01810 }
01811 }
01812
01813 if (iter>=max_iter)
01814 info("Reaching maximal iterations in two-class probability estimates\n");
01815 free(t);
01816 }
01817
01818 static double sigmoid_predict(double decision_value, double A, double B)
01819 {
01820 double fApB = decision_value*A+B;
01821
01822 if (fApB >= 0)
01823 return exp(-fApB)/(1.0+exp(-fApB));
01824 else
01825 return 1.0/(1+exp(fApB)) ;
01826 }
01827
01828
01829 static void multiclass_probability(int k, double **r, double *p)
01830 {
01831 int t,j;
01832 int iter = 0, max_iter=max(100,k);
01833 double **Q=Malloc(double *,k);
01834 double *Qp=Malloc(double,k);
01835 double pQp, eps=0.005/k;
01836
01837 for (t=0;t<k;t++)
01838 {
01839 p[t]=1.0/k;
01840 Q[t]=Malloc(double,k);
01841 Q[t][t]=0;
01842 for (j=0;j<t;j++)
01843 {
01844 Q[t][t]+=r[j][t]*r[j][t];
01845 Q[t][j]=Q[j][t];
01846 }
01847 for (j=t+1;j<k;j++)
01848 {
01849 Q[t][t]+=r[j][t]*r[j][t];
01850 Q[t][j]=-r[j][t]*r[t][j];
01851 }
01852 }
01853 for (iter=0;iter<max_iter;iter++)
01854 {
01855
01856 pQp=0;
01857 for (t=0;t<k;t++)
01858 {
01859 Qp[t]=0;
01860 for (j=0;j<k;j++)
01861 Qp[t]+=Q[t][j]*p[j];
01862 pQp+=p[t]*Qp[t];
01863 }
01864 double max_error=0;
01865 for (t=0;t<k;t++)
01866 {
01867 double error=fabs(Qp[t]-pQp);
01868 if (error>max_error)
01869 max_error=error;
01870 }
01871 if (max_error<eps) break;
01872
01873 for (t=0;t<k;t++)
01874 {
01875 double diff=(-Qp[t]+pQp)/Q[t][t];
01876 p[t]+=diff;
01877 pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01878 for (j=0;j<k;j++)
01879 {
01880 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01881 p[j]/=(1+diff);
01882 }
01883 }
01884 }
01885 if (iter>=max_iter)
01886 info("Exceeds max_iter in multiclass_prob\n");
01887 for(t=0;t<k;t++) free(Q[t]);
01888 free(Q);
01889 free(Qp);
01890 }
01891
01892
01893 static void svm_binary_svc_probability(
01894 const svm_problem *prob, const svm_parameter *param,
01895 double Cp, double Cn, double& probA, double& probB)
01896 {
01897 int i;
01898 int nr_fold = 5;
01899 int *perm = Malloc(int,prob->l);
01900 double *dec_values = Malloc(double,prob->l);
01901
01902
01903 for(i=0;i<prob->l;i++) perm[i]=i;
01904 for(i=0;i<prob->l;i++)
01905 {
01906 int j = i+rand()%(prob->l-i);
01907 swap(perm[i],perm[j]);
01908 }
01909 for(i=0;i<nr_fold;i++)
01910 {
01911 int begin = i*prob->l/nr_fold;
01912 int end = (i+1)*prob->l/nr_fold;
01913 int j,k;
01914 struct svm_problem subprob;
01915
01916 subprob.l = prob->l-(end-begin);
01917 subprob.x = Malloc(struct svm_node*,subprob.l);
01918 subprob.y = Malloc(double,subprob.l);
01919
01920 k=0;
01921 for(j=0;j<begin;j++)
01922 {
01923 subprob.x[k] = prob->x[perm[j]];
01924 subprob.y[k] = prob->y[perm[j]];
01925 ++k;
01926 }
01927 for(j=end;j<prob->l;j++)
01928 {
01929 subprob.x[k] = prob->x[perm[j]];
01930 subprob.y[k] = prob->y[perm[j]];
01931 ++k;
01932 }
01933 int p_count=0,n_count=0;
01934 for(j=0;j<k;j++)
01935 if(subprob.y[j]>0)
01936 p_count++;
01937 else
01938 n_count++;
01939
01940 if(p_count==0 && n_count==0)
01941 for(j=begin;j<end;j++)
01942 dec_values[perm[j]] = 0;
01943 else if(p_count > 0 && n_count == 0)
01944 for(j=begin;j<end;j++)
01945 dec_values[perm[j]] = 1;
01946 else if(p_count == 0 && n_count > 0)
01947 for(j=begin;j<end;j++)
01948 dec_values[perm[j]] = -1;
01949 else
01950 {
01951 svm_parameter subparam = *param;
01952 subparam.probability=0;
01953 subparam.C=1.0;
01954 subparam.nr_weight=2;
01955 subparam.weight_label = Malloc(int,2);
01956 subparam.weight = Malloc(double,2);
01957 subparam.weight_label[0]=+1;
01958 subparam.weight_label[1]=-1;
01959 subparam.weight[0]=Cp;
01960 subparam.weight[1]=Cn;
01961 struct svm_model *submodel = svm_train(&subprob,&subparam);
01962 for(j=begin;j<end;j++)
01963 {
01964 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
01965
01966 dec_values[perm[j]] *= submodel->label[0];
01967 }
01968 svm_free_and_destroy_model(&submodel);
01969 svm_destroy_param(&subparam);
01970 }
01971 free(subprob.x);
01972 free(subprob.y);
01973 }
01974 sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
01975 free(dec_values);
01976 free(perm);
01977 }
01978
01979
01980 static double svm_svr_probability(
01981 const svm_problem *prob, const svm_parameter *param)
01982 {
01983 int i;
01984 int nr_fold = 5;
01985 double *ymv = Malloc(double,prob->l);
01986 double mae = 0;
01987
01988 svm_parameter newparam = *param;
01989 newparam.probability = 0;
01990 svm_cross_validation(prob,&newparam,nr_fold,ymv);
01991 for(i=0;i<prob->l;i++)
01992 {
01993 ymv[i]=prob->y[i]-ymv[i];
01994 mae += fabs(ymv[i]);
01995 }
01996 mae /= prob->l;
01997 double std=sqrt(2*mae*mae);
01998 int count=0;
01999 mae=0;
02000 for(i=0;i<prob->l;i++)
02001 if (fabs(ymv[i]) > 5*std)
02002 count=count+1;
02003 else
02004 mae+=fabs(ymv[i]);
02005 mae /= (prob->l-count);
02006 info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
02007 free(ymv);
02008 return mae;
02009 }
02010
02011
02012
02013
02014 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02015 {
02016 int l = prob->l;
02017 int max_nr_class = 16;
02018 int nr_class = 0;
02019 int *label = Malloc(int,max_nr_class);
02020 int *count = Malloc(int,max_nr_class);
02021 int *data_label = Malloc(int,l);
02022 int i;
02023
02024 for(i=0;i<l;i++)
02025 {
02026 int this_label = (int)prob->y[i];
02027 int j;
02028 for(j=0;j<nr_class;j++)
02029 {
02030 if(this_label == label[j])
02031 {
02032 ++count[j];
02033 break;
02034 }
02035 }
02036 data_label[i] = j;
02037 if(j == nr_class)
02038 {
02039 if(nr_class == max_nr_class)
02040 {
02041 max_nr_class *= 2;
02042 label = (int *)realloc(label,max_nr_class*sizeof(int));
02043 count = (int *)realloc(count,max_nr_class*sizeof(int));
02044 }
02045 label[nr_class] = this_label;
02046 count[nr_class] = 1;
02047 ++nr_class;
02048 }
02049 }
02050
02051 int *start = Malloc(int,nr_class);
02052 start[0] = 0;
02053 for(i=1;i<nr_class;i++)
02054 start[i] = start[i-1]+count[i-1];
02055 for(i=0;i<l;i++)
02056 {
02057 perm[start[data_label[i]]] = i;
02058 ++start[data_label[i]];
02059 }
02060 start[0] = 0;
02061 for(i=1;i<nr_class;i++)
02062 start[i] = start[i-1]+count[i-1];
02063
02064 *nr_class_ret = nr_class;
02065 *label_ret = label;
02066 *start_ret = start;
02067 *count_ret = count;
02068 free(data_label);
02069 }
02070
02071
02072
02073
02074 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02075 {
02076 svm_model *model = Malloc(svm_model,1);
02077 model->param = *param;
02078 model->free_sv = 0;
02079
02080 if(param->svm_type == ONE_CLASS ||
02081 param->svm_type == EPSILON_SVR ||
02082 param->svm_type == NU_SVR)
02083 {
02084
02085 model->nr_class = 2;
02086 model->label = NULL;
02087 model->nSV = NULL;
02088 model->probA = NULL; model->probB = NULL;
02089 model->sv_coef = Malloc(double *,1);
02090
02091 if(param->probability &&
02092 (param->svm_type == EPSILON_SVR ||
02093 param->svm_type == NU_SVR))
02094 {
02095 model->probA = Malloc(double,1);
02096 model->probA[0] = svm_svr_probability(prob,param);
02097 }
02098
02099 decision_function f = svm_train_one(prob,param,0,0);
02100 model->rho = Malloc(double,1);
02101 model->rho[0] = f.rho;
02102
02103 int nSV = 0;
02104 int i;
02105 for(i=0;i<prob->l;i++)
02106 if(fabs(f.alpha[i]) > 0) ++nSV;
02107 model->l = nSV;
02108 model->SV = Malloc(svm_node *,nSV);
02109 model->sv_coef[0] = Malloc(double,nSV);
02110 int j = 0;
02111 for(i=0;i<prob->l;i++)
02112 if(fabs(f.alpha[i]) > 0)
02113 {
02114 model->SV[j] = prob->x[i];
02115 model->sv_coef[0][j] = f.alpha[i];
02116 ++j;
02117 }
02118
02119 free(f.alpha);
02120 }
02121 else
02122 {
02123
02124 int l = prob->l;
02125 int nr_class;
02126 int *label = NULL;
02127 int *start = NULL;
02128 int *count = NULL;
02129 int *perm = Malloc(int,l);
02130
02131
02132 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02133 if(nr_class == 1)
02134 info("WARNING: training data in only one class. See README for details.\n");
02135
02136 svm_node **x = Malloc(svm_node *,l);
02137 int i;
02138 for(i=0;i<l;i++)
02139 x[i] = prob->x[perm[i]];
02140
02141
02142
02143 double *weighted_C = Malloc(double, nr_class);
02144 for(i=0;i<nr_class;i++)
02145 weighted_C[i] = param->C;
02146 for(i=0;i<param->nr_weight;i++)
02147 {
02148 int j;
02149 for(j=0;j<nr_class;j++)
02150 if(param->weight_label[i] == label[j])
02151 break;
02152 if(j == nr_class)
02153 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
02154 else
02155 weighted_C[j] *= param->weight[i];
02156 }
02157
02158
02159
02160 bool *nonzero = Malloc(bool,l);
02161 for(i=0;i<l;i++)
02162 nonzero[i] = false;
02163 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02164
02165 double *probA=NULL,*probB=NULL;
02166 if (param->probability)
02167 {
02168 probA=Malloc(double,nr_class*(nr_class-1)/2);
02169 probB=Malloc(double,nr_class*(nr_class-1)/2);
02170 }
02171
02172 int p = 0;
02173 for(i=0;i<nr_class;i++)
02174 for(int j=i+1;j<nr_class;j++)
02175 {
02176 svm_problem sub_prob;
02177 int si = start[i], sj = start[j];
02178 int ci = count[i], cj = count[j];
02179 sub_prob.l = ci+cj;
02180 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02181 sub_prob.y = Malloc(double,sub_prob.l);
02182 int k;
02183 for(k=0;k<ci;k++)
02184 {
02185 sub_prob.x[k] = x[si+k];
02186 sub_prob.y[k] = +1;
02187 }
02188 for(k=0;k<cj;k++)
02189 {
02190 sub_prob.x[ci+k] = x[sj+k];
02191 sub_prob.y[ci+k] = -1;
02192 }
02193
02194 if(param->probability)
02195 svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02196
02197 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02198 for(k=0;k<ci;k++)
02199 if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02200 nonzero[si+k] = true;
02201 for(k=0;k<cj;k++)
02202 if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02203 nonzero[sj+k] = true;
02204 free(sub_prob.x);
02205 free(sub_prob.y);
02206 ++p;
02207 }
02208
02209
02210
02211 model->nr_class = nr_class;
02212
02213 model->label = Malloc(int,nr_class);
02214 for(i=0;i<nr_class;i++)
02215 model->label[i] = label[i];
02216
02217 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02218 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02219 model->rho[i] = f[i].rho;
02220
02221 if(param->probability)
02222 {
02223 model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02224 model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02225 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02226 {
02227 model->probA[i] = probA[i];
02228 model->probB[i] = probB[i];
02229 }
02230 }
02231 else
02232 {
02233 model->probA=NULL;
02234 model->probB=NULL;
02235 }
02236
02237 int total_sv = 0;
02238 int *nz_count = Malloc(int,nr_class);
02239 model->nSV = Malloc(int,nr_class);
02240 for(i=0;i<nr_class;i++)
02241 {
02242 int nSV = 0;
02243 for(int j=0;j<count[i];j++)
02244 if(nonzero[start[i]+j])
02245 {
02246 ++nSV;
02247 ++total_sv;
02248 }
02249 model->nSV[i] = nSV;
02250 nz_count[i] = nSV;
02251 }
02252
02253 info("Total nSV = %d\n",total_sv);
02254
02255 model->l = total_sv;
02256 model->SV = Malloc(svm_node *,total_sv);
02257 p = 0;
02258 for(i=0;i<l;i++)
02259 if(nonzero[i]) model->SV[p++] = x[i];
02260
02261 int *nz_start = Malloc(int,nr_class);
02262 nz_start[0] = 0;
02263 for(i=1;i<nr_class;i++)
02264 nz_start[i] = nz_start[i-1]+nz_count[i-1];
02265
02266 model->sv_coef = Malloc(double *,nr_class-1);
02267 for(i=0;i<nr_class-1;i++)
02268 model->sv_coef[i] = Malloc(double,total_sv);
02269
02270 p = 0;
02271 for(i=0;i<nr_class;i++)
02272 for(int j=i+1;j<nr_class;j++)
02273 {
02274
02275
02276
02277
02278 int si = start[i];
02279 int sj = start[j];
02280 int ci = count[i];
02281 int cj = count[j];
02282
02283 int q = nz_start[i];
02284 int k;
02285 for(k=0;k<ci;k++)
02286 if(nonzero[si+k])
02287 model->sv_coef[j-1][q++] = f[p].alpha[k];
02288 q = nz_start[j];
02289 for(k=0;k<cj;k++)
02290 if(nonzero[sj+k])
02291 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02292 ++p;
02293 }
02294
02295 free(label);
02296 free(probA);
02297 free(probB);
02298 free(count);
02299 free(perm);
02300 free(start);
02301 free(x);
02302 free(weighted_C);
02303 free(nonzero);
02304 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02305 free(f[i].alpha);
02306 free(f);
02307 free(nz_count);
02308 free(nz_start);
02309 }
02310 return model;
02311 }
02312
02313
02314 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02315 {
02316 int i;
02317 int *fold_start = Malloc(int,nr_fold+1);
02318 int l = prob->l;
02319 int *perm = Malloc(int,l);
02320 int nr_class;
02321
02322
02323
02324 if((param->svm_type == C_SVC ||
02325 param->svm_type == NU_SVC) && nr_fold < l)
02326 {
02327 int *start = NULL;
02328 int *label = NULL;
02329 int *count = NULL;
02330 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02331
02332
02333 int *fold_count = Malloc(int,nr_fold);
02334 int c;
02335 int *index = Malloc(int,l);
02336 for(i=0;i<l;i++)
02337 index[i]=perm[i];
02338 for (c=0; c<nr_class; c++)
02339 for(i=0;i<count[c];i++)
02340 {
02341 int j = i+rand()%(count[c]-i);
02342 swap(index[start[c]+j],index[start[c]+i]);
02343 }
02344 for(i=0;i<nr_fold;i++)
02345 {
02346 fold_count[i] = 0;
02347 for (c=0; c<nr_class;c++)
02348 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02349 }
02350 fold_start[0]=0;
02351 for (i=1;i<=nr_fold;i++)
02352 fold_start[i] = fold_start[i-1]+fold_count[i-1];
02353 for (c=0; c<nr_class;c++)
02354 for(i=0;i<nr_fold;i++)
02355 {
02356 int begin = start[c]+i*count[c]/nr_fold;
02357 int end = start[c]+(i+1)*count[c]/nr_fold;
02358 for(int j=begin;j<end;j++)
02359 {
02360 perm[fold_start[i]] = index[j];
02361 fold_start[i]++;
02362 }
02363 }
02364 fold_start[0]=0;
02365 for (i=1;i<=nr_fold;i++)
02366 fold_start[i] = fold_start[i-1]+fold_count[i-1];
02367 free(start);
02368 free(label);
02369 free(count);
02370 free(index);
02371 free(fold_count);
02372 }
02373 else
02374 {
02375 for(i=0;i<l;i++) perm[i]=i;
02376 for(i=0;i<l;i++)
02377 {
02378 int j = i+rand()%(l-i);
02379 swap(perm[i],perm[j]);
02380 }
02381 for(i=0;i<=nr_fold;i++)
02382 fold_start[i]=i*l/nr_fold;
02383 }
02384
02385 for(i=0;i<nr_fold;i++)
02386 {
02387 int begin = fold_start[i];
02388 int end = fold_start[i+1];
02389 int j,k;
02390 struct svm_problem subprob;
02391
02392 subprob.l = l-(end-begin);
02393 subprob.x = Malloc(struct svm_node*,subprob.l);
02394 subprob.y = Malloc(double,subprob.l);
02395
02396 k=0;
02397 for(j=0;j<begin;j++)
02398 {
02399 subprob.x[k] = prob->x[perm[j]];
02400 subprob.y[k] = prob->y[perm[j]];
02401 ++k;
02402 }
02403 for(j=end;j<l;j++)
02404 {
02405 subprob.x[k] = prob->x[perm[j]];
02406 subprob.y[k] = prob->y[perm[j]];
02407 ++k;
02408 }
02409 struct svm_model *submodel = svm_train(&subprob,param);
02410 if(param->probability &&
02411 (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02412 {
02413 double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02414 for(j=begin;j<end;j++)
02415 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02416 free(prob_estimates);
02417 }
02418 else
02419 for(j=begin;j<end;j++)
02420 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02421 svm_free_and_destroy_model(&submodel);
02422 free(subprob.x);
02423 free(subprob.y);
02424 }
02425 free(fold_start);
02426 free(perm);
02427 }
02428
02429
02430 int svm_get_svm_type(const svm_model *model)
02431 {
02432 return model->param.svm_type;
02433 }
02434
02435 int svm_get_nr_class(const svm_model *model)
02436 {
02437 return model->nr_class;
02438 }
02439
02440 void svm_get_labels(const svm_model *model, int* label)
02441 {
02442 if (model->label != NULL)
02443 for(int i=0;i<model->nr_class;i++)
02444 label[i] = model->label[i];
02445 }
02446
02447 double svm_get_svr_probability(const svm_model *model)
02448 {
02449 if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02450 model->probA!=NULL)
02451 return model->probA[0];
02452 else
02453 {
02454 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
02455 return 0;
02456 }
02457 }
02458
02459 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02460 {
02461 int i;
02462 if(model->param.svm_type == ONE_CLASS ||
02463 model->param.svm_type == EPSILON_SVR ||
02464 model->param.svm_type == NU_SVR)
02465 {
02466 double *sv_coef = model->sv_coef[0];
02467 double sum = 0;
02468 for(i=0;i<model->l;i++)
02469 sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02470 sum -= model->rho[0];
02471 *dec_values = sum;
02472
02473 if(model->param.svm_type == ONE_CLASS)
02474 return (sum>0)?1:-1;
02475 else
02476 return sum;
02477 }
02478 else
02479 {
02480 int nr_class = model->nr_class;
02481 int l = model->l;
02482
02483 double *kvalue = Malloc(double,l);
02484 for(i=0;i<l;i++)
02485 kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02486
02487 int *start = Malloc(int,nr_class);
02488 start[0] = 0;
02489 for(i=1;i<nr_class;i++)
02490 start[i] = start[i-1]+model->nSV[i-1];
02491
02492 int *vote = Malloc(int,nr_class);
02493 for(i=0;i<nr_class;i++)
02494 vote[i] = 0;
02495
02496 int p=0;
02497 for(i=0;i<nr_class;i++)
02498 for(int j=i+1;j<nr_class;j++)
02499 {
02500 double sum = 0;
02501 int si = start[i];
02502 int sj = start[j];
02503 int ci = model->nSV[i];
02504 int cj = model->nSV[j];
02505
02506 int k;
02507 double *coef1 = model->sv_coef[j-1];
02508 double *coef2 = model->sv_coef[i];
02509 for(k=0;k<ci;k++)
02510 sum += coef1[si+k] * kvalue[si+k];
02511 for(k=0;k<cj;k++)
02512 sum += coef2[sj+k] * kvalue[sj+k];
02513 sum -= model->rho[p];
02514 dec_values[p] = sum;
02515
02516 if(dec_values[p] > 0)
02517 ++vote[i];
02518 else
02519 ++vote[j];
02520 p++;
02521 }
02522
02523 int vote_max_idx = 0;
02524 for(i=1;i<nr_class;i++)
02525 if(vote[i] > vote[vote_max_idx])
02526 vote_max_idx = i;
02527
02528 free(kvalue);
02529 free(start);
02530 free(vote);
02531 return model->label[vote_max_idx];
02532 }
02533 }
02534
02535 double svm_predict(const svm_model *model, const svm_node *x)
02536 {
02537 int nr_class = model->nr_class;
02538 double *dec_values;
02539 if(model->param.svm_type == ONE_CLASS ||
02540 model->param.svm_type == EPSILON_SVR ||
02541 model->param.svm_type == NU_SVR)
02542 dec_values = Malloc(double, 1);
02543 else
02544 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02545 double pred_result = svm_predict_values(model, x, dec_values);
02546 free(dec_values);
02547 return pred_result;
02548 }
02549
02550 double svm_predict_probability(
02551 const svm_model *model, const svm_node *x, double *prob_estimates)
02552 {
02553 if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02554 model->probA!=NULL && model->probB!=NULL)
02555 {
02556 int i;
02557 int nr_class = model->nr_class;
02558 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02559 svm_predict_values(model, x, dec_values);
02560
02561 double min_prob=1e-7;
02562 double **pairwise_prob=Malloc(double *,nr_class);
02563 for(i=0;i<nr_class;i++)
02564 pairwise_prob[i]=Malloc(double,nr_class);
02565 int k=0;
02566 for(i=0;i<nr_class;i++)
02567 for(int j=i+1;j<nr_class;j++)
02568 {
02569 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02570 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02571 k++;
02572 }
02573 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02574
02575 int prob_max_idx = 0;
02576 for(i=1;i<nr_class;i++)
02577 if(prob_estimates[i] > prob_estimates[prob_max_idx])
02578 prob_max_idx = i;
02579 for(i=0;i<nr_class;i++)
02580 free(pairwise_prob[i]);
02581 free(dec_values);
02582 free(pairwise_prob);
02583 return model->label[prob_max_idx];
02584 }
02585 else
02586 return svm_predict(model, x);
02587 }
02588
02589 static const char *svm_type_table[] =
02590 {
02591 "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02592 };
02593
02594 static const char *kernel_type_table[]=
02595 {
02596 "linear","polynomial","rbf","sigmoid","precomputed",NULL
02597 };
02598
02599 int svm_save_model(const char *model_file_name, const svm_model *model)
02600 {
02601 FILE *fp = fopen(model_file_name,"w");
02602 if(fp==NULL) return -1;
02603
02604 char *old_locale = strdup(setlocale(LC_ALL, NULL));
02605 setlocale(LC_ALL, "C");
02606
02607 const svm_parameter& param = model->param;
02608
02609 fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02610 fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02611
02612 if(param.kernel_type == POLY)
02613 fprintf(fp,"degree %d\n", param.degree);
02614
02615 if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02616 fprintf(fp,"gamma %g\n", param.gamma);
02617
02618 if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02619 fprintf(fp,"coef0 %g\n", param.coef0);
02620
02621 int nr_class = model->nr_class;
02622 int l = model->l;
02623 fprintf(fp, "nr_class %d\n", nr_class);
02624 fprintf(fp, "total_sv %d\n",l);
02625
02626 {
02627 fprintf(fp, "rho");
02628 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02629 fprintf(fp," %g",model->rho[i]);
02630 fprintf(fp, "\n");
02631 }
02632
02633 if(model->label)
02634 {
02635 fprintf(fp, "label");
02636 for(int i=0;i<nr_class;i++)
02637 fprintf(fp," %d",model->label[i]);
02638 fprintf(fp, "\n");
02639 }
02640
02641 if(model->probA)
02642 {
02643 fprintf(fp, "probA");
02644 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02645 fprintf(fp," %g",model->probA[i]);
02646 fprintf(fp, "\n");
02647 }
02648 if(model->probB)
02649 {
02650 fprintf(fp, "probB");
02651 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02652 fprintf(fp," %g",model->probB[i]);
02653 fprintf(fp, "\n");
02654 }
02655
02656 if(model->nSV)
02657 {
02658 fprintf(fp, "nr_sv");
02659 for(int i=0;i<nr_class;i++)
02660 fprintf(fp," %d",model->nSV[i]);
02661 fprintf(fp, "\n");
02662 }
02663
02664 fprintf(fp, "SV\n");
02665 const double * const *sv_coef = model->sv_coef;
02666 const svm_node * const *SV = model->SV;
02667
02668 for(int i=0;i<l;i++)
02669 {
02670 for(int j=0;j<nr_class-1;j++)
02671 fprintf(fp, "%.16g ",sv_coef[j][i]);
02672
02673 const svm_node *p = SV[i];
02674
02675 if(param.kernel_type == PRECOMPUTED)
02676 fprintf(fp,"0:%d ",(int)(p->value));
02677 else
02678 while(p->index != -1)
02679 {
02680 fprintf(fp,"%d:%.8g ",p->index,p->value);
02681 p++;
02682 }
02683 fprintf(fp, "\n");
02684 }
02685
02686 setlocale(LC_ALL, old_locale);
02687 free(old_locale);
02688
02689 if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02690 else return 0;
02691 }
02692
02693 static char *line = NULL;
02694 static int max_line_len;
02695
02696 static char* readline(FILE *input)
02697 {
02698 int len;
02699
02700 if(fgets(line,max_line_len,input) == NULL)
02701 return NULL;
02702
02703 while(strrchr(line,'\n') == NULL)
02704 {
02705 max_line_len *= 2;
02706 line = (char *) realloc(line,max_line_len);
02707 len = (int) strlen(line);
02708 if(fgets(line+len,max_line_len-len,input) == NULL)
02709 break;
02710 }
02711 return line;
02712 }
02713
02714 svm_model *svm_load_model(const char *model_file_name)
02715 {
02716 FILE *fp = fopen(model_file_name,"rb");
02717 if(fp==NULL) return NULL;
02718
02719 char *old_locale = strdup(setlocale(LC_ALL, NULL));
02720 setlocale(LC_ALL, "C");
02721
02722
02723
02724 svm_model *model = Malloc(svm_model,1);
02725 svm_parameter& param = model->param;
02726 model->rho = NULL;
02727 model->probA = NULL;
02728 model->probB = NULL;
02729 model->label = NULL;
02730 model->nSV = NULL;
02731
02732 char cmd[81];
02733 while(1)
02734 {
02735 fscanf(fp,"%80s",cmd);
02736
02737 if(strcmp(cmd,"svm_type")==0)
02738 {
02739 fscanf(fp,"%80s",cmd);
02740 int i;
02741 for(i=0;svm_type_table[i];i++)
02742 {
02743 if(strcmp(svm_type_table[i],cmd)==0)
02744 {
02745 param.svm_type=i;
02746 break;
02747 }
02748 }
02749 if(svm_type_table[i] == NULL)
02750 {
02751 fprintf(stderr,"unknown svm type.\n");
02752
02753 setlocale(LC_ALL, old_locale);
02754 free(old_locale);
02755 free(model->rho);
02756 free(model->label);
02757 free(model->nSV);
02758 free(model);
02759 return NULL;
02760 }
02761 }
02762 else if(strcmp(cmd,"kernel_type")==0)
02763 {
02764 fscanf(fp,"%80s",cmd);
02765 int i;
02766 for(i=0;kernel_type_table[i];i++)
02767 {
02768 if(strcmp(kernel_type_table[i],cmd)==0)
02769 {
02770 param.kernel_type=i;
02771 break;
02772 }
02773 }
02774 if(kernel_type_table[i] == NULL)
02775 {
02776 fprintf(stderr,"unknown kernel function.\n");
02777
02778 setlocale(LC_ALL, old_locale);
02779 free(old_locale);
02780 free(model->rho);
02781 free(model->label);
02782 free(model->nSV);
02783 free(model);
02784 return NULL;
02785 }
02786 }
02787 else if(strcmp(cmd,"degree")==0)
02788 fscanf(fp,"%d",¶m.degree);
02789 else if(strcmp(cmd,"gamma")==0)
02790 fscanf(fp,"%lf",¶m.gamma);
02791 else if(strcmp(cmd,"coef0")==0)
02792 fscanf(fp,"%lf",¶m.coef0);
02793 else if(strcmp(cmd,"nr_class")==0)
02794 fscanf(fp,"%d",&model->nr_class);
02795 else if(strcmp(cmd,"total_sv")==0)
02796 fscanf(fp,"%d",&model->l);
02797 else if(strcmp(cmd,"rho")==0)
02798 {
02799 int n = model->nr_class * (model->nr_class-1)/2;
02800 model->rho = Malloc(double,n);
02801 for(int i=0;i<n;i++)
02802 fscanf(fp,"%lf",&model->rho[i]);
02803 }
02804 else if(strcmp(cmd,"label")==0)
02805 {
02806 int n = model->nr_class;
02807 model->label = Malloc(int,n);
02808 for(int i=0;i<n;i++)
02809 fscanf(fp,"%d",&model->label[i]);
02810 }
02811 else if(strcmp(cmd,"probA")==0)
02812 {
02813 int n = model->nr_class * (model->nr_class-1)/2;
02814 model->probA = Malloc(double,n);
02815 for(int i=0;i<n;i++)
02816 fscanf(fp,"%lf",&model->probA[i]);
02817 }
02818 else if(strcmp(cmd,"probB")==0)
02819 {
02820 int n = model->nr_class * (model->nr_class-1)/2;
02821 model->probB = Malloc(double,n);
02822 for(int i=0;i<n;i++)
02823 fscanf(fp,"%lf",&model->probB[i]);
02824 }
02825 else if(strcmp(cmd,"nr_sv")==0)
02826 {
02827 int n = model->nr_class;
02828 model->nSV = Malloc(int,n);
02829 for(int i=0;i<n;i++)
02830 fscanf(fp,"%d",&model->nSV[i]);
02831 }
02832 else if(strcmp(cmd,"SV")==0)
02833 {
02834 while(1)
02835 {
02836 int c = getc(fp);
02837 if(c==EOF || c=='\n') break;
02838 }
02839 break;
02840 }
02841 else
02842 {
02843 fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02844
02845 setlocale(LC_ALL, old_locale);
02846 free(old_locale);
02847 free(model->rho);
02848 free(model->label);
02849 free(model->nSV);
02850 free(model);
02851 return NULL;
02852 }
02853 }
02854
02855
02856
02857 int elements = 0;
02858 long pos = ftell(fp);
02859
02860 max_line_len = 1024;
02861 line = Malloc(char,max_line_len);
02862 char *p,*endptr,*idx,*val;
02863
02864 while(readline(fp)!=NULL)
02865 {
02866 p = strtok(line,":");
02867 while(1)
02868 {
02869 p = strtok(NULL,":");
02870 if(p == NULL)
02871 break;
02872 ++elements;
02873 }
02874 }
02875 elements += model->l;
02876
02877 fseek(fp,pos,SEEK_SET);
02878
02879 int m = model->nr_class - 1;
02880 int l = model->l;
02881 model->sv_coef = Malloc(double *,m);
02882 int i;
02883 for(i=0;i<m;i++)
02884 model->sv_coef[i] = Malloc(double,l);
02885 model->SV = Malloc(svm_node*,l);
02886 svm_node *x_space = NULL;
02887 if(l>0) x_space = Malloc(svm_node,elements);
02888
02889 int j=0;
02890 for(i=0;i<l;i++)
02891 {
02892 readline(fp);
02893 model->SV[i] = &x_space[j];
02894
02895 p = strtok(line, " \t");
02896 model->sv_coef[0][i] = strtod(p,&endptr);
02897 for(int k=1;k<m;k++)
02898 {
02899 p = strtok(NULL, " \t");
02900 model->sv_coef[k][i] = strtod(p,&endptr);
02901 }
02902
02903 while(1)
02904 {
02905 idx = strtok(NULL, ":");
02906 val = strtok(NULL, " \t");
02907
02908 if(val == NULL)
02909 break;
02910 x_space[j].index = (int) strtol(idx,&endptr,10);
02911 x_space[j].value = strtod(val,&endptr);
02912
02913 ++j;
02914 }
02915 x_space[j++].index = -1;
02916 }
02917 free(line);
02918
02919 setlocale(LC_ALL, old_locale);
02920 free(old_locale);
02921
02922 if (ferror(fp) != 0 || fclose(fp) != 0)
02923 return NULL;
02924
02925 model->free_sv = 1;
02926 return model;
02927 }
02928
02929 void svm_free_model_content(svm_model* model_ptr)
02930 {
02931 if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
02932 free((void *)(model_ptr->SV[0]));
02933 if(model_ptr->sv_coef)
02934 {
02935 for(int i=0;i<model_ptr->nr_class-1;i++)
02936 free(model_ptr->sv_coef[i]);
02937 }
02938
02939 free(model_ptr->SV);
02940 model_ptr->SV = NULL;
02941
02942 free(model_ptr->sv_coef);
02943 model_ptr->sv_coef = NULL;
02944
02945 free(model_ptr->rho);
02946 model_ptr->rho = NULL;
02947
02948 free(model_ptr->label);
02949 model_ptr->label= NULL;
02950
02951 free(model_ptr->probA);
02952 model_ptr->probA = NULL;
02953
02954 free(model_ptr->probB);
02955 model_ptr->probB= NULL;
02956
02957 free(model_ptr->nSV);
02958 model_ptr->nSV = NULL;
02959 }
02960
02961 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
02962 {
02963 if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
02964 {
02965 svm_free_model_content(*model_ptr_ptr);
02966 free(*model_ptr_ptr);
02967 *model_ptr_ptr = NULL;
02968 }
02969 }
02970
02971 void svm_destroy_param(svm_parameter* param)
02972 {
02973 free(param->weight_label);
02974 free(param->weight);
02975 }
02976
02977 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
02978 {
02979
02980
02981 int svm_type = param->svm_type;
02982 if(svm_type != C_SVC &&
02983 svm_type != NU_SVC &&
02984 svm_type != ONE_CLASS &&
02985 svm_type != EPSILON_SVR &&
02986 svm_type != NU_SVR)
02987 return "unknown svm type";
02988
02989
02990
02991 int kernel_type = param->kernel_type;
02992 if(kernel_type != LINEAR &&
02993 kernel_type != POLY &&
02994 kernel_type != RBF &&
02995 kernel_type != SIGMOID &&
02996 kernel_type != PRECOMPUTED)
02997 return "unknown kernel type";
02998
02999 if(param->gamma < 0)
03000 return "gamma < 0";
03001
03002 if(param->degree < 0)
03003 return "degree of polynomial kernel < 0";
03004
03005
03006
03007 if(param->cache_size <= 0)
03008 return "cache_size <= 0";
03009
03010 if(param->eps <= 0)
03011 return "eps <= 0";
03012
03013 if(svm_type == C_SVC ||
03014 svm_type == EPSILON_SVR ||
03015 svm_type == NU_SVR)
03016 if(param->C <= 0)
03017 return "C <= 0";
03018
03019 if(svm_type == NU_SVC ||
03020 svm_type == ONE_CLASS ||
03021 svm_type == NU_SVR)
03022 if(param->nu <= 0 || param->nu > 1)
03023 return "nu <= 0 or nu > 1";
03024
03025 if(svm_type == EPSILON_SVR)
03026 if(param->p < 0)
03027 return "p < 0";
03028
03029 if(param->shrinking != 0 &&
03030 param->shrinking != 1)
03031 return "shrinking != 0 and shrinking != 1";
03032
03033 if(param->probability != 0 &&
03034 param->probability != 1)
03035 return "probability != 0 and probability != 1";
03036
03037 if(param->probability == 1 &&
03038 svm_type == ONE_CLASS)
03039 return "one-class SVM probability output not supported yet";
03040
03041
03042
03043
03044 if(svm_type == NU_SVC)
03045 {
03046 int l = prob->l;
03047 int max_nr_class = 16;
03048 int nr_class = 0;
03049 int *label = Malloc(int,max_nr_class);
03050 int *count = Malloc(int,max_nr_class);
03051
03052 int i;
03053 for(i=0;i<l;i++)
03054 {
03055 int this_label = (int)prob->y[i];
03056 int j;
03057 for(j=0;j<nr_class;j++)
03058 if(this_label == label[j])
03059 {
03060 ++count[j];
03061 break;
03062 }
03063 if(j == nr_class)
03064 {
03065 if(nr_class == max_nr_class)
03066 {
03067 max_nr_class *= 2;
03068 label = (int *)realloc(label,max_nr_class*sizeof(int));
03069 count = (int *)realloc(count,max_nr_class*sizeof(int));
03070 }
03071 label[nr_class] = this_label;
03072 count[nr_class] = 1;
03073 ++nr_class;
03074 }
03075 }
03076
03077 for(i=0;i<nr_class;i++)
03078 {
03079 int n1 = count[i];
03080 for(int j=i+1;j<nr_class;j++)
03081 {
03082 int n2 = count[j];
03083 if(param->nu*(n1+n2)/2 > min(n1,n2))
03084 {
03085 free(label);
03086 free(count);
03087 return "specified nu is infeasible";
03088 }
03089 }
03090 }
03091 free(label);
03092 free(count);
03093 }
03094
03095 return NULL;
03096 }
03097
03098 int svm_check_probability_model(const svm_model *model)
03099 {
03100 return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03101 model->probA!=NULL && model->probB!=NULL) ||
03102 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03103 model->probA!=NULL);
03104 }
03105
03106 void svm_set_print_string_function(void (*print_func)(const char *))
03107 {
03108 if(print_func == NULL)
03109 svm_print_string = &print_string_stdout;
03110 else
03111 svm_print_string = print_func;
03112 }