cl_libsvm: svm.cpp Source File

Go to the documentation of this file.
00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <ctype.h>
00005 #include <float.h>
00006 #include <string.h>
00007 #include <stdarg.h>
00008 #include "svm.h"
00009 typedef float Qfloat;
00010 typedef signed char schar;
00011 #ifndef min
00012 template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
00013 #endif
00014 #ifndef max
00015 template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
00016 #endif
00017 template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00018 template <class S, class T> inline void clone(T*& dst, S* src, int n)
00019 {
00020         dst = new T[n];
00021         memcpy((void *)dst,(void *)src,sizeof(T)*n);
00022 }
00023 inline double powi(double base, int times)
00024 {
00025         double tmp = base, ret = 1.0;
00026 
00027         for(int t=times; t>0; t/=2)
00028         {
00029                 if(t%2==1) ret*=tmp;
00030                 tmp = tmp * tmp;
00031         }
00032         return ret;
00033 }
00034 #define INF HUGE_VAL
00035 #define TAU 1e-12
00036 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00037 #if 0
00038 void info(const char *fmt,...)
00039 {
00040         va_list ap;
00041         va_start(ap,fmt);
00042         vprintf(fmt,ap);
00043         va_end(ap);
00044 }
00045 void info_flush()
00046 {
00047         fflush(stdout);
00048 }
00049 #else
00050 void info(char *fmt,...) {}
00051 void info_flush() {}
00052 #endif
00053 
00054 //
00055 // Kernel Cache
00056 //
00057 // l is the number of total data items
00058 // size is the cache size limit in bytes
00059 //
00060 class Cache
00061 {
00062 public:
00063         Cache(int l,long int size);
00064         ~Cache();
00065 
00066         // request data [0,len)
00067         // return some position p where [p,len) need to be filled
00068         // (p >= len if nothing needs to be filled)
00069         int get_data(const int index, Qfloat **data, int len);
00070         void swap_index(int i, int j);  // future_option
00071 private:
00072         int l;
00073         long int size;
00074         struct head_t
00075         {
00076                 head_t *prev, *next;    // a cicular list
00077                 Qfloat *data;
00078                 int len;                // data[0,len) is cached in this entry
00079         };
00080 
00081         head_t *head;
00082         head_t lru_head;
00083         void lru_delete(head_t *h);
00084         void lru_insert(head_t *h);
00085 };
00086 
00087 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00088 {
00089         head = (head_t *)calloc(l,sizeof(head_t));      // initialized to 0
00090         size /= sizeof(Qfloat);
00091         size -= l * sizeof(head_t) / sizeof(Qfloat);
00092         size = max(size, 2 * (long int) l);     // cache must be large enough for two columns
00093         lru_head.next = lru_head.prev = &lru_head;
00094 }
00095 
00096 Cache::~Cache()
00097 {
00098         for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00099                 free(h->data);
00100         free(head);
00101 }
00102 
00103 void Cache::lru_delete(head_t *h)
00104 {
00105         // delete from current location
00106         h->prev->next = h->next;
00107         h->next->prev = h->prev;
00108 }
00109 
00110 void Cache::lru_insert(head_t *h)
00111 {
00112         // insert to last position
00113         h->next = &lru_head;
00114         h->prev = lru_head.prev;
00115         h->prev->next = h;
00116         h->next->prev = h;
00117 }
00118 
00119 int Cache::get_data(const int index, Qfloat **data, int len)
00120 {
00121         head_t *h = &head[index];
00122         if(h->len) lru_delete(h);
00123         int more = len - h->len;
00124 
00125         if(more > 0)
00126         {
00127                 // free old space
00128                 while(size < more)
00129                 {
00130                         head_t *old = lru_head.next;
00131                         lru_delete(old);
00132                         free(old->data);
00133                         size += old->len;
00134                         old->data = 0;
00135                         old->len = 0;
00136                 }
00137 
00138                 // allocate new space
00139                 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00140                 size -= more;
00141                 swap(h->len,len);
00142         }
00143 
00144         lru_insert(h);
00145         *data = h->data;
00146         return len;
00147 }
00148 
00149 void Cache::swap_index(int i, int j)
00150 {
00151         if(i==j) return;
00152 
00153         if(head[i].len) lru_delete(&head[i]);
00154         if(head[j].len) lru_delete(&head[j]);
00155         swap(head[i].data,head[j].data);
00156         swap(head[i].len,head[j].len);
00157         if(head[i].len) lru_insert(&head[i]);
00158         if(head[j].len) lru_insert(&head[j]);
00159 
00160         if(i>j) swap(i,j);
00161         for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00162         {
00163                 if(h->len > i)
00164                 {
00165                         if(h->len > j)
00166                                 swap(h->data[i],h->data[j]);
00167                         else
00168                         {
00169                                 // give up
00170                                 lru_delete(h);
00171                                 free(h->data);
00172                                 size += h->len;
00173                                 h->data = 0;
00174                                 h->len = 0;
00175                         }
00176                 }
00177         }
00178 }
00179 
00180 //
00181 // Kernel evaluation
00182 //
00183 // the static method k_function is for doing single kernel evaluation
00184 // the constructor of Kernel prepares to calculate the l*l kernel matrix
00185 // the member function get_Q is for getting one column from the Q Matrix
00186 //
00187 class QMatrix {
00188 public:
00189         virtual Qfloat *get_Q(int column, int len) const = 0;
00190         virtual Qfloat *get_QD() const = 0;
00191         virtual void swap_index(int i, int j) const = 0;
00192         virtual ~QMatrix() {}
00193 };
00194 
00195 class Kernel: public QMatrix {
00196 public:
00197         Kernel(int l, svm_node * const * x, const svm_parameter& param);
00198         virtual ~Kernel();
00199 
00200         static double k_function(const svm_node *x, const svm_node *y,
00201                                  const svm_parameter& param);
00202         virtual Qfloat *get_Q(int column, int len) const = 0;
00203         virtual Qfloat *get_QD() const = 0;
00204         virtual void swap_index(int i, int j) const     // no so const...
00205         {
00206                 swap(x[i],x[j]);
00207                 if(x_square) swap(x_square[i],x_square[j]);
00208         }
00209 protected:
00210 
00211         double (Kernel::*kernel_function)(int i, int j) const;
00212 
00213 private:
00214         const svm_node **x;
00215         double *x_square;
00216 
00217         // svm_parameter
00218         const int kernel_type;
00219         const int degree;
00220         const double gamma;
00221         const double coef0;
00222 
00223         static double dot(const svm_node *px, const svm_node *py);
00224         double kernel_linear(int i, int j) const
00225         {
00226                 return dot(x[i],x[j]);
00227         }
00228         double kernel_poly(int i, int j) const
00229         {
00230                 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00231         }
00232         double kernel_rbf(int i, int j) const
00233         {
00234                 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00235         }
00236         double kernel_sigmoid(int i, int j) const
00237         {
00238                 return tanh(gamma*dot(x[i],x[j])+coef0);
00239         }
00240         double kernel_precomputed(int i, int j) const
00241         {
00242                 return x[i][(int)(x[j][0].value)].value;
00243         }
00244 };
00245 
00246 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00247 :kernel_type(param.kernel_type), degree(param.degree),
00248  gamma(param.gamma), coef0(param.coef0)
00249 {
00250         switch(kernel_type)
00251         {
00252                 case LINEAR:
00253                         kernel_function = &Kernel::kernel_linear;
00254                         break;
00255                 case POLY:
00256                         kernel_function = &Kernel::kernel_poly;
00257                         break;
00258                 case RBF:
00259                         kernel_function = &Kernel::kernel_rbf;
00260                         break;
00261                 case SIGMOID:
00262                         kernel_function = &Kernel::kernel_sigmoid;
00263                         break;
00264                 case PRECOMPUTED:
00265                         kernel_function = &Kernel::kernel_precomputed;
00266                         break;
00267         }
00268 
00269         clone(x,x_,l);
00270 
00271         if(kernel_type == RBF)
00272         {
00273                 x_square = new double[l];
00274                 for(int i=0;i<l;i++)
00275                         x_square[i] = dot(x[i],x[i]);
00276         }
00277         else
00278                 x_square = 0;
00279 }
00280 
00281 Kernel::~Kernel()
00282 {
00283         delete[] x;
00284         delete[] x_square;
00285 }
00286 
00287 double Kernel::dot(const svm_node *px, const svm_node *py)
00288 {
00289         double sum = 0;
00290         while(px->index != -1 && py->index != -1)
00291         {
00292                 if(px->index == py->index)
00293                 {
00294                         sum += px->value * py->value;
00295                         ++px;
00296                         ++py;
00297                 }
00298                 else
00299                 {
00300                         if(px->index > py->index)
00301                                 ++py;
00302                         else
00303                                 ++px;
00304                 }                       
00305         }
00306         return sum;
00307 }
00308 
00309 double Kernel::k_function(const svm_node *x, const svm_node *y,
00310                           const svm_parameter& param)
00311 {
00312         switch(param.kernel_type)
00313         {
00314                 case LINEAR:
00315                         return dot(x,y);
00316                 case POLY:
00317                         return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00318                 case RBF:
00319                 {
00320                         double sum = 0;
00321                         while(x->index != -1 && y->index !=-1)
00322                         {
00323                                 if(x->index == y->index)
00324                                 {
00325                                         double d = x->value - y->value;
00326                                         sum += d*d;
00327                                         ++x;
00328                                         ++y;
00329                                 }
00330                                 else
00331                                 {
00332                                         if(x->index > y->index)
00333                                         {       
00334                                                 sum += y->value * y->value;
00335                                                 ++y;
00336                                         }
00337                                         else
00338                                         {
00339                                                 sum += x->value * x->value;
00340                                                 ++x;
00341                                         }
00342                                 }
00343                         }
00344 
00345                         while(x->index != -1)
00346                         {
00347                                 sum += x->value * x->value;
00348                                 ++x;
00349                         }
00350 
00351                         while(y->index != -1)
00352                         {
00353                                 sum += y->value * y->value;
00354                                 ++y;
00355                         }
00356                         
00357                         return exp(-param.gamma*sum);
00358                 }
00359                 case SIGMOID:
00360                         return tanh(param.gamma*dot(x,y)+param.coef0);
00361                 case PRECOMPUTED:  //x: test (validation), y: SV
00362                         return x[(int)(y->value)].value;
00363                 default:
00364                         return 0;  // Unreachable 
00365         }
00366 }
00367 
00368 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
00369 // Solves:
00370 //
00371 //      min 0.5(\alpha^T Q \alpha) + p^T \alpha
00372 //
00373 //              y^T \alpha = \delta
00374 //              y_i = +1 or -1
00375 //              0 <= alpha_i <= Cp for y_i = 1
00376 //              0 <= alpha_i <= Cn for y_i = -1
00377 //
00378 // Given:
00379 //
00380 //      Q, p, y, Cp, Cn, and an initial feasible point \alpha
00381 //      l is the size of vectors and matrices
00382 //      eps is the stopping tolerance
00383 //
00384 // solution will be put in \alpha, objective value will be put in obj
00385 //
00386 class Solver {
00387 public:
00388         Solver() {};
00389         virtual ~Solver() {};
00390 
00391         struct SolutionInfo {
00392                 double obj;
00393                 double rho;
00394                 double upper_bound_p;
00395                 double upper_bound_n;
00396                 double r;       // for Solver_NU
00397         };
00398 
00399         void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00400                    double *alpha_, double Cp, double Cn, double eps,
00401                    SolutionInfo* si, int shrinking);
00402 protected:
00403         int active_size;
00404         schar *y;
00405         double *G;              // gradient of objective function
00406         enum { LOWER_BOUND, UPPER_BOUND, FREE };
00407         char *alpha_status;     // LOWER_BOUND, UPPER_BOUND, FREE
00408         double *alpha;
00409         const QMatrix *Q;
00410         const Qfloat *QD;
00411         double eps;
00412         double Cp,Cn;
00413         double *p;
00414         int *active_set;
00415         double *G_bar;          // gradient, if we treat free variables as 0
00416         int l;
00417         bool unshrinked;        // XXX
00418 
00419         double get_C(int i)
00420         {
00421                 return (y[i] > 0)? Cp : Cn;
00422         }
00423         void update_alpha_status(int i)
00424         {
00425                 if(alpha[i] >= get_C(i))
00426                         alpha_status[i] = UPPER_BOUND;
00427                 else if(alpha[i] <= 0)
00428                         alpha_status[i] = LOWER_BOUND;
00429                 else alpha_status[i] = FREE;
00430         }
00431         bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00432         bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00433         bool is_free(int i) { return alpha_status[i] == FREE; }
00434         void swap_index(int i, int j);
00435         void reconstruct_gradient();
00436         virtual int select_working_set(int &i, int &j);
00437         virtual double calculate_rho();
00438         virtual void do_shrinking();
00439 private:
00440         bool be_shrunken(int i, double Gmax1, double Gmax2);    
00441 };
00442 
00443 void Solver::swap_index(int i, int j)
00444 {
00445         Q->swap_index(i,j);
00446         swap(y[i],y[j]);
00447         swap(G[i],G[j]);
00448         swap(alpha_status[i],alpha_status[j]);
00449         swap(alpha[i],alpha[j]);
00450         swap(p[i],p[j]);
00451         swap(active_set[i],active_set[j]);
00452         swap(G_bar[i],G_bar[j]);
00453 }
00454 
00455 void Solver::reconstruct_gradient()
00456 {
00457         // reconstruct inactive elements of G from G_bar and free variables
00458 
00459         if(active_size == l) return;
00460 
00461         int i;
00462         for(i=active_size;i<l;i++)
00463                 G[i] = G_bar[i] + p[i];
00464         
00465         for(i=0;i<active_size;i++)
00466                 if(is_free(i))
00467                 {
00468                         const Qfloat *Q_i = Q->get_Q(i,l);
00469                         double alpha_i = alpha[i];
00470                         for(int j=active_size;j<l;j++)
00471                                 G[j] += alpha_i * Q_i[j];
00472                 }
00473 }
00474 
00475 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00476                    double *alpha_, double Cp, double Cn, double eps,
00477                    SolutionInfo* si, int shrinking)
00478 {
00479         this->l = l;
00480         this->Q = &Q;
00481         QD=Q.get_QD();
00482         clone(p, p_,l);
00483         clone(y, y_,l);
00484         clone(alpha,alpha_,l);
00485         this->Cp = Cp;
00486         this->Cn = Cn;
00487         this->eps = eps;
00488         unshrinked = false;
00489 
00490         // initialize alpha_status
00491         {
00492                 alpha_status = new char[l];
00493                 for(int i=0;i<l;i++)
00494                         update_alpha_status(i);
00495         }
00496 
00497         // initialize active set (for shrinking)
00498         {
00499                 active_set = new int[l];
00500                 for(int i=0;i<l;i++)
00501                         active_set[i] = i;
00502                 active_size = l;
00503         }
00504 
00505         // initialize gradient
00506         {
00507                 G = new double[l];
00508                 G_bar = new double[l];
00509                 int i;
00510                 for(i=0;i<l;i++)
00511                 {
00512                         G[i] = p[i];
00513                         G_bar[i] = 0;
00514                 }
00515                 for(i=0;i<l;i++)
00516                         if(!is_lower_bound(i))
00517                         {
00518                                 const Qfloat *Q_i = Q.get_Q(i,l);
00519                                 double alpha_i = alpha[i];
00520                                 int j;
00521                                 for(j=0;j<l;j++)
00522                                         G[j] += alpha_i*Q_i[j];
00523                                 if(is_upper_bound(i))
00524                                         for(j=0;j<l;j++)
00525                                                 G_bar[j] += get_C(i) * Q_i[j];
00526                         }
00527         }
00528 
00529         // optimization step
00530 
00531         int iter = 0;
00532         int counter = min(l,1000)+1;
00533 
00534         while(1)
00535         {
00536                 // show progress and do shrinking
00537 
00538                 if(--counter == 0)
00539                 {
00540                         counter = min(l,1000);
00541                         if(shrinking) do_shrinking();
00542                         info("."); info_flush();
00543                 }
00544 
00545                 int i,j;
00546                 if(select_working_set(i,j)!=0)
00547                 {
00548                         // reconstruct the whole gradient
00549                         reconstruct_gradient();
00550                         // reset active set size and check
00551                         active_size = l;
00552                         info("*"); info_flush();
00553                         if(select_working_set(i,j)!=0)
00554                                 break;
00555                         else
00556                                 counter = 1;    // do shrinking next iteration
00557                 }
00558                 
00559                 ++iter;
00560 
00561                 // update alpha[i] and alpha[j], handle bounds carefully
00562                 
00563                 const Qfloat *Q_i = Q.get_Q(i,active_size);
00564                 const Qfloat *Q_j = Q.get_Q(j,active_size);
00565 
00566                 double C_i = get_C(i);
00567                 double C_j = get_C(j);
00568 
00569                 double old_alpha_i = alpha[i];
00570                 double old_alpha_j = alpha[j];
00571 
00572                 if(y[i]!=y[j])
00573                 {
00574                         double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
00575                         if (quad_coef <= 0)
00576                                 quad_coef = TAU;
00577                         double delta = (-G[i]-G[j])/quad_coef;
00578                         double diff = alpha[i] - alpha[j];
00579                         alpha[i] += delta;
00580                         alpha[j] += delta;
00581                         
00582                         if(diff > 0)
00583                         {
00584                                 if(alpha[j] < 0)
00585                                 {
00586                                         alpha[j] = 0;
00587                                         alpha[i] = diff;
00588                                 }
00589                         }
00590                         else
00591                         {
00592                                 if(alpha[i] < 0)
00593                                 {
00594                                         alpha[i] = 0;
00595                                         alpha[j] = -diff;
00596                                 }
00597                         }
00598                         if(diff > C_i - C_j)
00599                         {
00600                                 if(alpha[i] > C_i)
00601                                 {
00602                                         alpha[i] = C_i;
00603                                         alpha[j] = C_i - diff;
00604                                 }
00605                         }
00606                         else
00607                         {
00608                                 if(alpha[j] > C_j)
00609                                 {
00610                                         alpha[j] = C_j;
00611                                         alpha[i] = C_j + diff;
00612                                 }
00613                         }
00614                 }
00615                 else
00616                 {
00617                         double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
00618                         if (quad_coef <= 0)
00619                                 quad_coef = TAU;
00620                         double delta = (G[i]-G[j])/quad_coef;
00621                         double sum = alpha[i] + alpha[j];
00622                         alpha[i] -= delta;
00623                         alpha[j] += delta;
00624 
00625                         if(sum > C_i)
00626                         {
00627                                 if(alpha[i] > C_i)
00628                                 {
00629                                         alpha[i] = C_i;
00630                                         alpha[j] = sum - C_i;
00631                                 }
00632                         }
00633                         else
00634                         {
00635                                 if(alpha[j] < 0)
00636                                 {
00637                                         alpha[j] = 0;
00638                                         alpha[i] = sum;
00639                                 }
00640                         }
00641                         if(sum > C_j)
00642                         {
00643                                 if(alpha[j] > C_j)
00644                                 {
00645                                         alpha[j] = C_j;
00646                                         alpha[i] = sum - C_j;
00647                                 }
00648                         }
00649                         else
00650                         {
00651                                 if(alpha[i] < 0)
00652                                 {
00653                                         alpha[i] = 0;
00654                                         alpha[j] = sum;
00655                                 }
00656                         }
00657                 }
00658 
00659                 // update G
00660 
00661                 double delta_alpha_i = alpha[i] - old_alpha_i;
00662                 double delta_alpha_j = alpha[j] - old_alpha_j;
00663                 
00664                 for(int k=0;k<active_size;k++)
00665                 {
00666                         G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00667                 }
00668 
00669                 // update alpha_status and G_bar
00670 
00671                 {
00672                         bool ui = is_upper_bound(i);
00673                         bool uj = is_upper_bound(j);
00674                         update_alpha_status(i);
00675                         update_alpha_status(j);
00676                         int k;
00677                         if(ui != is_upper_bound(i))
00678                         {
00679                                 Q_i = Q.get_Q(i,l);
00680                                 if(ui)
00681                                         for(k=0;k<l;k++)
00682                                                 G_bar[k] -= C_i * Q_i[k];
00683                                 else
00684                                         for(k=0;k<l;k++)
00685                                                 G_bar[k] += C_i * Q_i[k];
00686                         }
00687 
00688                         if(uj != is_upper_bound(j))
00689                         {
00690                                 Q_j = Q.get_Q(j,l);
00691                                 if(uj)
00692                                         for(k=0;k<l;k++)
00693                                                 G_bar[k] -= C_j * Q_j[k];
00694                                 else
00695                                         for(k=0;k<l;k++)
00696                                                 G_bar[k] += C_j * Q_j[k];
00697                         }
00698                 }
00699         }
00700 
00701         // calculate rho
00702 
00703         si->rho = calculate_rho();
00704 
00705         // calculate objective value
00706         {
00707                 double v = 0;
00708                 int i;
00709                 for(i=0;i<l;i++)
00710                         v += alpha[i] * (G[i] + p[i]);
00711 
00712                 si->obj = v/2;
00713         }
00714 
00715         // put back the solution
00716         {
00717                 for(int i=0;i<l;i++)
00718                         alpha_[active_set[i]] = alpha[i];
00719         }
00720 
00721         // juggle everything back
00722         /*{
00723                 for(int i=0;i<l;i++)
00724                         while(active_set[i] != i)
00725                                 swap_index(i,active_set[i]);
00726                                 // or Q.swap_index(i,active_set[i]);
00727         }*/
00728 
00729         si->upper_bound_p = Cp;
00730         si->upper_bound_n = Cn;
00731 
00732         info("\noptimization finished, #iter = %d\n",iter);
00733 
00734         delete[] p;
00735         delete[] y;
00736         delete[] alpha;
00737         delete[] alpha_status;
00738         delete[] active_set;
00739         delete[] G;
00740         delete[] G_bar;
00741 }
00742 
00743 // return 1 if already optimal, return 0 otherwise
00744 int Solver::select_working_set(int &out_i, int &out_j)
00745 {
00746         // return i,j such that
00747         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
00748         // j: minimizes the decrease of obj value
00749         //    (if quadratic coefficeint <= 0, replace it with tau)
00750         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
00751         
00752         double Gmax = -INF;
00753         double Gmax2 = -INF;
00754         int Gmax_idx = -1;
00755         int Gmin_idx = -1;
00756         double obj_diff_min = INF;
00757 
00758         for(int t=0;t<active_size;t++)
00759                 if(y[t]==+1)    
00760                 {
00761                         if(!is_upper_bound(t))
00762                                 if(-G[t] >= Gmax)
00763                                 {
00764                                         Gmax = -G[t];
00765                                         Gmax_idx = t;
00766                                 }
00767                 }
00768                 else
00769                 {
00770                         if(!is_lower_bound(t))
00771                                 if(G[t] >= Gmax)
00772                                 {
00773                                         Gmax = G[t];
00774                                         Gmax_idx = t;
00775                                 }
00776                 }
00777 
00778         int i = Gmax_idx;
00779         const Qfloat *Q_i = NULL;
00780         if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
00781                 Q_i = Q->get_Q(i,active_size);
00782 
00783         for(int j=0;j<active_size;j++)
00784         {
00785                 if(y[j]==+1)
00786                 {
00787                         if (!is_lower_bound(j))
00788                         {
00789                                 double grad_diff=Gmax+G[j];
00790                                 if (G[j] >= Gmax2)
00791                                         Gmax2 = G[j];
00792                                 if (grad_diff > 0)
00793                                 {
00794                                         double obj_diff; 
00795                                         double quad_coef=Q_i[i]+QD[j]-2*y[i]*Q_i[j];
00796                                         if (quad_coef > 0)
00797                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00798                                         else
00799                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00800 
00801                                         if (obj_diff <= obj_diff_min)
00802                                         {
00803                                                 Gmin_idx=j;
00804                                                 obj_diff_min = obj_diff;
00805                                         }
00806                                 }
00807                         }
00808                 }
00809                 else
00810                 {
00811                         if (!is_upper_bound(j))
00812                         {
00813                                 double grad_diff= Gmax-G[j];
00814                                 if (-G[j] >= Gmax2)
00815                                         Gmax2 = -G[j];
00816                                 if (grad_diff > 0)
00817                                 {
00818                                         double obj_diff; 
00819                                         double quad_coef=Q_i[i]+QD[j]+2*y[i]*Q_i[j];
00820                                         if (quad_coef > 0)
00821                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00822                                         else
00823                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00824 
00825                                         if (obj_diff <= obj_diff_min)
00826                                         {
00827                                                 Gmin_idx=j;
00828                                                 obj_diff_min = obj_diff;
00829                                         }
00830                                 }
00831                         }
00832                 }
00833         }
00834 
00835         if(Gmax+Gmax2 < eps)
00836                 return 1;
00837 
00838         out_i = Gmax_idx;
00839         out_j = Gmin_idx;
00840         return 0;
00841 }
00842 
00843 bool Solver::be_shrunken(int i, double Gmax1, double Gmax2)
00844 {
00845         if(is_upper_bound(i))
00846         {
00847                 if(y[i]==+1)
00848                         return(-G[i] > Gmax1);
00849                 else
00850                         return(-G[i] > Gmax2);
00851         }
00852         else if(is_lower_bound(i))
00853         {
00854                 if(y[i]==+1)
00855                         return(G[i] > Gmax2);
00856                 else    
00857                         return(G[i] > Gmax1);
00858         }
00859         else
00860                 return(false);
00861 }
00862 
00863 void Solver::do_shrinking()
00864 {
00865         int i;
00866         double Gmax1 = -INF;            // max { -y_i * grad(f)_i | i in I_up(\alpha) }
00867         double Gmax2 = -INF;            // max { y_i * grad(f)_i | i in I_low(\alpha) }
00868 
00869         // find maximal violating pair first
00870         for(i=0;i<active_size;i++)
00871         {
00872                 if(y[i]==+1)    
00873                 {
00874                         if(!is_upper_bound(i))  
00875                         {
00876                                 if(-G[i] >= Gmax1)
00877                                         Gmax1 = -G[i];
00878                         }
00879                         if(!is_lower_bound(i))  
00880                         {
00881                                 if(G[i] >= Gmax2)
00882                                         Gmax2 = G[i];
00883                         }
00884                 }
00885                 else    
00886                 {
00887                         if(!is_upper_bound(i))  
00888                         {
00889                                 if(-G[i] >= Gmax2)
00890                                         Gmax2 = -G[i];
00891                         }
00892                         if(!is_lower_bound(i))  
00893                         {
00894                                 if(G[i] >= Gmax1)
00895                                         Gmax1 = G[i];
00896                         }
00897                 }
00898         }
00899 
00900         // shrink
00901 
00902         for(i=0;i<active_size;i++)
00903                 if (be_shrunken(i, Gmax1, Gmax2))
00904                 {
00905                         active_size--;
00906                         while (active_size > i)
00907                         {
00908                                 if (!be_shrunken(active_size, Gmax1, Gmax2))
00909                                 {
00910                                         swap_index(i,active_size);
00911                                         break;
00912                                 }
00913                                 active_size--;
00914                         }
00915                 }
00916 
00917         // unshrink, check all variables again before final iterations
00918 
00919         if(unshrinked || Gmax1 + Gmax2 > eps*10) return;
00920         
00921         unshrinked = true;
00922         reconstruct_gradient();
00923 
00924         for(i=l-1;i>=active_size;i--)
00925                 if (!be_shrunken(i, Gmax1, Gmax2))
00926                 {
00927                         while (active_size < i)
00928                         {
00929                                 if (be_shrunken(active_size, Gmax1, Gmax2))
00930                                 {
00931                                         swap_index(i,active_size);
00932                                         break;
00933                                 }
00934                                 active_size++;
00935                         }
00936                         active_size++;
00937                 }
00938 }
00939 
00940 double Solver::calculate_rho()
00941 {
00942         double r;
00943         int nr_free = 0;
00944         double ub = INF, lb = -INF, sum_free = 0;
00945         for(int i=0;i<active_size;i++)
00946         {
00947                 double yG = y[i]*G[i];
00948 
00949                 if(is_upper_bound(i))
00950                 {
00951                         if(y[i]==-1)
00952                                 ub = min(ub,yG);
00953                         else
00954                                 lb = max(lb,yG);
00955                 }
00956                 else if(is_lower_bound(i))
00957                 {
00958                         if(y[i]==+1)
00959                                 ub = min(ub,yG);
00960                         else
00961                                 lb = max(lb,yG);
00962                 }
00963                 else
00964                 {
00965                         ++nr_free;
00966                         sum_free += yG;
00967                 }
00968         }
00969 
00970         if(nr_free>0)
00971                 r = sum_free/nr_free;
00972         else
00973                 r = (ub+lb)/2;
00974 
00975         return r;
00976 }
00977 
00978 //
00979 // Solver for nu-svm classification and regression
00980 //
00981 // additional constraint: e^T \alpha = constant
00982 //
00983 class Solver_NU : public Solver
00984 {
00985 public:
00986         Solver_NU() {}
00987         void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
00988                    double *alpha, double Cp, double Cn, double eps,
00989                    SolutionInfo* si, int shrinking)
00990         {
00991                 this->si = si;
00992                 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
00993         }
00994 private:
00995         SolutionInfo *si;
00996         int select_working_set(int &i, int &j);
00997         double calculate_rho();
00998         bool be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
00999         void do_shrinking();
01000 };
01001 
01002 // return 1 if already optimal, return 0 otherwise
01003 int Solver_NU::select_working_set(int &out_i, int &out_j)
01004 {
01005         // return i,j such that y_i = y_j and
01006         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
01007         // j: minimizes the decrease of obj value
01008         //    (if quadratic coefficeint <= 0, replace it with tau)
01009         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
01010 
01011         double Gmaxp = -INF;
01012         double Gmaxp2 = -INF;
01013         int Gmaxp_idx = -1;
01014 
01015         double Gmaxn = -INF;
01016         double Gmaxn2 = -INF;
01017         int Gmaxn_idx = -1;
01018 
01019         int Gmin_idx = -1;
01020         double obj_diff_min = INF;
01021 
01022         for(int t=0;t<active_size;t++)
01023                 if(y[t]==+1)
01024                 {
01025                         if(!is_upper_bound(t))
01026                                 if(-G[t] >= Gmaxp)
01027                                 {
01028                                         Gmaxp = -G[t];
01029                                         Gmaxp_idx = t;
01030                                 }
01031                 }
01032                 else
01033                 {
01034                         if(!is_lower_bound(t))
01035                                 if(G[t] >= Gmaxn)
01036                                 {
01037                                         Gmaxn = G[t];
01038                                         Gmaxn_idx = t;
01039                                 }
01040                 }
01041 
01042         int ip = Gmaxp_idx;
01043         int in = Gmaxn_idx;
01044         const Qfloat *Q_ip = NULL;
01045         const Qfloat *Q_in = NULL;
01046         if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
01047                 Q_ip = Q->get_Q(ip,active_size);
01048         if(in != -1)
01049                 Q_in = Q->get_Q(in,active_size);
01050 
01051         for(int j=0;j<active_size;j++)
01052         {
01053                 if(y[j]==+1)
01054                 {
01055                         if (!is_lower_bound(j)) 
01056                         {
01057                                 double grad_diff=Gmaxp+G[j];
01058                                 if (G[j] >= Gmaxp2)
01059                                         Gmaxp2 = G[j];
01060                                 if (grad_diff > 0)
01061                                 {
01062                                         double obj_diff; 
01063                                         double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
01064                                         if (quad_coef > 0)
01065                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01066                                         else
01067                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01068 
01069                                         if (obj_diff <= obj_diff_min)
01070                                         {
01071                                                 Gmin_idx=j;
01072                                                 obj_diff_min = obj_diff;
01073                                         }
01074                                 }
01075                         }
01076                 }
01077                 else
01078                 {
01079                         if (!is_upper_bound(j))
01080                         {
01081                                 double grad_diff=Gmaxn-G[j];
01082                                 if (-G[j] >= Gmaxn2)
01083                                         Gmaxn2 = -G[j];
01084                                 if (grad_diff > 0)
01085                                 {
01086                                         double obj_diff; 
01087                                         double quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
01088                                         if (quad_coef > 0)
01089                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01090                                         else
01091                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01092 
01093                                         if (obj_diff <= obj_diff_min)
01094                                         {
01095                                                 Gmin_idx=j;
01096                                                 obj_diff_min = obj_diff;
01097                                         }
01098                                 }
01099                         }
01100                 }
01101         }
01102 
01103         if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
01104                 return 1;
01105 
01106         if (y[Gmin_idx] == +1)
01107                 out_i = Gmaxp_idx;
01108         else
01109                 out_i = Gmaxn_idx;
01110         out_j = Gmin_idx;
01111 
01112         return 0;
01113 }
01114 
01115 bool Solver_NU::be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01116 {
01117         if(is_upper_bound(i))
01118         {
01119                 if(y[i]==+1)
01120                         return(-G[i] > Gmax1);
01121                 else    
01122                         return(-G[i] > Gmax4);
01123         }
01124         else if(is_lower_bound(i))
01125         {
01126                 if(y[i]==+1)
01127                         return(G[i] > Gmax2);
01128                 else    
01129                         return(G[i] > Gmax3);
01130         }
01131         else
01132                 return(false);
01133 }
01134 
01135 void Solver_NU::do_shrinking()
01136 {
01137         double Gmax1 = -INF;    // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
01138         double Gmax2 = -INF;    // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
01139         double Gmax3 = -INF;    // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
01140         double Gmax4 = -INF;    // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
01141 
01142         // find maximal violating pair first
01143         int i;
01144         for(i=0;i<active_size;i++)
01145         {
01146                 if(!is_upper_bound(i))
01147                 {
01148                         if(y[i]==+1)
01149                         {
01150                                 if(-G[i] > Gmax1) Gmax1 = -G[i];
01151                         }
01152                         else    if(-G[i] > Gmax4) Gmax4 = -G[i];
01153                 }
01154                 if(!is_lower_bound(i))
01155                 {
01156                         if(y[i]==+1)
01157                         {       
01158                                 if(G[i] > Gmax2) Gmax2 = G[i];
01159                         }
01160                         else    if(G[i] > Gmax3) Gmax3 = G[i];
01161                 }
01162         }
01163 
01164         // shrinking
01165 
01166         for(i=0;i<active_size;i++)
01167                 if (be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
01168                 {
01169                         active_size--;
01170                         while (active_size > i)
01171                         {
01172                                 if (!be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01173                                 {
01174                                         swap_index(i,active_size);
01175                                         break;
01176                                 }
01177                                 active_size--;
01178                         }
01179                 }
01180 
01181         // unshrink, check all variables again before final iterations
01182 
01183         if(unshrinked || max(Gmax1+Gmax2,Gmax3+Gmax4) > eps*10) return;
01184         
01185         unshrinked = true;
01186         reconstruct_gradient();
01187 
01188         for(i=l-1;i>=active_size;i--)
01189                 if (!be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
01190                 {
01191                         while (active_size < i)
01192                         {
01193                                 if (be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01194                                 {
01195                                         swap_index(i,active_size);
01196                                         break;
01197                                 }
01198                                 active_size++;
01199                         }
01200                         active_size++;
01201                 }
01202 }
01203 
01204 double Solver_NU::calculate_rho()
01205 {
01206         int nr_free1 = 0,nr_free2 = 0;
01207         double ub1 = INF, ub2 = INF;
01208         double lb1 = -INF, lb2 = -INF;
01209         double sum_free1 = 0, sum_free2 = 0;
01210 
01211         for(int i=0;i<active_size;i++)
01212         {
01213                 if(y[i]==+1)
01214                 {
01215                         if(is_upper_bound(i))
01216                                 lb1 = max(lb1,G[i]);
01217                         else if(is_lower_bound(i))
01218                                 ub1 = min(ub1,G[i]);
01219                         else
01220                         {
01221                                 ++nr_free1;
01222                                 sum_free1 += G[i];
01223                         }
01224                 }
01225                 else
01226                 {
01227                         if(is_upper_bound(i))
01228                                 lb2 = max(lb2,G[i]);
01229                         else if(is_lower_bound(i))
01230                                 ub2 = min(ub2,G[i]);
01231                         else
01232                         {
01233                                 ++nr_free2;
01234                                 sum_free2 += G[i];
01235                         }
01236                 }
01237         }
01238 
01239         double r1,r2;
01240         if(nr_free1 > 0)
01241                 r1 = sum_free1/nr_free1;
01242         else
01243                 r1 = (ub1+lb1)/2;
01244         
01245         if(nr_free2 > 0)
01246                 r2 = sum_free2/nr_free2;
01247         else
01248                 r2 = (ub2+lb2)/2;
01249         
01250         si->r = (r1+r2)/2;
01251         return (r1-r2)/2;
01252 }
01253 
01254 //
01255 // Q matrices for various formulations
01256 //
01257 class SVC_Q: public Kernel
01258 { 
01259 public:
01260         SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01261         :Kernel(prob.l, prob.x, param)
01262         {
01263                 clone(y,y_,prob.l);
01264                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01265                 QD = new Qfloat[prob.l];
01266                 for(int i=0;i<prob.l;i++)
01267                         QD[i]= (Qfloat)(this->*kernel_function)(i,i);
01268         }
01269         
01270         Qfloat *get_Q(int i, int len) const
01271         {
01272                 Qfloat *data;
01273                 int start;
01274                 if((start = cache->get_data(i,&data,len)) < len)
01275                 {
01276                         for(int j=start;j<len;j++)
01277                                 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01278                 }
01279                 return data;
01280         }
01281 
01282         Qfloat *get_QD() const
01283         {
01284                 return QD;
01285         }
01286 
01287         void swap_index(int i, int j) const
01288         {
01289                 cache->swap_index(i,j);
01290                 Kernel::swap_index(i,j);
01291                 swap(y[i],y[j]);
01292                 swap(QD[i],QD[j]);
01293         }
01294 
01295         ~SVC_Q()
01296         {
01297                 delete[] y;
01298                 delete cache;
01299                 delete[] QD;
01300         }
01301 private:
01302         schar *y;
01303         Cache *cache;
01304         Qfloat *QD;
01305 };
01306 
01307 class ONE_CLASS_Q: public Kernel
01308 {
01309 public:
01310         ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01311         :Kernel(prob.l, prob.x, param)
01312         {
01313                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01314                 QD = new Qfloat[prob.l];
01315                 for(int i=0;i<prob.l;i++)
01316                         QD[i]= (Qfloat)(this->*kernel_function)(i,i);
01317         }
01318         
01319         Qfloat *get_Q(int i, int len) const
01320         {
01321                 Qfloat *data;
01322                 int start;
01323                 if((start = cache->get_data(i,&data,len)) < len)
01324                 {
01325                         for(int j=start;j<len;j++)
01326                                 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01327                 }
01328                 return data;
01329         }
01330 
01331         Qfloat *get_QD() const
01332         {
01333                 return QD;
01334         }
01335 
01336         void swap_index(int i, int j) const
01337         {
01338                 cache->swap_index(i,j);
01339                 Kernel::swap_index(i,j);
01340                 swap(QD[i],QD[j]);
01341         }
01342 
01343         ~ONE_CLASS_Q()
01344         {
01345                 delete cache;
01346                 delete[] QD;
01347         }
01348 private:
01349         Cache *cache;
01350         Qfloat *QD;
01351 };
01352 
01353 class SVR_Q: public Kernel
01354 { 
01355 public:
01356         SVR_Q(const svm_problem& prob, const svm_parameter& param)
01357         :Kernel(prob.l, prob.x, param)
01358         {
01359                 l = prob.l;
01360                 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01361                 QD = new Qfloat[2*l];
01362                 sign = new schar[2*l];
01363                 index = new int[2*l];
01364                 for(int k=0;k<l;k++)
01365                 {
01366                         sign[k] = 1;
01367                         sign[k+l] = -1;
01368                         index[k] = k;
01369                         index[k+l] = k;
01370                         QD[k]= (Qfloat)(this->*kernel_function)(k,k);
01371                         QD[k+l]=QD[k];
01372                 }
01373                 buffer[0] = new Qfloat[2*l];
01374                 buffer[1] = new Qfloat[2*l];
01375                 next_buffer = 0;
01376         }
01377 
01378         void swap_index(int i, int j) const
01379         {
01380                 swap(sign[i],sign[j]);
01381                 swap(index[i],index[j]);
01382                 swap(QD[i],QD[j]);
01383         }
01384         
01385         Qfloat *get_Q(int i, int len) const
01386         {
01387                 Qfloat *data;
01388                 int real_i = index[i];
01389                 if(cache->get_data(real_i,&data,l) < l)
01390                 {
01391                         for(int j=0;j<l;j++)
01392                                 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01393                 }
01394 
01395                 // reorder and copy
01396                 Qfloat *buf = buffer[next_buffer];
01397                 next_buffer = 1 - next_buffer;
01398                 schar si = sign[i];
01399                 for(int j=0;j<len;j++)
01400                         buf[j] = si * sign[j] * data[index[j]];
01401                 return buf;
01402         }
01403 
01404         Qfloat *get_QD() const
01405         {
01406                 return QD;
01407         }
01408 
01409         ~SVR_Q()
01410         {
01411                 delete cache;
01412                 delete[] sign;
01413                 delete[] index;
01414                 delete[] buffer[0];
01415                 delete[] buffer[1];
01416                 delete[] QD;
01417         }
01418 private:
01419         int l;
01420         Cache *cache;
01421         schar *sign;
01422         int *index;
01423         mutable int next_buffer;
01424         Qfloat *buffer[2];
01425         Qfloat *QD;
01426 };
01427 
01428 //
01429 // construct and solve various formulations
01430 //
01431 static void solve_c_svc(
01432         const svm_problem *prob, const svm_parameter* param,
01433         double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01434 {
01435         int l = prob->l;
01436         double *minus_ones = new double[l];
01437         schar *y = new schar[l];
01438 
01439         int i;
01440 
01441         for(i=0;i<l;i++)
01442         {
01443                 alpha[i] = 0;
01444                 minus_ones[i] = -1;
01445                 if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
01446         }
01447 
01448         Solver s;
01449         s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01450                 alpha, Cp, Cn, param->eps, si, param->shrinking);
01451 
01452         double sum_alpha=0;
01453         for(i=0;i<l;i++)
01454                 sum_alpha += alpha[i];
01455 
01456         if (Cp==Cn)
01457                 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01458 
01459         for(i=0;i<l;i++)
01460                 alpha[i] *= y[i];
01461 
01462         delete[] minus_ones;
01463         delete[] y;
01464 }
01465 
01466 static void solve_nu_svc(
01467         const svm_problem *prob, const svm_parameter *param,
01468         double *alpha, Solver::SolutionInfo* si)
01469 {
01470         int i;
01471         int l = prob->l;
01472         double nu = param->nu;
01473 
01474         schar *y = new schar[l];
01475 
01476         for(i=0;i<l;i++)
01477                 if(prob->y[i]>0)
01478                         y[i] = +1;
01479                 else
01480                         y[i] = -1;
01481 
01482         double sum_pos = nu*l/2;
01483         double sum_neg = nu*l/2;
01484 
01485         for(i=0;i<l;i++)
01486                 if(y[i] == +1)
01487                 {
01488                         alpha[i] = min(1.0,sum_pos);
01489                         sum_pos -= alpha[i];
01490                 }
01491                 else
01492                 {
01493                         alpha[i] = min(1.0,sum_neg);
01494                         sum_neg -= alpha[i];
01495                 }
01496 
01497         double *zeros = new double[l];
01498 
01499         for(i=0;i<l;i++)
01500                 zeros[i] = 0;
01501 
01502         Solver_NU s;
01503         s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01504                 alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
01505         double r = si->r;
01506 
01507         info("C = %f\n",1/r);
01508 
01509         for(i=0;i<l;i++)
01510                 alpha[i] *= y[i]/r;
01511 
01512         si->rho /= r;
01513         si->obj /= (r*r);
01514         si->upper_bound_p = 1/r;
01515         si->upper_bound_n = 1/r;
01516 
01517         delete[] y;
01518         delete[] zeros;
01519 }
01520 
01521 static void solve_one_class(
01522         const svm_problem *prob, const svm_parameter *param,
01523         double *alpha, Solver::SolutionInfo* si)
01524 {
01525         int l = prob->l;
01526         double *zeros = new double[l];
01527         schar *ones = new schar[l];
01528         int i;
01529 
01530         int n = (int)(param->nu*prob->l);       // # of alpha's at upper bound
01531 
01532         for(i=0;i<n;i++)
01533                 alpha[i] = 1;
01534         if(n<prob->l)
01535                 alpha[n] = param->nu * prob->l - n;
01536         for(i=n+1;i<l;i++)
01537                 alpha[i] = 0;
01538 
01539         for(i=0;i<l;i++)
01540         {
01541                 zeros[i] = 0;
01542                 ones[i] = 1;
01543         }
01544 
01545         Solver s;
01546         s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01547                 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01548 
01549         delete[] zeros;
01550         delete[] ones;
01551 }
01552 
01553 static void solve_epsilon_svr(
01554         const svm_problem *prob, const svm_parameter *param,
01555         double *alpha, Solver::SolutionInfo* si)
01556 {
01557         int l = prob->l;
01558         double *alpha2 = new double[2*l];
01559         double *linear_term = new double[2*l];
01560         schar *y = new schar[2*l];
01561         int i;
01562 
01563         for(i=0;i<l;i++)
01564         {
01565                 alpha2[i] = 0;
01566                 linear_term[i] = param->p - prob->y[i];
01567                 y[i] = 1;
01568 
01569                 alpha2[i+l] = 0;
01570                 linear_term[i+l] = param->p + prob->y[i];
01571                 y[i+l] = -1;
01572         }
01573 
01574         Solver s;
01575         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01576                 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01577 
01578         double sum_alpha = 0;
01579         for(i=0;i<l;i++)
01580         {
01581                 alpha[i] = alpha2[i] - alpha2[i+l];
01582                 sum_alpha += fabs(alpha[i]);
01583         }
01584         info("nu = %f\n",sum_alpha/(param->C*l));
01585 
01586         delete[] alpha2;
01587         delete[] linear_term;
01588         delete[] y;
01589 }
01590 
01591 static void solve_nu_svr(
01592         const svm_problem *prob, const svm_parameter *param,
01593         double *alpha, Solver::SolutionInfo* si)
01594 {
01595         int l = prob->l;
01596         double C = param->C;
01597         double *alpha2 = new double[2*l];
01598         double *linear_term = new double[2*l];
01599         schar *y = new schar[2*l];
01600         int i;
01601 
01602         double sum = C * param->nu * l / 2;
01603         for(i=0;i<l;i++)
01604         {
01605                 alpha2[i] = alpha2[i+l] = min(sum,C);
01606                 sum -= alpha2[i];
01607 
01608                 linear_term[i] = - prob->y[i];
01609                 y[i] = 1;
01610 
01611                 linear_term[i+l] = prob->y[i];
01612                 y[i+l] = -1;
01613         }
01614 
01615         Solver_NU s;
01616         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01617                 alpha2, C, C, param->eps, si, param->shrinking);
01618 
01619         info("epsilon = %f\n",-si->r);
01620 
01621         for(i=0;i<l;i++)
01622                 alpha[i] = alpha2[i] - alpha2[i+l];
01623 
01624         delete[] alpha2;
01625         delete[] linear_term;
01626         delete[] y;
01627 }
01628 
01629 //
01630 // decision_function
01631 //
01632 struct decision_function
01633 {
01634         double *alpha;
01635         double rho;     
01636 };
01637 
01638 decision_function svm_train_one(
01639         const svm_problem *prob, const svm_parameter *param,
01640         double Cp, double Cn)
01641 {
01642         double *alpha = Malloc(double,prob->l);
01643         Solver::SolutionInfo si;
01644         switch(param->svm_type)
01645         {
01646                 case C_SVC:
01647                         solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01648                         break;
01649                 case NU_SVC:
01650                         solve_nu_svc(prob,param,alpha,&si);
01651                         break;
01652                 case ONE_CLASS:
01653                         solve_one_class(prob,param,alpha,&si);
01654                         break;
01655                 case EPSILON_SVR:
01656                         solve_epsilon_svr(prob,param,alpha,&si);
01657                         break;
01658                 case NU_SVR:
01659                         solve_nu_svr(prob,param,alpha,&si);
01660                         break;
01661         }
01662 
01663         info("obj = %f, rho = %f\n",si.obj,si.rho);
01664 
01665         // output SVs
01666 
01667         int nSV = 0;
01668         int nBSV = 0;
01669         for(int i=0;i<prob->l;i++)
01670         {
01671                 if(fabs(alpha[i]) > 0)
01672                 {
01673                         ++nSV;
01674                         if(prob->y[i] > 0)
01675                         {
01676                                 if(fabs(alpha[i]) >= si.upper_bound_p)
01677                                         ++nBSV;
01678                         }
01679                         else
01680                         {
01681                                 if(fabs(alpha[i]) >= si.upper_bound_n)
01682                                         ++nBSV;
01683                         }
01684                 }
01685         }
01686 
01687         info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01688 
01689         decision_function f;
01690         f.alpha = alpha;
01691         f.rho = si.rho;
01692         return f;
01693 }
01694 
01695 //
01696 // svm_model
01697 //
01698 struct svm_model
01699 {
01700         svm_parameter param;    // parameter
01701         int nr_class;           // number of classes, = 2 in regression/one class svm
01702         int l;                  // total #SV
01703         svm_node **SV;          // SVs (SV[l])
01704         double **sv_coef;       // coefficients for SVs in decision functions (sv_coef[k-1][l])
01705         double *rho;            // constants in decision functions (rho[k*(k-1)/2])
01706         double *probA;          // pariwise probability information
01707         double *probB;
01708 
01709         // for classification only
01710 
01711         int *label;             // label of each class (label[k])
01712         int *nSV;               // number of SVs for each class (nSV[k])
01713                                 // nSV[0] + nSV[1] + ... + nSV[k-1] = l
01714         // XXX
01715         int free_sv;            // 1 if svm_model is created by svm_load_model
01716                                 // 0 if svm_model is created by svm_train
01717 };
01718 
01719 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
01720 void sigmoid_train(
01721         int l, const double *dec_values, const double *labels, 
01722         double& A, double& B)
01723 {
01724         double prior1=0, prior0 = 0;
01725         int i;
01726 
01727         for (i=0;i<l;i++)
01728                 if (labels[i] > 0) prior1+=1;
01729                 else prior0+=1;
01730         
01731         int max_iter=100;       // Maximal number of iterations
01732         double min_step=1e-10;  // Minimal step taken in line search
01733         double sigma=1e-12;     // For numerically strict PD of Hessian
01734         double eps=1e-5;
01735         double hiTarget=(prior1+1.0)/(prior1+2.0);
01736         double loTarget=1/(prior0+2.0);
01737         double *t=Malloc(double,l);
01738         double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01739         double newA,newB,newf,d1,d2;
01740         int iter; 
01741         
01742         // Initial Point and Initial Fun Value
01743         A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01744         double fval = 0.0;
01745 
01746         for (i=0;i<l;i++)
01747         {
01748                 if (labels[i]>0) t[i]=hiTarget;
01749                 else t[i]=loTarget;
01750                 fApB = dec_values[i]*A+B;
01751                 if (fApB>=0)
01752                         fval += t[i]*fApB + log(1+exp(-fApB));
01753                 else
01754                         fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01755         }
01756         for (iter=0;iter<max_iter;iter++)
01757         {
01758                 // Update Gradient and Hessian (use H' = H + sigma I)
01759                 h11=sigma; // numerically ensures strict PD
01760                 h22=sigma;
01761                 h21=0.0;g1=0.0;g2=0.0;
01762                 for (i=0;i<l;i++)
01763                 {
01764                         fApB = dec_values[i]*A+B;
01765                         if (fApB >= 0)
01766                         {
01767                                 p=exp(-fApB)/(1.0+exp(-fApB));
01768                                 q=1.0/(1.0+exp(-fApB));
01769                         }
01770                         else
01771                         {
01772                                 p=1.0/(1.0+exp(fApB));
01773                                 q=exp(fApB)/(1.0+exp(fApB));
01774                         }
01775                         d2=p*q;
01776                         h11+=dec_values[i]*dec_values[i]*d2;
01777                         h22+=d2;
01778                         h21+=dec_values[i]*d2;
01779                         d1=t[i]-p;
01780                         g1+=dec_values[i]*d1;
01781                         g2+=d1;
01782                 }
01783 
01784                 // Stopping Criteria
01785                 if (fabs(g1)<eps && fabs(g2)<eps)
01786                         break;
01787 
01788                 // Finding Newton direction: -inv(H') * g
01789                 det=h11*h22-h21*h21;
01790                 dA=-(h22*g1 - h21 * g2) / det;
01791                 dB=-(-h21*g1+ h11 * g2) / det;
01792                 gd=g1*dA+g2*dB;
01793 
01794 
01795                 stepsize = 1;           // Line Search
01796                 while (stepsize >= min_step)
01797                 {
01798                         newA = A + stepsize * dA;
01799                         newB = B + stepsize * dB;
01800 
01801                         // New function value
01802                         newf = 0.0;
01803                         for (i=0;i<l;i++)
01804                         {
01805                                 fApB = dec_values[i]*newA+newB;
01806                                 if (fApB >= 0)
01807                                         newf += t[i]*fApB + log(1+exp(-fApB));
01808                                 else
01809                                         newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01810                         }
01811                         // Check sufficient decrease
01812                         if (newf<fval+0.0001*stepsize*gd)
01813                         {
01814                                 A=newA;B=newB;fval=newf;
01815                                 break;
01816                         }
01817                         else
01818                                 stepsize = stepsize / 2.0;
01819                 }
01820 
01821                 if (stepsize < min_step)
01822                 {
01823                         info("Line search fails in two-class probability estimates\n");
01824                         break;
01825                 }
01826         }
01827 
01828         if (iter>=max_iter)
01829                 info("Reaching maximal iterations in two-class probability estimates\n");
01830         free(t);
01831 }
01832 
01833 double sigmoid_predict(double decision_value, double A, double B)
01834 {
01835         double fApB = decision_value*A+B;
01836         if (fApB >= 0)
01837                 return exp(-fApB)/(1.0+exp(-fApB));
01838         else
01839                 return 1.0/(1+exp(fApB)) ;
01840 }
01841 
01842 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
01843 void multiclass_probability(int k, double **r, double *p)
01844 {
01845         int t,j;
01846         int iter = 0, max_iter=max(100,k);
01847         double **Q=Malloc(double *,k);
01848         double *Qp=Malloc(double,k);
01849         double pQp, eps=0.005/k;
01850         
01851         for (t=0;t<k;t++)
01852         {
01853                 p[t]=1.0/k;  // Valid if k = 1
01854                 Q[t]=Malloc(double,k);
01855                 Q[t][t]=0;
01856                 for (j=0;j<t;j++)
01857                 {
01858                         Q[t][t]+=r[j][t]*r[j][t];
01859                         Q[t][j]=Q[j][t];
01860                 }
01861                 for (j=t+1;j<k;j++)
01862                 {
01863                         Q[t][t]+=r[j][t]*r[j][t];
01864                         Q[t][j]=-r[j][t]*r[t][j];
01865                 }
01866         }
01867         for (iter=0;iter<max_iter;iter++)
01868         {
01869                 // stopping condition, recalculate QP,pQP for numerical accuracy
01870                 pQp=0;
01871                 for (t=0;t<k;t++)
01872                 {
01873                         Qp[t]=0;
01874                         for (j=0;j<k;j++)
01875                                 Qp[t]+=Q[t][j]*p[j];
01876                         pQp+=p[t]*Qp[t];
01877                 }
01878                 double max_error=0;
01879                 for (t=0;t<k;t++)
01880                 {
01881                         double error=fabs(Qp[t]-pQp);
01882                         if (error>max_error)
01883                                 max_error=error;
01884                 }
01885                 if (max_error<eps) break;
01886                 
01887                 for (t=0;t<k;t++)
01888                 {
01889                         double diff=(-Qp[t]+pQp)/Q[t][t];
01890                         p[t]+=diff;
01891                         pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01892                         for (j=0;j<k;j++)
01893                         {
01894                                 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01895                                 p[j]/=(1+diff);
01896                         }
01897                 }
01898         }
01899         if (iter>=max_iter)
01900                 info("Exceeds max_iter in multiclass_prob\n");
01901         for(t=0;t<k;t++) free(Q[t]);
01902         free(Q);
01903         free(Qp);
01904 }
01905 
01906 // Cross-validation decision values for probability estimates
01907 void svm_binary_svc_probability(
01908         const svm_problem *prob, const svm_parameter *param,
01909         double Cp, double Cn, double& probA, double& probB)
01910 {
01911         int i;
01912         int nr_fold = 5;
01913         int *perm = Malloc(int,prob->l);
01914         double *dec_values = Malloc(double,prob->l);
01915 
01916         // random shuffle
01917         for(i=0;i<prob->l;i++) perm[i]=i;
01918         for(i=0;i<prob->l;i++)
01919         {
01920                 int j = i+rand()%(prob->l-i);
01921                 swap(perm[i],perm[j]);
01922         }
01923         for(i=0;i<nr_fold;i++)
01924         {
01925                 int begin = i*prob->l/nr_fold;
01926                 int end = (i+1)*prob->l/nr_fold;
01927                 int j,k;
01928                 struct svm_problem subprob;
01929 
01930                 subprob.l = prob->l-(end-begin);
01931                 subprob.x = Malloc(struct svm_node*,subprob.l);
01932                 subprob.y = Malloc(double,subprob.l);
01933                         
01934                 k=0;
01935                 for(j=0;j<begin;j++)
01936                 {
01937                         subprob.x[k] = prob->x[perm[j]];
01938                         subprob.y[k] = prob->y[perm[j]];
01939                         ++k;
01940                 }
01941                 for(j=end;j<prob->l;j++)
01942                 {
01943                         subprob.x[k] = prob->x[perm[j]];
01944                         subprob.y[k] = prob->y[perm[j]];
01945                         ++k;
01946                 }
01947                 int p_count=0,n_count=0;
01948                 for(j=0;j<k;j++)
01949                         if(subprob.y[j]>0)
01950                                 p_count++;
01951                         else
01952                                 n_count++;
01953 
01954                 if(p_count==0 && n_count==0)
01955                         for(j=begin;j<end;j++)
01956                                 dec_values[perm[j]] = 0;
01957                 else if(p_count > 0 && n_count == 0)
01958                         for(j=begin;j<end;j++)
01959                                 dec_values[perm[j]] = 1;
01960                 else if(p_count == 0 && n_count > 0)
01961                         for(j=begin;j<end;j++)
01962                                 dec_values[perm[j]] = -1;
01963                 else
01964                 {
01965                         svm_parameter subparam = *param;
01966                         subparam.probability=0;
01967                         subparam.C=1.0;
01968                         subparam.nr_weight=2;
01969                         subparam.weight_label = Malloc(int,2);
01970                         subparam.weight = Malloc(double,2);
01971                         subparam.weight_label[0]=+1;
01972                         subparam.weight_label[1]=-1;
01973                         subparam.weight[0]=Cp;
01974                         subparam.weight[1]=Cn;
01975                         struct svm_model *submodel = svm_train(&subprob,&subparam);
01976                         for(j=begin;j<end;j++)
01977                         {
01978                                 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]])); 
01979                                 // ensure +1 -1 order; reason not using CV subroutine
01980                                 dec_values[perm[j]] *= submodel->label[0];
01981                         }               
01982                         svm_destroy_model(submodel);
01983                         svm_destroy_param(&subparam);
01984                 }
01985                 free(subprob.x);
01986                 free(subprob.y);
01987         }               
01988         sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
01989         free(dec_values);
01990         free(perm);
01991 }
01992 
01993 // Return parameter of a Laplace distribution 
01994 double svm_svr_probability(
01995         const svm_problem *prob, const svm_parameter *param)
01996 {
01997         int i;
01998         int nr_fold = 5;
01999         double *ymv = Malloc(double,prob->l);
02000         double mae = 0;
02001 
02002         svm_parameter newparam = *param;
02003         newparam.probability = 0;
02004         svm_cross_validation(prob,&newparam,nr_fold,ymv);
02005         for(i=0;i<prob->l;i++)
02006         {
02007                 ymv[i]=prob->y[i]-ymv[i];
02008                 mae += fabs(ymv[i]);
02009         }               
02010         mae /= prob->l;
02011         double std=sqrt(2*mae*mae);
02012         int count=0;
02013         mae=0;
02014         for(i=0;i<prob->l;i++)
02015                 if (fabs(ymv[i]) > 5*std) 
02016                         count=count+1;
02017                 else 
02018                         mae+=fabs(ymv[i]);
02019         mae /= (prob->l-count);
02020         info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
02021         free(ymv);
02022         return mae;
02023 }
02024 
02025 
02026 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
02027 // perm, length l, must be allocated before calling this subroutine
02028 void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02029 {
02030         int l = prob->l;
02031         int max_nr_class = 16;
02032         int nr_class = 0;
02033         int *label = Malloc(int,max_nr_class);
02034         int *count = Malloc(int,max_nr_class);
02035         int *data_label = Malloc(int,l);        
02036         int i;
02037 
02038         for(i=0;i<l;i++)
02039         {
02040                 int this_label = (int)prob->y[i];
02041                 int j;
02042                 for(j=0;j<nr_class;j++)
02043                 {
02044                         if(this_label == label[j])
02045                         {
02046                                 ++count[j];
02047                                 break;
02048                         }
02049                 }
02050                 data_label[i] = j;
02051                 if(j == nr_class)
02052                 {
02053                         if(nr_class == max_nr_class)
02054                         {
02055                                 max_nr_class *= 2;
02056                                 label = (int *)realloc(label,max_nr_class*sizeof(int));
02057                                 count = (int *)realloc(count,max_nr_class*sizeof(int));
02058                         }
02059                         label[nr_class] = this_label;
02060                         count[nr_class] = 1;
02061                         ++nr_class;
02062                 }
02063         }
02064 
02065         int *start = Malloc(int,nr_class);
02066         start[0] = 0;
02067         for(i=1;i<nr_class;i++)
02068                 start[i] = start[i-1]+count[i-1];
02069         for(i=0;i<l;i++)
02070         {
02071                 perm[start[data_label[i]]] = i;
02072                 ++start[data_label[i]];
02073         }
02074         start[0] = 0;
02075         for(i=1;i<nr_class;i++)
02076                 start[i] = start[i-1]+count[i-1];
02077 
02078         *nr_class_ret = nr_class;
02079         *label_ret = label;
02080         *start_ret = start;
02081         *count_ret = count;
02082         free(data_label);
02083 }
02084 
02085 //
02086 // Interface functions
02087 //
02088 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02089 {
02090         svm_model *model = Malloc(svm_model,1);
02091         model->param = *param;
02092         model->free_sv = 0;     // XXX
02093 
02094         if(param->svm_type == ONE_CLASS ||
02095            param->svm_type == EPSILON_SVR ||
02096            param->svm_type == NU_SVR)
02097         {
02098                 // regression or one-class-svm
02099                 model->nr_class = 2;
02100                 model->label = NULL;
02101                 model->nSV = NULL;
02102                 model->probA = NULL; model->probB = NULL;
02103                 model->sv_coef = Malloc(double *,1);
02104 
02105                 if(param->probability && 
02106                    (param->svm_type == EPSILON_SVR ||
02107                     param->svm_type == NU_SVR))
02108                 {
02109                         model->probA = Malloc(double,1);
02110                         model->probA[0] = svm_svr_probability(prob,param);
02111                 }
02112 
02113                 decision_function f = svm_train_one(prob,param,0,0);
02114                 model->rho = Malloc(double,1);
02115                 model->rho[0] = f.rho;
02116 
02117                 int nSV = 0;
02118                 int i;
02119                 for(i=0;i<prob->l;i++)
02120                         if(fabs(f.alpha[i]) > 0) ++nSV;
02121                 model->l = nSV;
02122                 model->SV = Malloc(svm_node *,nSV);
02123                 model->sv_coef[0] = Malloc(double,nSV);
02124                 int j = 0;
02125                 for(i=0;i<prob->l;i++)
02126                         if(fabs(f.alpha[i]) > 0)
02127                         {
02128                                 model->SV[j] = prob->x[i];
02129                                 model->sv_coef[0][j] = f.alpha[i];
02130                                 ++j;
02131                         }               
02132 
02133                 free(f.alpha);
02134         }
02135         else
02136         {
02137                 // classification
02138                 int l = prob->l;
02139                 int nr_class;
02140                 int *label = NULL;
02141                 int *start = NULL;
02142                 int *count = NULL;
02143                 int *perm = Malloc(int,l);
02144 
02145                 // group training data of the same class
02146                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);            
02147                 svm_node **x = Malloc(svm_node *,l);
02148                 int i;
02149                 for(i=0;i<l;i++)
02150                         x[i] = prob->x[perm[i]];
02151 
02152                 // calculate weighted C
02153 
02154                 double *weighted_C = Malloc(double, nr_class);
02155                 for(i=0;i<nr_class;i++)
02156                         weighted_C[i] = param->C;
02157                 for(i=0;i<param->nr_weight;i++)
02158                 {       
02159                         int j;
02160                         for(j=0;j<nr_class;j++)
02161                                 if(param->weight_label[i] == label[j])
02162                                         break;
02163                         if(j == nr_class)
02164                                 fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
02165                         else
02166                                 weighted_C[j] *= param->weight[i];
02167                 }
02168 
02169                 // train k*(k-1)/2 models
02170                 
02171                 bool *nonzero = Malloc(bool,l);
02172                 for(i=0;i<l;i++)
02173                         nonzero[i] = false;
02174                 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02175 
02176                 double *probA=NULL,*probB=NULL;
02177                 if (param->probability)
02178                 {
02179                         probA=Malloc(double,nr_class*(nr_class-1)/2);
02180                         probB=Malloc(double,nr_class*(nr_class-1)/2);
02181                 }
02182 
02183                 int p = 0;
02184                 for(i=0;i<nr_class;i++)
02185                         for(int j=i+1;j<nr_class;j++)
02186                         {
02187                                 svm_problem sub_prob;
02188                                 int si = start[i], sj = start[j];
02189                                 int ci = count[i], cj = count[j];
02190                                 sub_prob.l = ci+cj;
02191                                 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02192                                 sub_prob.y = Malloc(double,sub_prob.l);
02193                                 int k;
02194                                 for(k=0;k<ci;k++)
02195                                 {
02196                                         sub_prob.x[k] = x[si+k];
02197                                         sub_prob.y[k] = +1;
02198                                 }
02199                                 for(k=0;k<cj;k++)
02200                                 {
02201                                         sub_prob.x[ci+k] = x[sj+k];
02202                                         sub_prob.y[ci+k] = -1;
02203                                 }
02204 
02205                                 if(param->probability)
02206                                         svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02207 
02208                                 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02209                                 for(k=0;k<ci;k++)
02210                                         if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02211                                                 nonzero[si+k] = true;
02212                                 for(k=0;k<cj;k++)
02213                                         if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02214                                                 nonzero[sj+k] = true;
02215                                 free(sub_prob.x);
02216                                 free(sub_prob.y);
02217                                 ++p;
02218                         }
02219 
02220                 // build output
02221 
02222                 model->nr_class = nr_class;
02223                 
02224                 model->label = Malloc(int,nr_class);
02225                 for(i=0;i<nr_class;i++)
02226                         model->label[i] = label[i];
02227                 
02228                 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02229                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02230                         model->rho[i] = f[i].rho;
02231 
02232                 if(param->probability)
02233                 {
02234                         model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02235                         model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02236                         for(i=0;i<nr_class*(nr_class-1)/2;i++)
02237                         {
02238                                 model->probA[i] = probA[i];
02239                                 model->probB[i] = probB[i];
02240                         }
02241                 }
02242                 else
02243                 {
02244                         model->probA=NULL;
02245                         model->probB=NULL;
02246                 }
02247 
02248                 int total_sv = 0;
02249                 int *nz_count = Malloc(int,nr_class);
02250                 model->nSV = Malloc(int,nr_class);
02251                 for(i=0;i<nr_class;i++)
02252                 {
02253                         int nSV = 0;
02254                         for(int j=0;j<count[i];j++)
02255                                 if(nonzero[start[i]+j])
02256                                 {       
02257                                         ++nSV;
02258                                         ++total_sv;
02259                                 }
02260                         model->nSV[i] = nSV;
02261                         nz_count[i] = nSV;
02262                 }
02263                 
02264                 info("Total nSV = %d\n",total_sv);
02265 
02266                 model->l = total_sv;
02267                 model->SV = Malloc(svm_node *,total_sv);
02268                 p = 0;
02269                 for(i=0;i<l;i++)
02270                         if(nonzero[i]) model->SV[p++] = x[i];
02271 
02272                 int *nz_start = Malloc(int,nr_class);
02273                 nz_start[0] = 0;
02274                 for(i=1;i<nr_class;i++)
02275                         nz_start[i] = nz_start[i-1]+nz_count[i-1];
02276 
02277                 model->sv_coef = Malloc(double *,nr_class-1);
02278                 for(i=0;i<nr_class-1;i++)
02279                         model->sv_coef[i] = Malloc(double,total_sv);
02280 
02281                 p = 0;
02282                 for(i=0;i<nr_class;i++)
02283                         for(int j=i+1;j<nr_class;j++)
02284                         {
02285                                 // classifier (i,j): coefficients with
02286                                 // i are in sv_coef[j-1][nz_start[i]...],
02287                                 // j are in sv_coef[i][nz_start[j]...]
02288 
02289                                 int si = start[i];
02290                                 int sj = start[j];
02291                                 int ci = count[i];
02292                                 int cj = count[j];
02293                                 
02294                                 int q = nz_start[i];
02295                                 int k;
02296                                 for(k=0;k<ci;k++)
02297                                         if(nonzero[si+k])
02298                                                 model->sv_coef[j-1][q++] = f[p].alpha[k];
02299                                 q = nz_start[j];
02300                                 for(k=0;k<cj;k++)
02301                                         if(nonzero[sj+k])
02302                                                 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02303                                 ++p;
02304                         }
02305                 
02306                 free(label);
02307                 free(probA);
02308                 free(probB);
02309                 free(count);
02310                 free(perm);
02311                 free(start);
02312                 free(x);
02313                 free(weighted_C);
02314                 free(nonzero);
02315                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02316                         free(f[i].alpha);
02317                 free(f);
02318                 free(nz_count);
02319                 free(nz_start);
02320         }
02321         return model;
02322 }
02323 
02324 // Stratified cross validation
02325 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02326 {
02327         int i;
02328         int *fold_start = Malloc(int,nr_fold+1);
02329         int l = prob->l;
02330         int *perm = Malloc(int,l);
02331         int nr_class;
02332 
02333         // stratified cv may not give leave-one-out rate
02334         // Each class to l folds -> some folds may have zero elements
02335         if((param->svm_type == C_SVC ||
02336             param->svm_type == NU_SVC) && nr_fold < l)
02337         {
02338                 int *start = NULL;
02339                 int *label = NULL;
02340                 int *count = NULL;
02341                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02342 
02343                 // random shuffle and then data grouped by fold using the array perm
02344                 int *fold_count = Malloc(int,nr_fold);
02345                 int c;
02346                 int *index = Malloc(int,l);
02347                 for(i=0;i<l;i++)
02348                         index[i]=perm[i];
02349                 for (c=0; c<nr_class; c++) 
02350                         for(i=0;i<count[c];i++)
02351                         {
02352                                 int j = i+rand()%(count[c]-i);
02353                                 swap(index[start[c]+j],index[start[c]+i]);
02354                         }
02355                 for(i=0;i<nr_fold;i++)
02356                 {
02357                         fold_count[i] = 0;
02358                         for (c=0; c<nr_class;c++)
02359                                 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02360                 }
02361                 fold_start[0]=0;
02362                 for (i=1;i<=nr_fold;i++)
02363                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02364                 for (c=0; c<nr_class;c++)
02365                         for(i=0;i<nr_fold;i++)
02366                         {
02367                                 int begin = start[c]+i*count[c]/nr_fold;
02368                                 int end = start[c]+(i+1)*count[c]/nr_fold;
02369                                 for(int j=begin;j<end;j++)
02370                                 {
02371                                         perm[fold_start[i]] = index[j];
02372                                         fold_start[i]++;
02373                                 }
02374                         }
02375                 fold_start[0]=0;
02376                 for (i=1;i<=nr_fold;i++)
02377                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02378                 free(start);    
02379                 free(label);
02380                 free(count);    
02381                 free(index);
02382                 free(fold_count);
02383         }
02384         else
02385         {
02386                 for(i=0;i<l;i++) perm[i]=i;
02387                 for(i=0;i<l;i++)
02388                 {
02389                         int j = i+rand()%(l-i);
02390                         swap(perm[i],perm[j]);
02391                 }
02392                 for(i=0;i<=nr_fold;i++)
02393                         fold_start[i]=i*l/nr_fold;
02394         }
02395 
02396         for(i=0;i<nr_fold;i++)
02397         {
02398                 int begin = fold_start[i];
02399                 int end = fold_start[i+1];
02400                 int j,k;
02401                 struct svm_problem subprob;
02402 
02403                 subprob.l = l-(end-begin);
02404                 subprob.x = Malloc(struct svm_node*,subprob.l);
02405                 subprob.y = Malloc(double,subprob.l);
02406                         
02407                 k=0;
02408                 for(j=0;j<begin;j++)
02409                 {
02410                         subprob.x[k] = prob->x[perm[j]];
02411                         subprob.y[k] = prob->y[perm[j]];
02412                         ++k;
02413                 }
02414                 for(j=end;j<l;j++)
02415                 {
02416                         subprob.x[k] = prob->x[perm[j]];
02417                         subprob.y[k] = prob->y[perm[j]];
02418                         ++k;
02419                 }
02420                 struct svm_model *submodel = svm_train(&subprob,param);
02421                 if(param->probability && 
02422                    (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02423                 {
02424                         double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02425                         for(j=begin;j<end;j++)
02426                                 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02427                         free(prob_estimates);                   
02428                 }
02429                 else
02430                         for(j=begin;j<end;j++)
02431                                 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02432                 svm_destroy_model(submodel);
02433                 free(subprob.x);
02434                 free(subprob.y);
02435         }               
02436         free(fold_start);
02437         free(perm);     
02438 }
02439 
02440 
02441 int svm_get_svm_type(const svm_model *model)
02442 {
02443         return model->param.svm_type;
02444 }
02445 
02446 int svm_get_nr_class(const svm_model *model)
02447 {
02448         return model->nr_class;
02449 }
02450 
02451 void svm_get_labels(const svm_model *model, int* label)
02452 {
02453         if (model->label != NULL)
02454                 for(int i=0;i<model->nr_class;i++)
02455                         label[i] = model->label[i];
02456 }
02457 
02458 double svm_get_svr_probability(const svm_model *model)
02459 {
02460         if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02461             model->probA!=NULL)
02462                 return model->probA[0];
02463         else
02464         {
02465                 info("Model doesn't contain information for SVR probability inference\n");
02466                 return 0;
02467         }
02468 }
02469 
02470 void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02471 {
02472         if(model->param.svm_type == ONE_CLASS ||
02473            model->param.svm_type == EPSILON_SVR ||
02474            model->param.svm_type == NU_SVR)
02475         {
02476                 double *sv_coef = model->sv_coef[0];
02477                 double sum = 0;
02478                 for(int i=0;i<model->l;i++)
02479                         sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02480                 sum -= model->rho[0];
02481                 *dec_values = sum;
02482         }
02483         else
02484         {
02485                 int i;
02486                 int nr_class = model->nr_class;
02487                 int l = model->l;
02488                 
02489                 double *kvalue = Malloc(double,l);
02490                 for(i=0;i<l;i++)
02491                         kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02492 
02493                 int *start = Malloc(int,nr_class);
02494                 start[0] = 0;
02495                 for(i=1;i<nr_class;i++)
02496                         start[i] = start[i-1]+model->nSV[i-1];
02497 
02498                 int p=0;
02499                 for(i=0;i<nr_class;i++)
02500                         for(int j=i+1;j<nr_class;j++)
02501                         {
02502                                 double sum = 0;
02503                                 int si = start[i];
02504                                 int sj = start[j];
02505                                 int ci = model->nSV[i];
02506                                 int cj = model->nSV[j];
02507                                 
02508                                 int k;
02509                                 double *coef1 = model->sv_coef[j-1];
02510                                 double *coef2 = model->sv_coef[i];
02511                                 for(k=0;k<ci;k++)
02512                                         sum += coef1[si+k] * kvalue[si+k];
02513                                 for(k=0;k<cj;k++)
02514                                         sum += coef2[sj+k] * kvalue[sj+k];
02515                                 sum -= model->rho[p];
02516                                 dec_values[p] = sum;
02517                                 p++;
02518                         }
02519 
02520                 free(kvalue);
02521                 free(start);
02522         }
02523 }
02524 
02525 double svm_predict(const svm_model *model, const svm_node *x)
02526 {
02527         if(model->param.svm_type == ONE_CLASS ||
02528            model->param.svm_type == EPSILON_SVR ||
02529            model->param.svm_type == NU_SVR)
02530         {
02531                 double res;
02532                 svm_predict_values(model, x, &res);
02533                 
02534                 if(model->param.svm_type == ONE_CLASS)
02535                         return (res>0)?1:-1;
02536                 else
02537                         return res;
02538         }
02539         else
02540         {
02541                 int i;
02542                 int nr_class = model->nr_class;
02543                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02544                 svm_predict_values(model, x, dec_values);
02545 
02546                 int *vote = Malloc(int,nr_class);
02547                 for(i=0;i<nr_class;i++)
02548                         vote[i] = 0;
02549                 int pos=0;
02550                 for(i=0;i<nr_class;i++)
02551                         for(int j=i+1;j<nr_class;j++)
02552                         {
02553                                 if(dec_values[pos++] > 0)
02554                                         ++vote[i];
02555                                 else
02556                                         ++vote[j];
02557                         }
02558 
02559                 int vote_max_idx = 0;
02560                 for(i=1;i<nr_class;i++)
02561                         if(vote[i] > vote[vote_max_idx])
02562                                 vote_max_idx = i;
02563                 free(vote);
02564                 free(dec_values);
02565                 return model->label[vote_max_idx];
02566         }
02567 }
02568 
02569 double svm_predict_probability(
02570         const svm_model *model, const svm_node *x, double *prob_estimates)
02571 {
02572         if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02573             model->probA!=NULL && model->probB!=NULL)
02574         {
02575                 int i;
02576                 int nr_class = model->nr_class;
02577                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02578                 svm_predict_values(model, x, dec_values);
02579 
02580                 double min_prob=1e-7;
02581                 double **pairwise_prob=Malloc(double *,nr_class);
02582                 for(i=0;i<nr_class;i++)
02583                         pairwise_prob[i]=Malloc(double,nr_class);
02584                 int k=0;
02585                 for(i=0;i<nr_class;i++)
02586                         for(int j=i+1;j<nr_class;j++)
02587                         {
02588                                 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02589                                 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02590                                 k++;
02591                         }
02592                 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02593 
02594                 int prob_max_idx = 0;
02595                 for(i=1;i<nr_class;i++)
02596                         if(prob_estimates[i] > prob_estimates[prob_max_idx])
02597                                 prob_max_idx = i;
02598                 for(i=0;i<nr_class;i++)
02599                         free(pairwise_prob[i]);
02600                 free(dec_values);
02601                 free(pairwise_prob);         
02602                 return model->label[prob_max_idx];
02603         }
02604         else 
02605                 return svm_predict(model, x);
02606 }
02607 
02608 const char *svm_type_table[] =
02609 {
02610         "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02611 };
02612 
02613 const char *kernel_type_table[]=
02614 {
02615         "linear","polynomial","rbf","sigmoid","precomputed",NULL
02616 };
02617 
02618 int svm_save_model(const char *model_file_name, const svm_model *model)
02619 {
02620         FILE *fp = fopen(model_file_name,"w");
02621         if(fp==NULL) return -1;
02622 
02623         const svm_parameter& param = model->param;
02624 
02625         fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02626         fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02627 
02628         if(param.kernel_type == POLY)
02629                 fprintf(fp,"degree %d\n", param.degree);
02630 
02631         if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02632                 fprintf(fp,"gamma %g\n", param.gamma);
02633 
02634         if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02635                 fprintf(fp,"coef0 %g\n", param.coef0);
02636 
02637         int nr_class = model->nr_class;
02638         int l = model->l;
02639         fprintf(fp, "nr_class %d\n", nr_class);
02640         fprintf(fp, "total_sv %d\n",l);
02641         
02642         {
02643                 fprintf(fp, "rho");
02644                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02645                         fprintf(fp," %g",model->rho[i]);
02646                 fprintf(fp, "\n");
02647         }
02648         
02649         if(model->label)
02650         {
02651                 fprintf(fp, "label");
02652                 for(int i=0;i<nr_class;i++)
02653                         fprintf(fp," %d",model->label[i]);
02654                 fprintf(fp, "\n");
02655         }
02656 
02657         if(model->probA) // regression has probA only
02658         {
02659                 fprintf(fp, "probA");
02660                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02661                         fprintf(fp," %g",model->probA[i]);
02662                 fprintf(fp, "\n");
02663         }
02664         if(model->probB)
02665         {
02666                 fprintf(fp, "probB");
02667                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02668                         fprintf(fp," %g",model->probB[i]);
02669                 fprintf(fp, "\n");
02670         }
02671 
02672         if(model->nSV)
02673         {
02674                 fprintf(fp, "nr_sv");
02675                 for(int i=0;i<nr_class;i++)
02676                         fprintf(fp," %d",model->nSV[i]);
02677                 fprintf(fp, "\n");
02678         }
02679 
02680         fprintf(fp, "SV\n");
02681         const double * const *sv_coef = model->sv_coef;
02682         const svm_node * const *SV = model->SV;
02683 
02684         for(int i=0;i<l;i++)
02685         {
02686                 for(int j=0;j<nr_class-1;j++)
02687                         fprintf(fp, "%.16g ",sv_coef[j][i]);
02688 
02689                 const svm_node *p = SV[i];
02690 
02691                 if(param.kernel_type == PRECOMPUTED)
02692                         fprintf(fp,"0:%d ",(int)(p->value));
02693                 else
02694                         while(p->index != -1)
02695                         {
02696                                 fprintf(fp,"%d:%.8g ",p->index,p->value);
02697                                 p++;
02698                         }
02699                 fprintf(fp, "\n");
02700         }
02701         if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02702         else return 0;
02703 }
02704 
02705 svm_model *svm_load_model(const char *model_file_name)
02706 {
02707         FILE *fp = fopen(model_file_name,"r");
02708         if(fp==NULL) return NULL;
02709         
02710         // read parameters
02711 
02712         svm_model *model = Malloc(svm_model,1);
02713         svm_parameter& param = model->param;
02714         model->rho = NULL;
02715         model->probA = NULL;
02716         model->probB = NULL;
02717         model->label = NULL;
02718         model->nSV = NULL;
02719 
02720         char cmd[81];
02721         while(1)
02722         {
02723                 fscanf(fp,"%80s",cmd);
02724 
02725                 if(strcmp(cmd,"svm_type")==0)
02726                 {
02727                         fscanf(fp,"%80s",cmd);
02728                         int i;
02729                         for(i=0;svm_type_table[i];i++)
02730                         {
02731                                 if(strcmp(svm_type_table[i],cmd)==0)
02732                                 {
02733                                         param.svm_type=i;
02734                                         break;
02735                                 }
02736                         }
02737                         if(svm_type_table[i] == NULL)
02738                         {
02739                                 fprintf(stderr,"unknown svm type.\n");
02740                                 free(model->rho);
02741                                 free(model->label);
02742                                 free(model->nSV);
02743                                 free(model);
02744                                 return NULL;
02745                         }
02746                 }
02747                 else if(strcmp(cmd,"kernel_type")==0)
02748                 {               
02749                         fscanf(fp,"%80s",cmd);
02750                         int i;
02751                         for(i=0;kernel_type_table[i];i++)
02752                         {
02753                                 if(strcmp(kernel_type_table[i],cmd)==0)
02754                                 {
02755                                         param.kernel_type=i;
02756                                         break;
02757                                 }
02758                         }
02759                         if(kernel_type_table[i] == NULL)
02760                         {
02761                                 fprintf(stderr,"unknown kernel function.\n");
02762                                 free(model->rho);
02763                                 free(model->label);
02764                                 free(model->nSV);
02765                                 free(model);
02766                                 return NULL;
02767                         }
02768                 }
02769                 else if(strcmp(cmd,"degree")==0)
02770                         fscanf(fp,"%d",&param.degree);
02771                 else if(strcmp(cmd,"gamma")==0)
02772                         fscanf(fp,"%lf",&param.gamma);
02773                 else if(strcmp(cmd,"coef0")==0)
02774                         fscanf(fp,"%lf",&param.coef0);
02775                 else if(strcmp(cmd,"nr_class")==0)
02776                         fscanf(fp,"%d",&model->nr_class);
02777                 else if(strcmp(cmd,"total_sv")==0)
02778                         fscanf(fp,"%d",&model->l);
02779                 else if(strcmp(cmd,"rho")==0)
02780                 {
02781                         int n = model->nr_class * (model->nr_class-1)/2;
02782                         model->rho = Malloc(double,n);
02783                         for(int i=0;i<n;i++)
02784                                 fscanf(fp,"%lf",&model->rho[i]);
02785                 }
02786                 else if(strcmp(cmd,"label")==0)
02787                 {
02788                         int n = model->nr_class;
02789                         model->label = Malloc(int,n);
02790                         for(int i=0;i<n;i++)
02791                                 fscanf(fp,"%d",&model->label[i]);
02792                 }
02793                 else if(strcmp(cmd,"probA")==0)
02794                 {
02795                         int n = model->nr_class * (model->nr_class-1)/2;
02796                         model->probA = Malloc(double,n);
02797                         for(int i=0;i<n;i++)
02798                                 fscanf(fp,"%lf",&model->probA[i]);
02799                 }
02800                 else if(strcmp(cmd,"probB")==0)
02801                 {
02802                         int n = model->nr_class * (model->nr_class-1)/2;
02803                         model->probB = Malloc(double,n);
02804                         for(int i=0;i<n;i++)
02805                                 fscanf(fp,"%lf",&model->probB[i]);
02806                 }
02807                 else if(strcmp(cmd,"nr_sv")==0)
02808                 {
02809                         int n = model->nr_class;
02810                         model->nSV = Malloc(int,n);
02811                         for(int i=0;i<n;i++)
02812                                 fscanf(fp,"%d",&model->nSV[i]);
02813                 }
02814                 else if(strcmp(cmd,"SV")==0)
02815                 {
02816                         while(1)
02817                         {
02818                                 int c = getc(fp);
02819                                 if(c==EOF || c=='\n') break;    
02820                         }
02821                         break;
02822                 }
02823                 else
02824                 {
02825                         fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02826                         free(model->rho);
02827                         free(model->label);
02828                         free(model->nSV);
02829                         free(model);
02830                         return NULL;
02831                 }
02832         }
02833 
02834         // read sv_coef and SV
02835 
02836         int elements = 0;
02837         long pos = ftell(fp);
02838 
02839         while(1)
02840         {
02841                 int c = fgetc(fp);
02842                 switch(c)
02843                 {
02844                         case '\n':
02845                                 // count the '-1' element
02846                         case ':':
02847                                 ++elements;
02848                                 break;
02849                         case EOF:
02850                                 goto out;
02851                         default:
02852                                 ;
02853                 }
02854         }
02855 out:
02856         fseek(fp,pos,SEEK_SET);
02857 
02858         int m = model->nr_class - 1;
02859         int l = model->l;
02860         model->sv_coef = Malloc(double *,m);
02861         int i;
02862         for(i=0;i<m;i++)
02863                 model->sv_coef[i] = Malloc(double,l);
02864         model->SV = Malloc(svm_node*,l);
02865         svm_node *x_space=NULL;
02866         if(l>0) x_space = Malloc(svm_node,elements);
02867 
02868         int j=0;
02869         for(i=0;i<l;i++)
02870         {
02871                 model->SV[i] = &x_space[j];
02872                 for(int k=0;k<m;k++)
02873                         fscanf(fp,"%lf",&model->sv_coef[k][i]);
02874                 while(1)
02875                 {
02876                         int c;
02877                         do {
02878                                 c = getc(fp);
02879                                 if(c=='\n') goto out2;
02880                         } while(isspace(c));
02881                         ungetc(c,fp);
02882                         fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value));
02883                         ++j;
02884                 }       
02885 out2:
02886                 x_space[j++].index = -1;
02887         }
02888         if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
02889 
02890         model->free_sv = 1;     // XXX
02891         return model;
02892 }
02893 
02894 void svm_destroy_model(svm_model* model)
02895 {
02896         if(model->free_sv && model->l > 0)
02897                 free((void *)(model->SV[0]));
02898         for(int i=0;i<model->nr_class-1;i++)
02899                 free(model->sv_coef[i]);
02900         free(model->SV);
02901         free(model->sv_coef);
02902         free(model->rho);
02903         free(model->label);
02904         free(model->probA);
02905         free(model->probB);
02906         free(model->nSV);
02907         free(model);
02908 }
02909 
02910 void svm_destroy_param(svm_parameter* param)
02911 {
02912         free(param->weight_label);
02913         free(param->weight);
02914 }
02915 
02916 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
02917 {
02918         // svm_type
02919 
02920         int svm_type = param->svm_type;
02921         if(svm_type != C_SVC &&
02922            svm_type != NU_SVC &&
02923            svm_type != ONE_CLASS &&
02924            svm_type != EPSILON_SVR &&
02925            svm_type != NU_SVR)
02926                 return "unknown svm type";
02927         
02928         // kernel_type, degree
02929         
02930         int kernel_type = param->kernel_type;
02931         if(kernel_type != LINEAR &&
02932            kernel_type != POLY &&
02933            kernel_type != RBF &&
02934            kernel_type != SIGMOID &&
02935            kernel_type != PRECOMPUTED)
02936                 return "unknown kernel type";
02937 
02938         if(param->degree < 0)
02939                 return "degree of polynomial kernel < 0";
02940 
02941         // cache_size,eps,C,nu,p,shrinking
02942 
02943         if(param->cache_size <= 0)
02944                 return "cache_size <= 0";
02945 
02946         if(param->eps <= 0)
02947                 return "eps <= 0";
02948 
02949         if(svm_type == C_SVC ||
02950            svm_type == EPSILON_SVR ||
02951            svm_type == NU_SVR)
02952                 if(param->C <= 0)
02953                         return "C <= 0";
02954 
02955         if(svm_type == NU_SVC ||
02956            svm_type == ONE_CLASS ||
02957            svm_type == NU_SVR)
02958                 if(param->nu <= 0 || param->nu > 1)
02959                         return "nu <= 0 or nu > 1";
02960 
02961         if(svm_type == EPSILON_SVR)
02962                 if(param->p < 0)
02963                         return "p < 0";
02964 
02965         if(param->shrinking != 0 &&
02966            param->shrinking != 1)
02967                 return "shrinking != 0 and shrinking != 1";
02968 
02969         if(param->probability != 0 &&
02970            param->probability != 1)
02971                 return "probability != 0 and probability != 1";
02972 
02973         if(param->probability == 1 &&
02974            svm_type == ONE_CLASS)
02975                 return "one-class SVM probability output not supported yet";
02976 
02977 
02978         // check whether nu-svc is feasible
02979         
02980         if(svm_type == NU_SVC)
02981         {
02982                 int l = prob->l;
02983                 int max_nr_class = 16;
02984                 int nr_class = 0;
02985                 int *label = Malloc(int,max_nr_class);
02986                 int *count = Malloc(int,max_nr_class);
02987 
02988                 int i;
02989                 for(i=0;i<l;i++)
02990                 {
02991                         int this_label = (int)prob->y[i];
02992                         int j;
02993                         for(j=0;j<nr_class;j++)
02994                                 if(this_label == label[j])
02995                                 {
02996                                         ++count[j];
02997                                         break;
02998                                 }
02999                         if(j == nr_class)
03000                         {
03001                                 if(nr_class == max_nr_class)
03002                                 {
03003                                         max_nr_class *= 2;
03004                                         label = (int *)realloc(label,max_nr_class*sizeof(int));
03005                                         count = (int *)realloc(count,max_nr_class*sizeof(int));
03006                                 }
03007                                 label[nr_class] = this_label;
03008                                 count[nr_class] = 1;
03009                                 ++nr_class;
03010                         }
03011                 }
03012         
03013                 for(i=0;i<nr_class;i++)
03014                 {
03015                         int n1 = count[i];
03016                         for(int j=i+1;j<nr_class;j++)
03017                         {
03018                                 int n2 = count[j];
03019                                 if(param->nu*(n1+n2)/2 > min(n1,n2))
03020                                 {
03021                                         free(label);
03022                                         free(count);
03023                                         return "specified nu is infeasible";
03024                                 }
03025                         }
03026                 }
03027                 free(label);
03028                 free(count);
03029         }
03030 
03031         return NULL;
03032 }
03033 
03034 int svm_check_probability_model(const svm_model *model)
03035 {
03036         return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03037                 model->probA!=NULL && model->probB!=NULL) ||
03038                 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03039                  model->probA!=NULL);
03040 }