svm.cpp
Go to the documentation of this file.
00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <ctype.h>
00005 #include <float.h>
00006 #include <string.h>
00007 #include <stdarg.h>
00008 #include <limits.h>
00009 #include <locale.h>
00010 #include "svm.h"
00011 int libsvm_version = LIBSVM_VERSION;
00012 typedef float Qfloat;
00013 typedef signed char schar;
00014 #ifndef min
00015 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
00016 #endif
00017 #ifndef max
00018 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
00019 #endif
00020 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00021 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
00022 {
00023         dst = new T[n];
00024         memcpy((void *)dst,(void *)src,sizeof(T)*n);
00025 }
00026 static inline double powi(double base, int times)
00027 {
00028         double tmp = base, ret = 1.0;
00029 
00030         for(int t=times; t>0; t/=2)
00031         {
00032                 if(t%2==1) ret*=tmp;
00033                 tmp = tmp * tmp;
00034         }
00035         return ret;
00036 }
00037 #define INF HUGE_VAL
00038 #define TAU 1e-12
00039 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00040 
00041 static void print_string_stdout(const char *s)
00042 {
00043         fputs(s,stdout);
00044         fflush(stdout);
00045 }
00046 static void (*svm_print_string) (const char *) = &print_string_stdout;
00047 #if 1
00048 static void info(const char *fmt,...)
00049 {
00050         char buf[BUFSIZ];
00051         va_list ap;
00052         va_start(ap,fmt);
00053         vsprintf(buf,fmt,ap);
00054         va_end(ap);
00055         (*svm_print_string)(buf);
00056 }
00057 #else
00058 static void info(const char *fmt,...) {}
00059 #endif
00060 
00061 //
00062 // Kernel Cache
00063 //
00064 // l is the number of total data items
00065 // size is the cache size limit in bytes
00066 //
00067 class Cache
00068 {
00069 public:
00070         Cache(int l,long int size);
00071         ~Cache();
00072 
00073         // request data [0,len)
00074         // return some position p where [p,len) need to be filled
00075         // (p >= len if nothing needs to be filled)
00076         int get_data(const int index, Qfloat **data, int len);
00077         void swap_index(int i, int j);
00078 private:
00079         int l;
00080         long int size;
00081         struct head_t
00082         {
00083                 head_t *prev, *next;    // a circular list
00084                 Qfloat *data;
00085                 int len;                // data[0,len) is cached in this entry
00086         };
00087 
00088         head_t *head;
00089         head_t lru_head;
00090         void lru_delete(head_t *h);
00091         void lru_insert(head_t *h);
00092 };
00093 
00094 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00095 {
00096         head = (head_t *)calloc(l,sizeof(head_t));      // initialized to 0
00097         size /= sizeof(Qfloat);
00098         size -= l * sizeof(head_t) / sizeof(Qfloat);
00099         size = max(size, 2 * (long int) l);     // cache must be large enough for two columns
00100         lru_head.next = lru_head.prev = &lru_head;
00101 }
00102 
00103 Cache::~Cache()
00104 {
00105         for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00106                 free(h->data);
00107         free(head);
00108 }
00109 
00110 void Cache::lru_delete(head_t *h)
00111 {
00112         // delete from current location
00113         h->prev->next = h->next;
00114         h->next->prev = h->prev;
00115 }
00116 
00117 void Cache::lru_insert(head_t *h)
00118 {
00119         // insert to last position
00120         h->next = &lru_head;
00121         h->prev = lru_head.prev;
00122         h->prev->next = h;
00123         h->next->prev = h;
00124 }
00125 
00126 int Cache::get_data(const int index, Qfloat **data, int len)
00127 {
00128         head_t *h = &head[index];
00129         if(h->len) lru_delete(h);
00130         int more = len - h->len;
00131 
00132         if(more > 0)
00133         {
00134                 // free old space
00135                 while(size < more)
00136                 {
00137                         head_t *old = lru_head.next;
00138                         lru_delete(old);
00139                         free(old->data);
00140                         size += old->len;
00141                         old->data = 0;
00142                         old->len = 0;
00143                 }
00144 
00145                 // allocate new space
00146                 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00147                 size -= more;
00148                 swap(h->len,len);
00149         }
00150 
00151         lru_insert(h);
00152         *data = h->data;
00153         return len;
00154 }
00155 
00156 void Cache::swap_index(int i, int j)
00157 {
00158         if(i==j) return;
00159 
00160         if(head[i].len) lru_delete(&head[i]);
00161         if(head[j].len) lru_delete(&head[j]);
00162         swap(head[i].data,head[j].data);
00163         swap(head[i].len,head[j].len);
00164         if(head[i].len) lru_insert(&head[i]);
00165         if(head[j].len) lru_insert(&head[j]);
00166 
00167         if(i>j) swap(i,j);
00168         for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00169         {
00170                 if(h->len > i)
00171                 {
00172                         if(h->len > j)
00173                                 swap(h->data[i],h->data[j]);
00174                         else
00175                         {
00176                                 // give up
00177                                 lru_delete(h);
00178                                 free(h->data);
00179                                 size += h->len;
00180                                 h->data = 0;
00181                                 h->len = 0;
00182                         }
00183                 }
00184         }
00185 }
00186 
00187 //
00188 // Kernel evaluation
00189 //
00190 // the static method k_function is for doing single kernel evaluation
00191 // the constructor of Kernel prepares to calculate the l*l kernel matrix
00192 // the member function get_Q is for getting one column from the Q Matrix
00193 //
00194 class QMatrix {
00195 public:
00196         virtual Qfloat *get_Q(int column, int len) const = 0;
00197         virtual double *get_QD() const = 0;
00198         virtual void swap_index(int i, int j) const = 0;
00199         virtual ~QMatrix() {}
00200 };
00201 
00202 class Kernel: public QMatrix {
00203 public:
00204         Kernel(int l, svm_node * const * x, const svm_parameter& param);
00205         virtual ~Kernel();
00206 
00207         static double k_function(const svm_node *x, const svm_node *y,
00208                                  const svm_parameter& param);
00209         virtual Qfloat *get_Q(int column, int len) const = 0;
00210         virtual double *get_QD() const = 0;
00211         virtual void swap_index(int i, int j) const     // no so const...
00212         {
00213                 swap(x[i],x[j]);
00214                 if(x_square) swap(x_square[i],x_square[j]);
00215         }
00216 protected:
00217 
00218         double (Kernel::*kernel_function)(int i, int j) const;
00219 
00220 private:
00221         const svm_node **x;
00222         double *x_square;
00223 
00224         // svm_parameter
00225         const int kernel_type;
00226         const int degree;
00227         const double gamma;
00228         const double coef0;
00229 
00230         static double dot(const svm_node *px, const svm_node *py);
00231         double kernel_linear(int i, int j) const
00232         {
00233                 return dot(x[i],x[j]);
00234         }
00235         double kernel_poly(int i, int j) const
00236         {
00237                 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00238         }
00239         double kernel_rbf(int i, int j) const
00240         {
00241                 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00242         }
00243         double kernel_sigmoid(int i, int j) const
00244         {
00245                 return tanh(gamma*dot(x[i],x[j])+coef0);
00246         }
00247         double kernel_precomputed(int i, int j) const
00248         {
00249                 return x[i][(int)(x[j][0].value)].value;
00250         }
00251 };
00252 
00253 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00254 :kernel_type(param.kernel_type), degree(param.degree),
00255  gamma(param.gamma), coef0(param.coef0)
00256 {
00257         switch(kernel_type)
00258         {
00259                 case LINEAR:
00260                         kernel_function = &Kernel::kernel_linear;
00261                         break;
00262                 case POLY:
00263                         kernel_function = &Kernel::kernel_poly;
00264                         break;
00265                 case RBF:
00266                         kernel_function = &Kernel::kernel_rbf;
00267                         break;
00268                 case SIGMOID:
00269                         kernel_function = &Kernel::kernel_sigmoid;
00270                         break;
00271                 case PRECOMPUTED:
00272                         kernel_function = &Kernel::kernel_precomputed;
00273                         break;
00274         }
00275 
00276         clone(x,x_,l);
00277 
00278         if(kernel_type == RBF)
00279         {
00280                 x_square = new double[l];
00281                 for(int i=0;i<l;i++)
00282                         x_square[i] = dot(x[i],x[i]);
00283         }
00284         else
00285                 x_square = 0;
00286 }
00287 
00288 Kernel::~Kernel()
00289 {
00290         delete[] x;
00291         delete[] x_square;
00292 }
00293 
00294 double Kernel::dot(const svm_node *px, const svm_node *py)
00295 {
00296         double sum = 0;
00297         while(px->index != -1 && py->index != -1)
00298         {
00299                 if(px->index == py->index)
00300                 {
00301                         sum += px->value * py->value;
00302                         ++px;
00303                         ++py;
00304                 }
00305                 else
00306                 {
00307                         if(px->index > py->index)
00308                                 ++py;
00309                         else
00310                                 ++px;
00311                 }                       
00312         }
00313         return sum;
00314 }
00315 
00316 double Kernel::k_function(const svm_node *x, const svm_node *y,
00317                           const svm_parameter& param)
00318 {
00319         switch(param.kernel_type)
00320         {
00321                 case LINEAR:
00322                         return dot(x,y);
00323                 case POLY:
00324                         return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00325                 case RBF:
00326                 {
00327                         double sum = 0;
00328                         while(x->index != -1 && y->index !=-1)
00329                         {
00330                                 if(x->index == y->index)
00331                                 {
00332                                         double d = x->value - y->value;
00333                                         sum += d*d;
00334                                         ++x;
00335                                         ++y;
00336                                 }
00337                                 else
00338                                 {
00339                                         if(x->index > y->index)
00340                                         {       
00341                                                 sum += y->value * y->value;
00342                                                 ++y;
00343                                         }
00344                                         else
00345                                         {
00346                                                 sum += x->value * x->value;
00347                                                 ++x;
00348                                         }
00349                                 }
00350                         }
00351 
00352                         while(x->index != -1)
00353                         {
00354                                 sum += x->value * x->value;
00355                                 ++x;
00356                         }
00357 
00358                         while(y->index != -1)
00359                         {
00360                                 sum += y->value * y->value;
00361                                 ++y;
00362                         }
00363                         
00364                         return exp(-param.gamma*sum);
00365                 }
00366                 case SIGMOID:
00367                         return tanh(param.gamma*dot(x,y)+param.coef0);
00368                 case PRECOMPUTED:  //x: test (validation), y: SV
00369                         return x[(int)(y->value)].value;
00370                 default:
00371                         return 0;  // Unreachable 
00372         }
00373 }
00374 
00375 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
00376 // Solves:
00377 //
00378 //      min 0.5(\alpha^T Q \alpha) + p^T \alpha
00379 //
00380 //              y^T \alpha = \delta
00381 //              y_i = +1 or -1
00382 //              0 <= alpha_i <= Cp for y_i = 1
00383 //              0 <= alpha_i <= Cn for y_i = -1
00384 //
00385 // Given:
00386 //
00387 //      Q, p, y, Cp, Cn, and an initial feasible point \alpha
00388 //      l is the size of vectors and matrices
00389 //      eps is the stopping tolerance
00390 //
00391 // solution will be put in \alpha, objective value will be put in obj
00392 //
00393 class Solver {
00394 public:
00395         Solver() {};
00396         virtual ~Solver() {};
00397 
00398         struct SolutionInfo {
00399                 double obj;
00400                 double rho;
00401                 double upper_bound_p;
00402                 double upper_bound_n;
00403                 double r;       // for Solver_NU
00404         };
00405 
00406         void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00407                    double *alpha_, double Cp, double Cn, double eps,
00408                    SolutionInfo* si, int shrinking);
00409 protected:
00410         int active_size;
00411         schar *y;
00412         double *G;              // gradient of objective function
00413         enum { LOWER_BOUND, UPPER_BOUND, FREE };
00414         char *alpha_status;     // LOWER_BOUND, UPPER_BOUND, FREE
00415         double *alpha;
00416         const QMatrix *Q;
00417         const double *QD;
00418         double eps;
00419         double Cp,Cn;
00420         double *p;
00421         int *active_set;
00422         double *G_bar;          // gradient, if we treat free variables as 0
00423         int l;
00424         bool unshrink;  // XXX
00425 
00426         double get_C(int i)
00427         {
00428                 return (y[i] > 0)? Cp : Cn;
00429         }
00430         void update_alpha_status(int i)
00431         {
00432                 if(alpha[i] >= get_C(i))
00433                         alpha_status[i] = UPPER_BOUND;
00434                 else if(alpha[i] <= 0)
00435                         alpha_status[i] = LOWER_BOUND;
00436                 else alpha_status[i] = FREE;
00437         }
00438         bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00439         bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00440         bool is_free(int i) { return alpha_status[i] == FREE; }
00441         void swap_index(int i, int j);
00442         void reconstruct_gradient();
00443         virtual int select_working_set(int &i, int &j);
00444         virtual double calculate_rho();
00445         virtual void do_shrinking();
00446 private:
00447         bool be_shrunk(int i, double Gmax1, double Gmax2);
00448 };
00449 
00450 void Solver::swap_index(int i, int j)
00451 {
00452         Q->swap_index(i,j);
00453         swap(y[i],y[j]);
00454         swap(G[i],G[j]);
00455         swap(alpha_status[i],alpha_status[j]);
00456         swap(alpha[i],alpha[j]);
00457         swap(p[i],p[j]);
00458         swap(active_set[i],active_set[j]);
00459         swap(G_bar[i],G_bar[j]);
00460 }
00461 
00462 void Solver::reconstruct_gradient()
00463 {
00464         // reconstruct inactive elements of G from G_bar and free variables
00465 
00466         if(active_size == l) return;
00467 
00468         int i,j;
00469         int nr_free = 0;
00470 
00471         for(j=active_size;j<l;j++)
00472                 G[j] = G_bar[j] + p[j];
00473 
00474         for(j=0;j<active_size;j++)
00475                 if(is_free(j))
00476                         nr_free++;
00477 
00478         if(2*nr_free < active_size)
00479                 info("\nWARNING: using -h 0 may be faster\n");
00480 
00481         if (nr_free*l > 2*active_size*(l-active_size))
00482         {
00483                 for(i=active_size;i<l;i++)
00484                 {
00485                         const Qfloat *Q_i = Q->get_Q(i,active_size);
00486                         for(j=0;j<active_size;j++)
00487                                 if(is_free(j))
00488                                         G[i] += alpha[j] * Q_i[j];
00489                 }
00490         }
00491         else
00492         {
00493                 for(i=0;i<active_size;i++)
00494                         if(is_free(i))
00495                         {
00496                                 const Qfloat *Q_i = Q->get_Q(i,l);
00497                                 double alpha_i = alpha[i];
00498                                 for(j=active_size;j<l;j++)
00499                                         G[j] += alpha_i * Q_i[j];
00500                         }
00501         }
00502 }
00503 
00504 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00505                    double *alpha_, double Cp, double Cn, double eps,
00506                    SolutionInfo* si, int shrinking)
00507 {
00508         this->l = l;
00509         this->Q = &Q;
00510         QD=Q.get_QD();
00511         clone(p, p_,l);
00512         clone(y, y_,l);
00513         clone(alpha,alpha_,l);
00514         this->Cp = Cp;
00515         this->Cn = Cn;
00516         this->eps = eps;
00517         unshrink = false;
00518 
00519         // initialize alpha_status
00520         {
00521                 alpha_status = new char[l];
00522                 for(int i=0;i<l;i++)
00523                         update_alpha_status(i);
00524         }
00525 
00526         // initialize active set (for shrinking)
00527         {
00528                 active_set = new int[l];
00529                 for(int i=0;i<l;i++)
00530                         active_set[i] = i;
00531                 active_size = l;
00532         }
00533 
00534         // initialize gradient
00535         {
00536                 G = new double[l];
00537                 G_bar = new double[l];
00538                 int i;
00539                 for(i=0;i<l;i++)
00540                 {
00541                         G[i] = p[i];
00542                         G_bar[i] = 0;
00543                 }
00544                 for(i=0;i<l;i++)
00545                         if(!is_lower_bound(i))
00546                         {
00547                                 const Qfloat *Q_i = Q.get_Q(i,l);
00548                                 double alpha_i = alpha[i];
00549                                 int j;
00550                                 for(j=0;j<l;j++)
00551                                         G[j] += alpha_i*Q_i[j];
00552                                 if(is_upper_bound(i))
00553                                         for(j=0;j<l;j++)
00554                                                 G_bar[j] += get_C(i) * Q_i[j];
00555                         }
00556         }
00557 
00558         // optimization step
00559 
00560         int iter = 0;
00561         int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
00562         int counter = min(l,1000)+1;
00563         
00564         while(iter < max_iter)
00565         {
00566                 // show progress and do shrinking
00567 
00568                 if(--counter == 0)
00569                 {
00570                         counter = min(l,1000);
00571                         if(shrinking) do_shrinking();
00572                         info(".");
00573                 }
00574 
00575                 int i,j;
00576                 if(select_working_set(i,j)!=0)
00577                 {
00578                         // reconstruct the whole gradient
00579                         reconstruct_gradient();
00580                         // reset active set size and check
00581                         active_size = l;
00582                         info("*");
00583                         if(select_working_set(i,j)!=0)
00584                                 break;
00585                         else
00586                                 counter = 1;    // do shrinking next iteration
00587                 }
00588                 
00589                 ++iter;
00590 
00591                 // update alpha[i] and alpha[j], handle bounds carefully
00592                 
00593                 const Qfloat *Q_i = Q.get_Q(i,active_size);
00594                 const Qfloat *Q_j = Q.get_Q(j,active_size);
00595 
00596                 double C_i = get_C(i);
00597                 double C_j = get_C(j);
00598 
00599                 double old_alpha_i = alpha[i];
00600                 double old_alpha_j = alpha[j];
00601 
00602                 if(y[i]!=y[j])
00603                 {
00604                         double quad_coef = QD[i]+QD[j]+2*Q_i[j];
00605                         if (quad_coef <= 0)
00606                                 quad_coef = TAU;
00607                         double delta = (-G[i]-G[j])/quad_coef;
00608                         double diff = alpha[i] - alpha[j];
00609                         alpha[i] += delta;
00610                         alpha[j] += delta;
00611                         
00612                         if(diff > 0)
00613                         {
00614                                 if(alpha[j] < 0)
00615                                 {
00616                                         alpha[j] = 0;
00617                                         alpha[i] = diff;
00618                                 }
00619                         }
00620                         else
00621                         {
00622                                 if(alpha[i] < 0)
00623                                 {
00624                                         alpha[i] = 0;
00625                                         alpha[j] = -diff;
00626                                 }
00627                         }
00628                         if(diff > C_i - C_j)
00629                         {
00630                                 if(alpha[i] > C_i)
00631                                 {
00632                                         alpha[i] = C_i;
00633                                         alpha[j] = C_i - diff;
00634                                 }
00635                         }
00636                         else
00637                         {
00638                                 if(alpha[j] > C_j)
00639                                 {
00640                                         alpha[j] = C_j;
00641                                         alpha[i] = C_j + diff;
00642                                 }
00643                         }
00644                 }
00645                 else
00646                 {
00647                         double quad_coef = QD[i]+QD[j]-2*Q_i[j];
00648                         if (quad_coef <= 0)
00649                                 quad_coef = TAU;
00650                         double delta = (G[i]-G[j])/quad_coef;
00651                         double sum = alpha[i] + alpha[j];
00652                         alpha[i] -= delta;
00653                         alpha[j] += delta;
00654 
00655                         if(sum > C_i)
00656                         {
00657                                 if(alpha[i] > C_i)
00658                                 {
00659                                         alpha[i] = C_i;
00660                                         alpha[j] = sum - C_i;
00661                                 }
00662                         }
00663                         else
00664                         {
00665                                 if(alpha[j] < 0)
00666                                 {
00667                                         alpha[j] = 0;
00668                                         alpha[i] = sum;
00669                                 }
00670                         }
00671                         if(sum > C_j)
00672                         {
00673                                 if(alpha[j] > C_j)
00674                                 {
00675                                         alpha[j] = C_j;
00676                                         alpha[i] = sum - C_j;
00677                                 }
00678                         }
00679                         else
00680                         {
00681                                 if(alpha[i] < 0)
00682                                 {
00683                                         alpha[i] = 0;
00684                                         alpha[j] = sum;
00685                                 }
00686                         }
00687                 }
00688 
00689                 // update G
00690 
00691                 double delta_alpha_i = alpha[i] - old_alpha_i;
00692                 double delta_alpha_j = alpha[j] - old_alpha_j;
00693                 
00694                 for(int k=0;k<active_size;k++)
00695                 {
00696                         G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00697                 }
00698 
00699                 // update alpha_status and G_bar
00700 
00701                 {
00702                         bool ui = is_upper_bound(i);
00703                         bool uj = is_upper_bound(j);
00704                         update_alpha_status(i);
00705                         update_alpha_status(j);
00706                         int k;
00707                         if(ui != is_upper_bound(i))
00708                         {
00709                                 Q_i = Q.get_Q(i,l);
00710                                 if(ui)
00711                                         for(k=0;k<l;k++)
00712                                                 G_bar[k] -= C_i * Q_i[k];
00713                                 else
00714                                         for(k=0;k<l;k++)
00715                                                 G_bar[k] += C_i * Q_i[k];
00716                         }
00717 
00718                         if(uj != is_upper_bound(j))
00719                         {
00720                                 Q_j = Q.get_Q(j,l);
00721                                 if(uj)
00722                                         for(k=0;k<l;k++)
00723                                                 G_bar[k] -= C_j * Q_j[k];
00724                                 else
00725                                         for(k=0;k<l;k++)
00726                                                 G_bar[k] += C_j * Q_j[k];
00727                         }
00728                 }
00729         }
00730 
00731         if(iter >= max_iter)
00732         {
00733                 if(active_size < l)
00734                 {
00735                         // reconstruct the whole gradient to calculate objective value
00736                         reconstruct_gradient();
00737                         active_size = l;
00738                         info("*");
00739                 }
00740                 fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
00741         }
00742 
00743         // calculate rho
00744 
00745         si->rho = calculate_rho();
00746 
00747         // calculate objective value
00748         {
00749                 double v = 0;
00750                 int i;
00751                 for(i=0;i<l;i++)
00752                         v += alpha[i] * (G[i] + p[i]);
00753 
00754                 si->obj = v/2;
00755         }
00756 
00757         // put back the solution
00758         {
00759                 for(int i=0;i<l;i++)
00760                         alpha_[active_set[i]] = alpha[i];
00761         }
00762 
00763         // juggle everything back
00764         /*{
00765                 for(int i=0;i<l;i++)
00766                         while(active_set[i] != i)
00767                                 swap_index(i,active_set[i]);
00768                                 // or Q.swap_index(i,active_set[i]);
00769         }*/
00770 
00771         si->upper_bound_p = Cp;
00772         si->upper_bound_n = Cn;
00773 
00774         info("\noptimization finished, #iter = %d\n",iter);
00775 
00776         delete[] p;
00777         delete[] y;
00778         delete[] alpha;
00779         delete[] alpha_status;
00780         delete[] active_set;
00781         delete[] G;
00782         delete[] G_bar;
00783 }
00784 
00785 // return 1 if already optimal, return 0 otherwise
00786 int Solver::select_working_set(int &out_i, int &out_j)
00787 {
00788         // return i,j such that
00789         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
00790         // j: minimizes the decrease of obj value
00791         //    (if quadratic coefficeint <= 0, replace it with tau)
00792         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
00793         
00794         double Gmax = -INF;
00795         double Gmax2 = -INF;
00796         int Gmax_idx = -1;
00797         int Gmin_idx = -1;
00798         double obj_diff_min = INF;
00799 
00800         for(int t=0;t<active_size;t++)
00801                 if(y[t]==+1)    
00802                 {
00803                         if(!is_upper_bound(t))
00804                                 if(-G[t] >= Gmax)
00805                                 {
00806                                         Gmax = -G[t];
00807                                         Gmax_idx = t;
00808                                 }
00809                 }
00810                 else
00811                 {
00812                         if(!is_lower_bound(t))
00813                                 if(G[t] >= Gmax)
00814                                 {
00815                                         Gmax = G[t];
00816                                         Gmax_idx = t;
00817                                 }
00818                 }
00819 
00820         int i = Gmax_idx;
00821         const Qfloat *Q_i = NULL;
00822         if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
00823                 Q_i = Q->get_Q(i,active_size);
00824 
00825         for(int j=0;j<active_size;j++)
00826         {
00827                 if(y[j]==+1)
00828                 {
00829                         if (!is_lower_bound(j))
00830                         {
00831                                 double grad_diff=Gmax+G[j];
00832                                 if (G[j] >= Gmax2)
00833                                         Gmax2 = G[j];
00834                                 if (grad_diff > 0)
00835                                 {
00836                                         double obj_diff;
00837                                         double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
00838                                         if (quad_coef > 0)
00839                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00840                                         else
00841                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00842 
00843                                         if (obj_diff <= obj_diff_min)
00844                                         {
00845                                                 Gmin_idx=j;
00846                                                 obj_diff_min = obj_diff;
00847                                         }
00848                                 }
00849                         }
00850                 }
00851                 else
00852                 {
00853                         if (!is_upper_bound(j))
00854                         {
00855                                 double grad_diff= Gmax-G[j];
00856                                 if (-G[j] >= Gmax2)
00857                                         Gmax2 = -G[j];
00858                                 if (grad_diff > 0)
00859                                 {
00860                                         double obj_diff;
00861                                         double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
00862                                         if (quad_coef > 0)
00863                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00864                                         else
00865                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00866 
00867                                         if (obj_diff <= obj_diff_min)
00868                                         {
00869                                                 Gmin_idx=j;
00870                                                 obj_diff_min = obj_diff;
00871                                         }
00872                                 }
00873                         }
00874                 }
00875         }
00876 
00877         if(Gmax+Gmax2 < eps || Gmin_idx == -1)
00878                 return 1;
00879 
00880         out_i = Gmax_idx;
00881         out_j = Gmin_idx;
00882         return 0;
00883 }
00884 
00885 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
00886 {
00887         if(is_upper_bound(i))
00888         {
00889                 if(y[i]==+1)
00890                         return(-G[i] > Gmax1);
00891                 else
00892                         return(-G[i] > Gmax2);
00893         }
00894         else if(is_lower_bound(i))
00895         {
00896                 if(y[i]==+1)
00897                         return(G[i] > Gmax2);
00898                 else    
00899                         return(G[i] > Gmax1);
00900         }
00901         else
00902                 return(false);
00903 }
00904 
00905 void Solver::do_shrinking()
00906 {
00907         int i;
00908         double Gmax1 = -INF;            // max { -y_i * grad(f)_i | i in I_up(\alpha) }
00909         double Gmax2 = -INF;            // max { y_i * grad(f)_i | i in I_low(\alpha) }
00910 
00911         // find maximal violating pair first
00912         for(i=0;i<active_size;i++)
00913         {
00914                 if(y[i]==+1)    
00915                 {
00916                         if(!is_upper_bound(i))  
00917                         {
00918                                 if(-G[i] >= Gmax1)
00919                                         Gmax1 = -G[i];
00920                         }
00921                         if(!is_lower_bound(i))  
00922                         {
00923                                 if(G[i] >= Gmax2)
00924                                         Gmax2 = G[i];
00925                         }
00926                 }
00927                 else    
00928                 {
00929                         if(!is_upper_bound(i))  
00930                         {
00931                                 if(-G[i] >= Gmax2)
00932                                         Gmax2 = -G[i];
00933                         }
00934                         if(!is_lower_bound(i))  
00935                         {
00936                                 if(G[i] >= Gmax1)
00937                                         Gmax1 = G[i];
00938                         }
00939                 }
00940         }
00941 
00942         if(unshrink == false && Gmax1 + Gmax2 <= eps*10) 
00943         {
00944                 unshrink = true;
00945                 reconstruct_gradient();
00946                 active_size = l;
00947                 info("*");
00948         }
00949 
00950         for(i=0;i<active_size;i++)
00951                 if (be_shrunk(i, Gmax1, Gmax2))
00952                 {
00953                         active_size--;
00954                         while (active_size > i)
00955                         {
00956                                 if (!be_shrunk(active_size, Gmax1, Gmax2))
00957                                 {
00958                                         swap_index(i,active_size);
00959                                         break;
00960                                 }
00961                                 active_size--;
00962                         }
00963                 }
00964 }
00965 
00966 double Solver::calculate_rho()
00967 {
00968         double r;
00969         int nr_free = 0;
00970         double ub = INF, lb = -INF, sum_free = 0;
00971         for(int i=0;i<active_size;i++)
00972         {
00973                 double yG = y[i]*G[i];
00974 
00975                 if(is_upper_bound(i))
00976                 {
00977                         if(y[i]==-1)
00978                                 ub = min(ub,yG);
00979                         else
00980                                 lb = max(lb,yG);
00981                 }
00982                 else if(is_lower_bound(i))
00983                 {
00984                         if(y[i]==+1)
00985                                 ub = min(ub,yG);
00986                         else
00987                                 lb = max(lb,yG);
00988                 }
00989                 else
00990                 {
00991                         ++nr_free;
00992                         sum_free += yG;
00993                 }
00994         }
00995 
00996         if(nr_free>0)
00997                 r = sum_free/nr_free;
00998         else
00999                 r = (ub+lb)/2;
01000 
01001         return r;
01002 }
01003 
01004 //
01005 // Solver for nu-svm classification and regression
01006 //
01007 // additional constraint: e^T \alpha = constant
01008 //
01009 class Solver_NU: public Solver
01010 {
01011 public:
01012         Solver_NU() {}
01013         void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
01014                    double *alpha, double Cp, double Cn, double eps,
01015                    SolutionInfo* si, int shrinking)
01016         {
01017                 this->si = si;
01018                 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
01019         }
01020 private:
01021         SolutionInfo *si;
01022         int select_working_set(int &i, int &j);
01023         double calculate_rho();
01024         bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
01025         void do_shrinking();
01026 };
01027 
01028 // return 1 if already optimal, return 0 otherwise
01029 int Solver_NU::select_working_set(int &out_i, int &out_j)
01030 {
01031         // return i,j such that y_i = y_j and
01032         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
01033         // j: minimizes the decrease of obj value
01034         //    (if quadratic coefficeint <= 0, replace it with tau)
01035         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
01036 
01037         double Gmaxp = -INF;
01038         double Gmaxp2 = -INF;
01039         int Gmaxp_idx = -1;
01040 
01041         double Gmaxn = -INF;
01042         double Gmaxn2 = -INF;
01043         int Gmaxn_idx = -1;
01044 
01045         int Gmin_idx = -1;
01046         double obj_diff_min = INF;
01047 
01048         for(int t=0;t<active_size;t++)
01049                 if(y[t]==+1)
01050                 {
01051                         if(!is_upper_bound(t))
01052                                 if(-G[t] >= Gmaxp)
01053                                 {
01054                                         Gmaxp = -G[t];
01055                                         Gmaxp_idx = t;
01056                                 }
01057                 }
01058                 else
01059                 {
01060                         if(!is_lower_bound(t))
01061                                 if(G[t] >= Gmaxn)
01062                                 {
01063                                         Gmaxn = G[t];
01064                                         Gmaxn_idx = t;
01065                                 }
01066                 }
01067 
01068         int ip = Gmaxp_idx;
01069         int in = Gmaxn_idx;
01070         const Qfloat *Q_ip = NULL;
01071         const Qfloat *Q_in = NULL;
01072         if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
01073                 Q_ip = Q->get_Q(ip,active_size);
01074         if(in != -1)
01075                 Q_in = Q->get_Q(in,active_size);
01076 
01077         for(int j=0;j<active_size;j++)
01078         {
01079                 if(y[j]==+1)
01080                 {
01081                         if (!is_lower_bound(j)) 
01082                         {
01083                                 double grad_diff=Gmaxp+G[j];
01084                                 if (G[j] >= Gmaxp2)
01085                                         Gmaxp2 = G[j];
01086                                 if (grad_diff > 0)
01087                                 {
01088                                         double obj_diff;
01089                                         double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
01090                                         if (quad_coef > 0)
01091                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01092                                         else
01093                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01094 
01095                                         if (obj_diff <= obj_diff_min)
01096                                         {
01097                                                 Gmin_idx=j;
01098                                                 obj_diff_min = obj_diff;
01099                                         }
01100                                 }
01101                         }
01102                 }
01103                 else
01104                 {
01105                         if (!is_upper_bound(j))
01106                         {
01107                                 double grad_diff=Gmaxn-G[j];
01108                                 if (-G[j] >= Gmaxn2)
01109                                         Gmaxn2 = -G[j];
01110                                 if (grad_diff > 0)
01111                                 {
01112                                         double obj_diff;
01113                                         double quad_coef = QD[in]+QD[j]-2*Q_in[j];
01114                                         if (quad_coef > 0)
01115                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01116                                         else
01117                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01118 
01119                                         if (obj_diff <= obj_diff_min)
01120                                         {
01121                                                 Gmin_idx=j;
01122                                                 obj_diff_min = obj_diff;
01123                                         }
01124                                 }
01125                         }
01126                 }
01127         }
01128 
01129         if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps || Gmin_idx == -1)
01130                 return 1;
01131 
01132         if (y[Gmin_idx] == +1)
01133                 out_i = Gmaxp_idx;
01134         else
01135                 out_i = Gmaxn_idx;
01136         out_j = Gmin_idx;
01137 
01138         return 0;
01139 }
01140 
01141 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01142 {
01143         if(is_upper_bound(i))
01144         {
01145                 if(y[i]==+1)
01146                         return(-G[i] > Gmax1);
01147                 else    
01148                         return(-G[i] > Gmax4);
01149         }
01150         else if(is_lower_bound(i))
01151         {
01152                 if(y[i]==+1)
01153                         return(G[i] > Gmax2);
01154                 else    
01155                         return(G[i] > Gmax3);
01156         }
01157         else
01158                 return(false);
01159 }
01160 
01161 void Solver_NU::do_shrinking()
01162 {
01163         double Gmax1 = -INF;    // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
01164         double Gmax2 = -INF;    // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
01165         double Gmax3 = -INF;    // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
01166         double Gmax4 = -INF;    // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
01167 
01168         // find maximal violating pair first
01169         int i;
01170         for(i=0;i<active_size;i++)
01171         {
01172                 if(!is_upper_bound(i))
01173                 {
01174                         if(y[i]==+1)
01175                         {
01176                                 if(-G[i] > Gmax1) Gmax1 = -G[i];
01177                         }
01178                         else    if(-G[i] > Gmax4) Gmax4 = -G[i];
01179                 }
01180                 if(!is_lower_bound(i))
01181                 {
01182                         if(y[i]==+1)
01183                         {       
01184                                 if(G[i] > Gmax2) Gmax2 = G[i];
01185                         }
01186                         else    if(G[i] > Gmax3) Gmax3 = G[i];
01187                 }
01188         }
01189 
01190         if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) 
01191         {
01192                 unshrink = true;
01193                 reconstruct_gradient();
01194                 active_size = l;
01195         }
01196 
01197         for(i=0;i<active_size;i++)
01198                 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
01199                 {
01200                         active_size--;
01201                         while (active_size > i)
01202                         {
01203                                 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01204                                 {
01205                                         swap_index(i,active_size);
01206                                         break;
01207                                 }
01208                                 active_size--;
01209                         }
01210                 }
01211 }
01212 
01213 double Solver_NU::calculate_rho()
01214 {
01215         int nr_free1 = 0,nr_free2 = 0;
01216         double ub1 = INF, ub2 = INF;
01217         double lb1 = -INF, lb2 = -INF;
01218         double sum_free1 = 0, sum_free2 = 0;
01219 
01220         for(int i=0;i<active_size;i++)
01221         {
01222                 if(y[i]==+1)
01223                 {
01224                         if(is_upper_bound(i))
01225                                 lb1 = max(lb1,G[i]);
01226                         else if(is_lower_bound(i))
01227                                 ub1 = min(ub1,G[i]);
01228                         else
01229                         {
01230                                 ++nr_free1;
01231                                 sum_free1 += G[i];
01232                         }
01233                 }
01234                 else
01235                 {
01236                         if(is_upper_bound(i))
01237                                 lb2 = max(lb2,G[i]);
01238                         else if(is_lower_bound(i))
01239                                 ub2 = min(ub2,G[i]);
01240                         else
01241                         {
01242                                 ++nr_free2;
01243                                 sum_free2 += G[i];
01244                         }
01245                 }
01246         }
01247 
01248         double r1,r2;
01249         if(nr_free1 > 0)
01250                 r1 = sum_free1/nr_free1;
01251         else
01252                 r1 = (ub1+lb1)/2;
01253         
01254         if(nr_free2 > 0)
01255                 r2 = sum_free2/nr_free2;
01256         else
01257                 r2 = (ub2+lb2)/2;
01258         
01259         si->r = (r1+r2)/2;
01260         return (r1-r2)/2;
01261 }
01262 
01263 //
01264 // Q matrices for various formulations
01265 //
01266 class SVC_Q: public Kernel
01267 { 
01268 public:
01269         SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01270         :Kernel(prob.l, prob.x, param)
01271         {
01272                 clone(y,y_,prob.l);
01273                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01274                 QD = new double[prob.l];
01275                 for(int i=0;i<prob.l;i++)
01276                         QD[i] = (this->*kernel_function)(i,i);
01277         }
01278         
01279         Qfloat *get_Q(int i, int len) const
01280         {
01281                 Qfloat *data;
01282                 int start, j;
01283                 if((start = cache->get_data(i,&data,len)) < len)
01284                 {
01285                         for(j=start;j<len;j++)
01286                                 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01287                 }
01288                 return data;
01289         }
01290 
01291         double *get_QD() const
01292         {
01293                 return QD;
01294         }
01295 
01296         void swap_index(int i, int j) const
01297         {
01298                 cache->swap_index(i,j);
01299                 Kernel::swap_index(i,j);
01300                 swap(y[i],y[j]);
01301                 swap(QD[i],QD[j]);
01302         }
01303 
01304         ~SVC_Q()
01305         {
01306                 delete[] y;
01307                 delete cache;
01308                 delete[] QD;
01309         }
01310 private:
01311         schar *y;
01312         Cache *cache;
01313         double *QD;
01314 };
01315 
01316 class ONE_CLASS_Q: public Kernel
01317 {
01318 public:
01319         ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01320         :Kernel(prob.l, prob.x, param)
01321         {
01322                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01323                 QD = new double[prob.l];
01324                 for(int i=0;i<prob.l;i++)
01325                         QD[i] = (this->*kernel_function)(i,i);
01326         }
01327         
01328         Qfloat *get_Q(int i, int len) const
01329         {
01330                 Qfloat *data;
01331                 int start, j;
01332                 if((start = cache->get_data(i,&data,len)) < len)
01333                 {
01334                         for(j=start;j<len;j++)
01335                                 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01336                 }
01337                 return data;
01338         }
01339 
01340         double *get_QD() const
01341         {
01342                 return QD;
01343         }
01344 
01345         void swap_index(int i, int j) const
01346         {
01347                 cache->swap_index(i,j);
01348                 Kernel::swap_index(i,j);
01349                 swap(QD[i],QD[j]);
01350         }
01351 
01352         ~ONE_CLASS_Q()
01353         {
01354                 delete cache;
01355                 delete[] QD;
01356         }
01357 private:
01358         Cache *cache;
01359         double *QD;
01360 };
01361 
01362 class SVR_Q: public Kernel
01363 { 
01364 public:
01365         SVR_Q(const svm_problem& prob, const svm_parameter& param)
01366         :Kernel(prob.l, prob.x, param)
01367         {
01368                 l = prob.l;
01369                 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01370                 QD = new double[2*l];
01371                 sign = new schar[2*l];
01372                 index = new int[2*l];
01373                 for(int k=0;k<l;k++)
01374                 {
01375                         sign[k] = 1;
01376                         sign[k+l] = -1;
01377                         index[k] = k;
01378                         index[k+l] = k;
01379                         QD[k] = (this->*kernel_function)(k,k);
01380                         QD[k+l] = QD[k];
01381                 }
01382                 buffer[0] = new Qfloat[2*l];
01383                 buffer[1] = new Qfloat[2*l];
01384                 next_buffer = 0;
01385         }
01386 
01387         void swap_index(int i, int j) const
01388         {
01389                 swap(sign[i],sign[j]);
01390                 swap(index[i],index[j]);
01391                 swap(QD[i],QD[j]);
01392         }
01393         
01394         Qfloat *get_Q(int i, int len) const
01395         {
01396                 Qfloat *data;
01397                 int j, real_i = index[i];
01398                 if(cache->get_data(real_i,&data,l) < l)
01399                 {
01400                         for(j=0;j<l;j++)
01401                                 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01402                 }
01403 
01404                 // reorder and copy
01405                 Qfloat *buf = buffer[next_buffer];
01406                 next_buffer = 1 - next_buffer;
01407                 schar si = sign[i];
01408                 for(j=0;j<len;j++)
01409                         buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
01410                 return buf;
01411         }
01412 
01413         double *get_QD() const
01414         {
01415                 return QD;
01416         }
01417 
01418         ~SVR_Q()
01419         {
01420                 delete cache;
01421                 delete[] sign;
01422                 delete[] index;
01423                 delete[] buffer[0];
01424                 delete[] buffer[1];
01425                 delete[] QD;
01426         }
01427 private:
01428         int l;
01429         Cache *cache;
01430         schar *sign;
01431         int *index;
01432         mutable int next_buffer;
01433         Qfloat *buffer[2];
01434         double *QD;
01435 };
01436 
01437 //
01438 // construct and solve various formulations
01439 //
01440 static void solve_c_svc(
01441         const svm_problem *prob, const svm_parameter* param,
01442         double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01443 {
01444         int l = prob->l;
01445         double *minus_ones = new double[l];
01446         schar *y = new schar[l];
01447 
01448         int i;
01449 
01450         for(i=0;i<l;i++)
01451         {
01452                 alpha[i] = 0;
01453                 minus_ones[i] = -1;
01454                 if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
01455         }
01456 
01457         Solver s;
01458         s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01459                 alpha, Cp, Cn, param->eps, si, param->shrinking);
01460 
01461         double sum_alpha=0;
01462         for(i=0;i<l;i++)
01463                 sum_alpha += alpha[i];
01464 
01465         if (Cp==Cn)
01466                 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01467 
01468         for(i=0;i<l;i++)
01469                 alpha[i] *= y[i];
01470 
01471         delete[] minus_ones;
01472         delete[] y;
01473 }
01474 
01475 static void solve_nu_svc(
01476         const svm_problem *prob, const svm_parameter *param,
01477         double *alpha, Solver::SolutionInfo* si)
01478 {
01479         int i;
01480         int l = prob->l;
01481         double nu = param->nu;
01482 
01483         schar *y = new schar[l];
01484 
01485         for(i=0;i<l;i++)
01486                 if(prob->y[i]>0)
01487                         y[i] = +1;
01488                 else
01489                         y[i] = -1;
01490 
01491         double sum_pos = nu*l/2;
01492         double sum_neg = nu*l/2;
01493 
01494         for(i=0;i<l;i++)
01495                 if(y[i] == +1)
01496                 {
01497                         alpha[i] = min(1.0,sum_pos);
01498                         sum_pos -= alpha[i];
01499                 }
01500                 else
01501                 {
01502                         alpha[i] = min(1.0,sum_neg);
01503                         sum_neg -= alpha[i];
01504                 }
01505 
01506         double *zeros = new double[l];
01507 
01508         for(i=0;i<l;i++)
01509                 zeros[i] = 0;
01510 
01511         Solver_NU s;
01512         s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01513                 alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
01514         double r = si->r;
01515 
01516         info("C = %f\n",1/r);
01517 
01518         for(i=0;i<l;i++)
01519                 alpha[i] *= y[i]/r;
01520 
01521         si->rho /= r;
01522         si->obj /= (r*r);
01523         si->upper_bound_p = 1/r;
01524         si->upper_bound_n = 1/r;
01525 
01526         delete[] y;
01527         delete[] zeros;
01528 }
01529 
01530 static void solve_one_class(
01531         const svm_problem *prob, const svm_parameter *param,
01532         double *alpha, Solver::SolutionInfo* si)
01533 {
01534         int l = prob->l;
01535         double *zeros = new double[l];
01536         schar *ones = new schar[l];
01537         int i;
01538 
01539         int n = (int)(param->nu*prob->l);       // # of alpha's at upper bound
01540 
01541         for(i=0;i<n;i++)
01542                 alpha[i] = 1;
01543         if(n<prob->l)
01544                 alpha[n] = param->nu * prob->l - n;
01545         for(i=n+1;i<l;i++)
01546                 alpha[i] = 0;
01547 
01548         for(i=0;i<l;i++)
01549         {
01550                 zeros[i] = 0;
01551                 ones[i] = 1;
01552         }
01553 
01554         Solver s;
01555         s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01556                 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01557 
01558         delete[] zeros;
01559         delete[] ones;
01560 }
01561 
01562 static void solve_epsilon_svr(
01563         const svm_problem *prob, const svm_parameter *param,
01564         double *alpha, Solver::SolutionInfo* si)
01565 {
01566         int l = prob->l;
01567         double *alpha2 = new double[2*l];
01568         double *linear_term = new double[2*l];
01569         schar *y = new schar[2*l];
01570         int i;
01571 
01572         for(i=0;i<l;i++)
01573         {
01574                 alpha2[i] = 0;
01575                 linear_term[i] = param->p - prob->y[i];
01576                 y[i] = 1;
01577 
01578                 alpha2[i+l] = 0;
01579                 linear_term[i+l] = param->p + prob->y[i];
01580                 y[i+l] = -1;
01581         }
01582 
01583         Solver s;
01584         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01585                 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01586 
01587         double sum_alpha = 0;
01588         for(i=0;i<l;i++)
01589         {
01590                 alpha[i] = alpha2[i] - alpha2[i+l];
01591                 sum_alpha += fabs(alpha[i]);
01592         }
01593         info("nu = %f\n",sum_alpha/(param->C*l));
01594 
01595         delete[] alpha2;
01596         delete[] linear_term;
01597         delete[] y;
01598 }
01599 
01600 static void solve_nu_svr(
01601         const svm_problem *prob, const svm_parameter *param,
01602         double *alpha, Solver::SolutionInfo* si)
01603 {
01604         int l = prob->l;
01605         double C = param->C;
01606         double *alpha2 = new double[2*l];
01607         double *linear_term = new double[2*l];
01608         schar *y = new schar[2*l];
01609         int i;
01610 
01611         double sum = C * param->nu * l / 2;
01612         for(i=0;i<l;i++)
01613         {
01614                 alpha2[i] = alpha2[i+l] = min(sum,C);
01615                 sum -= alpha2[i];
01616 
01617                 linear_term[i] = - prob->y[i];
01618                 y[i] = 1;
01619 
01620                 linear_term[i+l] = prob->y[i];
01621                 y[i+l] = -1;
01622         }
01623 
01624         Solver_NU s;
01625         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01626                 alpha2, C, C, param->eps, si, param->shrinking);
01627 
01628         info("epsilon = %f\n",-si->r);
01629 
01630         for(i=0;i<l;i++)
01631                 alpha[i] = alpha2[i] - alpha2[i+l];
01632 
01633         delete[] alpha2;
01634         delete[] linear_term;
01635         delete[] y;
01636 }
01637 
01638 //
01639 // decision_function
01640 //
01641 struct decision_function
01642 {
01643         double *alpha;
01644         double rho;
01645 };
01646 
01647 static decision_function svm_train_one(
01648         const svm_problem *prob, const svm_parameter *param,
01649         double Cp, double Cn)
01650 {
01651         double *alpha = Malloc(double,prob->l);
01652         Solver::SolutionInfo si;
01653         switch(param->svm_type)
01654         {
01655                 case C_SVC:
01656                         solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01657                         break;
01658                 case NU_SVC:
01659                         solve_nu_svc(prob,param,alpha,&si);
01660                         break;
01661                 case ONE_CLASS:
01662                         solve_one_class(prob,param,alpha,&si);
01663                         break;
01664                 case EPSILON_SVR:
01665                         solve_epsilon_svr(prob,param,alpha,&si);
01666                         break;
01667                 case NU_SVR:
01668                         solve_nu_svr(prob,param,alpha,&si);
01669                         break;
01670         }
01671 
01672         info("obj = %f, rho = %f\n",si.obj,si.rho);
01673 
01674         // output SVs
01675 
01676         int nSV = 0;
01677         int nBSV = 0;
01678         for(int i=0;i<prob->l;i++)
01679         {
01680                 if(fabs(alpha[i]) > 0)
01681                 {
01682                         ++nSV;
01683                         if(prob->y[i] > 0)
01684                         {
01685                                 if(fabs(alpha[i]) >= si.upper_bound_p)
01686                                         ++nBSV;
01687                         }
01688                         else
01689                         {
01690                                 if(fabs(alpha[i]) >= si.upper_bound_n)
01691                                         ++nBSV;
01692                         }
01693                 }
01694         }
01695 
01696         info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01697 
01698         decision_function f;
01699         f.alpha = alpha;
01700         f.rho = si.rho;
01701         return f;
01702 }
01703 
01704 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
01705 static void sigmoid_train(
01706         int l, const double *dec_values, const double *labels, 
01707         double& A, double& B)
01708 {
01709         double prior1=0, prior0 = 0;
01710         int i;
01711 
01712         for (i=0;i<l;i++)
01713                 if (labels[i] > 0) prior1+=1;
01714                 else prior0+=1;
01715         
01716         int max_iter=100;       // Maximal number of iterations
01717         double min_step=1e-10;  // Minimal step taken in line search
01718         double sigma=1e-12;     // For numerically strict PD of Hessian
01719         double eps=1e-5;
01720         double hiTarget=(prior1+1.0)/(prior1+2.0);
01721         double loTarget=1/(prior0+2.0);
01722         double *t=Malloc(double,l);
01723         double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01724         double newA,newB,newf,d1,d2;
01725         int iter;
01726         
01727         // Initial Point and Initial Fun Value
01728         A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01729         double fval = 0.0;
01730 
01731         for (i=0;i<l;i++)
01732         {
01733                 if (labels[i]>0) t[i]=hiTarget;
01734                 else t[i]=loTarget;
01735                 fApB = dec_values[i]*A+B;
01736                 if (fApB>=0)
01737                         fval += t[i]*fApB + log(1+exp(-fApB));
01738                 else
01739                         fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01740         }
01741         for (iter=0;iter<max_iter;iter++)
01742         {
01743                 // Update Gradient and Hessian (use H' = H + sigma I)
01744                 h11=sigma; // numerically ensures strict PD
01745                 h22=sigma;
01746                 h21=0.0;g1=0.0;g2=0.0;
01747                 for (i=0;i<l;i++)
01748                 {
01749                         fApB = dec_values[i]*A+B;
01750                         if (fApB >= 0)
01751                         {
01752                                 p=exp(-fApB)/(1.0+exp(-fApB));
01753                                 q=1.0/(1.0+exp(-fApB));
01754                         }
01755                         else
01756                         {
01757                                 p=1.0/(1.0+exp(fApB));
01758                                 q=exp(fApB)/(1.0+exp(fApB));
01759                         }
01760                         d2=p*q;
01761                         h11+=dec_values[i]*dec_values[i]*d2;
01762                         h22+=d2;
01763                         h21+=dec_values[i]*d2;
01764                         d1=t[i]-p;
01765                         g1+=dec_values[i]*d1;
01766                         g2+=d1;
01767                 }
01768 
01769                 // Stopping Criteria
01770                 if (fabs(g1)<eps && fabs(g2)<eps)
01771                         break;
01772 
01773                 // Finding Newton direction: -inv(H') * g
01774                 det=h11*h22-h21*h21;
01775                 dA=-(h22*g1 - h21 * g2) / det;
01776                 dB=-(-h21*g1+ h11 * g2) / det;
01777                 gd=g1*dA+g2*dB;
01778 
01779 
01780                 stepsize = 1;           // Line Search
01781                 while (stepsize >= min_step)
01782                 {
01783                         newA = A + stepsize * dA;
01784                         newB = B + stepsize * dB;
01785 
01786                         // New function value
01787                         newf = 0.0;
01788                         for (i=0;i<l;i++)
01789                         {
01790                                 fApB = dec_values[i]*newA+newB;
01791                                 if (fApB >= 0)
01792                                         newf += t[i]*fApB + log(1+exp(-fApB));
01793                                 else
01794                                         newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01795                         }
01796                         // Check sufficient decrease
01797                         if (newf<fval+0.0001*stepsize*gd)
01798                         {
01799                                 A=newA;B=newB;fval=newf;
01800                                 break;
01801                         }
01802                         else
01803                                 stepsize = stepsize / 2.0;
01804                 }
01805 
01806                 if (stepsize < min_step)
01807                 {
01808                         info("Line search fails in two-class probability estimates\n");
01809                         break;
01810                 }
01811         }
01812 
01813         if (iter>=max_iter)
01814                 info("Reaching maximal iterations in two-class probability estimates\n");
01815         free(t);
01816 }
01817 
01818 static double sigmoid_predict(double decision_value, double A, double B)
01819 {
01820         double fApB = decision_value*A+B;
01821         // 1-p used later; avoid catastrophic cancellation
01822         if (fApB >= 0)
01823                 return exp(-fApB)/(1.0+exp(-fApB));
01824         else
01825                 return 1.0/(1+exp(fApB)) ;
01826 }
01827 
01828 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
01829 static void multiclass_probability(int k, double **r, double *p)
01830 {
01831         int t,j;
01832         int iter = 0, max_iter=max(100,k);
01833         double **Q=Malloc(double *,k);
01834         double *Qp=Malloc(double,k);
01835         double pQp, eps=0.005/k;
01836         
01837         for (t=0;t<k;t++)
01838         {
01839                 p[t]=1.0/k;  // Valid if k = 1
01840                 Q[t]=Malloc(double,k);
01841                 Q[t][t]=0;
01842                 for (j=0;j<t;j++)
01843                 {
01844                         Q[t][t]+=r[j][t]*r[j][t];
01845                         Q[t][j]=Q[j][t];
01846                 }
01847                 for (j=t+1;j<k;j++)
01848                 {
01849                         Q[t][t]+=r[j][t]*r[j][t];
01850                         Q[t][j]=-r[j][t]*r[t][j];
01851                 }
01852         }
01853         for (iter=0;iter<max_iter;iter++)
01854         {
01855                 // stopping condition, recalculate QP,pQP for numerical accuracy
01856                 pQp=0;
01857                 for (t=0;t<k;t++)
01858                 {
01859                         Qp[t]=0;
01860                         for (j=0;j<k;j++)
01861                                 Qp[t]+=Q[t][j]*p[j];
01862                         pQp+=p[t]*Qp[t];
01863                 }
01864                 double max_error=0;
01865                 for (t=0;t<k;t++)
01866                 {
01867                         double error=fabs(Qp[t]-pQp);
01868                         if (error>max_error)
01869                                 max_error=error;
01870                 }
01871                 if (max_error<eps) break;
01872                 
01873                 for (t=0;t<k;t++)
01874                 {
01875                         double diff=(-Qp[t]+pQp)/Q[t][t];
01876                         p[t]+=diff;
01877                         pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01878                         for (j=0;j<k;j++)
01879                         {
01880                                 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01881                                 p[j]/=(1+diff);
01882                         }
01883                 }
01884         }
01885         if (iter>=max_iter)
01886                 info("Exceeds max_iter in multiclass_prob\n");
01887         for(t=0;t<k;t++) free(Q[t]);
01888         free(Q);
01889         free(Qp);
01890 }
01891 
01892 // Cross-validation decision values for probability estimates
01893 static void svm_binary_svc_probability(
01894         const svm_problem *prob, const svm_parameter *param,
01895         double Cp, double Cn, double& probA, double& probB)
01896 {
01897         int i;
01898         int nr_fold = 5;
01899         int *perm = Malloc(int,prob->l);
01900         double *dec_values = Malloc(double,prob->l);
01901 
01902         // random shuffle
01903         for(i=0;i<prob->l;i++) perm[i]=i;
01904         for(i=0;i<prob->l;i++)
01905         {
01906                 int j = i+rand()%(prob->l-i);
01907                 swap(perm[i],perm[j]);
01908         }
01909         for(i=0;i<nr_fold;i++)
01910         {
01911                 int begin = i*prob->l/nr_fold;
01912                 int end = (i+1)*prob->l/nr_fold;
01913                 int j,k;
01914                 struct svm_problem subprob;
01915 
01916                 subprob.l = prob->l-(end-begin);
01917                 subprob.x = Malloc(struct svm_node*,subprob.l);
01918                 subprob.y = Malloc(double,subprob.l);
01919                         
01920                 k=0;
01921                 for(j=0;j<begin;j++)
01922                 {
01923                         subprob.x[k] = prob->x[perm[j]];
01924                         subprob.y[k] = prob->y[perm[j]];
01925                         ++k;
01926                 }
01927                 for(j=end;j<prob->l;j++)
01928                 {
01929                         subprob.x[k] = prob->x[perm[j]];
01930                         subprob.y[k] = prob->y[perm[j]];
01931                         ++k;
01932                 }
01933                 int p_count=0,n_count=0;
01934                 for(j=0;j<k;j++)
01935                         if(subprob.y[j]>0)
01936                                 p_count++;
01937                         else
01938                                 n_count++;
01939 
01940                 if(p_count==0 && n_count==0)
01941                         for(j=begin;j<end;j++)
01942                                 dec_values[perm[j]] = 0;
01943                 else if(p_count > 0 && n_count == 0)
01944                         for(j=begin;j<end;j++)
01945                                 dec_values[perm[j]] = 1;
01946                 else if(p_count == 0 && n_count > 0)
01947                         for(j=begin;j<end;j++)
01948                                 dec_values[perm[j]] = -1;
01949                 else
01950                 {
01951                         svm_parameter subparam = *param;
01952                         subparam.probability=0;
01953                         subparam.C=1.0;
01954                         subparam.nr_weight=2;
01955                         subparam.weight_label = Malloc(int,2);
01956                         subparam.weight = Malloc(double,2);
01957                         subparam.weight_label[0]=+1;
01958                         subparam.weight_label[1]=-1;
01959                         subparam.weight[0]=Cp;
01960                         subparam.weight[1]=Cn;
01961                         struct svm_model *submodel = svm_train(&subprob,&subparam);
01962                         for(j=begin;j<end;j++)
01963                         {
01964                                 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
01965                                 // ensure +1 -1 order; reason not using CV subroutine
01966                                 dec_values[perm[j]] *= submodel->label[0];
01967                         }               
01968                         svm_free_and_destroy_model(&submodel);
01969                         svm_destroy_param(&subparam);
01970                 }
01971                 free(subprob.x);
01972                 free(subprob.y);
01973         }               
01974         sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
01975         free(dec_values);
01976         free(perm);
01977 }
01978 
01979 // Return parameter of a Laplace distribution 
01980 static double svm_svr_probability(
01981         const svm_problem *prob, const svm_parameter *param)
01982 {
01983         int i;
01984         int nr_fold = 5;
01985         double *ymv = Malloc(double,prob->l);
01986         double mae = 0;
01987 
01988         svm_parameter newparam = *param;
01989         newparam.probability = 0;
01990         svm_cross_validation(prob,&newparam,nr_fold,ymv);
01991         for(i=0;i<prob->l;i++)
01992         {
01993                 ymv[i]=prob->y[i]-ymv[i];
01994                 mae += fabs(ymv[i]);
01995         }               
01996         mae /= prob->l;
01997         double std=sqrt(2*mae*mae);
01998         int count=0;
01999         mae=0;
02000         for(i=0;i<prob->l;i++)
02001                 if (fabs(ymv[i]) > 5*std) 
02002                         count=count+1;
02003                 else 
02004                         mae+=fabs(ymv[i]);
02005         mae /= (prob->l-count);
02006         info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
02007         free(ymv);
02008         return mae;
02009 }
02010 
02011 
02012 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
02013 // perm, length l, must be allocated before calling this subroutine
02014 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02015 {
02016         int l = prob->l;
02017         int max_nr_class = 16;
02018         int nr_class = 0;
02019         int *label = Malloc(int,max_nr_class);
02020         int *count = Malloc(int,max_nr_class);
02021         int *data_label = Malloc(int,l);
02022         int i;
02023 
02024         for(i=0;i<l;i++)
02025         {
02026                 int this_label = (int)prob->y[i];
02027                 int j;
02028                 for(j=0;j<nr_class;j++)
02029                 {
02030                         if(this_label == label[j])
02031                         {
02032                                 ++count[j];
02033                                 break;
02034                         }
02035                 }
02036                 data_label[i] = j;
02037                 if(j == nr_class)
02038                 {
02039                         if(nr_class == max_nr_class)
02040                         {
02041                                 max_nr_class *= 2;
02042                                 label = (int *)realloc(label,max_nr_class*sizeof(int));
02043                                 count = (int *)realloc(count,max_nr_class*sizeof(int));
02044                         }
02045                         label[nr_class] = this_label;
02046                         count[nr_class] = 1;
02047                         ++nr_class;
02048                 }
02049         }
02050 
02051         //
02052         // Labels are ordered by their first occurrence in the training set. 
02053         // However, for two-class sets with -1/+1 labels and -1 appears first, 
02054         // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
02055         //
02056         if (nr_class == 2 && label[0] == -1 && label[1] == 1)
02057         {
02058                 swap(label[0],label[1]);
02059                 swap(count[0],count[1]);
02060                 for(i=0;i<l;i++)
02061                 {
02062                         if(data_label[i] == 0)
02063                                 data_label[i] = 1;
02064                         else
02065                                 data_label[i] = 0;
02066                 }
02067         }
02068 
02069         int *start = Malloc(int,nr_class);
02070         start[0] = 0;
02071         for(i=1;i<nr_class;i++)
02072                 start[i] = start[i-1]+count[i-1];
02073         for(i=0;i<l;i++)
02074         {
02075                 perm[start[data_label[i]]] = i;
02076                 ++start[data_label[i]];
02077         }
02078         start[0] = 0;
02079         for(i=1;i<nr_class;i++)
02080                 start[i] = start[i-1]+count[i-1];
02081 
02082         *nr_class_ret = nr_class;
02083         *label_ret = label;
02084         *start_ret = start;
02085         *count_ret = count;
02086         free(data_label);
02087 }
02088 
02089 //
02090 // Interface functions
02091 //
02092 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02093 {
02094         svm_model *model = Malloc(svm_model,1);
02095         model->param = *param;
02096         model->free_sv = 0;     // XXX
02097 
02098         if(param->svm_type == ONE_CLASS ||
02099            param->svm_type == EPSILON_SVR ||
02100            param->svm_type == NU_SVR)
02101         {
02102                 // regression or one-class-svm
02103                 model->nr_class = 2;
02104                 model->label = NULL;
02105                 model->nSV = NULL;
02106                 model->probA = NULL; model->probB = NULL;
02107                 model->sv_coef = Malloc(double *,1);
02108 
02109                 if(param->probability && 
02110                    (param->svm_type == EPSILON_SVR ||
02111                     param->svm_type == NU_SVR))
02112                 {
02113                         model->probA = Malloc(double,1);
02114                         model->probA[0] = svm_svr_probability(prob,param);
02115                 }
02116 
02117                 decision_function f = svm_train_one(prob,param,0,0);
02118                 model->rho = Malloc(double,1);
02119                 model->rho[0] = f.rho;
02120 
02121                 int nSV = 0;
02122                 int i;
02123                 for(i=0;i<prob->l;i++)
02124                         if(fabs(f.alpha[i]) > 0) ++nSV;
02125                 model->l = nSV;
02126                 model->SV = Malloc(svm_node *,nSV);
02127                 model->sv_coef[0] = Malloc(double,nSV);
02128                 model->sv_indices = Malloc(int,nSV);
02129                 int j = 0;
02130                 for(i=0;i<prob->l;i++)
02131                         if(fabs(f.alpha[i]) > 0)
02132                         {
02133                                 model->SV[j] = prob->x[i];
02134                                 model->sv_coef[0][j] = f.alpha[i];
02135                                 model->sv_indices[j] = i+1;
02136                                 ++j;
02137                         }               
02138 
02139                 free(f.alpha);
02140         }
02141         else
02142         {
02143                 // classification
02144                 int l = prob->l;
02145                 int nr_class;
02146                 int *label = NULL;
02147                 int *start = NULL;
02148                 int *count = NULL;
02149                 int *perm = Malloc(int,l);
02150 
02151                 // group training data of the same class
02152                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02153                 if(nr_class == 1) 
02154                         info("WARNING: training data in only one class. See README for details.\n");
02155                 
02156                 svm_node **x = Malloc(svm_node *,l);
02157                 int i;
02158                 for(i=0;i<l;i++)
02159                         x[i] = prob->x[perm[i]];
02160 
02161                 // calculate weighted C
02162 
02163                 double *weighted_C = Malloc(double, nr_class);
02164                 for(i=0;i<nr_class;i++)
02165                         weighted_C[i] = param->C;
02166                 for(i=0;i<param->nr_weight;i++)
02167                 {       
02168                         int j;
02169                         for(j=0;j<nr_class;j++)
02170                                 if(param->weight_label[i] == label[j])
02171                                         break;
02172                         if(j == nr_class)
02173                                 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
02174                         else
02175                                 weighted_C[j] *= param->weight[i];
02176                 }
02177 
02178                 // train k*(k-1)/2 models
02179                 
02180                 bool *nonzero = Malloc(bool,l);
02181                 for(i=0;i<l;i++)
02182                         nonzero[i] = false;
02183                 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02184 
02185                 double *probA=NULL,*probB=NULL;
02186                 if (param->probability)
02187                 {
02188                         probA=Malloc(double,nr_class*(nr_class-1)/2);
02189                         probB=Malloc(double,nr_class*(nr_class-1)/2);
02190                 }
02191 
02192                 int p = 0;
02193                 for(i=0;i<nr_class;i++)
02194                         for(int j=i+1;j<nr_class;j++)
02195                         {
02196                                 svm_problem sub_prob;
02197                                 int si = start[i], sj = start[j];
02198                                 int ci = count[i], cj = count[j];
02199                                 sub_prob.l = ci+cj;
02200                                 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02201                                 sub_prob.y = Malloc(double,sub_prob.l);
02202                                 int k;
02203                                 for(k=0;k<ci;k++)
02204                                 {
02205                                         sub_prob.x[k] = x[si+k];
02206                                         sub_prob.y[k] = +1;
02207                                 }
02208                                 for(k=0;k<cj;k++)
02209                                 {
02210                                         sub_prob.x[ci+k] = x[sj+k];
02211                                         sub_prob.y[ci+k] = -1;
02212                                 }
02213 
02214                                 if(param->probability)
02215                                         svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02216 
02217                                 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02218                                 for(k=0;k<ci;k++)
02219                                         if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02220                                                 nonzero[si+k] = true;
02221                                 for(k=0;k<cj;k++)
02222                                         if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02223                                                 nonzero[sj+k] = true;
02224                                 free(sub_prob.x);
02225                                 free(sub_prob.y);
02226                                 ++p;
02227                         }
02228 
02229                 // build output
02230 
02231                 model->nr_class = nr_class;
02232                 
02233                 model->label = Malloc(int,nr_class);
02234                 for(i=0;i<nr_class;i++)
02235                         model->label[i] = label[i];
02236                 
02237                 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02238                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02239                         model->rho[i] = f[i].rho;
02240 
02241                 if(param->probability)
02242                 {
02243                         model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02244                         model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02245                         for(i=0;i<nr_class*(nr_class-1)/2;i++)
02246                         {
02247                                 model->probA[i] = probA[i];
02248                                 model->probB[i] = probB[i];
02249                         }
02250                 }
02251                 else
02252                 {
02253                         model->probA=NULL;
02254                         model->probB=NULL;
02255                 }
02256 
02257                 int total_sv = 0;
02258                 int *nz_count = Malloc(int,nr_class);
02259                 model->nSV = Malloc(int,nr_class);
02260                 for(i=0;i<nr_class;i++)
02261                 {
02262                         int nSV = 0;
02263                         for(int j=0;j<count[i];j++)
02264                                 if(nonzero[start[i]+j])
02265                                 {       
02266                                         ++nSV;
02267                                         ++total_sv;
02268                                 }
02269                         model->nSV[i] = nSV;
02270                         nz_count[i] = nSV;
02271                 }
02272                 
02273                 info("Total nSV = %d\n",total_sv);
02274 
02275                 model->l = total_sv;
02276                 model->SV = Malloc(svm_node *,total_sv);
02277                 model->sv_indices = Malloc(int,total_sv);
02278                 p = 0;
02279                 for(i=0;i<l;i++)
02280                         if(nonzero[i])
02281                         {
02282                                 model->SV[p] = x[i];
02283                                 model->sv_indices[p++] = perm[i] + 1;
02284                         }
02285 
02286                 int *nz_start = Malloc(int,nr_class);
02287                 nz_start[0] = 0;
02288                 for(i=1;i<nr_class;i++)
02289                         nz_start[i] = nz_start[i-1]+nz_count[i-1];
02290 
02291                 model->sv_coef = Malloc(double *,nr_class-1);
02292                 for(i=0;i<nr_class-1;i++)
02293                         model->sv_coef[i] = Malloc(double,total_sv);
02294 
02295                 p = 0;
02296                 for(i=0;i<nr_class;i++)
02297                         for(int j=i+1;j<nr_class;j++)
02298                         {
02299                                 // classifier (i,j): coefficients with
02300                                 // i are in sv_coef[j-1][nz_start[i]...],
02301                                 // j are in sv_coef[i][nz_start[j]...]
02302 
02303                                 int si = start[i];
02304                                 int sj = start[j];
02305                                 int ci = count[i];
02306                                 int cj = count[j];
02307                                 
02308                                 int q = nz_start[i];
02309                                 int k;
02310                                 for(k=0;k<ci;k++)
02311                                         if(nonzero[si+k])
02312                                                 model->sv_coef[j-1][q++] = f[p].alpha[k];
02313                                 q = nz_start[j];
02314                                 for(k=0;k<cj;k++)
02315                                         if(nonzero[sj+k])
02316                                                 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02317                                 ++p;
02318                         }
02319                 
02320                 free(label);
02321                 free(probA);
02322                 free(probB);
02323                 free(count);
02324                 free(perm);
02325                 free(start);
02326                 free(x);
02327                 free(weighted_C);
02328                 free(nonzero);
02329                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02330                         free(f[i].alpha);
02331                 free(f);
02332                 free(nz_count);
02333                 free(nz_start);
02334         }
02335         return model;
02336 }
02337 
02338 // Stratified cross validation
02339 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02340 {
02341         int i;
02342         int *fold_start;
02343         int l = prob->l;
02344         int *perm = Malloc(int,l);
02345         int nr_class;
02346         if (nr_fold > l)
02347         {
02348                 nr_fold = l;
02349                 fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
02350         }
02351         fold_start = Malloc(int,nr_fold+1);
02352         // stratified cv may not give leave-one-out rate
02353         // Each class to l folds -> some folds may have zero elements
02354         if((param->svm_type == C_SVC ||
02355             param->svm_type == NU_SVC) && nr_fold < l)
02356         {
02357                 int *start = NULL;
02358                 int *label = NULL;
02359                 int *count = NULL;
02360                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02361 
02362                 // random shuffle and then data grouped by fold using the array perm
02363                 int *fold_count = Malloc(int,nr_fold);
02364                 int c;
02365                 int *index = Malloc(int,l);
02366                 for(i=0;i<l;i++)
02367                         index[i]=perm[i];
02368                 for (c=0; c<nr_class; c++) 
02369                         for(i=0;i<count[c];i++)
02370                         {
02371                                 int j = i+rand()%(count[c]-i);
02372                                 swap(index[start[c]+j],index[start[c]+i]);
02373                         }
02374                 for(i=0;i<nr_fold;i++)
02375                 {
02376                         fold_count[i] = 0;
02377                         for (c=0; c<nr_class;c++)
02378                                 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02379                 }
02380                 fold_start[0]=0;
02381                 for (i=1;i<=nr_fold;i++)
02382                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02383                 for (c=0; c<nr_class;c++)
02384                         for(i=0;i<nr_fold;i++)
02385                         {
02386                                 int begin = start[c]+i*count[c]/nr_fold;
02387                                 int end = start[c]+(i+1)*count[c]/nr_fold;
02388                                 for(int j=begin;j<end;j++)
02389                                 {
02390                                         perm[fold_start[i]] = index[j];
02391                                         fold_start[i]++;
02392                                 }
02393                         }
02394                 fold_start[0]=0;
02395                 for (i=1;i<=nr_fold;i++)
02396                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02397                 free(start);
02398                 free(label);
02399                 free(count);
02400                 free(index);
02401                 free(fold_count);
02402         }
02403         else
02404         {
02405                 for(i=0;i<l;i++) perm[i]=i;
02406                 for(i=0;i<l;i++)
02407                 {
02408                         int j = i+rand()%(l-i);
02409                         swap(perm[i],perm[j]);
02410                 }
02411                 for(i=0;i<=nr_fold;i++)
02412                         fold_start[i]=i*l/nr_fold;
02413         }
02414 
02415         for(i=0;i<nr_fold;i++)
02416         {
02417                 int begin = fold_start[i];
02418                 int end = fold_start[i+1];
02419                 int j,k;
02420                 struct svm_problem subprob;
02421 
02422                 subprob.l = l-(end-begin);
02423                 subprob.x = Malloc(struct svm_node*,subprob.l);
02424                 subprob.y = Malloc(double,subprob.l);
02425                         
02426                 k=0;
02427                 for(j=0;j<begin;j++)
02428                 {
02429                         subprob.x[k] = prob->x[perm[j]];
02430                         subprob.y[k] = prob->y[perm[j]];
02431                         ++k;
02432                 }
02433                 for(j=end;j<l;j++)
02434                 {
02435                         subprob.x[k] = prob->x[perm[j]];
02436                         subprob.y[k] = prob->y[perm[j]];
02437                         ++k;
02438                 }
02439                 struct svm_model *submodel = svm_train(&subprob,param);
02440                 if(param->probability && 
02441                    (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02442                 {
02443                         double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02444                         for(j=begin;j<end;j++)
02445                                 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02446                         free(prob_estimates);
02447                 }
02448                 else
02449                         for(j=begin;j<end;j++)
02450                                 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02451                 svm_free_and_destroy_model(&submodel);
02452                 free(subprob.x);
02453                 free(subprob.y);
02454         }               
02455         free(fold_start);
02456         free(perm);
02457 }
02458 
02459 
02460 int svm_get_svm_type(const svm_model *model)
02461 {
02462         return model->param.svm_type;
02463 }
02464 
02465 int svm_get_nr_class(const svm_model *model)
02466 {
02467         return model->nr_class;
02468 }
02469 
02470 void svm_get_labels(const svm_model *model, int* label)
02471 {
02472         if (model->label != NULL)
02473                 for(int i=0;i<model->nr_class;i++)
02474                         label[i] = model->label[i];
02475 }
02476 
02477 void svm_get_sv_indices(const svm_model *model, int* indices)
02478 {
02479         if (model->sv_indices != NULL)
02480                 for(int i=0;i<model->l;i++)
02481                         indices[i] = model->sv_indices[i];
02482 }
02483 
02484 int svm_get_nr_sv(const svm_model *model)
02485 {
02486         return model->l;
02487 }
02488 
02489 double svm_get_svr_probability(const svm_model *model)
02490 {
02491         if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02492             model->probA!=NULL)
02493                 return model->probA[0];
02494         else
02495         {
02496                 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
02497                 return 0;
02498         }
02499 }
02500 
02501 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02502 {
02503         int i;
02504         if(model->param.svm_type == ONE_CLASS ||
02505            model->param.svm_type == EPSILON_SVR ||
02506            model->param.svm_type == NU_SVR)
02507         {
02508                 double *sv_coef = model->sv_coef[0];
02509                 double sum = 0;
02510                 for(i=0;i<model->l;i++)
02511                         sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02512                 sum -= model->rho[0];
02513                 *dec_values = sum;
02514 
02515                 if(model->param.svm_type == ONE_CLASS)
02516                         return (sum>0)?1:-1;
02517                 else
02518                         return sum;
02519         }
02520         else
02521         {
02522                 int nr_class = model->nr_class;
02523                 int l = model->l;
02524                 
02525                 double *kvalue = Malloc(double,l);
02526                 for(i=0;i<l;i++)
02527                         kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02528 
02529                 int *start = Malloc(int,nr_class);
02530                 start[0] = 0;
02531                 for(i=1;i<nr_class;i++)
02532                         start[i] = start[i-1]+model->nSV[i-1];
02533 
02534                 int *vote = Malloc(int,nr_class);
02535                 for(i=0;i<nr_class;i++)
02536                         vote[i] = 0;
02537 
02538                 int p=0;
02539                 for(i=0;i<nr_class;i++)
02540                         for(int j=i+1;j<nr_class;j++)
02541                         {
02542                                 double sum = 0;
02543                                 int si = start[i];
02544                                 int sj = start[j];
02545                                 int ci = model->nSV[i];
02546                                 int cj = model->nSV[j];
02547                                 
02548                                 int k;
02549                                 double *coef1 = model->sv_coef[j-1];
02550                                 double *coef2 = model->sv_coef[i];
02551                                 for(k=0;k<ci;k++)
02552                                         sum += coef1[si+k] * kvalue[si+k];
02553                                 for(k=0;k<cj;k++)
02554                                         sum += coef2[sj+k] * kvalue[sj+k];
02555                                 sum -= model->rho[p];
02556                                 dec_values[p] = sum;
02557 
02558                                 if(dec_values[p] > 0)
02559                                         ++vote[i];
02560                                 else
02561                                         ++vote[j];
02562                                 p++;
02563                         }
02564 
02565                 int vote_max_idx = 0;
02566                 for(i=1;i<nr_class;i++)
02567                         if(vote[i] > vote[vote_max_idx])
02568                                 vote_max_idx = i;
02569 
02570                 free(kvalue);
02571                 free(start);
02572                 free(vote);
02573                 return model->label[vote_max_idx];
02574         }
02575 }
02576 
02577 double svm_predict(const svm_model *model, const svm_node *x)
02578 {
02579         int nr_class = model->nr_class;
02580         double *dec_values;
02581         if(model->param.svm_type == ONE_CLASS ||
02582            model->param.svm_type == EPSILON_SVR ||
02583            model->param.svm_type == NU_SVR)
02584                 dec_values = Malloc(double, 1);
02585         else 
02586                 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02587         double pred_result = svm_predict_values(model, x, dec_values);
02588         free(dec_values);
02589         return pred_result;
02590 }
02591 
02592 double svm_predict_probability(
02593         const svm_model *model, const svm_node *x, double *prob_estimates)
02594 {
02595         if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02596             model->probA!=NULL && model->probB!=NULL)
02597         {
02598                 int i;
02599                 int nr_class = model->nr_class;
02600                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02601                 svm_predict_values(model, x, dec_values);
02602 
02603                 double min_prob=1e-7;
02604                 double **pairwise_prob=Malloc(double *,nr_class);
02605                 for(i=0;i<nr_class;i++)
02606                         pairwise_prob[i]=Malloc(double,nr_class);
02607                 int k=0;
02608                 for(i=0;i<nr_class;i++)
02609                         for(int j=i+1;j<nr_class;j++)
02610                         {
02611                                 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02612                                 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02613                                 k++;
02614                         }
02615                 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02616 
02617                 int prob_max_idx = 0;
02618                 for(i=1;i<nr_class;i++)
02619                         if(prob_estimates[i] > prob_estimates[prob_max_idx])
02620                                 prob_max_idx = i;
02621                 for(i=0;i<nr_class;i++)
02622                         free(pairwise_prob[i]);
02623                 free(dec_values);
02624                 free(pairwise_prob);
02625                 return model->label[prob_max_idx];
02626         }
02627         else 
02628                 return svm_predict(model, x);
02629 }
02630 
02631 static const char *svm_type_table[] =
02632 {
02633         "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02634 };
02635 
02636 static const char *kernel_type_table[]=
02637 {
02638         "linear","polynomial","rbf","sigmoid","precomputed",NULL
02639 };
02640 
02641 int svm_save_model(const char *model_file_name, const svm_model *model)
02642 {
02643         FILE *fp = fopen(model_file_name,"w");
02644         if(fp==NULL) return -1;
02645 
02646         char *old_locale = setlocale(LC_ALL, NULL);
02647         if (old_locale) {
02648                 old_locale = strdup(old_locale);
02649         }
02650         setlocale(LC_ALL, "C");
02651 
02652         const svm_parameter& param = model->param;
02653 
02654         fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02655         fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02656 
02657         if(param.kernel_type == POLY)
02658                 fprintf(fp,"degree %d\n", param.degree);
02659 
02660         if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02661                 fprintf(fp,"gamma %g\n", param.gamma);
02662 
02663         if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02664                 fprintf(fp,"coef0 %g\n", param.coef0);
02665 
02666         int nr_class = model->nr_class;
02667         int l = model->l;
02668         fprintf(fp, "nr_class %d\n", nr_class);
02669         fprintf(fp, "total_sv %d\n",l);
02670         
02671         {
02672                 fprintf(fp, "rho");
02673                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02674                         fprintf(fp," %g",model->rho[i]);
02675                 fprintf(fp, "\n");
02676         }
02677         
02678         if(model->label)
02679         {
02680                 fprintf(fp, "label");
02681                 for(int i=0;i<nr_class;i++)
02682                         fprintf(fp," %d",model->label[i]);
02683                 fprintf(fp, "\n");
02684         }
02685 
02686         if(model->probA) // regression has probA only
02687         {
02688                 fprintf(fp, "probA");
02689                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02690                         fprintf(fp," %g",model->probA[i]);
02691                 fprintf(fp, "\n");
02692         }
02693         if(model->probB)
02694         {
02695                 fprintf(fp, "probB");
02696                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02697                         fprintf(fp," %g",model->probB[i]);
02698                 fprintf(fp, "\n");
02699         }
02700 
02701         if(model->nSV)
02702         {
02703                 fprintf(fp, "nr_sv");
02704                 for(int i=0;i<nr_class;i++)
02705                         fprintf(fp," %d",model->nSV[i]);
02706                 fprintf(fp, "\n");
02707         }
02708 
02709         fprintf(fp, "SV\n");
02710         const double * const *sv_coef = model->sv_coef;
02711         const svm_node * const *SV = model->SV;
02712 
02713         for(int i=0;i<l;i++)
02714         {
02715                 for(int j=0;j<nr_class-1;j++)
02716                         fprintf(fp, "%.16g ",sv_coef[j][i]);
02717 
02718                 const svm_node *p = SV[i];
02719 
02720                 if(param.kernel_type == PRECOMPUTED)
02721                         fprintf(fp,"0:%d ",(int)(p->value));
02722                 else
02723                         while(p->index != -1)
02724                         {
02725                                 fprintf(fp,"%d:%.8g ",p->index,p->value);
02726                                 p++;
02727                         }
02728                 fprintf(fp, "\n");
02729         }
02730 
02731         setlocale(LC_ALL, old_locale);
02732         free(old_locale);
02733 
02734         if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02735         else return 0;
02736 }
02737 
02738 static char *line = NULL;
02739 static int max_line_len;
02740 
02741 static char* readline(FILE *input)
02742 {
02743         int len;
02744 
02745         if(fgets(line,max_line_len,input) == NULL)
02746                 return NULL;
02747 
02748         while(strrchr(line,'\n') == NULL)
02749         {
02750                 max_line_len *= 2;
02751                 line = (char *) realloc(line,max_line_len);
02752                 len = (int) strlen(line);
02753                 if(fgets(line+len,max_line_len-len,input) == NULL)
02754                         break;
02755         }
02756         return line;
02757 }
02758 
02759 //
02760 // FSCANF helps to handle fscanf failures.
02761 // Its do-while block avoids the ambiguity when
02762 // if (...)
02763 //    FSCANF();
02764 // is used
02765 //
02766 #define FSCANF(_stream, _format, _var) do{ if (fscanf(_stream, _format, _var) != 1) return false; }while(0)
02767 bool read_model_header(FILE *fp, svm_model* model)
02768 {
02769         svm_parameter& param = model->param;
02770         char cmd[81];
02771         while(1)
02772         {
02773                 FSCANF(fp,"%80s",cmd);
02774 
02775                 if(strcmp(cmd,"svm_type")==0)
02776                 {
02777                         FSCANF(fp,"%80s",cmd);
02778                         int i;
02779                         for(i=0;svm_type_table[i];i++)
02780                         {
02781                                 if(strcmp(svm_type_table[i],cmd)==0)
02782                                 {
02783                                         param.svm_type=i;
02784                                         break;
02785                                 }
02786                         }
02787                         if(svm_type_table[i] == NULL)
02788                         {
02789                                 fprintf(stderr,"unknown svm type.\n");
02790                                 return false;
02791                         }
02792                 }
02793                 else if(strcmp(cmd,"kernel_type")==0)
02794                 {               
02795                         FSCANF(fp,"%80s",cmd);
02796                         int i;
02797                         for(i=0;kernel_type_table[i];i++)
02798                         {
02799                                 if(strcmp(kernel_type_table[i],cmd)==0)
02800                                 {
02801                                         param.kernel_type=i;
02802                                         break;
02803                                 }
02804                         }
02805                         if(kernel_type_table[i] == NULL)
02806                         {
02807                                 fprintf(stderr,"unknown kernel function.\n");   
02808                                 return false;
02809                         }
02810                 }
02811                 else if(strcmp(cmd,"degree")==0)
02812                         FSCANF(fp,"%d",&param.degree);
02813                 else if(strcmp(cmd,"gamma")==0)
02814                         FSCANF(fp,"%lf",&param.gamma);
02815                 else if(strcmp(cmd,"coef0")==0)
02816                         FSCANF(fp,"%lf",&param.coef0);
02817                 else if(strcmp(cmd,"nr_class")==0)
02818                         FSCANF(fp,"%d",&model->nr_class);
02819                 else if(strcmp(cmd,"total_sv")==0)
02820                         FSCANF(fp,"%d",&model->l);
02821                 else if(strcmp(cmd,"rho")==0)
02822                 {
02823                         int n = model->nr_class * (model->nr_class-1)/2;
02824                         model->rho = Malloc(double,n);
02825                         for(int i=0;i<n;i++)
02826                                 FSCANF(fp,"%lf",&model->rho[i]);
02827                 }
02828                 else if(strcmp(cmd,"label")==0)
02829                 {
02830                         int n = model->nr_class;
02831                         model->label = Malloc(int,n);
02832                         for(int i=0;i<n;i++)
02833                                 FSCANF(fp,"%d",&model->label[i]);
02834                 }
02835                 else if(strcmp(cmd,"probA")==0)
02836                 {
02837                         int n = model->nr_class * (model->nr_class-1)/2;
02838                         model->probA = Malloc(double,n);
02839                         for(int i=0;i<n;i++)
02840                                 FSCANF(fp,"%lf",&model->probA[i]);
02841                 }
02842                 else if(strcmp(cmd,"probB")==0)
02843                 {
02844                         int n = model->nr_class * (model->nr_class-1)/2;
02845                         model->probB = Malloc(double,n);
02846                         for(int i=0;i<n;i++)
02847                                 FSCANF(fp,"%lf",&model->probB[i]);
02848                 }
02849                 else if(strcmp(cmd,"nr_sv")==0)
02850                 {
02851                         int n = model->nr_class;
02852                         model->nSV = Malloc(int,n);
02853                         for(int i=0;i<n;i++)
02854                                 FSCANF(fp,"%d",&model->nSV[i]);
02855                 }
02856                 else if(strcmp(cmd,"SV")==0)
02857                 {
02858                         while(1)
02859                         {
02860                                 int c = getc(fp);
02861                                 if(c==EOF || c=='\n') break;
02862                         }
02863                         break;
02864                 }
02865                 else
02866                 {
02867                         fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02868                         return false;
02869                 }
02870         }
02871 
02872         return true;
02873 
02874 }
02875 
02876 svm_model *svm_load_model(const char *model_file_name)
02877 {
02878         FILE *fp = fopen(model_file_name,"rb");
02879         if(fp==NULL) return NULL;
02880 
02881         char *old_locale = setlocale(LC_ALL, NULL);
02882         if (old_locale) {
02883                 old_locale = strdup(old_locale);
02884         }
02885         setlocale(LC_ALL, "C");
02886 
02887         // read parameters
02888 
02889         svm_model *model = Malloc(svm_model,1);
02890         model->rho = NULL;
02891         model->probA = NULL;
02892         model->probB = NULL;
02893         model->sv_indices = NULL;
02894         model->label = NULL;
02895         model->nSV = NULL;
02896         
02897         // read header
02898         if (!read_model_header(fp, model))
02899         {
02900                 fprintf(stderr, "ERROR: fscanf failed to read model\n");
02901                 setlocale(LC_ALL, old_locale);
02902                 free(old_locale);
02903                 free(model->rho);
02904                 free(model->label);
02905                 free(model->nSV);
02906                 free(model);
02907                 return NULL;
02908         }
02909         
02910         // read sv_coef and SV
02911 
02912         int elements = 0;
02913         long pos = ftell(fp);
02914 
02915         max_line_len = 1024;
02916         line = Malloc(char,max_line_len);
02917         char *p,*endptr,*idx,*val;
02918 
02919         while(readline(fp)!=NULL)
02920         {
02921                 p = strtok(line,":");
02922                 while(1)
02923                 {
02924                         p = strtok(NULL,":");
02925                         if(p == NULL)
02926                                 break;
02927                         ++elements;
02928                 }
02929         }
02930         elements += model->l;
02931 
02932         fseek(fp,pos,SEEK_SET);
02933 
02934         int m = model->nr_class - 1;
02935         int l = model->l;
02936         model->sv_coef = Malloc(double *,m);
02937         int i;
02938         for(i=0;i<m;i++)
02939                 model->sv_coef[i] = Malloc(double,l);
02940         model->SV = Malloc(svm_node*,l);
02941         svm_node *x_space = NULL;
02942         if(l>0) x_space = Malloc(svm_node,elements);
02943 
02944         int j=0;
02945         for(i=0;i<l;i++)
02946         {
02947                 readline(fp);
02948                 model->SV[i] = &x_space[j];
02949 
02950                 p = strtok(line, " \t");
02951                 model->sv_coef[0][i] = strtod(p,&endptr);
02952                 for(int k=1;k<m;k++)
02953                 {
02954                         p = strtok(NULL, " \t");
02955                         model->sv_coef[k][i] = strtod(p,&endptr);
02956                 }
02957 
02958                 while(1)
02959                 {
02960                         idx = strtok(NULL, ":");
02961                         val = strtok(NULL, " \t");
02962 
02963                         if(val == NULL)
02964                                 break;
02965                         x_space[j].index = (int) strtol(idx,&endptr,10);
02966                         x_space[j].value = strtod(val,&endptr);
02967 
02968                         ++j;
02969                 }
02970                 x_space[j++].index = -1;
02971         }
02972         free(line);
02973 
02974         setlocale(LC_ALL, old_locale);
02975         free(old_locale);
02976 
02977         if (ferror(fp) != 0 || fclose(fp) != 0)
02978                 return NULL;
02979 
02980         model->free_sv = 1;     // XXX
02981         return model;
02982 }
02983 
02984 void svm_free_model_content(svm_model* model_ptr)
02985 {
02986         if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
02987                 free((void *)(model_ptr->SV[0]));
02988         if(model_ptr->sv_coef)
02989         {
02990                 for(int i=0;i<model_ptr->nr_class-1;i++)
02991                         free(model_ptr->sv_coef[i]);
02992         }
02993 
02994         free(model_ptr->SV);
02995         model_ptr->SV = NULL;
02996 
02997         free(model_ptr->sv_coef);
02998         model_ptr->sv_coef = NULL;
02999 
03000         free(model_ptr->rho);
03001         model_ptr->rho = NULL;
03002 
03003         free(model_ptr->label);
03004         model_ptr->label= NULL;
03005 
03006         free(model_ptr->probA);
03007         model_ptr->probA = NULL;
03008 
03009         free(model_ptr->probB);
03010         model_ptr->probB= NULL;
03011 
03012         free(model_ptr->sv_indices);
03013         model_ptr->sv_indices = NULL;
03014 
03015         free(model_ptr->nSV);
03016         model_ptr->nSV = NULL;
03017 }
03018 
03019 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
03020 {
03021         if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
03022         {
03023                 svm_free_model_content(*model_ptr_ptr);
03024                 free(*model_ptr_ptr);
03025                 *model_ptr_ptr = NULL;
03026         }
03027 }
03028 
03029 void svm_destroy_param(svm_parameter* param)
03030 {
03031         free(param->weight_label);
03032         free(param->weight);
03033 }
03034 
03035 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
03036 {
03037         // svm_type
03038 
03039         int svm_type = param->svm_type;
03040         if(svm_type != C_SVC &&
03041            svm_type != NU_SVC &&
03042            svm_type != ONE_CLASS &&
03043            svm_type != EPSILON_SVR &&
03044            svm_type != NU_SVR)
03045                 return "unknown svm type";
03046         
03047         // kernel_type, degree
03048         
03049         int kernel_type = param->kernel_type;
03050         if(kernel_type != LINEAR &&
03051            kernel_type != POLY &&
03052            kernel_type != RBF &&
03053            kernel_type != SIGMOID &&
03054            kernel_type != PRECOMPUTED)
03055                 return "unknown kernel type";
03056 
03057         if(param->gamma < 0)
03058                 return "gamma < 0";
03059 
03060         if(param->degree < 0)
03061                 return "degree of polynomial kernel < 0";
03062 
03063         // cache_size,eps,C,nu,p,shrinking
03064 
03065         if(param->cache_size <= 0)
03066                 return "cache_size <= 0";
03067 
03068         if(param->eps <= 0)
03069                 return "eps <= 0";
03070 
03071         if(svm_type == C_SVC ||
03072            svm_type == EPSILON_SVR ||
03073            svm_type == NU_SVR)
03074                 if(param->C <= 0)
03075                         return "C <= 0";
03076 
03077         if(svm_type == NU_SVC ||
03078            svm_type == ONE_CLASS ||
03079            svm_type == NU_SVR)
03080                 if(param->nu <= 0 || param->nu > 1)
03081                         return "nu <= 0 or nu > 1";
03082 
03083         if(svm_type == EPSILON_SVR)
03084                 if(param->p < 0)
03085                         return "p < 0";
03086 
03087         if(param->shrinking != 0 &&
03088            param->shrinking != 1)
03089                 return "shrinking != 0 and shrinking != 1";
03090 
03091         if(param->probability != 0 &&
03092            param->probability != 1)
03093                 return "probability != 0 and probability != 1";
03094 
03095         if(param->probability == 1 &&
03096            svm_type == ONE_CLASS)
03097                 return "one-class SVM probability output not supported yet";
03098 
03099 
03100         // check whether nu-svc is feasible
03101         
03102         if(svm_type == NU_SVC)
03103         {
03104                 int l = prob->l;
03105                 int max_nr_class = 16;
03106                 int nr_class = 0;
03107                 int *label = Malloc(int,max_nr_class);
03108                 int *count = Malloc(int,max_nr_class);
03109 
03110                 int i;
03111                 for(i=0;i<l;i++)
03112                 {
03113                         int this_label = (int)prob->y[i];
03114                         int j;
03115                         for(j=0;j<nr_class;j++)
03116                                 if(this_label == label[j])
03117                                 {
03118                                         ++count[j];
03119                                         break;
03120                                 }
03121                         if(j == nr_class)
03122                         {
03123                                 if(nr_class == max_nr_class)
03124                                 {
03125                                         max_nr_class *= 2;
03126                                         label = (int *)realloc(label,max_nr_class*sizeof(int));
03127                                         count = (int *)realloc(count,max_nr_class*sizeof(int));
03128                                 }
03129                                 label[nr_class] = this_label;
03130                                 count[nr_class] = 1;
03131                                 ++nr_class;
03132                         }
03133                 }
03134         
03135                 for(i=0;i<nr_class;i++)
03136                 {
03137                         int n1 = count[i];
03138                         for(int j=i+1;j<nr_class;j++)
03139                         {
03140                                 int n2 = count[j];
03141                                 if(param->nu*(n1+n2)/2 > min(n1,n2))
03142                                 {
03143                                         free(label);
03144                                         free(count);
03145                                         return "specified nu is infeasible";
03146                                 }
03147                         }
03148                 }
03149                 free(label);
03150                 free(count);
03151         }
03152 
03153         return NULL;
03154 }
03155 
03156 int svm_check_probability_model(const svm_model *model)
03157 {
03158         return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03159                 model->probA!=NULL && model->probB!=NULL) ||
03160                 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03161                  model->probA!=NULL);
03162 }
03163 
03164 void svm_set_print_string_function(void (*print_func)(const char *))
03165 {
03166         if(print_func == NULL)
03167                 svm_print_string = &print_string_stdout;
03168         else
03169                 svm_print_string = print_func;
03170 }


target_obejct_detector
Author(s): CIR-KIT
autogenerated on Thu Jun 6 2019 20:19:57