libsvm3: svm.cpp Source File

Go to the documentation of this file.
00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <ctype.h>
00005 #include <float.h>
00006 #include <string.h>
00007 #include <stdarg.h>
00008 #include "svm.h"
00009 int libsvm_version = LIBSVM_VERSION;
00010 typedef float Qfloat;
00011 typedef signed char schar;
00012 #ifndef min
00013 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
00014 #endif
00015 #ifndef max
00016 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
00017 #endif
00018 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00019 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
00020 {
00021         dst = new T[n];
00022         memcpy((void *)dst,(void *)src,sizeof(T)*n);
00023 }
00024 static inline double powi(double base, int times)
00025 {
00026         double tmp = base, ret = 1.0;
00027 
00028         for(int t=times; t>0; t/=2)
00029         {
00030                 if(t%2==1) ret*=tmp;
00031                 tmp = tmp * tmp;
00032         }
00033         return ret;
00034 }
00035 #define INF HUGE_VAL
00036 #define TAU 1e-12
00037 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00038 
00039 static void print_string_stdout(const char *s)
00040 {
00041         fputs(s,stdout);
00042         fflush(stdout);
00043 }
00044 static void (*svm_print_string) (const char *) = &print_string_stdout;
00045 #if 1
00046 static void info(const char *fmt,...)
00047 {
00048         char buf[BUFSIZ];
00049         va_list ap;
00050         va_start(ap,fmt);
00051         vsprintf(buf,fmt,ap);
00052         va_end(ap);
00053         (*svm_print_string)(buf);
00054 }
00055 #else
00056 static void info(const char *fmt,...) {}
00057 #endif
00058 
00059 //
00060 // Kernel Cache
00061 //
00062 // l is the number of total data items
00063 // size is the cache size limit in bytes
00064 //
00065 class Cache
00066 {
00067 public:
00068         Cache(int l,long int size);
00069         ~Cache();
00070 
00071         // request data [0,len)
00072         // return some position p where [p,len) need to be filled
00073         // (p >= len if nothing needs to be filled)
00074         int get_data(const int index, Qfloat **data, int len);
00075         void swap_index(int i, int j);  
00076 private:
00077         int l;
00078         long int size;
00079         struct head_t
00080         {
00081                 head_t *prev, *next;    // a circular list
00082                 Qfloat *data;
00083                 int len;                // data[0,len) is cached in this entry
00084         };
00085 
00086         head_t *head;
00087         head_t lru_head;
00088         void lru_delete(head_t *h);
00089         void lru_insert(head_t *h);
00090 };
00091 
00092 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00093 {
00094         head = (head_t *)calloc(l,sizeof(head_t));      // initialized to 0
00095         size /= sizeof(Qfloat);
00096         size -= l * sizeof(head_t) / sizeof(Qfloat);
00097         size = max(size, 2 * (long int) l);     // cache must be large enough for two columns
00098         lru_head.next = lru_head.prev = &lru_head;
00099 }
00100 
00101 Cache::~Cache()
00102 {
00103         for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00104                 free(h->data);
00105         free(head);
00106 }
00107 
00108 void Cache::lru_delete(head_t *h)
00109 {
00110         // delete from current location
00111         h->prev->next = h->next;
00112         h->next->prev = h->prev;
00113 }
00114 
00115 void Cache::lru_insert(head_t *h)
00116 {
00117         // insert to last position
00118         h->next = &lru_head;
00119         h->prev = lru_head.prev;
00120         h->prev->next = h;
00121         h->next->prev = h;
00122 }
00123 
00124 int Cache::get_data(const int index, Qfloat **data, int len)
00125 {
00126         head_t *h = &head[index];
00127         if(h->len) lru_delete(h);
00128         int more = len - h->len;
00129 
00130         if(more > 0)
00131         {
00132                 // free old space
00133                 while(size < more)
00134                 {
00135                         head_t *old = lru_head.next;
00136                         lru_delete(old);
00137                         free(old->data);
00138                         size += old->len;
00139                         old->data = 0;
00140                         old->len = 0;
00141                 }
00142 
00143                 // allocate new space
00144                 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00145                 size -= more;
00146                 swap(h->len,len);
00147         }
00148 
00149         lru_insert(h);
00150         *data = h->data;
00151         return len;
00152 }
00153 
00154 void Cache::swap_index(int i, int j)
00155 {
00156         if(i==j) return;
00157 
00158         if(head[i].len) lru_delete(&head[i]);
00159         if(head[j].len) lru_delete(&head[j]);
00160         swap(head[i].data,head[j].data);
00161         swap(head[i].len,head[j].len);
00162         if(head[i].len) lru_insert(&head[i]);
00163         if(head[j].len) lru_insert(&head[j]);
00164 
00165         if(i>j) swap(i,j);
00166         for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00167         {
00168                 if(h->len > i)
00169                 {
00170                         if(h->len > j)
00171                                 swap(h->data[i],h->data[j]);
00172                         else
00173                         {
00174                                 // give up
00175                                 lru_delete(h);
00176                                 free(h->data);
00177                                 size += h->len;
00178                                 h->data = 0;
00179                                 h->len = 0;
00180                         }
00181                 }
00182         }
00183 }
00184 
00185 //
00186 // Kernel evaluation
00187 //
00188 // the static method k_function is for doing single kernel evaluation
00189 // the constructor of Kernel prepares to calculate the l*l kernel matrix
00190 // the member function get_Q is for getting one column from the Q Matrix
00191 //
00192 class QMatrix {
00193 public:
00194         virtual Qfloat *get_Q(int column, int len) const = 0;
00195         virtual double *get_QD() const = 0;
00196         virtual void swap_index(int i, int j) const = 0;
00197         virtual ~QMatrix() {}
00198 };
00199 
00200 class Kernel: public QMatrix {
00201 public:
00202         Kernel(int l, svm_node * const * x, const svm_parameter& param);
00203         virtual ~Kernel();
00204 
00205         static double k_function(const svm_node *x, const svm_node *y,
00206                                  const svm_parameter& param);
00207         virtual Qfloat *get_Q(int column, int len) const = 0;
00208         virtual double *get_QD() const = 0;
00209         virtual void swap_index(int i, int j) const     // no so const...
00210         {
00211                 swap(x[i],x[j]);
00212                 if(x_square) swap(x_square[i],x_square[j]);
00213         }
00214 protected:
00215 
00216         double (Kernel::*kernel_function)(int i, int j) const;
00217 
00218 private:
00219         const svm_node **x;
00220         double *x_square;
00221 
00222         // svm_parameter
00223         const int kernel_type;
00224         const int degree;
00225         const double gamma;
00226         const double coef0;
00227 
00228         static double dot(const svm_node *px, const svm_node *py);
00229         double kernel_linear(int i, int j) const
00230         {
00231                 return dot(x[i],x[j]);
00232         }
00233         double kernel_poly(int i, int j) const
00234         {
00235                 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00236         }
00237         double kernel_rbf(int i, int j) const
00238         {
00239                 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00240         }
00241         double kernel_sigmoid(int i, int j) const
00242         {
00243                 return tanh(gamma*dot(x[i],x[j])+coef0);
00244         }
00245         double kernel_precomputed(int i, int j) const
00246         {
00247                 return x[i][(int)(x[j][0].value)].value;
00248         }
00249 };
00250 
00251 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00252 :kernel_type(param.kernel_type), degree(param.degree),
00253  gamma(param.gamma), coef0(param.coef0)
00254 {
00255         switch(kernel_type)
00256         {
00257                 case LINEAR:
00258                         kernel_function = &Kernel::kernel_linear;
00259                         break;
00260                 case POLY:
00261                         kernel_function = &Kernel::kernel_poly;
00262                         break;
00263                 case RBF:
00264                         kernel_function = &Kernel::kernel_rbf;
00265                         break;
00266                 case SIGMOID:
00267                         kernel_function = &Kernel::kernel_sigmoid;
00268                         break;
00269                 case PRECOMPUTED:
00270                         kernel_function = &Kernel::kernel_precomputed;
00271                         break;
00272         }
00273 
00274         clone(x,x_,l);
00275 
00276         if(kernel_type == RBF)
00277         {
00278                 x_square = new double[l];
00279                 for(int i=0;i<l;i++)
00280                         x_square[i] = dot(x[i],x[i]);
00281         }
00282         else
00283                 x_square = 0;
00284 }
00285 
00286 Kernel::~Kernel()
00287 {
00288         delete[] x;
00289         delete[] x_square;
00290 }
00291 
00292 double Kernel::dot(const svm_node *px, const svm_node *py)
00293 {
00294         double sum = 0;
00295         while(px->index != -1 && py->index != -1)
00296         {
00297                 if(px->index == py->index)
00298                 {
00299                         sum += px->value * py->value;
00300                         ++px;
00301                         ++py;
00302                 }
00303                 else
00304                 {
00305                         if(px->index > py->index)
00306                                 ++py;
00307                         else
00308                                 ++px;
00309                 }                       
00310         }
00311         return sum;
00312 }
00313 
00314 double Kernel::k_function(const svm_node *x, const svm_node *y,
00315                           const svm_parameter& param)
00316 {
00317         switch(param.kernel_type)
00318         {
00319                 case LINEAR:
00320                         return dot(x,y);
00321                 case POLY:
00322                         return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00323                 case RBF:
00324                 {
00325                         double sum = 0;
00326                         while(x->index != -1 && y->index !=-1)
00327                         {
00328                                 if(x->index == y->index)
00329                                 {
00330                                         double d = x->value - y->value;
00331                                         sum += d*d;
00332                                         ++x;
00333                                         ++y;
00334                                 }
00335                                 else
00336                                 {
00337                                         if(x->index > y->index)
00338                                         {       
00339                                                 sum += y->value * y->value;
00340                                                 ++y;
00341                                         }
00342                                         else
00343                                         {
00344                                                 sum += x->value * x->value;
00345                                                 ++x;
00346                                         }
00347                                 }
00348                         }
00349 
00350                         while(x->index != -1)
00351                         {
00352                                 sum += x->value * x->value;
00353                                 ++x;
00354                         }
00355 
00356                         while(y->index != -1)
00357                         {
00358                                 sum += y->value * y->value;
00359                                 ++y;
00360                         }
00361                         
00362                         return exp(-param.gamma*sum);
00363                 }
00364                 case SIGMOID:
00365                         return tanh(param.gamma*dot(x,y)+param.coef0);
00366                 case PRECOMPUTED:  //x: test (validation), y: SV
00367                         return x[(int)(y->value)].value;
00368                 default:
00369                         return 0;  // Unreachable 
00370         }
00371 }
00372 
00373 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
00374 // Solves:
00375 //
00376 //      min 0.5(\alpha^T Q \alpha) + p^T \alpha
00377 //
00378 //              y^T \alpha = \delta
00379 //              y_i = +1 or -1
00380 //              0 <= alpha_i <= Cp for y_i = 1
00381 //              0 <= alpha_i <= Cn for y_i = -1
00382 //
00383 // Given:
00384 //
00385 //      Q, p, y, Cp, Cn, and an initial feasible point \alpha
00386 //      l is the size of vectors and matrices
00387 //      eps is the stopping tolerance
00388 //
00389 // solution will be put in \alpha, objective value will be put in obj
00390 //
00391 class Solver {
00392 public:
00393         Solver() {};
00394         virtual ~Solver() {};
00395 
00396         struct SolutionInfo {
00397                 double obj;
00398                 double rho;
00399                 double upper_bound_p;
00400                 double upper_bound_n;
00401                 double r;       // for Solver_NU
00402         };
00403 
00404         void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00405                    double *alpha_, double Cp, double Cn, double eps,
00406                    SolutionInfo* si, int shrinking);
00407 protected:
00408         int active_size;
00409         schar *y;
00410         double *G;              // gradient of objective function
00411         enum { LOWER_BOUND, UPPER_BOUND, FREE };
00412         char *alpha_status;     // LOWER_BOUND, UPPER_BOUND, FREE
00413         double *alpha;
00414         const QMatrix *Q;
00415         const double *QD;
00416         double eps;
00417         double Cp,Cn;
00418         double *p;
00419         int *active_set;
00420         double *G_bar;          // gradient, if we treat free variables as 0
00421         int l;
00422         bool unshrink;  // XXX
00423 
00424         double get_C(int i)
00425         {
00426                 return (y[i] > 0)? Cp : Cn;
00427         }
00428         void update_alpha_status(int i)
00429         {
00430                 if(alpha[i] >= get_C(i))
00431                         alpha_status[i] = UPPER_BOUND;
00432                 else if(alpha[i] <= 0)
00433                         alpha_status[i] = LOWER_BOUND;
00434                 else alpha_status[i] = FREE;
00435         }
00436         bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00437         bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00438         bool is_free(int i) { return alpha_status[i] == FREE; }
00439         void swap_index(int i, int j);
00440         void reconstruct_gradient();
00441         virtual int select_working_set(int &i, int &j);
00442         virtual double calculate_rho();
00443         virtual void do_shrinking();
00444 private:
00445         bool be_shrunk(int i, double Gmax1, double Gmax2);      
00446 };
00447 
00448 void Solver::swap_index(int i, int j)
00449 {
00450         Q->swap_index(i,j);
00451         swap(y[i],y[j]);
00452         swap(G[i],G[j]);
00453         swap(alpha_status[i],alpha_status[j]);
00454         swap(alpha[i],alpha[j]);
00455         swap(p[i],p[j]);
00456         swap(active_set[i],active_set[j]);
00457         swap(G_bar[i],G_bar[j]);
00458 }
00459 
00460 void Solver::reconstruct_gradient()
00461 {
00462         // reconstruct inactive elements of G from G_bar and free variables
00463 
00464         if(active_size == l) return;
00465 
00466         int i,j;
00467         int nr_free = 0;
00468 
00469         for(j=active_size;j<l;j++)
00470                 G[j] = G_bar[j] + p[j];
00471 
00472         for(j=0;j<active_size;j++)
00473                 if(is_free(j))
00474                         nr_free++;
00475 
00476         if(2*nr_free < active_size)
00477                 info("\nWarning: using -h 0 may be faster\n");
00478 
00479         if (nr_free*l > 2*active_size*(l-active_size))
00480         {
00481                 for(i=active_size;i<l;i++)
00482                 {
00483                         const Qfloat *Q_i = Q->get_Q(i,active_size);
00484                         for(j=0;j<active_size;j++)
00485                                 if(is_free(j))
00486                                         G[i] += alpha[j] * Q_i[j];
00487                 }
00488         }
00489         else
00490         {
00491                 for(i=0;i<active_size;i++)
00492                         if(is_free(i))
00493                         {
00494                                 const Qfloat *Q_i = Q->get_Q(i,l);
00495                                 double alpha_i = alpha[i];
00496                                 for(j=active_size;j<l;j++)
00497                                         G[j] += alpha_i * Q_i[j];
00498                         }
00499         }
00500 }
00501 
00502 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00503                    double *alpha_, double Cp, double Cn, double eps,
00504                    SolutionInfo* si, int shrinking)
00505 {
00506         this->l = l;
00507         this->Q = &Q;
00508         QD=Q.get_QD();
00509         clone(p, p_,l);
00510         clone(y, y_,l);
00511         clone(alpha,alpha_,l);
00512         this->Cp = Cp;
00513         this->Cn = Cn;
00514         this->eps = eps;
00515         unshrink = false;
00516 
00517         // initialize alpha_status
00518         {
00519                 alpha_status = new char[l];
00520                 for(int i=0;i<l;i++)
00521                         update_alpha_status(i);
00522         }
00523 
00524         // initialize active set (for shrinking)
00525         {
00526                 active_set = new int[l];
00527                 for(int i=0;i<l;i++)
00528                         active_set[i] = i;
00529                 active_size = l;
00530         }
00531 
00532         // initialize gradient
00533         {
00534                 G = new double[l];
00535                 G_bar = new double[l];
00536                 int i;
00537                 for(i=0;i<l;i++)
00538                 {
00539                         G[i] = p[i];
00540                         G_bar[i] = 0;
00541                 }
00542                 for(i=0;i<l;i++)
00543                         if(!is_lower_bound(i))
00544                         {
00545                                 const Qfloat *Q_i = Q.get_Q(i,l);
00546                                 double alpha_i = alpha[i];
00547                                 int j;
00548                                 for(j=0;j<l;j++)
00549                                         G[j] += alpha_i*Q_i[j];
00550                                 if(is_upper_bound(i))
00551                                         for(j=0;j<l;j++)
00552                                                 G_bar[j] += get_C(i) * Q_i[j];
00553                         }
00554         }
00555 
00556         // optimization step
00557 
00558         int iter = 0;
00559         int counter = min(l,1000)+1;
00560 
00561         while(1)
00562         {
00563                 // show progress and do shrinking
00564 
00565                 if(--counter == 0)
00566                 {
00567                         counter = min(l,1000);
00568                         if(shrinking) do_shrinking();
00569                         info(".");
00570                 }
00571 
00572                 int i,j;
00573                 if(select_working_set(i,j)!=0)
00574                 {
00575                         // reconstruct the whole gradient
00576                         reconstruct_gradient();
00577                         // reset active set size and check
00578                         active_size = l;
00579                         info("*");
00580                         if(select_working_set(i,j)!=0)
00581                                 break;
00582                         else
00583                                 counter = 1;    // do shrinking next iteration
00584                 }
00585                 
00586                 ++iter;
00587 
00588                 // update alpha[i] and alpha[j], handle bounds carefully
00589                 
00590                 const Qfloat *Q_i = Q.get_Q(i,active_size);
00591                 const Qfloat *Q_j = Q.get_Q(j,active_size);
00592 
00593                 double C_i = get_C(i);
00594                 double C_j = get_C(j);
00595 
00596                 double old_alpha_i = alpha[i];
00597                 double old_alpha_j = alpha[j];
00598 
00599                 if(y[i]!=y[j])
00600                 {
00601                         double quad_coef = QD[i]+QD[j]+2*Q_i[j];
00602                         if (quad_coef <= 0)
00603                                 quad_coef = TAU;
00604                         double delta = (-G[i]-G[j])/quad_coef;
00605                         double diff = alpha[i] - alpha[j];
00606                         alpha[i] += delta;
00607                         alpha[j] += delta;
00608                         
00609                         if(diff > 0)
00610                         {
00611                                 if(alpha[j] < 0)
00612                                 {
00613                                         alpha[j] = 0;
00614                                         alpha[i] = diff;
00615                                 }
00616                         }
00617                         else
00618                         {
00619                                 if(alpha[i] < 0)
00620                                 {
00621                                         alpha[i] = 0;
00622                                         alpha[j] = -diff;
00623                                 }
00624                         }
00625                         if(diff > C_i - C_j)
00626                         {
00627                                 if(alpha[i] > C_i)
00628                                 {
00629                                         alpha[i] = C_i;
00630                                         alpha[j] = C_i - diff;
00631                                 }
00632                         }
00633                         else
00634                         {
00635                                 if(alpha[j] > C_j)
00636                                 {
00637                                         alpha[j] = C_j;
00638                                         alpha[i] = C_j + diff;
00639                                 }
00640                         }
00641                 }
00642                 else
00643                 {
00644                         double quad_coef = QD[i]+QD[j]-2*Q_i[j];
00645                         if (quad_coef <= 0)
00646                                 quad_coef = TAU;
00647                         double delta = (G[i]-G[j])/quad_coef;
00648                         double sum = alpha[i] + alpha[j];
00649                         alpha[i] -= delta;
00650                         alpha[j] += delta;
00651 
00652                         if(sum > C_i)
00653                         {
00654                                 if(alpha[i] > C_i)
00655                                 {
00656                                         alpha[i] = C_i;
00657                                         alpha[j] = sum - C_i;
00658                                 }
00659                         }
00660                         else
00661                         {
00662                                 if(alpha[j] < 0)
00663                                 {
00664                                         alpha[j] = 0;
00665                                         alpha[i] = sum;
00666                                 }
00667                         }
00668                         if(sum > C_j)
00669                         {
00670                                 if(alpha[j] > C_j)
00671                                 {
00672                                         alpha[j] = C_j;
00673                                         alpha[i] = sum - C_j;
00674                                 }
00675                         }
00676                         else
00677                         {
00678                                 if(alpha[i] < 0)
00679                                 {
00680                                         alpha[i] = 0;
00681                                         alpha[j] = sum;
00682                                 }
00683                         }
00684                 }
00685 
00686                 // update G
00687 
00688                 double delta_alpha_i = alpha[i] - old_alpha_i;
00689                 double delta_alpha_j = alpha[j] - old_alpha_j;
00690                 
00691                 for(int k=0;k<active_size;k++)
00692                 {
00693                         G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00694                 }
00695 
00696                 // update alpha_status and G_bar
00697 
00698                 {
00699                         bool ui = is_upper_bound(i);
00700                         bool uj = is_upper_bound(j);
00701                         update_alpha_status(i);
00702                         update_alpha_status(j);
00703                         int k;
00704                         if(ui != is_upper_bound(i))
00705                         {
00706                                 Q_i = Q.get_Q(i,l);
00707                                 if(ui)
00708                                         for(k=0;k<l;k++)
00709                                                 G_bar[k] -= C_i * Q_i[k];
00710                                 else
00711                                         for(k=0;k<l;k++)
00712                                                 G_bar[k] += C_i * Q_i[k];
00713                         }
00714 
00715                         if(uj != is_upper_bound(j))
00716                         {
00717                                 Q_j = Q.get_Q(j,l);
00718                                 if(uj)
00719                                         for(k=0;k<l;k++)
00720                                                 G_bar[k] -= C_j * Q_j[k];
00721                                 else
00722                                         for(k=0;k<l;k++)
00723                                                 G_bar[k] += C_j * Q_j[k];
00724                         }
00725                 }
00726         }
00727 
00728         // calculate rho
00729 
00730         si->rho = calculate_rho();
00731 
00732         // calculate objective value
00733         {
00734                 double v = 0;
00735                 int i;
00736                 for(i=0;i<l;i++)
00737                         v += alpha[i] * (G[i] + p[i]);
00738 
00739                 si->obj = v/2;
00740         }
00741 
00742         // put back the solution
00743         {
00744                 for(int i=0;i<l;i++)
00745                         alpha_[active_set[i]] = alpha[i];
00746         }
00747 
00748         // juggle everything back
00749         /*{
00750                 for(int i=0;i<l;i++)
00751                         while(active_set[i] != i)
00752                                 swap_index(i,active_set[i]);
00753                                 // or Q.swap_index(i,active_set[i]);
00754         }*/
00755 
00756         si->upper_bound_p = Cp;
00757         si->upper_bound_n = Cn;
00758 
00759         info("\noptimization finished, #iter = %d\n",iter);
00760 
00761         delete[] p;
00762         delete[] y;
00763         delete[] alpha;
00764         delete[] alpha_status;
00765         delete[] active_set;
00766         delete[] G;
00767         delete[] G_bar;
00768 }
00769 
00770 // return 1 if already optimal, return 0 otherwise
00771 int Solver::select_working_set(int &out_i, int &out_j)
00772 {
00773         // return i,j such that
00774         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
00775         // j: minimizes the decrease of obj value
00776         //    (if quadratic coefficeint <= 0, replace it with tau)
00777         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
00778         
00779         double Gmax = -INF;
00780         double Gmax2 = -INF;
00781         int Gmax_idx = -1;
00782         int Gmin_idx = -1;
00783         double obj_diff_min = INF;
00784 
00785         for(int t=0;t<active_size;t++)
00786                 if(y[t]==+1)    
00787                 {
00788                         if(!is_upper_bound(t))
00789                                 if(-G[t] >= Gmax)
00790                                 {
00791                                         Gmax = -G[t];
00792                                         Gmax_idx = t;
00793                                 }
00794                 }
00795                 else
00796                 {
00797                         if(!is_lower_bound(t))
00798                                 if(G[t] >= Gmax)
00799                                 {
00800                                         Gmax = G[t];
00801                                         Gmax_idx = t;
00802                                 }
00803                 }
00804 
00805         int i = Gmax_idx;
00806         const Qfloat *Q_i = NULL;
00807         if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
00808                 Q_i = Q->get_Q(i,active_size);
00809 
00810         for(int j=0;j<active_size;j++)
00811         {
00812                 if(y[j]==+1)
00813                 {
00814                         if (!is_lower_bound(j))
00815                         {
00816                                 double grad_diff=Gmax+G[j];
00817                                 if (G[j] >= Gmax2)
00818                                         Gmax2 = G[j];
00819                                 if (grad_diff > 0)
00820                                 {
00821                                         double obj_diff; 
00822                                         double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
00823                                         if (quad_coef > 0)
00824                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00825                                         else
00826                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00827 
00828                                         if (obj_diff <= obj_diff_min)
00829                                         {
00830                                                 Gmin_idx=j;
00831                                                 obj_diff_min = obj_diff;
00832                                         }
00833                                 }
00834                         }
00835                 }
00836                 else
00837                 {
00838                         if (!is_upper_bound(j))
00839                         {
00840                                 double grad_diff= Gmax-G[j];
00841                                 if (-G[j] >= Gmax2)
00842                                         Gmax2 = -G[j];
00843                                 if (grad_diff > 0)
00844                                 {
00845                                         double obj_diff; 
00846                                         double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
00847                                         if (quad_coef > 0)
00848                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00849                                         else
00850                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00851 
00852                                         if (obj_diff <= obj_diff_min)
00853                                         {
00854                                                 Gmin_idx=j;
00855                                                 obj_diff_min = obj_diff;
00856                                         }
00857                                 }
00858                         }
00859                 }
00860         }
00861 
00862         if(Gmax+Gmax2 < eps)
00863                 return 1;
00864 
00865         out_i = Gmax_idx;
00866         out_j = Gmin_idx;
00867         return 0;
00868 }
00869 
00870 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
00871 {
00872         if(is_upper_bound(i))
00873         {
00874                 if(y[i]==+1)
00875                         return(-G[i] > Gmax1);
00876                 else
00877                         return(-G[i] > Gmax2);
00878         }
00879         else if(is_lower_bound(i))
00880         {
00881                 if(y[i]==+1)
00882                         return(G[i] > Gmax2);
00883                 else    
00884                         return(G[i] > Gmax1);
00885         }
00886         else
00887                 return(false);
00888 }
00889 
00890 void Solver::do_shrinking()
00891 {
00892         int i;
00893         double Gmax1 = -INF;            // max { -y_i * grad(f)_i | i in I_up(\alpha) }
00894         double Gmax2 = -INF;            // max { y_i * grad(f)_i | i in I_low(\alpha) }
00895 
00896         // find maximal violating pair first
00897         for(i=0;i<active_size;i++)
00898         {
00899                 if(y[i]==+1)    
00900                 {
00901                         if(!is_upper_bound(i))  
00902                         {
00903                                 if(-G[i] >= Gmax1)
00904                                         Gmax1 = -G[i];
00905                         }
00906                         if(!is_lower_bound(i))  
00907                         {
00908                                 if(G[i] >= Gmax2)
00909                                         Gmax2 = G[i];
00910                         }
00911                 }
00912                 else    
00913                 {
00914                         if(!is_upper_bound(i))  
00915                         {
00916                                 if(-G[i] >= Gmax2)
00917                                         Gmax2 = -G[i];
00918                         }
00919                         if(!is_lower_bound(i))  
00920                         {
00921                                 if(G[i] >= Gmax1)
00922                                         Gmax1 = G[i];
00923                         }
00924                 }
00925         }
00926 
00927         if(unshrink == false && Gmax1 + Gmax2 <= eps*10) 
00928         {
00929                 unshrink = true;
00930                 reconstruct_gradient();
00931                 active_size = l;
00932                 info("*");
00933         }
00934 
00935         for(i=0;i<active_size;i++)
00936                 if (be_shrunk(i, Gmax1, Gmax2))
00937                 {
00938                         active_size--;
00939                         while (active_size > i)
00940                         {
00941                                 if (!be_shrunk(active_size, Gmax1, Gmax2))
00942                                 {
00943                                         swap_index(i,active_size);
00944                                         break;
00945                                 }
00946                                 active_size--;
00947                         }
00948                 }
00949 }
00950 
00951 double Solver::calculate_rho()
00952 {
00953         double r;
00954         int nr_free = 0;
00955         double ub = INF, lb = -INF, sum_free = 0;
00956         for(int i=0;i<active_size;i++)
00957         {
00958                 double yG = y[i]*G[i];
00959 
00960                 if(is_upper_bound(i))
00961                 {
00962                         if(y[i]==-1)
00963                                 ub = min(ub,yG);
00964                         else
00965                                 lb = max(lb,yG);
00966                 }
00967                 else if(is_lower_bound(i))
00968                 {
00969                         if(y[i]==+1)
00970                                 ub = min(ub,yG);
00971                         else
00972                                 lb = max(lb,yG);
00973                 }
00974                 else
00975                 {
00976                         ++nr_free;
00977                         sum_free += yG;
00978                 }
00979         }
00980 
00981         if(nr_free>0)
00982                 r = sum_free/nr_free;
00983         else
00984                 r = (ub+lb)/2;
00985 
00986         return r;
00987 }
00988 
00989 //
00990 // Solver for nu-svm classification and regression
00991 //
00992 // additional constraint: e^T \alpha = constant
00993 //
00994 class Solver_NU : public Solver
00995 {
00996 public:
00997         Solver_NU() {}
00998         void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
00999                    double *alpha, double Cp, double Cn, double eps,
01000                    SolutionInfo* si, int shrinking)
01001         {
01002                 this->si = si;
01003                 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
01004         }
01005 private:
01006         SolutionInfo *si;
01007         int select_working_set(int &i, int &j);
01008         double calculate_rho();
01009         bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
01010         void do_shrinking();
01011 };
01012 
01013 // return 1 if already optimal, return 0 otherwise
01014 int Solver_NU::select_working_set(int &out_i, int &out_j)
01015 {
01016         // return i,j such that y_i = y_j and
01017         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
01018         // j: minimizes the decrease of obj value
01019         //    (if quadratic coefficeint <= 0, replace it with tau)
01020         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
01021 
01022         double Gmaxp = -INF;
01023         double Gmaxp2 = -INF;
01024         int Gmaxp_idx = -1;
01025 
01026         double Gmaxn = -INF;
01027         double Gmaxn2 = -INF;
01028         int Gmaxn_idx = -1;
01029 
01030         int Gmin_idx = -1;
01031         double obj_diff_min = INF;
01032 
01033         for(int t=0;t<active_size;t++)
01034                 if(y[t]==+1)
01035                 {
01036                         if(!is_upper_bound(t))
01037                                 if(-G[t] >= Gmaxp)
01038                                 {
01039                                         Gmaxp = -G[t];
01040                                         Gmaxp_idx = t;
01041                                 }
01042                 }
01043                 else
01044                 {
01045                         if(!is_lower_bound(t))
01046                                 if(G[t] >= Gmaxn)
01047                                 {
01048                                         Gmaxn = G[t];
01049                                         Gmaxn_idx = t;
01050                                 }
01051                 }
01052 
01053         int ip = Gmaxp_idx;
01054         int in = Gmaxn_idx;
01055         const Qfloat *Q_ip = NULL;
01056         const Qfloat *Q_in = NULL;
01057         if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
01058                 Q_ip = Q->get_Q(ip,active_size);
01059         if(in != -1)
01060                 Q_in = Q->get_Q(in,active_size);
01061 
01062         for(int j=0;j<active_size;j++)
01063         {
01064                 if(y[j]==+1)
01065                 {
01066                         if (!is_lower_bound(j)) 
01067                         {
01068                                 double grad_diff=Gmaxp+G[j];
01069                                 if (G[j] >= Gmaxp2)
01070                                         Gmaxp2 = G[j];
01071                                 if (grad_diff > 0)
01072                                 {
01073                                         double obj_diff; 
01074                                         double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
01075                                         if (quad_coef > 0)
01076                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01077                                         else
01078                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01079 
01080                                         if (obj_diff <= obj_diff_min)
01081                                         {
01082                                                 Gmin_idx=j;
01083                                                 obj_diff_min = obj_diff;
01084                                         }
01085                                 }
01086                         }
01087                 }
01088                 else
01089                 {
01090                         if (!is_upper_bound(j))
01091                         {
01092                                 double grad_diff=Gmaxn-G[j];
01093                                 if (-G[j] >= Gmaxn2)
01094                                         Gmaxn2 = -G[j];
01095                                 if (grad_diff > 0)
01096                                 {
01097                                         double obj_diff; 
01098                                         double quad_coef = QD[in]+QD[j]-2*Q_in[j];
01099                                         if (quad_coef > 0)
01100                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01101                                         else
01102                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01103 
01104                                         if (obj_diff <= obj_diff_min)
01105                                         {
01106                                                 Gmin_idx=j;
01107                                                 obj_diff_min = obj_diff;
01108                                         }
01109                                 }
01110                         }
01111                 }
01112         }
01113 
01114         if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
01115                 return 1;
01116 
01117         if (y[Gmin_idx] == +1)
01118                 out_i = Gmaxp_idx;
01119         else
01120                 out_i = Gmaxn_idx;
01121         out_j = Gmin_idx;
01122 
01123         return 0;
01124 }
01125 
01126 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01127 {
01128         if(is_upper_bound(i))
01129         {
01130                 if(y[i]==+1)
01131                         return(-G[i] > Gmax1);
01132                 else    
01133                         return(-G[i] > Gmax4);
01134         }
01135         else if(is_lower_bound(i))
01136         {
01137                 if(y[i]==+1)
01138                         return(G[i] > Gmax2);
01139                 else    
01140                         return(G[i] > Gmax3);
01141         }
01142         else
01143                 return(false);
01144 }
01145 
01146 void Solver_NU::do_shrinking()
01147 {
01148         double Gmax1 = -INF;    // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
01149         double Gmax2 = -INF;    // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
01150         double Gmax3 = -INF;    // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
01151         double Gmax4 = -INF;    // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
01152 
01153         // find maximal violating pair first
01154         int i;
01155         for(i=0;i<active_size;i++)
01156         {
01157                 if(!is_upper_bound(i))
01158                 {
01159                         if(y[i]==+1)
01160                         {
01161                                 if(-G[i] > Gmax1) Gmax1 = -G[i];
01162                         }
01163                         else    if(-G[i] > Gmax4) Gmax4 = -G[i];
01164                 }
01165                 if(!is_lower_bound(i))
01166                 {
01167                         if(y[i]==+1)
01168                         {       
01169                                 if(G[i] > Gmax2) Gmax2 = G[i];
01170                         }
01171                         else    if(G[i] > Gmax3) Gmax3 = G[i];
01172                 }
01173         }
01174 
01175         if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) 
01176         {
01177                 unshrink = true;
01178                 reconstruct_gradient();
01179                 active_size = l;
01180         }
01181 
01182         for(i=0;i<active_size;i++)
01183                 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
01184                 {
01185                         active_size--;
01186                         while (active_size > i)
01187                         {
01188                                 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01189                                 {
01190                                         swap_index(i,active_size);
01191                                         break;
01192                                 }
01193                                 active_size--;
01194                         }
01195                 }
01196 }
01197 
01198 double Solver_NU::calculate_rho()
01199 {
01200         int nr_free1 = 0,nr_free2 = 0;
01201         double ub1 = INF, ub2 = INF;
01202         double lb1 = -INF, lb2 = -INF;
01203         double sum_free1 = 0, sum_free2 = 0;
01204 
01205         for(int i=0;i<active_size;i++)
01206         {
01207                 if(y[i]==+1)
01208                 {
01209                         if(is_upper_bound(i))
01210                                 lb1 = max(lb1,G[i]);
01211                         else if(is_lower_bound(i))
01212                                 ub1 = min(ub1,G[i]);
01213                         else
01214                         {
01215                                 ++nr_free1;
01216                                 sum_free1 += G[i];
01217                         }
01218                 }
01219                 else
01220                 {
01221                         if(is_upper_bound(i))
01222                                 lb2 = max(lb2,G[i]);
01223                         else if(is_lower_bound(i))
01224                                 ub2 = min(ub2,G[i]);
01225                         else
01226                         {
01227                                 ++nr_free2;
01228                                 sum_free2 += G[i];
01229                         }
01230                 }
01231         }
01232 
01233         double r1,r2;
01234         if(nr_free1 > 0)
01235                 r1 = sum_free1/nr_free1;
01236         else
01237                 r1 = (ub1+lb1)/2;
01238         
01239         if(nr_free2 > 0)
01240                 r2 = sum_free2/nr_free2;
01241         else
01242                 r2 = (ub2+lb2)/2;
01243         
01244         si->r = (r1+r2)/2;
01245         return (r1-r2)/2;
01246 }
01247 
01248 //
01249 // Q matrices for various formulations
01250 //
01251 class SVC_Q: public Kernel
01252 { 
01253 public:
01254         SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01255         :Kernel(prob.l, prob.x, param)
01256         {
01257                 clone(y,y_,prob.l);
01258                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01259                 QD = new double[prob.l];
01260                 for(int i=0;i<prob.l;i++)
01261                         QD[i] = (this->*kernel_function)(i,i);
01262         }
01263         
01264         Qfloat *get_Q(int i, int len) const
01265         {
01266                 Qfloat *data;
01267                 int start, j;
01268                 if((start = cache->get_data(i,&data,len)) < len)
01269                 {
01270                         for(j=start;j<len;j++)
01271                                 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01272                 }
01273                 return data;
01274         }
01275 
01276         double *get_QD() const
01277         {
01278                 return QD;
01279         }
01280 
01281         void swap_index(int i, int j) const
01282         {
01283                 cache->swap_index(i,j);
01284                 Kernel::swap_index(i,j);
01285                 swap(y[i],y[j]);
01286                 swap(QD[i],QD[j]);
01287         }
01288 
01289         ~SVC_Q()
01290         {
01291                 delete[] y;
01292                 delete cache;
01293                 delete[] QD;
01294         }
01295 private:
01296         schar *y;
01297         Cache *cache;
01298         double *QD;
01299 };
01300 
01301 class ONE_CLASS_Q: public Kernel
01302 {
01303 public:
01304         ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01305         :Kernel(prob.l, prob.x, param)
01306         {
01307                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01308                 QD = new double[prob.l];
01309                 for(int i=0;i<prob.l;i++)
01310                         QD[i] = (this->*kernel_function)(i,i);
01311         }
01312         
01313         Qfloat *get_Q(int i, int len) const
01314         {
01315                 Qfloat *data;
01316                 int start, j;
01317                 if((start = cache->get_data(i,&data,len)) < len)
01318                 {
01319                         for(j=start;j<len;j++)
01320                                 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01321                 }
01322                 return data;
01323         }
01324 
01325         double *get_QD() const
01326         {
01327                 return QD;
01328         }
01329 
01330         void swap_index(int i, int j) const
01331         {
01332                 cache->swap_index(i,j);
01333                 Kernel::swap_index(i,j);
01334                 swap(QD[i],QD[j]);
01335         }
01336 
01337         ~ONE_CLASS_Q()
01338         {
01339                 delete cache;
01340                 delete[] QD;
01341         }
01342 private:
01343         Cache *cache;
01344         double *QD;
01345 };
01346 
01347 class SVR_Q: public Kernel
01348 { 
01349 public:
01350         SVR_Q(const svm_problem& prob, const svm_parameter& param)
01351         :Kernel(prob.l, prob.x, param)
01352         {
01353                 l = prob.l;
01354                 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01355                 QD = new double[2*l];
01356                 sign = new schar[2*l];
01357                 index = new int[2*l];
01358                 for(int k=0;k<l;k++)
01359                 {
01360                         sign[k] = 1;
01361                         sign[k+l] = -1;
01362                         index[k] = k;
01363                         index[k+l] = k;
01364                         QD[k] = (this->*kernel_function)(k,k);
01365                         QD[k+l] = QD[k];
01366                 }
01367                 buffer[0] = new Qfloat[2*l];
01368                 buffer[1] = new Qfloat[2*l];
01369                 next_buffer = 0;
01370         }
01371 
01372         void swap_index(int i, int j) const
01373         {
01374                 swap(sign[i],sign[j]);
01375                 swap(index[i],index[j]);
01376                 swap(QD[i],QD[j]);
01377         }
01378         
01379         Qfloat *get_Q(int i, int len) const
01380         {
01381                 Qfloat *data;
01382                 int j, real_i = index[i];
01383                 if(cache->get_data(real_i,&data,l) < l)
01384                 {
01385                         for(j=0;j<l;j++)
01386                                 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01387                 }
01388 
01389                 // reorder and copy
01390                 Qfloat *buf = buffer[next_buffer];
01391                 next_buffer = 1 - next_buffer;
01392                 schar si = sign[i];
01393                 for(j=0;j<len;j++)
01394                         buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
01395                 return buf;
01396         }
01397 
01398         double *get_QD() const
01399         {
01400                 return QD;
01401         }
01402 
01403         ~SVR_Q()
01404         {
01405                 delete cache;
01406                 delete[] sign;
01407                 delete[] index;
01408                 delete[] buffer[0];
01409                 delete[] buffer[1];
01410                 delete[] QD;
01411         }
01412 private:
01413         int l;
01414         Cache *cache;
01415         schar *sign;
01416         int *index;
01417         mutable int next_buffer;
01418         Qfloat *buffer[2];
01419         double *QD;
01420 };
01421 
01422 //
01423 // construct and solve various formulations
01424 //
01425 static void solve_c_svc(
01426         const svm_problem *prob, const svm_parameter* param,
01427         double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01428 {
01429         int l = prob->l;
01430         double *minus_ones = new double[l];
01431         schar *y = new schar[l];
01432 
01433         int i;
01434 
01435         for(i=0;i<l;i++)
01436         {
01437                 alpha[i] = 0;
01438                 minus_ones[i] = -1;
01439                 if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
01440         }
01441 
01442         Solver s;
01443         s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01444                 alpha, Cp, Cn, param->eps, si, param->shrinking);
01445 
01446         double sum_alpha=0;
01447         for(i=0;i<l;i++)
01448                 sum_alpha += alpha[i];
01449 
01450         if (Cp==Cn)
01451                 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01452 
01453         for(i=0;i<l;i++)
01454                 alpha[i] *= y[i];
01455 
01456         delete[] minus_ones;
01457         delete[] y;
01458 }
01459 
01460 static void solve_nu_svc(
01461         const svm_problem *prob, const svm_parameter *param,
01462         double *alpha, Solver::SolutionInfo* si)
01463 {
01464         int i;
01465         int l = prob->l;
01466         double nu = param->nu;
01467 
01468         schar *y = new schar[l];
01469 
01470         for(i=0;i<l;i++)
01471                 if(prob->y[i]>0)
01472                         y[i] = +1;
01473                 else
01474                         y[i] = -1;
01475 
01476         double sum_pos = nu*l/2;
01477         double sum_neg = nu*l/2;
01478 
01479         for(i=0;i<l;i++)
01480                 if(y[i] == +1)
01481                 {
01482                         alpha[i] = min(1.0,sum_pos);
01483                         sum_pos -= alpha[i];
01484                 }
01485                 else
01486                 {
01487                         alpha[i] = min(1.0,sum_neg);
01488                         sum_neg -= alpha[i];
01489                 }
01490 
01491         double *zeros = new double[l];
01492 
01493         for(i=0;i<l;i++)
01494                 zeros[i] = 0;
01495 
01496         Solver_NU s;
01497         s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01498                 alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
01499         double r = si->r;
01500 
01501         info("C = %f\n",1/r);
01502 
01503         for(i=0;i<l;i++)
01504                 alpha[i] *= y[i]/r;
01505 
01506         si->rho /= r;
01507         si->obj /= (r*r);
01508         si->upper_bound_p = 1/r;
01509         si->upper_bound_n = 1/r;
01510 
01511         delete[] y;
01512         delete[] zeros;
01513 }
01514 
01515 static void solve_one_class(
01516         const svm_problem *prob, const svm_parameter *param,
01517         double *alpha, Solver::SolutionInfo* si)
01518 {
01519         int l = prob->l;
01520         double *zeros = new double[l];
01521         schar *ones = new schar[l];
01522         int i;
01523 
01524         int n = (int)(param->nu*prob->l);       // # of alpha's at upper bound
01525 
01526         for(i=0;i<n;i++)
01527                 alpha[i] = 1;
01528         if(n<prob->l)
01529                 alpha[n] = param->nu * prob->l - n;
01530         for(i=n+1;i<l;i++)
01531                 alpha[i] = 0;
01532 
01533         for(i=0;i<l;i++)
01534         {
01535                 zeros[i] = 0;
01536                 ones[i] = 1;
01537         }
01538 
01539         Solver s;
01540         s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01541                 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01542 
01543         delete[] zeros;
01544         delete[] ones;
01545 }
01546 
01547 static void solve_epsilon_svr(
01548         const svm_problem *prob, const svm_parameter *param,
01549         double *alpha, Solver::SolutionInfo* si)
01550 {
01551         int l = prob->l;
01552         double *alpha2 = new double[2*l];
01553         double *linear_term = new double[2*l];
01554         schar *y = new schar[2*l];
01555         int i;
01556 
01557         for(i=0;i<l;i++)
01558         {
01559                 alpha2[i] = 0;
01560                 linear_term[i] = param->p - prob->y[i];
01561                 y[i] = 1;
01562 
01563                 alpha2[i+l] = 0;
01564                 linear_term[i+l] = param->p + prob->y[i];
01565                 y[i+l] = -1;
01566         }
01567 
01568         Solver s;
01569         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01570                 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01571 
01572         double sum_alpha = 0;
01573         for(i=0;i<l;i++)
01574         {
01575                 alpha[i] = alpha2[i] - alpha2[i+l];
01576                 sum_alpha += fabs(alpha[i]);
01577         }
01578         info("nu = %f\n",sum_alpha/(param->C*l));
01579 
01580         delete[] alpha2;
01581         delete[] linear_term;
01582         delete[] y;
01583 }
01584 
01585 static void solve_nu_svr(
01586         const svm_problem *prob, const svm_parameter *param,
01587         double *alpha, Solver::SolutionInfo* si)
01588 {
01589         int l = prob->l;
01590         double C = param->C;
01591         double *alpha2 = new double[2*l];
01592         double *linear_term = new double[2*l];
01593         schar *y = new schar[2*l];
01594         int i;
01595 
01596         double sum = C * param->nu * l / 2;
01597         for(i=0;i<l;i++)
01598         {
01599                 alpha2[i] = alpha2[i+l] = min(sum,C);
01600                 sum -= alpha2[i];
01601 
01602                 linear_term[i] = - prob->y[i];
01603                 y[i] = 1;
01604 
01605                 linear_term[i+l] = prob->y[i];
01606                 y[i+l] = -1;
01607         }
01608 
01609         Solver_NU s;
01610         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01611                 alpha2, C, C, param->eps, si, param->shrinking);
01612 
01613         info("epsilon = %f\n",-si->r);
01614 
01615         for(i=0;i<l;i++)
01616                 alpha[i] = alpha2[i] - alpha2[i+l];
01617 
01618         delete[] alpha2;
01619         delete[] linear_term;
01620         delete[] y;
01621 }
01622 
01623 //
01624 // decision_function
01625 //
01626 struct decision_function
01627 {
01628         double *alpha;
01629         double rho;     
01630 };
01631 
01632 static decision_function svm_train_one(
01633         const svm_problem *prob, const svm_parameter *param,
01634         double Cp, double Cn)
01635 {
01636         double *alpha = Malloc(double,prob->l);
01637         Solver::SolutionInfo si;
01638         switch(param->svm_type)
01639         {
01640                 case C_SVC:
01641                         solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01642                         break;
01643                 case NU_SVC:
01644                         solve_nu_svc(prob,param,alpha,&si);
01645                         break;
01646                 case ONE_CLASS:
01647                         solve_one_class(prob,param,alpha,&si);
01648                         break;
01649                 case EPSILON_SVR:
01650                         solve_epsilon_svr(prob,param,alpha,&si);
01651                         break;
01652                 case NU_SVR:
01653                         solve_nu_svr(prob,param,alpha,&si);
01654                         break;
01655         }
01656 
01657         info("obj = %f, rho = %f\n",si.obj,si.rho);
01658 
01659         // output SVs
01660 
01661         int nSV = 0;
01662         int nBSV = 0;
01663         for(int i=0;i<prob->l;i++)
01664         {
01665                 if(fabs(alpha[i]) > 0)
01666                 {
01667                         ++nSV;
01668                         if(prob->y[i] > 0)
01669                         {
01670                                 if(fabs(alpha[i]) >= si.upper_bound_p)
01671                                         ++nBSV;
01672                         }
01673                         else
01674                         {
01675                                 if(fabs(alpha[i]) >= si.upper_bound_n)
01676                                         ++nBSV;
01677                         }
01678                 }
01679         }
01680 
01681         info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01682 
01683         decision_function f;
01684         f.alpha = alpha;
01685         f.rho = si.rho;
01686         return f;
01687 }
01688 
01689 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
01690 static void sigmoid_train(
01691         int l, const double *dec_values, const double *labels, 
01692         double& A, double& B)
01693 {
01694         double prior1=0, prior0 = 0;
01695         int i;
01696 
01697         for (i=0;i<l;i++)
01698                 if (labels[i] > 0) prior1+=1;
01699                 else prior0+=1;
01700         
01701         int max_iter=100;       // Maximal number of iterations
01702         double min_step=1e-10;  // Minimal step taken in line search
01703         double sigma=1e-12;     // For numerically strict PD of Hessian
01704         double eps=1e-5;
01705         double hiTarget=(prior1+1.0)/(prior1+2.0);
01706         double loTarget=1/(prior0+2.0);
01707         double *t=Malloc(double,l);
01708         double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01709         double newA,newB,newf,d1,d2;
01710         int iter; 
01711         
01712         // Initial Point and Initial Fun Value
01713         A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01714         double fval = 0.0;
01715 
01716         for (i=0;i<l;i++)
01717         {
01718                 if (labels[i]>0) t[i]=hiTarget;
01719                 else t[i]=loTarget;
01720                 fApB = dec_values[i]*A+B;
01721                 if (fApB>=0)
01722                         fval += t[i]*fApB + log(1+exp(-fApB));
01723                 else
01724                         fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01725         }
01726         for (iter=0;iter<max_iter;iter++)
01727         {
01728                 // Update Gradient and Hessian (use H' = H + sigma I)
01729                 h11=sigma; // numerically ensures strict PD
01730                 h22=sigma;
01731                 h21=0.0;g1=0.0;g2=0.0;
01732                 for (i=0;i<l;i++)
01733                 {
01734                         fApB = dec_values[i]*A+B;
01735                         if (fApB >= 0)
01736                         {
01737                                 p=exp(-fApB)/(1.0+exp(-fApB));
01738                                 q=1.0/(1.0+exp(-fApB));
01739                         }
01740                         else
01741                         {
01742                                 p=1.0/(1.0+exp(fApB));
01743                                 q=exp(fApB)/(1.0+exp(fApB));
01744                         }
01745                         d2=p*q;
01746                         h11+=dec_values[i]*dec_values[i]*d2;
01747                         h22+=d2;
01748                         h21+=dec_values[i]*d2;
01749                         d1=t[i]-p;
01750                         g1+=dec_values[i]*d1;
01751                         g2+=d1;
01752                 }
01753 
01754                 // Stopping Criteria
01755                 if (fabs(g1)<eps && fabs(g2)<eps)
01756                         break;
01757 
01758                 // Finding Newton direction: -inv(H') * g
01759                 det=h11*h22-h21*h21;
01760                 dA=-(h22*g1 - h21 * g2) / det;
01761                 dB=-(-h21*g1+ h11 * g2) / det;
01762                 gd=g1*dA+g2*dB;
01763 
01764 
01765                 stepsize = 1;           // Line Search
01766                 while (stepsize >= min_step)
01767                 {
01768                         newA = A + stepsize * dA;
01769                         newB = B + stepsize * dB;
01770 
01771                         // New function value
01772                         newf = 0.0;
01773                         for (i=0;i<l;i++)
01774                         {
01775                                 fApB = dec_values[i]*newA+newB;
01776                                 if (fApB >= 0)
01777                                         newf += t[i]*fApB + log(1+exp(-fApB));
01778                                 else
01779                                         newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01780                         }
01781                         // Check sufficient decrease
01782                         if (newf<fval+0.0001*stepsize*gd)
01783                         {
01784                                 A=newA;B=newB;fval=newf;
01785                                 break;
01786                         }
01787                         else
01788                                 stepsize = stepsize / 2.0;
01789                 }
01790 
01791                 if (stepsize < min_step)
01792                 {
01793                         info("Line search fails in two-class probability estimates\n");
01794                         break;
01795                 }
01796         }
01797 
01798         if (iter>=max_iter)
01799                 info("Reaching maximal iterations in two-class probability estimates\n");
01800         free(t);
01801 }
01802 
01803 static double sigmoid_predict(double decision_value, double A, double B)
01804 {
01805         double fApB = decision_value*A+B;
01806         if (fApB >= 0)
01807                 return exp(-fApB)/(1.0+exp(-fApB));
01808         else
01809                 return 1.0/(1+exp(fApB)) ;
01810 }
01811 
01812 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
01813 static void multiclass_probability(int k, double **r, double *p)
01814 {
01815         int t,j;
01816         int iter = 0, max_iter=max(100,k);
01817         double **Q=Malloc(double *,k);
01818         double *Qp=Malloc(double,k);
01819         double pQp, eps=0.005/k;
01820         
01821         for (t=0;t<k;t++)
01822         {
01823                 p[t]=1.0/k;  // Valid if k = 1
01824                 Q[t]=Malloc(double,k);
01825                 Q[t][t]=0;
01826                 for (j=0;j<t;j++)
01827                 {
01828                         Q[t][t]+=r[j][t]*r[j][t];
01829                         Q[t][j]=Q[j][t];
01830                 }
01831                 for (j=t+1;j<k;j++)
01832                 {
01833                         Q[t][t]+=r[j][t]*r[j][t];
01834                         Q[t][j]=-r[j][t]*r[t][j];
01835                 }
01836         }
01837         for (iter=0;iter<max_iter;iter++)
01838         {
01839                 // stopping condition, recalculate QP,pQP for numerical accuracy
01840                 pQp=0;
01841                 for (t=0;t<k;t++)
01842                 {
01843                         Qp[t]=0;
01844                         for (j=0;j<k;j++)
01845                                 Qp[t]+=Q[t][j]*p[j];
01846                         pQp+=p[t]*Qp[t];
01847                 }
01848                 double max_error=0;
01849                 for (t=0;t<k;t++)
01850                 {
01851                         double error=fabs(Qp[t]-pQp);
01852                         if (error>max_error)
01853                                 max_error=error;
01854                 }
01855                 if (max_error<eps) break;
01856                 
01857                 for (t=0;t<k;t++)
01858                 {
01859                         double diff=(-Qp[t]+pQp)/Q[t][t];
01860                         p[t]+=diff;
01861                         pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01862                         for (j=0;j<k;j++)
01863                         {
01864                                 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01865                                 p[j]/=(1+diff);
01866                         }
01867                 }
01868         }
01869         if (iter>=max_iter)
01870                 info("Exceeds max_iter in multiclass_prob\n");
01871         for(t=0;t<k;t++) free(Q[t]);
01872         free(Q);
01873         free(Qp);
01874 }
01875 
01876 // Cross-validation decision values for probability estimates
01877 static void svm_binary_svc_probability(
01878         const svm_problem *prob, const svm_parameter *param,
01879         double Cp, double Cn, double& probA, double& probB)
01880 {
01881         int i;
01882         int nr_fold = 5;
01883         int *perm = Malloc(int,prob->l);
01884         double *dec_values = Malloc(double,prob->l);
01885 
01886         // random shuffle
01887         for(i=0;i<prob->l;i++) perm[i]=i;
01888         for(i=0;i<prob->l;i++)
01889         {
01890                 int j = i+rand()%(prob->l-i);
01891                 swap(perm[i],perm[j]);
01892         }
01893         for(i=0;i<nr_fold;i++)
01894         {
01895                 int begin = i*prob->l/nr_fold;
01896                 int end = (i+1)*prob->l/nr_fold;
01897                 int j,k;
01898                 struct svm_problem subprob;
01899 
01900                 subprob.l = prob->l-(end-begin);
01901                 subprob.x = Malloc(struct svm_node*,subprob.l);
01902                 subprob.y = Malloc(double,subprob.l);
01903                         
01904                 k=0;
01905                 for(j=0;j<begin;j++)
01906                 {
01907                         subprob.x[k] = prob->x[perm[j]];
01908                         subprob.y[k] = prob->y[perm[j]];
01909                         ++k;
01910                 }
01911                 for(j=end;j<prob->l;j++)
01912                 {
01913                         subprob.x[k] = prob->x[perm[j]];
01914                         subprob.y[k] = prob->y[perm[j]];
01915                         ++k;
01916                 }
01917                 int p_count=0,n_count=0;
01918                 for(j=0;j<k;j++)
01919                         if(subprob.y[j]>0)
01920                                 p_count++;
01921                         else
01922                                 n_count++;
01923 
01924                 if(p_count==0 && n_count==0)
01925                         for(j=begin;j<end;j++)
01926                                 dec_values[perm[j]] = 0;
01927                 else if(p_count > 0 && n_count == 0)
01928                         for(j=begin;j<end;j++)
01929                                 dec_values[perm[j]] = 1;
01930                 else if(p_count == 0 && n_count > 0)
01931                         for(j=begin;j<end;j++)
01932                                 dec_values[perm[j]] = -1;
01933                 else
01934                 {
01935                         svm_parameter subparam = *param;
01936                         subparam.probability=0;
01937                         subparam.C=1.0;
01938                         subparam.nr_weight=2;
01939                         subparam.weight_label = Malloc(int,2);
01940                         subparam.weight = Malloc(double,2);
01941                         subparam.weight_label[0]=+1;
01942                         subparam.weight_label[1]=-1;
01943                         subparam.weight[0]=Cp;
01944                         subparam.weight[1]=Cn;
01945                         struct svm_model *submodel = svm_train(&subprob,&subparam);
01946                         for(j=begin;j<end;j++)
01947                         {
01948                                 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]])); 
01949                                 // ensure +1 -1 order; reason not using CV subroutine
01950                                 dec_values[perm[j]] *= submodel->label[0];
01951                         }               
01952                         svm_free_and_destroy_model(&submodel);
01953                         svm_destroy_param(&subparam);
01954                 }
01955                 free(subprob.x);
01956                 free(subprob.y);
01957         }               
01958         sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
01959         free(dec_values);
01960         free(perm);
01961 }
01962 
01963 // Return parameter of a Laplace distribution 
01964 static double svm_svr_probability(
01965         const svm_problem *prob, const svm_parameter *param)
01966 {
01967         int i;
01968         int nr_fold = 5;
01969         double *ymv = Malloc(double,prob->l);
01970         double mae = 0;
01971 
01972         svm_parameter newparam = *param;
01973         newparam.probability = 0;
01974         svm_cross_validation(prob,&newparam,nr_fold,ymv);
01975         for(i=0;i<prob->l;i++)
01976         {
01977                 ymv[i]=prob->y[i]-ymv[i];
01978                 mae += fabs(ymv[i]);
01979         }               
01980         mae /= prob->l;
01981         double std=sqrt(2*mae*mae);
01982         int count=0;
01983         mae=0;
01984         for(i=0;i<prob->l;i++)
01985                 if (fabs(ymv[i]) > 5*std) 
01986                         count=count+1;
01987                 else 
01988                         mae+=fabs(ymv[i]);
01989         mae /= (prob->l-count);
01990         info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
01991         free(ymv);
01992         return mae;
01993 }
01994 
01995 
01996 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
01997 // perm, length l, must be allocated before calling this subroutine
01998 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
01999 {
02000         int l = prob->l;
02001         int max_nr_class = 16;
02002         int nr_class = 0;
02003         int *label = Malloc(int,max_nr_class);
02004         int *count = Malloc(int,max_nr_class);
02005         int *data_label = Malloc(int,l);        
02006         int i;
02007 
02008         for(i=0;i<l;i++)
02009         {
02010                 int this_label = (int)prob->y[i];
02011                 int j;
02012                 for(j=0;j<nr_class;j++)
02013                 {
02014                         if(this_label == label[j])
02015                         {
02016                                 ++count[j];
02017                                 break;
02018                         }
02019                 }
02020                 data_label[i] = j;
02021                 if(j == nr_class)
02022                 {
02023                         if(nr_class == max_nr_class)
02024                         {
02025                                 max_nr_class *= 2;
02026                                 label = (int *)realloc(label,max_nr_class*sizeof(int));
02027                                 count = (int *)realloc(count,max_nr_class*sizeof(int));
02028                         }
02029                         label[nr_class] = this_label;
02030                         count[nr_class] = 1;
02031                         ++nr_class;
02032                 }
02033         }
02034 
02035         int *start = Malloc(int,nr_class);
02036         start[0] = 0;
02037         for(i=1;i<nr_class;i++)
02038                 start[i] = start[i-1]+count[i-1];
02039         for(i=0;i<l;i++)
02040         {
02041                 perm[start[data_label[i]]] = i;
02042                 ++start[data_label[i]];
02043         }
02044         start[0] = 0;
02045         for(i=1;i<nr_class;i++)
02046                 start[i] = start[i-1]+count[i-1];
02047 
02048         *nr_class_ret = nr_class;
02049         *label_ret = label;
02050         *start_ret = start;
02051         *count_ret = count;
02052         free(data_label);
02053 }
02054 
02055 //
02056 // Interface functions
02057 //
02058 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02059 {
02060         svm_model *model = Malloc(svm_model,1);
02061         model->param = *param;
02062         model->free_sv = 0;     // XXX
02063 
02064         if(param->svm_type == ONE_CLASS ||
02065            param->svm_type == EPSILON_SVR ||
02066            param->svm_type == NU_SVR)
02067         {
02068                 // regression or one-class-svm
02069                 model->nr_class = 2;
02070                 model->label = NULL;
02071                 model->nSV = NULL;
02072                 model->probA = NULL; model->probB = NULL;
02073                 model->sv_coef = Malloc(double *,1);
02074 
02075                 if(param->probability && 
02076                    (param->svm_type == EPSILON_SVR ||
02077                     param->svm_type == NU_SVR))
02078                 {
02079                         model->probA = Malloc(double,1);
02080                         model->probA[0] = svm_svr_probability(prob,param);
02081                 }
02082 
02083                 decision_function f = svm_train_one(prob,param,0,0);
02084                 model->rho = Malloc(double,1);
02085                 model->rho[0] = f.rho;
02086 
02087                 int nSV = 0;
02088                 int i;
02089                 for(i=0;i<prob->l;i++)
02090                         if(fabs(f.alpha[i]) > 0) ++nSV;
02091                 model->l = nSV;
02092                 model->SV = Malloc(svm_node *,nSV);
02093                 model->sv_coef[0] = Malloc(double,nSV);
02094                 int j = 0;
02095                 for(i=0;i<prob->l;i++)
02096                         if(fabs(f.alpha[i]) > 0)
02097                         {
02098                                 model->SV[j] = prob->x[i];
02099                                 model->sv_coef[0][j] = f.alpha[i];
02100                                 ++j;
02101                         }               
02102 
02103                 free(f.alpha);
02104         }
02105         else
02106         {
02107                 // classification
02108                 int l = prob->l;
02109                 int nr_class;
02110                 int *label = NULL;
02111                 int *start = NULL;
02112                 int *count = NULL;
02113                 int *perm = Malloc(int,l);
02114 
02115                 // group training data of the same class
02116                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);            
02117                 svm_node **x = Malloc(svm_node *,l);
02118                 int i;
02119                 for(i=0;i<l;i++)
02120                         x[i] = prob->x[perm[i]];
02121 
02122                 // calculate weighted C
02123 
02124                 double *weighted_C = Malloc(double, nr_class);
02125                 for(i=0;i<nr_class;i++)
02126                         weighted_C[i] = param->C;
02127                 for(i=0;i<param->nr_weight;i++)
02128                 {       
02129                         int j;
02130                         for(j=0;j<nr_class;j++)
02131                                 if(param->weight_label[i] == label[j])
02132                                         break;
02133                         if(j == nr_class)
02134                                 fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
02135                         else
02136                                 weighted_C[j] *= param->weight[i];
02137                 }
02138 
02139                 // train k*(k-1)/2 models
02140                 
02141                 bool *nonzero = Malloc(bool,l);
02142                 for(i=0;i<l;i++)
02143                         nonzero[i] = false;
02144                 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02145 
02146                 double *probA=NULL,*probB=NULL;
02147                 if (param->probability)
02148                 {
02149                         probA=Malloc(double,nr_class*(nr_class-1)/2);
02150                         probB=Malloc(double,nr_class*(nr_class-1)/2);
02151                 }
02152 
02153                 int p = 0;
02154                 for(i=0;i<nr_class;i++)
02155                         for(int j=i+1;j<nr_class;j++)
02156                         {
02157                                 svm_problem sub_prob;
02158                                 int si = start[i], sj = start[j];
02159                                 int ci = count[i], cj = count[j];
02160                                 sub_prob.l = ci+cj;
02161                                 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02162                                 sub_prob.y = Malloc(double,sub_prob.l);
02163                                 int k;
02164                                 for(k=0;k<ci;k++)
02165                                 {
02166                                         sub_prob.x[k] = x[si+k];
02167                                         sub_prob.y[k] = +1;
02168                                 }
02169                                 for(k=0;k<cj;k++)
02170                                 {
02171                                         sub_prob.x[ci+k] = x[sj+k];
02172                                         sub_prob.y[ci+k] = -1;
02173                                 }
02174 
02175                                 if(param->probability)
02176                                         svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02177 
02178                                 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02179                                 for(k=0;k<ci;k++)
02180                                         if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02181                                                 nonzero[si+k] = true;
02182                                 for(k=0;k<cj;k++)
02183                                         if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02184                                                 nonzero[sj+k] = true;
02185                                 free(sub_prob.x);
02186                                 free(sub_prob.y);
02187                                 ++p;
02188                         }
02189 
02190                 // build output
02191 
02192                 model->nr_class = nr_class;
02193                 
02194                 model->label = Malloc(int,nr_class);
02195                 for(i=0;i<nr_class;i++)
02196                         model->label[i] = label[i];
02197                 
02198                 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02199                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02200                         model->rho[i] = f[i].rho;
02201 
02202                 if(param->probability)
02203                 {
02204                         model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02205                         model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02206                         for(i=0;i<nr_class*(nr_class-1)/2;i++)
02207                         {
02208                                 model->probA[i] = probA[i];
02209                                 model->probB[i] = probB[i];
02210                         }
02211                 }
02212                 else
02213                 {
02214                         model->probA=NULL;
02215                         model->probB=NULL;
02216                 }
02217 
02218                 int total_sv = 0;
02219                 int *nz_count = Malloc(int,nr_class);
02220                 model->nSV = Malloc(int,nr_class);
02221                 for(i=0;i<nr_class;i++)
02222                 {
02223                         int nSV = 0;
02224                         for(int j=0;j<count[i];j++)
02225                                 if(nonzero[start[i]+j])
02226                                 {       
02227                                         ++nSV;
02228                                         ++total_sv;
02229                                 }
02230                         model->nSV[i] = nSV;
02231                         nz_count[i] = nSV;
02232                 }
02233                 
02234                 info("Total nSV = %d\n",total_sv);
02235 
02236                 model->l = total_sv;
02237                 model->SV = Malloc(svm_node *,total_sv);
02238                 p = 0;
02239                 for(i=0;i<l;i++)
02240             if(nonzero[i]) 
02241                         {
02242                                 model->SV[p++] = x[i];
02243                                 //info("support vec %d\n", perm[i]);
02244                         }
02245                         //if(nonzero[i]) model->SV[p++] = x[i];
02246 
02247                 int *nz_start = Malloc(int,nr_class);
02248                 nz_start[0] = 0;
02249                 for(i=1;i<nr_class;i++)
02250                         nz_start[i] = nz_start[i-1]+nz_count[i-1];
02251 
02252                 model->sv_coef = Malloc(double *,nr_class-1);
02253                 for(i=0;i<nr_class-1;i++)
02254                         model->sv_coef[i] = Malloc(double,total_sv);
02255 
02256                 p = 0;
02257                 for(i=0;i<nr_class;i++)
02258                         for(int j=i+1;j<nr_class;j++)
02259                         {
02260                                 // classifier (i,j): coefficients with
02261                                 // i are in sv_coef[j-1][nz_start[i]...],
02262                                 // j are in sv_coef[i][nz_start[j]...]
02263 
02264                                 int si = start[i];
02265                                 int sj = start[j];
02266                                 int ci = count[i];
02267                                 int cj = count[j];
02268                                 
02269                                 int q = nz_start[i];
02270                                 int k;
02271                                 for(k=0;k<ci;k++)
02272                                         if(nonzero[si+k])
02273                                                 model->sv_coef[j-1][q++] = f[p].alpha[k];
02274                                 q = nz_start[j];
02275                                 for(k=0;k<cj;k++)
02276                                         if(nonzero[sj+k])
02277                                                 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02278                                 ++p;
02279                         }
02280             //fprintf(stdout, "size of bool %d %d %d", sizeof(bool), sizeof(unsigned int), sizeof(unsigned char));      
02281         model->SVidx = (unsigned char*) nonzero;
02282                 free(label);
02283                 free(probA);
02284                 free(probB);
02285                 free(count);
02286                 free(perm);
02287                 free(start);
02288                 free(x);
02289                 free(weighted_C);
02290                 //free(nonzero);
02291                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02292                         free(f[i].alpha);
02293                 free(f);
02294                 free(nz_count);
02295                 free(nz_start);
02296         }
02297         return model;
02298 }
02299 
02300 // Stratified cross validation
02301 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02302 {
02303         int i;
02304         int *fold_start = Malloc(int,nr_fold+1);
02305         int l = prob->l;
02306         int *perm = Malloc(int,l);
02307         int nr_class;
02308 
02309         // stratified cv may not give leave-one-out rate
02310         // Each class to l folds -> some folds may have zero elements
02311         if((param->svm_type == C_SVC ||
02312             param->svm_type == NU_SVC) && nr_fold < l)
02313         {
02314                 int *start = NULL;
02315                 int *label = NULL;
02316                 int *count = NULL;
02317                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02318 
02319                 // random shuffle and then data grouped by fold using the array perm
02320                 int *fold_count = Malloc(int,nr_fold);
02321                 int c;
02322                 int *index = Malloc(int,l);
02323                 for(i=0;i<l;i++)
02324                         index[i]=perm[i];
02325                 for (c=0; c<nr_class; c++) 
02326                         for(i=0;i<count[c];i++)
02327                         {
02328                                 int j = i+rand()%(count[c]-i);
02329                                 swap(index[start[c]+j],index[start[c]+i]);
02330                         }
02331                 for(i=0;i<nr_fold;i++)
02332                 {
02333                         fold_count[i] = 0;
02334                         for (c=0; c<nr_class;c++)
02335                                 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02336                 }
02337                 fold_start[0]=0;
02338                 for (i=1;i<=nr_fold;i++)
02339                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02340                 for (c=0; c<nr_class;c++)
02341                         for(i=0;i<nr_fold;i++)
02342                         {
02343                                 int begin = start[c]+i*count[c]/nr_fold;
02344                                 int end = start[c]+(i+1)*count[c]/nr_fold;
02345                                 for(int j=begin;j<end;j++)
02346                                 {
02347                                         perm[fold_start[i]] = index[j];
02348                                         fold_start[i]++;
02349                                 }
02350                         }
02351                 fold_start[0]=0;
02352                 for (i=1;i<=nr_fold;i++)
02353                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02354                 free(start);    
02355                 free(label);
02356                 free(count);    
02357                 free(index);
02358                 free(fold_count);
02359         }
02360         else
02361         {
02362                 for(i=0;i<l;i++) perm[i]=i;
02363                 for(i=0;i<l;i++)
02364                 {
02365                         int j = i+rand()%(l-i);
02366                         swap(perm[i],perm[j]);
02367                 }
02368                 for(i=0;i<=nr_fold;i++)
02369                         fold_start[i]=i*l/nr_fold;
02370         }
02371 
02372         for(i=0;i<nr_fold;i++)
02373         {
02374                 int begin = fold_start[i];
02375                 int end = fold_start[i+1];
02376                 int j,k;
02377                 struct svm_problem subprob;
02378 
02379                 subprob.l = l-(end-begin);
02380                 subprob.x = Malloc(struct svm_node*,subprob.l);
02381                 subprob.y = Malloc(double,subprob.l);
02382                         
02383                 k=0;
02384                 for(j=0;j<begin;j++)
02385                 {
02386                         subprob.x[k] = prob->x[perm[j]];
02387                         subprob.y[k] = prob->y[perm[j]];
02388                         ++k;
02389                 }
02390                 for(j=end;j<l;j++)
02391                 {
02392                         subprob.x[k] = prob->x[perm[j]];
02393                         subprob.y[k] = prob->y[perm[j]];
02394                         ++k;
02395                 }
02396                 struct svm_model *submodel = svm_train(&subprob,param);
02397                 if(param->probability && 
02398                    (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02399                 {
02400                         double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02401                         for(j=begin;j<end;j++)
02402                                 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02403                         free(prob_estimates);                   
02404                 }
02405                 else
02406                         for(j=begin;j<end;j++)
02407                                 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02408                 svm_free_and_destroy_model(&submodel);
02409                 free(subprob.x);
02410                 free(subprob.y);
02411         }               
02412         free(fold_start);
02413         free(perm);     
02414 }
02415 
02416 
02417 int svm_get_svm_type(const svm_model *model)
02418 {
02419         return model->param.svm_type;
02420 }
02421 
02422 int svm_get_nr_class(const svm_model *model)
02423 {
02424         return model->nr_class;
02425 }
02426 
02427 void svm_get_labels(const svm_model *model, int* label)
02428 {
02429         if (model->label != NULL)
02430                 for(int i=0;i<model->nr_class;i++)
02431                         label[i] = model->label[i];
02432 }
02433 
02434 void svm_get_support_vectors(const svm_model *model, int nx, int* sv)
02435 {
02436     //fprintf(stdout, "nx is %d", nx);
02437         if (model->label != NULL)
02438     {
02439                 for(int i=0;i<nx;i++)
02440         {
02441             //fprintf(stdout, "at %d", i);
02442                         sv[i] = model->SVidx[i];
02443         }
02444     }
02445 }
02446 
02447 double svm_get_svr_probability(const svm_model *model)
02448 {
02449         if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02450             model->probA!=NULL)
02451                 return model->probA[0];
02452         else
02453         {
02454                 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
02455                 return 0;
02456         }
02457 }
02458 
02459 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02460 {
02461         if(model->param.svm_type == ONE_CLASS ||
02462            model->param.svm_type == EPSILON_SVR ||
02463            model->param.svm_type == NU_SVR)
02464         {
02465                 double *sv_coef = model->sv_coef[0];
02466                 double sum = 0;
02467                 for(int i=0;i<model->l;i++)
02468                         sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02469                 sum -= model->rho[0];
02470                 *dec_values = sum;
02471 
02472                 if(model->param.svm_type == ONE_CLASS)
02473                         return (sum>0)?1:-1;
02474                 else
02475                         return sum;
02476         }
02477         else
02478         {
02479                 int i;
02480                 int nr_class = model->nr_class;
02481                 int l = model->l;
02482                 
02483                 double *kvalue = Malloc(double,l);
02484                 for(i=0;i<l;i++)
02485                         kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02486 
02487                 int *start = Malloc(int,nr_class);
02488                 start[0] = 0;
02489                 for(i=1;i<nr_class;i++)
02490                         start[i] = start[i-1]+model->nSV[i-1];
02491 
02492                 int *vote = Malloc(int,nr_class);
02493                 for(i=0;i<nr_class;i++)
02494                         vote[i] = 0;
02495 
02496                 int p=0;
02497         //fprintf(stdout, "nr_class %d", nr_class);
02498                 for(i=0;i<nr_class;i++)
02499                         for(int j=i+1;j<nr_class;j++)
02500                         {
02501                                 double sum = 0;
02502                                 int si = start[i];
02503                                 int sj = start[j];
02504                                 int ci = model->nSV[i];
02505                                 int cj = model->nSV[j];
02506                                 
02507                                 int k;
02508                                 double *coef1 = model->sv_coef[j-1];
02509                                 double *coef2 = model->sv_coef[i];
02510                                 for(k=0;k<ci;k++)
02511                                         sum += coef1[si+k] * kvalue[si+k];
02512                                 for(k=0;k<cj;k++)
02513                                         sum += coef2[sj+k] * kvalue[sj+k];
02514                                 sum -= model->rho[p];
02515                                 dec_values[p] = sum;
02516                 //fprintf(stdout, "dec_values %d vs %d result %d %.3f\n", i, j, p, dec_values[p]);
02517 
02518                                 if(dec_values[p] > 0)
02519                                         ++vote[i];
02520                                 else
02521                                         ++vote[j];
02522                                 p++;
02523                         }
02524 
02525                 int vote_max_idx = 0;
02526                 for(i=1;i<nr_class;i++)
02527                         if(vote[i] > vote[vote_max_idx])
02528                                 vote_max_idx = i;
02529 
02530                 free(kvalue);
02531                 free(start);
02532                 free(vote);
02533                 return model->label[vote_max_idx];
02534         }
02535 }
02536 
02537 double svm_predict(const svm_model *model, const svm_node *x)
02538 {
02539         int nr_class = model->nr_class;
02540         double *dec_values;
02541         if(model->param.svm_type == ONE_CLASS ||
02542            model->param.svm_type == EPSILON_SVR ||
02543            model->param.svm_type == NU_SVR)
02544                 dec_values = Malloc(double, 1);
02545         else 
02546                 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02547         double pred_result = svm_predict_values(model, x, dec_values);
02548         free(dec_values);
02549         return pred_result;
02550 }
02551 
02552 double svm_predict_probability(
02553         const svm_model *model, const svm_node *x, double *prob_estimates)
02554 {
02555         if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02556             model->probA!=NULL && model->probB!=NULL)
02557         {
02558                 int i;
02559                 int nr_class = model->nr_class;
02560                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02561                 svm_predict_values(model, x, dec_values);
02562 
02563                 double min_prob=1e-7;
02564                 double **pairwise_prob=Malloc(double *,nr_class);
02565                 for(i=0;i<nr_class;i++)
02566                         pairwise_prob[i]=Malloc(double,nr_class);
02567                 int k=0;
02568                 for(i=0;i<nr_class;i++)
02569                         for(int j=i+1;j<nr_class;j++)
02570                         {
02571                                 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02572                                 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02573                                 k++;
02574                         }
02575                 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02576 
02577                 int prob_max_idx = 0;
02578                 for(i=1;i<nr_class;i++)
02579                         if(prob_estimates[i] > prob_estimates[prob_max_idx])
02580                                 prob_max_idx = i;
02581                 for(i=0;i<nr_class;i++)
02582                         free(pairwise_prob[i]);
02583                 free(dec_values);
02584                 free(pairwise_prob);         
02585                 return model->label[prob_max_idx];
02586         }
02587         else 
02588                 return svm_predict(model, x);
02589 }
02590 
02591 static const char *svm_type_table[] =
02592 {
02593         "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02594 };
02595 
02596 static const char *kernel_type_table[]=
02597 {
02598         "linear","polynomial","rbf","sigmoid","precomputed",NULL
02599 };
02600 
02601 int svm_save_model(const char *model_file_name, const svm_model *model)
02602 {
02603         FILE *fp = fopen(model_file_name,"w");
02604         if(fp==NULL) return -1;
02605 
02606         const svm_parameter& param = model->param;
02607 
02608         fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02609         fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02610 
02611         if(param.kernel_type == POLY)
02612                 fprintf(fp,"degree %d\n", param.degree);
02613 
02614         if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02615                 fprintf(fp,"gamma %g\n", param.gamma);
02616 
02617         if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02618                 fprintf(fp,"coef0 %g\n", param.coef0);
02619 
02620         int nr_class = model->nr_class;
02621         int l = model->l;
02622         fprintf(fp, "nr_class %d\n", nr_class);
02623         fprintf(fp, "total_sv %d\n",l);
02624         
02625         {
02626                 fprintf(fp, "rho");
02627                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02628                         fprintf(fp," %g",model->rho[i]);
02629                 fprintf(fp, "\n");
02630         }
02631         
02632         if(model->label)
02633         {
02634                 fprintf(fp, "label");
02635                 for(int i=0;i<nr_class;i++)
02636                         fprintf(fp," %d",model->label[i]);
02637                 fprintf(fp, "\n");
02638         }
02639 
02640         if(model->probA) // regression has probA only
02641         {
02642                 fprintf(fp, "probA");
02643                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02644                         fprintf(fp," %g",model->probA[i]);
02645                 fprintf(fp, "\n");
02646         }
02647         if(model->probB)
02648         {
02649                 fprintf(fp, "probB");
02650                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02651                         fprintf(fp," %g",model->probB[i]);
02652                 fprintf(fp, "\n");
02653         }
02654 
02655         if(model->nSV)
02656         {
02657                 fprintf(fp, "nr_sv");
02658                 for(int i=0;i<nr_class;i++)
02659                         fprintf(fp," %d",model->nSV[i]);
02660                 fprintf(fp, "\n");
02661         }
02662 
02663         fprintf(fp, "SV\n");
02664         const double * const *sv_coef = model->sv_coef;
02665         const svm_node * const *SV = model->SV;
02666 
02667         for(int i=0;i<l;i++)
02668         {
02669                 for(int j=0;j<nr_class-1;j++)
02670                         fprintf(fp, "%.16g ",sv_coef[j][i]);
02671 
02672                 const svm_node *p = SV[i];
02673 
02674                 if(param.kernel_type == PRECOMPUTED)
02675                         fprintf(fp,"0:%d ",(int)(p->value));
02676                 else
02677                         while(p->index != -1)
02678                         {
02679                                 fprintf(fp,"%d:%.8g ",p->index,p->value);
02680                                 p++;
02681                         }
02682                 fprintf(fp, "\n");
02683         }
02684         if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02685         else return 0;
02686 }
02687 
02688 static char *line = NULL;
02689 static int max_line_len;
02690 
02691 static char* readline(FILE *input)
02692 {
02693         int len;
02694 
02695         if(fgets(line,max_line_len,input) == NULL)
02696                 return NULL;
02697 
02698         while(strrchr(line,'\n') == NULL)
02699         {
02700                 max_line_len *= 2;
02701                 line = (char *) realloc(line,max_line_len);
02702                 len = (int) strlen(line);
02703                 if(fgets(line+len,max_line_len-len,input) == NULL)
02704                         break;
02705         }
02706         return line;
02707 }
02708 
02709 svm_model *svm_load_model(const char *model_file_name)
02710 {
02711         FILE *fp = fopen(model_file_name,"rb");
02712         if(fp==NULL) return NULL;
02713         
02714         // read parameters
02715 
02716         svm_model *model = Malloc(svm_model,1);
02717         svm_parameter& param = model->param;
02718         model->rho = NULL;
02719         model->probA = NULL;
02720         model->probB = NULL;
02721         model->label = NULL;
02722         model->nSV = NULL;
02723 
02724         char cmd[81];
02725         while(1)
02726         {
02727                 fscanf(fp,"%80s",cmd);
02728 
02729                 if(strcmp(cmd,"svm_type")==0)
02730                 {
02731                         fscanf(fp,"%80s",cmd);
02732                         int i;
02733                         for(i=0;svm_type_table[i];i++)
02734                         {
02735                                 if(strcmp(svm_type_table[i],cmd)==0)
02736                                 {
02737                                         param.svm_type=i;
02738                                         break;
02739                                 }
02740                         }
02741                         if(svm_type_table[i] == NULL)
02742                         {
02743                                 fprintf(stderr,"unknown svm type.\n");
02744                                 free(model->rho);
02745                                 free(model->label);
02746                                 free(model->nSV);
02747                                 free(model);
02748                                 return NULL;
02749                         }
02750                 }
02751                 else if(strcmp(cmd,"kernel_type")==0)
02752                 {               
02753                         fscanf(fp,"%80s",cmd);
02754                         int i;
02755                         for(i=0;kernel_type_table[i];i++)
02756                         {
02757                                 if(strcmp(kernel_type_table[i],cmd)==0)
02758                                 {
02759                                         param.kernel_type=i;
02760                                         break;
02761                                 }
02762                         }
02763                         if(kernel_type_table[i] == NULL)
02764                         {
02765                                 fprintf(stderr,"unknown kernel function.\n");
02766                                 free(model->rho);
02767                                 free(model->label);
02768                                 free(model->nSV);
02769                                 free(model);
02770                                 return NULL;
02771                         }
02772                 }
02773                 else if(strcmp(cmd,"degree")==0)
02774                         fscanf(fp,"%d",&param.degree);
02775                 else if(strcmp(cmd,"gamma")==0)
02776                         fscanf(fp,"%lf",&param.gamma);
02777                 else if(strcmp(cmd,"coef0")==0)
02778                         fscanf(fp,"%lf",&param.coef0);
02779                 else if(strcmp(cmd,"nr_class")==0)
02780                         fscanf(fp,"%d",&model->nr_class);
02781                 else if(strcmp(cmd,"total_sv")==0)
02782                         fscanf(fp,"%d",&model->l);
02783                 else if(strcmp(cmd,"rho")==0)
02784                 {
02785                         int n = model->nr_class * (model->nr_class-1)/2;
02786                         model->rho = Malloc(double,n);
02787                         for(int i=0;i<n;i++)
02788                                 fscanf(fp,"%lf",&model->rho[i]);
02789                 }
02790                 else if(strcmp(cmd,"label")==0)
02791                 {
02792                         int n = model->nr_class;
02793                         model->label = Malloc(int,n);
02794                         for(int i=0;i<n;i++)
02795                                 fscanf(fp,"%d",&model->label[i]);
02796                 }
02797                 else if(strcmp(cmd,"probA")==0)
02798                 {
02799                         int n = model->nr_class * (model->nr_class-1)/2;
02800                         model->probA = Malloc(double,n);
02801                         for(int i=0;i<n;i++)
02802                                 fscanf(fp,"%lf",&model->probA[i]);
02803                 }
02804                 else if(strcmp(cmd,"probB")==0)
02805                 {
02806                         int n = model->nr_class * (model->nr_class-1)/2;
02807                         model->probB = Malloc(double,n);
02808                         for(int i=0;i<n;i++)
02809                                 fscanf(fp,"%lf",&model->probB[i]);
02810                 }
02811                 else if(strcmp(cmd,"nr_sv")==0)
02812                 {
02813                         int n = model->nr_class;
02814                         model->nSV = Malloc(int,n);
02815                         for(int i=0;i<n;i++)
02816                                 fscanf(fp,"%d",&model->nSV[i]);
02817                 }
02818                 else if(strcmp(cmd,"SV")==0)
02819                 {
02820                         while(1)
02821                         {
02822                                 int c = getc(fp);
02823                                 if(c==EOF || c=='\n') break;    
02824                         }
02825                         break;
02826                 }
02827                 else
02828                 {
02829                         fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02830                         free(model->rho);
02831                         free(model->label);
02832                         free(model->nSV);
02833                         free(model);
02834                         return NULL;
02835                 }
02836         }
02837 
02838         // read sv_coef and SV
02839 
02840         int elements = 0;
02841         long pos = ftell(fp);
02842 
02843         max_line_len = 1024;
02844         line = Malloc(char,max_line_len);
02845         char *p,*endptr,*idx,*val;
02846 
02847         while(readline(fp)!=NULL)
02848         {
02849                 p = strtok(line,":");
02850                 while(1)
02851                 {
02852                         p = strtok(NULL,":");
02853                         if(p == NULL)
02854                                 break;
02855                         ++elements;
02856                 }
02857         }
02858         elements += model->l;
02859 
02860         fseek(fp,pos,SEEK_SET);
02861 
02862         int m = model->nr_class - 1;
02863         int l = model->l;
02864         model->sv_coef = Malloc(double *,m);
02865         int i;
02866         for(i=0;i<m;i++)
02867                 model->sv_coef[i] = Malloc(double,l);
02868         model->SV = Malloc(svm_node*,l);
02869         svm_node *x_space = NULL;
02870         if(l>0) x_space = Malloc(svm_node,elements);
02871 
02872         int j=0;
02873         for(i=0;i<l;i++)
02874         {
02875                 readline(fp);
02876                 model->SV[i] = &x_space[j];
02877 
02878                 p = strtok(line, " \t");
02879                 model->sv_coef[0][i] = strtod(p,&endptr);
02880                 for(int k=1;k<m;k++)
02881                 {
02882                         p = strtok(NULL, " \t");
02883                         model->sv_coef[k][i] = strtod(p,&endptr);
02884                 }
02885 
02886                 while(1)
02887                 {
02888                         idx = strtok(NULL, ":");
02889                         val = strtok(NULL, " \t");
02890 
02891                         if(val == NULL)
02892                                 break;
02893                         x_space[j].index = (int) strtol(idx,&endptr,10);
02894                         x_space[j].value = strtod(val,&endptr);
02895 
02896                         ++j;
02897                 }
02898                 x_space[j++].index = -1;
02899         }
02900         free(line);
02901 
02902         if (ferror(fp) != 0 || fclose(fp) != 0)
02903                 return NULL;
02904 
02905         model->free_sv = 1;     // XXX
02906         return model;
02907 }
02908 
02909 void svm_free_model_content(svm_model* model_ptr)
02910 {
02911         if(model_ptr->free_sv && model_ptr->l > 0)
02912                 free((void *)(model_ptr->SV[0]));
02913         for(int i=0;i<model_ptr->nr_class-1;i++)
02914                 free(model_ptr->sv_coef[i]);
02915         free(model_ptr->SV);
02916         free(model_ptr->sv_coef);
02917         free(model_ptr->rho);
02918         free(model_ptr->label);
02919         free(model_ptr->probA);
02920         free(model_ptr->probB);
02921         free(model_ptr->nSV);
02922         free(model_ptr->SVidx);
02923 }
02924 
02925 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
02926 {
02927         svm_model* model_ptr = *model_ptr_ptr;
02928         if(model_ptr != NULL)
02929         {
02930                 svm_free_model_content(model_ptr);
02931                 free(model_ptr);
02932         }
02933 }
02934 
02935 void svm_destroy_model(svm_model* model_ptr)
02936 {
02937         fprintf(stderr,"warning: svm_destroy_model is deprecated and should not be used. Please use svm_free_and_destroy_model(svm_model **model_ptr_ptr)\n");
02938         svm_free_and_destroy_model(&model_ptr);
02939 }
02940 
02941 void svm_destroy_param(svm_parameter* param)
02942 {
02943         free(param->weight_label);
02944         free(param->weight);
02945 }
02946 
02947 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
02948 {
02949         // svm_type
02950 
02951         int svm_type = param->svm_type;
02952         if(svm_type != C_SVC &&
02953            svm_type != NU_SVC &&
02954            svm_type != ONE_CLASS &&
02955            svm_type != EPSILON_SVR &&
02956            svm_type != NU_SVR)
02957                 return "unknown svm type";
02958         
02959         // kernel_type, degree
02960         
02961         int kernel_type = param->kernel_type;
02962         if(kernel_type != LINEAR &&
02963            kernel_type != POLY &&
02964            kernel_type != RBF &&
02965            kernel_type != SIGMOID &&
02966            kernel_type != PRECOMPUTED)
02967                 return "unknown kernel type";
02968 
02969         if(param->gamma < 0)
02970                 return "gamma < 0";
02971 
02972         if(param->degree < 0)
02973                 return "degree of polynomial kernel < 0";
02974 
02975         // cache_size,eps,C,nu,p,shrinking
02976 
02977         if(param->cache_size <= 0)
02978                 return "cache_size <= 0";
02979 
02980         if(param->eps <= 0)
02981                 return "eps <= 0";
02982 
02983         if(svm_type == C_SVC ||
02984            svm_type == EPSILON_SVR ||
02985            svm_type == NU_SVR)
02986                 if(param->C <= 0)
02987                         return "C <= 0";
02988 
02989         if(svm_type == NU_SVC ||
02990            svm_type == ONE_CLASS ||
02991            svm_type == NU_SVR)
02992                 if(param->nu <= 0 || param->nu > 1)
02993                         return "nu <= 0 or nu > 1";
02994 
02995         if(svm_type == EPSILON_SVR)
02996                 if(param->p < 0)
02997                         return "p < 0";
02998 
02999         if(param->shrinking != 0 &&
03000            param->shrinking != 1)
03001                 return "shrinking != 0 and shrinking != 1";
03002 
03003         if(param->probability != 0 &&
03004            param->probability != 1)
03005                 return "probability != 0 and probability != 1";
03006 
03007         if(param->probability == 1 &&
03008            svm_type == ONE_CLASS)
03009                 return "one-class SVM probability output not supported yet";
03010 
03011 
03012         // check whether nu-svc is feasible
03013         
03014         if(svm_type == NU_SVC)
03015         {
03016                 int l = prob->l;
03017                 int max_nr_class = 16;
03018                 int nr_class = 0;
03019                 int *label = Malloc(int,max_nr_class);
03020                 int *count = Malloc(int,max_nr_class);
03021 
03022                 int i;
03023                 for(i=0;i<l;i++)
03024                 {
03025                         int this_label = (int)prob->y[i];
03026                         int j;
03027                         for(j=0;j<nr_class;j++)
03028                                 if(this_label == label[j])
03029                                 {
03030                                         ++count[j];
03031                                         break;
03032                                 }
03033                         if(j == nr_class)
03034                         {
03035                                 if(nr_class == max_nr_class)
03036                                 {
03037                                         max_nr_class *= 2;
03038                                         label = (int *)realloc(label,max_nr_class*sizeof(int));
03039                                         count = (int *)realloc(count,max_nr_class*sizeof(int));
03040                                 }
03041                                 label[nr_class] = this_label;
03042                                 count[nr_class] = 1;
03043                                 ++nr_class;
03044                         }
03045                 }
03046         
03047                 for(i=0;i<nr_class;i++)
03048                 {
03049                         int n1 = count[i];
03050                         for(int j=i+1;j<nr_class;j++)
03051                         {
03052                                 int n2 = count[j];
03053                                 if(param->nu*(n1+n2)/2 > min(n1,n2))
03054                                 {
03055                                         free(label);
03056                                         free(count);
03057                                         return "specified nu is infeasible";
03058                                 }
03059                         }
03060                 }
03061                 free(label);
03062                 free(count);
03063         }
03064 
03065         return NULL;
03066 }
03067 
03068 int svm_check_probability_model(const svm_model *model)
03069 {
03070         return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03071                 model->probA!=NULL && model->probB!=NULL) ||
03072                 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03073                  model->probA!=NULL);
03074 }
03075 
03076 void svm_set_print_string_function(void (*print_func)(const char *))
03077 {
03078         if(print_func == NULL)
03079                 svm_print_string = &print_string_stdout;
03080         else
03081                 svm_print_string = print_func;
03082 }