haf_grasping: svm.cpp Source File

Go to the documentation of this file.
00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <ctype.h>
00005 #include <float.h>
00006 #include <string.h>
00007 #include <stdarg.h>
00008 #include <limits.h>
00009 #include <locale.h>
00010 #include "svm.h"
00011 int libsvm_version = LIBSVM_VERSION;
00012 typedef float Qfloat;
00013 typedef signed char schar;
00014 #ifndef min
00015 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
00016 #endif
00017 #ifndef max
00018 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
00019 #endif
00020 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00021 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
00022 {
00023         dst = new T[n];
00024         memcpy((void *)dst,(void *)src,sizeof(T)*n);
00025 }
00026 static inline double powi(double base, int times)
00027 {
00028         double tmp = base, ret = 1.0;
00029 
00030         for(int t=times; t>0; t/=2)
00031         {
00032                 if(t%2==1) ret*=tmp;
00033                 tmp = tmp * tmp;
00034         }
00035         return ret;
00036 }
00037 #define INF HUGE_VAL
00038 #define TAU 1e-12
00039 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00040 
00041 static void print_string_stdout(const char *s)
00042 {
00043         fputs(s,stdout);
00044         fflush(stdout);
00045 }
00046 static void (*svm_print_string) (const char *) = &print_string_stdout;
00047 #if 1
00048 static void info(const char *fmt,...)
00049 {
00050         char buf[BUFSIZ];
00051         va_list ap;
00052         va_start(ap,fmt);
00053         vsprintf(buf,fmt,ap);
00054         va_end(ap);
00055         (*svm_print_string)(buf);
00056 }
00057 #else
00058 static void info(const char *fmt,...) {}
00059 #endif
00060 
00061 //
00062 // Kernel Cache
00063 //
00064 // l is the number of total data items
00065 // size is the cache size limit in bytes
00066 //
00067 class Cache
00068 {
00069 public:
00070         Cache(int l,long int size);
00071         ~Cache();
00072 
00073         // request data [0,len)
00074         // return some position p where [p,len) need to be filled
00075         // (p >= len if nothing needs to be filled)
00076         int get_data(const int index, Qfloat **data, int len);
00077         void swap_index(int i, int j);  
00078 private:
00079         int l;
00080         long int size;
00081         struct head_t
00082         {
00083                 head_t *prev, *next;    // a circular list
00084                 Qfloat *data;
00085                 int len;                // data[0,len) is cached in this entry
00086         };
00087 
00088         head_t *head;
00089         head_t lru_head;
00090         void lru_delete(head_t *h);
00091         void lru_insert(head_t *h);
00092 };
00093 
00094 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00095 {
00096         head = (head_t *)calloc(l,sizeof(head_t));      // initialized to 0
00097         size /= sizeof(Qfloat);
00098         size -= l * sizeof(head_t) / sizeof(Qfloat);
00099         size = max(size, 2 * (long int) l);     // cache must be large enough for two columns
00100         lru_head.next = lru_head.prev = &lru_head;
00101 }
00102 
00103 Cache::~Cache()
00104 {
00105         for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00106                 free(h->data);
00107         free(head);
00108 }
00109 
00110 void Cache::lru_delete(head_t *h)
00111 {
00112         // delete from current location
00113         h->prev->next = h->next;
00114         h->next->prev = h->prev;
00115 }
00116 
00117 void Cache::lru_insert(head_t *h)
00118 {
00119         // insert to last position
00120         h->next = &lru_head;
00121         h->prev = lru_head.prev;
00122         h->prev->next = h;
00123         h->next->prev = h;
00124 }
00125 
00126 int Cache::get_data(const int index, Qfloat **data, int len)
00127 {
00128         head_t *h = &head[index];
00129         if(h->len) lru_delete(h);
00130         int more = len - h->len;
00131 
00132         if(more > 0)
00133         {
00134                 // free old space
00135                 while(size < more)
00136                 {
00137                         head_t *old = lru_head.next;
00138                         lru_delete(old);
00139                         free(old->data);
00140                         size += old->len;
00141                         old->data = 0;
00142                         old->len = 0;
00143                 }
00144 
00145                 // allocate new space
00146                 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00147                 size -= more;
00148                 swap(h->len,len);
00149         }
00150 
00151         lru_insert(h);
00152         *data = h->data;
00153         return len;
00154 }
00155 
00156 void Cache::swap_index(int i, int j)
00157 {
00158         if(i==j) return;
00159 
00160         if(head[i].len) lru_delete(&head[i]);
00161         if(head[j].len) lru_delete(&head[j]);
00162         swap(head[i].data,head[j].data);
00163         swap(head[i].len,head[j].len);
00164         if(head[i].len) lru_insert(&head[i]);
00165         if(head[j].len) lru_insert(&head[j]);
00166 
00167         if(i>j) swap(i,j);
00168         for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00169         {
00170                 if(h->len > i)
00171                 {
00172                         if(h->len > j)
00173                                 swap(h->data[i],h->data[j]);
00174                         else
00175                         {
00176                                 // give up
00177                                 lru_delete(h);
00178                                 free(h->data);
00179                                 size += h->len;
00180                                 h->data = 0;
00181                                 h->len = 0;
00182                         }
00183                 }
00184         }
00185 }
00186 
00187 //
00188 // Kernel evaluation
00189 //
00190 // the static method k_function is for doing single kernel evaluation
00191 // the constructor of Kernel prepares to calculate the l*l kernel matrix
00192 // the member function get_Q is for getting one column from the Q Matrix
00193 //
00194 class QMatrix {
00195 public:
00196         virtual Qfloat *get_Q(int column, int len) const = 0;
00197         virtual double *get_QD() const = 0;
00198         virtual void swap_index(int i, int j) const = 0;
00199         virtual ~QMatrix() {}
00200 };
00201 
00202 class Kernel: public QMatrix {
00203 public:
00204         Kernel(int l, svm_node * const * x, const svm_parameter& param);
00205         virtual ~Kernel();
00206 
00207         static double k_function(const svm_node *x, const svm_node *y,
00208                                  const svm_parameter& param);
00209         virtual Qfloat *get_Q(int column, int len) const = 0;
00210         virtual double *get_QD() const = 0;
00211         virtual void swap_index(int i, int j) const     // no so const...
00212         {
00213                 swap(x[i],x[j]);
00214                 if(x_square) swap(x_square[i],x_square[j]);
00215         }
00216 protected:
00217 
00218         double (Kernel::*kernel_function)(int i, int j) const;
00219 
00220 private:
00221         const svm_node **x;
00222         double *x_square;
00223 
00224         // svm_parameter
00225         const int kernel_type;
00226         const int degree;
00227         const double gamma;
00228         const double coef0;
00229 
00230         static double dot(const svm_node *px, const svm_node *py);
00231         double kernel_linear(int i, int j) const
00232         {
00233                 return dot(x[i],x[j]);
00234         }
00235         double kernel_poly(int i, int j) const
00236         {
00237                 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00238         }
00239         double kernel_rbf(int i, int j) const
00240         {
00241                 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00242         }
00243         double kernel_sigmoid(int i, int j) const
00244         {
00245                 return tanh(gamma*dot(x[i],x[j])+coef0);
00246         }
00247         double kernel_precomputed(int i, int j) const
00248         {
00249                 return x[i][(int)(x[j][0].value)].value;
00250         }
00251 };
00252 
00253 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00254 :kernel_type(param.kernel_type), degree(param.degree),
00255  gamma(param.gamma), coef0(param.coef0)
00256 {
00257         switch(kernel_type)
00258         {
00259                 case LINEAR:
00260                         kernel_function = &Kernel::kernel_linear;
00261                         break;
00262                 case POLY:
00263                         kernel_function = &Kernel::kernel_poly;
00264                         break;
00265                 case RBF:
00266                         kernel_function = &Kernel::kernel_rbf;
00267                         break;
00268                 case SIGMOID:
00269                         kernel_function = &Kernel::kernel_sigmoid;
00270                         break;
00271                 case PRECOMPUTED:
00272                         kernel_function = &Kernel::kernel_precomputed;
00273                         break;
00274         }
00275 
00276         clone(x,x_,l);
00277 
00278         if(kernel_type == RBF)
00279         {
00280                 x_square = new double[l];
00281                 for(int i=0;i<l;i++)
00282                         x_square[i] = dot(x[i],x[i]);
00283         }
00284         else
00285                 x_square = 0;
00286 }
00287 
00288 Kernel::~Kernel()
00289 {
00290         delete[] x;
00291         delete[] x_square;
00292 }
00293 
00294 double Kernel::dot(const svm_node *px, const svm_node *py)
00295 {
00296         double sum = 0;
00297         while(px->index != -1 && py->index != -1)
00298         {
00299                 if(px->index == py->index)
00300                 {
00301                         sum += px->value * py->value;
00302                         ++px;
00303                         ++py;
00304                 }
00305                 else
00306                 {
00307                         if(px->index > py->index)
00308                                 ++py;
00309                         else
00310                                 ++px;
00311                 }                       
00312         }
00313         return sum;
00314 }
00315 
00316 double Kernel::k_function(const svm_node *x, const svm_node *y,
00317                           const svm_parameter& param)
00318 {
00319         switch(param.kernel_type)
00320         {
00321                 case LINEAR:
00322                         return dot(x,y);
00323                 case POLY:
00324                         return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00325                 case RBF:
00326                 {
00327                         double sum = 0;
00328                         while(x->index != -1 && y->index !=-1)
00329                         {
00330                                 if(x->index == y->index)
00331                                 {
00332                                         double d = x->value - y->value;
00333                                         sum += d*d;
00334                                         ++x;
00335                                         ++y;
00336                                 }
00337                                 else
00338                                 {
00339                                         if(x->index > y->index)
00340                                         {       
00341                                                 sum += y->value * y->value;
00342                                                 ++y;
00343                                         }
00344                                         else
00345                                         {
00346                                                 sum += x->value * x->value;
00347                                                 ++x;
00348                                         }
00349                                 }
00350                         }
00351 
00352                         while(x->index != -1)
00353                         {
00354                                 sum += x->value * x->value;
00355                                 ++x;
00356                         }
00357 
00358                         while(y->index != -1)
00359                         {
00360                                 sum += y->value * y->value;
00361                                 ++y;
00362                         }
00363                         
00364                         return exp(-param.gamma*sum);
00365                 }
00366                 case SIGMOID:
00367                         return tanh(param.gamma*dot(x,y)+param.coef0);
00368                 case PRECOMPUTED:  //x: test (validation), y: SV
00369                         return x[(int)(y->value)].value;
00370                 default:
00371                         return 0;  // Unreachable 
00372         }
00373 }
00374 
00375 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
00376 // Solves:
00377 //
00378 //      min 0.5(\alpha^T Q \alpha) + p^T \alpha
00379 //
00380 //              y^T \alpha = \delta
00381 //              y_i = +1 or -1
00382 //              0 <= alpha_i <= Cp for y_i = 1
00383 //              0 <= alpha_i <= Cn for y_i = -1
00384 //
00385 // Given:
00386 //
00387 //      Q, p, y, Cp, Cn, and an initial feasible point \alpha
00388 //      l is the size of vectors and matrices
00389 //      eps is the stopping tolerance
00390 //
00391 // solution will be put in \alpha, objective value will be put in obj
00392 //
00393 class Solver {
00394 public:
00395         Solver() {};
00396         virtual ~Solver() {};
00397 
00398         struct SolutionInfo {
00399                 double obj;
00400                 double rho;
00401                 double upper_bound_p;
00402                 double upper_bound_n;
00403                 double r;       // for Solver_NU
00404         };
00405 
00406         void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00407                    double *alpha_, double Cp, double Cn, double eps,
00408                    SolutionInfo* si, int shrinking);
00409 protected:
00410         int active_size;
00411         schar *y;
00412         double *G;              // gradient of objective function
00413         enum { LOWER_BOUND, UPPER_BOUND, FREE };
00414         char *alpha_status;     // LOWER_BOUND, UPPER_BOUND, FREE
00415         double *alpha;
00416         const QMatrix *Q;
00417         const double *QD;
00418         double eps;
00419         double Cp,Cn;
00420         double *p;
00421         int *active_set;
00422         double *G_bar;          // gradient, if we treat free variables as 0
00423         int l;
00424         bool unshrink;  // XXX
00425 
00426         double get_C(int i)
00427         {
00428                 return (y[i] > 0)? Cp : Cn;
00429         }
00430         void update_alpha_status(int i)
00431         {
00432                 if(alpha[i] >= get_C(i))
00433                         alpha_status[i] = UPPER_BOUND;
00434                 else if(alpha[i] <= 0)
00435                         alpha_status[i] = LOWER_BOUND;
00436                 else alpha_status[i] = FREE;
00437         }
00438         bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00439         bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00440         bool is_free(int i) { return alpha_status[i] == FREE; }
00441         void swap_index(int i, int j);
00442         void reconstruct_gradient();
00443         virtual int select_working_set(int &i, int &j);
00444         virtual double calculate_rho();
00445         virtual void do_shrinking();
00446 private:
00447         bool be_shrunk(int i, double Gmax1, double Gmax2);      
00448 };
00449 
00450 void Solver::swap_index(int i, int j)
00451 {
00452         Q->swap_index(i,j);
00453         swap(y[i],y[j]);
00454         swap(G[i],G[j]);
00455         swap(alpha_status[i],alpha_status[j]);
00456         swap(alpha[i],alpha[j]);
00457         swap(p[i],p[j]);
00458         swap(active_set[i],active_set[j]);
00459         swap(G_bar[i],G_bar[j]);
00460 }
00461 
00462 void Solver::reconstruct_gradient()
00463 {
00464         // reconstruct inactive elements of G from G_bar and free variables
00465 
00466         if(active_size == l) return;
00467 
00468         int i,j;
00469         int nr_free = 0;
00470 
00471         for(j=active_size;j<l;j++)
00472                 G[j] = G_bar[j] + p[j];
00473 
00474         for(j=0;j<active_size;j++)
00475                 if(is_free(j))
00476                         nr_free++;
00477 
00478         if(2*nr_free < active_size)
00479                 info("\nWARNING: using -h 0 may be faster\n");
00480 
00481         if (nr_free*l > 2*active_size*(l-active_size))
00482         {
00483                 for(i=active_size;i<l;i++)
00484                 {
00485                         const Qfloat *Q_i = Q->get_Q(i,active_size);
00486                         for(j=0;j<active_size;j++)
00487                                 if(is_free(j))
00488                                         G[i] += alpha[j] * Q_i[j];
00489                 }
00490         }
00491         else
00492         {
00493                 for(i=0;i<active_size;i++)
00494                         if(is_free(i))
00495                         {
00496                                 const Qfloat *Q_i = Q->get_Q(i,l);
00497                                 double alpha_i = alpha[i];
00498                                 for(j=active_size;j<l;j++)
00499                                         G[j] += alpha_i * Q_i[j];
00500                         }
00501         }
00502 }
00503 
00504 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00505                    double *alpha_, double Cp, double Cn, double eps,
00506                    SolutionInfo* si, int shrinking)
00507 {
00508         this->l = l;
00509         this->Q = &Q;
00510         QD=Q.get_QD();
00511         clone(p, p_,l);
00512         clone(y, y_,l);
00513         clone(alpha,alpha_,l);
00514         this->Cp = Cp;
00515         this->Cn = Cn;
00516         this->eps = eps;
00517         unshrink = false;
00518 
00519         // initialize alpha_status
00520         {
00521                 alpha_status = new char[l];
00522                 for(int i=0;i<l;i++)
00523                         update_alpha_status(i);
00524         }
00525 
00526         // initialize active set (for shrinking)
00527         {
00528                 active_set = new int[l];
00529                 for(int i=0;i<l;i++)
00530                         active_set[i] = i;
00531                 active_size = l;
00532         }
00533 
00534         // initialize gradient
00535         {
00536                 G = new double[l];
00537                 G_bar = new double[l];
00538                 int i;
00539                 for(i=0;i<l;i++)
00540                 {
00541                         G[i] = p[i];
00542                         G_bar[i] = 0;
00543                 }
00544                 for(i=0;i<l;i++)
00545                         if(!is_lower_bound(i))
00546                         {
00547                                 const Qfloat *Q_i = Q.get_Q(i,l);
00548                                 double alpha_i = alpha[i];
00549                                 int j;
00550                                 for(j=0;j<l;j++)
00551                                         G[j] += alpha_i*Q_i[j];
00552                                 if(is_upper_bound(i))
00553                                         for(j=0;j<l;j++)
00554                                                 G_bar[j] += get_C(i) * Q_i[j];
00555                         }
00556         }
00557 
00558         // optimization step
00559 
00560         int iter = 0;
00561         int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
00562         int counter = min(l,1000)+1;
00563         
00564         while(iter < max_iter)
00565         {
00566                 // show progress and do shrinking
00567 
00568                 if(--counter == 0)
00569                 {
00570                         counter = min(l,1000);
00571                         if(shrinking) do_shrinking();
00572                         info(".");
00573                 }
00574 
00575                 int i,j;
00576                 if(select_working_set(i,j)!=0)
00577                 {
00578                         // reconstruct the whole gradient
00579                         reconstruct_gradient();
00580                         // reset active set size and check
00581                         active_size = l;
00582                         info("*");
00583                         if(select_working_set(i,j)!=0)
00584                                 break;
00585                         else
00586                                 counter = 1;    // do shrinking next iteration
00587                 }
00588                 
00589                 ++iter;
00590 
00591                 // update alpha[i] and alpha[j], handle bounds carefully
00592                 
00593                 const Qfloat *Q_i = Q.get_Q(i,active_size);
00594                 const Qfloat *Q_j = Q.get_Q(j,active_size);
00595 
00596                 double C_i = get_C(i);
00597                 double C_j = get_C(j);
00598 
00599                 double old_alpha_i = alpha[i];
00600                 double old_alpha_j = alpha[j];
00601 
00602                 if(y[i]!=y[j])
00603                 {
00604                         double quad_coef = QD[i]+QD[j]+2*Q_i[j];
00605                         if (quad_coef <= 0)
00606                                 quad_coef = TAU;
00607                         double delta = (-G[i]-G[j])/quad_coef;
00608                         double diff = alpha[i] - alpha[j];
00609                         alpha[i] += delta;
00610                         alpha[j] += delta;
00611                         
00612                         if(diff > 0)
00613                         {
00614                                 if(alpha[j] < 0)
00615                                 {
00616                                         alpha[j] = 0;
00617                                         alpha[i] = diff;
00618                                 }
00619                         }
00620                         else
00621                         {
00622                                 if(alpha[i] < 0)
00623                                 {
00624                                         alpha[i] = 0;
00625                                         alpha[j] = -diff;
00626                                 }
00627                         }
00628                         if(diff > C_i - C_j)
00629                         {
00630                                 if(alpha[i] > C_i)
00631                                 {
00632                                         alpha[i] = C_i;
00633                                         alpha[j] = C_i - diff;
00634                                 }
00635                         }
00636                         else
00637                         {
00638                                 if(alpha[j] > C_j)
00639                                 {
00640                                         alpha[j] = C_j;
00641                                         alpha[i] = C_j + diff;
00642                                 }
00643                         }
00644                 }
00645                 else
00646                 {
00647                         double quad_coef = QD[i]+QD[j]-2*Q_i[j];
00648                         if (quad_coef <= 0)
00649                                 quad_coef = TAU;
00650                         double delta = (G[i]-G[j])/quad_coef;
00651                         double sum = alpha[i] + alpha[j];
00652                         alpha[i] -= delta;
00653                         alpha[j] += delta;
00654 
00655                         if(sum > C_i)
00656                         {
00657                                 if(alpha[i] > C_i)
00658                                 {
00659                                         alpha[i] = C_i;
00660                                         alpha[j] = sum - C_i;
00661                                 }
00662                         }
00663                         else
00664                         {
00665                                 if(alpha[j] < 0)
00666                                 {
00667                                         alpha[j] = 0;
00668                                         alpha[i] = sum;
00669                                 }
00670                         }
00671                         if(sum > C_j)
00672                         {
00673                                 if(alpha[j] > C_j)
00674                                 {
00675                                         alpha[j] = C_j;
00676                                         alpha[i] = sum - C_j;
00677                                 }
00678                         }
00679                         else
00680                         {
00681                                 if(alpha[i] < 0)
00682                                 {
00683                                         alpha[i] = 0;
00684                                         alpha[j] = sum;
00685                                 }
00686                         }
00687                 }
00688 
00689                 // update G
00690 
00691                 double delta_alpha_i = alpha[i] - old_alpha_i;
00692                 double delta_alpha_j = alpha[j] - old_alpha_j;
00693                 
00694                 for(int k=0;k<active_size;k++)
00695                 {
00696                         G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00697                 }
00698 
00699                 // update alpha_status and G_bar
00700 
00701                 {
00702                         bool ui = is_upper_bound(i);
00703                         bool uj = is_upper_bound(j);
00704                         update_alpha_status(i);
00705                         update_alpha_status(j);
00706                         int k;
00707                         if(ui != is_upper_bound(i))
00708                         {
00709                                 Q_i = Q.get_Q(i,l);
00710                                 if(ui)
00711                                         for(k=0;k<l;k++)
00712                                                 G_bar[k] -= C_i * Q_i[k];
00713                                 else
00714                                         for(k=0;k<l;k++)
00715                                                 G_bar[k] += C_i * Q_i[k];
00716                         }
00717 
00718                         if(uj != is_upper_bound(j))
00719                         {
00720                                 Q_j = Q.get_Q(j,l);
00721                                 if(uj)
00722                                         for(k=0;k<l;k++)
00723                                                 G_bar[k] -= C_j * Q_j[k];
00724                                 else
00725                                         for(k=0;k<l;k++)
00726                                                 G_bar[k] += C_j * Q_j[k];
00727                         }
00728                 }
00729         }
00730 
00731         if(iter >= max_iter)
00732         {
00733                 if(active_size < l)
00734                 {
00735                         // reconstruct the whole gradient to calculate objective value
00736                         reconstruct_gradient();
00737                         active_size = l;
00738                         info("*");
00739                 }
00740                 info("\nWARNING: reaching max number of iterations");
00741         }
00742 
00743         // calculate rho
00744 
00745         si->rho = calculate_rho();
00746 
00747         // calculate objective value
00748         {
00749                 double v = 0;
00750                 int i;
00751                 for(i=0;i<l;i++)
00752                         v += alpha[i] * (G[i] + p[i]);
00753 
00754                 si->obj = v/2;
00755         }
00756 
00757         // put back the solution
00758         {
00759                 for(int i=0;i<l;i++)
00760                         alpha_[active_set[i]] = alpha[i];
00761         }
00762 
00763         // juggle everything back
00764         /*{
00765                 for(int i=0;i<l;i++)
00766                         while(active_set[i] != i)
00767                                 swap_index(i,active_set[i]);
00768                                 // or Q.swap_index(i,active_set[i]);
00769         }*/
00770 
00771         si->upper_bound_p = Cp;
00772         si->upper_bound_n = Cn;
00773 
00774         info("\noptimization finished, #iter = %d\n",iter);
00775 
00776         delete[] p;
00777         delete[] y;
00778         delete[] alpha;
00779         delete[] alpha_status;
00780         delete[] active_set;
00781         delete[] G;
00782         delete[] G_bar;
00783 }
00784 
00785 // return 1 if already optimal, return 0 otherwise
00786 int Solver::select_working_set(int &out_i, int &out_j)
00787 {
00788         // return i,j such that
00789         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
00790         // j: minimizes the decrease of obj value
00791         //    (if quadratic coefficeint <= 0, replace it with tau)
00792         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
00793         
00794         double Gmax = -INF;
00795         double Gmax2 = -INF;
00796         int Gmax_idx = -1;
00797         int Gmin_idx = -1;
00798         double obj_diff_min = INF;
00799 
00800         for(int t=0;t<active_size;t++)
00801                 if(y[t]==+1)    
00802                 {
00803                         if(!is_upper_bound(t))
00804                                 if(-G[t] >= Gmax)
00805                                 {
00806                                         Gmax = -G[t];
00807                                         Gmax_idx = t;
00808                                 }
00809                 }
00810                 else
00811                 {
00812                         if(!is_lower_bound(t))
00813                                 if(G[t] >= Gmax)
00814                                 {
00815                                         Gmax = G[t];
00816                                         Gmax_idx = t;
00817                                 }
00818                 }
00819 
00820         int i = Gmax_idx;
00821         const Qfloat *Q_i = NULL;
00822         if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
00823                 Q_i = Q->get_Q(i,active_size);
00824 
00825         for(int j=0;j<active_size;j++)
00826         {
00827                 if(y[j]==+1)
00828                 {
00829                         if (!is_lower_bound(j))
00830                         {
00831                                 double grad_diff=Gmax+G[j];
00832                                 if (G[j] >= Gmax2)
00833                                         Gmax2 = G[j];
00834                                 if (grad_diff > 0)
00835                                 {
00836                                         double obj_diff; 
00837                                         double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
00838                                         if (quad_coef > 0)
00839                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00840                                         else
00841                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00842 
00843                                         if (obj_diff <= obj_diff_min)
00844                                         {
00845                                                 Gmin_idx=j;
00846                                                 obj_diff_min = obj_diff;
00847                                         }
00848                                 }
00849                         }
00850                 }
00851                 else
00852                 {
00853                         if (!is_upper_bound(j))
00854                         {
00855                                 double grad_diff= Gmax-G[j];
00856                                 if (-G[j] >= Gmax2)
00857                                         Gmax2 = -G[j];
00858                                 if (grad_diff > 0)
00859                                 {
00860                                         double obj_diff; 
00861                                         double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
00862                                         if (quad_coef > 0)
00863                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00864                                         else
00865                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00866 
00867                                         if (obj_diff <= obj_diff_min)
00868                                         {
00869                                                 Gmin_idx=j;
00870                                                 obj_diff_min = obj_diff;
00871                                         }
00872                                 }
00873                         }
00874                 }
00875         }
00876 
00877         if(Gmax+Gmax2 < eps)
00878                 return 1;
00879 
00880         out_i = Gmax_idx;
00881         out_j = Gmin_idx;
00882         return 0;
00883 }
00884 
00885 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
00886 {
00887         if(is_upper_bound(i))
00888         {
00889                 if(y[i]==+1)
00890                         return(-G[i] > Gmax1);
00891                 else
00892                         return(-G[i] > Gmax2);
00893         }
00894         else if(is_lower_bound(i))
00895         {
00896                 if(y[i]==+1)
00897                         return(G[i] > Gmax2);
00898                 else    
00899                         return(G[i] > Gmax1);
00900         }
00901         else
00902                 return(false);
00903 }
00904 
00905 void Solver::do_shrinking()
00906 {
00907         int i;
00908         double Gmax1 = -INF;            // max { -y_i * grad(f)_i | i in I_up(\alpha) }
00909         double Gmax2 = -INF;            // max { y_i * grad(f)_i | i in I_low(\alpha) }
00910 
00911         // find maximal violating pair first
00912         for(i=0;i<active_size;i++)
00913         {
00914                 if(y[i]==+1)    
00915                 {
00916                         if(!is_upper_bound(i))  
00917                         {
00918                                 if(-G[i] >= Gmax1)
00919                                         Gmax1 = -G[i];
00920                         }
00921                         if(!is_lower_bound(i))  
00922                         {
00923                                 if(G[i] >= Gmax2)
00924                                         Gmax2 = G[i];
00925                         }
00926                 }
00927                 else    
00928                 {
00929                         if(!is_upper_bound(i))  
00930                         {
00931                                 if(-G[i] >= Gmax2)
00932                                         Gmax2 = -G[i];
00933                         }
00934                         if(!is_lower_bound(i))  
00935                         {
00936                                 if(G[i] >= Gmax1)
00937                                         Gmax1 = G[i];
00938                         }
00939                 }
00940         }
00941 
00942         if(unshrink == false && Gmax1 + Gmax2 <= eps*10) 
00943         {
00944                 unshrink = true;
00945                 reconstruct_gradient();
00946                 active_size = l;
00947                 info("*");
00948         }
00949 
00950         for(i=0;i<active_size;i++)
00951                 if (be_shrunk(i, Gmax1, Gmax2))
00952                 {
00953                         active_size--;
00954                         while (active_size > i)
00955                         {
00956                                 if (!be_shrunk(active_size, Gmax1, Gmax2))
00957                                 {
00958                                         swap_index(i,active_size);
00959                                         break;
00960                                 }
00961                                 active_size--;
00962                         }
00963                 }
00964 }
00965 
00966 double Solver::calculate_rho()
00967 {
00968         double r;
00969         int nr_free = 0;
00970         double ub = INF, lb = -INF, sum_free = 0;
00971         for(int i=0;i<active_size;i++)
00972         {
00973                 double yG = y[i]*G[i];
00974 
00975                 if(is_upper_bound(i))
00976                 {
00977                         if(y[i]==-1)
00978                                 ub = min(ub,yG);
00979                         else
00980                                 lb = max(lb,yG);
00981                 }
00982                 else if(is_lower_bound(i))
00983                 {
00984                         if(y[i]==+1)
00985                                 ub = min(ub,yG);
00986                         else
00987                                 lb = max(lb,yG);
00988                 }
00989                 else
00990                 {
00991                         ++nr_free;
00992                         sum_free += yG;
00993                 }
00994         }
00995 
00996         if(nr_free>0)
00997                 r = sum_free/nr_free;
00998         else
00999                 r = (ub+lb)/2;
01000 
01001         return r;
01002 }
01003 
01004 //
01005 // Solver for nu-svm classification and regression
01006 //
01007 // additional constraint: e^T \alpha = constant
01008 //
01009 class Solver_NU : public Solver
01010 {
01011 public:
01012         Solver_NU() {}
01013         void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
01014                    double *alpha, double Cp, double Cn, double eps,
01015                    SolutionInfo* si, int shrinking)
01016         {
01017                 this->si = si;
01018                 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
01019         }
01020 private:
01021         SolutionInfo *si;
01022         int select_working_set(int &i, int &j);
01023         double calculate_rho();
01024         bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
01025         void do_shrinking();
01026 };
01027 
01028 // return 1 if already optimal, return 0 otherwise
01029 int Solver_NU::select_working_set(int &out_i, int &out_j)
01030 {
01031         // return i,j such that y_i = y_j and
01032         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
01033         // j: minimizes the decrease of obj value
01034         //    (if quadratic coefficeint <= 0, replace it with tau)
01035         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
01036 
01037         double Gmaxp = -INF;
01038         double Gmaxp2 = -INF;
01039         int Gmaxp_idx = -1;
01040 
01041         double Gmaxn = -INF;
01042         double Gmaxn2 = -INF;
01043         int Gmaxn_idx = -1;
01044 
01045         int Gmin_idx = -1;
01046         double obj_diff_min = INF;
01047 
01048         for(int t=0;t<active_size;t++)
01049                 if(y[t]==+1)
01050                 {
01051                         if(!is_upper_bound(t))
01052                                 if(-G[t] >= Gmaxp)
01053                                 {
01054                                         Gmaxp = -G[t];
01055                                         Gmaxp_idx = t;
01056                                 }
01057                 }
01058                 else
01059                 {
01060                         if(!is_lower_bound(t))
01061                                 if(G[t] >= Gmaxn)
01062                                 {
01063                                         Gmaxn = G[t];
01064                                         Gmaxn_idx = t;
01065                                 }
01066                 }
01067 
01068         int ip = Gmaxp_idx;
01069         int in = Gmaxn_idx;
01070         const Qfloat *Q_ip = NULL;
01071         const Qfloat *Q_in = NULL;
01072         if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
01073                 Q_ip = Q->get_Q(ip,active_size);
01074         if(in != -1)
01075                 Q_in = Q->get_Q(in,active_size);
01076 
01077         for(int j=0;j<active_size;j++)
01078         {
01079                 if(y[j]==+1)
01080                 {
01081                         if (!is_lower_bound(j)) 
01082                         {
01083                                 double grad_diff=Gmaxp+G[j];
01084                                 if (G[j] >= Gmaxp2)
01085                                         Gmaxp2 = G[j];
01086                                 if (grad_diff > 0)
01087                                 {
01088                                         double obj_diff; 
01089                                         double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
01090                                         if (quad_coef > 0)
01091                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01092                                         else
01093                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01094 
01095                                         if (obj_diff <= obj_diff_min)
01096                                         {
01097                                                 Gmin_idx=j;
01098                                                 obj_diff_min = obj_diff;
01099                                         }
01100                                 }
01101                         }
01102                 }
01103                 else
01104                 {
01105                         if (!is_upper_bound(j))
01106                         {
01107                                 double grad_diff=Gmaxn-G[j];
01108                                 if (-G[j] >= Gmaxn2)
01109                                         Gmaxn2 = -G[j];
01110                                 if (grad_diff > 0)
01111                                 {
01112                                         double obj_diff; 
01113                                         double quad_coef = QD[in]+QD[j]-2*Q_in[j];
01114                                         if (quad_coef > 0)
01115                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01116                                         else
01117                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01118 
01119                                         if (obj_diff <= obj_diff_min)
01120                                         {
01121                                                 Gmin_idx=j;
01122                                                 obj_diff_min = obj_diff;
01123                                         }
01124                                 }
01125                         }
01126                 }
01127         }
01128 
01129         if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
01130                 return 1;
01131 
01132         if (y[Gmin_idx] == +1)
01133                 out_i = Gmaxp_idx;
01134         else
01135                 out_i = Gmaxn_idx;
01136         out_j = Gmin_idx;
01137 
01138         return 0;
01139 }
01140 
01141 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01142 {
01143         if(is_upper_bound(i))
01144         {
01145                 if(y[i]==+1)
01146                         return(-G[i] > Gmax1);
01147                 else    
01148                         return(-G[i] > Gmax4);
01149         }
01150         else if(is_lower_bound(i))
01151         {
01152                 if(y[i]==+1)
01153                         return(G[i] > Gmax2);
01154                 else    
01155                         return(G[i] > Gmax3);
01156         }
01157         else
01158                 return(false);
01159 }
01160 
01161 void Solver_NU::do_shrinking()
01162 {
01163         double Gmax1 = -INF;    // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
01164         double Gmax2 = -INF;    // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
01165         double Gmax3 = -INF;    // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
01166         double Gmax4 = -INF;    // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
01167 
01168         // find maximal violating pair first
01169         int i;
01170         for(i=0;i<active_size;i++)
01171         {
01172                 if(!is_upper_bound(i))
01173                 {
01174                         if(y[i]==+1)
01175                         {
01176                                 if(-G[i] > Gmax1) Gmax1 = -G[i];
01177                         }
01178                         else    if(-G[i] > Gmax4) Gmax4 = -G[i];
01179                 }
01180                 if(!is_lower_bound(i))
01181                 {
01182                         if(y[i]==+1)
01183                         {       
01184                                 if(G[i] > Gmax2) Gmax2 = G[i];
01185                         }
01186                         else    if(G[i] > Gmax3) Gmax3 = G[i];
01187                 }
01188         }
01189 
01190         if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) 
01191         {
01192                 unshrink = true;
01193                 reconstruct_gradient();
01194                 active_size = l;
01195         }
01196 
01197         for(i=0;i<active_size;i++)
01198                 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
01199                 {
01200                         active_size--;
01201                         while (active_size > i)
01202                         {
01203                                 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01204                                 {
01205                                         swap_index(i,active_size);
01206                                         break;
01207                                 }
01208                                 active_size--;
01209                         }
01210                 }
01211 }
01212 
01213 double Solver_NU::calculate_rho()
01214 {
01215         int nr_free1 = 0,nr_free2 = 0;
01216         double ub1 = INF, ub2 = INF;
01217         double lb1 = -INF, lb2 = -INF;
01218         double sum_free1 = 0, sum_free2 = 0;
01219 
01220         for(int i=0;i<active_size;i++)
01221         {
01222                 if(y[i]==+1)
01223                 {
01224                         if(is_upper_bound(i))
01225                                 lb1 = max(lb1,G[i]);
01226                         else if(is_lower_bound(i))
01227                                 ub1 = min(ub1,G[i]);
01228                         else
01229                         {
01230                                 ++nr_free1;
01231                                 sum_free1 += G[i];
01232                         }
01233                 }
01234                 else
01235                 {
01236                         if(is_upper_bound(i))
01237                                 lb2 = max(lb2,G[i]);
01238                         else if(is_lower_bound(i))
01239                                 ub2 = min(ub2,G[i]);
01240                         else
01241                         {
01242                                 ++nr_free2;
01243                                 sum_free2 += G[i];
01244                         }
01245                 }
01246         }
01247 
01248         double r1,r2;
01249         if(nr_free1 > 0)
01250                 r1 = sum_free1/nr_free1;
01251         else
01252                 r1 = (ub1+lb1)/2;
01253         
01254         if(nr_free2 > 0)
01255                 r2 = sum_free2/nr_free2;
01256         else
01257                 r2 = (ub2+lb2)/2;
01258         
01259         si->r = (r1+r2)/2;
01260         return (r1-r2)/2;
01261 }
01262 
01263 //
01264 // Q matrices for various formulations
01265 //
01266 class SVC_Q: public Kernel
01267 { 
01268 public:
01269         SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01270         :Kernel(prob.l, prob.x, param)
01271         {
01272                 clone(y,y_,prob.l);
01273                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01274                 QD = new double[prob.l];
01275                 for(int i=0;i<prob.l;i++)
01276                         QD[i] = (this->*kernel_function)(i,i);
01277         }
01278         
01279         Qfloat *get_Q(int i, int len) const
01280         {
01281                 Qfloat *data;
01282                 int start, j;
01283                 if((start = cache->get_data(i,&data,len)) < len)
01284                 {
01285                         for(j=start;j<len;j++)
01286                                 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01287                 }
01288                 return data;
01289         }
01290 
01291         double *get_QD() const
01292         {
01293                 return QD;
01294         }
01295 
01296         void swap_index(int i, int j) const
01297         {
01298                 cache->swap_index(i,j);
01299                 Kernel::swap_index(i,j);
01300                 swap(y[i],y[j]);
01301                 swap(QD[i],QD[j]);
01302         }
01303 
01304         ~SVC_Q()
01305         {
01306                 delete[] y;
01307                 delete cache;
01308                 delete[] QD;
01309         }
01310 private:
01311         schar *y;
01312         Cache *cache;
01313         double *QD;
01314 };
01315 
01316 class ONE_CLASS_Q: public Kernel
01317 {
01318 public:
01319         ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01320         :Kernel(prob.l, prob.x, param)
01321         {
01322                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01323                 QD = new double[prob.l];
01324                 for(int i=0;i<prob.l;i++)
01325                         QD[i] = (this->*kernel_function)(i,i);
01326         }
01327         
01328         Qfloat *get_Q(int i, int len) const
01329         {
01330                 Qfloat *data;
01331                 int start, j;
01332                 if((start = cache->get_data(i,&data,len)) < len)
01333                 {
01334                         for(j=start;j<len;j++)
01335                                 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01336                 }
01337                 return data;
01338         }
01339 
01340         double *get_QD() const
01341         {
01342                 return QD;
01343         }
01344 
01345         void swap_index(int i, int j) const
01346         {
01347                 cache->swap_index(i,j);
01348                 Kernel::swap_index(i,j);
01349                 swap(QD[i],QD[j]);
01350         }
01351 
01352         ~ONE_CLASS_Q()
01353         {
01354                 delete cache;
01355                 delete[] QD;
01356         }
01357 private:
01358         Cache *cache;
01359         double *QD;
01360 };
01361 
01362 class SVR_Q: public Kernel
01363 { 
01364 public:
01365         SVR_Q(const svm_problem& prob, const svm_parameter& param)
01366         :Kernel(prob.l, prob.x, param)
01367         {
01368                 l = prob.l;
01369                 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01370                 QD = new double[2*l];
01371                 sign = new schar[2*l];
01372                 index = new int[2*l];
01373                 for(int k=0;k<l;k++)
01374                 {
01375                         sign[k] = 1;
01376                         sign[k+l] = -1;
01377                         index[k] = k;
01378                         index[k+l] = k;
01379                         QD[k] = (this->*kernel_function)(k,k);
01380                         QD[k+l] = QD[k];
01381                 }
01382                 buffer[0] = new Qfloat[2*l];
01383                 buffer[1] = new Qfloat[2*l];
01384                 next_buffer = 0;
01385         }
01386 
01387         void swap_index(int i, int j) const
01388         {
01389                 swap(sign[i],sign[j]);
01390                 swap(index[i],index[j]);
01391                 swap(QD[i],QD[j]);
01392         }
01393         
01394         Qfloat *get_Q(int i, int len) const
01395         {
01396                 Qfloat *data;
01397                 int j, real_i = index[i];
01398                 if(cache->get_data(real_i,&data,l) < l)
01399                 {
01400                         for(j=0;j<l;j++)
01401                                 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01402                 }
01403 
01404                 // reorder and copy
01405                 Qfloat *buf = buffer[next_buffer];
01406                 next_buffer = 1 - next_buffer;
01407                 schar si = sign[i];
01408                 for(j=0;j<len;j++)
01409                         buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
01410                 return buf;
01411         }
01412 
01413         double *get_QD() const
01414         {
01415                 return QD;
01416         }
01417 
01418         ~SVR_Q()
01419         {
01420                 delete cache;
01421                 delete[] sign;
01422                 delete[] index;
01423                 delete[] buffer[0];
01424                 delete[] buffer[1];
01425                 delete[] QD;
01426         }
01427 private:
01428         int l;
01429         Cache *cache;
01430         schar *sign;
01431         int *index;
01432         mutable int next_buffer;
01433         Qfloat *buffer[2];
01434         double *QD;
01435 };
01436 
01437 //
01438 // construct and solve various formulations
01439 //
01440 static void solve_c_svc(
01441         const svm_problem *prob, const svm_parameter* param,
01442         double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01443 {
01444         int l = prob->l;
01445         double *minus_ones = new double[l];
01446         schar *y = new schar[l];
01447 
01448         int i;
01449 
01450         for(i=0;i<l;i++)
01451         {
01452                 alpha[i] = 0;
01453                 minus_ones[i] = -1;
01454                 if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
01455         }
01456 
01457         Solver s;
01458         s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01459                 alpha, Cp, Cn, param->eps, si, param->shrinking);
01460 
01461         double sum_alpha=0;
01462         for(i=0;i<l;i++)
01463                 sum_alpha += alpha[i];
01464 
01465         if (Cp==Cn)
01466                 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01467 
01468         for(i=0;i<l;i++)
01469                 alpha[i] *= y[i];
01470 
01471         delete[] minus_ones;
01472         delete[] y;
01473 }
01474 
01475 static void solve_nu_svc(
01476         const svm_problem *prob, const svm_parameter *param,
01477         double *alpha, Solver::SolutionInfo* si)
01478 {
01479         int i;
01480         int l = prob->l;
01481         double nu = param->nu;
01482 
01483         schar *y = new schar[l];
01484 
01485         for(i=0;i<l;i++)
01486                 if(prob->y[i]>0)
01487                         y[i] = +1;
01488                 else
01489                         y[i] = -1;
01490 
01491         double sum_pos = nu*l/2;
01492         double sum_neg = nu*l/2;
01493 
01494         for(i=0;i<l;i++)
01495                 if(y[i] == +1)
01496                 {
01497                         alpha[i] = min(1.0,sum_pos);
01498                         sum_pos -= alpha[i];
01499                 }
01500                 else
01501                 {
01502                         alpha[i] = min(1.0,sum_neg);
01503                         sum_neg -= alpha[i];
01504                 }
01505 
01506         double *zeros = new double[l];
01507 
01508         for(i=0;i<l;i++)
01509                 zeros[i] = 0;
01510 
01511         Solver_NU s;
01512         s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01513                 alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
01514         double r = si->r;
01515 
01516         info("C = %f\n",1/r);
01517 
01518         for(i=0;i<l;i++)
01519                 alpha[i] *= y[i]/r;
01520 
01521         si->rho /= r;
01522         si->obj /= (r*r);
01523         si->upper_bound_p = 1/r;
01524         si->upper_bound_n = 1/r;
01525 
01526         delete[] y;
01527         delete[] zeros;
01528 }
01529 
01530 static void solve_one_class(
01531         const svm_problem *prob, const svm_parameter *param,
01532         double *alpha, Solver::SolutionInfo* si)
01533 {
01534         int l = prob->l;
01535         double *zeros = new double[l];
01536         schar *ones = new schar[l];
01537         int i;
01538 
01539         int n = (int)(param->nu*prob->l);       // # of alpha's at upper bound
01540 
01541         for(i=0;i<n;i++)
01542                 alpha[i] = 1;
01543         if(n<prob->l)
01544                 alpha[n] = param->nu * prob->l - n;
01545         for(i=n+1;i<l;i++)
01546                 alpha[i] = 0;
01547 
01548         for(i=0;i<l;i++)
01549         {
01550                 zeros[i] = 0;
01551                 ones[i] = 1;
01552         }
01553 
01554         Solver s;
01555         s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01556                 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01557 
01558         delete[] zeros;
01559         delete[] ones;
01560 }
01561 
01562 static void solve_epsilon_svr(
01563         const svm_problem *prob, const svm_parameter *param,
01564         double *alpha, Solver::SolutionInfo* si)
01565 {
01566         int l = prob->l;
01567         double *alpha2 = new double[2*l];
01568         double *linear_term = new double[2*l];
01569         schar *y = new schar[2*l];
01570         int i;
01571 
01572         for(i=0;i<l;i++)
01573         {
01574                 alpha2[i] = 0;
01575                 linear_term[i] = param->p - prob->y[i];
01576                 y[i] = 1;
01577 
01578                 alpha2[i+l] = 0;
01579                 linear_term[i+l] = param->p + prob->y[i];
01580                 y[i+l] = -1;
01581         }
01582 
01583         Solver s;
01584         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01585                 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01586 
01587         double sum_alpha = 0;
01588         for(i=0;i<l;i++)
01589         {
01590                 alpha[i] = alpha2[i] - alpha2[i+l];
01591                 sum_alpha += fabs(alpha[i]);
01592         }
01593         info("nu = %f\n",sum_alpha/(param->C*l));
01594 
01595         delete[] alpha2;
01596         delete[] linear_term;
01597         delete[] y;
01598 }
01599 
01600 static void solve_nu_svr(
01601         const svm_problem *prob, const svm_parameter *param,
01602         double *alpha, Solver::SolutionInfo* si)
01603 {
01604         int l = prob->l;
01605         double C = param->C;
01606         double *alpha2 = new double[2*l];
01607         double *linear_term = new double[2*l];
01608         schar *y = new schar[2*l];
01609         int i;
01610 
01611         double sum = C * param->nu * l / 2;
01612         for(i=0;i<l;i++)
01613         {
01614                 alpha2[i] = alpha2[i+l] = min(sum,C);
01615                 sum -= alpha2[i];
01616 
01617                 linear_term[i] = - prob->y[i];
01618                 y[i] = 1;
01619 
01620                 linear_term[i+l] = prob->y[i];
01621                 y[i+l] = -1;
01622         }
01623 
01624         Solver_NU s;
01625         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01626                 alpha2, C, C, param->eps, si, param->shrinking);
01627 
01628         info("epsilon = %f\n",-si->r);
01629 
01630         for(i=0;i<l;i++)
01631                 alpha[i] = alpha2[i] - alpha2[i+l];
01632 
01633         delete[] alpha2;
01634         delete[] linear_term;
01635         delete[] y;
01636 }
01637 
01638 //
01639 // decision_function
01640 //
01641 struct decision_function
01642 {
01643         double *alpha;
01644         double rho;     
01645 };
01646 
01647 static decision_function svm_train_one(
01648         const svm_problem *prob, const svm_parameter *param,
01649         double Cp, double Cn)
01650 {
01651         double *alpha = Malloc(double,prob->l);
01652         Solver::SolutionInfo si;
01653         switch(param->svm_type)
01654         {
01655                 case C_SVC:
01656                         solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01657                         break;
01658                 case NU_SVC:
01659                         solve_nu_svc(prob,param,alpha,&si);
01660                         break;
01661                 case ONE_CLASS:
01662                         solve_one_class(prob,param,alpha,&si);
01663                         break;
01664                 case EPSILON_SVR:
01665                         solve_epsilon_svr(prob,param,alpha,&si);
01666                         break;
01667                 case NU_SVR:
01668                         solve_nu_svr(prob,param,alpha,&si);
01669                         break;
01670         }
01671 
01672         info("obj = %f, rho = %f\n",si.obj,si.rho);
01673 
01674         // output SVs
01675 
01676         int nSV = 0;
01677         int nBSV = 0;
01678         for(int i=0;i<prob->l;i++)
01679         {
01680                 if(fabs(alpha[i]) > 0)
01681                 {
01682                         ++nSV;
01683                         if(prob->y[i] > 0)
01684                         {
01685                                 if(fabs(alpha[i]) >= si.upper_bound_p)
01686                                         ++nBSV;
01687                         }
01688                         else
01689                         {
01690                                 if(fabs(alpha[i]) >= si.upper_bound_n)
01691                                         ++nBSV;
01692                         }
01693                 }
01694         }
01695 
01696         info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01697 
01698         decision_function f;
01699         f.alpha = alpha;
01700         f.rho = si.rho;
01701         return f;
01702 }
01703 
01704 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
01705 static void sigmoid_train(
01706         int l, const double *dec_values, const double *labels, 
01707         double& A, double& B)
01708 {
01709         double prior1=0, prior0 = 0;
01710         int i;
01711 
01712         for (i=0;i<l;i++)
01713                 if (labels[i] > 0) prior1+=1;
01714                 else prior0+=1;
01715         
01716         int max_iter=100;       // Maximal number of iterations
01717         double min_step=1e-10;  // Minimal step taken in line search
01718         double sigma=1e-12;     // For numerically strict PD of Hessian
01719         double eps=1e-5;
01720         double hiTarget=(prior1+1.0)/(prior1+2.0);
01721         double loTarget=1/(prior0+2.0);
01722         double *t=Malloc(double,l);
01723         double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01724         double newA,newB,newf,d1,d2;
01725         int iter; 
01726         
01727         // Initial Point and Initial Fun Value
01728         A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01729         double fval = 0.0;
01730 
01731         for (i=0;i<l;i++)
01732         {
01733                 if (labels[i]>0) t[i]=hiTarget;
01734                 else t[i]=loTarget;
01735                 fApB = dec_values[i]*A+B;
01736                 if (fApB>=0)
01737                         fval += t[i]*fApB + log(1+exp(-fApB));
01738                 else
01739                         fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01740         }
01741         for (iter=0;iter<max_iter;iter++)
01742         {
01743                 // Update Gradient and Hessian (use H' = H + sigma I)
01744                 h11=sigma; // numerically ensures strict PD
01745                 h22=sigma;
01746                 h21=0.0;g1=0.0;g2=0.0;
01747                 for (i=0;i<l;i++)
01748                 {
01749                         fApB = dec_values[i]*A+B;
01750                         if (fApB >= 0)
01751                         {
01752                                 p=exp(-fApB)/(1.0+exp(-fApB));
01753                                 q=1.0/(1.0+exp(-fApB));
01754                         }
01755                         else
01756                         {
01757                                 p=1.0/(1.0+exp(fApB));
01758                                 q=exp(fApB)/(1.0+exp(fApB));
01759                         }
01760                         d2=p*q;
01761                         h11+=dec_values[i]*dec_values[i]*d2;
01762                         h22+=d2;
01763                         h21+=dec_values[i]*d2;
01764                         d1=t[i]-p;
01765                         g1+=dec_values[i]*d1;
01766                         g2+=d1;
01767                 }
01768 
01769                 // Stopping Criteria
01770                 if (fabs(g1)<eps && fabs(g2)<eps)
01771                         break;
01772 
01773                 // Finding Newton direction: -inv(H') * g
01774                 det=h11*h22-h21*h21;
01775                 dA=-(h22*g1 - h21 * g2) / det;
01776                 dB=-(-h21*g1+ h11 * g2) / det;
01777                 gd=g1*dA+g2*dB;
01778 
01779 
01780                 stepsize = 1;           // Line Search
01781                 while (stepsize >= min_step)
01782                 {
01783                         newA = A + stepsize * dA;
01784                         newB = B + stepsize * dB;
01785 
01786                         // New function value
01787                         newf = 0.0;
01788                         for (i=0;i<l;i++)
01789                         {
01790                                 fApB = dec_values[i]*newA+newB;
01791                                 if (fApB >= 0)
01792                                         newf += t[i]*fApB + log(1+exp(-fApB));
01793                                 else
01794                                         newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01795                         }
01796                         // Check sufficient decrease
01797                         if (newf<fval+0.0001*stepsize*gd)
01798                         {
01799                                 A=newA;B=newB;fval=newf;
01800                                 break;
01801                         }
01802                         else
01803                                 stepsize = stepsize / 2.0;
01804                 }
01805 
01806                 if (stepsize < min_step)
01807                 {
01808                         info("Line search fails in two-class probability estimates\n");
01809                         break;
01810                 }
01811         }
01812 
01813         if (iter>=max_iter)
01814                 info("Reaching maximal iterations in two-class probability estimates\n");
01815         free(t);
01816 }
01817 
01818 static double sigmoid_predict(double decision_value, double A, double B)
01819 {
01820         double fApB = decision_value*A+B;
01821         // 1-p used later; avoid catastrophic cancellation
01822         if (fApB >= 0)
01823                 return exp(-fApB)/(1.0+exp(-fApB));
01824         else
01825                 return 1.0/(1+exp(fApB)) ;
01826 }
01827 
01828 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
01829 static void multiclass_probability(int k, double **r, double *p)
01830 {
01831         int t,j;
01832         int iter = 0, max_iter=max(100,k);
01833         double **Q=Malloc(double *,k);
01834         double *Qp=Malloc(double,k);
01835         double pQp, eps=0.005/k;
01836         
01837         for (t=0;t<k;t++)
01838         {
01839                 p[t]=1.0/k;  // Valid if k = 1
01840                 Q[t]=Malloc(double,k);
01841                 Q[t][t]=0;
01842                 for (j=0;j<t;j++)
01843                 {
01844                         Q[t][t]+=r[j][t]*r[j][t];
01845                         Q[t][j]=Q[j][t];
01846                 }
01847                 for (j=t+1;j<k;j++)
01848                 {
01849                         Q[t][t]+=r[j][t]*r[j][t];
01850                         Q[t][j]=-r[j][t]*r[t][j];
01851                 }
01852         }
01853         for (iter=0;iter<max_iter;iter++)
01854         {
01855                 // stopping condition, recalculate QP,pQP for numerical accuracy
01856                 pQp=0;
01857                 for (t=0;t<k;t++)
01858                 {
01859                         Qp[t]=0;
01860                         for (j=0;j<k;j++)
01861                                 Qp[t]+=Q[t][j]*p[j];
01862                         pQp+=p[t]*Qp[t];
01863                 }
01864                 double max_error=0;
01865                 for (t=0;t<k;t++)
01866                 {
01867                         double error=fabs(Qp[t]-pQp);
01868                         if (error>max_error)
01869                                 max_error=error;
01870                 }
01871                 if (max_error<eps) break;
01872                 
01873                 for (t=0;t<k;t++)
01874                 {
01875                         double diff=(-Qp[t]+pQp)/Q[t][t];
01876                         p[t]+=diff;
01877                         pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01878                         for (j=0;j<k;j++)
01879                         {
01880                                 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01881                                 p[j]/=(1+diff);
01882                         }
01883                 }
01884         }
01885         if (iter>=max_iter)
01886                 info("Exceeds max_iter in multiclass_prob\n");
01887         for(t=0;t<k;t++) free(Q[t]);
01888         free(Q);
01889         free(Qp);
01890 }
01891 
01892 // Cross-validation decision values for probability estimates
01893 static void svm_binary_svc_probability(
01894         const svm_problem *prob, const svm_parameter *param,
01895         double Cp, double Cn, double& probA, double& probB)
01896 {
01897         int i;
01898         int nr_fold = 5;
01899         int *perm = Malloc(int,prob->l);
01900         double *dec_values = Malloc(double,prob->l);
01901 
01902         // random shuffle
01903         for(i=0;i<prob->l;i++) perm[i]=i;
01904         for(i=0;i<prob->l;i++)
01905         {
01906                 int j = i+rand()%(prob->l-i);
01907                 swap(perm[i],perm[j]);
01908         }
01909         for(i=0;i<nr_fold;i++)
01910         {
01911                 int begin = i*prob->l/nr_fold;
01912                 int end = (i+1)*prob->l/nr_fold;
01913                 int j,k;
01914                 struct svm_problem subprob;
01915 
01916                 subprob.l = prob->l-(end-begin);
01917                 subprob.x = Malloc(struct svm_node*,subprob.l);
01918                 subprob.y = Malloc(double,subprob.l);
01919                         
01920                 k=0;
01921                 for(j=0;j<begin;j++)
01922                 {
01923                         subprob.x[k] = prob->x[perm[j]];
01924                         subprob.y[k] = prob->y[perm[j]];
01925                         ++k;
01926                 }
01927                 for(j=end;j<prob->l;j++)
01928                 {
01929                         subprob.x[k] = prob->x[perm[j]];
01930                         subprob.y[k] = prob->y[perm[j]];
01931                         ++k;
01932                 }
01933                 int p_count=0,n_count=0;
01934                 for(j=0;j<k;j++)
01935                         if(subprob.y[j]>0)
01936                                 p_count++;
01937                         else
01938                                 n_count++;
01939 
01940                 if(p_count==0 && n_count==0)
01941                         for(j=begin;j<end;j++)
01942                                 dec_values[perm[j]] = 0;
01943                 else if(p_count > 0 && n_count == 0)
01944                         for(j=begin;j<end;j++)
01945                                 dec_values[perm[j]] = 1;
01946                 else if(p_count == 0 && n_count > 0)
01947                         for(j=begin;j<end;j++)
01948                                 dec_values[perm[j]] = -1;
01949                 else
01950                 {
01951                         svm_parameter subparam = *param;
01952                         subparam.probability=0;
01953                         subparam.C=1.0;
01954                         subparam.nr_weight=2;
01955                         subparam.weight_label = Malloc(int,2);
01956                         subparam.weight = Malloc(double,2);
01957                         subparam.weight_label[0]=+1;
01958                         subparam.weight_label[1]=-1;
01959                         subparam.weight[0]=Cp;
01960                         subparam.weight[1]=Cn;
01961                         struct svm_model *submodel = svm_train(&subprob,&subparam);
01962                         for(j=begin;j<end;j++)
01963                         {
01964                                 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]])); 
01965                                 // ensure +1 -1 order; reason not using CV subroutine
01966                                 dec_values[perm[j]] *= submodel->label[0];
01967                         }               
01968                         svm_free_and_destroy_model(&submodel);
01969                         svm_destroy_param(&subparam);
01970                 }
01971                 free(subprob.x);
01972                 free(subprob.y);
01973         }               
01974         sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
01975         free(dec_values);
01976         free(perm);
01977 }
01978 
01979 // Return parameter of a Laplace distribution 
01980 static double svm_svr_probability(
01981         const svm_problem *prob, const svm_parameter *param)
01982 {
01983         int i;
01984         int nr_fold = 5;
01985         double *ymv = Malloc(double,prob->l);
01986         double mae = 0;
01987 
01988         svm_parameter newparam = *param;
01989         newparam.probability = 0;
01990         svm_cross_validation(prob,&newparam,nr_fold,ymv);
01991         for(i=0;i<prob->l;i++)
01992         {
01993                 ymv[i]=prob->y[i]-ymv[i];
01994                 mae += fabs(ymv[i]);
01995         }               
01996         mae /= prob->l;
01997         double std=sqrt(2*mae*mae);
01998         int count=0;
01999         mae=0;
02000         for(i=0;i<prob->l;i++)
02001                 if (fabs(ymv[i]) > 5*std) 
02002                         count=count+1;
02003                 else 
02004                         mae+=fabs(ymv[i]);
02005         mae /= (prob->l-count);
02006         info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
02007         free(ymv);
02008         return mae;
02009 }
02010 
02011 
02012 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
02013 // perm, length l, must be allocated before calling this subroutine
02014 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02015 {
02016         int l = prob->l;
02017         int max_nr_class = 16;
02018         int nr_class = 0;
02019         int *label = Malloc(int,max_nr_class);
02020         int *count = Malloc(int,max_nr_class);
02021         int *data_label = Malloc(int,l);        
02022         int i;
02023 
02024         for(i=0;i<l;i++)
02025         {
02026                 int this_label = (int)prob->y[i];
02027                 int j;
02028                 for(j=0;j<nr_class;j++)
02029                 {
02030                         if(this_label == label[j])
02031                         {
02032                                 ++count[j];
02033                                 break;
02034                         }
02035                 }
02036                 data_label[i] = j;
02037                 if(j == nr_class)
02038                 {
02039                         if(nr_class == max_nr_class)
02040                         {
02041                                 max_nr_class *= 2;
02042                                 label = (int *)realloc(label,max_nr_class*sizeof(int));
02043                                 count = (int *)realloc(count,max_nr_class*sizeof(int));
02044                         }
02045                         label[nr_class] = this_label;
02046                         count[nr_class] = 1;
02047                         ++nr_class;
02048                 }
02049         }
02050 
02051         int *start = Malloc(int,nr_class);
02052         start[0] = 0;
02053         for(i=1;i<nr_class;i++)
02054                 start[i] = start[i-1]+count[i-1];
02055         for(i=0;i<l;i++)
02056         {
02057                 perm[start[data_label[i]]] = i;
02058                 ++start[data_label[i]];
02059         }
02060         start[0] = 0;
02061         for(i=1;i<nr_class;i++)
02062                 start[i] = start[i-1]+count[i-1];
02063 
02064         *nr_class_ret = nr_class;
02065         *label_ret = label;
02066         *start_ret = start;
02067         *count_ret = count;
02068         free(data_label);
02069 }
02070 
02071 //
02072 // Interface functions
02073 //
02074 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02075 {
02076         svm_model *model = Malloc(svm_model,1);
02077         model->param = *param;
02078         model->free_sv = 0;     // XXX
02079 
02080         if(param->svm_type == ONE_CLASS ||
02081            param->svm_type == EPSILON_SVR ||
02082            param->svm_type == NU_SVR)
02083         {
02084                 // regression or one-class-svm
02085                 model->nr_class = 2;
02086                 model->label = NULL;
02087                 model->nSV = NULL;
02088                 model->probA = NULL; model->probB = NULL;
02089                 model->sv_coef = Malloc(double *,1);
02090 
02091                 if(param->probability && 
02092                    (param->svm_type == EPSILON_SVR ||
02093                     param->svm_type == NU_SVR))
02094                 {
02095                         model->probA = Malloc(double,1);
02096                         model->probA[0] = svm_svr_probability(prob,param);
02097                 }
02098 
02099                 decision_function f = svm_train_one(prob,param,0,0);
02100                 model->rho = Malloc(double,1);
02101                 model->rho[0] = f.rho;
02102 
02103                 int nSV = 0;
02104                 int i;
02105                 for(i=0;i<prob->l;i++)
02106                         if(fabs(f.alpha[i]) > 0) ++nSV;
02107                 model->l = nSV;
02108                 model->SV = Malloc(svm_node *,nSV);
02109                 model->sv_coef[0] = Malloc(double,nSV);
02110                 int j = 0;
02111                 for(i=0;i<prob->l;i++)
02112                         if(fabs(f.alpha[i]) > 0)
02113                         {
02114                                 model->SV[j] = prob->x[i];
02115                                 model->sv_coef[0][j] = f.alpha[i];
02116                                 ++j;
02117                         }               
02118 
02119                 free(f.alpha);
02120         }
02121         else
02122         {
02123                 // classification
02124                 int l = prob->l;
02125                 int nr_class;
02126                 int *label = NULL;
02127                 int *start = NULL;
02128                 int *count = NULL;
02129                 int *perm = Malloc(int,l);
02130 
02131                 // group training data of the same class
02132                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02133                 if(nr_class == 1) 
02134                         info("WARNING: training data in only one class. See README for details.\n");
02135                 
02136                 svm_node **x = Malloc(svm_node *,l);
02137                 int i;
02138                 for(i=0;i<l;i++)
02139                         x[i] = prob->x[perm[i]];
02140 
02141                 // calculate weighted C
02142 
02143                 double *weighted_C = Malloc(double, nr_class);
02144                 for(i=0;i<nr_class;i++)
02145                         weighted_C[i] = param->C;
02146                 for(i=0;i<param->nr_weight;i++)
02147                 {       
02148                         int j;
02149                         for(j=0;j<nr_class;j++)
02150                                 if(param->weight_label[i] == label[j])
02151                                         break;
02152                         if(j == nr_class)
02153                                 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
02154                         else
02155                                 weighted_C[j] *= param->weight[i];
02156                 }
02157 
02158                 // train k*(k-1)/2 models
02159                 
02160                 bool *nonzero = Malloc(bool,l);
02161                 for(i=0;i<l;i++)
02162                         nonzero[i] = false;
02163                 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02164 
02165                 double *probA=NULL,*probB=NULL;
02166                 if (param->probability)
02167                 {
02168                         probA=Malloc(double,nr_class*(nr_class-1)/2);
02169                         probB=Malloc(double,nr_class*(nr_class-1)/2);
02170                 }
02171 
02172                 int p = 0;
02173                 for(i=0;i<nr_class;i++)
02174                         for(int j=i+1;j<nr_class;j++)
02175                         {
02176                                 svm_problem sub_prob;
02177                                 int si = start[i], sj = start[j];
02178                                 int ci = count[i], cj = count[j];
02179                                 sub_prob.l = ci+cj;
02180                                 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02181                                 sub_prob.y = Malloc(double,sub_prob.l);
02182                                 int k;
02183                                 for(k=0;k<ci;k++)
02184                                 {
02185                                         sub_prob.x[k] = x[si+k];
02186                                         sub_prob.y[k] = +1;
02187                                 }
02188                                 for(k=0;k<cj;k++)
02189                                 {
02190                                         sub_prob.x[ci+k] = x[sj+k];
02191                                         sub_prob.y[ci+k] = -1;
02192                                 }
02193 
02194                                 if(param->probability)
02195                                         svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02196 
02197                                 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02198                                 for(k=0;k<ci;k++)
02199                                         if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02200                                                 nonzero[si+k] = true;
02201                                 for(k=0;k<cj;k++)
02202                                         if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02203                                                 nonzero[sj+k] = true;
02204                                 free(sub_prob.x);
02205                                 free(sub_prob.y);
02206                                 ++p;
02207                         }
02208 
02209                 // build output
02210 
02211                 model->nr_class = nr_class;
02212                 
02213                 model->label = Malloc(int,nr_class);
02214                 for(i=0;i<nr_class;i++)
02215                         model->label[i] = label[i];
02216                 
02217                 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02218                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02219                         model->rho[i] = f[i].rho;
02220 
02221                 if(param->probability)
02222                 {
02223                         model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02224                         model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02225                         for(i=0;i<nr_class*(nr_class-1)/2;i++)
02226                         {
02227                                 model->probA[i] = probA[i];
02228                                 model->probB[i] = probB[i];
02229                         }
02230                 }
02231                 else
02232                 {
02233                         model->probA=NULL;
02234                         model->probB=NULL;
02235                 }
02236 
02237                 int total_sv = 0;
02238                 int *nz_count = Malloc(int,nr_class);
02239                 model->nSV = Malloc(int,nr_class);
02240                 for(i=0;i<nr_class;i++)
02241                 {
02242                         int nSV = 0;
02243                         for(int j=0;j<count[i];j++)
02244                                 if(nonzero[start[i]+j])
02245                                 {       
02246                                         ++nSV;
02247                                         ++total_sv;
02248                                 }
02249                         model->nSV[i] = nSV;
02250                         nz_count[i] = nSV;
02251                 }
02252                 
02253                 info("Total nSV = %d\n",total_sv);
02254 
02255                 model->l = total_sv;
02256                 model->SV = Malloc(svm_node *,total_sv);
02257                 p = 0;
02258                 for(i=0;i<l;i++)
02259                         if(nonzero[i]) model->SV[p++] = x[i];
02260 
02261                 int *nz_start = Malloc(int,nr_class);
02262                 nz_start[0] = 0;
02263                 for(i=1;i<nr_class;i++)
02264                         nz_start[i] = nz_start[i-1]+nz_count[i-1];
02265 
02266                 model->sv_coef = Malloc(double *,nr_class-1);
02267                 for(i=0;i<nr_class-1;i++)
02268                         model->sv_coef[i] = Malloc(double,total_sv);
02269 
02270                 p = 0;
02271                 for(i=0;i<nr_class;i++)
02272                         for(int j=i+1;j<nr_class;j++)
02273                         {
02274                                 // classifier (i,j): coefficients with
02275                                 // i are in sv_coef[j-1][nz_start[i]...],
02276                                 // j are in sv_coef[i][nz_start[j]...]
02277 
02278                                 int si = start[i];
02279                                 int sj = start[j];
02280                                 int ci = count[i];
02281                                 int cj = count[j];
02282                                 
02283                                 int q = nz_start[i];
02284                                 int k;
02285                                 for(k=0;k<ci;k++)
02286                                         if(nonzero[si+k])
02287                                                 model->sv_coef[j-1][q++] = f[p].alpha[k];
02288                                 q = nz_start[j];
02289                                 for(k=0;k<cj;k++)
02290                                         if(nonzero[sj+k])
02291                                                 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02292                                 ++p;
02293                         }
02294                 
02295                 free(label);
02296                 free(probA);
02297                 free(probB);
02298                 free(count);
02299                 free(perm);
02300                 free(start);
02301                 free(x);
02302                 free(weighted_C);
02303                 free(nonzero);
02304                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02305                         free(f[i].alpha);
02306                 free(f);
02307                 free(nz_count);
02308                 free(nz_start);
02309         }
02310         return model;
02311 }
02312 
02313 // Stratified cross validation
02314 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02315 {
02316         int i;
02317         int *fold_start = Malloc(int,nr_fold+1);
02318         int l = prob->l;
02319         int *perm = Malloc(int,l);
02320         int nr_class;
02321 
02322         // stratified cv may not give leave-one-out rate
02323         // Each class to l folds -> some folds may have zero elements
02324         if((param->svm_type == C_SVC ||
02325             param->svm_type == NU_SVC) && nr_fold < l)
02326         {
02327                 int *start = NULL;
02328                 int *label = NULL;
02329                 int *count = NULL;
02330                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02331 
02332                 // random shuffle and then data grouped by fold using the array perm
02333                 int *fold_count = Malloc(int,nr_fold);
02334                 int c;
02335                 int *index = Malloc(int,l);
02336                 for(i=0;i<l;i++)
02337                         index[i]=perm[i];
02338                 for (c=0; c<nr_class; c++) 
02339                         for(i=0;i<count[c];i++)
02340                         {
02341                                 int j = i+rand()%(count[c]-i);
02342                                 swap(index[start[c]+j],index[start[c]+i]);
02343                         }
02344                 for(i=0;i<nr_fold;i++)
02345                 {
02346                         fold_count[i] = 0;
02347                         for (c=0; c<nr_class;c++)
02348                                 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02349                 }
02350                 fold_start[0]=0;
02351                 for (i=1;i<=nr_fold;i++)
02352                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02353                 for (c=0; c<nr_class;c++)
02354                         for(i=0;i<nr_fold;i++)
02355                         {
02356                                 int begin = start[c]+i*count[c]/nr_fold;
02357                                 int end = start[c]+(i+1)*count[c]/nr_fold;
02358                                 for(int j=begin;j<end;j++)
02359                                 {
02360                                         perm[fold_start[i]] = index[j];
02361                                         fold_start[i]++;
02362                                 }
02363                         }
02364                 fold_start[0]=0;
02365                 for (i=1;i<=nr_fold;i++)
02366                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02367                 free(start);    
02368                 free(label);
02369                 free(count);    
02370                 free(index);
02371                 free(fold_count);
02372         }
02373         else
02374         {
02375                 for(i=0;i<l;i++) perm[i]=i;
02376                 for(i=0;i<l;i++)
02377                 {
02378                         int j = i+rand()%(l-i);
02379                         swap(perm[i],perm[j]);
02380                 }
02381                 for(i=0;i<=nr_fold;i++)
02382                         fold_start[i]=i*l/nr_fold;
02383         }
02384 
02385         for(i=0;i<nr_fold;i++)
02386         {
02387                 int begin = fold_start[i];
02388                 int end = fold_start[i+1];
02389                 int j,k;
02390                 struct svm_problem subprob;
02391 
02392                 subprob.l = l-(end-begin);
02393                 subprob.x = Malloc(struct svm_node*,subprob.l);
02394                 subprob.y = Malloc(double,subprob.l);
02395                         
02396                 k=0;
02397                 for(j=0;j<begin;j++)
02398                 {
02399                         subprob.x[k] = prob->x[perm[j]];
02400                         subprob.y[k] = prob->y[perm[j]];
02401                         ++k;
02402                 }
02403                 for(j=end;j<l;j++)
02404                 {
02405                         subprob.x[k] = prob->x[perm[j]];
02406                         subprob.y[k] = prob->y[perm[j]];
02407                         ++k;
02408                 }
02409                 struct svm_model *submodel = svm_train(&subprob,param);
02410                 if(param->probability && 
02411                    (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02412                 {
02413                         double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02414                         for(j=begin;j<end;j++)
02415                                 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02416                         free(prob_estimates);                   
02417                 }
02418                 else
02419                         for(j=begin;j<end;j++)
02420                                 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02421                 svm_free_and_destroy_model(&submodel);
02422                 free(subprob.x);
02423                 free(subprob.y);
02424         }               
02425         free(fold_start);
02426         free(perm);     
02427 }
02428 
02429 
02430 int svm_get_svm_type(const svm_model *model)
02431 {
02432         return model->param.svm_type;
02433 }
02434 
02435 int svm_get_nr_class(const svm_model *model)
02436 {
02437         return model->nr_class;
02438 }
02439 
02440 void svm_get_labels(const svm_model *model, int* label)
02441 {
02442         if (model->label != NULL)
02443                 for(int i=0;i<model->nr_class;i++)
02444                         label[i] = model->label[i];
02445 }
02446 
02447 double svm_get_svr_probability(const svm_model *model)
02448 {
02449         if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02450             model->probA!=NULL)
02451                 return model->probA[0];
02452         else
02453         {
02454                 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
02455                 return 0;
02456         }
02457 }
02458 
02459 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02460 {
02461         int i;
02462         if(model->param.svm_type == ONE_CLASS ||
02463            model->param.svm_type == EPSILON_SVR ||
02464            model->param.svm_type == NU_SVR)
02465         {
02466                 double *sv_coef = model->sv_coef[0];
02467                 double sum = 0;
02468                 for(i=0;i<model->l;i++)
02469                         sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02470                 sum -= model->rho[0];
02471                 *dec_values = sum;
02472 
02473                 if(model->param.svm_type == ONE_CLASS)
02474                         return (sum>0)?1:-1;
02475                 else
02476                         return sum;
02477         }
02478         else
02479         {
02480                 int nr_class = model->nr_class;
02481                 int l = model->l;
02482                 
02483                 double *kvalue = Malloc(double,l);
02484                 for(i=0;i<l;i++)
02485                         kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02486 
02487                 int *start = Malloc(int,nr_class);
02488                 start[0] = 0;
02489                 for(i=1;i<nr_class;i++)
02490                         start[i] = start[i-1]+model->nSV[i-1];
02491 
02492                 int *vote = Malloc(int,nr_class);
02493                 for(i=0;i<nr_class;i++)
02494                         vote[i] = 0;
02495 
02496                 int p=0;
02497                 for(i=0;i<nr_class;i++)
02498                         for(int j=i+1;j<nr_class;j++)
02499                         {
02500                                 double sum = 0;
02501                                 int si = start[i];
02502                                 int sj = start[j];
02503                                 int ci = model->nSV[i];
02504                                 int cj = model->nSV[j];
02505                                 
02506                                 int k;
02507                                 double *coef1 = model->sv_coef[j-1];
02508                                 double *coef2 = model->sv_coef[i];
02509                                 for(k=0;k<ci;k++)
02510                                         sum += coef1[si+k] * kvalue[si+k];
02511                                 for(k=0;k<cj;k++)
02512                                         sum += coef2[sj+k] * kvalue[sj+k];
02513                                 sum -= model->rho[p];
02514                                 dec_values[p] = sum;
02515 
02516                                 if(dec_values[p] > 0)
02517                                         ++vote[i];
02518                                 else
02519                                         ++vote[j];
02520                                 p++;
02521                         }
02522 
02523                 int vote_max_idx = 0;
02524                 for(i=1;i<nr_class;i++)
02525                         if(vote[i] > vote[vote_max_idx])
02526                                 vote_max_idx = i;
02527 
02528                 free(kvalue);
02529                 free(start);
02530                 free(vote);
02531                 return model->label[vote_max_idx];
02532         }
02533 }
02534 
02535 double svm_predict(const svm_model *model, const svm_node *x)
02536 {
02537         int nr_class = model->nr_class;
02538         double *dec_values;
02539         if(model->param.svm_type == ONE_CLASS ||
02540            model->param.svm_type == EPSILON_SVR ||
02541            model->param.svm_type == NU_SVR)
02542                 dec_values = Malloc(double, 1);
02543         else 
02544                 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02545         double pred_result = svm_predict_values(model, x, dec_values);
02546         free(dec_values);
02547         return pred_result;
02548 }
02549 
02550 double svm_predict_probability(
02551         const svm_model *model, const svm_node *x, double *prob_estimates)
02552 {
02553         if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02554             model->probA!=NULL && model->probB!=NULL)
02555         {
02556                 int i;
02557                 int nr_class = model->nr_class;
02558                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02559                 svm_predict_values(model, x, dec_values);
02560 
02561                 double min_prob=1e-7;
02562                 double **pairwise_prob=Malloc(double *,nr_class);
02563                 for(i=0;i<nr_class;i++)
02564                         pairwise_prob[i]=Malloc(double,nr_class);
02565                 int k=0;
02566                 for(i=0;i<nr_class;i++)
02567                         for(int j=i+1;j<nr_class;j++)
02568                         {
02569                                 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02570                                 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02571                                 k++;
02572                         }
02573                 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02574 
02575                 int prob_max_idx = 0;
02576                 for(i=1;i<nr_class;i++)
02577                         if(prob_estimates[i] > prob_estimates[prob_max_idx])
02578                                 prob_max_idx = i;
02579                 for(i=0;i<nr_class;i++)
02580                         free(pairwise_prob[i]);
02581                 free(dec_values);
02582                 free(pairwise_prob);         
02583                 return model->label[prob_max_idx];
02584         }
02585         else 
02586                 return svm_predict(model, x);
02587 }
02588 
02589 static const char *svm_type_table[] =
02590 {
02591         "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02592 };
02593 
02594 static const char *kernel_type_table[]=
02595 {
02596         "linear","polynomial","rbf","sigmoid","precomputed",NULL
02597 };
02598 
02599 int svm_save_model(const char *model_file_name, const svm_model *model)
02600 {
02601         FILE *fp = fopen(model_file_name,"w");
02602         if(fp==NULL) return -1;
02603 
02604         char *old_locale = strdup(setlocale(LC_ALL, NULL));
02605         setlocale(LC_ALL, "C");
02606 
02607         const svm_parameter& param = model->param;
02608 
02609         fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02610         fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02611 
02612         if(param.kernel_type == POLY)
02613                 fprintf(fp,"degree %d\n", param.degree);
02614 
02615         if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02616                 fprintf(fp,"gamma %g\n", param.gamma);
02617 
02618         if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02619                 fprintf(fp,"coef0 %g\n", param.coef0);
02620 
02621         int nr_class = model->nr_class;
02622         int l = model->l;
02623         fprintf(fp, "nr_class %d\n", nr_class);
02624         fprintf(fp, "total_sv %d\n",l);
02625         
02626         {
02627                 fprintf(fp, "rho");
02628                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02629                         fprintf(fp," %g",model->rho[i]);
02630                 fprintf(fp, "\n");
02631         }
02632         
02633         if(model->label)
02634         {
02635                 fprintf(fp, "label");
02636                 for(int i=0;i<nr_class;i++)
02637                         fprintf(fp," %d",model->label[i]);
02638                 fprintf(fp, "\n");
02639         }
02640 
02641         if(model->probA) // regression has probA only
02642         {
02643                 fprintf(fp, "probA");
02644                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02645                         fprintf(fp," %g",model->probA[i]);
02646                 fprintf(fp, "\n");
02647         }
02648         if(model->probB)
02649         {
02650                 fprintf(fp, "probB");
02651                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02652                         fprintf(fp," %g",model->probB[i]);
02653                 fprintf(fp, "\n");
02654         }
02655 
02656         if(model->nSV)
02657         {
02658                 fprintf(fp, "nr_sv");
02659                 for(int i=0;i<nr_class;i++)
02660                         fprintf(fp," %d",model->nSV[i]);
02661                 fprintf(fp, "\n");
02662         }
02663 
02664         fprintf(fp, "SV\n");
02665         const double * const *sv_coef = model->sv_coef;
02666         const svm_node * const *SV = model->SV;
02667 
02668         for(int i=0;i<l;i++)
02669         {
02670                 for(int j=0;j<nr_class-1;j++)
02671                         fprintf(fp, "%.16g ",sv_coef[j][i]);
02672 
02673                 const svm_node *p = SV[i];
02674 
02675                 if(param.kernel_type == PRECOMPUTED)
02676                         fprintf(fp,"0:%d ",(int)(p->value));
02677                 else
02678                         while(p->index != -1)
02679                         {
02680                                 fprintf(fp,"%d:%.8g ",p->index,p->value);
02681                                 p++;
02682                         }
02683                 fprintf(fp, "\n");
02684         }
02685 
02686         setlocale(LC_ALL, old_locale);
02687         free(old_locale);
02688 
02689         if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02690         else return 0;
02691 }
02692 
02693 static char *line = NULL;
02694 static int max_line_len;
02695 
02696 static char* readline(FILE *input)
02697 {
02698         int len;
02699 
02700         if(fgets(line,max_line_len,input) == NULL)
02701                 return NULL;
02702 
02703         while(strrchr(line,'\n') == NULL)
02704         {
02705                 max_line_len *= 2;
02706                 line = (char *) realloc(line,max_line_len);
02707                 len = (int) strlen(line);
02708                 if(fgets(line+len,max_line_len-len,input) == NULL)
02709                         break;
02710         }
02711         return line;
02712 }
02713 
02714 svm_model *svm_load_model(const char *model_file_name)
02715 {
02716         FILE *fp = fopen(model_file_name,"rb");
02717         if(fp==NULL) return NULL;
02718 
02719         char *old_locale = strdup(setlocale(LC_ALL, NULL));
02720         setlocale(LC_ALL, "C");
02721 
02722         // read parameters
02723 
02724         svm_model *model = Malloc(svm_model,1);
02725         svm_parameter& param = model->param;
02726         model->rho = NULL;
02727         model->probA = NULL;
02728         model->probB = NULL;
02729         model->label = NULL;
02730         model->nSV = NULL;
02731 
02732         char cmd[81];
02733         while(1)
02734         {
02735                 fscanf(fp,"%80s",cmd);
02736 
02737                 if(strcmp(cmd,"svm_type")==0)
02738                 {
02739                         fscanf(fp,"%80s",cmd);
02740                         int i;
02741                         for(i=0;svm_type_table[i];i++)
02742                         {
02743                                 if(strcmp(svm_type_table[i],cmd)==0)
02744                                 {
02745                                         param.svm_type=i;
02746                                         break;
02747                                 }
02748                         }
02749                         if(svm_type_table[i] == NULL)
02750                         {
02751                                 fprintf(stderr,"unknown svm type.\n");
02752                                 
02753                                 setlocale(LC_ALL, old_locale);
02754                                 free(old_locale);
02755                                 free(model->rho);
02756                                 free(model->label);
02757                                 free(model->nSV);
02758                                 free(model);
02759                                 return NULL;
02760                         }
02761                 }
02762                 else if(strcmp(cmd,"kernel_type")==0)
02763                 {               
02764                         fscanf(fp,"%80s",cmd);
02765                         int i;
02766                         for(i=0;kernel_type_table[i];i++)
02767                         {
02768                                 if(strcmp(kernel_type_table[i],cmd)==0)
02769                                 {
02770                                         param.kernel_type=i;
02771                                         break;
02772                                 }
02773                         }
02774                         if(kernel_type_table[i] == NULL)
02775                         {
02776                                 fprintf(stderr,"unknown kernel function.\n");
02777                                 
02778                                 setlocale(LC_ALL, old_locale);
02779                                 free(old_locale);
02780                                 free(model->rho);
02781                                 free(model->label);
02782                                 free(model->nSV);
02783                                 free(model);
02784                                 return NULL;
02785                         }
02786                 }
02787                 else if(strcmp(cmd,"degree")==0)
02788                         fscanf(fp,"%d",&param.degree);
02789                 else if(strcmp(cmd,"gamma")==0)
02790                         fscanf(fp,"%lf",&param.gamma);
02791                 else if(strcmp(cmd,"coef0")==0)
02792                         fscanf(fp,"%lf",&param.coef0);
02793                 else if(strcmp(cmd,"nr_class")==0)
02794                         fscanf(fp,"%d",&model->nr_class);
02795                 else if(strcmp(cmd,"total_sv")==0)
02796                         fscanf(fp,"%d",&model->l);
02797                 else if(strcmp(cmd,"rho")==0)
02798                 {
02799                         int n = model->nr_class * (model->nr_class-1)/2;
02800                         model->rho = Malloc(double,n);
02801                         for(int i=0;i<n;i++)
02802                                 fscanf(fp,"%lf",&model->rho[i]);
02803                 }
02804                 else if(strcmp(cmd,"label")==0)
02805                 {
02806                         int n = model->nr_class;
02807                         model->label = Malloc(int,n);
02808                         for(int i=0;i<n;i++)
02809                                 fscanf(fp,"%d",&model->label[i]);
02810                 }
02811                 else if(strcmp(cmd,"probA")==0)
02812                 {
02813                         int n = model->nr_class * (model->nr_class-1)/2;
02814                         model->probA = Malloc(double,n);
02815                         for(int i=0;i<n;i++)
02816                                 fscanf(fp,"%lf",&model->probA[i]);
02817                 }
02818                 else if(strcmp(cmd,"probB")==0)
02819                 {
02820                         int n = model->nr_class * (model->nr_class-1)/2;
02821                         model->probB = Malloc(double,n);
02822                         for(int i=0;i<n;i++)
02823                                 fscanf(fp,"%lf",&model->probB[i]);
02824                 }
02825                 else if(strcmp(cmd,"nr_sv")==0)
02826                 {
02827                         int n = model->nr_class;
02828                         model->nSV = Malloc(int,n);
02829                         for(int i=0;i<n;i++)
02830                                 fscanf(fp,"%d",&model->nSV[i]);
02831                 }
02832                 else if(strcmp(cmd,"SV")==0)
02833                 {
02834                         while(1)
02835                         {
02836                                 int c = getc(fp);
02837                                 if(c==EOF || c=='\n') break;    
02838                         }
02839                         break;
02840                 }
02841                 else
02842                 {
02843                         fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02844                         
02845                         setlocale(LC_ALL, old_locale);
02846                         free(old_locale);
02847                         free(model->rho);
02848                         free(model->label);
02849                         free(model->nSV);
02850                         free(model);
02851                         return NULL;
02852                 }
02853         }
02854 
02855         // read sv_coef and SV
02856 
02857         int elements = 0;
02858         long pos = ftell(fp);
02859 
02860         max_line_len = 1024;
02861         line = Malloc(char,max_line_len);
02862         char *p,*endptr,*idx,*val;
02863 
02864         while(readline(fp)!=NULL)
02865         {
02866                 p = strtok(line,":");
02867                 while(1)
02868                 {
02869                         p = strtok(NULL,":");
02870                         if(p == NULL)
02871                                 break;
02872                         ++elements;
02873                 }
02874         }
02875         elements += model->l;
02876 
02877         fseek(fp,pos,SEEK_SET);
02878 
02879         int m = model->nr_class - 1;
02880         int l = model->l;
02881         model->sv_coef = Malloc(double *,m);
02882         int i;
02883         for(i=0;i<m;i++)
02884                 model->sv_coef[i] = Malloc(double,l);
02885         model->SV = Malloc(svm_node*,l);
02886         svm_node *x_space = NULL;
02887         if(l>0) x_space = Malloc(svm_node,elements);
02888 
02889         int j=0;
02890         for(i=0;i<l;i++)
02891         {
02892                 readline(fp);
02893                 model->SV[i] = &x_space[j];
02894 
02895                 p = strtok(line, " \t");
02896                 model->sv_coef[0][i] = strtod(p,&endptr);
02897                 for(int k=1;k<m;k++)
02898                 {
02899                         p = strtok(NULL, " \t");
02900                         model->sv_coef[k][i] = strtod(p,&endptr);
02901                 }
02902 
02903                 while(1)
02904                 {
02905                         idx = strtok(NULL, ":");
02906                         val = strtok(NULL, " \t");
02907 
02908                         if(val == NULL)
02909                                 break;
02910                         x_space[j].index = (int) strtol(idx,&endptr,10);
02911                         x_space[j].value = strtod(val,&endptr);
02912 
02913                         ++j;
02914                 }
02915                 x_space[j++].index = -1;
02916         }
02917         free(line);
02918 
02919         setlocale(LC_ALL, old_locale);
02920         free(old_locale);
02921 
02922         if (ferror(fp) != 0 || fclose(fp) != 0)
02923                 return NULL;
02924 
02925         model->free_sv = 1;     // XXX
02926         return model;
02927 }
02928 
02929 void svm_free_model_content(svm_model* model_ptr)
02930 {
02931         if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
02932                 free((void *)(model_ptr->SV[0]));
02933         if(model_ptr->sv_coef)
02934         {
02935                 for(int i=0;i<model_ptr->nr_class-1;i++)
02936                         free(model_ptr->sv_coef[i]);
02937         }
02938 
02939         free(model_ptr->SV);
02940         model_ptr->SV = NULL;
02941 
02942         free(model_ptr->sv_coef);
02943         model_ptr->sv_coef = NULL;
02944 
02945         free(model_ptr->rho);
02946         model_ptr->rho = NULL;
02947 
02948         free(model_ptr->label);
02949         model_ptr->label= NULL;
02950 
02951         free(model_ptr->probA);
02952         model_ptr->probA = NULL;
02953 
02954         free(model_ptr->probB);
02955         model_ptr->probB= NULL;
02956 
02957         free(model_ptr->nSV);
02958         model_ptr->nSV = NULL;
02959 }
02960 
02961 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
02962 {
02963         if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
02964         {
02965                 svm_free_model_content(*model_ptr_ptr);
02966                 free(*model_ptr_ptr);
02967                 *model_ptr_ptr = NULL;
02968         }
02969 }
02970 
02971 void svm_destroy_param(svm_parameter* param)
02972 {
02973         free(param->weight_label);
02974         free(param->weight);
02975 }
02976 
02977 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
02978 {
02979         // svm_type
02980 
02981         int svm_type = param->svm_type;
02982         if(svm_type != C_SVC &&
02983            svm_type != NU_SVC &&
02984            svm_type != ONE_CLASS &&
02985            svm_type != EPSILON_SVR &&
02986            svm_type != NU_SVR)
02987                 return "unknown svm type";
02988         
02989         // kernel_type, degree
02990         
02991         int kernel_type = param->kernel_type;
02992         if(kernel_type != LINEAR &&
02993            kernel_type != POLY &&
02994            kernel_type != RBF &&
02995            kernel_type != SIGMOID &&
02996            kernel_type != PRECOMPUTED)
02997                 return "unknown kernel type";
02998 
02999         if(param->gamma < 0)
03000                 return "gamma < 0";
03001 
03002         if(param->degree < 0)
03003                 return "degree of polynomial kernel < 0";
03004 
03005         // cache_size,eps,C,nu,p,shrinking
03006 
03007         if(param->cache_size <= 0)
03008                 return "cache_size <= 0";
03009 
03010         if(param->eps <= 0)
03011                 return "eps <= 0";
03012 
03013         if(svm_type == C_SVC ||
03014            svm_type == EPSILON_SVR ||
03015            svm_type == NU_SVR)
03016                 if(param->C <= 0)
03017                         return "C <= 0";
03018 
03019         if(svm_type == NU_SVC ||
03020            svm_type == ONE_CLASS ||
03021            svm_type == NU_SVR)
03022                 if(param->nu <= 0 || param->nu > 1)
03023                         return "nu <= 0 or nu > 1";
03024 
03025         if(svm_type == EPSILON_SVR)
03026                 if(param->p < 0)
03027                         return "p < 0";
03028 
03029         if(param->shrinking != 0 &&
03030            param->shrinking != 1)
03031                 return "shrinking != 0 and shrinking != 1";
03032 
03033         if(param->probability != 0 &&
03034            param->probability != 1)
03035                 return "probability != 0 and probability != 1";
03036 
03037         if(param->probability == 1 &&
03038            svm_type == ONE_CLASS)
03039                 return "one-class SVM probability output not supported yet";
03040 
03041 
03042         // check whether nu-svc is feasible
03043         
03044         if(svm_type == NU_SVC)
03045         {
03046                 int l = prob->l;
03047                 int max_nr_class = 16;
03048                 int nr_class = 0;
03049                 int *label = Malloc(int,max_nr_class);
03050                 int *count = Malloc(int,max_nr_class);
03051 
03052                 int i;
03053                 for(i=0;i<l;i++)
03054                 {
03055                         int this_label = (int)prob->y[i];
03056                         int j;
03057                         for(j=0;j<nr_class;j++)
03058                                 if(this_label == label[j])
03059                                 {
03060                                         ++count[j];
03061                                         break;
03062                                 }
03063                         if(j == nr_class)
03064                         {
03065                                 if(nr_class == max_nr_class)
03066                                 {
03067                                         max_nr_class *= 2;
03068                                         label = (int *)realloc(label,max_nr_class*sizeof(int));
03069                                         count = (int *)realloc(count,max_nr_class*sizeof(int));
03070                                 }
03071                                 label[nr_class] = this_label;
03072                                 count[nr_class] = 1;
03073                                 ++nr_class;
03074                         }
03075                 }
03076         
03077                 for(i=0;i<nr_class;i++)
03078                 {
03079                         int n1 = count[i];
03080                         for(int j=i+1;j<nr_class;j++)
03081                         {
03082                                 int n2 = count[j];
03083                                 if(param->nu*(n1+n2)/2 > min(n1,n2))
03084                                 {
03085                                         free(label);
03086                                         free(count);
03087                                         return "specified nu is infeasible";
03088                                 }
03089                         }
03090                 }
03091                 free(label);
03092                 free(count);
03093         }
03094 
03095         return NULL;
03096 }
03097 
03098 int svm_check_probability_model(const svm_model *model)
03099 {
03100         return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03101                 model->probA!=NULL && model->probB!=NULL) ||
03102                 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03103                  model->probA!=NULL);
03104 }
03105 
03106 void svm_set_print_string_function(void (*print_func)(const char *))
03107 {
03108         if(print_func == NULL)
03109                 svm_print_string = &print_string_stdout;
03110         else
03111                 svm_print_string = print_func;
03112 }