svm_classifier.cpp
Go to the documentation of this file.
00001 /*********************************************************************
00002  *
00003  * Software License Agreement (BSD License)
00004  *
00005  *  Copyright (c) 2012, Scott Niekum
00006  *  All rights reserved.
00007  *
00008  *  Redistribution and use in source and binary forms, with or without
00009  *  modification, are permitted provided that the following conditions
00010  *  are met:
00011  *
00012  *   * Redistributions of source code must retain the above copyright
00013  *     notice, this list of conditions and the following disclaimer.
00014  *   * Redistributions in binary form must reproduce the above
00015  *     copyright notice, this list of conditions and the following
00016  *     disclaimer in the documentation and/or other materials provided
00017  *     with the distribution.
00018  *   * Neither the name of the Willow Garage nor the names of its
00019  *     contributors may be used to endorse or promote products derived
00020  *     from this software without specific prior written permission.
00021  *
00022  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00023  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00024  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00025  *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
00026  *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00027  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00028  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00029  *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00030  *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00031  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00032  *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00033  *  POSSIBILITY OF SUCH DAMAGE.
00034  *
00035  *********************************************************************/
00036 
00042 #include "ml_classifiers/svm_classifier.h"
00043 #include <pluginlib/class_list_macros.h>
00044 #include <math.h>
00045 
00046 PLUGINLIB_DECLARE_CLASS(ml_classifiers, SVMClassifier, ml_classifiers::SVMClassifier, ml_classifiers::Classifier)
00047 
00048 using namespace std;
00049 
00050 namespace ml_classifiers{
00051 
00052     SVMClassifier::SVMClassifier(){}
00053     
00054     SVMClassifier::~SVMClassifier(){}
00055     
00056     void SVMClassifier::save(const std::string filename){}
00057     
00058     bool SVMClassifier::load(const std::string filename){return false;}
00059     
00060     void SVMClassifier::addTrainingPoint(string target_class, const std::vector<double> point)
00061     {
00062         class_data[target_class].push_back(point);
00063     }
00064     
00065     void SVMClassifier::train()
00066     {
00067         if(class_data.size() == 0){
00068             printf("SVMClassifier::train() -- No training data available! Doing nothing.\n");
00069             return;
00070         }
00071         
00072         int n_classes = class_data.size();
00073         
00074         //Count the training data
00075         int n_data = 0;
00076         int dims = class_data.begin()->second[0].size();
00077         for(ClassMap::iterator iter = class_data.begin(); iter != class_data.end(); iter++){
00078             CPointList cpl = iter->second;
00079             if(cpl.size() == 1)
00080                 n_data += 2;    //There's a bug in libSVM for classes with only 1 data point, so we will duplicate them later
00081             else
00082                 n_data += cpl.size();
00083         }
00084         
00085         //Allocate space for data in an svm_problem structure
00086         svm_data.l = n_data;
00087         svm_data.y = new double[n_data];
00088         svm_data.x = new svm_node*[n_data]; 
00089         for(int i=0; i<n_data; i++)
00090             svm_data.x[i] = new svm_node[dims+1];
00091         
00092         //Create maps between string labels and int labels
00093         label_str_to_int.clear();
00094         label_int_to_str.clear();
00095         int label_n = 0;
00096         for(ClassMap::iterator iter = class_data.begin(); iter != class_data.end(); iter++){
00097             string cname = iter->first;
00098             label_str_to_int[cname] = label_n;
00099             label_int_to_str[label_n] = cname;
00100             //cout << "MAP: " << label_n << "   " << cname << "   Size: " << iter->second.size() << endl;
00101             ++label_n;
00102         }
00103                 
00104         //Find the range of the data in each dim and calc the scaling factors to scale from 0 to 1
00105         scaling_factors = new double*[dims];
00106         for(int i=0; i<dims; i++)
00107             scaling_factors[i] = new double[2];
00108             
00109         //Scale each dimension separately
00110         for(int j=0; j<dims; j++){
00111             //First find the min, max, and scaling factor
00112             double minval = INFINITY;
00113             double maxval = -INFINITY;
00114             for(ClassMap::iterator iter = class_data.begin(); iter != class_data.end(); iter++){
00115                 CPointList cpl = iter->second;
00116                 for(size_t i=0; i<cpl.size(); i++){
00117                     if(cpl[i][j] < minval) 
00118                         minval = cpl[i][j];
00119                     if(cpl[i][j] > maxval) 
00120                         maxval = cpl[i][j];
00121                 }
00122             }
00123             double factor = maxval-minval;
00124             double offset = minval;
00125             
00126             //Do the scaling and save the scaling factor and offset
00127             for(ClassMap::iterator iter = class_data.begin(); iter != class_data.end(); iter++){
00128                 for(size_t i=0; i<iter->second.size(); i++){
00129                     iter->second[i][j] = (iter->second[i][j] - offset) / factor;
00130                 }
00131             }
00132             scaling_factors[j][0] = offset;
00133             scaling_factors[j][1] = factor;
00134         }
00135         
00136         //Put the training data into the svm_problem
00137         int n = 0;
00138         for(ClassMap::iterator iter = class_data.begin(); iter != class_data.end(); iter++){
00139             string cname = iter->first;
00140             CPointList cpl = iter->second;
00141             
00142             //Account for bug in libSVM with classes with only 1 data point by duplicating it.
00143             if(cpl.size() == 1){
00144                 svm_data.y[n] = label_str_to_int[cname];
00145                 svm_data.y[n+1] = label_str_to_int[cname];
00146                 for(int j=0; j<dims; j++){
00147                     svm_data.x[n][j].index = j;
00148                     svm_data.x[n][j].value = cpl[0][j] + 0.001;
00149                     svm_data.x[n+1][j].index = j;
00150                     svm_data.x[n+1][j].value = cpl[0][j] + 0.001;
00151                 }
00152                 svm_data.x[n][dims].index = -1;
00153                 svm_data.x[n+1][dims].index = -1;
00154                 n = n + 2;
00155             }
00156             else{
00157                 for(size_t i=0; i<cpl.size(); i++){
00158                     svm_data.y[n] = label_str_to_int[cname];
00159                     for(int j=0; j<dims; j++){
00160                         svm_data.x[n][j].index = j;
00161                         svm_data.x[n][j].value = cpl[i][j];
00162                     }
00163                 svm_data.x[n][dims].index = -1;
00164                 n = n + 1;
00165                 }
00166             }
00167         } 
00168         
00169         //Set the training params
00170         svm_parameter params;
00171         params.svm_type = C_SVC;
00172         params.kernel_type = RBF;
00173         params.cache_size = 100.0;  
00174         params.gamma = 1.0;
00175         params.C = 1.0;
00176         params.eps = 0.001;
00177         params.shrinking = 1;
00178         params.probability = 0;
00179         params.degree = 0;
00180         params.nr_weight = 0;
00181         //params.weight_label = 
00182         //params.weight = 
00183         
00184         const char *err_str = svm_check_parameter(&svm_data, &params);
00185         if(err_str){
00186             printf("SVMClassifier::train() -- Bad SVM parameters!\n");
00187             printf("%s\n",err_str);
00188             return;
00189         }
00190         
00191         //Grid Search for best C and gamma params
00192         int n_folds = min(10, n_data);  //Make sure there at least as many points as folds
00193         double *resp = new double[n_data];
00194         double best_accy = 0.0;
00195         double best_g = 0.0;
00196         double best_c = 0.0;
00197         
00198         //First, do a coarse search
00199         for(double c = -5.0; c <= 15.0; c += 2.0){
00200             for(double g = 3.0; g >= -15.0; g -= 2.0){    
00201                 params.gamma = pow(2,g);
00202                 params.C = pow(2,c);
00203                 
00204                 svm_cross_validation(&svm_data, &params, n_folds, resp);
00205                 
00206                 //Figure out the accuracy using these params
00207                 int correct = 0;
00208                 for(int i=0; i<n_data; i++){
00209                     if(resp[i] == svm_data.y[i])
00210                         ++correct;
00211                     double accy = double(correct) / double(n_data);
00212                     if(accy > best_accy){
00213                         best_accy = accy;
00214                         best_g = params.gamma;
00215                         best_c = params.C;
00216                     }
00217                 }
00218             }
00219         }
00220         
00221         //Now do a finer grid search based on coarse results   
00222         double start_c = best_c - 1.0;
00223         double end_c = best_c + 1.0;
00224         double start_g = best_g + 1.0;
00225         double end_g = best_g - 1.0;
00226         for(double c = start_c; c <= end_c; c += 0.1){
00227             for(double g = start_g; g >= end_g; g -= 0.1){
00228                 params.gamma = pow(2,g);
00229                 params.C = pow(2,c);
00230                 svm_cross_validation(&svm_data, &params, n_folds, resp);
00231                 
00232                 //Figure out the accuracy using these params
00233                 int correct = 0;
00234                 for(int i=0; i<n_data; i++){
00235                     if(resp[i] == svm_data.y[i])
00236                         ++correct;
00237                     double accy = double(correct) / double(n_data);
00238                     
00239                     if(accy > best_accy){
00240                         best_accy = accy;
00241                         best_g = params.gamma;
00242                         best_c = params.C;
00243                     }
00244                 }
00245             }
00246         }
00247 
00248         // Set params to best found in grid search
00249         params.gamma = best_g;
00250         params.C = best_c;
00251     
00252         printf("BEST PARAMS  ncl: %i   c: %f   g: %f   accy: %f \n\n", n_classes, best_c, best_g, best_accy);
00253         
00254         //Train the SVM
00255         trained_model = svm_train(&svm_data, &params);
00256     }
00257     
00258     void SVMClassifier::clear()
00259     {
00260         class_data.clear();
00261         label_str_to_int.clear();
00262         label_int_to_str.clear();
00263         trained_model = NULL;  // Actually, this should get freed
00264         scaling_factors = NULL;
00265     }
00266     
00267     string SVMClassifier::classifyPoint(const std::vector<double> point)
00268     {
00269         //Copy the point to be classified into an svm_node
00270         int dims = point.size();
00271         svm_node* test_pt = new svm_node[dims+1];
00272         for(int i=0; i<dims; i++){
00273             test_pt[i].index = i;
00274             //Scale the point using the training scaling values
00275             test_pt[i].value = (point[i]-scaling_factors[i][0]) / scaling_factors[i][1];
00276         }
00277         test_pt[dims].index = -1;
00278         
00279         //Classify the point using the currently trained SVM
00280         int label_n = svm_predict(trained_model, test_pt);
00281         return label_int_to_str[label_n];
00282     }
00283 }
00284 


ml_classifiers
Author(s): Scott Niekum
autogenerated on Mon Oct 6 2014 02:20:58