libsvmread.c
Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include <string.h>
00003 #include <stdlib.h>
00004 #include <ctype.h>
00005 #include <errno.h>
00006 
00007 #include "mex.h"
00008 
// MEX API versions older than 0x07030000 do not supply mwIndex; use a plain int there.
#if defined(MX_API_VER) && MX_API_VER < 0x07030000
typedef int mwIndex;
#endif

// Fallback two-argument maximum/minimum, only defined when the environment has
// not already provided them. Each argument may be evaluated more than once.
#ifndef max
#define max(x,y) ((x) > (y) ? (x) : (y))
#endif
#ifndef min
#define min(x,y) ((x) < (y) ? (x) : (y))
#endif
00020 
// Print the one-line usage message for libsvmread to the MATLAB console.
void exit_with_help()
{
	mexPrintf("Usage: [label_vector, instance_matrix] = libsvmread('filename');\n");
}
00027 
00028 static void fake_answer(mxArray *plhs[])
00029 {
00030         plhs[0] = mxCreateDoubleMatrix(0, 0, mxREAL);
00031         plhs[1] = mxCreateDoubleMatrix(0, 0, mxREAL);
00032 }
00033 
// Shared line buffer filled by readline(); the caller allocates it (and sets
// max_line_len to its size) before the first call and frees it afterwards.
static char *line;
// Current capacity of `line` in bytes.
static int max_line_len;

// Read the next line of `input` into the shared `line` buffer, doubling the
// buffer until the whole line (including its trailing '\n', if any) fits.
//
// Returns `line` on success. Returns NULL when nothing could be read (end of
// file or read error), or when the buffer could not be grown; in the latter
// case errno is set to ENOMEM and `line`/`max_line_len` still describe the
// old, valid buffer holding the part of the line read so far.
static char* readline(FILE *input)
{
	if(fgets(line,max_line_len,input) == NULL)
		return NULL;

	while(strrchr(line,'\n') == NULL)
	{
		int len = (int) strlen(line);
		int grown_len = max_line_len * 2;
		// Keep the old pointer until realloc succeeds so a failure neither
		// leaks the buffer nor leaves `line` NULL for the caller to trip on.
		char *grown = (char *) realloc(line, grown_len);

		if(grown == NULL)
		{
			errno = ENOMEM;
			return NULL;
		}
		line = grown;
		max_line_len = grown_len;

		// A NULL here means the last line of the file has no newline.
		if(fgets(line+len,max_line_len-len,input) == NULL)
			break;
	}
	return line;
}
00054 
00055 // read in a problem (in libsvm format)
00056 void read_problem(const char *filename, mxArray *plhs[])
00057 {
00058         int max_index, min_index, inst_max_index, i;
00059         long elements, k;
00060         FILE *fp = fopen(filename,"r");
00061         int l = 0;
00062         char *endptr;
00063         mwIndex *ir, *jc;
00064         double *labels, *samples;
00065         
00066         if(fp == NULL)
00067         {
00068                 mexPrintf("can't open input file %s\n",filename);
00069                 fake_answer(plhs);
00070                 return;
00071         }
00072 
00073         max_line_len = 1024;
00074         line = (char *) malloc(max_line_len*sizeof(char));
00075 
00076         max_index = 0;
00077         min_index = 1; // our index starts from 1
00078         elements = 0;
00079         while(readline(fp) != NULL)
00080         {
00081                 char *idx, *val;
00082                 // features
00083                 int index = 0;
00084 
00085                 inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0
00086                 strtok(line," \t"); // label
00087                 while (1)
00088                 {
00089                         idx = strtok(NULL,":"); // index:value
00090                         val = strtok(NULL," \t");
00091                         if(val == NULL)
00092                                 break;
00093 
00094                         errno = 0;
00095                         index = (int) strtol(idx,&endptr,10);
00096                         if(endptr == idx || errno != 0 || *endptr != '\0' || index <= inst_max_index)
00097                         {
00098                                 mexPrintf("Wrong input format at line %d\n",l+1);
00099                                 fake_answer(plhs);
00100                                 return;
00101                         }
00102                         else
00103                                 inst_max_index = index;
00104 
00105                         min_index = min(min_index, index);
00106                         elements++;
00107                 }
00108                 max_index = max(max_index, inst_max_index);
00109                 l++;
00110         }
00111         rewind(fp);
00112 
00113         // y
00114         plhs[0] = mxCreateDoubleMatrix(l, 1, mxREAL);
00115         // x^T
00116         if (min_index <= 0)
00117                 plhs[1] = mxCreateSparse(max_index-min_index+1, l, elements, mxREAL);
00118         else
00119                 plhs[1] = mxCreateSparse(max_index, l, elements, mxREAL);
00120 
00121         labels = mxGetPr(plhs[0]);
00122         samples = mxGetPr(plhs[1]);
00123         ir = mxGetIr(plhs[1]);
00124         jc = mxGetJc(plhs[1]);
00125 
00126         k=0;
00127         for(i=0;i<l;i++)
00128         {
00129                 char *idx, *val, *label;
00130                 jc[i] = k;
00131 
00132                 readline(fp);
00133 
00134                 label = strtok(line," \t\n");
00135                 if(label == NULL)
00136                 {
00137                         mexPrintf("Empty line at line %d\n",i+1);
00138                         fake_answer(plhs);
00139                         return;
00140                 }
00141                 labels[i] = strtod(label,&endptr);
00142                 if(endptr == label || *endptr != '\0')
00143                 {
00144                         mexPrintf("Wrong input format at line %d\n",i+1);
00145                         fake_answer(plhs);
00146                         return;
00147                 }
00148 
00149                 // features
00150                 while(1)
00151                 {
00152                         idx = strtok(NULL,":");
00153                         val = strtok(NULL," \t");
00154                         if(val == NULL)
00155                                 break;
00156 
00157                         ir[k] = (mwIndex) (strtol(idx,&endptr,10) - min_index); // precomputed kernel has <index> start from 0
00158 
00159                         errno = 0;
00160                         samples[k] = strtod(val,&endptr);
00161                         if (endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
00162                         {
00163                                 mexPrintf("Wrong input format at line %d\n",i+1);
00164                                 fake_answer(plhs);
00165                                 return;
00166                         }
00167                         ++k;
00168                 }
00169         }
00170         jc[l] = k;
00171 
00172         fclose(fp);
00173         free(line);
00174 
00175         {
00176                 mxArray *rhs[1], *lhs[1];
00177                 rhs[0] = plhs[1];
00178                 if(mexCallMATLAB(1, lhs, 1, rhs, "transpose"))
00179                 {
00180                         mexPrintf("Error: cannot transpose problem\n");
00181                         fake_answer(plhs);
00182                         return;
00183                 }
00184                 plhs[1] = lhs[0];
00185         }
00186 }
00187 
00188 void mexFunction( int nlhs, mxArray *plhs[],
00189                 int nrhs, const mxArray *prhs[] )
00190 {
00191         if(nrhs == 1)
00192         {
00193                 char filename[256];
00194 
00195                 mxGetString(prhs[0], filename, mxGetN(prhs[0]) + 1);
00196 
00197                 if(filename == NULL)
00198                 {
00199                         mexPrintf("Error: filename is NULL\n");
00200                         return;
00201                 }
00202 
00203                 read_problem(filename, plhs);
00204         }
00205         else
00206         {
00207                 exit_with_help();
00208                 fake_answer(plhs);
00209                 return;
00210         }
00211 }
00212 


ml_classifiers
Author(s): Scott Niekum
autogenerated on Fri Jan 3 2014 11:30:23