svm-scale.c
Go to the documentation of this file.
1 #include <float.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <ctype.h>
5 #include <string.h>
6 
/*
 * Print the command-line usage summary to stdout and terminate the
 * process with exit status 1.  Never returns.
 */
void exit_with_help(void)
{
	printf(
	"Usage: svm-scale [options] data_filename\n"
	"options:\n"
	"-l lower : x scaling lower limit (default -1)\n"
	"-u upper : x scaling upper limit (default +1)\n"
	"-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
	"-s save_filename : save scaling parameters to save_filename\n"
	"-r restore_filename : restore scaling parameters from restore_filename\n"
	);
	exit(1);
}
20 
/* Shared line buffer filled by readline(); allocated in main(). */
char *line = NULL;
/* Current capacity of `line` in bytes; doubled by readline() as needed. */
int max_line_len = 1024;
/* Target range for scaled features (-l / -u) and for scaled targets (-y). */
double lower=-1.0,upper=1.0,y_lower,y_upper;
/* Non-zero when target (y) scaling was requested or restored. */
int y_scaling = 0;
/* Per-feature extrema, indexed by feature index (0..max_index). */
double *feature_max;
double *feature_min;
/* Extrema of the target values seen in the data (or restored). */
double y_max = -DBL_MAX;
double y_min = DBL_MAX;
/* Largest feature index found in the data file or the restore file. */
int max_index;
/* Number of index:value elements read from the input ... */
long int num_nonzeros = 0;
/* ... and number of elements actually written after scaling. */
long int new_num_nonzeros = 0;

/* NOTE: both macros evaluate their arguments more than once. */
#define max(x,y) (((x)>(y))?(x):(y))
#define min(x,y) (((x)<(y))?(x):(y))

void output_target(double value);
void output(int index, double value);
char* readline(FILE *input);
39 
40 int main(int argc,char **argv)
41 {
42  int i,index;
43  FILE *fp, *fp_restore = NULL;
44  char *save_filename = NULL;
45  char *restore_filename = NULL;
46 
47  for(i=1;i<argc;i++)
48  {
49  if(argv[i][0] != '-') break;
50  ++i;
51  switch(argv[i-1][1])
52  {
53  case 'l': lower = atof(argv[i]); break;
54  case 'u': upper = atof(argv[i]); break;
55  case 'y':
56  y_lower = atof(argv[i]);
57  ++i;
58  y_upper = atof(argv[i]);
59  y_scaling = 1;
60  break;
61  case 's': save_filename = argv[i]; break;
62  case 'r': restore_filename = argv[i]; break;
63  default:
64  fprintf(stderr,"unknown option\n");
66  }
67  }
68 
69  if(!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
70  {
71  fprintf(stderr,"inconsistent lower/upper specification\n");
72  exit(1);
73  }
74 
75  if(restore_filename && save_filename)
76  {
77  fprintf(stderr,"cannot use -r and -s simultaneously\n");
78  exit(1);
79  }
80 
81  if(argc != i+1)
83 
84  fp=fopen(argv[i],"r");
85 
86  if(fp==NULL)
87  {
88  fprintf(stderr,"can't open file %s\n", argv[i]);
89  exit(1);
90  }
91 
92  line = (char *) malloc(max_line_len*sizeof(char));
93 
94 #define SKIP_TARGET\
95  while(isspace(*p)) ++p;\
96  while(!isspace(*p)) ++p;
97 
98 #define SKIP_ELEMENT\
99  while(*p!=':') ++p;\
100  ++p;\
101  while(isspace(*p)) ++p;\
102  while(*p && !isspace(*p)) ++p;
103 
104  /* assumption: min index of attributes is 1 */
105  /* pass 1: find out max index of attributes */
106  max_index = 0;
107 
108  if(restore_filename)
109  {
110  int idx, c;
111 
112  fp_restore = fopen(restore_filename,"r");
113  if(fp_restore==NULL)
114  {
115  fprintf(stderr,"can't open file %s\n", restore_filename);
116  exit(1);
117  }
118 
119  c = fgetc(fp_restore);
120  if(c == 'y')
121  {
122  readline(fp_restore);
123  readline(fp_restore);
124  readline(fp_restore);
125  }
126  readline(fp_restore);
127  readline(fp_restore);
128 
129  while(fscanf(fp_restore,"%d %*f %*f\n",&idx) == 1)
130  max_index = max(idx,max_index);
131  rewind(fp_restore);
132  }
133 
134  while(readline(fp)!=NULL)
135  {
136  char *p=line;
137 
139 
140  while(sscanf(p,"%d:%*f",&index)==1)
141  {
142  max_index = max(max_index, index);
144  num_nonzeros++;
145  }
146  }
147  rewind(fp);
148 
149  feature_max = (double *)malloc((max_index+1)* sizeof(double));
150  feature_min = (double *)malloc((max_index+1)* sizeof(double));
151 
152  if(feature_max == NULL || feature_min == NULL)
153  {
154  fprintf(stderr,"can't allocate enough memory\n");
155  exit(1);
156  }
157 
158  for(i=0;i<=max_index;i++)
159  {
160  feature_max[i]=-DBL_MAX;
161  feature_min[i]=DBL_MAX;
162  }
163 
164  /* pass 2: find out min/max value */
165  while(readline(fp)!=NULL)
166  {
167  char *p=line;
168  int next_index=1;
169  double target;
170  double value;
171 
172  sscanf(p,"%lf",&target);
173  y_max = max(y_max,target);
174  y_min = min(y_min,target);
175 
177 
178  while(sscanf(p,"%d:%lf",&index,&value)==2)
179  {
180  for(i=next_index;i<index;i++)
181  {
182  feature_max[i]=max(feature_max[i],0);
183  feature_min[i]=min(feature_min[i],0);
184  }
185 
186  feature_max[index]=max(feature_max[index],value);
187  feature_min[index]=min(feature_min[index],value);
188 
190  next_index=index+1;
191  }
192 
193  for(i=next_index;i<=max_index;i++)
194  {
195  feature_max[i]=max(feature_max[i],0);
196  feature_min[i]=min(feature_min[i],0);
197  }
198  }
199 
200  rewind(fp);
201 
202  /* pass 2.5: save/restore feature_min/feature_max */
203 
204  if(restore_filename)
205  {
206  /* fp_restore rewinded in finding max_index */
207  int idx, c;
208  double fmin, fmax;
209 
210  if((c = fgetc(fp_restore)) == 'y')
211  {
212  fscanf(fp_restore, "%lf %lf\n", &y_lower, &y_upper);
213  fscanf(fp_restore, "%lf %lf\n", &y_min, &y_max);
214  y_scaling = 1;
215  }
216  else
217  ungetc(c, fp_restore);
218 
219  if (fgetc(fp_restore) == 'x') {
220  fscanf(fp_restore, "%lf %lf\n", &lower, &upper);
221  while(fscanf(fp_restore,"%d %lf %lf\n",&idx,&fmin,&fmax)==3)
222  {
223  if(idx<=max_index)
224  {
225  feature_min[idx] = fmin;
226  feature_max[idx] = fmax;
227  }
228  }
229  }
230  fclose(fp_restore);
231  }
232 
233  if(save_filename)
234  {
235  FILE *fp_save = fopen(save_filename,"w");
236  if(fp_save==NULL)
237  {
238  fprintf(stderr,"can't open file %s\n", save_filename);
239  exit(1);
240  }
241  if(y_scaling)
242  {
243  fprintf(fp_save, "y\n");
244  fprintf(fp_save, "%.16g %.16g\n", y_lower, y_upper);
245  fprintf(fp_save, "%.16g %.16g\n", y_min, y_max);
246  }
247  fprintf(fp_save, "x\n");
248  fprintf(fp_save, "%.16g %.16g\n", lower, upper);
249  for(i=1;i<=max_index;i++)
250  {
251  if(feature_min[i]!=feature_max[i])
252  fprintf(fp_save,"%d %.16g %.16g\n",i,feature_min[i],feature_max[i]);
253  }
254  fclose(fp_save);
255  }
256 
257  /* pass 3: scale */
258  while(readline(fp)!=NULL)
259  {
260  char *p=line;
261  int next_index=1;
262  double target;
263  double value;
264 
265  sscanf(p,"%lf",&target);
266  output_target(target);
267 
269 
270  while(sscanf(p,"%d:%lf",&index,&value)==2)
271  {
272  for(i=next_index;i<index;i++)
273  output(i,0);
274 
275  output(index,value);
276 
278  next_index=index+1;
279  }
280 
281  for(i=next_index;i<=max_index;i++)
282  output(i,0);
283 
284  printf("\n");
285  }
286 
288  fprintf(stderr,
289  "WARNING: original #nonzeros %ld\n"
290  " new #nonzeros %ld\n"
291  "Use -l 0 if many original feature values are zeros\n",
293 
294  free(line);
295  free(feature_max);
296  free(feature_min);
297  fclose(fp);
298  return 0;
299 }
300 
301 char* readline(FILE *input)
302 {
303  int len;
304 
305  if(fgets(line,max_line_len,input) == NULL)
306  return NULL;
307 
308  while(strrchr(line,'\n') == NULL)
309  {
310  max_line_len *= 2;
311  line = (char *) realloc(line, max_line_len);
312  len = (int) strlen(line);
313  if(fgets(line+len,max_line_len-len,input) == NULL)
314  break;
315  }
316  return line;
317 }
318 
319 void output_target(double value)
320 {
321  if(y_scaling)
322  {
323  if(value == y_min)
324  value = y_lower;
325  else if(value == y_max)
326  value = y_upper;
327  else value = y_lower + (y_upper-y_lower) *
328  (value - y_min)/(y_max-y_min);
329  }
330  printf("%g ",value);
331 }
332 
333 void output(int index, double value)
334 {
335  /* skip single-valued attribute */
336  if(feature_max[index] == feature_min[index])
337  return;
338 
339  if(value == feature_min[index])
340  value = lower;
341  else if(value == feature_max[index])
342  value = upper;
343  else
344  value = lower + (upper-lower) *
345  (value-feature_min[index])/
347 
348  if(value != 0)
349  {
350  printf("%d:%g ",index, value);
352  }
353 }
long int new_num_nonzeros
Definition: svm-scale.c:31
#define min(x, y)
Definition: svm-scale.c:34
void output_target(double value)
Definition: svm-scale.c:319
double value
Definition: svm.h:15
int y_scaling
Definition: svm-scale.c:24
#define SKIP_TARGET
double y_min
Definition: svm-scale.c:28
long int num_nonzeros
Definition: svm-scale.c:30
char * readline(FILE *input)
Definition: svm-scale.c:301
double y_lower
Definition: svm-scale.c:23
#define SKIP_ELEMENT
void output(int index, double value)
Definition: svm-scale.c:333
index
Definition: subset.py:58
double * feature_min
Definition: svm-scale.c:26
void exit_with_help()
Definition: svm-scale.c:7
int max_index
Definition: svm-scale.c:29
double * feature_max
Definition: svm-scale.c:25
double lower
Definition: svm-scale.c:23
double upper
Definition: svm-scale.c:23
char * line
Definition: svm-scale.c:21
#define max(x, y)
Definition: svm-scale.c:33
int max_line_len
Definition: svm-scale.c:22
double y_upper
Definition: svm-scale.c:23
c
Definition: easy.py:61
int main(int argc, char **argv)
Definition: svm-scale.c:40
double y_max
Definition: svm-scale.c:27


haf_grasping
Author(s): David Fischinger
autogenerated on Mon Jun 10 2019 13:28:43