svm-scale.c
Go to the documentation of this file.
1 #include <float.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <ctype.h>
5 #include <string.h>
6 
/*
 * Print the command-line usage summary to stdout and terminate the
 * process with exit status 1.  This function never returns.
 */
void exit_with_help(void)
{
	printf(
	"Usage: svm-scale [options] data_filename\n"
	"options:\n"
	"-l lower : x scaling lower limit (default -1)\n"
	"-u upper : x scaling upper limit (default +1)\n"
	"-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
	"-s save_filename : save scaling parameters to save_filename\n"
	"-r restore_filename : restore scaling parameters from restore_filename\n"
	);
	exit(1);
}
20 
/* Shared line buffer filled by readline(); grown on demand. */
char *line = NULL;
/* Current capacity of `line` in bytes. */
int max_line_len = 1024;
/* Target range for feature (x) scaling and, when enabled, target (y) scaling. */
double lower = -1.0, upper = 1.0, y_lower, y_upper;
/* Non-zero when target values are scaled as well (-y option or restored file). */
int y_scaling = 0;
/* Per-feature observed maximum / minimum, indexed by feature index. */
double *feature_max;
double *feature_min;
/* Observed maximum / minimum of the target column. */
double y_max = -DBL_MAX;
double y_min = DBL_MAX;
/* Largest / smallest feature index seen in the input (and restore file). */
int max_index;
int min_index;
/* Count of index:value pairs read from the input vs. pairs written to the output. */
long int num_nonzeros = 0;
long int new_num_nonzeros = 0;

/* NOTE: both macros evaluate their arguments more than once. */
#define max(x,y) (((x)>(y))?(x):(y))
#define min(x,y) (((x)<(y))?(x):(y))

void output_target(double value);
void output(int index, double value);
char* readline(FILE *input);
40 
41 int main(int argc, char **argv)
42 {
43  int i, index;
44  FILE *fp, *fp_restore = NULL;
45  char *save_filename = NULL;
46  char *restore_filename = NULL;
47 
48  for (i = 1; i < argc; i++)
49  {
50  if (argv[i][0] != '-') break;
51  ++i;
52  switch (argv[i - 1][1])
53  {
54  case 'l':
55  lower = atof(argv[i]);
56  break;
57  case 'u':
58  upper = atof(argv[i]);
59  break;
60  case 'y':
61  y_lower = atof(argv[i]);
62  ++i;
63  y_upper = atof(argv[i]);
64  y_scaling = 1;
65  break;
66  case 's':
67  save_filename = argv[i];
68  break;
69  case 'r':
70  restore_filename = argv[i];
71  break;
72  default:
73  fprintf(stderr, "unknown option\n");
75  }
76  }
77 
78  if (!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
79  {
80  fprintf(stderr, "inconsistent lower/upper specification\n");
81  exit(1);
82  }
83 
84  if (restore_filename && save_filename)
85  {
86  fprintf(stderr, "cannot use -r and -s simultaneously\n");
87  exit(1);
88  }
89 
90  if (argc != i + 1)
92 
93  fp = fopen(argv[i], "r");
94 
95  if (fp == NULL)
96  {
97  fprintf(stderr, "can't open file %s\n", argv[i]);
98  exit(1);
99  }
100 
101  line = (char *) malloc(max_line_len * sizeof(char));
102 
103 #define SKIP_TARGET\
104  while(isspace(*p)) ++p;\
105  while(!isspace(*p)) ++p;
106 
107 #define SKIP_ELEMENT\
108  while(*p!=':') ++p;\
109  ++p;\
110  while(isspace(*p)) ++p;\
111  while(*p && !isspace(*p)) ++p;
112 
113  /* assumption: min index of attributes is 1 */
114  /* pass 1: find out max index of attributes */
115  max_index = 0;
116  min_index = 1;
117 
118  if (restore_filename)
119  {
120  int idx, c;
121 
122  fp_restore = fopen(restore_filename, "r");
123  if (fp_restore == NULL)
124  {
125  fprintf(stderr, "can't open file %s\n", restore_filename);
126  exit(1);
127  }
128 
129  c = fgetc(fp_restore);
130  if (c == 'y')
131  {
132  readline(fp_restore);
133  readline(fp_restore);
134  readline(fp_restore);
135  }
136  readline(fp_restore);
137  readline(fp_restore);
138 
139  while (fscanf(fp_restore, "%d %*f %*f\n", &idx) == 1)
140  max_index = max(idx, max_index);
141  rewind(fp_restore);
142  }
143 
144  while (readline(fp) != NULL)
145  {
146  char *p = line;
147 
149 
150  while (sscanf(p, "%d:%*f", &index) == 1)
151  {
152  max_index = max(max_index, index);
153  min_index = min(min_index, index);
155  num_nonzeros++;
156  }
157  }
158 
159  if (min_index < 1)
160  fprintf(stderr,
161  "WARNING: minimal feature index is %d, but indices should start from 1\n", min_index);
162 
163  rewind(fp);
164 
165  feature_max = (double *)malloc((max_index + 1) * sizeof(double));
166  feature_min = (double *)malloc((max_index + 1) * sizeof(double));
167 
168  if (feature_max == NULL || feature_min == NULL)
169  {
170  fprintf(stderr, "can't allocate enough memory\n");
171  exit(1);
172  }
173 
174  for (i = 0; i <= max_index; i++)
175  {
176  feature_max[i] = -DBL_MAX;
177  feature_min[i] = DBL_MAX;
178  }
179 
180  /* pass 2: find out min/max value */
181  while (readline(fp) != NULL)
182  {
183  char *p = line;
184  int next_index = 1;
185  double target;
186  double value;
187 
188  sscanf(p, "%lf", &target);
189  y_max = max(y_max, target);
190  y_min = min(y_min, target);
191 
193 
194  while (sscanf(p, "%d:%lf", &index, &value) == 2)
195  {
196  for (i = next_index; i < index; i++)
197  {
198  feature_max[i] = max(feature_max[i], 0);
199  feature_min[i] = min(feature_min[i], 0);
200  }
201 
202  feature_max[index] = max(feature_max[index], value);
203  feature_min[index] = min(feature_min[index], value);
204 
206  next_index = index + 1;
207  }
208 
209  for (i = next_index; i <= max_index; i++)
210  {
211  feature_max[i] = max(feature_max[i], 0);
212  feature_min[i] = min(feature_min[i], 0);
213  }
214  }
215 
216  rewind(fp);
217 
218  /* pass 2.5: save/restore feature_min/feature_max */
219 
220  if (restore_filename)
221  {
222  /* fp_restore rewinded in finding max_index */
223  int idx, c;
224  double fmin, fmax;
225  int next_index = 1;
226 
227  if ((c = fgetc(fp_restore)) == 'y')
228  {
229  fscanf(fp_restore, "%lf %lf\n", &y_lower, &y_upper);
230  fscanf(fp_restore, "%lf %lf\n", &y_min, &y_max);
231  y_scaling = 1;
232  }
233  else
234  ungetc(c, fp_restore);
235 
236  if (fgetc(fp_restore) == 'x')
237  {
238  fscanf(fp_restore, "%lf %lf\n", &lower, &upper);
239  while (fscanf(fp_restore, "%d %lf %lf\n", &idx, &fmin, &fmax) == 3)
240  {
241  for (i = next_index; i < idx; i++)
242  if (feature_min[i] != feature_max[i])
243  fprintf(stderr,
244  "WARNING: feature index %d appeared in file %s was not seen in the scaling factor file %s.\n",
245  i, argv[argc - 1], restore_filename);
246 
247  feature_min[idx] = fmin;
248  feature_max[idx] = fmax;
249 
250  next_index = idx + 1;
251  }
252 
253  for (i = next_index; i <= max_index; i++)
254  if (feature_min[i] != feature_max[i])
255  fprintf(stderr,
256  "WARNING: feature index %d appeared in file %s was not seen in the scaling factor file %s.\n",
257  i, argv[argc - 1], restore_filename);
258  }
259  fclose(fp_restore);
260  }
261 
262  if (save_filename)
263  {
264  FILE *fp_save = fopen(save_filename, "w");
265  if (fp_save == NULL)
266  {
267  fprintf(stderr, "can't open file %s\n", save_filename);
268  exit(1);
269  }
270  if (y_scaling)
271  {
272  fprintf(fp_save, "y\n");
273  fprintf(fp_save, "%.16g %.16g\n", y_lower, y_upper);
274  fprintf(fp_save, "%.16g %.16g\n", y_min, y_max);
275  }
276  fprintf(fp_save, "x\n");
277  fprintf(fp_save, "%.16g %.16g\n", lower, upper);
278  for (i = 1; i <= max_index; i++)
279  {
280  if (feature_min[i] != feature_max[i])
281  fprintf(fp_save, "%d %.16g %.16g\n", i, feature_min[i], feature_max[i]);
282  }
283 
284  if (min_index < 1)
285  fprintf(stderr,
286  "WARNING: scaling factors with indices smaller than 1 are not stored to the file %s.\n", save_filename);
287 
288  fclose(fp_save);
289  }
290 
291  /* pass 3: scale */
292  while (readline(fp) != NULL)
293  {
294  char *p = line;
295  int next_index = 1;
296  double target;
297  double value;
298 
299  sscanf(p, "%lf", &target);
300  output_target(target);
301 
303 
304  while (sscanf(p, "%d:%lf", &index, &value) == 2)
305  {
306  for (i = next_index; i < index; i++)
307  output(i, 0);
308 
309  output(index, value);
310 
312  next_index = index + 1;
313  }
314 
315  for (i = next_index; i <= max_index; i++)
316  output(i, 0);
317 
318  printf("\n");
319  }
320 
322  fprintf(stderr,
323  "WARNING: original #nonzeros %ld\n"
324  " new #nonzeros %ld\n"
325  "Use -l 0 if many original feature values are zeros\n",
327 
328  free(line);
329  free(feature_max);
330  free(feature_min);
331  fclose(fp);
332  return 0;
333 }
334 
335 char* readline(FILE *input)
336 {
337  int len;
338 
339  if (fgets(line, max_line_len, input) == NULL)
340  return NULL;
341 
342  while (strrchr(line, '\n') == NULL)
343  {
344  max_line_len *= 2;
345  line = (char *) realloc(line, max_line_len);
346  len = (int) strlen(line);
347  if (fgets(line + len, max_line_len - len, input) == NULL)
348  break;
349  }
350  return line;
351 }
352 
353 void output_target(double value)
354 {
355  if (y_scaling)
356  {
357  if (value == y_min)
358  value = y_lower;
359  else if (value == y_max)
360  value = y_upper;
361  else value = y_lower + (y_upper - y_lower) *
362  (value - y_min) / (y_max - y_min);
363  }
364  printf("%g ", value);
365 }
366 
367 void output(int index, double value)
368 {
369  /* skip single-valued attribute */
370  if (feature_max[index] == feature_min[index])
371  return;
372 
373  if (value == feature_min[index])
374  value = lower;
375  else if (value == feature_max[index])
376  value = upper;
377  else
378  value = lower + (upper - lower) *
379  (value - feature_min[index]) /
381 
382  if (value != 0)
383  {
384  printf("%d:%g ", index, value);
386  }
387 }
long int new_num_nonzeros
Definition: svm-scale.c:32
#define min(x, y)
Definition: svm-scale.c:35
void output_target(double value)
Definition: svm-scale.c:353
double value
Definition: svm.h:15
int y_scaling
Definition: svm-scale.c:24
#define SKIP_TARGET
double y_min
Definition: svm-scale.c:28
int min_index
Definition: svm-scale.c:30
long int num_nonzeros
Definition: svm-scale.c:31
char * readline(FILE *input)
Definition: svm-scale.c:335
double y_lower
Definition: svm-scale.c:23
#define SKIP_ELEMENT
void output(int index, double value)
Definition: svm-scale.c:367
double * feature_min
Definition: svm-scale.c:26
void exit_with_help()
Definition: svm-scale.c:7
int max_index
Definition: svm-scale.c:29
double * feature_max
Definition: svm-scale.c:25
double lower
Definition: svm-scale.c:23
double upper
Definition: svm-scale.c:23
int index
Definition: svm.h:14
char * line
Definition: svm-scale.c:21
#define max(x, y)
Definition: svm-scale.c:34
int max_line_len
Definition: svm-scale.c:22
double y_upper
Definition: svm-scale.c:23
c
Definition: easy.py:61
int main(int argc, char **argv)
Definition: svm-scale.c:41
double y_max
Definition: svm-scale.c:27


ml_classifiers
Author(s): Scott Niekum , Joshua Whitley
autogenerated on Mon Feb 28 2022 22:46:49