fis.c
Go to the documentation of this file.
1 
/* GKlib.h stays first so that any feature-test setup it performs is seen
   before the standard headers below. */
#include <GKlib.h>

#include <errno.h>
#include <limits.h>
#include <stdlib.h>
11 
/*************************************************************************/
/*! Run-time state of the fis program: the command-line settings plus the
    data shared with the per-itemset callback (print_an_itemset). */
/*************************************************************************/
typedef struct {
  ssize_t minlen, maxlen;     /* pattern-length bounds; parse_cmdline defaults them to 1 and -1 */
  ssize_t minfreq, maxfreq;   /* pattern-frequency bounds; parse_cmdline defaults them to 10 and -1 */
  char *filename;             /* input matrix file (the single positional argument) */
  int silent;                 /* non-zero: print_an_itemset prints nothing */
  ssize_t nitemsets;          /* running count of itemsets, zeroed in main and
                                 incremented by print_an_itemset */
  char *clabelfile;           /* column-label file from -clabels, NULL when not given */
  char **clabels;             /* one label string per matrix column, filled in by main */
} params_t;
24 
/*************************************************************************/
/*! Codes identifying each command-line option. They are stored in the
    last column of long_options[] and matched by the switch statement in
    parse_cmdline(). */
/*************************************************************************/
enum {
  CMD_MINLEN     = 1,
  CMD_MAXLEN     = 2,
  CMD_MINFREQ    = 3,
  CMD_MAXFREQ    = 4,
  CMD_SILENT     = 5,
  CMD_CLABELFILE = 6,
  CMD_HELP       = 10
};
35 
36 
37 /*************************************************************************/
39 /*************************************************************************/
40 static struct gk_option long_options[] = {
41  {"minlen", 1, 0, CMD_MINLEN},
42  {"maxlen", 1, 0, CMD_MAXLEN},
43  {"minfreq", 1, 0, CMD_MINFREQ},
44  {"maxfreq", 1, 0, CMD_MAXFREQ},
45  {"silent", 0, 0, CMD_SILENT},
46  {"clabels", 1, 0, CMD_CLABELFILE},
47  {"help", 0, 0, CMD_HELP},
48  {0, 0, 0, 0}
49 };
50 
51 
/*-------------------------------------------------------------------*/
/* Mini help                                                         */
/*                                                                   */
/* Full usage text, one output line per entry. The empty string at   */
/* the end is the terminator that the printing loop stops on, so no  */
/* line of the help text itself may be empty (blank lines are " ").  */
/* Each entry must fit in 100 bytes including the terminating NUL.   */
/*-------------------------------------------------------------------*/
static const char helpstr[][100] = {
  " ",
  "Usage: fis [options] <mat-file>",
  " ",
  " Required parameters",
  " mat-file",
  " The name of the file storing the transactions. The file is in ",
  " Cluto's .mat format.",
  " ",
  " Optional parameters",
  " -minlen=int",
  " Specifies the minimum length of the patterns. [default: 1]",
  " ",
  " -maxlen=int",
  " Specifies the maximum length of the patterns. [default: none]",
  " ",
  " -minfreq=int",
  " Specifies the minimum frequency of the patterns. [default: 10]",
  " ",
  " -maxfreq=int",
  " Specifies the maximum frequency of the patterns. [default: none]",
  " ",
  " -silent",
  " Does not print the discovered itemsets.",
  " ",
  " -clabels=filename",
  " Specifies the name of the file that stores the column labels.",
  " ",
  " -help",
  " Prints this message.",
  ""
};
87 
/* Short usage reminder printed when the positional arguments are wrong.
   As with the full help text, the trailing empty string terminates the
   table for the printing loop. */
static const char shorthelpstr[][100] = {
  " ",
  " Usage: fis [options] <mat-file>",
  " use 'fis -help' for a summary of the options.",
  ""
};
94 
95 
96 
97 /*************************************************************************/
99 /*************************************************************************/
101 void print_final_info(params_t *params);
102 params_t *parse_cmdline(int argc, char *argv[]);
103 void print_an_itemset(void *stateptr, int nitems, int *itemind,
104  int ntrans, int *tranind);
105 
106 
107 /*************************************************************************/
109 /**************************************************************************/
110 int main(int argc, char *argv[])
111 {
112  ssize_t i;
113  char line[8192];
114  FILE *fpin;
115  params_t *params;
116  gk_csr_t *mat;
117 
118  params = parse_cmdline(argc, argv);
119  params->nitemsets = 0;
120 
121  /* read the data */
122  mat = gk_csr_Read(params->filename, GK_CSR_FMT_CLUTO, 1, 1);
124 
125  /* read the column labels */
126  params->clabels = (char **)gk_malloc(mat->ncols*sizeof(char *), "main: clabels");
127  if (params->clabelfile == NULL) {
128  for (i=0; i<mat->ncols; i++) {
129  sprintf(line, "%zd", i);
130  params->clabels[i] = gk_strdup(line);
131  }
132  }
133  else {
134  fpin = gk_fopen(params->clabelfile, "r", "main: fpin");
135  for (i=0; i<mat->ncols; i++) {
136  if (fgets(line, 8192, fpin) == NULL)
137  errexit("Failed on fgets.\n");
138  params->clabels[i] = gk_strdup(gk_strtprune(line, " \n\t"));
139  }
140  gk_fclose(fpin);
141  }
142 
143 
144  print_init_info(params, mat);
145 
146  gk_find_frequent_itemsets(mat->nrows, mat->rowptr, mat->rowind,
147  params->minfreq, params->maxfreq, params->minlen, params->maxlen,
148  &print_an_itemset, (void *)params);
149 
150  printf("Total itemsets found: %zd\n", params->nitemsets);
151 
152  print_final_info(params);
153 }
154 
155 
156 
157 /*************************************************************************/
159 /*************************************************************************/
160 void print_init_info(params_t *params, gk_csr_t *mat)
161 {
162  printf("*******************************************************************************\n");
163  printf(" fis\n\n");
164  printf("Matrix Information ---------------------------------------------------------\n");
165  printf(" input file=%s, [%d, %d, %zd]\n",
166  params->filename, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
167 
168  printf("\n");
169  printf("Options --------------------------------------------------------------------\n");
170  printf(" minlen=%zd, maxlen=%zd, minfeq=%zd, maxfreq=%zd\n",
171  params->minlen, params->maxlen, params->minfreq, params->maxfreq);
172 
173  printf("\n");
174  printf("Finding patterns... -----------------------------------------------------\n");
175 }
176 
177 
178 /*************************************************************************/
180 /*************************************************************************/
182 {
183  printf("\n");
184  printf("Memory Usage Information -----------------------------------------------------\n");
185  printf(" Maximum memory used: %10zd bytes\n", (ssize_t) gk_GetMaxMemoryUsed());
186  printf(" Current memory used: %10zd bytes\n", (ssize_t) gk_GetCurMemoryUsed());
187  printf("********************************************************************************\n");
188 }
189 
190 
191 /*************************************************************************/
193 /*************************************************************************/
194 params_t *parse_cmdline(int argc, char *argv[])
195 {
196  int i;
197  int c, option_index;
198  params_t *params;
199 
200  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
201 
202  /* initialize the params data structure */
203  params->minlen = 1;
204  params->maxlen = -1;
205  params->minfreq = 10;
206  params->maxfreq = -1;
207  params->silent = 0;
208  params->filename = NULL;
209  params->clabelfile = NULL;
210 
211 
212  /* Parse the command line arguments */
213  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
214  switch (c) {
215  case CMD_MINLEN:
216  if (gk_optarg) params->minlen = atoi(gk_optarg);
217  break;
218  case CMD_MAXLEN:
219  if (gk_optarg) params->maxlen = atoi(gk_optarg);
220  break;
221  case CMD_MINFREQ:
222  if (gk_optarg) params->minfreq = atoi(gk_optarg);
223  break;
224  case CMD_MAXFREQ:
225  if (gk_optarg) params->maxfreq = atoi(gk_optarg);
226  break;
227 
228  case CMD_SILENT:
229  params->silent = 1;
230  break;
231 
232  case CMD_CLABELFILE:
233  if (gk_optarg) params->clabelfile = gk_strdup(gk_optarg);
234  break;
235 
236  case CMD_HELP:
237  for (i=0; strlen(helpstr[i]) > 0; i++)
238  printf("%s\n", helpstr[i]);
239  exit(0);
240  break;
241  case '?':
242  default:
243  printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
244  exit(0);
245  }
246  }
247 
248  if (argc-gk_optind != 1) {
249  printf("Unrecognized parameters.");
250  for (i=0; strlen(shorthelpstr[i]) > 0; i++)
251  printf("%s\n", shorthelpstr[i]);
252  exit(0);
253  }
254 
255  params->filename = gk_strdup(argv[gk_optind++]);
256 
257  if (!gk_fexists(params->filename))
258  errexit("input file %s does not exist.\n", params->filename);
259 
260  return params;
261 }
262 
263 
264 
265 /*************************************************************************/
267 /*************************************************************************/
268 void print_an_itemset(void *stateptr, int nitems, int *itemids, int ntrans,
269  int *transids)
270 {
271  ssize_t i;
272  params_t *params;
273 
274  params = (params_t *)stateptr;
275  params->nitemsets++;
276 
277  if (!params->silent) {
278  printf("%4zd %4d %4d => ", params->nitemsets, nitems, ntrans);
279  for (i=0; i<nitems; i++)
280  printf(" %s", params->clabels[itemids[i]]);
281  printf("\n");
282  for (i=0; i<ntrans; i++)
283  printf(" %d\n", transids[i]);
284  printf("\n");
285  }
286 }
The structure that stores the information about the command-line options.
Definition: gk_getopt.h:28
#define CMD_MAXFREQ
Definition: fis.c:31
#define CMD_HELP
Definition: fis.c:34
params_t
Definition: fis.c:15
FILE * gk_fopen(char *, char *, const char *)
Definition: GKlib/io.c:24
void errexit(char *f_str,...)
Definition: error.c:54
char * gk_strtprune(char *, char *)
Prunes characters from the end of the string.
Definition: string.c:254
int gk_optind
Index in ARGV of the next element to be scanned.
Definition: getopt.c:68
int32_t * rowind
Definition: gk_struct.h:75
void print_final_info(params_t *params)
Definition: fis.c:181
char * filename
Definition: fis.c:18
size_t gk_GetMaxMemoryUsed()
Definition: memory.c:246
static struct gk_option long_options[]
Definition: fis.c:40
int gk_fexists(char *fname)
Definition: fs.c:21
Scalar Scalar * c
Definition: benchVecAdd.cpp:17
ssize_t minfreq
Definition: fis.c:17
ssize_t maxfreq
Definition: fis.c:17
char * clabelfile
Definition: fis.c:21
#define GK_CSR_FMT_CLUTO
Definition: gk_defs.h:61
static const SmartProjectionParams params
int silent
Definition: fis.c:19
params_t * parse_cmdline(int argc, char *argv[])
Definition: fis.c:194
ssize_t nitemsets
Definition: fis.c:20
size_t gk_GetCurMemoryUsed()
Definition: memory.c:233
#define CMD_SILENT
Definition: fis.c:32
void gk_csr_CreateIndex(gk_csr_t *mat, int what)
Definition: csr.c:1223
void print_an_itemset(void *stateptr, int nitems, int *itemind, int ntrans, int *tranind)
Definition: fis.c:268
#define CMD_CLABELFILE
Definition: fis.c:33
ssize_t maxlen
Definition: fis.c:16
static char shorthelpstr[][100]
Definition: fis.c:88
#define NULL
Definition: ccolamd.c:609
char * gk_strdup(char *orgstr)
Duplicates a string.
Definition: string.c:372
char * gk_optarg
For communication arguments to the caller.
Definition: getopt.c:56
#define CMD_MAXLEN
Definition: fis.c:29
int main(int argc, char *argv[])
Definition: fis.c:110
void * gk_malloc(size_t nbytes, char *msg)
Definition: memory.c:140
ssize_t minlen
Definition: fis.c:16
ssize_t * rowptr
Definition: gk_struct.h:74
#define GK_CSR_COL
Definition: gk_defs.h:43
int32_t nrows
Definition: gk_struct.h:73
int32_t ncols
Definition: gk_struct.h:73
void print_init_info(params_t *params, gk_csr_t *mat)
Definition: fis.c:160
void gk_fclose(FILE *)
Definition: GKlib/io.c:44
#define CMD_MINFREQ
Definition: fis.c:30
char ** clabels
Definition: fis.c:22
static char helpstr[][100]
Definition: fis.c:55
int gk_getopt_long_only(int argc, char **argv, char *options, struct gk_option *long_options, int *opt_index)
Parse command-line arguments with only long options.
Definition: getopt.c:850
#define CMD_MINLEN
Definition: fis.c:28
gk_csr_t * gk_csr_Read(char *filename, int format, int readvals, int numbering)
Definition: csr.c:349
void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind, int minfreq, int maxfreq, int minlen, int maxlen, void(*process_itemset)(void *stateptr, int nitems, int *itemind, int ntrans, int *tranind), void *stateptr)


gtsam
Author(s):
autogenerated on Tue Jul 4 2023 02:34:13