itemsets.c
Go to the documentation of this file.
1 
13 #include <GKlib.h>
14 
15 /*-------------------------------------------------------------*/
17 /*-------------------------------------------------------------*/
18 typedef struct {
19  int minfreq; /* the minimum frequency of a pattern */
20  int maxfreq; /* the maximum frequency of a pattern */
21  int minlen; /* the minimum length of the requested pattern */
22  int maxlen; /* the maximum length of the requested pattern */
23  int tnitems; /* the initial range of the item space */
24 
25  /* the call-back function */
26  void (*callback)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids);
27  void *stateptr; /* the user-supplied pointer to pass to the callback */
28 
29  /* workspace variables */
30  int *rmarker;
31  gk_ikv_t *cand;
32 } isparams_t;
33 
34 
35 /*-------------------------------------------------------------*/
37 /*-------------------------------------------------------------*/
39  int preflen, int *prefix);
41 
42 
43 
44 /*************************************************************************/
46 /*************************************************************************/
47 void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind,
48  int minfreq, int maxfreq, int minlen, int maxlen,
49  void (*process_itemset)(void *stateptr, int nitems, int *itemids,
50  int ntrans, int *transids),
51  void *stateptr)
52 {
53  ssize_t i;
54  gk_csr_t *mat, *pmat;
56  int *pattern;
57 
58  /* Create the matrix */
59  mat = gk_csr_Create();
60  mat->nrows = ntrans;
61  mat->ncols = tranind[gk_iargmax(tranptr[ntrans], tranind)]+1;
62  mat->rowptr = gk_zcopy(ntrans+1, tranptr, gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr"));
63  mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind"));
64  mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids"));
65 
66  /* Setup the parameters */
67  params.minfreq = minfreq;
68  params.maxfreq = (maxfreq == -1 ? mat->nrows : maxfreq);
69  params.minlen = minlen;
70  params.maxlen = (maxlen == -1 ? mat->ncols : maxlen);
71  params.tnitems = mat->ncols;
72  params.callback = process_itemset;
73  params.stateptr = stateptr;
74  params.rmarker = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker");
75  params.cand = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand");
76 
77  /* Perform the initial projection */
79  pmat = itemsets_project_matrix(&params, mat, -1);
80  gk_csr_Free(&mat);
81 
82  pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern");
83  itemsets_find_frequent_itemsets(&params, pmat, 0, pattern);
84 
85  gk_csr_Free(&pmat);
86  gk_free((void **)&pattern, &params.rmarker, &params.cand, LTERM);
87 
88 }
89 
90 
91 
92 /*************************************************************************/
94 /*************************************************************************/
96  int preflen, int *prefix)
97 {
98  ssize_t i;
99  gk_csr_t *cmat;
100 
101  /* Project each frequent column */
102  for (i=0; i<mat->ncols; i++) {
103  prefix[preflen] = mat->colids[i];
104 
105  if (preflen+1 >= params->minlen)
106  (*params->callback)(params->stateptr, preflen+1, prefix,
107  mat->colptr[i+1]-mat->colptr[i], mat->colind+mat->colptr[i]);
108 
109  if (preflen+1 < params->maxlen) {
110  cmat = itemsets_project_matrix(params, mat, i);
111  itemsets_find_frequent_itemsets(params, cmat, preflen+1, prefix);
112  gk_csr_Free(&cmat);
113  }
114  }
115 
116 }
117 
118 
119 /******************************************************************************/
127 /*******************************************************************************/
129 {
130  ssize_t i, j, k, ii, pnnz;
131  int nrows, ncols, pnrows, pncols;
132  ssize_t *colptr, *pcolptr;
133  int *colind, *colids, *pcolind, *pcolids, *rmarker;
134  gk_csr_t *pmat;
135  gk_ikv_t *cand;
136 
137  nrows = mat->nrows;
138  ncols = mat->ncols;
139  colptr = mat->colptr;
140  colind = mat->colind;
141  colids = mat->colids;
142 
143  rmarker = params->rmarker;
144  cand = params->cand;
145 
146 
147  /* Allocate space for the projected matrix based on what you know thus far */
148  pmat = gk_csr_Create();
149  pmat->nrows = pnrows = (cid == -1 ? nrows : colptr[cid+1]-colptr[cid]);
150 
151 
152  /* Mark the rows that will be kept and determine the prowids */
153  if (cid == -1) { /* Initial projection */
154  gk_iset(nrows, 1, rmarker);
155  }
156  else { /* The other projections */
157  for (i=colptr[cid]; i<colptr[cid+1]; i++)
158  rmarker[colind[i]] = 1;
159  }
160 
161 
162  /* Determine the length of each column that will be left in the projected matrix */
163  for (pncols=0, pnnz=0, i=cid+1; i<ncols; i++) {
164  for (k=0, j=colptr[i]; j<colptr[i+1]; j++) {
165  k += rmarker[colind[j]];
166  }
167  if (k >= params->minfreq && k <= params->maxfreq) {
168  cand[pncols].val = i;
169  cand[pncols++].key = k;
170  pnnz += k;
171  }
172  }
173 
174  /* Sort the columns in increasing order */
175  gk_ikvsorti(pncols, cand);
176 
177 
178  /* Allocate space for the remaining fields of the projected matrix */
179  pmat->ncols = pncols;
180  pmat->colids = pcolids = gk_imalloc(pncols, "itemsets_project_matrix: pcolids");
181  pmat->colptr = pcolptr = gk_zmalloc(pncols+1, "itemsets_project_matrix: pcolptr");
182  pmat->colind = pcolind = gk_imalloc(pnnz, "itemsets_project_matrix: pcolind");
183 
184 
185  /* Populate the projected matrix */
186  pcolptr[0] = 0;
187  for (pnnz=0, ii=0; ii<pncols; ii++) {
188  i = cand[ii].val;
189  for (j=colptr[i]; j<colptr[i+1]; j++) {
190  if (rmarker[colind[j]])
191  pcolind[pnnz++] = colind[j];
192  }
193 
194  pcolids[ii] = colids[i];
195  pcolptr[ii+1] = pnnz;
196  }
197 
198 
199  /* Reset the rmarker array */
200  if (cid == -1) { /* Initial projection */
201  gk_iset(nrows, 0, rmarker);
202  }
203  else { /* The other projections */
204  for (i=colptr[cid]; i<colptr[cid+1]; i++)
205  rmarker[colind[i]] = 0;
206  }
207 
208 
209  return pmat;
210 }
int minlen
Definition: itemsets.c:21
void gk_ikvsorti(size_t, gk_ikv_t *)
Definition: sort.c:157
gk_ikv_t * cand
Definition: itemsets.c:31
int32_t * rowind
Definition: gk_struct.h:75
void * stateptr
Definition: itemsets.c:27
int maxfreq
Definition: itemsets.c:20
ssize_t * colptr
Definition: gk_struct.h:74
void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind, int minfreq, int maxfreq, int minlen, int maxlen, void(*process_itemset)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids), void *stateptr)
Definition: itemsets.c:47
gk_csr_t * itemsets_project_matrix(isparams_t *param, gk_csr_t *mat, int cid)
Definition: itemsets.c:128
gk_csr_t * gk_csr_Create()
Definition: csr.c:19
static const SmartProjectionParams params
void gk_csr_CreateIndex(gk_csr_t *mat, int what)
Definition: csr.c:1223
int * rmarker
Definition: itemsets.c:30
int minfreq
Definition: itemsets.c:19
int maxlen
Definition: itemsets.c:22
int32_t * colind
Definition: gk_struct.h:75
void gk_csr_Free(gk_csr_t **mat)
Definition: csr.c:48
void gk_free(void **ptr1,...)
Definition: memory.c:202
ssize_t * rowptr
Definition: gk_struct.h:74
#define GK_CSR_COL
Definition: gk_defs.h:43
int32_t * colids
Definition: gk_struct.h:76
int32_t nrows
Definition: gk_struct.h:73
int32_t ncols
Definition: gk_struct.h:73
int tnitems
Definition: itemsets.c:23
std::ptrdiff_t j
void(* callback)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids)
Definition: itemsets.c:26
#define LTERM
Definition: gk_defs.h:14
void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat, int preflen, int *prefix)
Definition: itemsets.c:95


gtsam
Author(s):
autogenerated on Tue Jul 4 2023 02:34:26