csr.c
Go to the documentation of this file.
1 
10 #include <GKlib.h>
11 
12 #define OMPMINOPS 50000
13 
14 /*************************************************************************/
18 /**************************************************************************/
20 {
21  gk_csr_t *mat;
22 
23  mat = (gk_csr_t *)gk_malloc(sizeof(gk_csr_t), "gk_csr_Create: mat");
24 
25  gk_csr_Init(mat);
26 
27  return mat;
28 }
29 
30 
31 /*************************************************************************/
35 /*************************************************************************/
37 {
38  memset(mat, 0, sizeof(gk_csr_t));
39  mat->nrows = mat->ncols = -1;
40 }
41 
42 
43 /*************************************************************************/
47 /*************************************************************************/
49 {
50  if (*mat == NULL)
51  return;
52  gk_csr_FreeContents(*mat);
53  gk_free((void **)mat, LTERM);
54 }
55 
56 
57 /*************************************************************************/
62 /*************************************************************************/
64 {
65  gk_free((void *)&mat->rowptr, &mat->rowind, &mat->rowval, &mat->rowids,
66  &mat->colptr, &mat->colind, &mat->colval, &mat->colids,
67  &mat->rnorms, &mat->cnorms, &mat->rsums, &mat->csums,
68  &mat->rsizes, &mat->csizes, &mat->rvols, &mat->cvols,
69  &mat->rwgts, &mat->cwgts,
70  LTERM);
71 }
72 
73 
74 /*************************************************************************/
79 /**************************************************************************/
81 {
82  gk_csr_t *nmat;
83 
84  nmat = gk_csr_Create();
85 
86  nmat->nrows = mat->nrows;
87  nmat->ncols = mat->ncols;
88 
89  /* copy the row structure */
90  if (mat->rowptr)
91  nmat->rowptr = gk_zcopy(mat->nrows+1, mat->rowptr,
92  gk_zmalloc(mat->nrows+1, "gk_csr_Dup: rowptr"));
93  if (mat->rowids)
94  nmat->rowids = gk_icopy(mat->nrows, mat->rowids,
95  gk_imalloc(mat->nrows, "gk_csr_Dup: rowids"));
96  if (mat->rnorms)
97  nmat->rnorms = gk_fcopy(mat->nrows, mat->rnorms,
98  gk_fmalloc(mat->nrows, "gk_csr_Dup: rnorms"));
99  if (mat->rowind)
100  nmat->rowind = gk_icopy(mat->rowptr[mat->nrows], mat->rowind,
101  gk_imalloc(mat->rowptr[mat->nrows], "gk_csr_Dup: rowind"));
102  if (mat->rowval)
103  nmat->rowval = gk_fcopy(mat->rowptr[mat->nrows], mat->rowval,
104  gk_fmalloc(mat->rowptr[mat->nrows], "gk_csr_Dup: rowval"));
105 
106  /* copy the col structure */
107  if (mat->colptr)
108  nmat->colptr = gk_zcopy(mat->ncols+1, mat->colptr,
109  gk_zmalloc(mat->ncols+1, "gk_csr_Dup: colptr"));
110  if (mat->colids)
111  nmat->colids = gk_icopy(mat->ncols, mat->colids,
112  gk_imalloc(mat->ncols, "gk_csr_Dup: colids"));
113  if (mat->cnorms)
114  nmat->cnorms = gk_fcopy(mat->ncols, mat->cnorms,
115  gk_fmalloc(mat->ncols, "gk_csr_Dup: cnorms"));
116  if (mat->colind)
117  nmat->colind = gk_icopy(mat->colptr[mat->ncols], mat->colind,
118  gk_imalloc(mat->colptr[mat->ncols], "gk_csr_Dup: colind"));
119  if (mat->colval)
120  nmat->colval = gk_fcopy(mat->colptr[mat->ncols], mat->colval,
121  gk_fmalloc(mat->colptr[mat->ncols], "gk_csr_Dup: colval"));
122 
123  return nmat;
124 }
125 
126 
127 /*************************************************************************/
134 /**************************************************************************/
135 gk_csr_t *gk_csr_ExtractSubmatrix(gk_csr_t *mat, int rstart, int nrows)
136 {
137  ssize_t i;
138  gk_csr_t *nmat;
139 
140  if (rstart+nrows > mat->nrows)
141  return NULL;
142 
143  nmat = gk_csr_Create();
144 
145  nmat->nrows = nrows;
146  nmat->ncols = mat->ncols;
147 
148  /* copy the row structure */
149  if (mat->rowptr)
150  nmat->rowptr = gk_zcopy(nrows+1, mat->rowptr+rstart,
151  gk_zmalloc(nrows+1, "gk_csr_ExtractSubmatrix: rowptr"));
152  for (i=nrows; i>=0; i--)
153  nmat->rowptr[i] -= nmat->rowptr[0];
154  ASSERT(nmat->rowptr[0] == 0);
155 
156  if (mat->rowids)
157  nmat->rowids = gk_icopy(nrows, mat->rowids+rstart,
158  gk_imalloc(nrows, "gk_csr_ExtractSubmatrix: rowids"));
159  if (mat->rnorms)
160  nmat->rnorms = gk_fcopy(nrows, mat->rnorms+rstart,
161  gk_fmalloc(nrows, "gk_csr_ExtractSubmatrix: rnorms"));
162 
163  if (mat->rsums)
164  nmat->rsums = gk_fcopy(nrows, mat->rsums+rstart,
165  gk_fmalloc(nrows, "gk_csr_ExtractSubmatrix: rsums"));
166 
167  ASSERT(nmat->rowptr[nrows] == mat->rowptr[rstart+nrows]-mat->rowptr[rstart]);
168  if (mat->rowind)
169  nmat->rowind = gk_icopy(mat->rowptr[rstart+nrows]-mat->rowptr[rstart],
170  mat->rowind+mat->rowptr[rstart],
171  gk_imalloc(mat->rowptr[rstart+nrows]-mat->rowptr[rstart],
172  "gk_csr_ExtractSubmatrix: rowind"));
173  if (mat->rowval)
174  nmat->rowval = gk_fcopy(mat->rowptr[rstart+nrows]-mat->rowptr[rstart],
175  mat->rowval+mat->rowptr[rstart],
176  gk_fmalloc(mat->rowptr[rstart+nrows]-mat->rowptr[rstart],
177  "gk_csr_ExtractSubmatrix: rowval"));
178 
179  return nmat;
180 }
181 
182 
183 /*************************************************************************/
190 /**************************************************************************/
191 gk_csr_t *gk_csr_ExtractRows(gk_csr_t *mat, int nrows, int *rind)
192 {
193  ssize_t i, ii, j, nnz;
194  gk_csr_t *nmat;
195 
196  nmat = gk_csr_Create();
197 
198  nmat->nrows = nrows;
199  nmat->ncols = mat->ncols;
200 
201  for (nnz=0, i=0; i<nrows; i++)
202  nnz += mat->rowptr[rind[i]+1]-mat->rowptr[rind[i]];
203 
204  nmat->rowptr = gk_zmalloc(nmat->nrows+1, "gk_csr_ExtractPartition: rowptr");
205  nmat->rowind = gk_imalloc(nnz, "gk_csr_ExtractPartition: rowind");
206  nmat->rowval = gk_fmalloc(nnz, "gk_csr_ExtractPartition: rowval");
207 
208  nmat->rowptr[0] = 0;
209  for (nnz=0, j=0, ii=0; ii<nrows; ii++) {
210  i = rind[ii];
211  gk_icopy(mat->rowptr[i+1]-mat->rowptr[i], mat->rowind+mat->rowptr[i], nmat->rowind+nnz);
212  gk_fcopy(mat->rowptr[i+1]-mat->rowptr[i], mat->rowval+mat->rowptr[i], nmat->rowval+nnz);
213  nnz += mat->rowptr[i+1]-mat->rowptr[i];
214  nmat->rowptr[++j] = nnz;
215  }
216  ASSERT(j == nmat->nrows);
217 
218  return nmat;
219 }
220 
221 
222 /*************************************************************************/
229 /**************************************************************************/
231 {
232  ssize_t i, j, nnz;
233  gk_csr_t *nmat;
234 
235  nmat = gk_csr_Create();
236 
237  nmat->nrows = 0;
238  nmat->ncols = mat->ncols;
239 
240  for (nnz=0, i=0; i<mat->nrows; i++) {
241  if (part[i] == pid) {
242  nmat->nrows++;
243  nnz += mat->rowptr[i+1]-mat->rowptr[i];
244  }
245  }
246 
247  nmat->rowptr = gk_zmalloc(nmat->nrows+1, "gk_csr_ExtractPartition: rowptr");
248  nmat->rowind = gk_imalloc(nnz, "gk_csr_ExtractPartition: rowind");
249  nmat->rowval = gk_fmalloc(nnz, "gk_csr_ExtractPartition: rowval");
250 
251  nmat->rowptr[0] = 0;
252  for (nnz=0, j=0, i=0; i<mat->nrows; i++) {
253  if (part[i] == pid) {
254  gk_icopy(mat->rowptr[i+1]-mat->rowptr[i], mat->rowind+mat->rowptr[i], nmat->rowind+nnz);
255  gk_fcopy(mat->rowptr[i+1]-mat->rowptr[i], mat->rowval+mat->rowptr[i], nmat->rowval+nnz);
256  nnz += mat->rowptr[i+1]-mat->rowptr[i];
257  nmat->rowptr[++j] = nnz;
258  }
259  }
260  ASSERT(j == nmat->nrows);
261 
262  return nmat;
263 }
264 
265 
266 /*************************************************************************/
276 /**************************************************************************/
278 {
279  ssize_t i, j;
280  int nrows, ncolors;
281  ssize_t *rowptr;
282  int *rowind;
283  float *rowval;
284  gk_csr_t **smats;
285 
286  nrows = mat->nrows;
287  rowptr = mat->rowptr;
288  rowind = mat->rowind;
289  rowval = mat->rowval;
290 
291  ncolors = gk_imax(rowptr[nrows], color)+1;
292 
293  smats = (gk_csr_t **)gk_malloc(sizeof(gk_csr_t *)*ncolors, "gk_csr_Split: smats");
294  for (i=0; i<ncolors; i++) {
295  smats[i] = gk_csr_Create();
296  smats[i]->nrows = mat->nrows;
297  smats[i]->ncols = mat->ncols;
298  smats[i]->rowptr = gk_zsmalloc(nrows+1, 0, "gk_csr_Split: smats[i]->rowptr");
299  }
300 
301  for (i=0; i<nrows; i++) {
302  for (j=rowptr[i]; j<rowptr[i+1]; j++)
303  smats[color[j]]->rowptr[i]++;
304  }
305  for (i=0; i<ncolors; i++)
306  MAKECSR(j, nrows, smats[i]->rowptr);
307 
308  for (i=0; i<ncolors; i++) {
309  smats[i]->rowind = gk_imalloc(smats[i]->rowptr[nrows], "gk_csr_Split: smats[i]->rowind");
310  smats[i]->rowval = gk_fmalloc(smats[i]->rowptr[nrows], "gk_csr_Split: smats[i]->rowval");
311  }
312 
313  for (i=0; i<nrows; i++) {
314  for (j=rowptr[i]; j<rowptr[i+1]; j++) {
315  smats[color[j]]->rowind[smats[color[j]]->rowptr[i]] = rowind[j];
316  smats[color[j]]->rowval[smats[color[j]]->rowptr[i]] = rowval[j];
317  smats[color[j]]->rowptr[i]++;
318  }
319  }
320 
321  for (i=0; i<ncolors; i++)
322  SHIFTCSR(j, nrows, smats[i]->rowptr);
323 
324  return smats;
325 }
326 
327 
328 /**************************************************************************/
348 /**************************************************************************/
349 gk_csr_t *gk_csr_Read(char *filename, int format, int readvals, int numbering)
350 {
351  ssize_t i, k, l;
352  size_t nfields, nrows, ncols, nnz, fmt, ncon;
353  size_t lnlen;
354  ssize_t *rowptr;
355  int *rowind, ival;
356  float *rowval=NULL, fval;
357  int readsizes, readwgts;
358  char *line=NULL, *head, *tail, fmtstr[256];
359  FILE *fpin;
360  gk_csr_t *mat=NULL;
361 
362 
363  if (!gk_fexists(filename))
364  gk_errexit(SIGERR, "File %s does not exist!\n", filename);
365 
366  if (format == GK_CSR_FMT_BINROW) {
367  mat = gk_csr_Create();
368 
369  fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin");
370  if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1)
371  gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename);
372  if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1)
373  gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename);
374  mat->rowptr = gk_zmalloc(mat->nrows+1, "gk_csr_Read: rowptr");
375  if (fread(mat->rowptr, sizeof(ssize_t), mat->nrows+1, fpin) != mat->nrows+1)
376  gk_errexit(SIGERR, "Failed to read the rowptr from file %s!\n", filename);
377  mat->rowind = gk_imalloc(mat->rowptr[mat->nrows], "gk_csr_Read: rowind");
378  if (fread(mat->rowind, sizeof(int32_t), mat->rowptr[mat->nrows], fpin) != mat->rowptr[mat->nrows])
379  gk_errexit(SIGERR, "Failed to read the rowind from file %s!\n", filename);
380  if (readvals == 1) {
381  mat->rowval = gk_fmalloc(mat->rowptr[mat->nrows], "gk_csr_Read: rowval");
382  if (fread(mat->rowval, sizeof(float), mat->rowptr[mat->nrows], fpin) != mat->rowptr[mat->nrows])
383  gk_errexit(SIGERR, "Failed to read the rowval from file %s!\n", filename);
384  }
385 
386  gk_fclose(fpin);
387  return mat;
388  }
389 
390  if (format == GK_CSR_FMT_BINCOL) {
391  mat = gk_csr_Create();
392 
393  fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin");
394  if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1)
395  gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename);
396  if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1)
397  gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename);
398  mat->colptr = gk_zmalloc(mat->ncols+1, "gk_csr_Read: colptr");
399  if (fread(mat->colptr, sizeof(ssize_t), mat->ncols+1, fpin) != mat->ncols+1)
400  gk_errexit(SIGERR, "Failed to read the colptr from file %s!\n", filename);
401  mat->colind = gk_imalloc(mat->colptr[mat->ncols], "gk_csr_Read: colind");
402  if (fread(mat->colind, sizeof(int32_t), mat->colptr[mat->ncols], fpin) != mat->colptr[mat->ncols])
403  gk_errexit(SIGERR, "Failed to read the colind from file %s!\n", filename);
404  if (readvals) {
405  mat->colval = gk_fmalloc(mat->colptr[mat->ncols], "gk_csr_Read: colval");
406  if (fread(mat->colval, sizeof(float), mat->colptr[mat->ncols], fpin) != mat->colptr[mat->ncols])
407  gk_errexit(SIGERR, "Failed to read the colval from file %s!\n", filename);
408  }
409 
410  gk_fclose(fpin);
411  return mat;
412  }
413 
414 
415  if (format == GK_CSR_FMT_CLUTO) {
416  fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
417  do {
418  if (gk_getline(&line, &lnlen, fpin) <= 0)
419  gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename);
420  } while (line[0] == '%');
421 
422  if (sscanf(line, "%zu %zu %zu", &nrows, &ncols, &nnz) != 3)
423  gk_errexit(SIGERR, "Header line must contain 3 integers.\n");
424 
425  readsizes = 0;
426  readwgts = 0;
427  readvals = 1;
428  numbering = 1;
429  }
430  else if (format == GK_CSR_FMT_METIS) {
431  fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
432  do {
433  if (gk_getline(&line, &lnlen, fpin) <= 0)
434  gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename);
435  } while (line[0] == '%');
436 
437  fmt = ncon = 0;
438  nfields = sscanf(line, "%zu %zu %zu %zu", &nrows, &nnz, &fmt, &ncon);
439  if (nfields < 2)
440  gk_errexit(SIGERR, "Header line must contain at least 2 integers (#vtxs and #edges).\n");
441 
442  ncols = nrows;
443  nnz *= 2;
444 
445  if (fmt > 111)
446  gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt);
447 
448  sprintf(fmtstr, "%03zu", fmt%1000);
449  readsizes = (fmtstr[0] == '1');
450  readwgts = (fmtstr[1] == '1');
451  readvals = (fmtstr[2] == '1');
452  numbering = 1;
453  ncon = (ncon == 0 ? 1 : ncon);
454  }
455  else {
456  readsizes = 0;
457  readwgts = 0;
458 
459  gk_getfilestats(filename, &nrows, &nnz, NULL, NULL);
460 
461  if (readvals == 1 && nnz%2 == 1)
462  gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not even.\n", nnz, readvals);
463  if (readvals == 1)
464  nnz = nnz/2;
465  fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
466  }
467 
468  mat = gk_csr_Create();
469 
470  mat->nrows = nrows;
471 
472  rowptr = mat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Read: rowptr");
473  rowind = mat->rowind = gk_imalloc(nnz, "gk_csr_Read: rowind");
474  if (readvals != 2)
475  rowval = mat->rowval = gk_fsmalloc(nnz, 1.0, "gk_csr_Read: rowval");
476 
477  if (readsizes)
478  mat->rsizes = gk_fsmalloc(nrows, 0.0, "gk_csr_Read: rsizes");
479 
480  if (readwgts)
481  mat->rwgts = gk_fsmalloc(nrows*ncon, 0.0, "gk_csr_Read: rwgts");
482 
483  /*----------------------------------------------------------------------
484  * Read the sparse matrix file
485  *---------------------------------------------------------------------*/
486  numbering = (numbering ? - 1 : 0);
487  for (ncols=0, rowptr[0]=0, k=0, i=0; i<nrows; i++) {
488  do {
489  if (gk_getline(&line, &lnlen, fpin) == -1)
490  gk_errexit(SIGERR, "Premature end of input file: file while reading row %d\n", i);
491  } while (line[0] == '%');
492 
493  head = line;
494  tail = NULL;
495 
496  /* Read vertex sizes */
497  if (readsizes) {
498 #ifdef __MSC__
499  mat->rsizes[i] = (float)strtod(head, &tail);
500 #else
501  mat->rsizes[i] = strtof(head, &tail);
502 #endif
503  if (tail == head)
504  gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
505  if (mat->rsizes[i] < 0)
506  errexit("The size for vertex %zd must be >= 0\n", i+1);
507  head = tail;
508  }
509 
510  /* Read vertex weights */
511  if (readwgts) {
512  for (l=0; l<ncon; l++) {
513 #ifdef __MSC__
514  mat->rwgts[i*ncon+l] = (float)strtod(head, &tail);
515 #else
516  mat->rwgts[i*ncon+l] = strtof(head, &tail);
517 #endif
518  if (tail == head)
519  errexit("The line for vertex %zd does not have enough weights "
520  "for the %d constraints.\n", i+1, ncon);
521  if (mat->rwgts[i*ncon+l] < 0)
522  errexit("The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
523  head = tail;
524  }
525  }
526 
527 
528  /* Read the rest of the row */
529  while (1) {
530  ival = (int)strtol(head, &tail, 0);
531  if (tail == head)
532  break;
533  head = tail;
534 
535  if ((rowind[k] = ival + numbering) < 0)
536  gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i);
537 
538  ncols = gk_max(rowind[k], ncols);
539 
540  if (readvals == 1) {
541 #ifdef __MSC__
542  fval = (float)strtod(head, &tail);
543 #else
544  fval = strtof(head, &tail);
545 #endif
546  if (tail == head)
547  gk_errexit(SIGERR, "Value could not be found for column! Row:%zd, NNZ:%zd\n", i, k);
548  head = tail;
549 
550  rowval[k] = fval;
551  }
552  k++;
553  }
554  rowptr[i+1] = k;
555  }
556 
557  if (format == GK_CSR_FMT_METIS) {
558  ASSERT(ncols+1 == mat->nrows);
559  mat->ncols = mat->nrows;
560  }
561  else {
562  mat->ncols = ncols+1;
563  }
564 
565  if (k != nnz)
566  gk_errexit(SIGERR, "gk_csr_Read: Something wrong with the number of nonzeros in "
567  "the input file. NNZ=%zd, ActualNNZ=%zd.\n", nnz, k);
568 
569  gk_fclose(fpin);
570 
571  gk_free((void **)&line, LTERM);
572 
573  return mat;
574 }
575 
576 
577 /**************************************************************************/
590 /**************************************************************************/
591 void gk_csr_Write(gk_csr_t *mat, char *filename, int format, int writevals, int numbering)
592 {
593  ssize_t i, j;
594  FILE *fpout;
595 
596  if (format == GK_CSR_FMT_BINROW) {
597  if (filename == NULL)
598  gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n");
599  fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout");
600 
601  fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout);
602  fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout);
603  fwrite(mat->rowptr, sizeof(ssize_t), mat->nrows+1, fpout);
604  fwrite(mat->rowind, sizeof(int32_t), mat->rowptr[mat->nrows], fpout);
605  if (writevals)
606  fwrite(mat->rowval, sizeof(float), mat->rowptr[mat->nrows], fpout);
607 
608  gk_fclose(fpout);
609  return;
610  }
611 
612  if (format == GK_CSR_FMT_BINCOL) {
613  if (filename == NULL)
614  gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n");
615  fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout");
616 
617  fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout);
618  fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout);
619  fwrite(mat->colptr, sizeof(ssize_t), mat->ncols+1, fpout);
620  fwrite(mat->colind, sizeof(int32_t), mat->colptr[mat->ncols], fpout);
621  if (writevals)
622  fwrite(mat->colval, sizeof(float), mat->colptr[mat->ncols], fpout);
623 
624  gk_fclose(fpout);
625  return;
626  }
627 
628  if (filename)
629  fpout = gk_fopen(filename, "w", "gk_csr_Write: fpout");
630  else
631  fpout = stdout;
632 
633  if (format == GK_CSR_FMT_CLUTO) {
634  fprintf(fpout, "%d %d %zd\n", mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
635  writevals = 1;
636  numbering = 1;
637  }
638 
639  for (i=0; i<mat->nrows; i++) {
640  for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) {
641  fprintf(fpout, " %d", mat->rowind[j]+(numbering ? 1 : 0));
642  if (writevals)
643  fprintf(fpout, " %f", mat->rowval[j]);
644  }
645  fprintf(fpout, "\n");
646  }
647  if (filename)
648  gk_fclose(fpout);
649 }
650 
651 
652 /*************************************************************************/
668 /**************************************************************************/
669 gk_csr_t *gk_csr_Prune(gk_csr_t *mat, int what, int minf, int maxf)
670 {
671  ssize_t i, j, nnz;
672  int nrows, ncols;
673  ssize_t *rowptr, *nrowptr;
674  int *rowind, *nrowind, *collen;
675  float *rowval, *nrowval;
676  gk_csr_t *nmat;
677 
678  nmat = gk_csr_Create();
679 
680  nrows = nmat->nrows = mat->nrows;
681  ncols = nmat->ncols = mat->ncols;
682 
683  rowptr = mat->rowptr;
684  rowind = mat->rowind;
685  rowval = mat->rowval;
686 
687  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Prune: nrowptr");
688  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_Prune: nrowind");
689  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_Prune: nrowval");
690 
691 
692  switch (what) {
693  case GK_CSR_COL:
694  collen = gk_ismalloc(ncols, 0, "gk_csr_Prune: collen");
695 
696  for (i=0; i<nrows; i++) {
697  for (j=rowptr[i]; j<rowptr[i+1]; j++) {
698  ASSERT(rowind[j] < ncols);
699  collen[rowind[j]]++;
700  }
701  }
702  for (i=0; i<ncols; i++)
703  collen[i] = (collen[i] >= minf && collen[i] <= maxf ? 1 : 0);
704 
705  nrowptr[0] = 0;
706  for (nnz=0, i=0; i<nrows; i++) {
707  for (j=rowptr[i]; j<rowptr[i+1]; j++) {
708  if (collen[rowind[j]]) {
709  nrowind[nnz] = rowind[j];
710  nrowval[nnz] = rowval[j];
711  nnz++;
712  }
713  }
714  nrowptr[i+1] = nnz;
715  }
716  gk_free((void **)&collen, LTERM);
717  break;
718 
719  case GK_CSR_ROW:
720  nrowptr[0] = 0;
721  for (nnz=0, i=0; i<nrows; i++) {
722  if (rowptr[i+1]-rowptr[i] >= minf && rowptr[i+1]-rowptr[i] <= maxf) {
723  for (j=rowptr[i]; j<rowptr[i+1]; j++, nnz++) {
724  nrowind[nnz] = rowind[j];
725  nrowval[nnz] = rowval[j];
726  }
727  }
728  nrowptr[i+1] = nnz;
729  }
730  break;
731 
732  default:
733  gk_csr_Free(&nmat);
734  gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
735  return NULL;
736  }
737 
738  return nmat;
739 }
740 
741 
742 /*************************************************************************/
758 /**************************************************************************/
759 gk_csr_t *gk_csr_LowFilter(gk_csr_t *mat, int what, int norm, float fraction)
760 {
761  ssize_t i, j, nnz;
762  int nrows, ncols, ncand, maxlen=0;
763  ssize_t *rowptr, *colptr, *nrowptr;
764  int *rowind, *colind, *nrowind;
765  float *rowval, *colval, *nrowval, rsum, tsum;
766  gk_csr_t *nmat;
767  gk_fkv_t *cand;
768 
769  nmat = gk_csr_Create();
770 
771  nrows = nmat->nrows = mat->nrows;
772  ncols = nmat->ncols = mat->ncols;
773 
774  rowptr = mat->rowptr;
775  rowind = mat->rowind;
776  rowval = mat->rowval;
777  colptr = mat->colptr;
778  colind = mat->colind;
779  colval = mat->colval;
780 
781  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_LowFilter: nrowptr");
782  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_LowFilter: nrowind");
783  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_LowFilter: nrowval");
784 
785 
786  switch (what) {
787  case GK_CSR_COL:
788  if (mat->colptr == NULL)
789  gk_errexit(SIGERR, "Cannot filter columns when column-based structure has not been created.\n");
790 
791  gk_zcopy(nrows+1, rowptr, nrowptr);
792 
793  for (i=0; i<ncols; i++)
794  maxlen = gk_max(maxlen, colptr[i+1]-colptr[i]);
795 
796  #pragma omp parallel private(i, j, ncand, rsum, tsum, cand)
797  {
798  cand = gk_fkvmalloc(maxlen, "gk_csr_LowFilter: cand");
799 
800  #pragma omp for schedule(static)
801  for (i=0; i<ncols; i++) {
802  for (tsum=0.0, ncand=0, j=colptr[i]; j<colptr[i+1]; j++, ncand++) {
803  cand[ncand].val = colind[j];
804  cand[ncand].key = colval[j];
805  tsum += (norm == 1 ? colval[j] : colval[j]*colval[j]);
806  }
807  gk_fkvsortd(ncand, cand);
808 
809  for (rsum=0.0, j=0; j<ncand && rsum<=fraction*tsum; j++) {
810  rsum += (norm == 1 ? cand[j].key : cand[j].key*cand[j].key);
811  nrowind[nrowptr[cand[j].val]] = i;
812  nrowval[nrowptr[cand[j].val]] = cand[j].key;
813  nrowptr[cand[j].val]++;
814  }
815  }
816 
817  gk_free((void **)&cand, LTERM);
818  }
819 
820  /* compact the nrowind/nrowval */
821  for (nnz=0, i=0; i<nrows; i++) {
822  for (j=rowptr[i]; j<nrowptr[i]; j++, nnz++) {
823  nrowind[nnz] = nrowind[j];
824  nrowval[nnz] = nrowval[j];
825  }
826  nrowptr[i] = nnz;
827  }
828  SHIFTCSR(i, nrows, nrowptr);
829 
830  break;
831 
832  case GK_CSR_ROW:
833  if (mat->rowptr == NULL)
834  gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n");
835 
836  for (i=0; i<nrows; i++)
837  maxlen = gk_max(maxlen, rowptr[i+1]-rowptr[i]);
838 
839  #pragma omp parallel private(i, j, ncand, rsum, tsum, cand)
840  {
841  cand = gk_fkvmalloc(maxlen, "gk_csr_LowFilter: cand");
842 
843  #pragma omp for schedule(static)
844  for (i=0; i<nrows; i++) {
845  for (tsum=0.0, ncand=0, j=rowptr[i]; j<rowptr[i+1]; j++, ncand++) {
846  cand[ncand].val = rowind[j];
847  cand[ncand].key = rowval[j];
848  tsum += (norm == 1 ? rowval[j] : rowval[j]*rowval[j]);
849  }
850  gk_fkvsortd(ncand, cand);
851 
852  for (rsum=0.0, j=0; j<ncand && rsum<=fraction*tsum; j++) {
853  rsum += (norm == 1 ? cand[j].key : cand[j].key*cand[j].key);
854  nrowind[rowptr[i]+j] = cand[j].val;
855  nrowval[rowptr[i]+j] = cand[j].key;
856  }
857  nrowptr[i+1] = rowptr[i]+j;
858  }
859 
860  gk_free((void **)&cand, LTERM);
861  }
862 
863  /* compact nrowind/nrowval */
864  nrowptr[0] = nnz = 0;
865  for (i=0; i<nrows; i++) {
866  for (j=rowptr[i]; j<nrowptr[i+1]; j++, nnz++) {
867  nrowind[nnz] = nrowind[j];
868  nrowval[nnz] = nrowval[j];
869  }
870  nrowptr[i+1] = nnz;
871  }
872 
873  break;
874 
875  default:
876  gk_csr_Free(&nmat);
877  gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
878  return NULL;
879  }
880 
881  return nmat;
882 }
883 
884 
885 /*************************************************************************/
900 /**************************************************************************/
901 gk_csr_t *gk_csr_TopKPlusFilter(gk_csr_t *mat, int what, int topk, float keepval)
902 {
903  ssize_t i, j, k, nnz;
904  int nrows, ncols, ncand;
905  ssize_t *rowptr, *colptr, *nrowptr;
906  int *rowind, *colind, *nrowind;
907  float *rowval, *colval, *nrowval;
908  gk_csr_t *nmat;
909  gk_fkv_t *cand;
910 
911  nmat = gk_csr_Create();
912 
913  nrows = nmat->nrows = mat->nrows;
914  ncols = nmat->ncols = mat->ncols;
915 
916  rowptr = mat->rowptr;
917  rowind = mat->rowind;
918  rowval = mat->rowval;
919  colptr = mat->colptr;
920  colind = mat->colind;
921  colval = mat->colval;
922 
923  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_LowFilter: nrowptr");
924  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_LowFilter: nrowind");
925  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_LowFilter: nrowval");
926 
927 
928  switch (what) {
929  case GK_CSR_COL:
930  if (mat->colptr == NULL)
931  gk_errexit(SIGERR, "Cannot filter columns when column-based structure has not been created.\n");
932 
933  cand = gk_fkvmalloc(nrows, "gk_csr_LowFilter: cand");
934 
935  gk_zcopy(nrows+1, rowptr, nrowptr);
936  for (i=0; i<ncols; i++) {
937  for (ncand=0, j=colptr[i]; j<colptr[i+1]; j++, ncand++) {
938  cand[ncand].val = colind[j];
939  cand[ncand].key = colval[j];
940  }
941  gk_fkvsortd(ncand, cand);
942 
943  k = gk_min(topk, ncand);
944  for (j=0; j<k; j++) {
945  nrowind[nrowptr[cand[j].val]] = i;
946  nrowval[nrowptr[cand[j].val]] = cand[j].key;
947  nrowptr[cand[j].val]++;
948  }
949  for (; j<ncand; j++) {
950  if (cand[j].key < keepval)
951  break;
952 
953  nrowind[nrowptr[cand[j].val]] = i;
954  nrowval[nrowptr[cand[j].val]] = cand[j].key;
955  nrowptr[cand[j].val]++;
956  }
957  }
958 
959  /* compact the nrowind/nrowval */
960  for (nnz=0, i=0; i<nrows; i++) {
961  for (j=rowptr[i]; j<nrowptr[i]; j++, nnz++) {
962  nrowind[nnz] = nrowind[j];
963  nrowval[nnz] = nrowval[j];
964  }
965  nrowptr[i] = nnz;
966  }
967  SHIFTCSR(i, nrows, nrowptr);
968 
969  gk_free((void **)&cand, LTERM);
970  break;
971 
972  case GK_CSR_ROW:
973  if (mat->rowptr == NULL)
974  gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n");
975 
976  cand = gk_fkvmalloc(ncols, "gk_csr_LowFilter: cand");
977 
978  nrowptr[0] = 0;
979  for (nnz=0, i=0; i<nrows; i++) {
980  for (ncand=0, j=rowptr[i]; j<rowptr[i+1]; j++, ncand++) {
981  cand[ncand].val = rowind[j];
982  cand[ncand].key = rowval[j];
983  }
984  gk_fkvsortd(ncand, cand);
985 
986  k = gk_min(topk, ncand);
987  for (j=0; j<k; j++, nnz++) {
988  nrowind[nnz] = cand[j].val;
989  nrowval[nnz] = cand[j].key;
990  }
991  for (; j<ncand; j++, nnz++) {
992  if (cand[j].key < keepval)
993  break;
994 
995  nrowind[nnz] = cand[j].val;
996  nrowval[nnz] = cand[j].key;
997  }
998  nrowptr[i+1] = nnz;
999  }
1000 
1001  gk_free((void **)&cand, LTERM);
1002  break;
1003 
1004  default:
1005  gk_csr_Free(&nmat);
1006  gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
1007  return NULL;
1008  }
1009 
1010  return nmat;
1011 }
1012 
1013 
1014 /*************************************************************************/
1030 /**************************************************************************/
1031 gk_csr_t *gk_csr_ZScoreFilter(gk_csr_t *mat, int what, float zscore)
1032 {
1033  ssize_t i, j, nnz;
1034  int nrows;
1035  ssize_t *rowptr, *nrowptr;
1036  int *rowind, *nrowind;
1037  float *rowval, *nrowval, avgwgt;
1038  gk_csr_t *nmat;
1039 
1040  nmat = gk_csr_Create();
1041 
1042  nmat->nrows = mat->nrows;
1043  nmat->ncols = mat->ncols;
1044 
1045  nrows = mat->nrows;
1046  rowptr = mat->rowptr;
1047  rowind = mat->rowind;
1048  rowval = mat->rowval;
1049 
1050  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_ZScoreFilter: nrowptr");
1051  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_ZScoreFilter: nrowind");
1052  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_ZScoreFilter: nrowval");
1053 
1054 
1055  switch (what) {
1056  case GK_CSR_COL:
1057  gk_errexit(SIGERR, "This has not been implemented yet.\n");
1058  break;
1059 
1060  case GK_CSR_ROW:
1061  if (mat->rowptr == NULL)
1062  gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n");
1063 
1064  nrowptr[0] = 0;
1065  for (nnz=0, i=0; i<nrows; i++) {
1066  avgwgt = zscore/(rowptr[i+1]-rowptr[i]);
1067  for (j=rowptr[i]; j<rowptr[i+1]; j++) {
1068  if (rowval[j] > avgwgt) {
1069  nrowind[nnz] = rowind[j];
1070  nrowval[nnz] = rowval[j];
1071  nnz++;
1072  }
1073  }
1074  nrowptr[i+1] = nnz;
1075  }
1076  break;
1077 
1078  default:
1079  gk_csr_Free(&nmat);
1080  gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
1081  return NULL;
1082  }
1083 
1084  return nmat;
1085 }
1086 
1087 
1088 /*************************************************************************/
1097 /**************************************************************************/
1099 {
1100  ssize_t i;
1101  int nrows, ncols, nncols;
1102  ssize_t *rowptr;
1103  int *rowind, *colmap;
1104  gk_ikv_t *clens;
1105 
1106  nrows = mat->nrows;
1107  ncols = mat->ncols;
1108  rowptr = mat->rowptr;
1109  rowind = mat->rowind;
1110 
1111  colmap = gk_imalloc(ncols, "gk_csr_CompactColumns: colmap");
1112 
1113  clens = gk_ikvmalloc(ncols, "gk_csr_CompactColumns: clens");
1114  for (i=0; i<ncols; i++) {
1115  clens[i].key = 0;
1116  clens[i].val = i;
1117  }
1118 
1119  for (i=0; i<rowptr[nrows]; i++)
1120  clens[rowind[i]].key++;
1121  gk_ikvsortd(ncols, clens);
1122 
1123  for (nncols=0, i=0; i<ncols; i++) {
1124  if (clens[i].key > 0)
1125  colmap[clens[i].val] = nncols++;
1126  else
1127  break;
1128  }
1129 
1130  for (i=0; i<rowptr[nrows]; i++)
1131  rowind[i] = colmap[rowind[i]];
1132 
1133  mat->ncols = nncols;
1134 
1135  gk_free((void **)&colmap, &clens, LTERM);
1136 }
1137 
1138 
1139 /*************************************************************************/
1145 /**************************************************************************/
1147 {
1148  int n, nn=0;
1149  ssize_t *ptr;
1150  int *ind;
1151  float *val;
1152 
1153  switch (what) {
1154  case GK_CSR_ROW:
1155  if (!mat->rowptr)
1156  gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n");
1157 
1158  n = mat->nrows;
1159  ptr = mat->rowptr;
1160  ind = mat->rowind;
1161  val = mat->rowval;
1162  break;
1163 
1164  case GK_CSR_COL:
1165  if (!mat->colptr)
1166  gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n");
1167 
1168  n = mat->ncols;
1169  ptr = mat->colptr;
1170  ind = mat->colind;
1171  val = mat->colval;
1172  break;
1173 
1174  default:
1175  gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
1176  return;
1177  }
1178 
1179  #pragma omp parallel if (n > 100)
1180  {
1181  ssize_t i, j, k;
1182  gk_ikv_t *cand;
1183  float *tval;
1184 
1185  #pragma omp single
1186  for (i=0; i<n; i++)
1187  nn = gk_max(nn, ptr[i+1]-ptr[i]);
1188 
1189  cand = gk_ikvmalloc(nn, "gk_csr_SortIndices: cand");
1190  tval = gk_fmalloc(nn, "gk_csr_SortIndices: tval");
1191 
1192  #pragma omp for schedule(static)
1193  for (i=0; i<n; i++) {
1194  for (k=0, j=ptr[i]; j<ptr[i+1]; j++) {
1195  if (j > ptr[i] && ind[j] < ind[j-1])
1196  k = 1; /* an inversion */
1197  cand[j-ptr[i]].val = j-ptr[i];
1198  cand[j-ptr[i]].key = ind[j];
1199  tval[j-ptr[i]] = val[j];
1200  }
1201  if (k) {
1202  gk_ikvsorti(ptr[i+1]-ptr[i], cand);
1203  for (j=ptr[i]; j<ptr[i+1]; j++) {
1204  ind[j] = cand[j-ptr[i]].key;
1205  val[j] = tval[cand[j-ptr[i]].val];
1206  }
1207  }
1208  }
1209 
1210  gk_free((void **)&cand, &tval, LTERM);
1211  }
1212 
1213 }
1214 
1215 
1216 /*************************************************************************/
1222 /**************************************************************************/
1224 {
1225  /* 'f' stands for forward, 'r' stands for reverse */
1226  ssize_t i, j, k, nf, nr;
1227  ssize_t *fptr, *rptr;
1228  int *find, *rind;
1229  float *fval, *rval;
1230 
1231  switch (what) {
1232  case GK_CSR_COL:
1233  nf = mat->nrows;
1234  fptr = mat->rowptr;
1235  find = mat->rowind;
1236  fval = mat->rowval;
1237 
1238  if (mat->colptr) gk_free((void **)&mat->colptr, LTERM);
1239  if (mat->colind) gk_free((void **)&mat->colind, LTERM);
1240  if (mat->colval) gk_free((void **)&mat->colval, LTERM);
1241 
1242  nr = mat->ncols;
1243  rptr = mat->colptr = gk_zsmalloc(nr+1, 0, "gk_csr_CreateIndex: rptr");
1244  rind = mat->colind = gk_imalloc(fptr[nf], "gk_csr_CreateIndex: rind");
1245  rval = mat->colval = (fval ? gk_fmalloc(fptr[nf], "gk_csr_CreateIndex: rval") : NULL);
1246  break;
1247  case GK_CSR_ROW:
1248  nf = mat->ncols;
1249  fptr = mat->colptr;
1250  find = mat->colind;
1251  fval = mat->colval;
1252 
1253  if (mat->rowptr) gk_free((void **)&mat->rowptr, LTERM);
1254  if (mat->rowind) gk_free((void **)&mat->rowind, LTERM);
1255  if (mat->rowval) gk_free((void **)&mat->rowval, LTERM);
1256 
1257  nr = mat->nrows;
1258  rptr = mat->rowptr = gk_zsmalloc(nr+1, 0, "gk_csr_CreateIndex: rptr");
1259  rind = mat->rowind = gk_imalloc(fptr[nf], "gk_csr_CreateIndex: rind");
1260  rval = mat->rowval = (fval ? gk_fmalloc(fptr[nf], "gk_csr_CreateIndex: rval") : NULL);
1261  break;
1262  default:
1263  gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
1264  return;
1265  }
1266 
1267 
1268  for (i=0; i<nf; i++) {
1269  for (j=fptr[i]; j<fptr[i+1]; j++)
1270  rptr[find[j]]++;
1271  }
1272  MAKECSR(i, nr, rptr);
1273 
1274  if (rptr[nr] > 6*nr) {
1275  for (i=0; i<nf; i++) {
1276  for (j=fptr[i]; j<fptr[i+1]; j++)
1277  rind[rptr[find[j]]++] = i;
1278  }
1279  SHIFTCSR(i, nr, rptr);
1280 
1281  if (fval) {
1282  for (i=0; i<nf; i++) {
1283  for (j=fptr[i]; j<fptr[i+1]; j++)
1284  rval[rptr[find[j]]++] = fval[j];
1285  }
1286  SHIFTCSR(i, nr, rptr);
1287  }
1288  }
1289  else {
1290  if (fval) {
1291  for (i=0; i<nf; i++) {
1292  for (j=fptr[i]; j<fptr[i+1]; j++) {
1293  k = find[j];
1294  rind[rptr[k]] = i;
1295  rval[rptr[k]++] = fval[j];
1296  }
1297  }
1298  }
1299  else {
1300  for (i=0; i<nf; i++) {
1301  for (j=fptr[i]; j<fptr[i+1]; j++)
1302  rind[rptr[find[j]]++] = i;
1303  }
1304  }
1305  SHIFTCSR(i, nr, rptr);
1306  }
1307 }
1308 
1309 
1310 /*************************************************************************/
1318 /**************************************************************************/
1319 void gk_csr_Normalize(gk_csr_t *mat, int what, int norm)
1320 {
1321  ssize_t i, j;
1322  int n;
1323  ssize_t *ptr;
1324  float *val, sum;
1325 
1326  if (what & GK_CSR_ROW && mat->rowval) {
1327  n = mat->nrows;
1328  ptr = mat->rowptr;
1329  val = mat->rowval;
1330 
1331 #pragma omp parallel if (ptr[n] > OMPMINOPS)
1332  {
1333 #pragma omp for private(j, sum) schedule(static)
1334  for (i = 0; i < n; i++) {
1335  for (sum = 0.0, j = ptr[i]; j < ptr[i + 1]; j++) {
1336  if (norm == 2)
1337  sum += val[j] * val[j];
1338  else if (norm == 1)
1339  sum += val[j]; /* assume val[j] > 0 */
1340  }
1341  if (sum > 0) {
1342  if (norm == 2)
1343  sum = 1.0 / sqrt(sum);
1344  else if (norm == 1)
1345  sum = 1.0 / sum;
1346  for (j = ptr[i]; j < ptr[i + 1]; j++) val[j] *= sum;
1347  }
1348  }
1349  }
1350  }
1351 
1352  if (what & GK_CSR_COL && mat->colval) {
1353  n = mat->ncols;
1354  ptr = mat->colptr;
1355  val = mat->colval;
1356 
1357 #pragma omp parallel if (ptr[n] > OMPMINOPS)
1358  {
1359 #pragma omp for private(j, sum) schedule(static)
1360  for (i = 0; i < n; i++) {
1361  for (sum = 0.0, j = ptr[i]; j < ptr[i + 1]; j++)
1362  if (norm == 2)
1363  sum += val[j] * val[j];
1364  else if (norm == 1)
1365  sum += val[j];
1366  if (sum > 0) {
1367  if (norm == 2)
1368  sum = 1.0 / sqrt(sum);
1369  else if (norm == 1)
1370  sum = 1.0 / sum;
1371  for (j = ptr[i]; j < ptr[i + 1]; j++) val[j] *= sum;
1372  }
1373  }
1374  }
1375  }
1376 }
1377 
1378 
1379 /*************************************************************************/
1385 /**************************************************************************/
1387 {
1388  ssize_t i, j;
1389  int nrows, ncols, nnzcols, bgfreq;
1390  ssize_t *rowptr;
1391  int *rowind, *collen;
1392  float *rowval, *cscale, maxtf;
1393 
1394  nrows = mat->nrows;
1395  rowptr = mat->rowptr;
1396  rowind = mat->rowind;
1397  rowval = mat->rowval;
1398 
1399  switch (type) {
1400  case GK_CSR_MAXTF: /* TF' = .5 + .5*TF/MAX(TF) */
1401  #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1402  {
1403  #pragma omp for private(j, maxtf) schedule(static)
1404  for (i=0; i<nrows; i++) {
1405  maxtf = fabs(rowval[rowptr[i]]);
1406  for (j=rowptr[i]; j<rowptr[i+1]; j++)
1407  maxtf = (maxtf < fabs(rowval[j]) ? fabs(rowval[j]) : maxtf);
1408 
1409  for (j=rowptr[i]; j<rowptr[i+1]; j++)
1410  rowval[j] = .5 + .5*rowval[j]/maxtf;
1411  }
1412  }
1413  break;
1414 
1415  case GK_CSR_MAXTF2: /* TF' = .1 + .9*TF/MAX(TF) */
1416  #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1417  {
1418  #pragma omp for private(j, maxtf) schedule(static)
1419  for (i=0; i<nrows; i++) {
1420  maxtf = fabs(rowval[rowptr[i]]);
1421  for (j=rowptr[i]; j<rowptr[i+1]; j++)
1422  maxtf = (maxtf < fabs(rowval[j]) ? fabs(rowval[j]) : maxtf);
1423 
1424  for (j=rowptr[i]; j<rowptr[i+1]; j++)
1425  rowval[j] = .1 + .9*rowval[j]/maxtf;
1426  }
1427  }
1428  break;
1429 
1430  case GK_CSR_SQRT: /* TF' = .1+SQRT(TF) */
1431  #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1432  {
1433  #pragma omp for private(j) schedule(static)
1434  for (i=0; i<nrows; i++) {
1435  for (j=rowptr[i]; j<rowptr[i+1]; j++) {
1436  if (rowval[j] != 0.0)
1437  rowval[j] = .1+sign(rowval[j], sqrt(fabs(rowval[j])));
1438  }
1439  }
1440  }
1441  break;
1442 
1443  case GK_CSR_POW25: /* TF' = .1+POW(TF,.25) */
1444  #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1445  {
1446  #pragma omp for private(j) schedule(static)
1447  for (i=0; i<nrows; i++) {
1448  for (j=rowptr[i]; j<rowptr[i+1]; j++) {
1449  if (rowval[j] != 0.0)
1450  rowval[j] = .1+sign(rowval[j], sqrt(sqrt(fabs(rowval[j]))));
1451  }
1452  }
1453  }
1454  break;
1455 
1456  case GK_CSR_POW65: /* TF' = .1+POW(TF,.65) */
1457  #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1458  {
1459  #pragma omp for private(j) schedule(static)
1460  for (i=0; i<nrows; i++) {
1461  for (j=rowptr[i]; j<rowptr[i+1]; j++) {
1462  if (rowval[j] != 0.0)
1463  rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .65));
1464  }
1465  }
1466  }
1467  break;
1468 
1469  case GK_CSR_POW75: /* TF' = .1+POW(TF,.75) */
1470  #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1471  {
1472  #pragma omp for private(j) schedule(static)
1473  for (i=0; i<nrows; i++) {
1474  for (j=rowptr[i]; j<rowptr[i+1]; j++) {
1475  if (rowval[j] != 0.0)
1476  rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .75));
1477  }
1478  }
1479  }
1480  break;
1481 
1482  case GK_CSR_POW85: /* TF' = .1+POW(TF,.85) */
1483  #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1484  {
1485  #pragma omp for private(j) schedule(static)
1486  for (i=0; i<nrows; i++) {
1487  for (j=rowptr[i]; j<rowptr[i+1]; j++) {
1488  if (rowval[j] != 0.0)
1489  rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .85));
1490  }
1491  }
1492  }
1493  break;
1494 
1495  case GK_CSR_LOG: /* TF' = 1+log_2(TF) */
1496  #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1497  {
1498  double logscale = 1.0/log(2.0);
1499  #pragma omp for schedule(static,32)
1500  for (i=0; i<rowptr[nrows]; i++) {
1501  if (rowval[i] != 0.0)
1502  rowval[i] = 1+(rowval[i]>0.0 ? log(rowval[i]) : -log(-rowval[i]))*logscale;
1503  }
1504 #ifdef XXX
1505  #pragma omp for private(j) schedule(static)
1506  for (i=0; i<nrows; i++) {
1507  for (j=rowptr[i]; j<rowptr[i+1]; j++) {
1508  if (rowval[j] != 0.0)
1509  rowval[j] = 1+(rowval[j]>0.0 ? log(rowval[j]) : -log(-rowval[j]))*logscale;
1510  //rowval[j] = 1+sign(rowval[j], log(fabs(rowval[j]))*logscale);
1511  }
1512  }
1513 #endif
1514  }
1515  break;
1516 
1517  case GK_CSR_IDF: /* TF' = TF*IDF */
1518  ncols = mat->ncols;
1519  cscale = gk_fmalloc(ncols, "gk_csr_Scale: cscale");
1520  collen = gk_ismalloc(ncols, 0, "gk_csr_Scale: collen");
1521 
1522  for (i=0; i<nrows; i++) {
1523  for (j=rowptr[i]; j<rowptr[i+1]; j++)
1524  collen[rowind[j]]++;
1525  }
1526 
1527  #pragma omp parallel if (ncols > OMPMINOPS)
1528  {
1529  #pragma omp for schedule(static)
1530  for (i=0; i<ncols; i++)
1531  cscale[i] = (collen[i] > 0 ? log(1.0*nrows/collen[i]) : 0.0);
1532  }
1533 
1534  #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1535  {
1536  #pragma omp for private(j) schedule(static)
1537  for (i=0; i<nrows; i++) {
1538  for (j=rowptr[i]; j<rowptr[i+1]; j++)
1539  rowval[j] *= cscale[rowind[j]];
1540  }
1541  }
1542 
1543  gk_free((void **)&cscale, &collen, LTERM);
1544  break;
1545 
1546  case GK_CSR_IDF2: /* TF' = TF*IDF */
1547  ncols = mat->ncols;
1548  cscale = gk_fmalloc(ncols, "gk_csr_Scale: cscale");
1549  collen = gk_ismalloc(ncols, 0, "gk_csr_Scale: collen");
1550 
1551  for (i=0; i<nrows; i++) {
1552  for (j=rowptr[i]; j<rowptr[i+1]; j++)
1553  collen[rowind[j]]++;
1554  }
1555 
1556  nnzcols = 0;
1557  #pragma omp parallel if (ncols > OMPMINOPS)
1558  {
1559  #pragma omp for schedule(static) reduction(+:nnzcols)
1560  for (i=0; i<ncols; i++)
1561  nnzcols += (collen[i] > 0 ? 1 : 0);
1562 
1563  bgfreq = gk_max(10, (ssize_t)(.5*rowptr[nrows]/nnzcols));
1564  printf("nnz: %zd, nnzcols: %d, bgfreq: %d\n", rowptr[nrows], nnzcols, bgfreq);
1565 
1566  #pragma omp for schedule(static)
1567  for (i=0; i<ncols; i++)
1568  cscale[i] = (collen[i] > 0 ? log(1.0*(nrows+2*bgfreq)/(bgfreq+collen[i])) : 0.0);
1569  }
1570 
1571  #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1572  {
1573  #pragma omp for private(j) schedule(static)
1574  for (i=0; i<nrows; i++) {
1575  for (j=rowptr[i]; j<rowptr[i+1]; j++)
1576  rowval[j] *= cscale[rowind[j]];
1577  }
1578  }
1579 
1580  gk_free((void **)&cscale, &collen, LTERM);
1581  break;
1582 
1583  default:
1584  gk_errexit(SIGERR, "Unknown scaling type of %d\n", type);
1585  }
1586 
1587 }
1588 
1589 
1590 /*************************************************************************/
1596 /**************************************************************************/
1598 {
1599  ssize_t i;
1600  int n;
1601  ssize_t *ptr;
1602  float *val, *sums;
1603 
1604  switch (what) {
1605  case GK_CSR_ROW:
1606  n = mat->nrows;
1607  ptr = mat->rowptr;
1608  val = mat->rowval;
1609 
1610  if (mat->rsums)
1611  gk_free((void **)&mat->rsums, LTERM);
1612 
1613  sums = mat->rsums = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: sums");
1614  break;
1615  case GK_CSR_COL:
1616  n = mat->ncols;
1617  ptr = mat->colptr;
1618  val = mat->colval;
1619 
1620  if (mat->csums)
1621  gk_free((void **)&mat->csums, LTERM);
1622 
1623  sums = mat->csums = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: sums");
1624  break;
1625  default:
1626  gk_errexit(SIGERR, "Invalid sum type of %d.\n", what);
1627  return;
1628  }
1629 
1630  #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
1631  for (i=0; i<n; i++)
1632  sums[i] = gk_fsum(ptr[i+1]-ptr[i], val+ptr[i], 1);
1633 }
1634 
1635 
1636 /*************************************************************************/
1642 /**************************************************************************/
1644 {
1645  ssize_t i;
1646  int n;
1647  ssize_t *ptr;
1648  float *val, *norms;
1649 
1650  switch (what) {
1651  case GK_CSR_ROW:
1652  n = mat->nrows;
1653  ptr = mat->rowptr;
1654  val = mat->rowval;
1655 
1656  if (mat->rnorms) gk_free((void **)&mat->rnorms, LTERM);
1657 
1658  norms = mat->rnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: norms");
1659  break;
1660  case GK_CSR_COL:
1661  n = mat->ncols;
1662  ptr = mat->colptr;
1663  val = mat->colval;
1664 
1665  if (mat->cnorms) gk_free((void **)&mat->cnorms, LTERM);
1666 
1667  norms = mat->cnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: norms");
1668  break;
1669  default:
1670  gk_errexit(SIGERR, "Invalid norm type of %d.\n", what);
1671  return;
1672  }
1673 
1674  #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
1675  for (i=0; i<n; i++)
1676  norms[i] = gk_fdot(ptr[i+1]-ptr[i], val+ptr[i], 1, val+ptr[i], 1);
1677 }
1678 
1679 
1680 /*************************************************************************/
1693 /**************************************************************************/
1694 float gk_csr_ComputeSimilarity(gk_csr_t *mat, int i1, int i2, int what, int simtype)
1695 {
1696  int nind1, nind2;
1697  int *ind1, *ind2;
1698  float *val1, *val2, stat1, stat2, sim;
1699 
1700  switch (what) {
1701  case GK_CSR_ROW:
1702  if (!mat->rowptr)
1703  gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n");
1704  nind1 = mat->rowptr[i1+1]-mat->rowptr[i1];
1705  nind2 = mat->rowptr[i2+1]-mat->rowptr[i2];
1706  ind1 = mat->rowind + mat->rowptr[i1];
1707  ind2 = mat->rowind + mat->rowptr[i2];
1708  val1 = mat->rowval + mat->rowptr[i1];
1709  val2 = mat->rowval + mat->rowptr[i2];
1710  break;
1711 
1712  case GK_CSR_COL:
1713  if (!mat->colptr)
1714  gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n");
1715  nind1 = mat->colptr[i1+1]-mat->colptr[i1];
1716  nind2 = mat->colptr[i2+1]-mat->colptr[i2];
1717  ind1 = mat->colind + mat->colptr[i1];
1718  ind2 = mat->colind + mat->colptr[i2];
1719  val1 = mat->colval + mat->colptr[i1];
1720  val2 = mat->colval + mat->colptr[i2];
1721  break;
1722 
1723  default:
1724  gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
1725  return 0.0;
1726  }
1727 
1728 
1729  switch (simtype) {
1730  case GK_CSR_COS:
1731  case GK_CSR_JAC:
1732  sim = stat1 = stat2 = 0.0;
1733  i1 = i2 = 0;
1734  while (i1<nind1 && i2<nind2) {
1735  if (i1 == nind1) {
1736  stat2 += val2[i2]*val2[i2];
1737  i2++;
1738  }
1739  else if (i2 == nind2) {
1740  stat1 += val1[i1]*val1[i1];
1741  i1++;
1742  }
1743  else if (ind1[i1] < ind2[i2]) {
1744  stat1 += val1[i1]*val1[i1];
1745  i1++;
1746  }
1747  else if (ind1[i1] > ind2[i2]) {
1748  stat2 += val2[i2]*val2[i2];
1749  i2++;
1750  }
1751  else {
1752  sim += val1[i1]*val2[i2];
1753  stat1 += val1[i1]*val1[i1];
1754  stat2 += val2[i2]*val2[i2];
1755  i1++;
1756  i2++;
1757  }
1758  }
1759  if (simtype == GK_CSR_COS)
1760  sim = (stat1*stat2 > 0.0 ? sim/sqrt(stat1*stat2) : 0.0);
1761  else
1762  sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
1763  break;
1764 
1765  case GK_CSR_MIN:
1766  sim = stat1 = stat2 = 0.0;
1767  i1 = i2 = 0;
1768  while (i1<nind1 && i2<nind2) {
1769  if (i1 == nind1) {
1770  stat2 += val2[i2];
1771  i2++;
1772  }
1773  else if (i2 == nind2) {
1774  stat1 += val1[i1];
1775  i1++;
1776  }
1777  else if (ind1[i1] < ind2[i2]) {
1778  stat1 += val1[i1];
1779  i1++;
1780  }
1781  else if (ind1[i1] > ind2[i2]) {
1782  stat2 += val2[i2];
1783  i2++;
1784  }
1785  else {
1786  sim += gk_min(val1[i1],val2[i2]);
1787  stat1 += val1[i1];
1788  stat2 += val2[i2];
1789  i1++;
1790  i2++;
1791  }
1792  }
1793  sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
1794 
1795  break;
1796 
1797  case GK_CSR_AMIN:
1798  sim = stat1 = stat2 = 0.0;
1799  i1 = i2 = 0;
1800  while (i1<nind1 && i2<nind2) {
1801  if (i1 == nind1) {
1802  stat2 += val2[i2];
1803  i2++;
1804  }
1805  else if (i2 == nind2) {
1806  stat1 += val1[i1];
1807  i1++;
1808  }
1809  else if (ind1[i1] < ind2[i2]) {
1810  stat1 += val1[i1];
1811  i1++;
1812  }
1813  else if (ind1[i1] > ind2[i2]) {
1814  stat2 += val2[i2];
1815  i2++;
1816  }
1817  else {
1818  sim += gk_min(val1[i1],val2[i2]);
1819  stat1 += val1[i1];
1820  stat2 += val2[i2];
1821  i1++;
1822  i2++;
1823  }
1824  }
1825  sim = (stat1 > 0.0 ? sim/stat1 : 0.0);
1826 
1827  break;
1828 
1829  default:
1830  gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype);
1831  return -1;
1832  }
1833 
1834  return sim;
1835 
1836 }
1837 
1838 
1839 /*************************************************************************/
1865 /**************************************************************************/
1866 int gk_csr_GetSimilarRows(gk_csr_t *mat, int nqterms, int *qind,
1867  float *qval, int simtype, int nsim, float minsim, gk_fkv_t *hits,
1868  int *i_marker, gk_fkv_t *i_cand)
1869 {
1870  ssize_t i, ii, j, k;
1871  int nrows, ncols, ncand;
1872  ssize_t *colptr;
1873  int *colind, *marker;
1874  float *colval, *rnorms, mynorm, *rsums, mysum;
1875  gk_fkv_t *cand;
1876 
1877  if (nqterms == 0)
1878  return 0;
1879 
1880  nrows = mat->nrows;
1881  ncols = mat->ncols;
1882  colptr = mat->colptr;
1883  colind = mat->colind;
1884  colval = mat->colval;
1885 
1886  marker = (i_marker ? i_marker : gk_ismalloc(nrows, -1, "gk_csr_SimilarRows: marker"));
1887  cand = (i_cand ? i_cand : gk_fkvmalloc(nrows, "gk_csr_SimilarRows: cand"));
1888 
1889  switch (simtype) {
1890  case GK_CSR_COS:
1891  for (ncand=0, ii=0; ii<nqterms; ii++) {
1892  i = qind[ii];
1893  if (i < ncols) {
1894  for (j=colptr[i]; j<colptr[i+1]; j++) {
1895  k = colind[j];
1896  if (marker[k] == -1) {
1897  cand[ncand].val = k;
1898  cand[ncand].key = 0;
1899  marker[k] = ncand++;
1900  }
1901  cand[marker[k]].key += colval[j]*qval[ii];
1902  }
1903  }
1904  }
1905  break;
1906 
1907  case GK_CSR_JAC:
1908  for (ncand=0, ii=0; ii<nqterms; ii++) {
1909  i = qind[ii];
1910  if (i < ncols) {
1911  for (j=colptr[i]; j<colptr[i+1]; j++) {
1912  k = colind[j];
1913  if (marker[k] == -1) {
1914  cand[ncand].val = k;
1915  cand[ncand].key = 0;
1916  marker[k] = ncand++;
1917  }
1918  cand[marker[k]].key += colval[j]*qval[ii];
1919  }
1920  }
1921  }
1922 
1923  rnorms = mat->rnorms;
1924  mynorm = gk_fdot(nqterms, qval, 1, qval, 1);
1925 
1926  for (i=0; i<ncand; i++)
1927  cand[i].key = cand[i].key/(rnorms[cand[i].val]+mynorm-cand[i].key);
1928  break;
1929 
1930  case GK_CSR_MIN:
1931  for (ncand=0, ii=0; ii<nqterms; ii++) {
1932  i = qind[ii];
1933  if (i < ncols) {
1934  for (j=colptr[i]; j<colptr[i+1]; j++) {
1935  k = colind[j];
1936  if (marker[k] == -1) {
1937  cand[ncand].val = k;
1938  cand[ncand].key = 0;
1939  marker[k] = ncand++;
1940  }
1941  cand[marker[k]].key += gk_min(colval[j], qval[ii]);
1942  }
1943  }
1944  }
1945 
1946  rsums = mat->rsums;
1947  mysum = gk_fsum(nqterms, qval, 1);
1948 
1949  for (i=0; i<ncand; i++)
1950  cand[i].key = cand[i].key/(rsums[cand[i].val]+mysum-cand[i].key);
1951  break;
1952 
1953  /* Assymetric MIN similarity */
1954  case GK_CSR_AMIN:
1955  for (ncand=0, ii=0; ii<nqterms; ii++) {
1956  i = qind[ii];
1957  if (i < ncols) {
1958  for (j=colptr[i]; j<colptr[i+1]; j++) {
1959  k = colind[j];
1960  if (marker[k] == -1) {
1961  cand[ncand].val = k;
1962  cand[ncand].key = 0;
1963  marker[k] = ncand++;
1964  }
1965  cand[marker[k]].key += gk_min(colval[j], qval[ii]);
1966  }
1967  }
1968  }
1969 
1970  mysum = gk_fsum(nqterms, qval, 1);
1971 
1972  for (i=0; i<ncand; i++)
1973  cand[i].key = cand[i].key/mysum;
1974  break;
1975 
1976  default:
1977  gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype);
1978  return -1;
1979  }
1980 
1981  /* go and prune the hits that are bellow minsim */
1982  for (j=0, i=0; i<ncand; i++) {
1983  marker[cand[i].val] = -1;
1984  if (cand[i].key >= minsim)
1985  cand[j++] = cand[i];
1986  }
1987  ncand = j;
1988 
1989  if (nsim == -1 || nsim >= ncand) {
1990  nsim = ncand;
1991  }
1992  else {
1993  nsim = gk_min(nsim, ncand);
1994  gk_dfkvkselect(ncand, nsim, cand);
1995  gk_fkvsortd(nsim, cand);
1996  }
1997 
1998  gk_fkvcopy(nsim, cand, hits);
1999 
2000  if (i_marker == NULL)
2001  gk_free((void **)&marker, LTERM);
2002  if (i_cand == NULL)
2003  gk_free((void **)&cand, LTERM);
2004 
2005  return nsim;
2006 }
2007 
void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens, size_t *r_max_nlntokens, size_t *r_nbytes)
Definition: fs.c:79
const gtsam::Symbol key('X', 0)
float * rnorms
Definition: gk_struct.h:78
FILE * gk_fopen(char *, char *, const char *)
Definition: GKlib/io.c:24
float * rsums
Definition: gk_struct.h:79
void errexit(char *f_str,...)
Definition: error.c:54
void gk_ikvsorti(size_t, gk_ikv_t *)
Definition: sort.c:157
idx_t idx_t idx_t idx_t idx_t idx_t idx_t real_t real_t idx_t idx_t idx_t * part
void gk_csr_Write(gk_csr_t *mat, char *filename, int format, int writevals, int numbering)
Definition: csr.c:591
int32_t * rowind
Definition: gk_struct.h:75
#define GK_CSR_FMT_METIS
Definition: gk_defs.h:63
ssize_t * colptr
Definition: gk_struct.h:74
#define MAKECSR(i, n, a)
Definition: gk_macros.h:73
float * cvols
Definition: gk_struct.h:81
int gk_fexists(char *fname)
Definition: fs.c:21
int n
#define GK_CSR_IDF
Definition: gk_defs.h:52
float * rvols
Definition: gk_struct.h:81
float * colval
Definition: gk_struct.h:77
gk_csr_t * gk_csr_ExtractSubmatrix(gk_csr_t *mat, int rstart, int nrows)
Definition: csr.c:135
gk_csr_t * gk_csr_ExtractPartition(gk_csr_t *mat, int *part, int pid)
Definition: csr.c:230
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE FixedSegmentReturnType< internal::get_fixed_value< NType >::value >::Type tail(NType n)
float * rowval
Definition: gk_struct.h:77
#define GK_CSR_POW65
Definition: gk_defs.h:48
void gk_errexit(int signum, char *f_str,...)
Definition: error.c:77
float * rwgts
Definition: gk_struct.h:82
gk_csr_t * gk_csr_ZScoreFilter(gk_csr_t *mat, int what, float zscore)
Definition: csr.c:1031
#define GK_CSR_IDF2
Definition: gk_defs.h:53
Real fabs(const Real &a)
EIGEN_DEVICE_FUNC const LogReturnType log() const
void gk_fkvsortd(size_t, gk_fkv_t *)
Definition: sort.c:256
void gk_csr_Init(gk_csr_t *mat)
Definition: csr.c:36
void gk_csr_CompactColumns(gk_csr_t *mat)
Definition: csr.c:1098
float * csums
Definition: gk_struct.h:79
gk_csr_t * gk_csr_TopKPlusFilter(gk_csr_t *mat, int what, int topk, float keepval)
Definition: csr.c:901
#define GK_CSR_ROW
Definition: gk_defs.h:42
#define GK_CSR_FMT_CLUTO
Definition: gk_defs.h:61
gk_csr_t * gk_csr_Create()
Definition: csr.c:19
#define GK_CSR_JAC
Definition: gk_defs.h:57
float gk_csr_ComputeSimilarity(gk_csr_t *mat, int i1, int i2, int what, int simtype)
Definition: csr.c:1694
#define GK_CSR_MAXTF2
Definition: gk_defs.h:54
#define ASSERT(expression)
Definition: ccolamd.c:872
gk_csr_t * gk_csr_Dup(gk_csr_t *mat)
Definition: csr.c:80
void gk_csr_CreateIndex(gk_csr_t *mat, int what)
Definition: csr.c:1223
void gk_csr_Scale(gk_csr_t *mat, int type)
Definition: csr.c:1386
static const Line3 l(Rot3(), 1, 1)
#define rsum
Definition: gklib_rename.h:117
gk_csr_t * gk_csr_LowFilter(gk_csr_t *mat, int what, int norm, float fraction)
Definition: csr.c:759
gk_idx_t gk_getline(char **lineptr, size_t *n, FILE *stream)
Definition: GKlib/io.c:57
void gk_ikvsortd(size_t, gk_ikv_t *)
Definition: sort.c:168
#define GK_CSR_POW75
Definition: gk_defs.h:49
#define SIGERR
Definition: gk_defs.h:38
std::vector< int > ind
#define gk_max(a, b)
Definition: gk_macros.h:16
float * cnorms
Definition: gk_struct.h:78
gk_csr_t ** gk_csr_Split(gk_csr_t *mat, int *color)
Definition: csr.c:277
int gk_csr_GetSimilarRows(gk_csr_t *mat, int nqterms, int *qind, float *qval, int simtype, int nsim, float minsim, gk_fkv_t *hits, int *i_marker, gk_fkv_t *i_cand)
Definition: csr.c:1866
#define GK_CSR_FMT_BINCOL
Definition: gk_defs.h:65
EIGEN_DEVICE_FUNC const SignReturnType sign() const
signed int int32_t
Definition: ms_stdint.h:82
#define gk_min(a, b)
Definition: gk_macros.h:17
gk_csr_t * gk_csr_ExtractRows(gk_csr_t *mat, int nrows, int *rind)
Definition: csr.c:191
#define GK_CSR_AMIN
Definition: gk_defs.h:59
float * csizes
Definition: gk_struct.h:80
idx_t * ncon
#define NULL
Definition: ccolamd.c:609
float * rsizes
Definition: gk_struct.h:80
void gk_csr_SortIndices(gk_csr_t *mat, int what)
Definition: csr.c:1146
#define GK_CSR_FMT_BINROW
Definition: gk_defs.h:64
gk_csr_t * gk_csr_Prune(gk_csr_t *mat, int what, int minf, int maxf)
Definition: csr.c:669
#define SHIFTCSR(i, n, a)
Definition: gk_macros.h:80
#define GK_CSR_POW85
Definition: gk_defs.h:50
int32_t * colind
Definition: gk_struct.h:75
void gk_csr_Free(gk_csr_t **mat)
Definition: csr.c:48
idx_t * nn
void gk_csr_ComputeSquaredNorms(gk_csr_t *mat, int what)
Definition: csr.c:1643
void * gk_malloc(size_t nbytes, char *msg)
Definition: memory.c:140
void gk_free(void **ptr1,...)
Definition: memory.c:202
ssize_t * rowptr
Definition: gk_struct.h:74
#define GK_CSR_COL
Definition: gk_defs.h:43
void gk_csr_FreeContents(gk_csr_t *mat)
Definition: csr.c:63
#define GK_CSR_MIN
Definition: gk_defs.h:58
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE FixedSegmentReturnType< internal::get_fixed_value< NType >::value >::Type head(NType n)
std::string format(const std::string &str, const std::vector< std::string > &find, const std::vector< std::string > &replace)
float * cwgts
Definition: gk_struct.h:82
int32_t * colids
Definition: gk_struct.h:76
#define GK_CSR_SQRT
Definition: gk_defs.h:46
#define GK_CSR_MAXTF
Definition: gk_defs.h:45
int32_t nrows
Definition: gk_struct.h:73
void gk_csr_ComputeSums(gk_csr_t *mat, int what)
Definition: csr.c:1597
int32_t ncols
Definition: gk_struct.h:73
#define GK_CSR_COS
Definition: gk_defs.h:56
int32_t * rowids
Definition: gk_struct.h:76
Jet< T, N > sqrt(const Jet< T, N > &f)
Definition: jet.h:418
#define GK_CSR_LOG
Definition: gk_defs.h:51
void gk_fclose(FILE *)
Definition: GKlib/io.c:44
void gk_csr_Normalize(gk_csr_t *mat, int what, int norm)
Definition: csr.c:1319
std::ptrdiff_t j
int gk_dfkvkselect(size_t n, int topk, gk_fkv_t *cand)
Definition: fkvkselect.c:20
Definition: pytypes.h:1370
#define LTERM
Definition: gk_defs.h:14
gk_csr_t * gk_csr_Read(char *filename, int format, int readvals, int numbering)
Definition: csr.c:349
#define GK_CSR_POW25
Definition: gk_defs.h:47


gtsam
Author(s):
autogenerated on Tue Jul 4 2023 02:34:06