12 #define OMPMINOPS 50000
39 mat->nrows =
mat->ncols = -1;
92 gk_zmalloc(
mat->nrows+1,
"gk_csr_Dup: rowptr"));
95 gk_imalloc(
mat->nrows,
"gk_csr_Dup: rowids"));
98 gk_fmalloc(
mat->nrows,
"gk_csr_Dup: rnorms"));
101 gk_imalloc(
mat->rowptr[
mat->nrows],
"gk_csr_Dup: rowind"));
104 gk_fmalloc(
mat->rowptr[
mat->nrows],
"gk_csr_Dup: rowval"));
109 gk_zmalloc(
mat->ncols+1,
"gk_csr_Dup: colptr"));
112 gk_imalloc(
mat->ncols,
"gk_csr_Dup: colids"));
115 gk_fmalloc(
mat->ncols,
"gk_csr_Dup: cnorms"));
118 gk_imalloc(
mat->colptr[
mat->ncols],
"gk_csr_Dup: colind"));
121 gk_fmalloc(
mat->colptr[
mat->ncols],
"gk_csr_Dup: colval"));
140 if (rstart+nrows >
mat->nrows)
150 nmat->
rowptr = gk_zcopy(nrows+1,
mat->rowptr+rstart,
151 gk_zmalloc(nrows+1,
"gk_csr_ExtractSubmatrix: rowptr"));
152 for (
i=nrows;
i>=0;
i--)
157 nmat->
rowids = gk_icopy(nrows,
mat->rowids+rstart,
158 gk_imalloc(nrows,
"gk_csr_ExtractSubmatrix: rowids"));
160 nmat->
rnorms = gk_fcopy(nrows,
mat->rnorms+rstart,
161 gk_fmalloc(nrows,
"gk_csr_ExtractSubmatrix: rnorms"));
164 nmat->
rsums = gk_fcopy(nrows,
mat->rsums+rstart,
165 gk_fmalloc(nrows,
"gk_csr_ExtractSubmatrix: rsums"));
169 nmat->
rowind = gk_icopy(
mat->rowptr[rstart+nrows]-
mat->rowptr[rstart],
170 mat->rowind+
mat->rowptr[rstart],
171 gk_imalloc(
mat->rowptr[rstart+nrows]-
mat->rowptr[rstart],
172 "gk_csr_ExtractSubmatrix: rowind"));
174 nmat->
rowval = gk_fcopy(
mat->rowptr[rstart+nrows]-
mat->rowptr[rstart],
175 mat->rowval+
mat->rowptr[rstart],
176 gk_fmalloc(
mat->rowptr[rstart+nrows]-
mat->rowptr[rstart],
177 "gk_csr_ExtractSubmatrix: rowval"));
201 for (nnz=0,
i=0;
i<nrows;
i++)
202 nnz +=
mat->rowptr[rind[
i]+1]-
mat->rowptr[rind[
i]];
204 nmat->
rowptr = gk_zmalloc(nmat->
nrows+1,
"gk_csr_ExtractPartition: rowptr");
205 nmat->
rowind = gk_imalloc(nnz,
"gk_csr_ExtractPartition: rowind");
206 nmat->
rowval = gk_fmalloc(nnz,
"gk_csr_ExtractPartition: rowval");
209 for (nnz=0,
j=0, ii=0; ii<nrows; ii++) {
213 nnz +=
mat->rowptr[
i+1]-
mat->rowptr[
i];
240 for (nnz=0,
i=0;
i<
mat->nrows;
i++) {
241 if (
part[
i] == pid) {
243 nnz +=
mat->rowptr[
i+1]-
mat->rowptr[
i];
247 nmat->
rowptr = gk_zmalloc(nmat->
nrows+1,
"gk_csr_ExtractPartition: rowptr");
248 nmat->
rowind = gk_imalloc(nnz,
"gk_csr_ExtractPartition: rowind");
249 nmat->
rowval = gk_fmalloc(nnz,
"gk_csr_ExtractPartition: rowval");
252 for (nnz=0,
j=0,
i=0;
i<
mat->nrows;
i++) {
253 if (
part[
i] == pid) {
256 nnz +=
mat->rowptr[
i+1]-
mat->rowptr[
i];
287 rowptr =
mat->rowptr;
288 rowind =
mat->rowind;
289 rowval =
mat->rowval;
291 ncolors = gk_imax(rowptr[nrows], color)+1;
294 for (
i=0;
i<ncolors;
i++) {
298 smats[
i]->
rowptr = gk_zsmalloc(nrows+1, 0,
"gk_csr_Split: smats[i]->rowptr");
301 for (
i=0;
i<nrows;
i++) {
302 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++)
303 smats[color[
j]]->rowptr[
i]++;
305 for (
i=0;
i<ncolors;
i++)
308 for (
i=0;
i<ncolors;
i++) {
309 smats[
i]->
rowind = gk_imalloc(smats[
i]->rowptr[nrows],
"gk_csr_Split: smats[i]->rowind");
310 smats[
i]->
rowval = gk_fmalloc(smats[
i]->rowptr[nrows],
"gk_csr_Split: smats[i]->rowval");
313 for (
i=0;
i<nrows;
i++) {
314 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++) {
321 for (
i=0;
i<ncolors;
i++)
352 size_t nfields, nrows, ncols, nnz, fmt,
ncon;
356 float *rowval=
NULL, fval;
357 int readsizes, readwgts;
370 if (fread(&(
mat->nrows),
sizeof(
int32_t), 1, fpin) != 1)
372 if (fread(&(
mat->ncols),
sizeof(
int32_t), 1, fpin) != 1)
374 mat->rowptr = gk_zmalloc(
mat->nrows+1,
"gk_csr_Read: rowptr");
375 if (fread(
mat->rowptr,
sizeof(
ssize_t),
mat->nrows+1, fpin) !=
mat->nrows+1)
377 mat->rowind = gk_imalloc(
mat->rowptr[
mat->nrows],
"gk_csr_Read: rowind");
381 mat->rowval = gk_fmalloc(
mat->rowptr[
mat->nrows],
"gk_csr_Read: rowval");
382 if (fread(
mat->rowval,
sizeof(
float),
mat->rowptr[
mat->nrows], fpin) !=
mat->rowptr[
mat->nrows])
394 if (fread(&(
mat->nrows),
sizeof(
int32_t), 1, fpin) != 1)
396 if (fread(&(
mat->ncols),
sizeof(
int32_t), 1, fpin) != 1)
398 mat->colptr = gk_zmalloc(
mat->ncols+1,
"gk_csr_Read: colptr");
399 if (fread(
mat->colptr,
sizeof(
ssize_t),
mat->ncols+1, fpin) !=
mat->ncols+1)
401 mat->colind = gk_imalloc(
mat->colptr[
mat->ncols],
"gk_csr_Read: colind");
405 mat->colval = gk_fmalloc(
mat->colptr[
mat->ncols],
"gk_csr_Read: colval");
406 if (fread(
mat->colval,
sizeof(
float),
mat->colptr[
mat->ncols], fpin) !=
mat->colptr[
mat->ncols])
420 }
while (line[0] ==
'%');
422 if (sscanf(line,
"%zu %zu %zu", &nrows, &ncols, &nnz) != 3)
435 }
while (line[0] ==
'%');
438 nfields = sscanf(line,
"%zu %zu %zu %zu", &nrows, &nnz, &fmt, &
ncon);
440 gk_errexit(
SIGERR,
"Header line must contain at least 2 integers (#vtxs and #edges).\n");
446 gk_errexit(
SIGERR,
"Cannot read this type of file format [fmt=%zu]!\n", fmt);
448 sprintf(fmtstr,
"%03zu", fmt%1000);
449 readsizes = (fmtstr[0] ==
'1');
450 readwgts = (fmtstr[1] ==
'1');
451 readvals = (fmtstr[2] ==
'1');
461 if (readvals == 1 && nnz%2 == 1)
462 gk_errexit(
SIGERR,
"Error: The number of numbers (%zd %d) in the input file is not even.\n", nnz, readvals);
472 rowptr =
mat->rowptr = gk_zmalloc(nrows+1,
"gk_csr_Read: rowptr");
473 rowind =
mat->rowind = gk_imalloc(nnz,
"gk_csr_Read: rowind");
475 rowval =
mat->rowval = gk_fsmalloc(nnz, 1.0,
"gk_csr_Read: rowval");
478 mat->rsizes = gk_fsmalloc(nrows, 0.0,
"gk_csr_Read: rsizes");
481 mat->rwgts = gk_fsmalloc(nrows*
ncon, 0.0,
"gk_csr_Read: rwgts");
486 numbering = (numbering ? - 1 : 0);
487 for (ncols=0, rowptr[0]=0, k=0,
i=0;
i<nrows;
i++) {
490 gk_errexit(
SIGERR,
"Premature end of input file: file while reading row %d\n",
i);
491 }
while (line[0] ==
'%');
504 gk_errexit(
SIGERR,
"The line for vertex %zd does not have size information\n",
i+1);
505 if (
mat->rsizes[
i] < 0)
506 errexit(
"The size for vertex %zd must be >= 0\n",
i+1);
519 errexit(
"The line for vertex %zd does not have enough weights "
520 "for the %d constraints.\n",
i+1,
ncon);
522 errexit(
"The weight vertex %zd and constraint %zd must be >= 0\n",
i+1,
l);
535 if ((rowind[k] = ival + numbering) < 0)
538 ncols =
gk_max(rowind[k], ncols);
547 gk_errexit(
SIGERR,
"Value could not be found for column! Row:%zd, NNZ:%zd\n",
i, k);
562 mat->ncols = ncols+1;
566 gk_errexit(
SIGERR,
"gk_csr_Read: Something wrong with the number of nonzeros in "
567 "the input file. NNZ=%zd, ActualNNZ=%zd.\n", nnz, k);
601 fwrite(&(
mat->nrows),
sizeof(
int32_t), 1, fpout);
602 fwrite(&(
mat->ncols),
sizeof(
int32_t), 1, fpout);
606 fwrite(
mat->rowval,
sizeof(
float),
mat->rowptr[
mat->nrows], fpout);
617 fwrite(&(
mat->nrows),
sizeof(
int32_t), 1, fpout);
618 fwrite(&(
mat->ncols),
sizeof(
int32_t), 1, fpout);
622 fwrite(
mat->colval,
sizeof(
float),
mat->colptr[
mat->ncols], fpout);
634 fprintf(fpout,
"%d %d %zd\n",
mat->nrows,
mat->ncols,
mat->rowptr[
mat->nrows]);
639 for (
i=0;
i<
mat->nrows;
i++) {
640 for (
j=
mat->rowptr[
i]; j<mat->rowptr[
i+1];
j++) {
641 fprintf(fpout,
" %d",
mat->rowind[
j]+(numbering ? 1 : 0));
643 fprintf(fpout,
" %f",
mat->rowval[
j]);
645 fprintf(fpout,
"\n");
674 int *rowind, *nrowind, *collen;
675 float *rowval, *nrowval;
683 rowptr =
mat->rowptr;
684 rowind =
mat->rowind;
685 rowval =
mat->rowval;
687 nrowptr = nmat->
rowptr = gk_zmalloc(nrows+1,
"gk_csr_Prune: nrowptr");
688 nrowind = nmat->
rowind = gk_imalloc(rowptr[nrows],
"gk_csr_Prune: nrowind");
689 nrowval = nmat->
rowval = gk_fmalloc(rowptr[nrows],
"gk_csr_Prune: nrowval");
694 collen = gk_ismalloc(ncols, 0,
"gk_csr_Prune: collen");
696 for (
i=0;
i<nrows;
i++) {
697 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++) {
702 for (
i=0;
i<ncols;
i++)
703 collen[
i] = (collen[
i] >= minf && collen[
i] <= maxf ? 1 : 0);
706 for (nnz=0,
i=0;
i<nrows;
i++) {
707 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++) {
708 if (collen[rowind[
j]]) {
709 nrowind[nnz] = rowind[
j];
710 nrowval[nnz] = rowval[
j];
721 for (nnz=0,
i=0;
i<nrows;
i++) {
722 if (rowptr[
i+1]-rowptr[
i] >= minf && rowptr[
i+1]-rowptr[
i] <= maxf) {
723 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++, nnz++) {
724 nrowind[nnz] = rowind[
j];
725 nrowval[nnz] = rowval[
j];
762 int nrows, ncols, ncand, maxlen=0;
763 ssize_t *rowptr, *colptr, *nrowptr;
764 int *rowind, *colind, *nrowind;
765 float *rowval, *colval, *nrowval,
rsum, tsum;
774 rowptr =
mat->rowptr;
775 rowind =
mat->rowind;
776 rowval =
mat->rowval;
777 colptr =
mat->colptr;
778 colind =
mat->colind;
779 colval =
mat->colval;
781 nrowptr = nmat->
rowptr = gk_zmalloc(nrows+1,
"gk_csr_LowFilter: nrowptr");
782 nrowind = nmat->
rowind = gk_imalloc(rowptr[nrows],
"gk_csr_LowFilter: nrowind");
783 nrowval = nmat->
rowval = gk_fmalloc(rowptr[nrows],
"gk_csr_LowFilter: nrowval");
789 gk_errexit(
SIGERR,
"Cannot filter columns when column-based structure has not been created.\n");
791 gk_zcopy(nrows+1, rowptr, nrowptr);
793 for (
i=0;
i<ncols;
i++)
794 maxlen =
gk_max(maxlen, colptr[
i+1]-colptr[
i]);
796 #pragma omp parallel private(i, j, ncand, rsum, tsum, cand)
798 cand = gk_fkvmalloc(maxlen,
"gk_csr_LowFilter: cand");
800 #pragma omp for schedule(static)
801 for (
i=0;
i<ncols;
i++) {
802 for (tsum=0.0, ncand=0,
j=colptr[
i];
j<colptr[
i+1];
j++, ncand++) {
803 cand[ncand].val = colind[
j];
804 cand[ncand].key = colval[
j];
805 tsum += (norm == 1 ? colval[
j] : colval[
j]*colval[
j]);
809 for (
rsum=0.0,
j=0;
j<ncand &&
rsum<=fraction*tsum;
j++) {
810 rsum += (norm == 1 ? cand[
j].key : cand[
j].key*cand[
j].key);
811 nrowind[nrowptr[cand[
j].val]] =
i;
812 nrowval[nrowptr[cand[
j].val]] = cand[
j].key;
813 nrowptr[cand[
j].val]++;
821 for (nnz=0,
i=0;
i<nrows;
i++) {
822 for (
j=rowptr[
i];
j<nrowptr[
i];
j++, nnz++) {
823 nrowind[nnz] = nrowind[
j];
824 nrowval[nnz] = nrowval[
j];
834 gk_errexit(
SIGERR,
"Cannot filter rows when row-based structure has not been created.\n");
836 for (
i=0;
i<nrows;
i++)
837 maxlen =
gk_max(maxlen, rowptr[
i+1]-rowptr[
i]);
839 #pragma omp parallel private(i, j, ncand, rsum, tsum, cand)
841 cand = gk_fkvmalloc(maxlen,
"gk_csr_LowFilter: cand");
843 #pragma omp for schedule(static)
844 for (
i=0;
i<nrows;
i++) {
845 for (tsum=0.0, ncand=0,
j=rowptr[
i];
j<rowptr[
i+1];
j++, ncand++) {
846 cand[ncand].val = rowind[
j];
847 cand[ncand].key = rowval[
j];
848 tsum += (norm == 1 ? rowval[
j] : rowval[
j]*rowval[
j]);
852 for (
rsum=0.0,
j=0;
j<ncand &&
rsum<=fraction*tsum;
j++) {
853 rsum += (norm == 1 ? cand[
j].key : cand[
j].key*cand[
j].key);
854 nrowind[rowptr[
i]+
j] = cand[
j].val;
855 nrowval[rowptr[
i]+
j] = cand[
j].key;
857 nrowptr[
i+1] = rowptr[
i]+
j;
864 nrowptr[0] = nnz = 0;
865 for (
i=0;
i<nrows;
i++) {
866 for (
j=rowptr[
i];
j<nrowptr[
i+1];
j++, nnz++) {
867 nrowind[nnz] = nrowind[
j];
868 nrowval[nnz] = nrowval[
j];
904 int nrows, ncols, ncand;
905 ssize_t *rowptr, *colptr, *nrowptr;
906 int *rowind, *colind, *nrowind;
907 float *rowval, *colval, *nrowval;
916 rowptr =
mat->rowptr;
917 rowind =
mat->rowind;
918 rowval =
mat->rowval;
919 colptr =
mat->colptr;
920 colind =
mat->colind;
921 colval =
mat->colval;
923 nrowptr = nmat->
rowptr = gk_zmalloc(nrows+1,
"gk_csr_LowFilter: nrowptr");
924 nrowind = nmat->
rowind = gk_imalloc(rowptr[nrows],
"gk_csr_LowFilter: nrowind");
925 nrowval = nmat->
rowval = gk_fmalloc(rowptr[nrows],
"gk_csr_LowFilter: nrowval");
931 gk_errexit(
SIGERR,
"Cannot filter columns when column-based structure has not been created.\n");
933 cand = gk_fkvmalloc(nrows,
"gk_csr_LowFilter: cand");
935 gk_zcopy(nrows+1, rowptr, nrowptr);
936 for (
i=0;
i<ncols;
i++) {
937 for (ncand=0,
j=colptr[
i];
j<colptr[
i+1];
j++, ncand++) {
938 cand[ncand].val = colind[
j];
939 cand[ncand].key = colval[
j];
944 for (
j=0;
j<k;
j++) {
945 nrowind[nrowptr[cand[
j].val]] =
i;
946 nrowval[nrowptr[cand[
j].val]] = cand[
j].key;
947 nrowptr[cand[
j].val]++;
949 for (;
j<ncand;
j++) {
950 if (cand[
j].
key < keepval)
953 nrowind[nrowptr[cand[
j].val]] =
i;
954 nrowval[nrowptr[cand[
j].val]] = cand[
j].key;
955 nrowptr[cand[
j].val]++;
960 for (nnz=0,
i=0;
i<nrows;
i++) {
961 for (
j=rowptr[
i];
j<nrowptr[
i];
j++, nnz++) {
962 nrowind[nnz] = nrowind[
j];
963 nrowval[nnz] = nrowval[
j];
974 gk_errexit(
SIGERR,
"Cannot filter rows when row-based structure has not been created.\n");
976 cand = gk_fkvmalloc(ncols,
"gk_csr_LowFilter: cand");
979 for (nnz=0,
i=0;
i<nrows;
i++) {
980 for (ncand=0,
j=rowptr[
i];
j<rowptr[
i+1];
j++, ncand++) {
981 cand[ncand].val = rowind[
j];
982 cand[ncand].key = rowval[
j];
987 for (
j=0;
j<k;
j++, nnz++) {
988 nrowind[nnz] = cand[
j].val;
989 nrowval[nnz] = cand[
j].key;
991 for (;
j<ncand;
j++, nnz++) {
992 if (cand[
j].
key < keepval)
995 nrowind[nnz] = cand[
j].val;
996 nrowval[nnz] = cand[
j].key;
1036 int *rowind, *nrowind;
1037 float *rowval, *nrowval, avgwgt;
1046 rowptr =
mat->rowptr;
1047 rowind =
mat->rowind;
1048 rowval =
mat->rowval;
1050 nrowptr = nmat->
rowptr = gk_zmalloc(nrows+1,
"gk_csr_ZScoreFilter: nrowptr");
1051 nrowind = nmat->
rowind = gk_imalloc(rowptr[nrows],
"gk_csr_ZScoreFilter: nrowind");
1052 nrowval = nmat->
rowval = gk_fmalloc(rowptr[nrows],
"gk_csr_ZScoreFilter: nrowval");
1062 gk_errexit(
SIGERR,
"Cannot filter rows when row-based structure has not been created.\n");
1065 for (nnz=0,
i=0;
i<nrows;
i++) {
1066 avgwgt = zscore/(rowptr[
i+1]-rowptr[
i]);
1067 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++) {
1068 if (rowval[
j] > avgwgt) {
1069 nrowind[nnz] = rowind[
j];
1070 nrowval[nnz] = rowval[
j];
1101 int nrows, ncols, nncols;
1103 int *rowind, *colmap;
1108 rowptr =
mat->rowptr;
1109 rowind =
mat->rowind;
1111 colmap = gk_imalloc(ncols,
"gk_csr_CompactColumns: colmap");
1113 clens = gk_ikvmalloc(ncols,
"gk_csr_CompactColumns: clens");
1114 for (
i=0;
i<ncols;
i++) {
1119 for (
i=0;
i<rowptr[nrows];
i++)
1120 clens[rowind[
i]].
key++;
1123 for (nncols=0,
i=0;
i<ncols;
i++) {
1124 if (clens[
i].
key > 0)
1125 colmap[clens[
i].val] = nncols++;
1130 for (
i=0;
i<rowptr[nrows];
i++)
1131 rowind[
i] = colmap[rowind[
i]];
1133 mat->ncols = nncols;
1179 #pragma omp parallel if (n > 100)
1189 cand = gk_ikvmalloc(
nn,
"gk_csr_SortIndices: cand");
1190 tval = gk_fmalloc(
nn,
"gk_csr_SortIndices: tval");
1192 #pragma omp for schedule(static)
1193 for (
i=0;
i<
n;
i++) {
1194 for (k=0,
j=ptr[
i];
j<ptr[
i+1];
j++) {
1197 cand[
j-ptr[
i]].val =
j-ptr[
i];
1198 cand[
j-ptr[
i]].key =
ind[
j];
1199 tval[
j-ptr[
i]] = val[
j];
1203 for (
j=ptr[
i];
j<ptr[
i+1];
j++) {
1204 ind[
j] = cand[
j-ptr[
i]].key;
1205 val[
j] = tval[cand[
j-ptr[
i]].val];
1243 rptr =
mat->colptr = gk_zsmalloc(nr+1, 0,
"gk_csr_CreateIndex: rptr");
1244 rind =
mat->colind = gk_imalloc(fptr[nf],
"gk_csr_CreateIndex: rind");
1245 rval =
mat->colval = (fval ? gk_fmalloc(fptr[nf],
"gk_csr_CreateIndex: rval") :
NULL);
1258 rptr =
mat->rowptr = gk_zsmalloc(nr+1, 0,
"gk_csr_CreateIndex: rptr");
1259 rind =
mat->rowind = gk_imalloc(fptr[nf],
"gk_csr_CreateIndex: rind");
1260 rval =
mat->rowval = (fval ? gk_fmalloc(fptr[nf],
"gk_csr_CreateIndex: rval") :
NULL);
1268 for (
i=0;
i<nf;
i++) {
1269 for (
j=fptr[
i];
j<fptr[
i+1];
j++)
1274 if (rptr[nr] > 6*nr) {
1275 for (
i=0;
i<nf;
i++) {
1276 for (
j=fptr[
i];
j<fptr[
i+1];
j++)
1277 rind[rptr[find[
j]]++] =
i;
1282 for (
i=0;
i<nf;
i++) {
1283 for (
j=fptr[
i];
j<fptr[
i+1];
j++)
1284 rval[rptr[find[
j]]++] = fval[
j];
1291 for (
i=0;
i<nf;
i++) {
1292 for (
j=fptr[
i];
j<fptr[
i+1];
j++) {
1295 rval[rptr[k]++] = fval[
j];
1300 for (
i=0;
i<nf;
i++) {
1301 for (
j=fptr[
i];
j<fptr[
i+1];
j++)
1302 rind[rptr[find[
j]]++] =
i;
1331 #pragma omp parallel if (ptr[n] > OMPMINOPS)
1333 #pragma omp for private(j, sum) schedule(static)
1334 for (
i = 0;
i <
n;
i++) {
1335 for (sum = 0.0,
j = ptr[
i];
j < ptr[
i + 1];
j++) {
1337 sum += val[
j] * val[
j];
1343 sum = 1.0 /
sqrt(sum);
1346 for (
j = ptr[
i];
j < ptr[
i + 1];
j++) val[
j] *= sum;
1357 #pragma omp parallel if (ptr[n] > OMPMINOPS)
1359 #pragma omp for private(j, sum) schedule(static)
1360 for (
i = 0;
i <
n;
i++) {
1361 for (sum = 0.0,
j = ptr[
i];
j < ptr[
i + 1];
j++)
1363 sum += val[
j] * val[
j];
1368 sum = 1.0 /
sqrt(sum);
1371 for (
j = ptr[
i];
j < ptr[
i + 1];
j++) val[
j] *= sum;
1389 int nrows, ncols, nnzcols, bgfreq;
1391 int *rowind, *collen;
1392 float *rowval, *cscale, maxtf;
1395 rowptr =
mat->rowptr;
1396 rowind =
mat->rowind;
1397 rowval =
mat->rowval;
1401 #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1403 #pragma omp for private(j, maxtf) schedule(static)
1404 for (
i=0;
i<nrows;
i++) {
1405 maxtf =
fabs(rowval[rowptr[
i]]);
1406 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++)
1407 maxtf = (maxtf <
fabs(rowval[
j]) ?
fabs(rowval[
j]) : maxtf);
1409 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++)
1410 rowval[
j] = .5 + .5*rowval[
j]/maxtf;
1416 #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1418 #pragma omp for private(j, maxtf) schedule(static)
1419 for (
i=0;
i<nrows;
i++) {
1420 maxtf =
fabs(rowval[rowptr[
i]]);
1421 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++)
1422 maxtf = (maxtf <
fabs(rowval[
j]) ?
fabs(rowval[
j]) : maxtf);
1424 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++)
1425 rowval[
j] = .1 + .9*rowval[
j]/maxtf;
1431 #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1433 #pragma omp for private(j) schedule(static)
1434 for (
i=0;
i<nrows;
i++) {
1435 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++) {
1436 if (rowval[
j] != 0.0)
1444 #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1446 #pragma omp for private(j) schedule(static)
1447 for (
i=0;
i<nrows;
i++) {
1448 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++) {
1449 if (rowval[
j] != 0.0)
1457 #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1459 #pragma omp for private(j) schedule(static)
1460 for (
i=0;
i<nrows;
i++) {
1461 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++) {
1462 if (rowval[
j] != 0.0)
1463 rowval[
j] = .1+
sign(rowval[
j], powf(
fabs(rowval[
j]), .65));
1470 #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1472 #pragma omp for private(j) schedule(static)
1473 for (
i=0;
i<nrows;
i++) {
1474 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++) {
1475 if (rowval[
j] != 0.0)
1476 rowval[
j] = .1+
sign(rowval[
j], powf(
fabs(rowval[
j]), .75));
1483 #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1485 #pragma omp for private(j) schedule(static)
1486 for (
i=0;
i<nrows;
i++) {
1487 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++) {
1488 if (rowval[
j] != 0.0)
1489 rowval[
j] = .1+
sign(rowval[
j], powf(
fabs(rowval[
j]), .85));
1496 #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1498 double logscale = 1.0/
log(2.0);
1499 #pragma omp for schedule(static,32)
1500 for (
i=0;
i<rowptr[nrows];
i++) {
1501 if (rowval[
i] != 0.0)
1502 rowval[
i] = 1+(rowval[
i]>0.0 ?
log(rowval[
i]) : -
log(-rowval[
i]))*logscale;
1505 #pragma omp for private(j) schedule(static)
1506 for (
i=0;
i<nrows;
i++) {
1507 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++) {
1508 if (rowval[
j] != 0.0)
1509 rowval[
j] = 1+(rowval[
j]>0.0 ?
log(rowval[
j]) : -
log(-rowval[
j]))*logscale;
1519 cscale = gk_fmalloc(ncols,
"gk_csr_Scale: cscale");
1520 collen = gk_ismalloc(ncols, 0,
"gk_csr_Scale: collen");
1522 for (
i=0;
i<nrows;
i++) {
1523 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++)
1524 collen[rowind[
j]]++;
1527 #pragma omp parallel if (ncols > OMPMINOPS)
1529 #pragma omp for schedule(static)
1530 for (
i=0;
i<ncols;
i++)
1531 cscale[
i] = (collen[
i] > 0 ?
log(1.0*nrows/collen[
i]) : 0.0);
1534 #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1536 #pragma omp for private(j) schedule(static)
1537 for (
i=0;
i<nrows;
i++) {
1538 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++)
1539 rowval[
j] *= cscale[rowind[
j]];
1548 cscale = gk_fmalloc(ncols,
"gk_csr_Scale: cscale");
1549 collen = gk_ismalloc(ncols, 0,
"gk_csr_Scale: collen");
1551 for (
i=0;
i<nrows;
i++) {
1552 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++)
1553 collen[rowind[
j]]++;
1557 #pragma omp parallel if (ncols > OMPMINOPS)
1559 #pragma omp for schedule(static) reduction(+:nnzcols)
1560 for (
i=0;
i<ncols;
i++)
1561 nnzcols += (collen[
i] > 0 ? 1 : 0);
1564 printf(
"nnz: %zd, nnzcols: %d, bgfreq: %d\n", rowptr[nrows], nnzcols, bgfreq);
1566 #pragma omp for schedule(static)
1567 for (
i=0;
i<ncols;
i++)
1568 cscale[
i] = (collen[
i] > 0 ?
log(1.0*(nrows+2*bgfreq)/(bgfreq+collen[
i])) : 0.0);
1571 #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
1573 #pragma omp for private(j) schedule(static)
1574 for (
i=0;
i<nrows;
i++) {
1575 for (
j=rowptr[
i];
j<rowptr[
i+1];
j++)
1576 rowval[
j] *= cscale[rowind[
j]];
1613 sums =
mat->rsums = gk_fsmalloc(
n, 0,
"gk_csr_ComputeSums: sums");
1623 sums =
mat->csums = gk_fsmalloc(
n, 0,
"gk_csr_ComputeSums: sums");
1630 #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
1632 sums[
i] = gk_fsum(ptr[
i+1]-ptr[
i], val+ptr[
i], 1);
1658 norms =
mat->rnorms = gk_fsmalloc(
n, 0,
"gk_csr_ComputeSums: norms");
1667 norms =
mat->cnorms = gk_fsmalloc(
n, 0,
"gk_csr_ComputeSums: norms");
1674 #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
1676 norms[
i] = gk_fdot(ptr[
i+1]-ptr[
i], val+ptr[
i], 1, val+ptr[
i], 1);
1698 float *val1, *val2, stat1, stat2, sim;
1705 nind2 =
mat->rowptr[i2+1]-
mat->rowptr[i2];
1706 ind1 =
mat->rowind +
mat->rowptr[
i1];
1707 ind2 =
mat->rowind +
mat->rowptr[i2];
1708 val1 =
mat->rowval +
mat->rowptr[
i1];
1709 val2 =
mat->rowval +
mat->rowptr[i2];
1716 nind2 =
mat->colptr[i2+1]-
mat->colptr[i2];
1717 ind1 =
mat->colind +
mat->colptr[
i1];
1718 ind2 =
mat->colind +
mat->colptr[i2];
1719 val1 =
mat->colval +
mat->colptr[
i1];
1720 val2 =
mat->colval +
mat->colptr[i2];
1732 sim = stat1 = stat2 = 0.0;
1734 while (
i1<nind1 && i2<nind2) {
1736 stat2 += val2[i2]*val2[i2];
1739 else if (i2 == nind2) {
1740 stat1 += val1[
i1]*val1[
i1];
1743 else if (ind1[
i1] < ind2[i2]) {
1744 stat1 += val1[
i1]*val1[
i1];
1747 else if (ind1[
i1] > ind2[i2]) {
1748 stat2 += val2[i2]*val2[i2];
1752 sim += val1[
i1]*val2[i2];
1753 stat1 += val1[
i1]*val1[
i1];
1754 stat2 += val2[i2]*val2[i2];
1760 sim = (stat1*stat2 > 0.0 ? sim/
sqrt(stat1*stat2) : 0.0);
1762 sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
1766 sim = stat1 = stat2 = 0.0;
1768 while (
i1<nind1 && i2<nind2) {
1773 else if (i2 == nind2) {
1777 else if (ind1[
i1] < ind2[i2]) {
1781 else if (ind1[
i1] > ind2[i2]) {
1793 sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
1798 sim = stat1 = stat2 = 0.0;
1800 while (
i1<nind1 && i2<nind2) {
1805 else if (i2 == nind2) {
1809 else if (ind1[
i1] < ind2[i2]) {
1813 else if (ind1[
i1] > ind2[i2]) {
1825 sim = (stat1 > 0.0 ? sim/stat1 : 0.0);
1867 float *qval,
int simtype,
int nsim,
float minsim, gk_fkv_t *hits,
1868 int *i_marker, gk_fkv_t *i_cand)
1871 int nrows, ncols, ncand;
1873 int *colind, *marker;
1874 float *colval, *rnorms, mynorm, *rsums, mysum;
1882 colptr =
mat->colptr;
1883 colind =
mat->colind;
1884 colval =
mat->colval;
1886 marker = (i_marker ? i_marker : gk_ismalloc(nrows, -1,
"gk_csr_SimilarRows: marker"));
1887 cand = (i_cand ? i_cand : gk_fkvmalloc(nrows,
"gk_csr_SimilarRows: cand"));
1891 for (ncand=0, ii=0; ii<nqterms; ii++) {
1894 for (
j=colptr[
i];
j<colptr[
i+1];
j++) {
1896 if (marker[k] == -1) {
1897 cand[ncand].val = k;
1898 cand[ncand].key = 0;
1899 marker[k] = ncand++;
1901 cand[marker[k]].key += colval[
j]*qval[ii];
1908 for (ncand=0, ii=0; ii<nqterms; ii++) {
1911 for (
j=colptr[
i];
j<colptr[
i+1];
j++) {
1913 if (marker[k] == -1) {
1914 cand[ncand].val = k;
1915 cand[ncand].key = 0;
1916 marker[k] = ncand++;
1918 cand[marker[k]].key += colval[
j]*qval[ii];
1923 rnorms =
mat->rnorms;
1924 mynorm = gk_fdot(nqterms, qval, 1, qval, 1);
1926 for (
i=0;
i<ncand;
i++)
1927 cand[
i].
key = cand[
i].
key/(rnorms[cand[
i].val]+mynorm-cand[
i].
key);
1931 for (ncand=0, ii=0; ii<nqterms; ii++) {
1934 for (
j=colptr[
i];
j<colptr[
i+1];
j++) {
1936 if (marker[k] == -1) {
1937 cand[ncand].val = k;
1938 cand[ncand].key = 0;
1939 marker[k] = ncand++;
1941 cand[marker[k]].key +=
gk_min(colval[
j], qval[ii]);
1947 mysum = gk_fsum(nqterms, qval, 1);
1949 for (
i=0;
i<ncand;
i++)
1950 cand[
i].
key = cand[
i].
key/(rsums[cand[
i].val]+mysum-cand[
i].
key);
1955 for (ncand=0, ii=0; ii<nqterms; ii++) {
1958 for (
j=colptr[
i];
j<colptr[
i+1];
j++) {
1960 if (marker[k] == -1) {
1961 cand[ncand].val = k;
1962 cand[ncand].key = 0;
1963 marker[k] = ncand++;
1965 cand[marker[k]].key +=
gk_min(colval[
j], qval[ii]);
1970 mysum = gk_fsum(nqterms, qval, 1);
1972 for (
i=0;
i<ncand;
i++)
1982 for (
j=0,
i=0;
i<ncand;
i++) {
1983 marker[cand[
i].val] = -1;
1984 if (cand[
i].
key >= minsim)
1985 cand[
j++] = cand[
i];
1989 if (nsim == -1 || nsim >= ncand) {
1993 nsim =
gk_min(nsim, ncand);
1998 gk_fkvcopy(nsim, cand, hits);
2000 if (i_marker ==
NULL)