51 #ifndef CLOCKS_PER_SEC // define clocks-per-second if needed 52 #define CLOCKS_PER_SEC 1000000 284 const double ERR = 0.00001;
381 cerr <<
"ann_test: ERROR------->" << msg <<
"<-------------ERROR\n";
385 cerr <<
"ann_test: WARNING----->" << msg <<
"<-------------WARNING\n";
394 for (
int i = 0; i <
dim; i++) {
396 if (i < dim-1) cout <<
",";
407 for (i = 0; i < size; i++) {
408 if (!strcmp(arg, table[i]))
return i;
580 apx_pts_in_range = NULL;
583 min_pts_in_range = NULL;
584 max_pts_in_range = NULL;
599 do { in.get(ch); }
while (isspace(ch) && !in.eof());
600 while (ch ==
'#' && !in.eof()) {
602 do { in.get(ch); }
while(ch !=
'\n' && !in.eof());
604 do { in.get(ch); }
while(isspace(ch) && !in.eof());
628 int main(
int argc,
char** argv)
634 cout <<
"------------------------------------------------------------\n" 638 <<
"------------------------------------------------------------\n\n";
650 if (!strcmp(directive,
"dim")) {
653 else if (!strcmp(directive,
"colors")) {
656 else if (!strcmp(directive,
"new_clust")) {
659 else if (!strcmp(directive,
"max_clus_dim")) {
662 else if (!strcmp(directive,
"std_dev")) {
665 else if (!strcmp(directive,
"std_dev_lo")) {
668 else if (!strcmp(directive,
"std_dev_hi")) {
671 else if (!strcmp(directive,
"corr_coef")) {
674 else if (!strcmp(directive,
"data_size")) {
677 else if (!strcmp(directive,
"query_size")) {
680 else if (!strcmp(directive,
"bucket_size")) {
683 else if (!strcmp(directive,
"epsilon")) {
686 else if (!strcmp(directive,
"max_pts_visit")) {
690 else if (!strcmp(directive,
"radius_bound")) {
694 else if (!strcmp(directive,
"near_neigh")) {
699 else if (!strcmp(directive,
"true_near_neigh")) {
708 else if (!strcmp(directive,
"seed")) {
715 else if (!strcmp(directive,
"validate")) {
717 if (!strcmp(arg,
"on")) {
719 cout <<
"validate = on " 720 <<
"(Warning: this may slow execution time.)\n";
722 else if (!strcmp(arg,
"off")) {
726 cerr <<
"Argument: " << arg <<
"\n";
727 Error(
"validate argument must be \"on\" or \"off\"",
ANNabort);
733 else if (!strcmp(directive,
"distribution")) {
737 cerr <<
"Distribution: " << arg <<
"\n";
744 else if (!strcmp(directive,
"stats")) {
748 cerr <<
"Stats level: " << arg <<
"\n";
752 cout <<
"stats = " << arg <<
"\n";
757 else if (!strcmp(directive,
"split_rule")) {
761 cerr <<
"Splitting rule: " << arg <<
"\n";
768 else if (!strcmp(directive,
"shrink_rule")) {
772 cerr <<
"Shrinking rule: " << arg <<
"\n";
779 else if (!strcmp(directive,
"output_label")) {
782 cout <<
"<" << arg <<
">\n";
787 else if (!strcmp(directive,
"gen_data_pts")) {
789 Error(
"Cannot use planted distribution for data points",
ANNabort);
805 else if (!strcmp(directive,
"gen_query_pts")) {
807 if (data_pts == NULL) {
808 Error(
"Must generate data points before query points for planted distribution",
ANNabort);
831 else if (!strcmp(directive,
"read_data_pts")) {
843 else if (!strcmp(directive,
"read_query_pts")) {
858 else if (!strcmp(directive,
"build_ann")) {
862 if (the_tree != NULL) {
878 long prep_time = clock() - clock0;
881 cout <<
"[Build ann-structure:\n";
884 cout <<
" data_size = " << data_size <<
"\n";
885 cout <<
" dim = " << dim <<
"\n";
886 cout <<
" bucket_size = " << bucket_size <<
"\n";
889 cout <<
" process_time = " 899 cout <<
" (Structure Contents:\n";
909 else if (!strcmp(directive,
"dump")) {
911 if (the_tree == NULL) {
912 Error(
"Cannot dump. No tree has been built yet",
ANNwarn);
916 ofstream out_dump_file(arg);
917 if (!out_dump_file) {
918 cerr <<
"File name: " << arg <<
"\n";
924 cout <<
"(Tree has been dumped to file " << arg <<
")\n";
933 else if (!strcmp(directive,
"load")) {
935 if (the_tree != NULL) {
938 if (data_pts != NULL) {
942 ifstream in_dump_file(arg);
944 cerr <<
"File name: " << arg <<
"\n";
951 data_size = the_tree->
nPoints();
957 cout <<
"(Tree has been loaded from file " << arg <<
")\n";
960 cout <<
" (Structure Contents:\n";
984 else if (!strcmp(directive,
"run_queries")) {
989 enum {STANDARD, PRIORITY} method;
992 if (!strcmp(arg,
"standard")) {
995 else if (!strcmp(arg,
"priority")) {
999 cerr <<
"Search type: " << arg <<
"\n";
1000 Error(
"Search type must be \"standard\" or \"priority\"",
1003 if (data_pts == NULL || query_pts == NULL) {
1004 Error(
"Either data set and query set not constructed",
ANNabort);
1006 if (the_tree == NULL) {
1014 #ifdef ANN_PERF // performance only 1020 if (apx_nn_idx != NULL)
delete []
apx_nn_idx;
1021 if (apx_dists != NULL)
delete []
apx_dists;
1041 apx_pts_in_range[i] = 0;
1043 if (radius_bound == 0) {
1044 if (method == STANDARD) {
1052 else if (method == PRIORITY) {
1065 if (method != STANDARD) {
1066 Error(
"A nonzero radius bound assumes standard search",
1085 long query_time = clock() - clock0;
1097 cout <<
"[Run Queries:\n";
1098 cout <<
" query_size = " << query_size <<
"\n";
1099 cout <<
" dim = " << dim <<
"\n";
1100 cout <<
" search_method = " << arg <<
"\n";
1101 cout <<
" epsilon = " << epsilon <<
"\n";
1102 cout <<
" near_neigh = " << near_neigh <<
"\n";
1103 if (max_pts_visit != 0)
1104 cout <<
" max_pts_visit = " << max_pts_visit <<
"\n";
1105 if (radius_bound != 0)
1106 cout <<
" radius_bound = " << radius_bound <<
"\n";
1108 cout <<
" true_nn = " << true_nn <<
"\n";
1111 cout <<
" query_time = " <<
1115 cout <<
" (biased by perf measurements)";
1125 cout <<
" (Performance statistics unavailable.)\n";
1130 cout <<
" (Query Results:\n";
1131 cout <<
" Pt\tANN\tDist\n";
1136 cout <<
" " << setw(4) << i;
1140 cout <<
"\t[no other pts in radius bound]\n";
1144 cout <<
"\t" << curr_nn_idx[j]
1150 if (radius_bound != 0) {
1151 cout <<
" pts_in_radius_bound = " 1152 << apx_pts_in_range[i] <<
"\n";
1167 cerr <<
"Directive: " << directive <<
"\n";
1174 if (the_tree != NULL)
delete the_tree;
1177 if (apx_nn_idx != NULL)
delete []
apx_nn_idx;
1178 if (apx_dists != NULL)
delete []
apx_dists;
1183 return EXIT_SUCCESS;
1226 std_dev_lo, std_dev_hi, max_dim);
1237 if(type ==
DATA) cout <<
"[Generating Data Points:\n";
1238 else cout <<
"[Generating Query Points:\n";
1239 cout <<
" number = " << n <<
"\n";
1240 cout <<
" dim = " << dim <<
"\n";
1243 cout <<
" seed = " <<
annIdum <<
"\n";
1246 cout <<
" std_dev = " << std_dev <<
"\n";
1248 cout <<
" std_dev = " << std_dev <<
" (small) \n";
1249 cout <<
" std_dev_lo = " << std_dev_lo <<
"\n";
1250 cout <<
" std_dev_hi = " << std_dev_hi <<
"\n";
1253 cout <<
" corr_coef = " << corr_coef <<
"\n";
1256 cout <<
" colors = " << n_color <<
"\n";
1258 cout <<
" (cluster centers regenerated)\n";
1261 cout <<
" max_dim = " << max_dim <<
"\n";
1267 if(type ==
DATA) cout <<
"(Data Points:\n";
1268 else cout <<
"(Query Points:\n";
1269 for (
int i = 0; i < n; i++) {
1270 cout <<
" " << setw(4) << i <<
"\t";
1293 ifstream in_file(file_nm);
1295 cerr <<
"File name: " << file_nm <<
"\n";
1302 for (i = 0; i < n; i++) {
1303 if (!(in_file >> pa[i][0]))
break;
1304 for (
int d = 1;
d <
dim;
d++) {
1305 in_file >> pa[i][
d];
1310 in_file >> ignore_me;
1311 if (!in_file.eof()) {
1313 Error(
"`data_size' too small. Input file truncated.",
ANNwarn);
1315 Error(
"`query_size' too small. Input file truncated.",
ANNwarn);
1324 cout <<
"[Read Data Points:\n";
1325 cout <<
" data_size = " << n <<
"\n";
1328 cout <<
"[Read Query Points:\n";
1329 cout <<
" query_size = " << n <<
"\n";
1331 cout <<
" file_name = " << file_nm <<
"\n";
1332 cout <<
" dim = " << dim <<
"\n";
1336 cout <<
" (Points:\n";
1337 for (i = 0; i < n; i++) {
1338 cout <<
" " << i <<
"\t";
1377 cout <<
"(Computing true nearest neighbors for validation. This may take time.)\n";
1381 if (true_dists != NULL)
delete []
true_dists;
1385 if (true_nn > data_size) {
1402 if (radius_bound == 0) {
1475 if (true_nn < near_neigh) {
1476 Error(
"Cannot validate with fewer true near neighbors than actual",
ANNabort);
1501 double true_dist =
ANN_ROOT(curr_tru_dst[j]);
1503 double rept_dist =
ANN_ROOT(curr_apx_dst[j]);
1505 if (rept_dist < true_dist*(1-
ERR)) {
1506 Error(
"INTERNAL ERROR: True nearest neighbor incorrect",
1511 if (true_dist == 0.0) {
1513 else resultErr = 0.0;
1516 resultErr = (rept_dist - true_dist) / ((
double) true_dist);
1519 if (resultErr > epsilon +
RND_OFF && max_pts_visit == 0) {
1520 Error(
"INTERNAL ERROR: Actual error exceeds epsilon",
1535 double rnkErr = 0.0;
1537 ANNdist rept_dist = curr_apx_dst[j];
1539 while (rnk < true_nn && curr_tru_dst[rnk] <= rept_dist)
1541 if (j+1-rnk > 0) rnkErr = (double) (j+1-rnk);
1548 if (radius_bound != 0) {
1549 if (apx_pts_in_range[i] < min_pts_in_range[i] ||
1550 apx_pts_in_range[i] > max_pts_in_range[i])
1551 Error(
"INTERNAL ERROR: Invalid fixed-radius range count",
1570 #define log2(x) (log(x)/log(2.0)) // log base 2 1576 const int MIN_PTS = 20;
1577 const float MAX_FRAC_TL = 0.50;
1578 const float MAX_AVG_AR = 20;
1587 int too_many_nodes = 10*opt_n_nodes;
1588 if (st.
n_pts >= MIN_PTS && n_nodes > too_many_nodes) {
1589 out <<
"-----------------------------------------------------------\n";
1590 out <<
"Warning: The tree has more than 10x as many nodes as points.\n";
1591 out <<
"You may want to consider a different split or shrink method.\n";
1592 out <<
"-----------------------------------------------------------\n";
1596 float frac_tl = (st.
n_lf == 0 ? 0 : ((float) st.
n_tl)/ st.
n_lf);
1597 if (st.
n_pts >= MIN_PTS && frac_tl > MAX_FRAC_TL) {
1598 out <<
"-----------------------------------------------------------\n";
1599 out <<
"Warning: A significant fraction of leaves contain no points.\n";
1600 out <<
"You may want to consider a different split or shrink method.\n";
1601 out <<
"-----------------------------------------------------------\n";
1605 int too_many_levels = (int) (2.0 * st.
dim *
log2((
double) st.
n_pts));
1607 if (st.
n_pts >= MIN_PTS && st.
depth > too_many_levels) {
1608 out <<
"-----------------------------------------------------------\n";
1609 out <<
"Warning: The tree is more than 2x as deep as (dim*log n).\n";
1610 out <<
"You may want to consider a different split or shrink method.\n";
1611 out <<
"-----------------------------------------------------------\n";
1615 if (st.
n_pts >= MIN_PTS && st.
avg_ar > MAX_AVG_AR) {
1616 out <<
"-----------------------------------------------------------\n";
1617 out <<
"Warning: Average aspect ratio of cells is quite large.\n";
1618 out <<
"This may slow queries depending on the point distribution.\n";
1619 out <<
"-----------------------------------------------------------\n";
1627 out <<
" (Structure Statistics:\n";
1628 out <<
" n_nodes = " << n_nodes
1629 <<
" (opt = " << opt_n_nodes
1630 <<
", best if < " << too_many_nodes <<
")\n" 1631 <<
" n_leaves = " << st.
n_lf 1632 <<
" (" << st.
n_tl <<
" contain no points)\n" 1633 <<
" n_splits = " << st.
n_spl <<
"\n" 1634 <<
" n_shrinks = " << st.
n_shr <<
"\n";
1635 out <<
" empty_leaves = " << frac_tl*100
1636 <<
" percent (best if < " << MAX_FRAC_TL*100 <<
" percent)\n";
1637 out <<
" depth = " << st.
depth 1638 <<
" (opt = " << opt_levels
1639 <<
", best if < " << too_many_levels <<
")\n";
1640 out <<
" avg_aspect_ratio = " << st.
avg_ar 1641 <<
" (best if < " << MAX_AVG_AR <<
")\n";
void annkPriSearch(ANNpoint q, int k, ANNidxArray nn_idx, ANNdistArray dd, double eps=0.0)
int main(int argc, char **argv)
const ANNidx ANN_NULL_IDX
void annClusGaussPts(ANNpointArray pa, int n, int dim, int n_clus, ANNbool new_clust, double std_dev)
void annCoGaussPts(ANNpointArray pa, int n, int dim, double correlation)
int annkFRSearch(ANNpoint q, ANNdist sqRad, int k, ANNidxArray nn_idx=NULL, ANNdistArray dd=NULL, double eps=0.0)
DLL_API void annUpdateStats()
void annLaplacePts(ANNpointArray pa, int n, int dim)
DLL_API void annPrintStats(ANNbool validate)
ANNpointArray thePoints()
DLL_API ANNpointArray annAllocPts(int n, int dim)
ANNbool getDirective(istream &in, char *directive)
void annPlanted(ANNpointArray pa, int n, int dim, ANNpointArray src, int n_src, double std_dev)
void annClusOrthFlats(ANNpointArray pa, int n, int dim, int n_clus, ANNbool new_clust, double std_dev, int max_dim)
const char distr_table[N_DISTRIBS][STRING_LEN]
void printPoint(ANNpoint p, int dim)
void annUniformPts(ANNpointArray pa, int n, int dim)
void annkSearch(ANNpoint q, int k, ANNidxArray nn_idx, ANNdistArray dd, double eps=0.0)
void readPts(ANNpointArray &pa, int &n, char *file_nm, PtType type)
ANNbool skipComment(istream &in)
const char split_table[N_SPLIT_RULES][STRING_LEN]
DLL_API void annDeallocPts(ANNpointArray &pa)
void annGaussPts(ANNpointArray pa, int n, int dim, double std_dev)
void Error(const char *msg, ANNerr level)
const ANNbool def_new_clust
const ANNsplitRule def_split
const char stat_table[N_STAT_LEVELS][STRING_LEN]
const ANNbool def_validate
void annCoLaplacePts(ANNpointArray pa, int n, int dim, double correlation)
const ANNshrinkRule def_shrink
DLL_API ANNsampStat ann_rank_err
virtual void getStats(ANNkdStats &st)
void treeStats(ostream &out, ANNbool verbose)
virtual void Dump(ANNbool with_pts, std::ostream &out)
void annkSearch(ANNpoint q, int k, ANNidxArray nn_idx, ANNdistArray dd, double eps=0.0)
void generatePts(ANNpointArray &pa, int n, PtType type, ANNbool new_clust, ANNpointArray src=NULL, int n_src=0)
const char shrink_table[N_SHRINK_RULES][STRING_LEN]
int annkFRSearch(ANNpoint q, ANNdist sqRad, int k=0, ANNidxArray nn_idx=NULL, ANNdistArray dd=NULL, double eps=0.0)
virtual void Print(ANNbool with_pts, std::ostream &out)
DLL_API void annResetStats(int data_size)
int lookUp(const char *arg, const char(*table)[STRING_LEN], int size)
void annClusEllipsoids(ANNpointArray pa, int n, int dim, int n_clus, ANNbool new_clust, double std_dev_small, double std_dev_lo, double std_dev_hi, int max_dim)
const double def_corr_coef
DLL_API void annMaxPtsVisit(int maxPts)
const int def_bucket_size
DLL_API ANNsampStat ann_average_err
DLL_API void annResetCounts()