#include <C45Tree.hh>
Classes | |
struct | tree_experience |
struct | tree_node |
Public Types | |
enum | splitTypes { ONLY, CUT } |
Public Member Functions | |
tree_node * | allocateNode () |
bool | buildTree (tree_node *node, const std::vector< tree_experience * > &instances, bool changed) |
C45Tree (int id, int trainMode, int trainFreq, int m, float featPct, Random rng) | |
C45Tree (const C45Tree &) | |
float | calcGainRatio (int dim, float val, bool type, const std::vector< tree_experience * > &instances, float I, std::vector< tree_experience * > &left, std::vector< tree_experience * > &right) |
float | calcIforSet (const std::vector< tree_experience * > &instances) |
float | calcIofP (float *P, int size) |
void | compareSplits (float gainRatio, int dim, float val, bool type, const std::vector< tree_experience * > &left, const std::vector< tree_experience * > &right, int *nties, float *bestGainRatio, int *bestDim, float *bestVal, bool *bestType, std::vector< tree_experience * > *bestLeft, std::vector< tree_experience * > *bestRight) |
void | copyTree (tree_node *newNode, tree_node *origNode) |
void | deallocateNode (tree_node *node) |
void | deleteTree (tree_node *node) |
virtual float | getConf (const std::vector< float > &input) |
virtual C45Tree * | getCopy () |
tree_node * | getCorrectChild (tree_node *node, const std::vector< float > &input) |
std::set< float > | getUniques (int dim, const std::vector< tree_experience * > &instances, float &minVal, float &maxVal) |
bool | implementSplit (tree_node *node, float bestGainRatio, int bestDim, float bestVal, bool bestType, const std::vector< tree_experience * > &left, const std::vector< tree_experience * > &right, bool changed) |
void | initNodes () |
void | initTree () |
void | initTreeNode (tree_node *node) |
bool | makeLeaf (tree_node *node) |
void | outputProbabilities (tree_node *t, std::map< float, float > *retval) |
bool | passTest (int dim, float val, bool type, const std::vector< float > &input) |
void | printTree (tree_node *t, int level) |
bool | rebuildTree () |
float * | sortOnDim (int dim, const std::vector< tree_experience * > &instances) |
virtual void | testInstance (const std::vector< float > &input, std::map< float, float > *retval) |
void | testPossibleSplits (const std::vector< tree_experience * > &instances, float *bestGainRatio, int *bestDim, float *bestVal, bool *bestType, std::vector< tree_experience * > *bestLeft, std::vector< tree_experience * > *bestRight) |
virtual bool | trainInstance (classPair &instance) |
virtual bool | trainInstances (std::vector< classPair > &instances) |
tree_node * | traverseTree (tree_node *node, const std::vector< float > &input) |
~C45Tree () | |
Public Attributes | |
bool | COPYDEBUG |
bool | DTDEBUG |
bool | INCDEBUG |
float | MIN_GAIN_RATIO |
int | nExperiences |
bool | NODEDEBUG |
float | SPLIT_MARGIN |
bool | SPLITDEBUG |
bool | STOCH_DEBUG |
Private Attributes | |
tree_experience | allExp [N_C45_EXP] |
tree_node | allNodes [N_C45_NODES] |
const bool | ALLOW_ONLY_SPLITS |
std::vector< tree_experience * > | experiences |
const float | featPct |
std::vector< int > | freeNodes |
const int | freq |
bool | hadError |
const int | id |
tree_node * | lastNode |
const int | M |
int | maxnodes |
const int | mode |
int | nnodes |
int | nOutput |
Random | rng |
tree_node * | root |
int | totalnodes |
C4.5 decision tree class.
Definition at line 29 of file C45Tree.hh.
enum C45Tree::splitTypes |
The types of splits: ONLY, meaning the test is input == x, or CUT, meaning the test is input > x.
Definition at line 85 of file C45Tree.hh.
C45Tree::C45Tree | ( | int | id, |
int | trainMode, | ||
int | trainFreq, | ||
int | m, | ||
float | featPct, | ||
Random | rng | ||
) |
Default constructor
id | id of the tree for debug |
trainMode | when to build the tree: every step, only on errors, or every trainFreq steps |
trainFreq | frequency of model building if using latter mode |
m | # of visits for a given state-action to be considered known |
featPct | pct of features to remove from set used for each tree split |
rng | Random Number Generator |
Definition at line 12 of file C45Tree.cc.
C45Tree::C45Tree | ( | const C45Tree & | t | ) |
Copy constructor
Definition at line 49 of file C45Tree.cc.
C45Tree::~C45Tree | ( | ) |
Definition at line 142 of file C45Tree.cc.
C45Tree::tree_node * C45Tree::allocateNode | ( | ) |
Allocate a new node from our pre-allocated store of tree nodes
Definition at line 1106 of file C45Tree.cc.
bool C45Tree::buildTree | ( | tree_node * | node, |
const std::vector< tree_experience * > & | instances, | ||
bool | changed | ||
) |
Build the tree with the given instances from the given tree node
Definition at line 550 of file C45Tree.cc.
float C45Tree::calcGainRatio | ( | int | dim, |
float | val, | ||
bool | type, | ||
const std::vector< tree_experience * > & | instances, | ||
float | I, | ||
std::vector< tree_experience * > & | left, | ||
std::vector< tree_experience * > & | right | ||
) |
Calculate the gain ratio for the given split of instances
Definition at line 859 of file C45Tree.cc.
float C45Tree::calcIforSet | ( | const std::vector< tree_experience * > & | instances | ) |
Calculate I(P) for set.
Definition at line 937 of file C45Tree.cc.
float C45Tree::calcIofP | ( | float * | P, |
int | size | ||
) |
Calculate I(P)
Definition at line 928 of file C45Tree.cc.
void C45Tree::compareSplits | ( | float | gainRatio, |
int | dim, | ||
float | val, | ||
bool | type, | ||
const std::vector< tree_experience * > & | left, | ||
const std::vector< tree_experience * > & | right, | ||
int * | nties, | ||
float * | bestGainRatio, | ||
int * | bestDim, | ||
float * | bestVal, | ||
bool * | bestType, | ||
std::vector< tree_experience * > * | bestLeft, | ||
std::vector< tree_experience * > * | bestRight | ||
) |
Compare the current split to determine if it is the best split.
Definition at line 805 of file C45Tree.cc.
void C45Tree::copyTree | ( | tree_node * | newNode, |
tree_node * | origNode | ||
) |
Make a copy of the subtree from origNode to newNode
Definition at line 110 of file C45Tree.cc.
void C45Tree::deallocateNode | ( | tree_node * | node | ) |
Return tree node back to store of nodes
Definition at line 1125 of file C45Tree.cc.
void C45Tree::deleteTree | ( | tree_node * | node | ) |
Delete this tree node and all nodes below it in the tree.
Definition at line 469 of file C45Tree.cc.
float C45Tree::getConf | ( | const std::vector< float > & | input | ) | [virtual] |
Implements Classifier.
Definition at line 399 of file C45Tree.cc.
C45Tree * C45Tree::getCopy | ( | ) | [virtual] |
Implements Classifier.
Definition at line 103 of file C45Tree.cc.
C45Tree::tree_node * C45Tree::getCorrectChild | ( | tree_node * | node, |
const std::vector< float > & | input | ||
) |
Get the correct child of this node for a given input
Definition at line 504 of file C45Tree.cc.
std::set< float > C45Tree::getUniques | ( | int | dim, |
const std::vector< tree_experience * > & | instances, | ||
float & | minVal, | ||
float & | maxVal | ||
) |
Get all the unique values of the features on dimension dim
Definition at line 962 of file C45Tree.cc.
bool C45Tree::implementSplit | ( | tree_node * | node, |
float | bestGainRatio, | ||
int | bestDim, | ||
float | bestVal, | ||
bool | bestType, | ||
const std::vector< tree_experience * > & | left, | ||
const std::vector< tree_experience * > & | right, | ||
bool | changed | ||
) |
Implement the given split at the given node
Definition at line 634 of file C45Tree.cc.
void C45Tree::initNodes | ( | ) |
Initialize our store of tree nodes
Definition at line 1094 of file C45Tree.cc.
void C45Tree::initTree | ( | ) |
Initialize the tree
Definition at line 424 of file C45Tree.cc.
void C45Tree::initTreeNode | ( | tree_node * | node | ) |
Initialize the tree_node struct
Definition at line 440 of file C45Tree.cc.
bool C45Tree::makeLeaf | ( | tree_node * | node | ) |
Make the given node into a leaf node.
Definition at line 610 of file C45Tree.cc.
void C45Tree::outputProbabilities | ( | tree_node * | t, |
std::map< float, float > * | retval | ||
) |
Get the probability distribution for the given leaf node.
Definition at line 1073 of file C45Tree.cc.
bool C45Tree::passTest | ( | int | dim, |
float | val, | ||
bool | type, | ||
const std::vector< float > & | input | ||
) |
Determine if the input passes the test defined by dim, val, type
Definition at line 529 of file C45Tree.cc.
void C45Tree::printTree | ( | tree_node * | t, |
int | level | ||
) |
Print the tree for debug purposes.
Definition at line 1036 of file C45Tree.cc.
bool C45Tree::rebuildTree | ( | ) |
Rebuild the tree
Definition at line 373 of file C45Tree.cc.
float * C45Tree::sortOnDim | ( | int | dim, |
const std::vector< tree_experience * > & | instances | ||
) |
Returns an array of the values of features at the index dim, sorted from lowest to highest
Definition at line 981 of file C45Tree.cc.
void C45Tree::testInstance | ( | const std::vector< float > & | input, |
std::map< float, float > * | retval | ||
) | [virtual] |
Implements Classifier.
Definition at line 379 of file C45Tree.cc.
void C45Tree::testPossibleSplits | ( | const std::vector< tree_experience * > & | instances, |
float * | bestGainRatio, | ||
int * | bestDim, | ||
float * | bestVal, | ||
bool * | bestType, | ||
std::vector< tree_experience * > * | bestLeft, | ||
std::vector< tree_experience * > * | bestRight | ||
) |
Test the possible splits for the given set of instances
Definition at line 734 of file C45Tree.cc.
bool C45Tree::trainInstance | ( | classPair & | instance | ) | [virtual] |
Implements Classifier.
Definition at line 151 of file C45Tree.cc.
bool C45Tree::trainInstances | ( | std::vector< classPair > & | instances | ) | [virtual] |
Implements Classifier.
Definition at line 261 of file C45Tree.cc.
C45Tree::tree_node * C45Tree::traverseTree | ( | tree_node * | node, |
const std::vector< float > & | input | ||
) |
Traverse the tree to a leaf for the given input
Definition at line 516 of file C45Tree.cc.
tree_experience C45Tree::allExp[N_C45_EXP] [private] |
Pre-allocated array of experiences to be filled during training.
Definition at line 210 of file C45Tree.hh.
tree_node C45Tree::allNodes[N_C45_NODES] [private] |
Pre-allocated array of tree nodes used to build the tree.
Definition at line 213 of file C45Tree.hh.
const bool C45Tree::ALLOW_ONLY_SPLITS [private] |
Definition at line 196 of file C45Tree.hh.
bool C45Tree::COPYDEBUG |
Definition at line 183 of file C45Tree.hh.
bool C45Tree::DTDEBUG |
Definition at line 178 of file C45Tree.hh.
std::vector<tree_experience*> C45Tree::experiences [private] |
Vector of all experiences used to train the tree
Definition at line 207 of file C45Tree.hh.
const float C45Tree::featPct [private] |
Definition at line 195 of file C45Tree.hh.
std::vector<int> C45Tree::freeNodes [private] |
Definition at line 214 of file C45Tree.hh.
const int C45Tree::freq [private] |
Definition at line 193 of file C45Tree.hh.
bool C45Tree::hadError [private] |
Definition at line 202 of file C45Tree.hh.
const int C45Tree::id [private] |
Definition at line 190 of file C45Tree.hh.
bool C45Tree::INCDEBUG |
Definition at line 177 of file C45Tree.hh.
tree_node* C45Tree::lastNode [private] |
Pointer to last node of tree used (leaf used in last prediction made).
Definition at line 220 of file C45Tree.hh.
const int C45Tree::M [private] |
Definition at line 194 of file C45Tree.hh.
int C45Tree::maxnodes [private] |
Definition at line 203 of file C45Tree.hh.
float C45Tree::MIN_GAIN_RATIO |
Definition at line 186 of file C45Tree.hh.
const int C45Tree::mode [private] |
Definition at line 192 of file C45Tree.hh.
int C45Tree::nExperiences |
Definition at line 181 of file C45Tree.hh.
int C45Tree::nnodes [private] |
Definition at line 201 of file C45Tree.hh.
bool C45Tree::NODEDEBUG |
Definition at line 182 of file C45Tree.hh.
int C45Tree::nOutput [private] |
Definition at line 200 of file C45Tree.hh.
Random C45Tree::rng [private] |
Definition at line 198 of file C45Tree.hh.
tree_node* C45Tree::root [private] |
Pointer to root node of tree.
Definition at line 218 of file C45Tree.hh.
float C45Tree::SPLIT_MARGIN |
Definition at line 185 of file C45Tree.hh.
bool C45Tree::SPLITDEBUG |
Definition at line 179 of file C45Tree.hh.
bool C45Tree::STOCH_DEBUG |
Definition at line 180 of file C45Tree.hh.
int C45Tree::totalnodes [private] |
Definition at line 204 of file C45Tree.hh.