#include <ETUCT.hh>
Classes | |
struct | state_info |
struct | state_samples |
Public Types | |
typedef const std::vector < float > * | state_t |
Public Member Functions | |
std::vector< float > | discretizeState (const std::vector< float > &s) |
ETUCT (int numactions, float gamma, float rrange, float lambda, int MAX_ITER, float MAX_TIME, int MAX_DEPTH, int modelType, const std::vector< float > &featmax, const std::vector< float > &featmin, const std::vector< int > &statesPerDim, bool trackActual, int history, Random rng=Random()) | |
ETUCT (const ETUCT &) | |
void | fillInState (std::vector< float >s, int depth) |
virtual int | getBestAction (const std::vector< float > &s) |
void | initStates () |
void | logValues (ofstream *of, int xmin, int xmax, int ymin, int ymax) |
virtual void | planOnNewModel () |
virtual void | setFirst () |
virtual void | setModel (MDPModel *model) |
virtual void | setSeeding (bool seed) |
virtual bool | updateModelWithExperience (const std::vector< float > &last, int act, const std::vector< float > &curr, float reward, bool term) |
virtual | ~ETUCT () |
Public Attributes | |
bool | ACTDEBUG |
bool | HISTORYDEBUG |
MDPModel * | model |
bool | MODELDEBUG |
bool | PLANNERDEBUG |
bool | REALSTATEDEBUG |
bool | UCTDEBUG |
Protected Member Functions | |
std::vector< float > | addVec (const std::vector< float > &a, const std::vector< float > &b) |
void | calculateReachableStates () |
state_t | canonicalize (const std::vector< float > &s) |
void | canonNextStates (StateActionInfo *modelInfo) |
void | createPolicy () |
void | deleteInfo (state_info *info) |
double | getSeconds () |
void | initNewState (state_t s) |
void | initStateInfo (state_t s, state_info *info) |
void | printStates () |
void | removeUnreachableStates () |
void | resetUCTCounts () |
virtual void | savePolicy (const char *filename) |
int | selectUCTAction (state_info *info) |
std::vector< float > | simulateNextState (const std::vector< float > &actualState, state_t discState, state_info *info, const std::deque< float > &searchHistory, int action, float *reward, bool *term) |
std::vector< float > | subVec (const std::vector< float > &a, const std::vector< float > &b) |
float | uctSearch (const std::vector< float > &actualS, state_t state, int depth, std::deque< float > history) |
void | updateStateActionFromModel (state_t s, int a, state_info *info) |
void | updateStateActionHistoryFromModel (const std::vector< float > modState, int a, StateActionInfo *newModel) |
Private Attributes | |
std::vector< float > | featmax |
std::vector< float > | featmin |
const float | gamma |
const int | HISTORY_FL_SIZE |
const int | HISTORY_SIZE |
const float | lambda |
int | lastUpdate |
const int | MAX_DEPTH |
const int | MAX_ITER |
const float | MAX_TIME |
const int | modelType |
int | nactions |
int | nstates |
const int | numactions |
double | planTime |
int | prevact |
state_info * | previnfo |
state_t | prevstate |
const float | rrange |
std::deque< float > | saHistory |
bool | seedMode |
std::map< state_t, state_info > | statedata |
std::set< std::vector< float > > | statespace |
const std::vector< int > & | statesPerDim |
bool | timingType |
const bool | trackActual |
This class defines a modified version of UCT, which plans on a model using Monte Carlo rollouts. Unlike the original UCT, it does not separate values by tree depth, and it incorporates eligibility traces.
typedef const std::vector<float>* ETUCT::state_t |
ETUCT::ETUCT | ( | int | numactions, |
float | gamma, | ||
float | rrange, | ||
float | lambda, | ||
int | MAX_ITER, | ||
float | MAX_TIME, | ||
int | MAX_DEPTH, | ||
int | modelType, | ||
const std::vector< float > & | featmax, | ||
const std::vector< float > & | featmin, | ||
const std::vector< int > & | statesPerDim, | ||
bool | trackActual, | ||
int | history, | ||
Random | rng = Random() |
||
) |
Standard constructor
numactions | # of actions in the domain |
gamma | discount factor |
rrange | range of one-step rewards in the domain |
lambda | for use with eligibility traces |
MAX_ITER | maximum number of MC rollouts to perform |
MAX_TIME | maximum amount of time to run Monte Carlo rollouts |
MAX_DEPTH | maximum depth to perform rollout to |
modelType | specifies model type |
featmax | maximum value of each feature |
featmin | minimum value of each feature |
statesPerDim | # of values to discretize each feature into |
trackActual | track actual real-valued states (or just discrete states) |
history | # of previous actions to use for delayed domains |
rng | random number generator |
ETUCT::ETUCT | ( | const ETUCT & | ) |
Unimplemented copy constructor: internal state cannot be simply copied.
ETUCT::~ETUCT | ( | ) | [virtual] |
std::vector< float > ETUCT::addVec | ( | const std::vector< float > & | a, |
const std::vector< float > & | b | ||
) | [protected] |
void ETUCT::calculateReachableStates | ( | ) | [protected] |
Calculate which states are reachable from states the agent has actually visited.
ETUCT::state_t ETUCT::canonicalize | ( | const std::vector< float > & | s | ) | [protected] |
void ETUCT::canonNextStates | ( | StateActionInfo * | modelInfo | ) | [protected] |
void ETUCT::createPolicy | ( | ) | [protected] |
Compute a policy from the model.
void ETUCT::deleteInfo | ( | state_info * | info | ) | [protected] |
Delete a state_info struct
std::vector< float > ETUCT::discretizeState | ( | const std::vector< float > & | s | ) |
void ETUCT::fillInState | ( | std::vector< float > | s, |
int | depth | ||
) |
int ETUCT::getBestAction | ( | const std::vector< float > & | s | ) | [virtual] |
double ETUCT::getSeconds | ( | ) | [protected] |
void ETUCT::initNewState | ( | state_t | s | ) | [protected] |
void ETUCT::initStateInfo | ( | state_t | s, |
state_info * | info | ||
) | [protected] |
void ETUCT::initStates | ( | ) |
void ETUCT::logValues | ( | ofstream * | of, |
int | xmin, | ||
int | xmax, | ||
int | ymin, | ||
int | ymax | ||
) |
void ETUCT::planOnNewModel | ( | ) | [virtual] |
void ETUCT::printStates | ( | ) | [protected] |
void ETUCT::removeUnreachableStates | ( | ) | [protected] |
Remove states from set that were deemed unreachable.
void ETUCT::resetUCTCounts | ( | ) | [protected] |
void ETUCT::savePolicy | ( | const char * | filename | ) | [protected, virtual] |
int ETUCT::selectUCTAction | ( | state_info * | info | ) | [protected] |
void ETUCT::setFirst | ( | ) | [virtual] |
void ETUCT::setModel | ( | MDPModel * | model | ) | [virtual] |
void ETUCT::setSeeding | ( | bool | seed | ) | [virtual] |
std::vector< float > ETUCT::simulateNextState | ( | const std::vector< float > & | actualState, |
state_t | discState, | ||
state_info * | info, | ||
const std::deque< float > & | searchHistory, | ||
int | action, | ||
float * | reward, | ||
bool * | term | ||
) | [protected] |
std::vector< float > ETUCT::subVec | ( | const std::vector< float > & | a, |
const std::vector< float > & | b | ||
) | [protected] |
float ETUCT::uctSearch | ( | const std::vector< float > & | actualS, |
state_t | state, | ||
int | depth, | ||
std::deque< float > | history | ||
) | [protected] |
Perform UCT/Monte Carlo rollout from the given state. If terminal or at depth, return some reward. Otherwise, select an action based on UCB. Simulate action to get reward and next state. Call search on next state at depth+1 to get reward return from there on. Update q value towards new value: reward + gamma * searchReturn Update visit counts for confidence bounds Return q
From "Bandit Based Monte Carlo Planning" by Kocsis and Szepesvári.
bool ETUCT::updateModelWithExperience | ( | const std::vector< float > & | last, |
int | act, | ||
const std::vector< float > & | curr, | ||
float | reward, | ||
bool | term | ||
) | [virtual] |
void ETUCT::updateStateActionFromModel | ( | state_t | s, |
int | a, | ||
state_info * | info | ||
) | [protected] |
Update the state_info copy of the model for the given state-action from the MDPModel
void ETUCT::updateStateActionHistoryFromModel | ( | const std::vector< float > | modState, |
int | a, | ||
StateActionInfo * | newModel | ||
) | [protected] |
Update the state_info copy of the model for the given state-action and k-action history from the MDPModel.
std::vector<float> ETUCT::featmax [private] |
std::vector<float> ETUCT::featmin [private] |
const float ETUCT::gamma [private] |
const int ETUCT::HISTORY_FL_SIZE [private] |
const int ETUCT::HISTORY_SIZE [private] |
const float ETUCT::lambda [private] |
int ETUCT::lastUpdate [private] |
const int ETUCT::MAX_DEPTH [private] |
const int ETUCT::MAX_ITER [private] |
const float ETUCT::MAX_TIME [private] |
const int ETUCT::modelType [private] |
int ETUCT::nactions [private] |
int ETUCT::nstates [private] |
const int ETUCT::numactions [private] |
double ETUCT::planTime [private] |
int ETUCT::prevact [private] |
state_info* ETUCT::previnfo [private] |
state_t ETUCT::prevstate [private] |
const float ETUCT::rrange [private] |
std::deque<float> ETUCT::saHistory [private] |
bool ETUCT::seedMode [private] |
std::map<state_t, state_info> ETUCT::statedata [private] |
Map from canonical state pointers (state_t) to their state_info structs.
std::set<std::vector<float> > ETUCT::statespace [private] |
const std::vector<int>& ETUCT::statesPerDim [private] |
bool ETUCT::timingType [private] |
const bool ETUCT::trackActual [private] |