#include <PO_ETUCT.hh>
Classes | |
struct | state_info |
struct | state_samples |
Public Types | |
typedef const std::vector < float > * | state_t |
Public Member Functions | |
std::vector< float > | discretizeState (const std::vector< float > &s) |
virtual int | getBestAction (const std::vector< float > &s) |
void | logValues (ofstream *of, int xmin, int xmax, int ymin, int ymax) |
virtual void | planOnNewModel () |
PO_ETUCT (int numactions, float gamma, float rrange, float lambda, int MAX_ITER, float MAX_TIME, int MAX_DEPTH, int modelType, const std::vector< float > &featmax, const std::vector< float > &featmin, const std::vector< int > &statesPerDim, bool trackActual, int history, Random rng=Random()) | |
PO_ETUCT (const PO_ETUCT &) | |
virtual void | setFirst () |
virtual void | setModel (MDPModel *model) |
virtual void | setSeeding (bool seed) |
virtual bool | updateModelWithExperience (const std::vector< float > &last, int act, const std::vector< float > &curr, float reward, bool term) |
virtual | ~PO_ETUCT () |
Public Attributes | |
bool | ACTDEBUG |
bool | HISTORYDEBUG |
MDPModel * | model |
bool | MODELDEBUG |
bool | PLANNERDEBUG |
bool | REALSTATEDEBUG |
bool | UCTDEBUG |
Protected Member Functions | |
std::vector< float > | addVec (const std::vector< float > &a, const std::vector< float > &b) |
void | calculateReachableStates () |
state_t | canonicalize (const std::vector< float > &s) |
void | canonNextStates (StateActionInfo *modelInfo) |
void | createPolicy () |
void | deleteInfo (state_info *info) |
double | getSeconds () |
void | initNewState (state_t s) |
void | initStateInfo (state_t s, state_info *info) |
void | printStates () |
void | removeUnreachableStates () |
void | resetUCTCounts () |
virtual void | savePolicy (const char *filename) |
int | selectUCTAction (state_info *info) |
std::vector< float > | simulateNextState (const std::vector< float > &actualState, state_t discState, state_info *info, int action, float *reward, bool *term) |
std::vector< float > | subVec (const std::vector< float > &a, const std::vector< float > &b) |
float | uctSearch (const std::vector< float > &actualS, state_t state, int depth) |
void | updateStateActionFromModel (state_t s, int a, state_info *info) |
void | updateStateActionHistoryFromModel (const std::vector< float > modState, int a, StateActionInfo *newModel) |
Private Attributes | |
std::vector< float > | featmax |
std::vector< float > | featmin |
const float | gamma |
const int | HISTORY_FL_SIZE |
const int | HISTORY_SIZE |
const float | lambda |
int | lastUpdate |
const int | MAX_DEPTH |
const int | MAX_ITER |
const float | MAX_TIME |
const int | modelType |
int | nactions |
int | nstates |
const int | numactions |
double | planTime |
int | prevact |
state_info * | previnfo |
state_t | prevstate |
const float | rrange |
std::deque< float > | saHistory |
bool | seedMode |
std::map< state_t, state_info > | statedata |
std::set< std::vector< float > > | statespace |
const std::vector< int > & | statesPerDim |
bool | timingType |
const bool | trackActual |
This class defines a modified version of UCT, which plans on a model using Monte Carlo rollouts. Unlike the original UCT, it does not separate values by tree depth, and it incorporates eligibility traces. This version plans over states augmented with k-action histories, for delayed or partially observable domains.
Definition at line 24 of file PO_ETUCT.hh.
typedef const std::vector<float>* PO_ETUCT::state_t |
The implementation maps all sensations to a set of canonical pointers, which serve as the internal representation of environment state.
Definition at line 85 of file PO_ETUCT.hh.
PO_ETUCT::PO_ETUCT | ( | int | numactions, |
float | gamma, | ||
float | rrange, | ||
float | lambda, | ||
int | MAX_ITER, | ||
float | MAX_TIME, | ||
int | MAX_DEPTH, | ||
int | modelType, | ||
const std::vector< float > & | featmax, | ||
const std::vector< float > & | featmin, | ||
const std::vector< int > & | statesPerDim, | ||
bool | trackActual, | ||
int | history, | ||
Random | rng = Random() |
||
) |
Standard constructor
numactions,numactions | in the domain |
gamma | discount factor |
rrange | range of one-step rewards in the domain |
lambda | for use with eligibility traces |
MAX_ITER | maximum number of MC rollouts to perform |
MAX_TIME | maximum amount of time to run Monte Carlo rollouts |
MAX_DEPTH | maximum depth to perform rollout to |
modelType | specifies model type |
featmax | maximum value of each feature |
featmin | minimum value of each feature |
statesPerDim | # of values to discretize each feature into |
trackActual | track actual real-valued states (or just discrete states) |
history | # of previous actions to use for delayed domains |
rng | random number generator |
Definition at line 16 of file PO_ETUCT.cc.
PO_ETUCT::PO_ETUCT | ( | const PO_ETUCT & | ) |
Unimplemented copy constructor: internal state cannot be simply copied.
PO_ETUCT::~PO_ETUCT | ( | ) | [virtual] |
Definition at line 73 of file PO_ETUCT.cc.
std::vector< float > PO_ETUCT::addVec | ( | const std::vector< float > & | a, |
const std::vector< float > & | b | ||
) | [protected] |
Add two vectors together.
Definition at line 907 of file PO_ETUCT.cc.
void PO_ETUCT::calculateReachableStates | ( | ) | [protected] |
Calculate which states are reachable from states the agent has actually visited.
PO_ETUCT::state_t PO_ETUCT::canonicalize | ( | const std::vector< float > & | s | ) | [protected] |
Produces a canonical representation of the given sensation.
s | The current sensation from the environment. |
Definition at line 465 of file PO_ETUCT.cc.
void PO_ETUCT::canonNextStates | ( | StateActionInfo * | modelInfo | ) | [protected] |
Canonicalize all the next states predicted by this model.
Definition at line 313 of file PO_ETUCT.cc.
void PO_ETUCT::createPolicy | ( | ) | [protected] |
Compuate a policy from a model
void PO_ETUCT::deleteInfo | ( | state_info * | info | ) | [protected] |
Delete a state_info struct
Definition at line 551 of file PO_ETUCT.cc.
std::vector< float > PO_ETUCT::discretizeState | ( | const std::vector< float > & | s | ) |
Return a discretized version of the input state.
Definition at line 879 of file PO_ETUCT.cc.
int PO_ETUCT::getBestAction | ( | const std::vector< float > & | s | ) | [virtual] |
Implements Planner.
Definition at line 342 of file PO_ETUCT.cc.
double PO_ETUCT::getSeconds | ( | ) | [protected] |
Get the current time in seconds
Definition at line 559 of file PO_ETUCT.cc.
void PO_ETUCT::initNewState | ( | state_t | s | ) | [protected] |
Initialize a new state
Definition at line 107 of file PO_ETUCT.cc.
void PO_ETUCT::initStateInfo | ( | state_t | s, |
state_info * | info | ||
) | [protected] |
Initialize state info struct
Definition at line 501 of file PO_ETUCT.cc.
void PO_ETUCT::logValues | ( | ofstream * | of, |
int | xmin, | ||
int | xmax, | ||
int | ymin, | ||
int | ymax | ||
) |
Output value function to a file
Definition at line 860 of file PO_ETUCT.cc.
void PO_ETUCT::planOnNewModel | ( | ) | [virtual] |
Implements Planner.
Definition at line 403 of file PO_ETUCT.cc.
void PO_ETUCT::printStates | ( | ) | [protected] |
Print information for each state.
Definition at line 528 of file PO_ETUCT.cc.
void PO_ETUCT::removeUnreachableStates | ( | ) | [protected] |
Remove states from set that were deemed unreachable.
void PO_ETUCT::resetUCTCounts | ( | ) | [protected] |
Reset UCT visit counts to some baseline level (to decrease our confidence in q-values because model has changed.
Definition at line 435 of file PO_ETUCT.cc.
void PO_ETUCT::savePolicy | ( | const char * | filename | ) | [protected, virtual] |
Reimplemented from Planner.
Definition at line 831 of file PO_ETUCT.cc.
int PO_ETUCT::selectUCTAction | ( | state_info * | info | ) | [protected] |
Select UCT action based on UCB1 algorithm.
Definition at line 692 of file PO_ETUCT.cc.
void PO_ETUCT::setFirst | ( | ) | [virtual] |
Reimplemented from Planner.
Definition at line 932 of file PO_ETUCT.cc.
void PO_ETUCT::setModel | ( | MDPModel * | model | ) | [virtual] |
Implements Planner.
Definition at line 95 of file PO_ETUCT.cc.
void PO_ETUCT::setSeeding | ( | bool | seed | ) | [virtual] |
Reimplemented from Planner.
Definition at line 941 of file PO_ETUCT.cc.
std::vector< float > PO_ETUCT::simulateNextState | ( | const std::vector< float > & | actualState, |
state_t | discState, | ||
state_info * | info, | ||
int | action, | ||
float * | reward, | ||
bool * | term | ||
) | [protected] |
Return a sampled state from the next state distribution of the model. Simulate the next state from the given state, action, and possibly history of past actions.
Definition at line 734 of file PO_ETUCT.cc.
std::vector< float > PO_ETUCT::subVec | ( | const std::vector< float > & | a, |
const std::vector< float > & | b | ||
) | [protected] |
Subtract two vectors.
Definition at line 919 of file PO_ETUCT.cc.
float PO_ETUCT::uctSearch | ( | const std::vector< float > & | actualS, |
state_t | state, | ||
int | depth | ||
) | [protected] |
Perform UCT/Monte Carlo rollout from the given state. If terminal or at depth, return some reward. Otherwise, select an action based on UCB. Simulate action to get reward and next state. Call search on next state at depth+1 to get reward return from there on. Update q value towards new value: reward + gamma * searchReturn Update visit counts for confidence bounds Return q
From "Bandit Based Monte Carlo Planning" by Kocsis and Szepesv´ari.
Definition at line 568 of file PO_ETUCT.cc.
bool PO_ETUCT::updateModelWithExperience | ( | const std::vector< float > & | last, |
int | act, | ||
const std::vector< float > & | curr, | ||
float | reward, | ||
bool | term | ||
) | [virtual] |
Implements Planner.
Definition at line 120 of file PO_ETUCT.cc.
void PO_ETUCT::updateStateActionFromModel | ( | state_t | s, |
int | a, | ||
state_info * | info | ||
) | [protected] |
Update the state_info copy of the model for the given state-action from the MDPModel
Definition at line 234 of file PO_ETUCT.cc.
void PO_ETUCT::updateStateActionHistoryFromModel | ( | const std::vector< float > | modState, |
int | a, | ||
StateActionInfo * | newModel | ||
) | [protected] |
Update the state_info copy of the model for the given state-action and k-action history from the MDPModel.
Definition at line 243 of file PO_ETUCT.cc.
Definition at line 74 of file PO_ETUCT.hh.
std::vector<float> PO_ETUCT::featmax [private] |
Definition at line 202 of file PO_ETUCT.hh.
std::vector<float> PO_ETUCT::featmin [private] |
Definition at line 203 of file PO_ETUCT.hh.
const float PO_ETUCT::gamma [private] |
Definition at line 219 of file PO_ETUCT.hh.
const int PO_ETUCT::HISTORY_FL_SIZE [private] |
Definition at line 230 of file PO_ETUCT.hh.
const int PO_ETUCT::HISTORY_SIZE [private] |
Definition at line 229 of file PO_ETUCT.hh.
Definition at line 77 of file PO_ETUCT.hh.
const float PO_ETUCT::lambda [private] |
Definition at line 221 of file PO_ETUCT.hh.
int PO_ETUCT::lastUpdate [private] |
Definition at line 215 of file PO_ETUCT.hh.
const int PO_ETUCT::MAX_DEPTH [private] |
Definition at line 225 of file PO_ETUCT.hh.
const int PO_ETUCT::MAX_ITER [private] |
Definition at line 223 of file PO_ETUCT.hh.
const float PO_ETUCT::MAX_TIME [private] |
Definition at line 224 of file PO_ETUCT.hh.
MDPModel that we're using with planning
Definition at line 80 of file PO_ETUCT.hh.
Definition at line 73 of file PO_ETUCT.hh.
const int PO_ETUCT::modelType [private] |
Definition at line 226 of file PO_ETUCT.hh.
int PO_ETUCT::nactions [private] |
Definition at line 214 of file PO_ETUCT.hh.
int PO_ETUCT::nstates [private] |
Definition at line 213 of file PO_ETUCT.hh.
const int PO_ETUCT::numactions [private] |
Definition at line 218 of file PO_ETUCT.hh.
Definition at line 72 of file PO_ETUCT.hh.
double PO_ETUCT::planTime [private] |
Definition at line 209 of file PO_ETUCT.hh.
int PO_ETUCT::prevact [private] |
Definition at line 206 of file PO_ETUCT.hh.
state_info* PO_ETUCT::previnfo [private] |
Definition at line 207 of file PO_ETUCT.hh.
state_t PO_ETUCT::prevstate [private] |
Definition at line 205 of file PO_ETUCT.hh.
Definition at line 76 of file PO_ETUCT.hh.
const float PO_ETUCT::rrange [private] |
Definition at line 220 of file PO_ETUCT.hh.
std::deque<float> PO_ETUCT::saHistory [private] |
Current history of previous actions.
Definition at line 200 of file PO_ETUCT.hh.
bool PO_ETUCT::seedMode [private] |
Definition at line 211 of file PO_ETUCT.hh.
std::map<state_t, state_info> PO_ETUCT::statedata [private] |
Hashmap mapping state vectors to their state_info structs.
Definition at line 197 of file PO_ETUCT.hh.
std::set<std::vector<float> > PO_ETUCT::statespace [private] |
Set of all distinct sensations seen. Pointers to elements of this set serve as the internal representation of the environment state.
Definition at line 194 of file PO_ETUCT.hh.
const std::vector<int>& PO_ETUCT::statesPerDim [private] |
Definition at line 227 of file PO_ETUCT.hh.
bool PO_ETUCT::timingType [private] |
Definition at line 216 of file PO_ETUCT.hh.
const bool PO_ETUCT::trackActual [private] |
Definition at line 228 of file PO_ETUCT.hh.
Definition at line 75 of file PO_ETUCT.hh.