#include <PO_ParallelETUCT.hh>
Classes | |
struct | state_info |
struct | state_samples |
Public Types | |
typedef const std::vector < float > * | state_t |
Public Member Functions | |
std::vector< float > | addVec (const std::vector< float > &a, const std::vector< float > &b) |
virtual int | getBestAction (const std::vector< float > &s) |
void | loadPolicy (const char *filename) |
void | logValues (ofstream *of, int xmin, int xmax, int ymin, int ymax) |
void | parallelModelLearning () |
void | parallelSearch () |
virtual void | planOnNewModel () |
PO_ParallelETUCT (int numactions, float gamma, float rrange, float lambda, int MAX_ITER, float MAX_TIME, int MAX_DEPTH, int modelType, const std::vector< float > &featmax, const std::vector< float > &featmin, const std::vector< int > &statesPerDim, bool trackActual, int historySize, Random rng=Random()) | |
PO_ParallelETUCT (const PO_ParallelETUCT &) | |
std::vector< float > | selectRandomState () |
virtual void | setFirst () |
virtual void | setModel (MDPModel *model) |
virtual void | setSeeding (bool seed) |
std::vector< float > | subVec (const std::vector< float > &a, const std::vector< float > &b) |
float | uctSearch (const std::vector< float > &actS, state_t state, int depth) |
virtual bool | updateModelWithExperience (const std::vector< float > &last, int act, const std::vector< float > &curr, float reward, bool term) |
virtual | ~PO_ParallelETUCT () |
Public Attributes | |
bool | ACTDEBUG |
std::vector< float > | actualPlanState |
bool | ATHREADDEBUG |
state_t | discPlanState |
std::vector< experience > | expList |
pthread_mutex_t | history_mutex |
bool | HISTORYDEBUG |
pthread_cond_t | list_cond |
pthread_mutex_t | list_mutex |
MDPModel * | model |
pthread_mutex_t | model_mutex |
MDPModel * | modelcopy |
bool | MODELDEBUG |
pthread_t | modelThread |
bool | modelThreadStarted |
bool | MTHREADDEBUG |
pthread_mutex_t | nactions_mutex |
pthread_mutex_t | plan_state_mutex |
bool | PLANNERDEBUG |
pthread_t | planThread |
bool | planThreadStarted |
bool | POLICYDEBUG |
bool | PTHREADDEBUG |
bool | REALSTATEDEBUG |
state_t | startState |
pthread_mutex_t | statespace_mutex |
bool | TIMINGDEBUG |
bool | UCTDEBUG |
pthread_mutex_t | update_mutex |
Protected Member Functions | |
void | calculateReachableStates () |
state_t | canonicalize (const std::vector< float > &s) |
void | canonNextStates (StateActionInfo *modelInfo) |
void | createPolicy () |
void | deleteInfo (state_info *info) |
std::vector< float > | discretizeState (const std::vector< float > &s) |
void | fillInState (std::vector< float >s, int depth) |
double | getSeconds () |
void | initStateInfo (state_t s, state_info *info, int id) |
void | initStates () |
void | printStates () |
void | removeUnreachableStates () |
void | resetAndUpdateStateActions () |
virtual void | savePolicy (const char *filename) |
int | selectUCTAction (state_info *info) |
std::vector< float > | simulateNextState (const std::vector< float > &actS, state_t state, state_info *info, int action, float *reward, bool *term) |
void | updateStateActionFromModel (state_t s, int a, state_info *info) |
void | updateStateActionHistoryFromModel (const std::vector< float > modState, int a, StateActionInfo *newModel) |
Private Attributes | |
ExperienceFile | expfile |
std::vector< float > | featmax |
std::vector< float > | featmin |
const float | gamma |
const int | HISTORY_FL_SIZE |
const int | HISTORY_SIZE |
double | initTime |
const float | lambda |
int | lastUpdate |
const int | MAX_DEPTH |
const int | MAX_ITER |
const float | MAX_TIME |
const int | modelType |
int | nactions |
int | nsaved |
int | nstates |
const int | numactions |
double | planTime |
int | prevact |
state_info * | previnfo |
state_t | prevstate |
const float | rrange |
std::deque< float > | saHistory |
bool | seedMode |
double | setTime |
std::map< state_t, state_info > | statedata |
std::set< std::vector< float > > | statespace |
const std::vector< int > & | statesPerDim |
bool | timingType |
const bool | trackActual |
This class defines my real-time model-based RL architecture, which does model learning, planning, and acting on 3 separate threads. It uses a modified version of UCT for planning, which plans on a model using Monte Carlo rollouts. Unlike the original UCT, it does not separate values by tree depth, and it incorporates eligibility traces. This version plans over states augmented with k-action histories, for delayed or partially observable domains.
Definition at line 35 of file PO_ParallelETUCT.hh.
typedef const std::vector<float>* PO_ParallelETUCT::state_t |
The implementation maps all sensations to a set of canonical pointers, which serve as the internal representation of environment state.
Definition at line 98 of file PO_ParallelETUCT.hh.
PO_ParallelETUCT::PO_ParallelETUCT | ( | int | numactions, |
float | gamma, | ||
float | rrange, | ||
float | lambda, | ||
int | MAX_ITER, | ||
float | MAX_TIME, | ||
int | MAX_DEPTH, | ||
int | modelType, | ||
const std::vector< float > & | featmax, | ||
const std::vector< float > & | featmin, | ||
const std::vector< int > & | statesPerDim, | ||
bool | trackActual, | ||
int | historySize, | ||
Random | rng = Random() |
||
) |
Standard constructor
numactions,numactions | in the domain |
gamma | discount factor |
rrange | range of one-step rewards in the domain |
lambda | for use with eligibility traces |
MAX_ITER | maximum number of MC rollouts to perform |
MAX_TIME | maximum amount of time to run Monte Carlo rollouts |
MAX_DEPTH | maximum depth to perform rollout to |
modelType | specifies model type |
featmax | maximum value of each feature |
featmin | minimum value of each feature |
statesPerDim | # of values to discretize each feature into |
trackActual | track actual real-valued states (or just discrete states) |
historySize | # of previous actions to use for delayed domains |
rng | random number generator |
Definition at line 17 of file PO_ParallelETUCT.cc.
PO_ParallelETUCT::PO_ParallelETUCT | ( | const PO_ParallelETUCT & | ) |
Unimplemented copy constructor: internal state cannot be simply copied.
PO_ParallelETUCT::~PO_ParallelETUCT | ( | ) | [virtual] |
Definition at line 100 of file PO_ParallelETUCT.cc.
std::vector< float > PO_ParallelETUCT::addVec | ( | const std::vector< float > & | a, |
const std::vector< float > & | b | ||
) |
Add two vectors together.
Definition at line 1337 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::calculateReachableStates | ( | ) | [protected] |
Calculate which states are reachable from states the agent has actually visited.
PO_ParallelETUCT::state_t PO_ParallelETUCT::canonicalize | ( | const std::vector< float > & | s | ) | [protected] |
Produces a canonical representation of the given sensation.
s | The current sensation from the environment. |
Definition at line 669 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::canonNextStates | ( | StateActionInfo * | modelInfo | ) | [protected] |
Canonicalize all the next states predicted by this model.
Definition at line 391 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::createPolicy | ( | ) | [protected] |
Compuate a policy from a model
void PO_ParallelETUCT::deleteInfo | ( | state_info * | info | ) | [protected] |
Delete a state_info struct
Definition at line 778 of file PO_ParallelETUCT.cc.
std::vector< float > PO_ParallelETUCT::discretizeState | ( | const std::vector< float > & | s | ) | [protected] |
Return a discretized version of the input state.
Definition at line 1307 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::fillInState | ( | std::vector< float > | s, |
int | depth | ||
) | [protected] |
Fill in a state based on featmin and featmax
int PO_ParallelETUCT::getBestAction | ( | const std::vector< float > & | s | ) | [virtual] |
Implements Planner.
Definition at line 418 of file PO_ParallelETUCT.cc.
double PO_ParallelETUCT::getSeconds | ( | ) | [protected] |
Get the current time in seconds
Definition at line 788 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::initStateInfo | ( | state_t | s, |
state_info * | info, | ||
int | id | ||
) | [protected] |
Initialize state info struct
Definition at line 703 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::initStates | ( | ) | [protected] |
Initialize the states for this domain (based on featmin and featmax)
void PO_ParallelETUCT::loadPolicy | ( | const char * | filename | ) |
Load a policy from a file.
Definition at line 1212 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::logValues | ( | ofstream * | of, |
int | xmin, | ||
int | xmax, | ||
int | ymin, | ||
int | ymax | ||
) |
Output value function to a file
Definition at line 1279 of file PO_ParallelETUCT.cc.
Start the parallel model learning thread.
Definition at line 549 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::parallelSearch | ( | ) |
Start the parallel UCT planning thread.
Definition at line 1126 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::planOnNewModel | ( | ) | [virtual] |
Implements Planner.
Definition at line 518 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::printStates | ( | ) | [protected] |
Print information for each state.
Definition at line 742 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::removeUnreachableStates | ( | ) | [protected] |
Remove states from set that were deemed unreachable.
void PO_ParallelETUCT::resetAndUpdateStateActions | ( | ) | [protected] |
Reset UCT visit counts to some baseline level (to decrease our confidence in q-values because model has changed.
Definition at line 608 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::savePolicy | ( | const char * | filename | ) | [protected, virtual] |
Reimplemented from Planner.
Definition at line 1171 of file PO_ParallelETUCT.cc.
std::vector< float > PO_ParallelETUCT::selectRandomState | ( | ) |
Select a random previously visited state.
Definition at line 1077 of file PO_ParallelETUCT.cc.
int PO_ParallelETUCT::selectUCTAction | ( | state_info * | info | ) | [protected] |
Select UCT action based on UCB1 algorithm.
Definition at line 939 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::setFirst | ( | ) | [virtual] |
Reimplemented from Planner.
Definition at line 1361 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::setModel | ( | MDPModel * | model | ) | [virtual] |
Implements Planner.
Definition at line 152 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::setSeeding | ( | bool | seed | ) | [virtual] |
Reimplemented from Planner.
Definition at line 1372 of file PO_ParallelETUCT.cc.
std::vector< float > PO_ParallelETUCT::simulateNextState | ( | const std::vector< float > & | actS, |
state_t | state, | ||
state_info * | info, | ||
int | action, | ||
float * | reward, | ||
bool * | term | ||
) | [protected] |
Return a sampled state from the next state distribution of the model. Simulate the next state from the given state, action, and possibly history of past actions.
Definition at line 988 of file PO_ParallelETUCT.cc.
std::vector< float > PO_ParallelETUCT::subVec | ( | const std::vector< float > & | a, |
const std::vector< float > & | b | ||
) |
Subtract two vectors.
Definition at line 1349 of file PO_ParallelETUCT.cc.
float PO_ParallelETUCT::uctSearch | ( | const std::vector< float > & | actS, |
state_t | state, | ||
int | depth | ||
) |
Perform UCT/Monte Carlo rollout from the given state. If terminal or at depth, return some reward. Otherwise, select an action based on UCB. Simulate action to get reward and next state. Call search on next state at depth+1 to get reward return from there on. Update q value towards new value: reward + gamma * searchReturn Update visit counts for confidence bounds Return q
From "Bandit Based Monte Carlo Planning" by Kocsis and Szepesv´ari.
Definition at line 796 of file PO_ParallelETUCT.cc.
bool PO_ParallelETUCT::updateModelWithExperience | ( | const std::vector< float > & | last, |
int | act, | ||
const std::vector< float > & | curr, | ||
float | reward, | ||
bool | term | ||
) | [virtual] |
Implements Planner.
Definition at line 165 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::updateStateActionFromModel | ( | state_t | s, |
int | a, | ||
state_info * | info | ||
) | [protected] |
Update the state_info copy of the model for the given state-action from the MDPModel
Definition at line 306 of file PO_ParallelETUCT.cc.
void PO_ParallelETUCT::updateStateActionHistoryFromModel | ( | const std::vector< float > | modState, |
int | a, | ||
StateActionInfo * | newModel | ||
) | [protected] |
Update the state_info copy of the model for the given state-action and k-action history from the MDPModel.
Definition at line 315 of file PO_ParallelETUCT.cc.
Definition at line 80 of file PO_ParallelETUCT.hh.
std::vector<float> PO_ParallelETUCT::actualPlanState |
Agent's current actual real-valued state that UCT rollouts should start from.
Definition at line 125 of file PO_ParallelETUCT.hh.
Definition at line 83 of file PO_ParallelETUCT.hh.
Agent's current discrete state that UCT rollouts should start from.
Definition at line 122 of file PO_ParallelETUCT.hh.
ExperienceFile PO_ParallelETUCT::expfile [private] |
Definition at line 323 of file PO_ParallelETUCT.hh.
std::vector<experience> PO_ParallelETUCT::expList |
List of experiences from action thread to be added into model
Definition at line 119 of file PO_ParallelETUCT.hh.
std::vector<float> PO_ParallelETUCT::featmax [private] |
Definition at line 287 of file PO_ParallelETUCT.hh.
std::vector<float> PO_ParallelETUCT::featmin [private] |
Definition at line 288 of file PO_ParallelETUCT.hh.
const float PO_ParallelETUCT::gamma [private] |
Definition at line 310 of file PO_ParallelETUCT.hh.
const int PO_ParallelETUCT::HISTORY_FL_SIZE [private] |
Definition at line 321 of file PO_ParallelETUCT.hh.
pthread_mutex_t PO_ParallelETUCT::history_mutex |
Mutex around the history of previous actions of the agent.
Definition at line 142 of file PO_ParallelETUCT.hh.
const int PO_ParallelETUCT::HISTORY_SIZE [private] |
Definition at line 320 of file PO_ParallelETUCT.hh.
Definition at line 87 of file PO_ParallelETUCT.hh.
double PO_ParallelETUCT::initTime [private] |
Definition at line 298 of file PO_ParallelETUCT.hh.
const float PO_ParallelETUCT::lambda [private] |
Definition at line 312 of file PO_ParallelETUCT.hh.
int PO_ParallelETUCT::lastUpdate [private] |
Definition at line 305 of file PO_ParallelETUCT.hh.
pthread_cond_t PO_ParallelETUCT::list_cond |
Definition at line 148 of file PO_ParallelETUCT.hh.
pthread_mutex_t PO_ParallelETUCT::list_mutex |
Mutex around the list of experiences to be added to the model.
Definition at line 140 of file PO_ParallelETUCT.hh.
const int PO_ParallelETUCT::MAX_DEPTH [private] |
Definition at line 316 of file PO_ParallelETUCT.hh.
const int PO_ParallelETUCT::MAX_ITER [private] |
Definition at line 314 of file PO_ParallelETUCT.hh.
const float PO_ParallelETUCT::MAX_TIME [private] |
Definition at line 315 of file PO_ParallelETUCT.hh.
MDPModel that we're using with planning
Definition at line 90 of file PO_ParallelETUCT.hh.
pthread_mutex_t PO_ParallelETUCT::model_mutex |
Mutex around the model.
Definition at line 138 of file PO_ParallelETUCT.hh.
Copy of our model used when updating the model, so the original can still be queried by the other threads.
Definition at line 93 of file PO_ParallelETUCT.hh.
Definition at line 79 of file PO_ParallelETUCT.hh.
pthread_t PO_ParallelETUCT::modelThread |
Thread that performs model updates.
Definition at line 115 of file PO_ParallelETUCT.hh.
Definition at line 107 of file PO_ParallelETUCT.hh.
const int PO_ParallelETUCT::modelType [private] |
Definition at line 317 of file PO_ParallelETUCT.hh.
Definition at line 84 of file PO_ParallelETUCT.hh.
int PO_ParallelETUCT::nactions [private] |
Definition at line 304 of file PO_ParallelETUCT.hh.
pthread_mutex_t PO_ParallelETUCT::nactions_mutex |
Mutex around the counter of how many actions the agent has taken.
Definition at line 134 of file PO_ParallelETUCT.hh.
int PO_ParallelETUCT::nsaved [private] |
Definition at line 303 of file PO_ParallelETUCT.hh.
int PO_ParallelETUCT::nstates [private] |
Definition at line 302 of file PO_ParallelETUCT.hh.
const int PO_ParallelETUCT::numactions [private] |
Definition at line 309 of file PO_ParallelETUCT.hh.
pthread_mutex_t PO_ParallelETUCT::plan_state_mutex |
Mutex around the agent's current state to plan from.
Definition at line 136 of file PO_ParallelETUCT.hh.
Definition at line 77 of file PO_ParallelETUCT.hh.
pthread_t PO_ParallelETUCT::planThread |
Thread that performs planning using UCT.
Definition at line 112 of file PO_ParallelETUCT.hh.
Definition at line 108 of file PO_ParallelETUCT.hh.
double PO_ParallelETUCT::planTime [private] |
Definition at line 297 of file PO_ParallelETUCT.hh.
Definition at line 78 of file PO_ParallelETUCT.hh.
int PO_ParallelETUCT::prevact [private] |
Definition at line 294 of file PO_ParallelETUCT.hh.
state_info* PO_ParallelETUCT::previnfo [private] |
Definition at line 295 of file PO_ParallelETUCT.hh.
state_t PO_ParallelETUCT::prevstate [private] |
Definition at line 293 of file PO_ParallelETUCT.hh.
Definition at line 82 of file PO_ParallelETUCT.hh.
Definition at line 86 of file PO_ParallelETUCT.hh.
const float PO_ParallelETUCT::rrange [private] |
Definition at line 311 of file PO_ParallelETUCT.hh.
std::deque<float> PO_ParallelETUCT::saHistory [private] |
Current history of previous actions.
Definition at line 291 of file PO_ParallelETUCT.hh.
bool PO_ParallelETUCT::seedMode [private] |
Definition at line 300 of file PO_ParallelETUCT.hh.
double PO_ParallelETUCT::setTime [private] |
Definition at line 299 of file PO_ParallelETUCT.hh.
Canonical pointer to agent's current state that UCT rollouts should start from.
Definition at line 128 of file PO_ParallelETUCT.hh.
std::map<state_t, state_info> PO_ParallelETUCT::statedata [private] |
Hashmap mapping state vectors to their state_info structs.
Definition at line 285 of file PO_ParallelETUCT.hh.
std::set<std::vector<float> > PO_ParallelETUCT::statespace [private] |
Set of all distinct sensations seen. Pointers to elements of this set serve as the internal representation of the environment state.
Definition at line 282 of file PO_ParallelETUCT.hh.
pthread_mutex_t PO_ParallelETUCT::statespace_mutex |
Mutex around the set of states.
Definition at line 145 of file PO_ParallelETUCT.hh.
const std::vector<int>& PO_ParallelETUCT::statesPerDim [private] |
Definition at line 318 of file PO_ParallelETUCT.hh.
Definition at line 85 of file PO_ParallelETUCT.hh.
bool PO_ParallelETUCT::timingType [private] |
Definition at line 307 of file PO_ParallelETUCT.hh.
const bool PO_ParallelETUCT::trackActual [private] |
Definition at line 319 of file PO_ParallelETUCT.hh.
Definition at line 81 of file PO_ParallelETUCT.hh.
pthread_mutex_t PO_ParallelETUCT::update_mutex |
Mutex around the last frame that the model was updated.
Definition at line 132 of file PO_ParallelETUCT.hh.