#include <ParallelETUCT.hh>
Classes | |
struct | state_info |
struct | state_samples |
Public Types | |
typedef const std::vector < float > * | state_t |
Public Member Functions | |
std::vector< float > | addVec (const std::vector< float > &a, const std::vector< float > &b) |
virtual int | getBestAction (const std::vector< float > &s) |
void | loadPolicy (const char *filename) |
void | logValues (ofstream *of, int xmin, int xmax, int ymin, int ymax) |
ParallelETUCT (int numactions, float gamma, float rrange, float lambda, int MAX_ITER, float MAX_TIME, int MAX_DEPTH, int modelType, const std::vector< float > &featmax, const std::vector< float > &featmin, const std::vector< int > &statesPerDim, bool trackActual, int historySize, Random rng=Random()) | |
ParallelETUCT (const ParallelETUCT &) | |
void | parallelModelLearning () |
void | parallelSearch () |
virtual void | planOnNewModel () |
std::vector< float > | selectRandomState () |
virtual void | setFirst () |
virtual void | setModel (MDPModel *model) |
virtual void | setSeeding (bool seed) |
std::vector< float > | subVec (const std::vector< float > &a, const std::vector< float > &b) |
float | uctSearch (const std::vector< float > &actS, state_t state, int depth, std::deque< float > &history) |
virtual bool | updateModelWithExperience (const std::vector< float > &last, int act, const std::vector< float > &curr, float reward, bool term) |
virtual | ~ParallelETUCT () |
Public Attributes | |
bool | ACTDEBUG |
std::vector< float > | actualPlanState |
bool | ATHREADDEBUG |
state_t | discPlanState |
std::vector< experience > | expList |
pthread_mutex_t | history_mutex |
bool | HISTORYDEBUG |
pthread_cond_t | list_cond |
pthread_mutex_t | list_mutex |
MDPModel * | model |
pthread_mutex_t | model_mutex |
MDPModel * | modelcopy |
bool | MODELDEBUG |
pthread_t | modelThread |
bool | modelThreadStarted |
bool | MTHREADDEBUG |
pthread_mutex_t | nactions_mutex |
pthread_mutex_t | plan_state_mutex |
bool | PLANNERDEBUG |
pthread_t | planThread |
bool | planThreadStarted |
bool | POLICYDEBUG |
bool | PTHREADDEBUG |
bool | REALSTATEDEBUG |
state_t | startState |
pthread_mutex_t | statespace_mutex |
bool | TIMINGDEBUG |
bool | UCTDEBUG |
pthread_mutex_t | update_mutex |
Protected Member Functions | |
void | calculateReachableStates () |
state_t | canonicalize (const std::vector< float > &s) |
void | canonNextStates (StateActionInfo *modelInfo) |
void | createPolicy () |
void | deleteInfo (state_info *info) |
std::vector< float > | discretizeState (const std::vector< float > &s) |
void | fillInState (std::vector< float >s, int depth) |
double | getSeconds () |
void | initStateInfo (state_t s, state_info *info, int id) |
void | initStates () |
void | printStates () |
void | removeUnreachableStates () |
void | resetAndUpdateStateActions () |
virtual void | savePolicy (const char *filename) |
int | selectUCTAction (state_info *info) |
std::vector< float > | simulateNextState (const std::vector< float > &actS, state_t state, state_info *info, const std::deque< float > &history, int action, float *reward, bool *term) |
void | updateStateActionFromModel (state_t s, int a, state_info *info) |
void | updateStateActionHistoryFromModel (const std::vector< float > modState, int a, StateActionInfo *newModel) |
Private Attributes | |
const unsigned | CLEAR_SIZE |
ExperienceFile | expfile |
std::vector< float > | featmax |
std::vector< float > | featmin |
const float | gamma |
const int | HISTORY_FL_SIZE |
const int | HISTORY_SIZE |
double | initTime |
const float | lambda |
int | lastUpdate |
const int | MAX_DEPTH |
const int | MAX_ITER |
const float | MAX_TIME |
const int | modelType |
int | nactions |
int | nsaved |
int | nstates |
const int | numactions |
double | planTime |
int | prevact |
state_info * | previnfo |
state_t | prevstate |
const float | rrange |
std::deque< float > | saHistory |
bool | seedMode |
double | setTime |
std::map< state_t, state_info > | statedata |
std::set< std::vector< float > > | statespace |
const std::vector< int > & | statesPerDim |
bool | timingType |
const bool | trackActual |
This class defines my real-time model-based RL architecture, which does model learning, planning, and acting on 3 separate threads. It uses a modified version of UCT for planning, which plans on a model using Monte Carlo rollouts. Unlike the original UCT, it does not separate values by tree depth, and it incorporates eligibility traces.
Definition at line 34 of file ParallelETUCT.hh.
typedef const std::vector<float>* ParallelETUCT::state_t |
The implementation maps all sensations to a set of canonical pointers, which serve as the internal representation of environment state.
Definition at line 97 of file ParallelETUCT.hh.
ParallelETUCT::ParallelETUCT | ( | int | numactions, |
float | gamma, | ||
float | rrange, | ||
float | lambda, | ||
int | MAX_ITER, | ||
float | MAX_TIME, | ||
int | MAX_DEPTH, | ||
int | modelType, | ||
const std::vector< float > & | featmax, | ||
const std::vector< float > & | featmin, | ||
const std::vector< int > & | statesPerDim, | ||
bool | trackActual, | ||
int | historySize, | ||
Random | rng = Random() |
||
) |
Standard constructor
numactions,numactions | in the domain |
gamma | discount factor |
rrange | range of one-step rewards in the domain |
lambda | for use with eligibility traces |
MAX_ITER | maximum number of MC rollouts to perform |
MAX_TIME | maximum amount of time to run Monte Carlo rollouts |
MAX_DEPTH | maximum depth to perform rollout to |
modelType | specifies model type |
featmax | maximum value of each feature |
featmin | minimum value of each feature |
statesPerDim | # of values to discretize each feature into |
trackActual | track actual real-valued states (or just discrete states) |
historySize | # of previous actions to use for delayed domains |
rng | random number generator |
Definition at line 17 of file ParallelETUCT.cc.
ParallelETUCT::ParallelETUCT | ( | const ParallelETUCT & | ) |
Unimplemented copy constructor: internal state cannot be simply copied.
ParallelETUCT::~ParallelETUCT | ( | ) | [virtual] |
Definition at line 102 of file ParallelETUCT.cc.
std::vector< float > ParallelETUCT::addVec | ( | const std::vector< float > & | a, |
const std::vector< float > & | b | ||
) |
Add two vectors together.
Definition at line 1381 of file ParallelETUCT.cc.
void ParallelETUCT::calculateReachableStates | ( | ) | [protected] |
Calculate which states are reachable from states the agent has actually visited.
ParallelETUCT::state_t ParallelETUCT::canonicalize | ( | const std::vector< float > & | s | ) | [protected] |
Produces a canonical representation of the given sensation.
s | The current sensation from the environment. |
Definition at line 649 of file ParallelETUCT.cc.
void ParallelETUCT::canonNextStates | ( | StateActionInfo * | modelInfo | ) | [protected] |
Canonicalize all the next states predicted by this model.
Definition at line 370 of file ParallelETUCT.cc.
void ParallelETUCT::createPolicy | ( | ) | [protected] |
Compuate a policy from a model
void ParallelETUCT::deleteInfo | ( | state_info * | info | ) | [protected] |
Delete a state_info struct
Definition at line 758 of file ParallelETUCT.cc.
std::vector< float > ParallelETUCT::discretizeState | ( | const std::vector< float > & | s | ) | [protected] |
Return a discretized version of the input state.
Definition at line 1355 of file ParallelETUCT.cc.
void ParallelETUCT::fillInState | ( | std::vector< float > | s, |
int | depth | ||
) | [protected] |
Fill in a state based on featmin and featmax
Definition at line 1202 of file ParallelETUCT.cc.
int ParallelETUCT::getBestAction | ( | const std::vector< float > & | state | ) | [virtual] |
double ParallelETUCT::getSeconds | ( | ) | [protected] |
Get the current time in seconds
Definition at line 766 of file ParallelETUCT.cc.
void ParallelETUCT::initStateInfo | ( | state_t | s, |
state_info * | info, | ||
int | id | ||
) | [protected] |
Initialize state info struct
Definition at line 684 of file ParallelETUCT.cc.
void ParallelETUCT::initStates | ( | ) | [protected] |
Initialize the states for this domain (based on featmin and featmax)
Definition at line 1195 of file ParallelETUCT.cc.
void ParallelETUCT::loadPolicy | ( | const char * | filename | ) |
Load a policy from a file.
Definition at line 1260 of file ParallelETUCT.cc.
void ParallelETUCT::logValues | ( | ofstream * | of, |
int | xmin, | ||
int | xmax, | ||
int | ymin, | ||
int | ymax | ||
) |
Output value function to a file
Definition at line 1327 of file ParallelETUCT.cc.
void ParallelETUCT::parallelModelLearning | ( | ) |
Start the parallel model learning thread.
Definition at line 529 of file ParallelETUCT.cc.
void ParallelETUCT::parallelSearch | ( | ) |
Start the parallel UCT planning thread.
Definition at line 1144 of file ParallelETUCT.cc.
void ParallelETUCT::planOnNewModel | ( | ) | [virtual] |
Implements Planner.
Definition at line 497 of file ParallelETUCT.cc.
void ParallelETUCT::printStates | ( | ) | [protected] |
Print information for each state.
Print state info for debugging.
Definition at line 722 of file ParallelETUCT.cc.
void ParallelETUCT::removeUnreachableStates | ( | ) | [protected] |
Remove states from set that were deemed unreachable.
void ParallelETUCT::resetAndUpdateStateActions | ( | ) | [protected] |
Reset UCT visit counts to some baseline level (to decrease our confidence in q-values because model has changed.
Definition at line 588 of file ParallelETUCT.cc.
void ParallelETUCT::savePolicy | ( | const char * | filename | ) | [protected, virtual] |
Reimplemented from Planner.
Definition at line 1219 of file ParallelETUCT.cc.
std::vector< float > ParallelETUCT::selectRandomState | ( | ) |
Select a random previously visited state.
Definition at line 1095 of file ParallelETUCT.cc.
int ParallelETUCT::selectUCTAction | ( | state_info * | info | ) | [protected] |
Select UCT action based on UCB1 algorithm.
Definition at line 958 of file ParallelETUCT.cc.
void ParallelETUCT::setFirst | ( | ) | [virtual] |
Reimplemented from Planner.
Definition at line 1405 of file ParallelETUCT.cc.
void ParallelETUCT::setModel | ( | MDPModel * | model | ) | [virtual] |
Implements Planner.
Definition at line 152 of file ParallelETUCT.cc.
void ParallelETUCT::setSeeding | ( | bool | seed | ) | [virtual] |
Reimplemented from Planner.
Definition at line 1416 of file ParallelETUCT.cc.
std::vector< float > ParallelETUCT::simulateNextState | ( | const std::vector< float > & | actualState, |
state_t | discState, | ||
state_info * | info, | ||
const std::deque< float > & | history, | ||
int | action, | ||
float * | reward, | ||
bool * | term | ||
) | [protected] |
Return a sampled state from the next state distribution of the model. Simulate the next state from the given state, action, and possibly history of past actions.
sample from next state distribution
Definition at line 1008 of file ParallelETUCT.cc.
std::vector< float > ParallelETUCT::subVec | ( | const std::vector< float > & | a, |
const std::vector< float > & | b | ||
) |
Subtract two vectors.
Definition at line 1393 of file ParallelETUCT.cc.
float ParallelETUCT::uctSearch | ( | const std::vector< float > & | actS, |
state_t | discS, | ||
int | depth, | ||
std::deque< float > & | searchHistory | ||
) |
Perform UCT/Monte Carlo rollout from the given state. If terminal or at depth, return some reward. Otherwise, select an action based on UCB. Simulate action to get reward and next state. Call search on next state at depth+1 to get reward return from there on. Update q value towards new value: reward + gamma * searchReturn Update visit counts for confidence bounds Return q
From "Bandit Based Monte Carlo Planning" by Kocsis and Szepesv´ari.
Execute the uct search from state state at depth depth. If terminal or at depth, return some reward. Otherwise, select an action based on UCB. Simulate action to get reward and next state. Call search on next state at depth+1 to get reward return from there on. Update q value towards new value: reward + gamma * searchReturn Update visit counts for confidence bounds Return q
From "Bandit Based Monte Carlo Planning" by Kocsis and Csaba.
Definition at line 785 of file ParallelETUCT.cc.
bool ParallelETUCT::updateModelWithExperience | ( | const std::vector< float > & | laststate, |
int | lastact, | ||
const std::vector< float > & | currstate, | ||
float | reward, | ||
bool | term | ||
) | [virtual] |
Use the latest experience to update state info and the model.
Implements Planner.
Definition at line 166 of file ParallelETUCT.cc.
void ParallelETUCT::updateStateActionFromModel | ( | state_t | s, |
int | a, | ||
state_info * | info | ||
) | [protected] |
Update the state_info copy of the model for the given state-action from the MDPModel
Update a single state-action from the model
Definition at line 303 of file ParallelETUCT.cc.
void ParallelETUCT::updateStateActionHistoryFromModel | ( | const std::vector< float > | modState, |
int | a, | ||
StateActionInfo * | newModel | ||
) | [protected] |
Update the state_info copy of the model for the given state-action and k-action history from the MDPModel.
Update a single state-action from the model
Definition at line 352 of file ParallelETUCT.cc.
Definition at line 79 of file ParallelETUCT.hh.
std::vector<float> ParallelETUCT::actualPlanState |
Agent's current actual real-valued state that UCT rollouts should start from.
Definition at line 121 of file ParallelETUCT.hh.
Definition at line 82 of file ParallelETUCT.hh.
const unsigned ParallelETUCT::CLEAR_SIZE [private] |
Definition at line 320 of file ParallelETUCT.hh.
Agent's current discrete state that UCT rollouts should start from.
Definition at line 118 of file ParallelETUCT.hh.
ExperienceFile ParallelETUCT::expfile [private] |
Definition at line 321 of file ParallelETUCT.hh.
std::vector<experience> ParallelETUCT::expList |
List of experiences from action thread to be added into model
Definition at line 115 of file ParallelETUCT.hh.
std::vector<float> ParallelETUCT::featmax [private] |
Definition at line 284 of file ParallelETUCT.hh.
std::vector<float> ParallelETUCT::featmin [private] |
Definition at line 285 of file ParallelETUCT.hh.
const float ParallelETUCT::gamma [private] |
Definition at line 307 of file ParallelETUCT.hh.
const int ParallelETUCT::HISTORY_FL_SIZE [private] |
Definition at line 318 of file ParallelETUCT.hh.
pthread_mutex_t ParallelETUCT::history_mutex |
Mutex around the history of previous actions of the agent.
Definition at line 136 of file ParallelETUCT.hh.
const int ParallelETUCT::HISTORY_SIZE [private] |
Definition at line 317 of file ParallelETUCT.hh.
Definition at line 86 of file ParallelETUCT.hh.
double ParallelETUCT::initTime [private] |
Definition at line 295 of file ParallelETUCT.hh.
const float ParallelETUCT::lambda [private] |
Definition at line 309 of file ParallelETUCT.hh.
int ParallelETUCT::lastUpdate [private] |
Definition at line 302 of file ParallelETUCT.hh.
pthread_cond_t ParallelETUCT::list_cond |
Definition at line 144 of file ParallelETUCT.hh.
pthread_mutex_t ParallelETUCT::list_mutex |
Mutex around the list of experiences to be added to the model.
Definition at line 134 of file ParallelETUCT.hh.
const int ParallelETUCT::MAX_DEPTH [private] |
Definition at line 313 of file ParallelETUCT.hh.
const int ParallelETUCT::MAX_ITER [private] |
Definition at line 311 of file ParallelETUCT.hh.
const float ParallelETUCT::MAX_TIME [private] |
Definition at line 312 of file ParallelETUCT.hh.
MDPModel that we're using with planning
Definition at line 89 of file ParallelETUCT.hh.
pthread_mutex_t ParallelETUCT::model_mutex |
Mutex around the model.
Definition at line 132 of file ParallelETUCT.hh.
Copy of our model used when updating the model, so the original can still be queried by the other threads.
Definition at line 92 of file ParallelETUCT.hh.
Definition at line 78 of file ParallelETUCT.hh.
pthread_t ParallelETUCT::modelThread |
Thread that performs model updates.
Definition at line 111 of file ParallelETUCT.hh.
Definition at line 104 of file ParallelETUCT.hh.
const int ParallelETUCT::modelType [private] |
Definition at line 314 of file ParallelETUCT.hh.
Definition at line 83 of file ParallelETUCT.hh.
int ParallelETUCT::nactions [private] |
Definition at line 301 of file ParallelETUCT.hh.
pthread_mutex_t ParallelETUCT::nactions_mutex |
Mutex around the counter of how many actions the agent has taken.
Definition at line 138 of file ParallelETUCT.hh.
int ParallelETUCT::nsaved [private] |
Definition at line 300 of file ParallelETUCT.hh.
int ParallelETUCT::nstates [private] |
Definition at line 299 of file ParallelETUCT.hh.
const int ParallelETUCT::numactions [private] |
Definition at line 306 of file ParallelETUCT.hh.
pthread_mutex_t ParallelETUCT::plan_state_mutex |
Mutex around the agent's current state to plan from.
Definition at line 130 of file ParallelETUCT.hh.
Definition at line 76 of file ParallelETUCT.hh.
pthread_t ParallelETUCT::planThread |
Thread that performs planning using UCT.
Definition at line 108 of file ParallelETUCT.hh.
Definition at line 105 of file ParallelETUCT.hh.
double ParallelETUCT::planTime [private] |
Definition at line 294 of file ParallelETUCT.hh.
Definition at line 77 of file ParallelETUCT.hh.
int ParallelETUCT::prevact [private] |
Definition at line 291 of file ParallelETUCT.hh.
state_info* ParallelETUCT::previnfo [private] |
Definition at line 292 of file ParallelETUCT.hh.
state_t ParallelETUCT::prevstate [private] |
Definition at line 290 of file ParallelETUCT.hh.
Definition at line 81 of file ParallelETUCT.hh.
Definition at line 85 of file ParallelETUCT.hh.
const float ParallelETUCT::rrange [private] |
Definition at line 308 of file ParallelETUCT.hh.
std::deque<float> ParallelETUCT::saHistory [private] |
Current history of previous actions.
Definition at line 288 of file ParallelETUCT.hh.
bool ParallelETUCT::seedMode [private] |
Definition at line 297 of file ParallelETUCT.hh.
double ParallelETUCT::setTime [private] |
Definition at line 296 of file ParallelETUCT.hh.
Canonical pointer to agent's current state that UCT rollouts should start from.
Definition at line 124 of file ParallelETUCT.hh.
std::map<state_t, state_info> ParallelETUCT::statedata [private] |
Hashmap mapping state vectors to their state_info structs.
Definition at line 282 of file ParallelETUCT.hh.
std::set<std::vector<float> > ParallelETUCT::statespace [private] |
Set of all distinct sensations seen. Pointers to elements of this set serve as the internal representation of the environment state.
Definition at line 279 of file ParallelETUCT.hh.
pthread_mutex_t ParallelETUCT::statespace_mutex |
Mutex around the set of states.
Definition at line 141 of file ParallelETUCT.hh.
const std::vector<int>& ParallelETUCT::statesPerDim [private] |
Definition at line 315 of file ParallelETUCT.hh.
Definition at line 84 of file ParallelETUCT.hh.
bool ParallelETUCT::timingType [private] |
Definition at line 304 of file ParallelETUCT.hh.
const bool ParallelETUCT::trackActual [private] |
Definition at line 316 of file ParallelETUCT.hh.
Definition at line 80 of file ParallelETUCT.hh.
pthread_mutex_t ParallelETUCT::update_mutex |
Mutex around the last frame that the model was updated.
Definition at line 128 of file ParallelETUCT.hh.