#include <PolicyIteration.hh>
Classes | |
struct | state_info |
Public Types | |
typedef const std::vector < float > * | state_t |
Public Member Functions | |
virtual int | getBestAction (const std::vector< float > &s) |
virtual void | planOnNewModel () |
PolicyIteration (int numactions, float gamma, int MAX_LOOPS, float MAX_TIME, int modelType, const std::vector< float > &featmax, const std::vector< float > &featmin, const std::vector< int > &statesPerDim, Random rng=Random()) | |
PolicyIteration (const PolicyIteration &) | |
virtual void | savePolicy (const char *filename) |
virtual void | setModel (MDPModel *model) |
virtual bool | updateModelWithExperience (const std::vector< float > &last, int act, const std::vector< float > &curr, float reward, bool term) |
virtual | ~PolicyIteration () |
Public Attributes | |
bool | ACTDEBUG |
MDPModel * | model |
bool | MODELDEBUG |
bool | PLANNERDEBUG |
bool | POLICYDEBUG |
Protected Member Functions | |
void | calculateReachableStates () |
state_t | canonicalize (const std::vector< float > &s) |
void | createPolicy () |
void | deleteInfo (state_info *info) |
std::vector< float > | discretizeState (const std::vector< float > &s) |
float | getActionValue (state_t s, state_info *info, int act) |
double | getSeconds () |
void | initNewState (state_t s) |
void | initStateInfo (state_info *info) |
void | policyEvaluation () |
bool | policyImprovement () |
void | printStates () |
void | removeUnreachableStates () |
void | updateStateActionFromModel (const std::vector< float > &state, int j) |
void | updateStatesFromModel () |
Private Attributes | |
std::vector< float > | featmax |
std::vector< float > | featmin |
const float | gamma |
const int | MAX_LOOPS |
int | MAX_STEPS |
const float | MAX_TIME |
const int | modelType |
int | nactions |
int | nstates |
const int | numactions |
double | planTime |
int | prevact |
std::vector< float > | prevstate |
std::map< state_t, state_info > | statedata |
std::set< std::vector< float > > | statespace |
const std::vector< int > & | statesPerDim |
bool | timingType |
Definition at line 12 of file PolicyIteration.hh.
typedef const std::vector<float>* PolicyIteration::state_t |
The implementation maps all sensations to a set of canonical pointers, which serve as the internal representation of environment state.
Definition at line 18 of file PolicyIteration.hh.
PolicyIteration::PolicyIteration | ( | int | numactions, |
float | gamma, | ||
int | MAX_LOOPS, | ||
float | MAX_TIME, | ||
int | modelType, | ||
const std::vector< float > & | featmax, | ||
const std::vector< float > & | featmin, | ||
const std::vector< int > & | statesPerDim, | ||
Random | rng = Random() |
||
) |
Standard constructor
numactions | number of actions in the domain |
gamma | discount factor |
MAX_LOOPS | maximum number of loops |
MAX_TIME | maximum time |
rng | random number generator |
Definition at line 8 of file PolicyIteration.cc.
PolicyIteration::PolicyIteration | ( | const PolicyIteration & | ) |
Unimplemented copy constructor: internal state cannot be simply copied.
PolicyIteration::~PolicyIteration | ( | ) | [virtual] |
Definition at line 44 of file PolicyIteration.cc.
void PolicyIteration::calculateReachableStates | ( | ) | [protected] |
Sets the number of steps to visited states to 0 and the number of steps to unvisited states to 100000.
Definition at line 333 of file PolicyIteration.cc.
PolicyIteration::state_t PolicyIteration::canonicalize | ( | const std::vector< float > & | s | ) | [protected] |
Produces a canonical representation of the given sensation.
s | The current sensation from the environment. |
Definition at line 248 of file PolicyIteration.cc.
void PolicyIteration::createPolicy | ( | ) | [protected] |
Do Policy Iteration
Definition at line 420 of file PolicyIteration.cc.
void PolicyIteration::deleteInfo | ( | state_info * | info | ) | [protected] |
Definition at line 404 of file PolicyIteration.cc.
std::vector< float > PolicyIteration::discretizeState | ( | const std::vector< float > & | s | ) | [protected] |
Definition at line 780 of file PolicyIteration.cc.
float PolicyIteration::getActionValue | ( | state_t | s, |
state_info * | info, | ||
int | act | ||
) | [protected] |
Definition at line 528 of file PolicyIteration.cc.
int PolicyIteration::getBestAction | ( | const std::vector< float > & | state | ) | [virtual] |
double PolicyIteration::getSeconds | ( | ) | [protected] |
Definition at line 411 of file PolicyIteration.cc.
void PolicyIteration::initNewState | ( | state_t | s | ) | [protected] |
Definition at line 70 of file PolicyIteration.cc.
void PolicyIteration::initStateInfo | ( | state_info * | info | ) | [protected] |
Definition at line 279 of file PolicyIteration.cc.
void PolicyIteration::planOnNewModel | ( | ) | [virtual] |
Implements Planner.
Definition at line 227 of file PolicyIteration.cc.
void PolicyIteration::policyEvaluation | ( | ) | [protected] |
Definition at line 652 of file PolicyIteration.cc.
bool PolicyIteration::policyImprovement | ( | ) | [protected] |
Definition at line 461 of file PolicyIteration.cc.
void PolicyIteration::printStates | ( | ) | [protected] |
Print state info for debugging.
Definition at line 303 of file PolicyIteration.cc.
void PolicyIteration::removeUnreachableStates | ( | ) | [protected] |
Deletes states that we no longer believe to be reachable.
Definition at line 361 of file PolicyIteration.cc.
void PolicyIteration::savePolicy | ( | const char * | filename | ) | [virtual] |
Reimplemented from Planner.
Definition at line 742 of file PolicyIteration.cc.
void PolicyIteration::setModel | ( | MDPModel * | model | ) | [virtual] |
Implements Planner.
Definition at line 58 of file PolicyIteration.cc.
bool PolicyIteration::updateModelWithExperience | ( | const std::vector< float > & | laststate, |
int | lastact, | ||
const std::vector< float > & | currstate, | ||
float | reward, | ||
bool | term | ||
) | [virtual] |
Use the latest experience to update state info and the model.
Implements Planner.
Definition at line 91 of file PolicyIteration.cc.
void PolicyIteration::updateStateActionFromModel | ( | const std::vector< float > & | state, |
int | a | ||
) | [protected] |
Update our state info for the given state and action from the model by calling the model function.
Definition at line 145 of file PolicyIteration.cc.
void PolicyIteration::updateStatesFromModel | ( | ) | [protected] |
Update the state info of all our states from the model by calling the model function.
Definition at line 162 of file PolicyIteration.cc.
Definition at line 53 of file PolicyIteration.hh.
std::vector<float> PolicyIteration::featmax [private] |
Definition at line 130 of file PolicyIteration.hh.
std::vector<float> PolicyIteration::featmin [private] |
Definition at line 131 of file PolicyIteration.hh.
const float PolicyIteration::gamma [private] |
Definition at line 144 of file PolicyIteration.hh.
const int PolicyIteration::MAX_LOOPS [private] |
Definition at line 146 of file PolicyIteration.hh.
int PolicyIteration::MAX_STEPS [private] |
Definition at line 140 of file PolicyIteration.hh.
const float PolicyIteration::MAX_TIME [private] |
Definition at line 147 of file PolicyIteration.hh.
The model (MDPModel* PolicyIteration::model) that we're using.
Definition at line 56 of file PolicyIteration.hh.
Definition at line 52 of file PolicyIteration.hh.
const int PolicyIteration::modelType [private] |
Definition at line 148 of file PolicyIteration.hh.
int PolicyIteration::nactions [private] |
Definition at line 138 of file PolicyIteration.hh.
int PolicyIteration::nstates [private] |
Definition at line 137 of file PolicyIteration.hh.
const int PolicyIteration::numactions [private] |
Definition at line 143 of file PolicyIteration.hh.
Definition at line 50 of file PolicyIteration.hh.
double PolicyIteration::planTime [private] |
Definition at line 136 of file PolicyIteration.hh.
Definition at line 51 of file PolicyIteration.hh.
int PolicyIteration::prevact [private] |
Definition at line 134 of file PolicyIteration.hh.
std::vector<float> PolicyIteration::prevstate [private] |
Definition at line 133 of file PolicyIteration.hh.
std::map<state_t, state_info> PolicyIteration::statedata [private] |
Ordered map (std::map) from canonical state pointers (state_t) to their state_info structs.
Definition at line 128 of file PolicyIteration.hh.
std::set<std::vector<float> > PolicyIteration::statespace [private] |
Set of all distinct sensations seen. Pointers to elements of this set serve as the internal representation of the environment state.
Definition at line 125 of file PolicyIteration.hh.
const std::vector<int>& PolicyIteration::statesPerDim [private] |
Definition at line 149 of file PolicyIteration.hh.
bool PolicyIteration::timingType [private] |
Definition at line 141 of file PolicyIteration.hh.