#include <ValueIteration.hh>
Classes | |
struct | state_info |
Public Types | |
typedef const std::vector < float > * | state_t |
Public Member Functions | |
void | fillInState (std::vector< float >s, int depth) |
virtual int | getBestAction (const std::vector< float > &s) |
void | initStates () |
virtual void | planOnNewModel () |
virtual void | savePolicy (const char *filename) |
virtual void | setModel (MDPModel *model) |
virtual bool | updateModelWithExperience (const std::vector< float > &last, int act, const std::vector< float > &curr, float reward, bool term) |
ValueIteration (int numactions, float gamma, int MAX_LOOPS, float MAX_TIME, int modelType, const std::vector< float > &featmax, const std::vector< float > &featmin, const std::vector< int > &statesPerDim, Random rng=Random()) | |
ValueIteration (const ValueIteration &) | |
virtual | ~ValueIteration () |
Public Attributes | |
bool | ACTDEBUG |
MDPModel * | model |
bool | MODELDEBUG |
bool | PLANNERDEBUG |
bool | POLICYDEBUG |
Protected Member Functions | |
void | calculateReachableStates () |
state_t | canonicalize (const std::vector< float > &s) |
void | createPolicy () |
void | deleteInfo (state_info *info) |
std::vector< float > | discretizeState (const std::vector< float > &s) |
double | getSeconds () |
void | initNewState (state_t s) |
void | initStateInfo (state_info *info) |
void | printStates () |
void | removeUnreachableStates () |
void | updateStateActionFromModel (const std::vector< float > &state, int j) |
void | updateStatesFromModel () |
Private Attributes | |
std::vector< float > | featmax |
std::vector< float > | featmin |
const float | gamma |
const int | MAX_LOOPS |
int | MAX_STEPS |
const float | MAX_TIME |
const int | modelType |
int | nactions |
int | nstates |
const int | numactions |
double | planTime |
int | prevact |
std::vector< float > | prevstate |
std::map< state_t, state_info > | statedata |
std::set< std::vector< float > > | statespace |
const std::vector< int > | statesPerDim |
bool | timingType |
Planner that performs value iteration to compute a policy based on a model
Definition at line 17 of file ValueIteration.hh.
typedef const std::vector<float>* ValueIteration::state_t |
The implementation maps all sensations to a set of canonical pointers, which serve as the internal representation of environment state.
Definition at line 69 of file ValueIteration.hh.
ValueIteration::ValueIteration | ( | int | numactions, |
float | gamma, | ||
int | MAX_LOOPS, | ||
float | MAX_TIME, | ||
int | modelType, | ||
const std::vector< float > & | featmax, | ||
const std::vector< float > & | featmin, | ||
const std::vector< int > & | statesPerDim, | ||
Random | rng = Random() |
||
) |
Standard constructor
numactions | number of actions in the domain |
gamma | discount factor |
MAX_LOOPS | maximum number of iterations of VI we'll run |
MAX_TIME | maximum time we'll allow VI to run |
modelType | specifies model type |
featmax | maximum value of each feature |
featmin | minimum value of each feature |
statesPerDim | number of values to discretize each feature into |
rng | random number generator |
Definition at line 12 of file ValueIteration.cc.
ValueIteration::ValueIteration | ( | const ValueIteration & | ) |
Unimplemented copy constructor: internal state cannot be simply copied.
ValueIteration::~ValueIteration | ( | ) | [virtual] |
Definition at line 49 of file ValueIteration.cc.
void ValueIteration::calculateReachableStates | ( | ) | [protected] |
Calculate which states are reachable from states the agent has actually visited.
Definition at line 590 of file ValueIteration.cc.
ValueIteration::state_t ValueIteration::canonicalize | ( | const std::vector< float > & | s | ) | [protected] |
Produces a canonical representation of the given sensation.
s | The current sensation from the environment. |
Definition at line 505 of file ValueIteration.cc.
void ValueIteration::createPolicy | ( | ) | [protected] |
Compute a policy from a model
Definition at line 272 of file ValueIteration.cc.
void ValueIteration::deleteInfo | ( | state_info * | info | ) | [protected] |
Delete a state_info struct
Definition at line 660 of file ValueIteration.cc.
std::vector< float > ValueIteration::discretizeState | ( | const std::vector< float > & | s | ) | [protected] |
Return a discretized version of the input state.
Definition at line 707 of file ValueIteration.cc.
void ValueIteration::fillInState | ( | std::vector< float > | s, |
int | depth | ||
) |
Fill in a state based on featmin and featmax
Definition at line 82 of file ValueIteration.cc.
int ValueIteration::getBestAction | ( | const std::vector< float > & | s | ) | [virtual] |
Implements Planner.
Definition at line 233 of file ValueIteration.cc.
double ValueIteration::getSeconds | ( | ) | [protected] |
Get the current time in seconds
Definition at line 667 of file ValueIteration.cc.
void ValueIteration::initNewState | ( | state_t | s | ) | [protected] |
Initialize a new state
Definition at line 103 of file ValueIteration.cc.
void ValueIteration::initStateInfo | ( | state_info * | info | ) | [protected] |
Initialize state info struct
Definition at line 538 of file ValueIteration.cc.
void ValueIteration::initStates | ( | ) |
Initialize the states for this domain (based on featmin and featmax)
Definition at line 74 of file ValueIteration.cc.
void ValueIteration::planOnNewModel | ( | ) | [virtual] |
Implements Planner.
Definition at line 484 of file ValueIteration.cc.
void ValueIteration::printStates | ( | ) | [protected] |
Print information for each state.
Definition at line 563 of file ValueIteration.cc.
void ValueIteration::removeUnreachableStates | ( | ) | [protected] |
Remove states from set that were deemed unreachable.
Definition at line 617 of file ValueIteration.cc.
void ValueIteration::savePolicy | ( | const char * | filename | ) | [virtual] |
Reimplemented from Planner.
Definition at line 675 of file ValueIteration.cc.
void ValueIteration::setModel | ( | MDPModel * | model | ) | [virtual] |
Implements Planner.
Definition at line 64 of file ValueIteration.cc.
bool ValueIteration::updateModelWithExperience | ( | const std::vector< float > & | last, |
int | act, | ||
const std::vector< float > & | curr, | ||
float | reward, | ||
bool | term | ||
) | [virtual] |
Implements Planner.
Definition at line 123 of file ValueIteration.cc.
void ValueIteration::updateStateActionFromModel | ( | const std::vector< float > & | state, |
int | j | ||
) | [protected] |
Update a given state-action's model in its state_info struct from the MDPModel
Definition at line 176 of file ValueIteration.cc.
void ValueIteration::updateStatesFromModel | ( | ) | [protected] |
Update the tabular copy of our model from the MDPModel
Definition at line 193 of file ValueIteration.cc.
Definition at line 61 of file ValueIteration.hh.
std::vector<float> ValueIteration::featmax [private] |
Definition at line 149 of file ValueIteration.hh.
std::vector<float> ValueIteration::featmin [private] |
Definition at line 150 of file ValueIteration.hh.
const float ValueIteration::gamma [private] |
Definition at line 164 of file ValueIteration.hh.
const int ValueIteration::MAX_LOOPS [private] |
Definition at line 166 of file ValueIteration.hh.
int ValueIteration::MAX_STEPS [private] |
Definition at line 160 of file ValueIteration.hh.
const float ValueIteration::MAX_TIME [private] |
Definition at line 167 of file ValueIteration.hh.
The MDPModel that we are using for planning
Definition at line 64 of file ValueIteration.hh.
Definition at line 60 of file ValueIteration.hh.
const int ValueIteration::modelType [private] |
Definition at line 168 of file ValueIteration.hh.
int ValueIteration::nactions [private] |
Definition at line 158 of file ValueIteration.hh.
int ValueIteration::nstates [private] |
Definition at line 157 of file ValueIteration.hh.
const int ValueIteration::numactions [private] |
Definition at line 163 of file ValueIteration.hh.
Definition at line 58 of file ValueIteration.hh.
double ValueIteration::planTime [private] |
Definition at line 155 of file ValueIteration.hh.
Definition at line 59 of file ValueIteration.hh.
int ValueIteration::prevact [private] |
Definition at line 153 of file ValueIteration.hh.
std::vector<float> ValueIteration::prevstate [private] |
Definition at line 152 of file ValueIteration.hh.
std::map<state_t, state_info> ValueIteration::statedata [private] |
Map (std::map) from canonical state pointers (state_t) to their state_info structs.
Definition at line 147 of file ValueIteration.hh.
std::set<std::vector<float> > ValueIteration::statespace [private] |
Set of all distinct sensations seen. Pointers to elements of this set serve as the internal representation of the environment state.
Definition at line 144 of file ValueIteration.hh.
const std::vector<int> ValueIteration::statesPerDim [private] |
Definition at line 169 of file ValueIteration.hh.
bool ValueIteration::timingType [private] |
Definition at line 161 of file ValueIteration.hh.