#include <Dyna.hh>
Classes | |
struct | dynaExperience |
Public Member Functions | |
Dyna (int numactions, float gamma, float initialvalue, float alpha, int k, float epsilon, Random rng=Random()) | |
Dyna (const Dyna &) | |
virtual int | first_action (const std::vector< float > &s) |
int | getBestAction (const std::vector< float > &s) |
double | getSeconds () |
float | getValue (std::vector< float > state) |
virtual void | last_action (float r) |
void | logValues (ofstream *of, int xmin, int xmax, int ymin, int ymax) |
virtual int | next_action (float r, const std::vector< float > &s) |
void | printState (const std::vector< float > &s) |
std::vector< float >::iterator | random_max_element (std::vector< float >::iterator start, std::vector< float >::iterator end) |
virtual void | savePolicy (const char *filename) |
virtual void | seedExp (std::vector< experience >) |
virtual void | setDebug (bool d) |
virtual | ~Dyna () |
Public Attributes | |
float | epsilon |
Protected Types | |
typedef const std::vector < float > * | state_t |
Protected Member Functions | |
void | addExperience (float r, state_t s, bool term) |
state_t | canonicalize (const std::vector< float > &s) |
Private Attributes | |
bool | ACTDEBUG |
const float | alpha |
float * | currentq |
std::vector< dynaExperience > | experiences |
const float | gamma |
const float | initialvalue |
const int | k |
int | lastact |
state_t | laststate |
const int | numactions |
std::map< state_t, std::vector < float > > | Q |
Random | rng |
std::set< std::vector< float > > | statespace |
Agent that uses straight Q-learning, with no generalization and epsilon-greedy exploration.
typedef const std::vector<float>* Dyna::state_t [protected] |
Dyna::Dyna (int numactions, float gamma, float initialvalue, float alpha, int k, float epsilon, Random rng = Random())
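Standard constructor.
Parameters:
numactions | The number of possible actions |
gamma | The discount factor |
initialvalue | The initial value of each Q(s,a) |
alpha | The learning rate |
k | Not described in the original documentation; presumably the number of Dyna planning updates performed per step (to be confirmed against the implementation) |
epsilon | The probability of taking a random action |
rng | Initial state of the random number generator to use |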
Dyna::Dyna | ( | const Dyna & | ) |
Unimplemented copy constructor: internal state cannot be simply copied.
Dyna::~Dyna | ( | ) | [virtual] |
void Dyna::addExperience (float r, state_t s, bool term) [protected]
Dyna::state_t Dyna::canonicalize | ( | const std::vector< float > & | s | ) | [protected] |
int Dyna::first_action | ( | const std::vector< float > & | s | ) | [virtual] |
int Dyna::getBestAction | ( | const std::vector< float > & | s | ) |
double Dyna::getSeconds | ( | ) |
float Dyna::getValue | ( | std::vector< float > | state | ) |
void Dyna::last_action | ( | float | r | ) | [virtual] |
void Dyna::logValues (ofstream *of, int xmin, int xmax, int ymin, int ymax)
int Dyna::next_action (float r, const std::vector< float > &s) [virtual]
void Dyna::printState | ( | const std::vector< float > & | s | ) |
std::vector< float >::iterator Dyna::random_max_element (std::vector< float >::iterator start, std::vector< float >::iterator end)
void Dyna::savePolicy | ( | const char * | filename | ) | [virtual] |
void Dyna::seedExp | ( | std::vector< experience > | seeds | ) | [virtual] |
void Dyna::setDebug | ( | bool | d | ) | [virtual] |
bool Dyna::ACTDEBUG [private] |
const float Dyna::alpha [private] |
float* Dyna::currentq [private] |
float Dyna::epsilon |
std::vector<dynaExperience> Dyna::experiences [private] |
const float Dyna::gamma [private] |
const float Dyna::initialvalue [private] |
int Dyna::lastact [private] |
state_t Dyna::laststate [private] |
const int Dyna::numactions [private] |
std::set<std::vector<float> > Dyna::statespace [private] |