// Go to the documentation of this file.
00005 #ifndef _DYNA_HH_
00006 #define _DYNA_HH_
00007
#include <rl_common/Random.h>
#include <rl_common/core.hh>

#include <iosfwd>
#include <map>
#include <set>
#include <vector>
00014
00017 class Dyna: public Agent {
00018 public:
00026 Dyna(int numactions, float gamma,
00027 float initialvalue, float alpha, int k, float epsilon,
00028 Random rng = Random());
00029
00032 Dyna(const Dyna &);
00033
00034 virtual ~Dyna();
00035
00036 virtual int first_action(const std::vector<float> &s);
00037 virtual int next_action(float r, const std::vector<float> &s);
00038 virtual void last_action(float r);
00039 virtual void setDebug(bool d);
00040 virtual void seedExp(std::vector<experience>);
00041 virtual void savePolicy(const char* filename);
00042
00043
00044 int getBestAction(const std::vector<float> &s);
00045 double getSeconds();
00046
00047 void printState(const std::vector<float> &s);
00048 float getValue(std::vector<float> state);
00049
00050 std::vector<float>::iterator random_max_element(
00051 std::vector<float>::iterator start,
00052 std::vector<float>::iterator end);
00053
00054 void logValues(ofstream *of, int xmin, int xmax, int ymin, int ymax);
00055 float epsilon;
00056
00057 protected:
00061 typedef const std::vector<float> *state_t;
00062
00063 struct dynaExperience {
00064 state_t s;
00065 state_t next;
00066 float r;
00067 bool term;
00068 int a;
00069 };
00070 void addExperience(float r, state_t s, bool term);
00071
00075 state_t canonicalize(const std::vector<float> &s);
00076
00077 private:
00081 std::set<std::vector<float> > statespace;
00082
00087 std::map<state_t, std::vector<float> > Q;
00088
00089 std::vector<dynaExperience> experiences;
00090
00091 const int numactions;
00092 const float gamma;
00093
00094 const float initialvalue;
00095 const float alpha;
00096 const int k;
00097
00098 Random rng;
00099 float *currentq;
00100 state_t laststate;
00101 int lastact;
00102
00103 bool ACTDEBUG;
00104 };
00105
00106 #endif