rl_agent: Dyna.hh Source File

Go to the documentation of this file.
00001 
00005 #ifndef _DYNA_HH_
00006 #define _DYNA_HH_
00007 
#include <rl_common/Random.h>
#include <rl_common/core.hh>

#include <iosfwd>
#include <map>
#include <set>
#include <vector>
00014 
00017 class Dyna: public Agent {
00018 public:
00026   Dyna(int numactions, float gamma,
00027        float initialvalue, float alpha, int k, float epsilon,
00028            Random rng = Random());
00029 
00032   Dyna(const Dyna &);
00033 
00034   virtual ~Dyna();
00035 
00036   virtual int first_action(const std::vector<float> &s);
00037   virtual int next_action(float r, const std::vector<float> &s);
00038   virtual void last_action(float r);
00039   virtual void setDebug(bool d);
00040   virtual void seedExp(std::vector<experience>);
00041   virtual void savePolicy(const char* filename);
00042 
00043   
00044   int getBestAction(const std::vector<float> &s);
00045   double getSeconds();
00046 
00047   void printState(const std::vector<float> &s);
00048   float getValue(std::vector<float> state);
00049   
00050   std::vector<float>::iterator random_max_element(
00051                                                    std::vector<float>::iterator start,
00052                                                    std::vector<float>::iterator end);
00053 
00054   void logValues(ofstream *of, int xmin, int xmax, int ymin, int ymax);
00055   float epsilon;
00056 
00057 protected:
00061   typedef const std::vector<float> *state_t;
00062 
00063   struct dynaExperience {
00064     state_t s;
00065     state_t next;
00066     float r;
00067     bool term;
00068     int a;
00069   };
00070   void addExperience(float r, state_t s, bool term);
00071 
00075   state_t canonicalize(const std::vector<float> &s);
00076 
00077 private:
00081   std::set<std::vector<float> > statespace;
00082 
00087   std::map<state_t, std::vector<float> > Q;
00088 
00089   std::vector<dynaExperience> experiences;
00090 
00091   const int numactions;
00092   const float gamma;
00093 
00094   const float initialvalue;
00095   const float alpha;
00096   const int k;
00097 
00098   Random rng;
00099   float *currentq;
00100   state_t laststate;
00101   int lastact;
00102 
00103   bool ACTDEBUG;
00104 };
00105 
00106 #endif