Go to the documentation of this file.
00001
00005 #ifndef _SARSA_HH_
00006 #define _SARSA_HH_
00007
00008 #include <rl_common/Random.h>
00009 #include <rl_common/core.hh>
00010
00011 #include <map>
00012 #include <set>
00013 #include <vector>
00014
/// Agent subclass named Sarsa. It publicly derives from Agent (declared
/// outside this listing) and declares the virtual action/seed/save hooks below.
/// Presumably a tabular SARSA(lambda) learner, judging by the `lambda`, `Q` and
/// `eligibility` members -- TODO confirm against the implementation file.
00017 class Sarsa: public Agent {
00018 public:
/// Constructs the agent. Definition is not visible here; the parameter
/// meanings below are inferred from their names -- TODO confirm.
/// @param numactions   presumably the number of discrete actions.
/// @param gamma        presumably the discount factor.
/// @param initialvalue presumably the value new Q entries start with.
/// @param alpha        presumably the learning rate.
/// @param epsilon      presumably the exploration probability.
/// @param lambda       presumably the eligibility-trace decay.
/// @param rng          random number generator, taken by value; defaults to a
///                     default-constructed Random.
00026 Sarsa(int numactions, float gamma,
00027 float initialvalue, float alpha, float epsilon, float lambda,
00028 Random rng = Random());
00029
/// Copy constructor, declared explicitly (definition not shown). No copy
/// assignment operator is declared, and the const data members below make the
/// implicit one unusable, so instances can be copy-constructed but not assigned.
00032 Sarsa(const Sarsa &);
00033
/// Virtual destructor (definition not shown).
00034 virtual ~Sarsa();
00035
/// Virtual hooks, presumably overriding the Agent interface -- TODO confirm.
/// `s` is a state vector and `r` is presumably the reward for the previous
/// action; the int results are presumably action indices.
00036 virtual int first_action(const std::vector<float> &s);
00037 virtual int next_action(float r, const std::vector<float> &s);
00038 virtual void last_action(float r);
/// Takes a debug switch; presumably drives ACTDEBUG / ELIGDEBUG -- TODO confirm.
00039 virtual void setDebug(bool d);
/// Takes a vector of `experience` records by value (parameter is unnamed).
00040 virtual void seedExp(std::vector<experience>);
/// Takes a C-string file name; presumably writes the learned policy there.
00041 virtual void savePolicy(const char* filename);
00042
/// Non-virtual helpers. Note that getValue() receives the state by value, so
/// each call copies the vector.
00043 void printState(const std::vector<float> &s);
00044 float getValue(std::vector<float> state);
00045
/// Takes an iterator range over floats and returns an iterator into it.
/// Presumably picks a maximal element, breaking ties at random -- TODO confirm.
00046 std::vector<float>::iterator random_max_element(
00047 std::vector<float>::iterator start,
00048 std::vector<float>::iterator end);
00049
/// Takes an output stream pointer and four integer bounds (x/y min and max).
/// NOTE(review): `ofstream` is unqualified and this header does not include
/// <fstream>; it presumably resolves through an include and using-directive
/// pulled in by the rl_common headers -- confirm, and prefer `std::ofstream`.
00050 void logValues(ofstream *of, int xmin, int xmax, int ymin, int ymax);
00051
00052 protected:
/// Key type for the Q and eligibility maps: a pointer to an immutable state
/// vector. Because the key is a pointer, the maps order and match keys by
/// address, not by vector contents.
00056 typedef const std::vector<float> *state_t;
00057
/// Maps a state vector to a state_t. Presumably returns the address of the
/// matching element stored in `statespace`, so that equal states share a
/// single key -- TODO confirm.
00061 state_t canonicalize(const std::vector<float> &s);
00062
00063 private:
/// Set of state vectors; presumably the backing storage for state_t keys.
00067 std::set<std::vector<float> > statespace;
00068
/// Per-state float vectors keyed by state_t; presumably one entry per action
/// (action values and eligibility traces respectively) -- TODO confirm.
00073 std::map<state_t, std::vector<float> > Q;
00074 std::map<state_t, std::vector<float> > eligibility;
00075
/// Const configuration, fixed for the lifetime of the object. The names match
/// the constructor parameters, which presumably initialize them.
00076 const int numactions;
00077 const float gamma;
00078
00079 const float initialvalue;
00080 const float alpha;
00081 const float epsilon;
00082 const float lambda;
00083
/// Random number generator instance held by value.
00084 Random rng;
/// Raw pointer to a float. What it points at and who owns it are not visible
/// here -- NOTE(review): check how the copy constructor treats it.
00085 float *currentq;
00086
/// Debug flags; presumably enable action and eligibility tracing output.
00087 bool ACTDEBUG;
00088 bool ELIGDEBUG;
00089 };
00090
00091 #endif