Go to the documentation of this file.00001
00005 #ifndef _QLEARNER_HH_
00006 #define _QLEARNER_HH_
00007
#include <rl_common/Random.h>
#include <rl_common/core.hh>

#include <fstream>
#include <map>
#include <set>
#include <vector>
00014
00017 class QLearner: public Agent {
00018 public:
00026 QLearner(int numactions, float gamma,
00027 float initialvalue, float alpha, float epsilon,
00028 Random rng = Random());
00029
00032 QLearner(const QLearner &);
00033
00034 virtual ~QLearner();
00035
00036 virtual int first_action(const std::vector<float> &s);
00037 virtual int next_action(float r, const std::vector<float> &s);
00038 virtual void last_action(float r);
00039 virtual void setDebug(bool d);
00040 virtual void seedExp(std::vector<experience>);
00041 virtual void savePolicy(const char* filename);
00042 void loadPolicy(const char* filename);
00043
00044 void printState(const std::vector<float> &s);
00045 float getValue(std::vector<float> state);
00046
00047 std::vector<float>::iterator random_max_element(
00048 std::vector<float>::iterator start,
00049 std::vector<float>::iterator end);
00050
00051 void logValues(ofstream *of, int xmin, int xmax, int ymin, int ymax);
00052 float epsilon;
00053
00054 protected:
00058 typedef const std::vector<float> *state_t;
00059
00063 state_t canonicalize(const std::vector<float> &s);
00064
00065 private:
00069 std::set<std::vector<float> > statespace;
00070
00075 std::map<state_t, std::vector<float> > Q;
00076
00077 const int numactions;
00078 const float gamma;
00079
00080 const float initialvalue;
00081 const float alpha;
00082
00083 Random rng;
00084 float *currentq;
00085
00086 bool ACTDEBUG;
00087 };
00088
00089 #endif