00001 00006 #ifndef _RMAXMODEL_HH_ 00007 #define _RMAXMODEL_HH_ 00008 00009 #include <rl_common/Random.h> 00010 #include <rl_common/core.hh> 00011 #include <vector> 00012 #include <map> 00013 #include <set> 00014 00015 00017 class RMaxModel: public MDPModel { 00018 00019 public: 00020 00026 RMaxModel(int m, int nact, Random rng); 00027 00029 RMaxModel(const RMaxModel&); 00030 00031 virtual ~RMaxModel(); 00032 virtual RMaxModel* getCopy(); 00033 00034 virtual bool updateWithExperiences(std::vector<experience> &instances); 00035 virtual bool updateWithExperience(experience &e); 00036 virtual float getStateActionInfo(const std::vector<float> &state, int act, StateActionInfo* retval); 00037 00038 00039 // structs to be defined 00040 struct state_info; 00041 00042 00047 struct state_info { 00048 int id; 00049 00050 // model data (visit counts, outcome counts, reward sums, known) 00051 std::vector<int> visits; 00052 00053 std::map< std::vector<float> , std::vector<int> > outCounts; 00054 std::vector<float> Rsum; 00055 std::vector<int> terminations; 00056 00057 std::vector<bool> known; 00058 00059 }; 00060 00061 protected: 00062 typedef const std::vector<float> *state_t; 00063 00064 // various helper functions that we need 00065 00067 void initStateInfo(state_info* info); 00068 00070 state_t canonicalize(const std::vector<float> &s); 00071 00073 void checkTransitionCountSize(std::vector<int> *transCounts); 00074 00076 void initNewState(state_t s); 00077 00078 00079 00080 private: 00081 00085 std::set<std::vector<float> > statespace; 00086 00088 std::map<state_t, state_info> statedata; 00089 00090 int nstates; 00091 00092 int M; 00093 int nact; 00094 Random rng; 00095 00096 bool RMAX_DEBUG; 00097 00098 }; 00099 00100 00101 00102 #endif