rl_agent: PolicyIteration.hh Source File

Go to the documentation of this file.
00001 #ifndef _POLICYITERATION_HH_
00002 #define _POLICYITERATION_HH_
00003 
00004 #include <rl_common/Random.h>
00005 #include <rl_common/core.hh>
00006 
00007 #include <set>
00008 #include <vector>
00009 #include <map>
00010 
00011 
00012 class PolicyIteration: public Planner {
00013 public:
00014 
00018   typedef const std::vector<float> *state_t;
00019 
00020 
00028   PolicyIteration(int numactions, float gamma,
00029                   int MAX_LOOPS, float MAX_TIME, int modelType,
00030                   const std::vector<float> &featmax, 
00031                   const std::vector<float> &featmin,
00032                    const std::vector<int> &statesPerDim,
00033                   Random rng = Random());
00034 
00037   PolicyIteration(const PolicyIteration &);
00038 
00039   virtual ~PolicyIteration();
00040 
00041   virtual void setModel(MDPModel* model);
00042   virtual bool updateModelWithExperience(const std::vector<float> &last, 
00043                                          int act, 
00044                                          const std::vector<float> &curr, 
00045                                          float reward, bool term);
00046   virtual void planOnNewModel();
00047   virtual int getBestAction(const std::vector<float> &s);
00048   virtual void savePolicy(const char* filename);
00049 
00050   bool PLANNERDEBUG;
00051   bool POLICYDEBUG; //= false; //true;
00052   bool MODELDEBUG;
00053   bool ACTDEBUG;
00054 
00056   MDPModel* model;
00057 
00058 
00059 
00060 protected:
00061 
00062 
00063   struct state_info;
00064   struct model_info;
00065 
00067   struct state_info {
00068     int id;
00069 
00070     int stepsAway;
00071     bool fresh;
00072 
00073     // experience data
00074     std::vector<int> visits;
00075 
00076     // data filled in from models
00077     StateActionInfo* modelInfo;
00078 
00079     //std::map<state_t, std::vector<float> > P;
00080     //std::vector<float> R;
00081     //std::vector<bool> known;
00082 
00083     // q values from policy creation
00084 
00085     float value;
00086     int bestAction;
00087 
00088   };
00089 
00090 
00091 
00092   // various helper functions that we need
00093   void initStateInfo(state_info* info);
00094   
00098   state_t canonicalize(const std::vector<float> &s);
00099 
00100   // Operational functions
00101   void deleteInfo(state_info* info);
00102   void initNewState(state_t s);
00103   void createPolicy();
00104   void printStates();
00105   void calculateReachableStates();
00106   void removeUnreachableStates();
00107 
00108   // functions to update our models and get info from them
00109   void updateStatesFromModel();
00110   void updateStateActionFromModel(const std::vector<float> &state, int j);
00111 
00112   double getSeconds();
00113 
00114   // for policy iter
00115   void policyEvaluation();
00116   float getActionValue(state_t s, state_info* info, int act);
00117   bool policyImprovement();
00118   std::vector<float> discretizeState(const std::vector<float> &s);
00119 
00120 private:
00121 
00125   std::set<std::vector<float> > statespace;
00126 
00128   std::map<state_t, state_info> statedata;
00129 
00130   std::vector<float> featmax;
00131   std::vector<float> featmin;
00132 
00133   std::vector<float> prevstate;
00134   int prevact;
00135 
00136   double planTime;
00137   int nstates;
00138   int nactions; 
00139   
00140   int MAX_STEPS;
00141   bool timingType;
00142 
00143   const int numactions;
00144   const float gamma;
00145 
00146   const int MAX_LOOPS;
00147   const float MAX_TIME;
00148   const int modelType;
00149   const std::vector<int> &statesPerDim;
00150 
00151 };
00152 
00153 #endif