rl_agent: PrioritizedSweeping.hh Source File

Go to the documentation of this file.
00001 #ifndef _PRIORITIZEDSWEEPING_HH_
00002 #define _PRIORITIZEDSWEEPING_HH_
00003 
00004 #include <rl_common/Random.h>
00005 #include <rl_common/core.hh>
00006 
00007 #include <set>
00008 #include <vector>
00009 #include <map>
      #include <list>
00010 
00011 
// Planner subclass that keeps a per-state table of Q-values and predecessor
// links plus a list of (state, action, q) entries (see the "for prioritized
// sweeping" helpers below). Only declarations are visible in this header;
// notes marked "presumably" are inferred from names and must be confirmed
// against the implementation file.
00012 class PrioritizedSweeping: public Planner {
00013 public:
00014 
        // Handle to a state: a pointer to an immutable feature vector.
        // Presumably points at the canonical copy held in `statespace`
        // (see canonicalize()) -- TODO confirm in the implementation.
00018   typedef const std::vector<float> *state_t;
00019 
00020 
        // Constructor. The first five parameters share their names with the
        // const data members declared at the bottom of the class, and
        // featmax/featmin share theirs with the per-feature bound vectors;
        // presumably each argument is stored in the matching member.
        //   numactions    - number of actions available to the agent
        //   gamma         - presumably the discount factor -- TODO confirm
        //   MAX_TIME      - presumably a limit on planning time; units are
        //                   not visible here -- TODO confirm
        //   onlyAddLastSA - flag whose effect is defined in the implementation
        //   modelType     - integer model selector; valid values not visible here
        //   featmax       - per-feature maximum values
        //   featmin       - per-feature minimum values
        //   rng           - random number generator, taken by value; defaults to
        //                   a default-constructed Random
00026   PrioritizedSweeping(int numactions, float gamma, float MAX_TIME,
00027                       bool onlyAddLastSA,  int modelType,
00028                       const std::vector<float> &featmax, 
00029                       const std::vector<float> &featmin,
00030                       Random rng = Random());
00031 
        // Copy constructor (declared here, defined elsewhere).
00034   PrioritizedSweeping(const PrioritizedSweeping &);
00035 
00036   virtual ~PrioritizedSweeping();
00037 
        // Planner entry points (presumably overriding the Planner interface,
        // which is not visible in this file).
        //
        // setModel: hands the planner the MDPModel to use. Ownership of the
        // pointer is not visible here.
00038   virtual void setModel(MDPModel* model);
        // updateModelWithExperience: reports one transition -- state `last`,
        // action `act`, resulting state `curr`, the `reward` received and
        // whether the transition was terminal (`term`).
        // NOTE(review): the meaning of the bool result is not visible here;
        // presumably it reports whether the model changed -- confirm.
00039   virtual bool updateModelWithExperience(const std::vector<float> &last, 
00040                                          int act, 
00041                                          const std::vector<float> &curr, 
00042                                          float reward, bool term);
        // planOnNewModel: planning step, presumably run after the model has
        // been updated.
00043   virtual void planOnNewModel();
        // getBestAction: returns an action index for state `s`.
00044   virtual int getBestAction(const std::vector<float> &s);
00045 
        // Public debug switches; presumably each enables diagnostic output for
        // one subsystem (planner, policy, model, action selection, list).
00046   bool PLANNERDEBUG;
00047   bool POLICYDEBUG; //= false; //true;
00048   bool MODELDEBUG;
00049   bool ACTDEBUG;
00050   bool LISTDEBUG;
00051 
        // The MDP model used by the planner (see setModel()). Publicly
        // accessible raw pointer; who owns/frees it is not visible here.
00053   MDPModel* model;
00054 
00055 
00056 
00057 protected:
00058 
00059 
        // Forward declarations. state_info is defined below; model_info is
        // only declared in this header.
00060   struct state_info;
00061   struct model_info;
00062 
        // One (state, action, value) triple; used as the element type of the
        // predecessor lists and of priorityList.
00063   struct saqPair {
00064     std::vector<float> s;   // state feature vector
00065     int a;                  // action index
00066     float q;                // presumably the priority / Q-value change -- TODO confirm
00067   };
00068 
        // Per-state bookkeeping, stored in `statedata` keyed by state_t.
00070   struct state_info {
          // Numeric identifier of the state.
00071     int id;
00072 
          // NOTE(review): exact meaning of `fresh` is not visible here.
00073     bool fresh;
00074 
00075     // experience data
          // Presumably one visit counter per action -- TODO confirm indexing.
00076     std::vector<int> visits;
00077 
00078     // data filled in from models
          // Raw pointer to model-provided info; presumably an array with one
          // entry per action, ownership unclear -- TODO confirm.
00079     StateActionInfo* modelInfo;
00080 
00081     //std::map<state_t, std::vector<float> > P;
00082     //std::vector<float> R;
00083     //std::vector<bool> known;
00084 
00085     // q values from policy creation
          // Presumably indexed by action.
00086     std::vector<float> Q;
00087     
00088     // which states lead to this state?
00089     std::list<saqPair> pred;
          // NOTE(review): presumably records, per action, when that entry was
          // last refreshed (cf. lastModelUpdate) -- confirm.
00090     std::vector<int> lastUpdate;
00091 
00092   };
00093 
00094 
00095 
00096   // various helper functions that we need
        // Sets up a state_info record (details in the implementation).
00097   void initStateInfo(state_info* info);
00098   
        // Maps a feature vector to its state_t handle. Presumably inserts the
        // vector into `statespace` if it is new and returns a pointer to the
        // stored copy -- TODO confirm.
00102   state_t canonicalize(const std::vector<float> &s);
00103 
00104   // Operational functions
00105   void deleteInfo(state_info* info);
00106   void initNewState(state_t s);
00107   void createPolicy();
00108   void printStates();
00109 
00110   // functions to update our models and get info from them
00111   void updateStatesFromModel();
00112 
        // Returns a time value in seconds as a double; the clock source and
        // epoch are not visible here.
00113   double getSeconds();
00114 
00115   // for prioritized sweeping
00116   void updatePriorityList(state_info* info, const std::vector<float> &next);
        // Compares two saqPair values; both are taken by value (copied).
        // Which fields take part in the comparison is not visible here.
00117   bool saqPairMatch(saqPair a, saqPair b);
        // NOTE(review): meaning of the float result is not visible here.
00118   float updateQValues(const std::vector<float> &state, int act);
        // Adds the (s, act, q) triple to a list, presumably `priorityList`.
00119   void addSAToList(const std::vector<float> &s, int act, float q);
00120   void updateStateActionFromModel(const std::vector<float> &state, int a);
00121 
00122 private:
00123 
        // Set of state feature vectors seen so far (one stored copy each).
00127   std::set<std::vector<float> > statespace;
00128 
        // Per-state bookkeeping, keyed by state handle (pointer value).
00130   std::map<state_t, state_info> statedata;
00131 
        // Pending (state, action, q) entries; ordering policy is defined in
        // the implementation.
00133   std::list< saqPair> priorityList;
00134 
        // Per-feature upper and lower bounds (same names as the constructor
        // parameters).
00135   std::vector<float> featmax;
00136   std::vector<float> featmin;
00137 
        // Presumably the most recent state and action passed through the
        // planner -- TODO confirm where they are written.
00138   std::vector<float> prevstate;
00139   int prevact;
00140   
00141   double planTime;
00142   int nstates;
        // NOTE(review): both `nactions` and the const `numactions` below exist;
        // how they differ is not visible here.
00143   int nactions; 
00144   int lastModelUpdate;
00145 
        // NOTE(review): presumably a step budget and a switch between
        // time-based and step-based limits -- confirm.
00146   int MAX_STEPS;
00147   bool timingType;
00148 
        // Immutable configuration; names match the constructor parameters.
        // Being const, these make the implicit copy-assignment operator
        // unavailable for this class.
00149   const int numactions;
00150   const float gamma;
00151   const float MAX_TIME;
00152   const bool onlyAddLastSA;
00153   const int modelType;
00154 
00155 };
00156 
00157 #endif