#ifndef _PRIORITIZEDSWEEPING_HH_
#define _PRIORITIZEDSWEEPING_HH_

#include <rl_common/Random.h>
#include <rl_common/core.hh>

#include <list>
#include <map>
#include <set>
#include <vector>


00012 class PrioritizedSweeping: public Planner {
00013 public:
00014
00018 typedef const std::vector<float> *state_t;
00019
00020
00026 PrioritizedSweeping(int numactions, float gamma, float MAX_TIME,
00027 bool onlyAddLastSA, int modelType,
00028 const std::vector<float> &featmax,
00029 const std::vector<float> &featmin,
00030 Random rng = Random());
00031
00034 PrioritizedSweeping(const PrioritizedSweeping &);
00035
00036 virtual ~PrioritizedSweeping();
00037
00038 virtual void setModel(MDPModel* model);
00039 virtual bool updateModelWithExperience(const std::vector<float> &last,
00040 int act,
00041 const std::vector<float> &curr,
00042 float reward, bool term);
00043 virtual void planOnNewModel();
00044 virtual int getBestAction(const std::vector<float> &s);
00045
00046 bool PLANNERDEBUG;
00047 bool POLICYDEBUG;
00048 bool MODELDEBUG;
00049 bool ACTDEBUG;
00050 bool LISTDEBUG;
00051
00053 MDPModel* model;
00054
00055
00056
00057 protected:
00058
00059
00060 struct state_info;
00061 struct model_info;
00062
00063 struct saqPair {
00064 std::vector<float> s;
00065 int a;
00066 float q;
00067 };
00068
00070 struct state_info {
00071 int id;
00072
00073 bool fresh;
00074
00075
00076 std::vector<int> visits;
00077
00078
00079 StateActionInfo* modelInfo;
00080
00081
00082
00083
00084
00085
00086 std::vector<float> Q;
00087
00088
00089 std::list<saqPair> pred;
00090 std::vector<int> lastUpdate;
00091
00092 };
00093
00094
00095
00096
00097 void initStateInfo(state_info* info);
00098
00102 state_t canonicalize(const std::vector<float> &s);
00103
00104
00105 void deleteInfo(state_info* info);
00106 void initNewState(state_t s);
00107 void createPolicy();
00108 void printStates();
00109
00110
00111 void updateStatesFromModel();
00112
00113 double getSeconds();
00114
00115
00116 void updatePriorityList(state_info* info, const std::vector<float> &next);
00117 bool saqPairMatch(saqPair a, saqPair b);
00118 float updateQValues(const std::vector<float> &state, int act);
00119 void addSAToList(const std::vector<float> &s, int act, float q);
00120 void updateStateActionFromModel(const std::vector<float> &state, int a);
00121
00122 private:
00123
00127 std::set<std::vector<float> > statespace;
00128
00130 std::map<state_t, state_info> statedata;
00131
00133 std::list< saqPair> priorityList;
00134
00135 std::vector<float> featmax;
00136 std::vector<float> featmin;
00137
00138 std::vector<float> prevstate;
00139 int prevact;
00140
00141 double planTime;
00142 int nstates;
00143 int nactions;
00144 int lastModelUpdate;
00145
00146 int MAX_STEPS;
00147 bool timingType;
00148
00149 const int numactions;
00150 const float gamma;
00151 const float MAX_TIME;
00152 const bool onlyAddLastSA;
00153 const int modelType;
00154
00155 };

#endif