#ifndef _POLICYITERATION_HH_
#define _POLICYITERATION_HH_

#include <rl_common/Random.h>
#include <rl_common/core.hh>

#include <set>
#include <vector>
#include <map>


00012 class PolicyIteration: public Planner {
00013 public:
00014
00018 typedef const std::vector<float> *state_t;
00019
00020
00028 PolicyIteration(int numactions, float gamma,
00029 int MAX_LOOPS, float MAX_TIME, int modelType,
00030 const std::vector<float> &featmax,
00031 const std::vector<float> &featmin,
00032 const std::vector<int> &statesPerDim,
00033 Random rng = Random());
00034
00037 PolicyIteration(const PolicyIteration &);
00038
00039 virtual ~PolicyIteration();
00040
00041 virtual void setModel(MDPModel* model);
00042 virtual bool updateModelWithExperience(const std::vector<float> &last,
00043 int act,
00044 const std::vector<float> &curr,
00045 float reward, bool term);
00046 virtual void planOnNewModel();
00047 virtual int getBestAction(const std::vector<float> &s);
00048 virtual void savePolicy(const char* filename);
00049
00050 bool PLANNERDEBUG;
00051 bool POLICYDEBUG;
00052 bool MODELDEBUG;
00053 bool ACTDEBUG;
00054
00056 MDPModel* model;
00057
00058
00059
00060 protected:
00061
00062
00063 struct state_info;
00064 struct model_info;
00065
00067 struct state_info {
00068 int id;
00069
00070 int stepsAway;
00071 bool fresh;
00072
00073
00074 std::vector<int> visits;
00075
00076
00077 StateActionInfo* modelInfo;
00078
00079
00080
00081
00082
00083
00084
00085 float value;
00086 int bestAction;
00087
00088 };
00089
00090
00091
00092
00093 void initStateInfo(state_info* info);
00094
00098 state_t canonicalize(const std::vector<float> &s);
00099
00100
00101 void deleteInfo(state_info* info);
00102 void initNewState(state_t s);
00103 void createPolicy();
00104 void printStates();
00105 void calculateReachableStates();
00106 void removeUnreachableStates();
00107
00108
00109 void updateStatesFromModel();
00110 void updateStateActionFromModel(const std::vector<float> &state, int j);
00111
00112 double getSeconds();
00113
00114
00115 void policyEvaluation();
00116 float getActionValue(state_t s, state_info* info, int act);
00117 bool policyImprovement();
00118 std::vector<float> discretizeState(const std::vector<float> &s);
00119
00120 private:
00121
00125 std::set<std::vector<float> > statespace;
00126
00128 std::map<state_t, state_info> statedata;
00129
00130 std::vector<float> featmax;
00131 std::vector<float> featmin;
00132
00133 std::vector<float> prevstate;
00134 int prevact;
00135
00136 double planTime;
00137 int nstates;
00138 int nactions;
00139
00140 int MAX_STEPS;
00141 bool timingType;
00142
00143 const int numactions;
00144 const float gamma;
00145
00146 const int MAX_LOOPS;
00147 const float MAX_TIME;
00148 const int modelType;
00149 const std::vector<int> &statesPerDim;
00150
00151 };

#endif