Go to the documentation of this file.
00001
00006 #ifndef _VALUEITERATION_HH_
00007 #define _VALUEITERATION_HH_
00008
00009 #include <rl_common/Random.h>
00010 #include <rl_common/core.hh>
00011
00012 #include <set>
00013 #include <vector>
00014 #include <map>
00015
// Planner subclass declared in this header; all member definitions live in a
// separate translation unit that is not visible here. The leading "000NN"
// tokens on each line are listing line numbers, not part of the C++ code.
// NOTE(review): judging by the name this is a value-iteration planner over a
// discretized state space -- confirm against the implementation file.
00017 class ValueIteration: public Planner {
00018 public:
00019
// Main constructor.
//   numactions, gamma, MAX_LOOPS, MAX_TIME, modelType, statesPerDim:
//       share their names with the const data members declared at the bottom
//       of the class, so they are presumably copied into those members --
//       TODO confirm in the constructor definition.
//   featmax / featmin: per-feature vectors; same names as the private
//       featmax / featmin members (presumably upper/lower bounds per state
//       feature -- verify).
//   rng: taken by value; defaults to a default-constructed Random.
00031 ValueIteration(int numactions, float gamma,
00032 int MAX_LOOPS, float MAX_TIME, int modelType,
00033 const std::vector<float> &featmax,
00034 const std::vector<float> &featmin, const std::vector<int> &statesPerDim,
00035 Random rng = Random());
00036
// User-declared copy constructor. No copy-assignment operator is declared;
// because the class has const non-static data members, the implicit
// copy-assignment operator is defined as deleted, so objects can be
// copy-constructed but not assigned.
00039 ValueIteration(const ValueIteration &);
00040
// Virtual destructor, so deleting through a base-class pointer is well-defined.
00041 virtual ~ValueIteration();
00042
// Virtual interface. NOTE(review): these are presumably overrides of virtuals
// declared in Planner (rl_common/core.hh); that header is not visible here and
// none of them is marked `override`, so a signature mismatch would silently
// create a new virtual instead of overriding -- verify against Planner.
00043 virtual void setModel(MDPModel* model);
// Takes the previous state, the action index, the resulting state, the reward
// and a terminal flag; returns bool (meaning of the result is not visible
// here -- presumably "model changed"; TODO confirm).
00044 virtual bool updateModelWithExperience(const std::vector<float> &last,
00045 int act,
00046 const std::vector<float> &curr,
00047 float reward, bool term);
00048 virtual void planOnNewModel();
// Returns an action as an int for state s.
00049 virtual int getBestAction(const std::vector<float> &s);
00050 virtual void savePolicy(const char* filename);
00051
// Non-virtual public helper; presumably populates the state containers
// declared in the private section -- TODO confirm.
00053 void initStates();
00054
// Note: the state vector s is taken BY VALUE, so each call works on its own
// copy. NOTE(review): together with the `depth` parameter this looks like a
// recursive per-dimension enumeration -- confirm in the definition.
00056 void fillInState(std::vector<float>s, int depth);
00057
// Public, directly writable debug switches (no accessors are declared).
// Their initial values are not set in this header.
00058 bool PLANNERDEBUG;
00059 bool POLICYDEBUG;
00060 bool MODELDEBUG;
00061 bool ACTDEBUG;
00062
// Public raw pointer to the model. Ownership/lifetime cannot be determined
// from this header -- check setModel() and the destructor before assuming
// the planner deletes it.
00064 MDPModel* model;
00065
// Handle type for a state: a pointer to a const feature vector. It is the
// return type of canonicalize() and the key type of `statedata` below.
// Presumably these pointers refer to elements stored in `statespace` --
// TODO confirm.
00069 typedef const std::vector<float> *state_t;
00070
00071
00072 protected:
00073
00074
// Forward declarations. state_info is defined immediately below; model_info
// is only declared in the visible part of this class and never defined here.
00075 struct state_info;
00076 struct model_info;
00077
// Per-state record; used as the mapped type of `statedata`.
// NOTE(review): the field meanings below are inferred from names only.
00079 struct state_info {
00080
00081 int id;
00082
// presumably a distance (in steps) used by the reachability pass -- verify
00083 int stepsAway;
00084 bool fresh;
00085
00086
// presumably one counter per action -- TODO confirm sizing in initStateInfo()
00087 std::vector<int> visits;
00088
00089
// Raw pointer to model data for this state. NOTE(review): pointer vs. array
// and ownership are not visible here; see initStateInfo()/deleteInfo().
00090 StateActionInfo* modelInfo;
00091
00092
00093
00094
00095
00096
// presumably the action-value estimates, one entry per action -- TODO confirm
00097 std::vector<float> Q;
00098
00099 };
00100
// Initializes the state_info object pointed to by `info`.
00102 void initStateInfo(state_info* info);
00103
// Maps a state vector to its state_t handle. NOTE(review): presumably returns
// the address of the matching element in `statespace`, inserting it if
// absent -- confirm in the definition.
00107 state_t canonicalize(const std::vector<float> &s);
00108
// Counterpart to initStateInfo(); presumably releases what it acquired.
00110 void deleteInfo(state_info* info);
00111
00113 void initNewState(state_t s);
00114
00116 void createPolicy();
00117
00119 void printStates();
00120
00122 void calculateReachableStates();
00123
00125 void removeUnreachableStates();
00126
00128 void updateStatesFromModel();
00129
// `j` is an int index; presumably the action index -- TODO confirm.
00131 void updateStateActionFromModel(const std::vector<float> &state, int j);
00132
// Returns a time value as double; clock source and epoch are not visible here.
00134 double getSeconds();
00135
// Returns a new vector by value; the input is not modified (const reference).
// NOTE(review): presumably snaps s onto the grid implied by featmin/featmax
// and statesPerDim -- verify.
00137 std::vector<float> discretizeState(const std::vector<float> &s);
00138
00139 private:
00140
// NOTE: non-static data members are initialized in declaration order,
// regardless of the order written in a constructor's initializer list.
// Do not reorder the members below without checking the constructors.

// Set of state vectors (ordered, unique). std::set keeps element addresses
// stable across insertions, which matters if state_t pointers refer into it.
00144 std::set<std::vector<float> > statespace;
00145
// Per-state data keyed by state_t. The key is a pointer, so lookup and
// ordering are by pointer value, not by vector contents.
00147 std::map<state_t, state_info> statedata;
00148
00149 std::vector<float> featmax;
00150 std::vector<float> featmin;
00151
// presumably the most recent state/action seen by the planner -- TODO confirm
00152 std::vector<float> prevstate;
00153 int prevact;
00154
00155 double planTime;
00156
00157 int nstates;
// NOTE(review): both `nactions` (mutable) and `numactions` (const, below)
// exist; their relationship is not visible in this header.
00158 int nactions;
00159
00160 int MAX_STEPS;
00161 bool timingType;
00162
// Const configuration members; they must be set in every constructor's
// initializer list and cannot be changed afterwards.
00163 const int numactions;
00164 const float gamma;
00165
00166 const int MAX_LOOPS;
00167 const float MAX_TIME;
00168 const int modelType;
00169 const std::vector<int> statesPerDim;
00170
00171 };
00172
00173
00174 #endif