Go to the documentation of this file.00001
00011 #ifndef _ParallelETUCT_HH_
00012 #define _ParallelETUCT_HH_
00013
00014 #include <rl_common/Random.h>
00015 #include <rl_common/core.hh>
00016 #include <rl_common/ExperienceFile.hh>
00017
00018 #include "../Models/FactoredModel.hh"
00019 #include "../Models/C45Tree.hh"
00020
00021 #include <set>
00022 #include <vector>
00023 #include <map>
00024 #include <sstream>
00025 #include <deque>
00026
00028 void* parallelSearchStart(void* arg);
00029
00031 void* parallelModelLearningStart(void* arg);
00032
00034 class ParallelETUCT: public Planner {
00035 public:
00036
00037
00054 ParallelETUCT(int numactions, float gamma, float rrange, float lambda,
00055 int MAX_ITER, float MAX_TIME, int MAX_DEPTH, int modelType,
00056 const std::vector<float> &featmax, const std::vector<float> &featmin,
00057 const std::vector<int> &statesPerDim, bool trackActual, int historySize, Random rng = Random());
00058
00061 ParallelETUCT(const ParallelETUCT &);
00062
00063 virtual ~ParallelETUCT();
00064
00065 virtual void setModel(MDPModel* model);
00066 virtual bool updateModelWithExperience(const std::vector<float> &last,
00067 int act,
00068 const std::vector<float> &curr,
00069 float reward, bool term);
00070 virtual void planOnNewModel();
00071 virtual int getBestAction(const std::vector<float> &s);
00072
00073 virtual void setSeeding(bool seed);
00074 virtual void setFirst();
00075
00076 bool PLANNERDEBUG;
00077 bool POLICYDEBUG;
00078 bool MODELDEBUG;
00079 bool ACTDEBUG;
00080 bool UCTDEBUG;
00081 bool PTHREADDEBUG;
00082 bool ATHREADDEBUG;
00083 bool MTHREADDEBUG;
00084 bool TIMINGDEBUG;
00085 bool REALSTATEDEBUG;
00086 bool HISTORYDEBUG;
00087
00089 MDPModel* model;
00090
00092 MDPModel* modelcopy;
00093
00097 typedef const std::vector<float> *state_t;
00098
00100
00102
00103
00104 bool modelThreadStarted;
00105 bool planThreadStarted;
00106
00108 pthread_t planThread;
00109
00111 pthread_t modelThread;
00112
00113
00115 std::vector<experience> expList;
00116
00118 state_t discPlanState;
00119
00121 std::vector<float> actualPlanState;
00122
00124 state_t startState;
00125
00126
00128 pthread_mutex_t update_mutex;
00130 pthread_mutex_t plan_state_mutex;
00132 pthread_mutex_t model_mutex;
00134 pthread_mutex_t list_mutex;
00136 pthread_mutex_t history_mutex;
00138 pthread_mutex_t nactions_mutex;
00139
00141 pthread_mutex_t statespace_mutex;
00142
00143
00144 pthread_cond_t list_cond;
00145
00157 float uctSearch(const std::vector<float> &actS, state_t state, int depth, std::deque<float> &history);
00158
00160 std::vector<float> selectRandomState();
00161
00163 void parallelModelLearning();
00164
00166 void parallelSearch();
00167
00169 void loadPolicy(const char* filename);
00170
00172 void logValues(ofstream *of, int xmin, int xmax, int ymin, int ymax);
00173
00175 std::vector<float> addVec(const std::vector<float> &a, const std::vector<float> &b);
00176
00178 std::vector<float> subVec(const std::vector<float> &a, const std::vector<float> &b);
00179
00180 protected:
00181
00182
00183 struct state_info;
00184 struct model_info;
00185
00187 struct state_samples {
00188 std::vector<state_t> samples;
00189 };
00190
00192 struct state_info {
00193
00194
00195 std::map< std::deque<float>, StateActionInfo>* historyModel;
00196
00197
00198 std::vector<float> Q;
00199
00200
00201 int uctVisits;
00202 std::vector<int> uctActions;
00203 short unsigned int visited;
00204 short unsigned int id;
00205
00206
00207 bool needsUpdate;
00208
00209
00210 pthread_mutex_t statemodel_mutex;
00211 pthread_mutex_t stateinfo_mutex;
00212
00213 };
00214
00215
00216
00218 void initStateInfo(state_t s,state_info* info, int id);
00219
00223 state_t canonicalize(const std::vector<float> &s);
00224
00226 void deleteInfo(state_info* info);
00227
00229 void createPolicy();
00230
00232 void printStates();
00233
00235 void calculateReachableStates();
00236
00238 void removeUnreachableStates();
00239
00241 void updateStateActionFromModel(state_t s, int a, state_info* info);
00242
00244 void updateStateActionHistoryFromModel(const std::vector<float> modState, int a, StateActionInfo *newModel);
00245
00247 double getSeconds();
00248
00249
00251 void resetAndUpdateStateActions();
00252
00255 std::vector<float> simulateNextState(const std::vector<float> &actS, state_t state, state_info* info, const std::deque<float> &history, int action, float* reward, bool* term);
00256
00258 int selectUCTAction(state_info* info);
00259
00261 void canonNextStates(StateActionInfo* modelInfo);
00262
00264 void initStates();
00265
00267 void fillInState(std::vector<float>s, int depth);
00268
00269 virtual void savePolicy(const char* filename);
00270
00272 std::vector<float> discretizeState(const std::vector<float> &s);
00273
00274 private:
00275
00279 std::set<std::vector<float> > statespace;
00280
00282 std::map<state_t, state_info> statedata;
00283
00284 std::vector<float> featmax;
00285 std::vector<float> featmin;
00286
00288 std::deque<float> saHistory;
00289
00290 state_t prevstate;
00291 int prevact;
00292 state_info* previnfo;
00293
00294 double planTime;
00295 double initTime;
00296 double setTime;
00297 bool seedMode;
00298
00299 int nstates;
00300 int nsaved;
00301 int nactions;
00302 int lastUpdate;
00303
00304 bool timingType;
00305
00306 const int numactions;
00307 const float gamma;
00308 const float rrange;
00309 const float lambda;
00310
00311 const int MAX_ITER;
00312 const float MAX_TIME;
00313 const int MAX_DEPTH;
00314 const int modelType;
00315 const std::vector<int> &statesPerDim;
00316 const bool trackActual;
00317 const int HISTORY_SIZE;
00318 const int HISTORY_FL_SIZE;
00319
00320 const unsigned CLEAR_SIZE;
00321 ExperienceFile expfile;
00322 };
00323
00324 #endif