rl_agent: ParallelETUCT.hh Source File

Go to the documentation of this file.
00001 
00011 #ifndef _ParallelETUCT_HH_
00012 #define _ParallelETUCT_HH_
00013 
00014 #include <rl_common/Random.h>
00015 #include <rl_common/core.hh>
00016 #include <rl_common/ExperienceFile.hh>
00017 
00018 #include "../Models/FactoredModel.hh"
00019 #include "../Models/C45Tree.hh"
00020 
00021 #include <set>
00022 #include <vector>
00023 #include <map>
00024 #include <sstream>
00025 #include <deque>
00026 
00028 void* parallelSearchStart(void* arg);
00029 
00031 void* parallelModelLearningStart(void* arg);
00032 
00034 class ParallelETUCT: public Planner {
00035 public:
00036 
00037 
00054   ParallelETUCT(int numactions, float gamma, float rrange, float lambda,
00055                 int MAX_ITER, float MAX_TIME, int MAX_DEPTH,  int modelType,
00056                 const std::vector<float> &featmax, const std::vector<float> &featmin,
00057                  const std::vector<int> &statesPerDim, bool trackActual, int historySize, Random rng = Random());
00058   
00061   ParallelETUCT(const ParallelETUCT &);
00062 
00063   virtual ~ParallelETUCT();
00064 
00065   virtual void setModel(MDPModel* model);
00066   virtual bool updateModelWithExperience(const std::vector<float> &last, 
00067                                          int act, 
00068                                          const std::vector<float> &curr, 
00069                                          float reward, bool term);
00070   virtual void planOnNewModel();
00071   virtual int getBestAction(const std::vector<float> &s);
00072 
00073   virtual void setSeeding(bool seed);
00074   virtual void setFirst();
00075 
00076   bool PLANNERDEBUG;
00077   bool POLICYDEBUG; //= false; //true;
00078   bool MODELDEBUG;
00079   bool ACTDEBUG;
00080   bool UCTDEBUG;
00081   bool PTHREADDEBUG;
00082   bool ATHREADDEBUG;
00083   bool MTHREADDEBUG;
00084   bool TIMINGDEBUG;
00085   bool REALSTATEDEBUG;
00086   bool HISTORYDEBUG;
00087 
00089   MDPModel* model;
00090 
00092   MDPModel* modelcopy;
00093 
00097   typedef const std::vector<float> *state_t;
00098 
00100   // parallel stuff
00102 
00103   // tell if thread started
00104   bool modelThreadStarted;
00105   bool planThreadStarted;
00106 
00108   pthread_t planThread;
00109 
00111   pthread_t modelThread;
00112 
00113   // some variables that are locked
00115   std::vector<experience> expList;
00116 
00118   state_t discPlanState;
00119   
00121   std::vector<float> actualPlanState;
00122 
00124   state_t startState;
00125 
00126   // lock over simple objects
00128   pthread_mutex_t update_mutex;
00130   pthread_mutex_t plan_state_mutex;
00132   pthread_mutex_t model_mutex;
00134   pthread_mutex_t list_mutex;
00136   pthread_mutex_t history_mutex;
00138   pthread_mutex_t nactions_mutex;
00139 
00141   pthread_mutex_t statespace_mutex;
00142 
00143   // condition for when list is updated
00144   pthread_cond_t list_cond; 
00145 
00157   float uctSearch(const std::vector<float> &actS, state_t state, int depth, std::deque<float> &history);
00158 
00160   std::vector<float> selectRandomState();
00161 
00163   void parallelModelLearning();
00164 
00166   void parallelSearch();
00167 
00169   void loadPolicy(const char* filename);
00170 
00172   void logValues(ofstream *of, int xmin, int xmax, int ymin, int ymax);
00173 
00175   std::vector<float> addVec(const std::vector<float> &a, const std::vector<float> &b);
00176 
00178   std::vector<float> subVec(const std::vector<float> &a, const std::vector<float> &b);
00179 
00180 protected:
00181 
00182 
00183   struct state_info;
00184   struct model_info;
00185 
00187   struct state_samples {
00188     std::vector<state_t> samples;
00189   };
00190 
00192   struct state_info {
00193 
00194     // data filled in from models
00195     std::map< std::deque<float>, StateActionInfo>* historyModel;
00196 
00197     // q values from policy creation
00198     std::vector<float> Q;
00199 
00200     // uct experience data
00201     int uctVisits;
00202     std::vector<int> uctActions;
00203     short unsigned int visited;
00204     short unsigned int id;
00205 
00206     // needs update
00207     bool needsUpdate;
00208 
00209     // mutex for model info, samples, everything else
00210     pthread_mutex_t statemodel_mutex;
00211     pthread_mutex_t stateinfo_mutex;
00212 
00213   };
00214 
00215 
00216 
00218   void initStateInfo(state_t s,state_info* info, int id);
00219   
00223   state_t canonicalize(const std::vector<float> &s);
00224 
00226   void deleteInfo(state_info* info);
00227   
00229   void createPolicy();
00230   
00232   void printStates();
00233   
00235   void calculateReachableStates();
00236   
00238   void removeUnreachableStates();
00239 
00241   void updateStateActionFromModel(state_t s, int a, state_info* info);
00242   
00244   void updateStateActionHistoryFromModel(const std::vector<float> modState, int a, StateActionInfo *newModel);
00245 
00247   double getSeconds();
00248 
00249   // uct stuff
00251   void resetAndUpdateStateActions();
00252   
00255   std::vector<float> simulateNextState(const std::vector<float> &actS, state_t state, state_info* info, const std::deque<float> &history, int action, float* reward, bool* term);
00256   
00258   int selectUCTAction(state_info* info);
00259   
00261   void canonNextStates(StateActionInfo* modelInfo);
00262 
00264   void initStates();
00265   
00267   void fillInState(std::vector<float>s, int depth);
00268 
00269   virtual void savePolicy(const char* filename);
00270   
00272   std::vector<float> discretizeState(const std::vector<float> &s);
00273 
00274 private:
00275 
00279   std::set<std::vector<float> > statespace;
00280 
00282   std::map<state_t, state_info> statedata;
00283 
00284   std::vector<float> featmax;
00285   std::vector<float> featmin;
00286 
00288   std::deque<float> saHistory;
00289 
00290   state_t prevstate;
00291   int prevact;
00292   state_info* previnfo;
00293 
00294   double planTime;
00295   double initTime;
00296   double setTime;
00297   bool seedMode;
00298 
00299   int nstates;
00300   int nsaved;
00301   int nactions;
00302   int lastUpdate;
00303 
00304   bool timingType;
00305 
00306   const int numactions;
00307   const float gamma;
00308   const float rrange;
00309   const float lambda;
00310 
00311   const int MAX_ITER;
00312   const float MAX_TIME;
00313   const int MAX_DEPTH;
00314   const int modelType;
00315   const std::vector<int> &statesPerDim;
00316   const bool trackActual;
00317   const int HISTORY_SIZE;
00318   const int HISTORY_FL_SIZE;
00319 
00320   const unsigned CLEAR_SIZE;
00321   ExperienceFile expfile;
00322 };
00323 
00324 #endif