MBS.cc
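
/*
 * MBS (Model Based Simulation) planner for domains with a fixed k-step
 * action delay. It wraps the ValueIteration planner: each observed
 * transition is re-paired with the action taken k steps earlier, and
 * action selection first simulates the k queued actions forward to
 * estimate the state in which the chosen action will actually apply.
 */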
#include "MBS.hh"
#include <algorithm>
#include <iostream>  // for cout/endl (DELAYDEBUG traces), in case MBS.hh does not pull it in

//#include <time.h>
#include <sys/time.h>

MBS::MBS(int numactions, float gamma,
         int MAX_LOOPS, float MAX_TIME, int modelType,
         const std::vector<float> &fmax,
         const std::vector<float> &fmin,
         const std::vector<int> &n,
         const int k, Random newRng):
  k(k)
{

  // underlying planner for the undelayed problem
  vi = new ValueIteration(numactions, gamma, MAX_LOOPS, MAX_TIME, modelType,
                          fmax, fmin, n, newRng);
  DELAYDEBUG = false; //true;
  seedMode = false;

  if (DELAYDEBUG) cout << "MBS delay planner with k = " << k << endl;
}

MBS::~MBS() {
  delete vi;
}

void MBS::setModel(MDPModel* m){
  vi->setModel(m);
  model = m;
}

bool MBS::updateModelWithExperience(const std::vector<float> &laststate,
                                    int lastact,
                                    const std::vector<float> &currstate,
                                    float reward, bool term){

  // in seeding mode, experiences are ignored
  if (seedMode) return false;

  // add this action to our history list
  if (DELAYDEBUG) cout << "add new action " << lastact << " to list" << endl;
  actHistory.push_back(lastact);

  if (actHistory.size() > k){
    int effectiveAction = actHistory.front();
    actHistory.pop_front();

    // once more than k actions are queued, the observed transition was
    // caused by the action taken k steps ago, so credit that action
    if (DELAYDEBUG){
      cout << "update with old act: " << effectiveAction << endl;
      cout << "from: " << laststate[0] << ", " << laststate[1];
      cout << " to: " << currstate[0] << ", " << currstate[1];
      cout << " reward: " << reward << " term: " << term << endl;
    }

    return vi->updateModelWithExperience(laststate, effectiveAction,
                                         currstate, reward, term);
  }

  return false;

}
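
// Worked example with k = 2: after actions a0, a1, a2 have been queued,
// the transition reported together with a2 is credited to a0, because
// with a two-step delay a0 is the action that actually executed during
// that transition.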


int MBS::getBestAction(const std::vector<float> &state){
  std::vector<float> statePred = state;

  // figure out what state we expect to be in once the k queued
  // actions have taken effect
  for (unsigned i = 0; i < actHistory.size(); i++){
    if (DELAYDEBUG) cout << i << " prediction: "
                         << statePred[0] << ", " << statePred[1]
                         << " pred for act: " << actHistory[i] << endl;
    StateActionInfo prediction;
    model->getStateActionInfo(statePred, actHistory[i], &prediction);

    // find most likely next state
    std::vector<float> possibleNext;
    float maxProb = -1;
    for (std::map<std::vector<float>, float>::iterator it = prediction.transitionProbs.begin();
         it != prediction.transitionProbs.end(); it++){

      float prob = (*it).second;
      if (prob > maxProb){
        possibleNext = (*it).first;
        maxProb = prob;
      }
    }
    // if the model made no prediction, keep the current estimate
    if (!possibleNext.empty()) statePred = possibleNext;

  }
  if (DELAYDEBUG) cout << "predict current state is " << statePred[0] << ", " << statePred[1] << endl;

  // query the underlying planner for the predicted state
  int act = vi->getBestAction(statePred);

  if (DELAYDEBUG) cout << "best action is " << act << endl << endl;

  return act;

}
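
// Note that the forward simulation above is greedy: for each queued
// action it follows only the single most likely next state, rather than
// tracking a full distribution over possible states.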


void MBS::planOnNewModel(){
  vi->planOnNewModel();
}

void MBS::savePolicy(const char* filename){
  vi->savePolicy(filename);
}

void MBS::setSeeding(bool seeding){

  seedMode = seeding;

}


void MBS::setFirst(){
  // first action of an episode: reset the history of queued actions
  actHistory.clear();
}
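
/*
 * Usage sketch (hypothetical wiring; in practice the surrounding
 * rl_agent code drives these calls, and the boolean returned by
 * updateModelWithExperience is assumed here to mean "model changed"):
 *
 *   MBS planner(numactions, gamma, MAX_LOOPS, MAX_TIME, modelType,
 *               fmax, fmin, n, k, rng);
 *   planner.setModel(model);
 *   planner.setFirst();                            // new episode
 *   int a = planner.getBestAction(state);
 *   // ... execute a, observe next, reward, term ...
 *   if (planner.updateModelWithExperience(state, a, next, reward, term))
 *     planner.planOnNewModel();                    // replan on change
 */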


rl_agent
Author(s): Todd Hester