Go to the documentation of this file.00001 #include "MBS.hh"
00002 #include <algorithm>
00003
00004
00005 #include <sys/time.h>
00006
00007
00008 MBS::MBS(int numactions, float gamma,
00009 int MAX_LOOPS, float MAX_TIME, int modelType,
00010 const std::vector<float> &fmax,
00011 const std::vector<float> &fmin,
00012 const std::vector<int> &n,
00013 const int k, Random newRng):
00014 k(k)
00015 {
00016
00017 vi = new ValueIteration(numactions, gamma, MAX_LOOPS, MAX_TIME, modelType,
00018 fmax, fmin, n, newRng);
00019 DELAYDEBUG = false;
00020 seedMode = false;
00021
00022 if (DELAYDEBUG) cout << "MBS delay planner with k = " << k << endl;
00023 }
00024
00025 MBS::~MBS() {
00026 delete vi;
00027 }
00028
00029 void MBS::setModel(MDPModel* m){
00030 vi->setModel(m);
00031 model = m;
00032 }
00033
00034
00036 bool MBS::updateModelWithExperience(const std::vector<float> &laststate,
00037 int lastact,
00038 const std::vector<float> &currstate,
00039 float reward, bool term){
00040
00041 if (seedMode) return false;
00042
00043
00044 if (DELAYDEBUG) cout << "add new action " << lastact << " to list" << endl;
00045 actHistory.push_back(lastact);
00046
00047 if (actHistory.size() > k){
00048 int effectiveAction = actHistory.front();
00049 actHistory.pop_front();
00050
00051
00052
00053 if (DELAYDEBUG){
00054 cout << "update with old act: " << effectiveAction << endl;
00055 cout << "from: " << laststate[0] << ", " << laststate[1];
00056 cout << " to: " << currstate[0] << ", " << currstate[1];
00057 cout << " reward: " << reward << " term: " << term << endl;
00058 }
00059
00060
00061 return vi->updateModelWithExperience(laststate, effectiveAction,
00062 currstate, reward, term);
00063 }
00064
00065 return false;
00066
00067 }
00068
00069
00071 int MBS::getBestAction(const std::vector<float> &state){
00072 std::vector<float> statePred = state;
00073
00074
00075 for (unsigned i = 0; i < actHistory.size(); i++){
00076 if (DELAYDEBUG) cout << i << " prediction: "
00077 << statePred[0] << ", " << statePred[1]
00078 << " pred for act: " << actHistory[i] << endl;
00079 StateActionInfo prediction;
00080 model->getStateActionInfo(statePred, actHistory[i], &prediction);
00081
00082
00083 std::vector<float> possibleNext;
00084 float maxProb = -1;
00085 for (std::map<std::vector<float>, float>::iterator it = prediction.transitionProbs.begin(); it != prediction.transitionProbs.end(); it++){
00086
00087 float prob = (*it).second;
00088 if (prob > maxProb){
00089 possibleNext = (*it).first;
00090 maxProb = prob;
00091 }
00092 }
00093 statePred = possibleNext;
00094
00095 }
00096 if (DELAYDEBUG) cout << "predict current state is " << statePred[0] << ", " << statePred[1] << endl;
00097
00098
00099 int act = vi->getBestAction(statePred);
00100
00101 if (DELAYDEBUG) cout << "best action is " << act << endl << endl;
00102
00103 return act;
00104
00105 }
00106
00107
00108 void MBS::planOnNewModel(){
00109 vi->planOnNewModel();
00110 }
00111
00112 void MBS::savePolicy(const char* filename){
00113 vi->savePolicy(filename);
00114 }
00115
00116 void MBS::setSeeding(bool seeding){
00117
00118 seedMode = seeding;
00119
00120 }
00121
00122
00123 void MBS::setFirst(){
00124
00125 actHistory.clear();
00126 }