00001 #include <sstream>
00002 #include <fstream>
00003 #include "SimulationEngine.h"
00004 #include "AlphaVectorPolicy.h"
00005 #include "CPTimer.h"
00006 #include "solverUtils.h"
00007
00008 using namespace std;
00009 using namespace momdp;
00010
00011 namespace momdp
00012 {
/**
 * Writes the values of `tuple` to `*streamOut` as one parenthesised,
 * comma-separated line, e.g. "(v1,v2,v3)", followed by std::endl.
 *
 * Values appear in the map's iteration order (ascending key order); the keys
 * themselves are not printed. An empty map produces "()".
 *
 * @param tuple      key/value pairs whose values are printed
 * @param streamOut  destination stream; dereferenced unconditionally, so it
 *                   must not be null
 */
void printTuple(std::map<std::string, std::string> tuple, std::ofstream* streamOut)
{
    std::ofstream& out = *streamOut;

    out << "(";
    bool needSeparator = false;
    for (std::map<std::string, std::string>::const_iterator entry = tuple.begin();
         entry != tuple.end(); ++entry)
    {
        // Separator goes between values only, never after the last one.
        if (needSeparator)
        {
            out << ",";
        }
        out << entry->second;
        needSeparator = true;
    }
    out << ")" << std::endl;
}
00023
00024 SimulationEngine::SimulationEngine()
00025 {
00026 }
00027
00028 SimulationEngine::~SimulationEngine(void)
00029 {
00030 }
00031
00032 void SimulationEngine::checkTerminal(string p, string s, vector<int> &bhout, vector<int> &fhout) {
00033 if (s.substr(0,3) == "bt2") {
00034 if (s.substr(9,2) == "FH") {
00035 int ind = atoi(p.substr(4,1).c_str());
00036 fhout[ind]++;
00037 } else if (s.substr(9,2) == "BH") {
00038 int ind = atoi(p.substr(4,1).c_str());
00039 bhout[ind]++;
00040 }
00041 }
00042 }
00043
00044 int SimulationEngine::getGreedyAction(vector<int> &bhout, vector<int> &fhout) {
00045 int greedyAction = 2;
00046 int currBest = bhout[0];
00047
00048 vector<int> temp;
00049 for (int i=0;i<(int)bhout.size();i++){
00050 temp.push_back(bhout[i]);
00051 }
00052 for (int i=0;i<(int)fhout.size();i++){
00053 temp.push_back(fhout[i]);
00054 }
00055
00056 for (int i=1; i<(int)temp.size();i++){
00057 if (temp[i]>currBest) {
00058 currBest = temp[i];
00059 greedyAction = 2+i;
00060 }
00061 }
00062
00063 return greedyAction;
00064 }
00065
00066 void SimulationEngine::setup(SharedPointer<MOMDP> problem, SharedPointer<AlphaVectorPolicy> policy, SolverParams * solverParams)
00067 {
00068 this->policy = policy;
00069 this->problem = problem;
00070 this->solverParams = solverParams;
00071 }
00072
00073 void SimulationEngine::performActionObs(belief_vector& outBelObs, int action, const BeliefWithState& belSt) const
00074 {
00075
00076 if (problem->XStates->size() == 1)
00077 {
00078
00079 outBelObs.resize(1);
00080 outBelObs.push_back(0,1.0);
00081 }
00082 else
00083 {
00084
00085 const SharedPointer<SparseMatrix> transMatX = problem->XTrans->getMatrix(action, belSt.sval);
00086 mult(outBelObs, *belSt.bvec, *transMatX);
00087 }
00088 }
00089
00090 void SimulationEngine::performActionUnobs(belief_vector& outBelUnobs, int action, const BeliefWithState& belSt, int currObsState) const
00091 {
00092 const SharedPointer<SparseMatrix> transMatY = problem->YTrans->getMatrix(action, belSt.sval, currObsState);
00093 mult(outBelUnobs, *belSt.bvec, *transMatY);
00094 }
00095
00096 void SimulationEngine::getPossibleObservations(belief_vector& possObs, int action, const BeliefWithState& belSt) const
00097 {
00098
00099 const SharedPointer<SparseMatrix> obsMat = problem->obsProb->getMatrix(action, belSt.sval);
00100 mult(possObs, *belSt.bvec, *obsMat);
00101 }
00102
00103
00104 double SimulationEngine::getReward(const BeliefWithState& belst, int action)
00105 {
00106
00107 const SharedPointer<SparseMatrix> rewMat = problem->rewards->getMatrix(belst.sval);
00108 return inner_prod_column(*rewMat, action, *belst.bvec);
00109 }
00110
00111 string SimulationEngine::toString()
00112 {
00113 std::ostringstream mystrm;
00114 mystrm << "action selector: (replaced by Policy) ";
00115 return mystrm.str();
00116 }
00117
00118 void SimulationEngine::display(belief_vector& b, ostream& s)
00119 {
00120 for(unsigned int i = 0; i < b.filled(); i++)
00121 {
00122 s << b.data[i].index << " -> " << b.data[i].value << endl;
00123 }
00124 }
00125
00126 int SimulationEngine::runFor(int iters, ofstream* streamOut, double& reward, double& expReward)
00127 {
00128 DEBUG_TRACE(cout << "runFor" << endl; );
00129 DEBUG_TRACE(cout << "iters " << iters << endl; );
00130
00131
00132 DEBUG_TRACE(cout << "startBeliefX" << endl; );
00133 DEBUG_TRACE(startBeliefX.write(cout) << endl;);
00134
00135
00136 bool enableFiling = false;
00137 if(streamOut == NULL)
00138 {
00139 enableFiling = false;
00140 }
00141 else
00142 {
00143 enableFiling = true;
00144 }
00145
00146
00147 SharedPointer<BeliefWithState> actStateCompl (new BeliefWithState());
00148 SharedPointer<BeliefWithState> actNewStateCompl (new BeliefWithState());
00149
00150
00151
00152 SharedPointer<BeliefWithState> nextBelSt;
00153 SharedPointer<BeliefWithState> currBelSt (new BeliefWithState());
00154
00155
00156
00157 DenseVector currBelX;
00158
00159
00160 if (problem->initialBeliefStval->sval == -1)
00161 {
00162
00163 const SharedPointer<DenseVector>& startBeliefX = problem->initialBeliefX;
00164 actStateCompl->sval = chooseFromDistribution(*startBeliefX);
00165 copy(currBelX, *startBeliefX);
00166 }
00167 else
00168 {
00169
00170 actStateCompl->sval = problem->initialBeliefStval->sval;
00171 }
00172
00173
00174 SharedPointer<SparseVector> startBeliefVec;
00175 if (problem->initialBeliefStval->bvec)
00176 startBeliefVec = problem->initialBeliefStval->bvec;
00177 else
00178 startBeliefVec = problem->initialBeliefYByX[actStateCompl->sval];
00179 int currUnobsState = chooseFromDistribution(*startBeliefVec);
00180 int belSize = startBeliefVec->size();
00181
00182 actStateCompl->bvec->resize(belSize);
00183 actStateCompl->bvec->push_back(currUnobsState, 1.0);
00184
00185 DEBUG_TRACE( cout << "actStateCompl sval " << actStateCompl->sval << endl; );
00186 DEBUG_TRACE( actStateCompl->bvec->write(cout) << endl; );
00187
00188 currBelSt->sval = actStateCompl->sval;
00189 copy(*currBelSt->bvec, *startBeliefVec);
00190
00191 DEBUG_TRACE( cout << "currBelSt sval " << currBelSt->sval << endl; );
00192 DEBUG_TRACE( currBelSt->bvec->write(cout) << endl; );
00193
00194 double mult=1;
00195 CPTimer lapTimer;
00196
00197
00198
00199
00200 unsigned int firstAction;
00201
00202 double gamma = problem->getDiscount();
00203
00204 int xDim = 3;
00205 vector<int> bhout(xDim,0);
00206 vector<int> fhout(xDim,0);
00207 for(int timeIter = 0; timeIter < iters; timeIter++)
00208 {
00209 DEBUG_TRACE( cout << "timeIter " << timeIter << endl; );
00210
00211 if(enableFiling && timeIter == 0)
00212 {
00213 *streamOut << ">>> begin\n";
00214 }
00215
00216
00217 int currAction;
00218
00219 if (timeIter == 0)
00220 {
00221
00222 if(solverParams->useLookahead)
00223 {
00224 if (currBelSt->sval == -1)
00225 currAction = policy->getBestActionLookAhead(currBelSt->bvec, currBelX);
00226 else
00227 currAction = policy->getBestActionLookAhead(*currBelSt);
00228 }
00229 else
00230 {
00231 if (currBelSt->sval == -1)
00232 currAction = policy->getBestAction(currBelSt->bvec, currBelX);
00233 else
00234 currAction = policy->getBestAction(*currBelSt);
00235 }
00236 }
00237 else
00238 {
00239
00240 if(solverParams->useLookahead)
00241 {
00242 currAction = policy->getBestActionLookAhead(*currBelSt);
00243
00244 }
00245 else
00246 {
00247 currAction = policy->getBestAction(*currBelSt);
00248 }
00249 }
00250
00251
00252
00253
00254
00255
00256
00257 if(currAction < 0 )
00258 {
00259 cout << "You are using a MDP Policy, please make sure you are using a MDP policy together with one-step look ahead option turned on" << endl;
00260 return -1;
00261 }
00262 if (timeIter == 0)
00263 {
00264 firstAction = currAction;
00265 }
00266
00267
00268 double currReward = getReward(*actStateCompl, currAction);
00269
00270 DEBUG_TRACE( cout << "currAction " << currAction << endl; );
00271 DEBUG_TRACE( cout << "actStateCompl sval " << actStateCompl->sval << endl; );
00272 DEBUG_TRACE( actStateCompl->bvec->write(cout) << endl; );
00273
00274 DEBUG_TRACE( cout << "currReward " << currReward << endl; );
00275 expReward += mult*currReward;
00276 mult *= gamma;
00277 reward += currReward;
00278
00279 DEBUG_TRACE( cout << "expReward " << expReward << endl; );
00280 DEBUG_TRACE( cout << "reward " << reward << endl; );
00281
00282
00283
00284 belief_vector actualActionUpdUnobs(belSize), actualActionUpdObs(problem->XStates->size()) ;
00285 performActionObs(actualActionUpdObs, currAction, *actStateCompl);
00286
00287 DEBUG_TRACE( cout << "actualActionUpdObs " << endl; );
00288 DEBUG_TRACE( actualActionUpdObs.write(cout) << endl; );
00289
00290
00291 actNewStateCompl->sval = (unsigned int) chooseFromDistribution(actualActionUpdObs, ((double)rand()/RAND_MAX));
00292
00293
00294
00295 performActionUnobs(actualActionUpdUnobs, currAction, *actStateCompl, actNewStateCompl->sval);
00296
00297 DEBUG_TRACE( cout << "actualActionUpdUnobs " << endl; );
00298 DEBUG_TRACE( actualActionUpdUnobs.write(cout) << endl; );
00299
00300
00301 int newUnobsState = chooseFromDistribution(actualActionUpdUnobs, ((double)rand()/RAND_MAX));
00302
00303 DEBUG_TRACE( cout << "newUnobsState "<< newUnobsState << endl; );
00304
00305 actNewStateCompl->bvec->resize(belSize);
00306 actNewStateCompl->bvec->push_back(newUnobsState, 1.0);
00307
00308 DEBUG_TRACE( cout << "actNewStateCompl sval "<< actNewStateCompl->sval << endl; );
00309 DEBUG_TRACE( actNewStateCompl->bvec->write(cout) << endl; );
00310
00311
00312 belief_vector obsPoss;
00313 getPossibleObservations(obsPoss, currAction, *actNewStateCompl);
00314
00315
00316 DEBUG_TRACE( cout << "obsPoss"<< endl; );
00317 DEBUG_TRACE( obsPoss.write(cout) << endl; );
00318
00319 int currObservation = chooseFromDistribution(obsPoss, ((double)rand()/RAND_MAX));
00320
00321 DEBUG_TRACE( cout << "currObservation "<< currObservation << endl; );
00322
00323
00324 map<string, string> aa = problem->getActionsSymbols(currAction);
00325
00326
00327
00328 map<string, string> bb = problem->getObservationsSymbols(currObservation);
00329 map<string, string> cc = problem->getFactoredObservedStatesSymbols(actStateCompl->sval);
00330
00331
00332 for (int ii=0;ii<xDim;ii++){
00333
00334 }
00335 for (int ii=0;ii<xDim;ii++){
00336
00337 }
00338
00339 checkTerminal(cc["pTwo_0"],bb["obs_ballDp2S"],bhout,fhout);
00340
00341 for (int ii=0;ii<xDim;ii++){
00342
00343 }
00344 for (int ii=0;ii<xDim;ii++){
00345
00346 }
00347
00348
00349
00350 if(enableFiling)
00351 {
00352
00353 if (timeIter == 0)
00354 {
00355
00356 map<string, string> obsState = problem->getFactoredObservedStatesSymbols(actStateCompl->sval);
00357 if(obsState.size()>0){
00358 streamOut->width(4);*streamOut<<left<<"X"<<":";
00359 printTuple(obsState, streamOut);
00360 }
00361
00362
00363 streamOut->width(4);*streamOut<<left<<"Y"<<":";
00364 map<string, string> unobsState = problem->getFactoredUnobservedStatesSymbols(currUnobsState);
00365 printTuple(unobsState, streamOut);
00366
00367
00368 if (currBelSt->sval == -1) {
00369 SparseVector currBelXSparse;
00370 copy(currBelXSparse, currBelX);
00371 int mostProbX = currBelXSparse.argmax();
00372 streamOut->width(4);*streamOut<<left<<"ML X"<<":";
00373 map<string, string> mostProbXState = problem->getFactoredObservedStatesSymbols(mostProbX);
00374 printTuple(mostProbXState, streamOut);
00375 }
00376
00377
00378 int mostProbY = currBelSt->bvec->argmax();
00379 double prob = currBelSt->bvec->operator()(mostProbY);
00380 streamOut->width(4);*streamOut<<left<<"ML Y"<<":";
00381 map<string, string> mostProbYState = problem->getFactoredUnobservedStatesSymbols(mostProbY);
00382 printTuple(mostProbYState, streamOut);
00383 }
00384
00385 streamOut->width(4);*streamOut<<left<<"A"<<":";
00386 map<string, string> actState = problem->getActionsSymbols(currAction);
00387 printTuple(actState, streamOut);
00388
00389 streamOut->width(4);*streamOut<<left<<"R"<<":";
00390 *streamOut << currReward<<endl;
00391 }
00392
00393
00394
00395 if (timeIter == 0) {
00396 if (currBelSt->sval == -1)
00397 nextBelSt = problem->beliefTransition->nextBelief(currBelSt->bvec, currBelX, currAction, currObservation, actNewStateCompl->sval);
00398 else
00399 nextBelSt = problem->beliefTransition->nextBelief(currBelSt, currAction, currObservation, actNewStateCompl->sval);
00400 } else
00401 nextBelSt = problem->beliefTransition->nextBelief(currBelSt, currAction, currObservation, actNewStateCompl->sval);
00402
00403
00404
00405
00406 if(enableFiling)
00407 {
00408 if(timeIter == iters - 1)
00409 {
00410 *streamOut << "terminated\n";
00411 }
00412
00413
00414 map<string, string> obsState = problem->getFactoredObservedStatesSymbols(actNewStateCompl->sval);
00415 if(obsState.size()>0){
00416 streamOut->width(4);*streamOut<<left<<"X"<<":";
00417 printTuple(obsState, streamOut);
00418 }
00419
00420
00421 streamOut->width(4);*streamOut<<left<<"Y"<<":";
00422 map<string, string> unobsState = problem->getFactoredUnobservedStatesSymbols(newUnobsState);
00423 printTuple(unobsState, streamOut);
00424
00425
00426 streamOut->width(4);*streamOut<<left<<"O"<<":";
00427 map<string, string> obs = problem->getObservationsSymbols(currObservation);
00428 printTuple(obs, streamOut);
00429
00430
00431 int mostProbY = nextBelSt->bvec->argmax();
00432 double prob = nextBelSt->bvec->operator()(mostProbY);
00433 streamOut->width(4);*streamOut<<left<<"ML Y"<<":";
00434 map<string, string> mostProbYState = problem->getFactoredUnobservedStatesSymbols(mostProbY);
00435 printTuple(mostProbYState, streamOut);
00436
00437 if(timeIter == iters - 1)
00438 {
00439
00440
00441 double lapTime = lapTimer.elapsed();
00442 *streamOut << "----- time: " << lapTime <<endl;
00443 }
00444 }
00445
00446 currUnobsState = newUnobsState;
00447 actStateCompl->sval = actNewStateCompl->sval;
00448 copy(*actStateCompl->bvec, *actNewStateCompl->bvec);
00449
00450
00451 copy(*currBelSt->bvec, *nextBelSt->bvec);
00452 currBelSt->sval = nextBelSt->sval;
00453
00454
00455 if(problem->getIsTerminalState(*actStateCompl))
00456 {
00457
00458
00459
00460
00461 if(enableFiling)
00462 *streamOut << "Reached terminal state" << endl;
00463 break;
00464 }
00465
00466 }
00467
00468
00469 return firstAction;
00470 }
00471 };