#include <sstream>
#include <fstream>
#include "EvaluationEngine.h"
#include "AlphaVectorPolicy.h"
#include "CPTimer.h"
#include "BeliefForest.h"
#include "Sample.h"
#include "BeliefCache.h"
#include "solverUtils.h"

#include "EvaluatorBeliefTreeNodeTuple.h"

using namespace std;
using namespace momdp;

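// Implementation of EvaluationEngine: replays a computed policy on the
// sampled belief forest, accumulating both the undiscounted and the
// discounted reward along the simulated trajectory.
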
namespace momdp
{
EvaluationEngine::EvaluationEngine()
{
}

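// Wire the evaluator to the problem model, the policy to evaluate, the
// belief forest and per-state belief caches, the sampling engine, and the
// solver parameters. Must be called before runFor().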
void EvaluationEngine::setup(SharedPointer<MOMDP> problem, SharedPointer<AlphaVectorPolicy> policy, BeliefForest* _beliefForest, vector<BeliefCache *> *_beliefCacheSet, Sample *_sampleEngine, SolverParams *solverParams)
{
    this->policy = policy;
    this->problem = problem;
    this->beliefForest = _beliefForest;
    this->beliefCacheSet = _beliefCacheSet;
    this->sampleEngine = _sampleEngine;
    this->solverParams = solverParams;
}

EvaluationEngine::~EvaluationEngine(void)
{
}

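// Compute the distribution over observations that can follow `action` from
// belief `belSt`: each observation is weighted by its probability under the
// current belief and the observation model for the fully observed state.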
void EvaluationEngine::getPossibleObservations(belief_vector& possObs, int action, const BeliefWithState& belSt) const
{
    const SharedPointer<SparseMatrix> obsMat = problem->obsProb->getMatrix(action, belSt.sval);
    mult(possObs, *belSt.bvec, *obsMat);
}

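// Expected immediate reward of taking `action` in belief `belst`: the inner
// product of the belief with the corresponding column of the reward matrix.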
double EvaluationEngine::getReward(const BeliefWithState& belst, int action)
{
    const SharedPointer<SparseMatrix> rewMat = problem->rewards->getMatrix(belst.sval);
    return inner_prod_column(*rewMat, action, *belst.bvec);
}

string EvaluationEngine::toString()
{
    std::ostringstream mystrm;
    mystrm << "action selector: (replaced by Policy) ";
    return mystrm.str();
}

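// Print the non-zero entries of a sparse belief vector as "index -> value" lines.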
void EvaluationEngine::display(belief_vector& b, ostream& s)
{
    for (unsigned int i = 0; i < b.filled(); i++)
    {
        s << b.data[i].index << " -> " << b.data[i].value << endl;
    }
}

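// Look up the belief tree node for an (x, b) pair in the belief cache of the
// fully observed state x. Returns NULL if the belief has not been sampled yet.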
BeliefTreeNode* EvaluationEngine::searchNode(SharedPointer<BeliefWithState> belief)
{
    SharedPointer<belief_vector> s = belief->bvec;
    state_val stateidx = belief->sval;

    int beliefIndex = (*beliefCacheSet)[stateidx]->getBeliefRowIndex(s);

    if (beliefIndex == -1)
    {
        return NULL;
    }
    else
    {
        return (*beliefCacheSet)[stateidx]->getRow(beliefIndex)->REACHABLE;
    }
}

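// Simulate the policy for up to `iters` steps starting from `startVec`.
// Adds the undiscounted return to `reward` and the discounted return to
// `expReward`, optionally writing an "A:/R:/X:/O:" trace to `streamOut`.
// Returns the first action taken, or -1 if no valid action is available.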
int EvaluationEngine::runFor(int iters, BeliefWithState& startVec, SparseVector startBeliefX, ofstream* streamOut, double& reward, double& expReward)
{
    DEBUG_TRACE(cout << "runFor" << endl; );
    DEBUG_TRACE(cout << "iters " << iters << endl; );
    DEBUG_TRACE(cout << "startVec sval " << startVec.sval << endl; );
    DEBUG_TRACE(startVec.bvec->write(cout) << endl; );
    DEBUG_TRACE(cout << "startBeliefX" << endl; );
    DEBUG_TRACE(startBeliefX.write(cout) << endl; );

    // Write a trace of the run only if an output stream was supplied.
    bool enableFiling = (streamOut != NULL);

    BeliefTreeNode* curNode = NULL;
    if (startVec.sval == -1)
    {
        // A random initial value for the fully observed state variable is not
        // supported in the evaluator; the simulator handles that case.
        cerr << "Please use the simulator. A random initial value for the fully observed state variable is not supported in the evaluator.\n";
        exit(1);
    }
    else
    {
        curNode = beliefForest->sampleRootEdges[startVec.sval]->sampleRoot;
    }

    SharedPointer<BeliefWithState> currBelSt = curNode->s;

    DEBUG_TRACE( cout << "currBelSt sval " << currBelSt->sval << endl; );
    DEBUG_TRACE( currBelSt->bvec->write(cout) << endl; );

    double mult = 1;
    CPTimer lapTimer;

    // First action taken; returned to the caller (-1 if no step executes).
    int firstAction = -1;

    double gamma = problem->getDiscount();
    for (int timeIter = 0; timeIter < iters; timeIter++)
    {
        DEBUG_TRACE( cout << "timeIter " << timeIter << endl; );

        if (enableFiling && timeIter == 0)
        {
            *streamOut << ">>> begin\n";
        }

        int currAction = -1;

        sampleEngine->samplePrepare(curNode);
        EvaluatorBeliefTreeNodeTuple *curNodeExtraData = (EvaluatorBeliefTreeNodeTuple *)curNode->extraData;

        // Ask the policy for an action only on the first visit to this node;
        // the choice is cached in the node's extra data.
        if (curNodeExtraData->selectedAction == -1)
        {
            if (solverParams->useLookahead)
            {
                currAction = policy->getBestActionLookAhead(*currBelSt);
            }
            else
            {
                currAction = policy->getBestAction(*currBelSt);
            }

            curNodeExtraData->selectedAction = currAction;
        }

        currAction = curNodeExtraData->selectedAction;

        if (currAction < 0)
        {
            cout << "You are using an MDP policy; please make sure the one-step look-ahead option is turned on." << endl;
            return -1;
        }

        if (timeIter == 0)
        {
            firstAction = currAction;
        }

        // Accumulate the immediate reward, undiscounted and discounted.
        double currReward = curNode->Q[currAction].immediateReward;

        DEBUG_TRACE( cout << "currAction " << currAction << endl; );
        DEBUG_TRACE( cout << "currReward " << currReward << endl; );

        expReward += mult * currReward;
        mult *= gamma;
        reward += currReward;

        DEBUG_TRACE( cout << "expReward " << expReward << endl; );
        DEBUG_TRACE( cout << "reward " << reward << endl; );

        EvaluatorAfterActionDataTuple *afterActionDataTuple = (EvaluatorAfterActionDataTuple *)curNode->Q[currAction].extraData;

        // Sample the next fully observed state, then an observation conditioned on it.
        int newObsState = chooseFromDistribution(*afterActionDataTuple->spv);

        EvaluatorAfterObsDataTuple *afterObsDataTuple = (EvaluatorAfterObsDataTuple *)curNode->Q[currAction].stateOutcomes[newObsState]->extraData;
        SharedPointer<obs_prob_vector> obsPoss = afterObsDataTuple->opv;

        DEBUG_TRACE( cout << "obsPoss" << endl; );
        DEBUG_TRACE( obsPoss->write(cout) << endl; );

        int currObservation = chooseFromDistribution(*obsPoss);

        DEBUG_TRACE( cout << "currObservation " << currObservation << endl; );

        if (enableFiling)
        {
            if (timeIter == 0)
            {
                *streamOut << "X: " << curNode->s->sval << endl;
                *streamOut << "belief (over Y): ";

                std::vector<SparseVector_Entry>::const_iterator iter;
                for (iter = curNode->s->bvec->data.begin(); iter != curNode->s->bvec->data.end(); iter++)
                {
                    *streamOut << iter->index << ":" << iter->value << " ";
                }
                *streamOut << endl;
            }

            *streamOut << "A: " << currAction << endl;
            *streamOut << "R: " << currReward << endl;
            *streamOut << "X: " << newObsState << endl;
            *streamOut << "O: " << currObservation << endl;
        }

        // Advance to the successor belief node for the sampled (x', o) pair.
        curNode = curNode->Q[currAction].stateOutcomes[newObsState]->outcomes[currObservation]->nextState;
        currBelSt = curNode->s;

        if (enableFiling)
        {
            if (timeIter == iters - 1)
            {
                *streamOut << "terminated\n";
            }

            *streamOut << "belief (over Y): ";

            std::vector<SparseVector_Entry>::const_iterator iter;
            for (iter = curNode->s->bvec->data.begin(); iter != curNode->s->bvec->data.end(); iter++)
            {
                *streamOut << iter->index << ":" << iter->value << " ";
            }
            *streamOut << endl;

            if (timeIter == iters - 1)
            {
                double lapTime = lapTimer.elapsed();
                *streamOut << "----- time: " << lapTime << endl;
            }
        }

        // Stop early once a terminal state is reached.
        if (problem->getIsTerminalState(*curNode->s))
        {
            if (enableFiling)
                *streamOut << "Reached terminal state" << endl;
            break;
        }
    }

    return firstAction;
}
} // namespace momdp
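
/*
 * A minimal usage sketch (hypothetical driver code; everything below except
 * the EvaluationEngine calls is illustrative, and depends on how the
 * surrounding solver constructs the problem, policy, belief forest, belief
 * caches, sampling engine, and solver parameters):
 *
 *     EvaluationEngine engine;
 *     engine.setup(problem, policy, &beliefForest, &beliefCacheSet,
 *                  &sampleEngine, &solverParams);
 *
 *     double reward = 0.0, expReward = 0.0;
 *     ofstream traceFile("trace.log");
 *     int firstAction = engine.runFor(100, *startBelief, startBeliefX,
 *                                     &traceFile, reward, expReward);
 */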