EvaluationEngine.cpp
#include <sstream>
#include <fstream>
#include "EvaluationEngine.h"
#include "AlphaVectorPolicy.h"
#include "CPTimer.h"
#include "BeliefForest.h"
#include "Sample.h"
#include "BeliefCache.h"
#include "solverUtils.h"

#include "EvaluatorBeliefTreeNodeTuple.h"

using namespace std;
using namespace momdp;

namespace momdp
{
        EvaluationEngine::EvaluationEngine()
        {
        }
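
        // Binds this engine to the problem model, the policy under evaluation, and the
        // belief structures shared with the solver (belief forest, per-X belief caches,
        // sampling engine, solver parameters). The pointers are stored as-is; ownership
        // stays with the caller (the destructor below releases nothing).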
        void EvaluationEngine::setup(SharedPointer<MOMDP> problem, SharedPointer<AlphaVectorPolicy> policy, BeliefForest* _beliefForest, vector<BeliefCache *> *_beliefCacheSet, Sample *_sampleEngine, SolverParams *solverParams)
        {
                this->policy = policy;
                this->problem = problem;
                this->beliefForest = _beliefForest;
                this->beliefCacheSet = _beliefCacheSet;
                this->sampleEngine = _sampleEngine;
                this->solverParams = solverParams;
        }
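
        // A minimal usage sketch (assumed call sequence; the actual evaluator driver in
        // APPL may differ, and the variable names below are illustrative only):
        //
        //   EvaluationEngine engine;
        //   engine.setup(problem, policy, &beliefForest, &beliefCacheSet, &sampleEngine, &solverParams);
        //   double reward = 0, expReward = 0;
        //   int firstAction = engine.runFor(maxSteps, *initialBelief, initialBeliefX, &traceFile, reward, expReward);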

        EvaluationEngine::~EvaluationEngine(void)
        {
        }

  /*    void EvaluationEngine::performAction(belief_vector& outBelUnobs, belief_vector& outBelObs, int action, const BeliefWithState& belSt) const
        {
                // DEBUG_SIMSPEED_270409 skip calculating outprobs for x when there is only one possible x value
                if (problem->XStates->size() == 1)
                {
                        // clear out the entries
                        outBelObs.resize(1);
                        outBelObs.push_back(0, 1.0);
                }
                else
                {
                        //problem->getTransitionMatrixX(action, belSt.sval);
                        const SharedPointer<SparseMatrix> transMatX = problem->XTrans->getMatrix(action, belSt.sval);
                        mult(outBelObs, *belSt.bvec, *transMatX);
                }

                //const SparseMatrix transMatY = problem->getTransitionMatrixY(action, belSt.sval);
                const SharedPointer<SparseMatrix> transMatY = problem->XYTrans->getMatrix(action, belSt.sval);
                mult(outBelUnobs, *belSt.bvec, *transMatY);
        }
  */

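        // Computes the distribution over observations that can follow `action` from belief
        // `belSt`, by multiplying the belief over Y with the observation-probability matrix
        // for (action, belSt.sval).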
        void EvaluationEngine::getPossibleObservations(belief_vector& possObs, int action, const BeliefWithState& belSt) const
        {
                //const SparseMatrix obsMat = problem->getObservationMatrix(action, belSt.sval);
                const SharedPointer<SparseMatrix> obsMat = problem->obsProb->getMatrix(action, belSt.sval);
                mult(possObs, *belSt.bvec, *obsMat);
        }

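        // Expected immediate reward of `action` in belief `belst`: the inner product of the
        // belief over Y with the reward column for `action`, given observed state belst.sval.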
        double EvaluationEngine::getReward(const BeliefWithState& belst, int action)
        {
                //const SparseMatrix rewMat = problem->getRewardMatrix(belst.sval);
                const SharedPointer<SparseMatrix> rewMat = problem->rewards->getMatrix(belst.sval);
                return inner_prod_column(*rewMat, action, *belst.bvec);
        }

        string EvaluationEngine::toString()
        {
                std::ostringstream mystrm;
                mystrm << "action selector: (replaced by Policy) ";
                return mystrm.str();
        }

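        // Prints the non-zero entries of a sparse belief vector as "index -> value" lines.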
        void EvaluationEngine::display(belief_vector& b, ostream& s)
        {
                for(unsigned int i = 0; i < b.filled(); i++)
                {
                        s << b.data[i].index << " -> " << b.data[i].value << endl;
                }
        }

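        // Looks up an existing belief tree node for `belief` in the belief cache associated
        // with its observed-state value; returns NULL if the belief is not cached yet.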
        BeliefTreeNode* EvaluationEngine::searchNode(SharedPointer<BeliefWithState> belief)
        {
                SharedPointer<belief_vector> s = belief->bvec;
                state_val stateidx = belief->sval;

                int beliefIndex = (*beliefCacheSet)[stateidx]->getBeliefRowIndex(s);

                if (beliefIndex == -1)
                {
                        return NULL;
                }
                else
                {
                        // return the existing node
                        BeliefTreeNode* temp = (*beliefCacheSet)[stateidx]->getRow(beliefIndex)->REACHABLE;
                        return temp;
                }
        }

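        // Runs one evaluation episode of at most `iters` steps starting from `startVec`.
        // At each step it follows the policy, accumulates the undiscounted total in `reward`
        // and the discounted total in `expReward`, optionally logs the trace to `streamOut`,
        // and stops early at a terminal state. startBeliefX is unused: a randomly sampled
        // initial observed state is not supported by the evaluator. Returns the action taken
        // at the first time step, or -1 if no valid action was taken.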
        int EvaluationEngine::runFor(int iters, BeliefWithState& startVec, SparseVector startBeliefX, ofstream* streamOut, double& reward, double& expReward)
        {
                DEBUG_TRACE(cout << "runFor" << endl; );
                DEBUG_TRACE(cout << "iters " << iters << endl; );
                DEBUG_TRACE(cout << "startVec sval " << startVec.sval << endl; );
                DEBUG_TRACE(startVec.bvec->write(cout) << endl;);
                DEBUG_TRACE(cout << "startBeliefX" << endl; );
                DEBUG_TRACE(startBeliefX.write(cout) << endl;);

                //    double reward = 0, expReward = 0;
                bool enableFiling = (streamOut != NULL);

                BeliefTreeNode* curNode = NULL;
                if(startVec.sval == -1)
                {
                        cerr << "Please use the simulator. A random initial value for the fully observed state variable is not supported in the evaluator.\n";
                        exit(1);
                }
                else
                {
                        curNode = beliefForest->sampleRootEdges[startVec.sval]->sampleRoot;
                }

                // policy follower state
                // belief with state
                SharedPointer<BeliefWithState> currBelSt = curNode->s;

                DEBUG_TRACE( cout << "currBelSt sval " << currBelSt->sval << endl; );
                DEBUG_TRACE( currBelSt->bvec->write(cout) << endl; );

                double mult = 1;
                CPTimer lapTimer;

                int firstAction = -1;

                double gamma = problem->getDiscount();
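                // mult holds the running discount factor gamma^t, so expReward accumulates
                // the discounted return while reward accumulates the undiscounted sum.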
                for(int timeIter = 0; timeIter < iters; timeIter++)
                {
                        DEBUG_TRACE( cout << "timeIter " << timeIter << endl; );

                        if(enableFiling && timeIter == 0)
                        {
                                *streamOut << ">>> begin\n";
                        }

                        // get the action according to the policy and the current belief and state
                        int currAction = -1;

                        sampleEngine->samplePrepare(curNode);
                        EvaluatorBeliefTreeNodeTuple *curNodeExtraData = (EvaluatorBeliefTreeNodeTuple *)curNode->extraData;

                        if(curNodeExtraData->selectedAction == -1)
                        {
                                // Note: previous versions treated the first time step differently when there was a random
                                // initial value for the fully observed state variable. Here we assume that in such cases
                                // the initial value is sampled and the policy follower knows the sampled value.
                                if(solverParams->useLookahead)
                                {
                                        currAction = policy->getBestActionLookAhead(*currBelSt);
                                }
                                else
                                {
                                        currAction = policy->getBestAction(*currBelSt);
                                }

                                curNodeExtraData->selectedAction = currAction;
                        }

                        currAction = curNodeExtraData->selectedAction;

                        if(currAction < 0)
                        {
                                cout << "You are using an MDP policy; please make sure the one-step look-ahead option is turned on." << endl;
                                return -1;
                        }

                        if (timeIter == 0)
                        {
                                firstAction = currAction;
                        }

                        // this is the reward for the "actual state" system
                        //double currReward = getReward(actStateCompl, currAction);
                        double currReward = curNode->Q[currAction].immediateReward;

                        DEBUG_TRACE( cout << "currAction " << currAction << endl; );
                        DEBUG_TRACE( cout << "currReward " << currReward << endl; );

                        expReward += mult*currReward;
                        mult *= gamma;
                        reward += currReward;

                        DEBUG_TRACE( cout << "expReward " << expReward << endl; );
                        DEBUG_TRACE( cout << "reward " << reward << endl; );

                        EvaluatorAfterActionDataTuple *afterActionDataTuple = (EvaluatorAfterActionDataTuple *)curNode->Q[currAction].extraData;

                        // the actual next value of the observed state variable
                        int newObsState = (unsigned int) chooseFromDistribution(*afterActionDataTuple->spv);

                        EvaluatorAfterObsDataTuple* afterObsDataTuple = (EvaluatorAfterObsDataTuple*)curNode->Q[currAction].stateOutcomes[newObsState]->extraData;
                        SharedPointer<obs_prob_vector> obsPoss = afterObsDataTuple->opv;
                        //getPossibleObservations(obsPoss, currAction, actNewStateCompl);

                        DEBUG_TRACE( cout << "obsPoss" << endl; );
                        DEBUG_TRACE( obsPoss->write(cout) << endl; );

                        int currObservation = chooseFromDistribution(*obsPoss);

                        DEBUG_TRACE( cout << "currObservation " << currObservation << endl; );

                        if(enableFiling)
                        {
                                if (timeIter == 0)
                                {
                                        *streamOut << "X: " << curNode->s->sval << endl;

                                        *streamOut << "belief (over Y): ";

                                        std::vector<SparseVector_Entry>::const_iterator iter;
                                        for(iter = curNode->s->bvec->data.begin(); iter != curNode->s->bvec->data.end(); iter++)
                                        {
                                                *streamOut << iter->index << ":" << iter->value << " ";
                                        }
                                        *streamOut << endl;
                                }

                                *streamOut << "A: " << currAction << endl;
                                *streamOut << "R: " << currReward << endl;
                                *streamOut << "X: " << newObsState << endl;
                                *streamOut << "O: " << currObservation << endl;
                        }

                        // now that we have the action, the new value of the observed state variable, and the observation,
                        // we can update the belief over the unobserved variable
                        // this is for the tree, i.e. the "actual system"
                        curNode = curNode->Q[currAction].stateOutcomes[newObsState]->outcomes[currObservation]->nextState;

                        currBelSt = curNode->s;

                        if(enableFiling)
                        {
                                if(timeIter == iters - 1)
                                {
                                        *streamOut << "terminated\n";
                                }

                                *streamOut << "belief (over Y): ";

                                std::vector<SparseVector_Entry>::const_iterator iter;
                                for(iter = curNode->s->bvec->data.begin(); iter != curNode->s->bvec->data.end(); iter++)
                                {
                                        *streamOut << iter->index << ":" << iter->value << " ";
                                }
                                *streamOut << endl;

                                if(timeIter == iters - 1)
                                {
                                        //timeval timeInRunFor = getTime() - prevTime;
                                        //*streamOut << "----- time: " << timevalToSeconds(timeInRunFor) << endl;
                                        double lapTime = lapTimer.elapsed();
                                        *streamOut << "----- time: " << lapTime << endl;
                                }
                        }


                        // stop the simulation when a terminal state is reached
                        //if(problem->getIsTerminalState(*currBelSt))
                        if(problem->getIsTerminalState(*curNode->s))
                        {
                                // Terminal state: the reward is all 0 and the state transitions back to itself.
                                // TODO: consider the case where terminal rewards are not all 0.
                                //cout << "Terminal State!! timeIter " << timeIter << endl;
                                if(enableFiling)
                                {
                                        *streamOut << "Reached terminal state" << endl;
                                }
                                break;
                        }
                }

                return firstAction;
        }
}

