MOMDP.cpp
00001 #include "MOMDP.h"
00002 #include <string>
00003 #include "FactoredPomdp.h"
00004 #include "BeliefTransitionMOMDP.h"
00005 
00006 using namespace std;
00007 
00008 
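// A MOMDP factors the state into a fully observed component x (XStates) and an
// unobserved component y (YStates); a belief is the pair (x, b_Y), stored in
// BeliefWithState as sval and bvec respectively.  The matrix containers follow
// the same split: XTrans holds TX[a][x](y,x'), YTrans holds TY[a][x](y,y')
// (or the intraslice TY[a][x][x'] variant when hasIntraslice is set), and
// obsProb holds O[a][x'](y',o).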
00009 MOMDP::MOMDP(void) : initialBeliefY(new SparseVector()), initialBeliefStval(new BeliefWithState()), initialBeliefX(new DenseVector()),
00010                      hasIntraslice(false)
00011 {
00012     beliefTransition = new BeliefTransitionMOMDP();
00013     beliefTransition->problem = dynamic_pointer_cast<MObject>(SharedPointer<MOMDP>(this));
00014     discount = 0.95;
00015 
00016 
00017     XStates = new States();
00018     YStates = new States();
00019     actions = new Actions();
00020     observations = new Observations();
00021 
00022     XTrans = new StateTransitionX();
00023     YTrans = NULL;
00024     // XYTrans = new StateTransitionXY();
00025     obsProb = new ObservationProbabilities();
00026     rewards = new Rewards();
00027 
00028     pomdpR = NULL; pomdpT = NULL; pomdpTtr = NULL; pomdpO = NULL;
00029 }
00030 
00031 bool MOMDP::hasPOMDPMatrices()
00032 {
00033     return (pomdpR != NULL && pomdpT != NULL && pomdpTtr != NULL && pomdpO != NULL);
00034 }
00035 
00036 void MOMDP::deletePOMDPMatrices()
00037 {
00038     /* if (pomdpR != NULL)
00039       {
00040       delete pomdpR;
00041       }
00042       if  (pomdpT != NULL)
00043       {
00044       deleteMatrixVector(pomdpT);
00045       }
00046       if  (pomdpTtr != NULL)
00047       {
00048       deleteMatrixVector(pomdpTtr);
00049       }
00050       if  (pomdpO != NULL)
00051       {
00052       deleteMatrixVector(pomdpO);
00053       }*/
00054 }
00055 
00056 void MOMDP::deleteMatrixVector(vector<SharedPointer<SparseMatrix> > *m)
00057 {
00058     /*FOREACH_NOCONST(SharedPointer<SparseMatrix> , ppMatrix, *m)
00059       {
00060       SharedPointer<SparseMatrix>  curMatrix = *ppMatrix;
00061       if(curMatrix !=NULL)
00062       {
00063       delete curMatrix;
00064       }
00065       }*/
00066     delete m;
00067 }
00068 
00069 MOMDP::~MOMDP(void)
00070 {
00071     delete beliefTransition;
00072 }
00073 
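// Computes P(x' | b, a) for a belief b = (x, b_Y):
//   result(x') = sum_y b_Y(y) * TX[a][x](y, x')
// i.e. the unobserved-state belief multiplied into the observed-state
// transition matrix for the current observed state x.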
00074 obsState_prob_vector& MOMDP::getObsStateProbVector(obsState_prob_vector& result, BeliefWithState& b, int a)
00075 {
00076     int Xc = b.sval; // current value for observed state variable
00077     mult( result, *b.bvec, *this->XTrans->getMatrix(a, Xc));
00078 
00079     return result;
00080 }
00081 
00082 
00083 string MOMDP::ToString()
00084 {
00085     stringstream sb ;
00086     sb << "discount : " << discount << endl;
00087     sb << "initialBeliefY : " << endl;
00088     initialBeliefY->write(sb) << endl;
00089     sb << "initialBeliefStval : " << endl;
00090     sb << "initialBeliefStval stval: " <<  initialBeliefStval->sval << endl;
00091     initialBeliefStval->bvec->write(sb) << endl;
00092     sb << "initialBeliefX : " << endl;
00093     initialBeliefX->write(sb) << endl;
00094     sb << "Num X States : " << XStates->size() << endl;
00095     sb << "Num Y States : " << YStates->size() << endl;
00096     sb << "Num Actions : " << actions->size() << endl;
00097     sb << "Num Observations : " << observations->size() << endl;
00098     sb << "X Trans : " << XTrans->ToString() << endl;
00099     sb << "Y Trans : " << YTrans->ToString() << endl;
00100     sb << "Obs Prob : " << obsProb->ToString() << endl;
00101     sb << "Rewards : " << rewards->ToString() << endl;
00102 
00103     return sb.str();
00104 }
00105 
00106 void MOMDP::getObsProbVectorFast(obs_prob_vector& result, int a, int Xn, SparseVector& tmp1)
00107 {
00108     mult( result, tmp1, *obsProb->getMatrix(a, Xn) );
00109     // this should give the same result
00110     // mult( result, Otr[a][Xn], tmp1 );
00111 
00112     result *= (1.0/(result.norm_1()));
00113 
00114 }
00115 
00116 int  MOMDP::getNumActions()
00117 {
00118     return actions->size();
00119 }
00120 int  MOMDP::getBeliefSize()
00121 {
00122     return YStates->size();
00123 }
00124 
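// Computes the unnormalized joint vector over next unobserved states y' for a
// fixed next observed state Xn:
//   result(y') = sum_y TY[a][x](y, y') * TX[a][x](y, Xn) * b_Y(y)
// which is proportional to P(y', x'=Xn | b, a).  When there is only a single
// observed state, the TX factor is identically 1 and is skipped.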
00125 SparseVector&  MOMDP::getJointUnobsStateProbVector(SparseVector& result, SharedPointer<BeliefWithState> b, int a, int Xn) 
00126 {
00127     int Xc = b->sval; // current value for observed state variable
00128     // belief_vector Bc = b.bvec;       // current belief for unobserved state variable
00129     DenseVector tmp, tmp1;
00130     DenseVector Bc; // = b.bvec;
00131 
00132     copy(Bc, *(b->bvec));
00133 
00134     if (this->XStates->size() == 1)
00135     {
00136         tmp = Bc;
00137     }
00138     else
00139     {
00140       emult_column( tmp, *this->XTrans->getMatrix(a, Xc), Xn, Bc );
00141     }
00142 
00143     mult( tmp1, *this->YTrans->getMatrixTr(a, Xc, Xn), tmp );
00144 
00145     copy(result, tmp1);
00146     return result;
00147 }
00148 
00149 // Returns the vector of P(x' | b_x, b_y, a), i.e. the next observed-state distribution, as a mixture over observed states x weighted by belX
00150 obsState_prob_vector& MOMDP::getObsStateProbVector(obsState_prob_vector& result, SharedPointer<belief_vector>& belY, DenseVector& belX, int a)
00151 {
00152     DenseVector Bc; 
00153     copy(Bc, *belY);
00154     result.resize(this->XStates->size());
00155 
00156     //loop over x
00157     FOR (xc, this->XStates->size())
00158     {
00159         if (!(belX(xc) == 0)) 
00160         {
00161             // for a particular x
00162             DenseVector tmp;
00163             SparseVector spv;
00164 
00165             mult( tmp, Bc, *(this->XTrans->getMatrix(a, xc)));
00166 
00167             copy(spv, tmp);  // P(x'| x, b_{y|x}, a) for a particular x and x'
00168 
00169             // multiply with belX(xc) and add to sum over x values
00170             spv *= belX(xc);
00171             result += spv;
00172         }
00173     }
00174     return result;
00175 }
00176 
00177 void MOMDP::getObsProbVector(obs_prob_vector& result, SharedPointer<belief_vector>& belY, obsState_prob_vector& belX, int a, int Xn) 
00178 {
00179     DenseVector Bc; 
00180     copy(Bc, *belY);
00181     result.resize(this->observations->size());
00182 
00183     //loop over x
00184     FOR (Xc, XStates->size())
00185     {
00186         if (!(belX(Xc) == 0)) 
00187         {
00188             // for a particular x
00189             SparseVector opv;
00190             DenseVector tmp, tmp1, tmp2;
00191 
00192             emult_column( tmp, *XTrans->getMatrix(a, Xc), (int) Xn, Bc );
00193             // tmp1 = TY_a_xc' * tmp
00194             mult( tmp1, *YTrans->getMatrixTr(a, Xc, Xn), tmp );
00195             // result = O_a_xn' * tmp1
00196             mult( tmp2, tmp1, *obsProb->getMatrix(a, Xn) );
00197 
00198             // multiply with belX(xc) and add to sum over x values
00199             tmp2 *= belX(Xc);
00200             copy (opv, tmp2);
00201             result += opv;
00202         }
00203     }
00204     result *= (1.0/result.norm_1());
00205 }
00206 
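// Computes P(o | x'=Xn, b, a) for a belief b = (x, b_Y).  Before normalization,
//   result(o) = sum_{y'} O[a][Xn](y', o) * sum_y TY[a][x](y, y') * TX[a][x](y, Xn) * b_Y(y),
// which is proportional to P(o, x'=Xn | b, a); dividing by its 1-norm gives the
// conditional observation distribution.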
00207 void MOMDP::getObsProbVector(obs_prob_vector& result, const BeliefWithState& b, int a, int Xn) 
00208 {
00209     int Xc = b.sval; // current value for observed state variable
00210     // belief_vector Bc = b.bvec;       // current belief for unobserved state variable
00211     DenseVector tmp, tmp1, tmp2;
00212     DenseVector Bc; // = b.bvec;
00213 
00214     copy(Bc, *b.bvec);
00215 
00216     //cout << "a :" << a << " Xc :" << Xc << "Xn :" << Xn << endl;
00217     // --- overall: result = O_a_xn' * (TY_a_xc' * (TX_a_xc (:,xn) .* bc))
00218     // tmp = TX_a_xc (:,xn) .* bc
00219     emult_column( tmp, *XTrans->getMatrix(a, Xc), (int) Xn, Bc );
00220     // tmp1 = TY_a_xc' * tmp
00221     mult( tmp1, *YTrans->getMatrixTr(a, Xc, Xn), tmp );
00222     // result = O_a_xn' * tmp1
00223     mult( tmp2, tmp1, *obsProb->getMatrix(a, Xn) );
00224 
00225     copy(result, tmp2);
00226     result *= (1.0/result.norm_1());
00227 
00228     // this should give the same result
00229     // mult( result, Otr[a][Xn], tmp1 );
00230 
00231     // avoid doing norm_1 calculation with DenseVector
00232     //SparseVector resultC;
00233     //copy(resultC, result);
00234     //resultC *= (1.0/resultC.norm_1());
00235     //copy(result, resultC);
00236     // result *= (1.0/norm_1(result));
00237 }
00238 
00239 // Convert from Cassandra's POMDP to MOMDP
00240 SharedPointer<MOMDP> MOMDP::convertMOMDPFromPOMDP(POMDP* pomdpProblem)
00241 {
00242     SharedPointer<MOMDP> result (new MOMDP());
00243     StateTransitionXY* XYTrans = new StateTransitionXY();
00244     result->YTrans = XYTrans;
00245 
00246     result->discount = pomdpProblem->discount;
00247 
00248     // States
00249     {
00250         StateVal temp;
00251         temp.name = "Dummy X State For Pure POMDP problem";
00252         result->XStates->add(temp);
00253     }
00254 
00255     FOR(y, pomdpProblem->getNumStateDimensions()) 
00256     {
00257         stringstream sstream;
00258         sstream << "State " << y ;
00259         StateVal temp;
00260         temp.name = sstream.str();
00261         result->YStates->add(temp);
00262     }
00263 
00264     // Action
00265     FOR(a, pomdpProblem->getNumActions()) 
00266     {
00267         stringstream sstream;
00268         sstream << "Action " << a ;
00269         Action tempAction;
00270         tempAction.name = sstream.str();
00271         result->actions->add(tempAction);
00272     }
00273     // Observations
00274     FOR(o, pomdpProblem->getNumObservations()) 
00275     {
00276         stringstream sstream;
00277         sstream << "Obs " << o ;
00278         Observation temp;
00279         temp.name = sstream.str();
00280         result->observations->add(temp);
00281     }
00282 
00283 
00284     int numStates = result->YStates->size();
00285     int numActions = result->actions->size();
00286 
00287     result->isPOMDPTerminalState.resize(1);
00288     result->isPOMDPTerminalState[0] = pomdpProblem->isPOMDPTerminalState;
00289 
00290     // Rewards:
00291     result->rewards->matrix.resize(1);
00292     result->rewards->matrix[0] = &pomdpProblem->R;  // reuse the POMDP reward matrix R(s,a) for the single dummy observed state
00293 
00294     // deal with the dummy TX and TXtr matrices
00295     kmatrix Tdummy;
00296     Tdummy.resize(numStates, 1);
00297     FOR (unobsStateidx, numStates)
00298     {
00299         kmatrix_set_entry(Tdummy, unobsStateidx, 0, 1.0);
00300     }
00301 
00302     XYTrans->matrix.resize(numActions);
00303     XYTrans->matrixTr.resize(numActions);
00304     result->obsProb->matrix.resize(numActions);
00305     result->obsProb->matrixTr.resize(numActions); 
00306     result->XTrans->matrix.resize(numActions);
00307     result->XTrans->matrixTr.resize(numActions);
00308 
00309     FOR (a, numActions) 
00310     {
00311         XYTrans->matrix[a].resize(1);
00312         XYTrans->matrix[a][0] = &(pomdpProblem->T[a]);
00313 
00314         XYTrans->matrixTr[a].resize(1);
00315         XYTrans->matrixTr[a][0] = &(pomdpProblem->Ttr[a]);
00316 
00317         result->obsProb->matrix[a].resize(1);
00318         result->obsProb->matrix[a][0] = &(pomdpProblem->O[a]);
00319 
00320         result->obsProb->matrixTr[a].resize(1);
00321         result->obsProb->matrixTr[a][0] = &(pomdpProblem->Otr[a]);
00322 
00323         result->XTrans->matrix[a].resize(1);
00324         result->XTrans->matrix[a][0] = new SparseMatrix();
00325         copy( *result->XTrans->matrix[a][0], Tdummy );
00326     }
00327 
00328     kmatrix_transpose_in_place( Tdummy );
00329     FOR (a, numActions) 
00330     {
00331         result->XTrans->matrixTr[a].resize(1);
00332         result->XTrans->matrixTr[a][0] = new SparseMatrix();
00333         copy( *result->XTrans->matrixTr[a][0], Tdummy );
00334     }
00335 
00336     result->initialBeliefY = &(pomdpProblem->initialBelief);
00337     result->initialBeliefStval->bvec = &(pomdpProblem->initialBelief);
00338     result->initialBeliefStval->sval = 0;
00339 
00340     // added so that initialBeliefX is defined for pomdp input files
00341     result->initialBeliefX->resize(1);
00342     (*result->initialBeliefX)(0) = 1;
00343 
00344     // SYL040909 this code is not needed
00345     // Temp code:
00346     /* result->pomdpR = &(pomdpProblem->R);
00347 
00348        result->pomdpT = new vector<SharedPointer<SparseMatrix> >();
00349        result->pomdpTtr = new vector<SharedPointer<SparseMatrix> >();
00350        result->pomdpO = new vector<SharedPointer<SparseMatrix> >();
00351 
00352        result->pomdpT->resize(pomdpProblem->T.size());
00353        for(size_t i = 0 ; i < pomdpProblem->T.size(); i++)
00354        {
00355        (*result->pomdpT)[i] = &(pomdpProblem->T[i]);
00356        }
00357 
00358        result->pomdpTtr->resize(pomdpProblem->Ttr.size());
00359        for(size_t i = 0 ; i < pomdpProblem->Ttr.size(); i++)
00360        {
00361        (*result->pomdpTtr)[i] = &(pomdpProblem->Ttr[i]);
00362        }
00363 
00364        result->initialBeliefStval->bvec->finalize();
00365 
00366        result->pomdpO->resize(pomdpProblem->O.size());
00367        for(size_t i = 0 ; i < pomdpProblem->O.size(); i++)
00368        {
00369        (*result->pomdpO)[i] = &(pomdpProblem->O[i]);
00370        } */
00371 
00372     return result;
00373 
00374 }
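// Illustrative sketch of using the conversion above.  How the POMDP object is
// loaded is outside this file, so the first step is only an assumed placeholder.
//
//   POMDP* pomdp = /* parsed elsewhere, e.g. from a Cassandra-style model */;
//   SharedPointer<MOMDP> momdp = MOMDP::convertMOMDPFromPOMDP(pomdp);
//   // The converted model has one dummy X state and one Y state per POMDP state.
//   assert(momdp->XStates->size() == 1);
//   assert(momdp->getBeliefSize() == momdp->YStates->size());
//   cout << momdp->ToString() << endl;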
00375 
00376 // Convert from Shaowei's POMDP Layer to MOMDP
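// The POMDPX conversion handles three problem types:
//   MIXED / MIXED_REPARAM : genuinely mixed-observability problems; the layer's
//                           factored matrices are used directly, and
//                           MIXED_REPARAM additionally switches YTrans to the
//                           intraslice StateTransitionXXpY form (hasIntraslice).
//   FULLY_UNOBSERVED      : a pure POMDP; a single dummy X state is added, as in
//                           convertMOMDPFromPOMDP above.
//   FULLY_OBSERVED        : all state variables observed; a single dummy Y state
//                           is added and the flat transition/observation/reward
//                           tables are split into one row per observed state.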
00377 SharedPointer<MOMDP> MOMDP::convertMOMDPFromPOMDPX(FactoredPomdp* factoredPomdp, bool assumeUnknownFlag,unsigned int probType) 
00378 {
00379     SharedPointer<MOMDP> result (new MOMDP());
00380     POMDPLayer* layerPtr = &(factoredPomdp->layer);
00381     // copy over state list 
00382     result->stateList = factoredPomdp->stateList;
00383     result->observationList = factoredPomdp->observationList;
00384     result->actionList = factoredPomdp->actionList;
00385     result->rewardList = factoredPomdp->rewardList;
00386 
00387     StateTransitionXY* XYTrans = new StateTransitionXY();
00388     result->YTrans = XYTrans;  // Default to StateTransitionXY, but may change to StateTransitionXXpY if problem is MIXED_REPARAM
00389 
00390 
00391     // initialize pointers to NULL so that we can tell the difference when it is actually pointing to something useful  
00392     //POMDP information
00393 
00394     // SYL040909 commented out
00395     // R[x] (y,a)
00396     //  vector<SharedPointer<SparseMatrix> > R;
00397     //  // R(s,a)
00398     //  //T[a](s,s'), Ttr[a](s',s), O[a](s',o)
00399     //  vector<SharedPointer<SparseMatrix> > *pomdpOtr;
00400     // 
00401     //  // TX[a][x](y,x'), TXtr[a][x](x',y), TY[a][x](y,y'), TYtr[a][x](y',y)
00402     //  // O[a][x'](y',o), Otr[a][x'](o,y')
00403     //  vector<vector<SharedPointer<SparseMatrix> > > TX, TXtr, TY, TYtr, O, Otr;
00404 
00405     result->pomdpR = NULL; 
00406     result->pomdpT = NULL; 
00407     result->pomdpTtr = NULL; 
00408     result->pomdpO = NULL; 
00409     // pomdpOtr = NULL; // SYL040909 commented out
00410 
00411     int numStates = 0;
00412     int numStatesUnobs = 0;
00413     int numStatesObs = 0;
00414     int numActions = 0;
00415     int numObservations = 0;
00416 
00417 
00418     if (probType == MIXED || probType == MIXED_REPARAM) 
00419     { 
00420         // mixed observable
00421 
00422         numStates = layerPtr->numStatesUnobs;
00423         //setBeliefSize(numStates);
00424         numStatesUnobs = numStates;
00425         numStatesObs = layerPtr->numStatesObs;
00426 
00427         numActions = layerPtr->numActions;
00428         numObservations = layerPtr->numObservations;
00429 
00430         result->discount = layerPtr->discount;
00431 
00432         // SYL040909 this code seems redundant
00433         /* R = layerPtr->R;
00434            TX = layerPtr->TX;
00435            TXtr = layerPtr->TXtr;
00436            TY = layerPtr->TY;
00437            TYtr = layerPtr->TYtr;
00438            O = layerPtr->O;
00439            Otr = layerPtr->Otr; */
00440 
00441         result->obsProb->matrix = layerPtr->O;
00442         result->obsProb->matrixTr = layerPtr->Otr;
00443         result->rewards->matrix = layerPtr->R;
00444         result->XTrans->matrix = layerPtr->TX;
00445         result->XTrans->matrixTr = layerPtr->TXtr;
00446 
00447         if (probType == MIXED) {
00448             XYTrans->matrix = layerPtr->TY;
00449             XYTrans->matrixTr = layerPtr->TYtr;
00450         } else { // probType == MIXED_REPARAM
00451             result->hasIntraslice = true;
00452 
00453             StateTransitionXXpY* XXpYTrans = new StateTransitionXXpY();
00454             XXpYTrans->matrix = layerPtr->TY_reparam;
00455             XXpYTrans->matrixTr = layerPtr->TYtr_reparam;
00456             result->YTrans = XXpYTrans; }
00457 
00458         if (assumeUnknownFlag) 
00459         { // this option only makes sense for mixed observable cases
00460 
00461             // pomdpR is assigned directly from layerPtr->pomdpR below; no separate allocation is needed
00462             result->pomdpT = new std::vector<SharedPointer<SparseMatrix> >;
00463             result->pomdpTtr = new std::vector<SharedPointer<SparseMatrix> >;
00464             result->pomdpO = new std::vector<SharedPointer<SparseMatrix> >;
00465             // pomdpOtr = new std::vector<SharedPointer<SparseMatrix> >;  // SYL040909 commented out
00466 
00467             result->pomdpR = layerPtr->pomdpR;
00468             *result->pomdpT = layerPtr->pomdpT;
00469             *result->pomdpTtr = layerPtr->pomdpTtr;
00470             *result->pomdpO = layerPtr->pomdpO;
00471             //*pomdpOtr = layerPtr->pomdpOtr;  // SYL040909 commented out
00472         }
00473 
00474         if (probType == MIXED) {          
00475           copy(*result->initialBeliefY, layerPtr->initialBeliefY);
00476           copy(*(result->initialBeliefStval->bvec), layerPtr->initialBeliefY); // copy the belief into the bvec field of initialBeliefStval
00477         } else {
00478           result->initialBeliefY = NULL;
00479           FOREACH(SparseVector, vec, layerPtr->initialBeliefY_reparam) {
00480             result->initialBeliefYByX.push_back(new SparseVector(*vec));
00481           }
00482           
00483           //TODO(haoyu) How to specify bvec? (This is only used in the evaluator and simulator.)
00484           //copy(*(result->initialBeliefStval->bvec), layerPtr->initialBeliefY_reparam[0]);
00485           result->initialBeliefStval->bvec = NULL;
00486         }
00487         result->initialBeliefStval->sval = layerPtr->initialStateX;
00488         
00489         copy(*result->initialBeliefX, layerPtr->initialBeliefX);
00490 
00491 
00492 
00493     } 
00494     else if (probType == FULLY_UNOBSERVED) 
00495     { // all state variables are unobserved
00496 
00497         numStates = layerPtr->pomdpNumStates; // numStates = layerPtr->numStatesUnobs;
00498         //setBeliefSize(numStates);
00499         numStatesUnobs = numStates;
00500         numStatesObs = 1;
00501 
00502         numActions = layerPtr->pomdpNumActions;   // numActions = layerPtr->numActions;
00503         numObservations = layerPtr->pomdpNumObservations; // numObservations = layerPtr->numObservations;
00504         result->discount = layerPtr->pomdpDiscount;  //discount = layerPtr->discount; 
00505 
00506         //isPOMDPTerminalState.resize(numStatesObs);
00507         //isPOMDPTerminalState = layerPtr->isPOMDPTerminalState;
00508         // result->isPOMDPTerminalState.resize(1);
00509         // result->isPOMDPTerminalState[0] = layerPtr->pomdpIsPOMDPTerminalState;
00510 
00511         //copy(initialBelief, layerPtr->initialBeliefY);
00512         //copy(initialBeliefStval.bvec, layerPtr->initialBeliefY); // copy the belief into the bvec field of initialBeliefStval
00513         copy(*result->initialBeliefY, layerPtr->pomdpInitialBelief);
00514         copy(*(result->initialBeliefStval->bvec), layerPtr->pomdpInitialBelief); // copy the belief into the bvec field of initialBeliefStval
00515         result->initialBeliefStval->sval = 0;
00516 
00517         // define initialBeliefX for problems with all unobserved variables
00518         result->initialBeliefX->resize(1);
00519         (*result->initialBeliefX)(0) = 1;
00520 
00521         result->rewards->matrix.resize(1);
00522         result->rewards->matrix[0] = layerPtr->pomdpR;
00523         //copy(R[0], layerPtr->pomdpR);
00524 
00525         // SYL040909
00526         XYTrans->matrix.resize(numActions);
00527         XYTrans->matrixTr.resize(numActions);
00528         result->obsProb->matrix.resize(numActions);
00529         result->obsProb->matrixTr.resize(numActions);
00530         result->XTrans->matrix.resize(numActions);
00531         result->XTrans->matrixTr.resize(numActions);
00532         //              TY.resize(numActions);
00533         //              TYtr.resize(numActions);
00534         //              O.resize(numActions);
00535         //              Otr.resize(numActions);
00536         //              TX.resize(numActions);
00537         //              TXtr.resize(numActions);
00538 
00539         // deal with the dummy TX and TXtr matrices
00540         kmatrix Tdummy;
00541         Tdummy.resize(numStates, 1);
00542 
00543         FOR (unobsStateidx, numStates)
00544             kmatrix_set_entry(Tdummy, unobsStateidx, 0, 1.0);
00545 
00546         FOR (a, numActions) {
00547 
00548             // SYL040909
00549             XYTrans->matrix[a].resize(1);
00550             XYTrans->matrix[a][0] = layerPtr->pomdpT[a];
00551 
00552             XYTrans->matrixTr[a].resize(1);
00553             XYTrans->matrixTr[a][0] = layerPtr->pomdpTtr[a];
00554 
00555             result->obsProb->matrix[a].resize(1);
00556             result->obsProb->matrix[a][0] = layerPtr->pomdpO[a];
00557 
00558             result->obsProb->matrixTr[a].resize(1);
00559             result->obsProb->matrixTr[a][0] = layerPtr->pomdpOtr[a];
00560 
00561             result->XTrans->matrix[a].resize(1);
00562             result->XTrans->matrix[a][0] = new SparseMatrix();
00563             copy( *result->XTrans->matrix[a][0], Tdummy );
00564 
00565             /*                  TY[a].resize(1);
00566             //copy(TY[a][0], layerPtr->pomdpT);
00567             TY[a][0] = layerPtr->pomdpT[a];
00568 
00569             TYtr[a].resize(1);
00570             //copy(TYtr[a][0], layerPtr->pomdpTtr);
00571             TYtr[a][0] = layerPtr->pomdpTtr[a];
00572 
00573             O[a].resize(1);
00574             //copy(O[a][0], layerPtr->pomdpO);
00575             O[a][0] = layerPtr->pomdpO[a];
00576 
00577             Otr[a].resize(1);
00578             //copy(Otr[a][0], layerPtr->pomdpOtr);
00579             Otr[a][0] = layerPtr->pomdpOtr[a];
00580 
00581             TX[a].resize(1);
00582             // set TX[a][0](*,0) = 1
00583             TX[a][0] = new SparseMatrix();
00584             copy( *TX[a][0], Tdummy );*/
00585         }
00586 
00587         // SYL040909
00588         kmatrix_transpose_in_place( Tdummy );
00589         FOR (a, numActions) 
00590         {
00591             result->XTrans->matrixTr[a].resize(1);
00592             result->XTrans->matrixTr[a][0] = new SparseMatrix();
00593             copy( *result->XTrans->matrixTr[a][0], Tdummy );
00594         }
00595 
00596 
00597         //              kmatrix_transpose_in_place( Tdummy );
00598         //              FOR (a, numActions) {
00599         //                      TXtr[a].resize(1);
00600         //                      // set TXtr[a][0](0,*) = 1
00601         //                      TXtr[a][0] = new SparseMatrix();
00602         //                      copy( *TXtr[a][0], Tdummy );
00603         //              }
00604         // 
00605         // 
00606         //              result->rewards->matrix = R;
00607         //              result->XTrans->matrix = TX;
00608         //              result->XTrans->matrixTr = TXtr;
00609         //              result->XYTrans->matrix =TY;
00610         //              result->XYTrans->matrixTr =TYtr;
00611         //              result->obsProb->matrix = O;
00612         //              result->obsProb->matrixTr = Otr;
00613 
00614     }
00615     else if (probType == FULLY_OBSERVED) 
00616     {
00617         numStates = layerPtr->pomdpNumStates; 
00618         numStatesUnobs = 1;
00619         numStatesObs = numStates;
00620 
00621         numActions = layerPtr->pomdpNumActions;   
00622         numObservations = layerPtr->pomdpNumObservations; 
00623         result->discount = layerPtr->pomdpDiscount;  
00624 
00625         result->initialBeliefY->resize(1);
00626         result->initialBeliefY->push_back(0,1);
00627         copy(*(result->initialBeliefStval->bvec), *result->initialBeliefY); // copy the belief into the bvec field of initialBeliefStval
00628         result->initialBeliefStval->sval = -1; 
00629 
00630         // define initialBeliefX for problems with all observed variables
00631         result->initialBeliefX->resize(numStates);
00632         copy(*result->initialBeliefX, layerPtr->pomdpInitialBelief);
00633 
00634         result->rewards->matrix.resize(numStates);
00635         FOR(stateidx, numStates){
00636             kmatrix RewardMat;
00637             RewardMat.resize(1, numActions);
00638             FOR(a, numActions){
00639                 kmatrix_set_entry(RewardMat, 0, a, layerPtr->pomdpR->operator()(stateidx, a));
00640             }
00641             result->rewards->matrix[stateidx] = SharedPointer<SparseMatrix>(new SparseMatrix());
00642             copy(*result->rewards->matrix[stateidx], RewardMat);
00643         }
00644 
00645         XYTrans->matrix.resize(numActions);
00646         XYTrans->matrixTr.resize(numActions);
00647         result->obsProb->matrix.resize(numActions);
00648         result->obsProb->matrixTr.resize(numActions);
00649         result->XTrans->matrix.resize(numActions);
00650         result->XTrans->matrixTr.resize(numActions);
00651 
00652         // deal with the dummy TY and TYtr matrices
00653         kmatrix Tdummy; 
00654         Tdummy.resize(1, 1);
00655         kmatrix_set_entry(Tdummy, 0, 0, 1.0);
00656 
00657         FOR (a, numActions) {
00658 
00659             XYTrans->matrix[a].resize(numStates);
00660             XYTrans->matrixTr[a].resize(numStates);
00661             result->obsProb->matrix[a].resize(numStates);
00662             result->obsProb->matrixTr[a].resize(numStates);
00663             result->XTrans->matrix[a].resize(numStates);
00664             result->XTrans->matrixTr[a].resize(numStates);
00665 
00666             FOR(stateidx, numStates){
00667                 XYTrans->matrix[a][stateidx] = SharedPointer<SparseMatrix>(new SparseMatrix());
00668                 copy(*XYTrans->matrix[a][stateidx], Tdummy);
00669                 XYTrans->matrixTr[a][stateidx] = SharedPointer<SparseMatrix>(new SparseMatrix());
00670                 copy(*XYTrans->matrixTr[a][stateidx], Tdummy);
00671 
00672                 //extract each row of the observation table
00673                 kmatrix ObsMatrix;
00674                 ObsMatrix.resize(1, numObservations);
00675                 FOR(obs, numObservations){
00676                     kmatrix_set_entry(ObsMatrix, 0, obs, layerPtr->pomdpO[a]->operator()(stateidx, obs));
00677                 }
00678                 result->obsProb->matrix[a][stateidx] = SharedPointer<SparseMatrix>(new SparseMatrix());
00679                 copy(*result->obsProb->matrix[a][stateidx], ObsMatrix);
00680                 kmatrix_transpose_in_place (ObsMatrix);
00681                 result->obsProb->matrixTr[a][stateidx] = SharedPointer<SparseMatrix>(new SparseMatrix());
00682                 copy(*result->obsProb->matrixTr[a][stateidx], ObsMatrix);
00683 
00684                 //extract each row of the state transition table
00685                 kmatrix StateMatrix;
00686                 StateMatrix.resize(1, numStates);
00687                 FOR(s, numStates){
00688                     kmatrix_set_entry(StateMatrix, 0, s, layerPtr->pomdpT[a]->operator()(stateidx, s));
00689                 }
00690                 result->XTrans->matrix[a][stateidx] = SharedPointer<SparseMatrix>(new SparseMatrix());
00691                 copy(*result->XTrans->matrix[a][stateidx], StateMatrix);
00692                 kmatrix_transpose_in_place(StateMatrix);
00693                 result->XTrans->matrixTr[a][stateidx] = new SparseMatrix();
00694                 copy(*result->XTrans->matrixTr[a][stateidx], StateMatrix);
00695             }
00696         }
00697         // post-process: calculate isPOMDPTerminalState
00698         // result->isPOMDPTerminalState.resize(numStates);
00699 
00700         // FOR(s, numStates){
00701             // result->isPOMDPTerminalState[s].resize(1);
00702             // result->isPOMDPTerminalState[s][0] = layerPtr->pomdpIsPOMDPTerminalState[s];
00703         // }
00704 
00705     } 
00706 
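    // A pair (x, y) is marked terminal only if, for every action, it self-loops
    // with probability 1 in both the X and Y components and yields zero
    // immediate reward; the first action violating this marks it non-terminal.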
00707     // post-process: calculate isPOMDPTerminalState
00708     result->isPOMDPTerminalState.resize(numStatesObs);
00709 
00710     FOR (state_idx, numStatesObs)
00711     {
00712         result->isPOMDPTerminalState[state_idx].resize(numStatesUnobs, /* initialValue = */true);
00713         FOR (s, numStatesUnobs)
00714         {
00715             FOR (a, numActions)
00716             {
00717                 // Probability of self looping: (a, state_idx, s) -> (state_idx, s)
00718                 double probX = (*result->XTrans->getMatrix(a, state_idx))(s, state_idx);
00719                 double probY = (*result->YTrans->getMatrix(a, state_idx, state_idx))(s, s);
00720                 if (fabs(1.0 - probX) > OBS_IS_ZERO_EPS || fabs(1.0 - probY) > OBS_IS_ZERO_EPS || (*result->rewards->matrix[state_idx])(s, a) != 0.0) {
00721                     result->isPOMDPTerminalState[state_idx][s] = false;
00722                     break;
00723                 }
00724             }
00725         }
00726     }
00727 
00728     // States
00729     FOR(x, numStatesObs) 
00730     {
00731         StateVal temp;
00732         stringstream sstream;
00733         sstream << "X state " << x;
00734         temp.name = sstream.str();
00735         result->XStates->add(temp);
00736     }
00737 
00738     FOR(y, numStatesUnobs) 
00739     {
00740         stringstream sstream;
00741         sstream << "Y State " << y ;
00742         StateVal temp;
00743         temp.name = sstream.str();
00744         result->YStates->add(temp);
00745     }
00746 
00747     // Action
00748 
00749     FOR(a, numActions) 
00750     {
00751         stringstream sstream;
00752         sstream << "Action " << a ;
00753         Action tempAction;
00754         tempAction.name = sstream.str();
00755         result->actions->add(tempAction);
00756     }
00757     // Observations
00758     FOR(o, numObservations) 
00759     {
00760         stringstream sstream;
00761         sstream << "Obs " << o ;
00762         Observation temp;
00763         temp.name = sstream.str();
00764         result->observations->add(temp);
00765     }
00766 
00767 
00768     //for(size_t i = 0; i < Otr.size() ; i ++)
00769     //{
00770     //  for(size_t j = 0 ; j < Otr[i].size() ; j ++)
00771     //  {
00772     //          Otr[i][j]->write(cout);
00773     //  }
00774     //  cout << endl;
00775     //}
00776 
00777     if(result->initialBeliefStval->bvec)
00778       result->initialBeliefStval->bvec->finalize();
00779     return result;
00780 }
00781 
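// A belief (x, b_Y) is terminal when the probability mass it assigns to
// non-terminal (x, y) pairs is below 1e-10.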
00782 bool MOMDP::getIsTerminalState(BeliefWithState &b)
00783 {
00784     double nonTerminalSum = 0.0;
00785     SharedPointer<belief_vector> s = b.bvec;
00786     state_val stateidx = b.sval;
00787 
00788     FOR_CV ((*s)) 
00789     {
00790         if (!isPOMDPTerminalState[stateidx][CV_INDEX(s)]) 
00791         {
00792             nonTerminalSum += CV_VAL(s);
00793         }
00794     }
00795     return (nonTerminalSum < 1e-10);
00796 }
00797 
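// Decodes a flat action index into per-variable symbols by mixed-radix
// arithmetic over actionList, with the last variable varying fastest.
// Hypothetical example: two action variables A0 in {stay, move} and
// A1 in {left, right, forward}; actionNum = 4 = 1*3 + 1 decodes to
// A1 = valueEnum[4 % 3] = "right" and A0 = valueEnum[(4 / 3) % 2] = "move".
// getFactoredObservedStatesSymbols, getFactoredUnobservedStatesSymbols and
// getObservationsSymbols below apply the same decoding to their own lists.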
00798 map<string, string> MOMDP::getActionsSymbols(int actionNum) 
00799 {
00800     map<string, string> result;
00801     if(actionList.size() == 0)
00802     {
00803         // This is a pure pomdp problem
00804         stringstream sstream;
00805         sstream << actionNum ;
00806         result["action"] = sstream.str();
00807         return result;
00808     }
00809 
00810     int quotient, remainder;
00811     quotient = actionNum;
00812     for (int i = (int) actionList.size() - 1; i >= 0; i--) {
00813 
00814         ObsAct act = actionList[i];
00815 
00816         remainder = quotient % act.getValueEnum().size();
00817         result[act.getVName()] = act.getValueEnum()[remainder];
00818         quotient = quotient / act.getValueEnum().size();
00819 
00820     }
00821     return result;
00822 }
00823 
00824 
00825 map<string, string> MOMDP::getFactoredObservedStatesSymbols(int stateNum) 
00826 {
00827     map<string, string> result;
00828     if(stateList.size() == 0)
00829     {
00830         // This is a pure pomdp problem
00831         result["dummy observed state"] = "0";
00832         return result;
00833     }
00834 
00835     int quotient, remainder;
00836     quotient = stateNum;
00837     for (int i = (int) stateList.size() - 1; i >= 0; i--) {
00838 
00839         const State& s = stateList[i];
00840         if (s.getObserved()) {
00841 
00842             remainder = quotient % s.getValueEnum().size();
00843             result[s.getVNamePrev()] = s.getValueEnum()[remainder];
00844             //result[s.getVNameCurr()] = s.getValueEnum()[remainder];
00845             quotient = quotient / s.getValueEnum().size();
00846         }
00847     }
00848     return result;
00849 }
00850 
00851 
00852 map<string, string> MOMDP::getFactoredUnobservedStatesSymbols(int stateNum) 
00853 {
00854     map<string, string> result;
00855     if(stateList.size() == 0)
00856     {
00857         // This is a pure pomdp problem
00858         stringstream sstream;
00859         sstream << stateNum ;
00860         result["state"] = sstream.str();
00861         return result;
00862     }
00863 
00864     int quotient, remainder;
00865     quotient = stateNum;
00866     for (int i = (int) stateList.size() - 1; i >= 0; i--) 
00867     {
00868 
00869         const State& s = stateList[i];
00870         if (!(s.getObserved())) 
00871         {
00872             remainder = quotient % s.getValueEnum().size();
00873             result[s.getVNamePrev()] = s.getValueEnum()[remainder];
00874             //result[s.getVNameCurr()] = s.getValueEnum()[remainder];
00875             quotient = quotient / s.getValueEnum().size();
00876         }
00877     }
00878     return result;
00879 }
00880 
00881 map<string, string> MOMDP::getObservationsSymbols(int observationNum) 
00882 {
00883     map<string, string> result;
00884     if(observationList.size() == 0)
00885     {
00886         // This is a pure pomdp problem
00887         stringstream sstream;
00888         sstream << observationNum ;
00889         result["observation"] = sstream.str();
00890         return result;
00891     }
00892     int quotient, remainder;
00893     quotient = observationNum;
00894     for (int i = (int) observationList.size() - 1; i >= 0; i--) {
00895 
00896         ObsAct obs = observationList[i];
00897 
00898         remainder = quotient % obs.getValueEnum().size();
00899         result[obs.getVName()] = obs.getValueEnum()[remainder];
00900         quotient = quotient / obs.getValueEnum().size();
00901 
00902     }
00903     return result;
00904 }
00905 
00906 // Commented out during code merge on 02102009
00907 /* 
00908 // TODO:: remove this after fixing Simulator assume unknown flag
00909 belief_vector& MOMDP::getNextBelief(belief_vector& result,const belief_vector& b, int a, int o)
00910 {
00911 belief_vector tmp;
00912 
00913 mult( tmp, *(*pomdpTtr)[a], b );
00914 emult_column( result, *(*pomdpO)[a], o, tmp );
00915 
00916 result *= (1.0/result.norm_1());
00917 
00918 return result;
00919 } */
00920 

