appl: BackupAlphaPlaneMOMDP.cpp Source File

Go to the documentation of this file.
00001 #include "BackupAlphaPlaneMOMDP.h"
00002 #include "BeliefCache.h"
00003 #include "AlphaPlanePool.h"
00004 #include "SARSOP.h"
00005 #include "exception" 
00006 #include <list>
00007 #include <vector>
00008 #include <cfloat>
00009 
00010 using namespace std;
00011 using namespace momdp;
00012 
00013 BackupAlphaPlaneMOMDP::BackupAlphaPlaneMOMDP(void)
00014 {
00015 }
00016 
00017 BackupAlphaPlaneMOMDP::~BackupAlphaPlaneMOMDP(void)
00018 {
00019 }
00020 // Should use this function with Bound Set
00021 SharedPointer<AlphaPlane> BackupAlphaPlaneMOMDP::backup(BeliefTreeNode* cn)
00022 {
00023         SharedPointer<AlphaPlane> newPlaneP (new AlphaPlane());
00024         //AlphaPlane newPlane;  Inefficient implementation, commented out by Yanzhu 15 Aug 2007
00025 
00026         double lbVal;
00027         //get a new plane at the belief
00028         lbVal = getNewAlphaPlane(*newPlaneP, *cn);
00029 
00030         // TODO:: migrate below code to SARSOP::onLowerBoundBackup
00031 
00032         newPlaneP->init(solver->numBackups, cn);//to set its timestamp and put cn in its domination list
00033         //add the new plane as the best alpha for the belief
00034 
00035         AlphaPlanePool* bound = boundSet->set[cn->s->sval];
00036         list<SharedPointer<AlphaPlane> >* alphas = bound->dataTable->get(cn->cacheIndex.row).ALPHA_PLANES;
00037         
00038         if(alphas->size()>0)
00039         {
00040                 SharedPointer<AlphaPlane>frontAlpha = alphas->front();
00041                 SARSOPAlphaPlaneTuple *tempTuple = (SARSOPAlphaPlaneTuple *)frontAlpha->solverData;
00042                 tempTuple->certed--;
00043                 alphas->pop_front();
00044         }
00045         ((SARSOPAlphaPlaneTuple *)newPlaneP->solverData)->certed ++;
00046         alphas->push_front(newPlaneP);
00047         // TODO:: migrate the addAlphaPlane(newPlaneP) to algorithm based code
00048 
00049         bound->addAlphaPlane(newPlaneP); 
00050         bound->beliefCache->getRow( cn->cacheIndex.row)->LB = lbVal;
00051         bound->dataTable->set(cn->cacheIndex.row).ALPHA_TIME_STAMP = newPlaneP->timeStamp;
00052 
00053         return newPlaneP;
00054 }
00055 
00056 
00057 
00058 /* method: getNewAlphaPlane, saves a n-1 calls to getNewAlphaPlaneQ
00059 
00060 */
00061 double BackupAlphaPlaneMOMDP::getNewAlphaPlane(AlphaPlane& result, BeliefTreeNode& cn)
00062 {
00063         DEBUG_TRACE( cout << "getNewAlphaPlane" << endl; );
00064         if(cn.isFringe())
00065         {
00066                 assert(false);
00067                 cout << "Code bug" << endl;
00068                 exit(EXIT_FAILURE);
00069         }
00070         else
00071         {
00072                 // NOTE: make sure whether to use -DBL_MAX or DBL_MIN
00073                 // DBL_MIN is the smallest Positive number
00074                 // -DBL_MAX is the most negative number
00075                 double maxActionLB = -DBL_MAX;
00076                 int maxAction = 0;
00077                 
00078 
00079                 FOR (a, problem->getNumActions())
00080                 {
00081                         const BeliefTreeQEntry& Qa = cn.Q[a];
00082                         double sum = 0.0;
00083 
00084                         DEBUG_TRACE( cout << "a " << a << endl; );
00085                         FOR (Xn, Qa.getNumStateOutcomes())
00086                         {
00087                                 DEBUG_TRACE( cout << "Xn " << Xn << endl; );
00088 
00089                                 const BeliefTreeObsState* QaXn = Qa.stateOutcomes[Xn];
00090                                 //const BeliefTreeObsState& QaXn = Qa.stateOutcomes[Xn];
00091                                 if (NULL != QaXn ) 
00092                                 {
00093                                         FOR (o, QaXn->getNumOutcomes())
00094                                                 //FOR (o, QaXn.getNumOutcomes())
00095                                         {
00096                                                 DEBUG_TRACE( cout << "o " << o << endl; );
00097 
00098                                                 BeliefTreeEdge* e = QaXn->outcomes[o];
00099                                                 //BeliefTreeEdge* e = QaXn.outcomes[o];
00100                                                 if (NULL != e)
00101                                                 {
00102                                                         // child node:
00103                                                         BeliefTreeNode* childNode = e->nextState;
00104 
00105                                                         // SLOWER, separate out the functions in the statement :
00106                                                         /*      AlphaPlane bestPlane = alphaPlanePool->getBestAlphaPlane(*(childNode));
00107                                                         alpha_vector bestalpha = bestPlane->alpha;
00108                                                         double childLB = inner_prod(bestalpha, childNode->s.bvec);
00109                                                         */      
00110                                                         
00111                                                         SharedPointer<AlphaPlane> tempAlpha = boundSet->getBestAlphaPlane(*(childNode));
00112                                                         double childLB = inner_prod(*(tempAlpha->alpha), *(childNode->s->bvec));  
00113                                                         DEBUG_TRACE( cout << "childLB " << childLB << endl; );
00114                                                         
00115                                                         
00116                                                         boundSet->set[childNode->cacheIndex.sval]->beliefCache->getRow( childNode->cacheIndex.row)->LB = childLB;
00117 
00118         
00119 
00120                                                         //SLOWEST:
00121                                                         //                                                              double childLB = alphaPlanePool->getLowerBoundValue(childNode->s);
00122 
00123 
00124                                                         // 260908 fixed bug, prevly multiplied only by e->obsProb
00125                                                         // 061008 changed calculation for e->obsProb
00126                                                         sum += childLB * e->obsProb;
00127                                                         DEBUG_TRACE( cout << "sum " << sum << endl; );
00128                                                         //sum += childLB * e->obsProb * QaXn->obsStateProb;
00129                                                 }
00130                                                 else
00131                                                 {
00132 
00133                                                 }
00134                                         }
00135                                 }
00136                         }
00137                         sum *= problem->getDiscount();
00138                         sum += cn.Q[a].immediateReward;
00139                         cn.Q[a].lbVal = sum;
00140                         DEBUG_TRACE( cout << "sum " << sum << endl; );
00141 
00142                         DEBUG_TRACE( cout << "maxActionLB " << maxActionLB << endl; );
00143                         if(sum > maxActionLB)
00144                         {
00145                                 DEBUG_TRACE( cout << "maxActionLB TO " << sum << endl; );
00146                                 maxActionLB = sum;
00147                                 maxAction = a;
00148                         }
00149 
00150                 }
00151                 assert(maxActionLB !=  -DBL_MAX);
00152                 
00153 
00154                 getNewAlphaPlaneQ(result, cn, maxAction);
00155         
00156 
00157                 DEBUG_TRACE(cout << "resulting alpha : " << endl);
00158                 DEBUG_TRACE(result.alpha->write(cout) << endl; );
00159                 return maxActionLB;
00160         }
00161 }
00162 
00163 
00164 // lower bound on long-term reward for taking action a (alpha vector)
00165 void BackupAlphaPlaneMOMDP::getNewAlphaPlaneQ(AlphaPlane& result, const BeliefTreeNode& cn, int a)
00166 {
00167         SharedPointer<alpha_vector> betaA (new alpha_vector(problem->getBeliefSize()));
00168         SharedPointer<alpha_vector> betaAXnO;
00169         // use pointer instead of creating a new alpha vector, Yanzhu
00170         alpha_vector tmp, tmp1, tmp2;
00171 
00172         bool defaultIsSet;
00173         SharedPointer<BeliefWithState> dummy_stval (new BeliefWithState());
00174 
00175         
00176         state_val Xc = cn.s->sval;      // state value at current time step (ie X, not X')
00177         SharedPointer<alpha_vector> defaultBetaAXnO;
00178         const BeliefTreeQEntry& Qa = cn.Q[a];
00179 
00180 
00181         FOR (Xn, Qa.getNumStateOutcomes())
00182         {
00183                 if( !(problem->XTrans->getMatrix(a, Xc)->isColumnEmpty(Xn)))
00184                 {
00185                         defaultIsSet = false;
00186 
00187                         const BeliefTreeObsState* QaXn = Qa.stateOutcomes[Xn];
00188                         if (NULL != QaXn )
00189                         {       
00190                                 FOR (o, QaXn->getNumOutcomes())
00191                                 {
00192                                         if( !(problem->obsProb->getMatrix(a, Xn)->isColumnEmpty(o)))
00193                                         {
00194                                                 BeliefTreeEdge* e = QaXn->outcomes[o];
00195                                                 //BeliefTreeEdge* e = QaXn.outcomes[o];
00196                                                 if (NULL != e)
00197                                                 {
00198                                                         betaAXnO = (boundSet->getBestAlphaPlane(*(e->nextState))->alpha);
00199                                                 }
00200                                                 else
00201                                                 {
00202                                                         if (!defaultIsSet)
00203                                                         {
00204                                                                 dummy_stval->sval = Xn;
00205                                                                 dummy_stval->bvec = cn.s->bvec;
00206                                                                 defaultBetaAXnO = (boundSet->getBestAlphaPlane(*dummy_stval)->alpha);
00207                                                                 defaultIsSet = true;
00208                                                         }
00209                                                         betaAXnO = defaultBetaAXnO;
00210                                                 }       
00211                                                 emult_column( tmp, *problem->obsProb->getMatrix(a, Xn), o, *betaAXnO );
00212                                                 mult( tmp1, *problem->YTrans->getMatrix(a, Xc, Xn), tmp);
00213                                                 emult_column( tmp2, *problem->XTrans->getMatrix(a, Xc), Xn, tmp1 );
00214                                                 (*betaA) += tmp2;
00215                                         }
00216                                 }
00217                         }  
00218                         else 
00219                         {
00220                                 // still cycle through all possible observations and transform the vector and add to betaA
00221                                 //FOR (o, problem->numObservations) 
00222                                 for(Observations::iterator oIter = problem->observations->begin(); oIter != problem->observations->end(); oIter ++)
00223                                 {
00224                                         int o = oIter.index();
00225                                         if( !(problem->obsProb->getMatrix(a, Xn)->isColumnEmpty(o)) )
00226                                         {
00227                                                 if (!defaultIsSet)
00228                                                 {
00229                                                         dummy_stval->sval = Xn;
00230                                                         dummy_stval->bvec = cn.s->bvec;
00231                                                         defaultBetaAXnO = (boundSet->getBestAlphaPlane(*dummy_stval)->alpha);
00232                                                         defaultIsSet = true;
00233                                                 }
00234                                                 betaAXnO = defaultBetaAXnO;
00235 
00236                                                 emult_column( tmp, *problem->obsProb->getMatrix(a, Xn), o, *betaAXnO );
00237                                                 mult( tmp1, *problem->YTrans->getMatrix(a, Xc, Xn), tmp);
00238                                                 emult_column( tmp2, *problem->XTrans->getMatrix(a, Xc), Xn, tmp1 );
00239                                                 (*betaA) += tmp2;
00240                                         }
00241 
00242                                 }
00243                                 //}
00244                         } 
00245                 }
00246         }
00247         alpha_vector RaXc;
00248 
00249 
00250         copy_from_column( RaXc, *(problem->rewards->getMatrix(Xc)), a );
00251         (*betaA) *= problem->getDiscount();
00252         (*betaA) += RaXc;
00253 
00254         result.copyFrom(betaA, a, Xc);
00255 }
00256