BackupBeliefValuePairMOMDP.cpp
Go to the documentation of this file.
00001 #include "Const.h"
00002 #include "BackupBeliefValuePairMOMDP.h"
00003 #include "BeliefTreeNode.h"
00004 #include "BeliefCache.h"
00005 #include "MathLib.h"
00006 #include "MOMDP.h"
00007 #include "SARSOP.h"
00008 #include "BeliefValuePairPool.h"
00009 #include "exception" 
00010 
00011 using namespace std;
00012 using namespace momdp;
00013 
00014 BackupBeliefValuePairMOMDP::BackupBeliefValuePairMOMDP(void)
00015 {
00016 }
00017 
00018 BackupBeliefValuePairMOMDP::~BackupBeliefValuePairMOMDP(void)
00019 {
00020 }
00021 
00022 // Should use this function with Bound Set
00023 SharedPointer<BeliefValuePair> BackupBeliefValuePairMOMDP::backup(BeliefTreeNode* cn)
00024 {
00025         int maxUBAction;
00026 
00027         state_val stateidx = cn->s->sval;
00028 
00029         double newUBVal = getNewUBValue(*cn, &maxUBAction);
00030         SharedPointer<BeliefValuePair> result = boundSet->addPoint(cn->s, newUBVal);    
00031 
00032         if(maxUBAction < 0)
00033         {
00034                 cout << "error" << endl;
00035         }
00036         //  maybePrune();
00037         //pruning is done in Prune class later
00038         double lastUbVal = boundSet->set[stateidx]->beliefCache->getRow(cn->cacheIndex.row)->UB;
00039 
00040         boundSet->set[stateidx]->beliefCache->getRow( cn->cacheIndex.row)->UB = newUBVal;
00041 
00042         boundSet->set[stateidx]->dataTable->set(cn->cacheIndex.row).UB_ACTION = maxUBAction;
00043 
00044         DEBUG_TRACE( cout << "Set UB_ACTION: [ " << stateidx << " / " << cn->cacheIndex.row << " ] = " << maxUBAction << endl; );
00045         
00046         // TODO:: cn.lastUbVal = lastUbVal;
00047         //printf("lastUb: %f, ubVal %f\n", lastUbVal, newUBVal);//for debugging purpose
00048         //return maxUBAction;
00049         return result;
00050 }
00051 // upper bound on long-term reward for taking action a
00052 double BackupBeliefValuePairMOMDP::getNewUBValueQ(BeliefTreeNode& cn, int a) 
00053 {
00054         DEBUG_TRACE( cout << "getNewUBValueQ a " << a << endl; );
00055         DEBUG_TRACE( cout << "cn->cacheIndex " << cn.cacheIndex.row << " " << cn.cacheIndex.sval << endl; );
00056 
00057         double val = 0;
00058         BeliefTreeQEntry& Qa = cn.Q[a];
00059         FOR (Xc, Qa.getNumStateOutcomes()) 
00060         {
00061                 DEBUG_TRACE( cout << "Xc " << Xc << endl; );
00062                 BeliefTreeObsState* QaXc =  Qa.stateOutcomes[Xc];
00063                 if (NULL != QaXc ) 
00064                 {
00065                         FOR(o, QaXc->getNumOutcomes()) 
00066                         {
00067                                 DEBUG_TRACE( cout << "o " << o << endl; );
00068                                 BeliefTreeEdge* e = QaXc->outcomes[o];
00069                                 if (NULL != e) 
00070                                 {
00071                                         DEBUG_TRACE( cout << "e!=NULL " << endl; );
00072                                         DEBUG_TRACE( cout << "e->nextState->cacheIndex " << e->nextState->cacheIndex.row << " " << e->nextState->cacheIndex.sval <<endl; );
00073 
00074                                         double ubval = boundSet->getValue(e->nextState->s);
00075 
00076                                         DEBUG_TRACE( cout << "Next Node: " << e->nextState->cacheIndex.row << " : "  << e->nextState->cacheIndex.sval << " action: " << a << " obs: " << o << " ubval: " << ubval << endl; );
00077 
00078                                         //26092008 corrected, prevly multiplied by e->obsProb only
00079                                         // 061008 changed calculation for obsProb
00080                                         val += e->obsProb * ubval;
00081                                         //val += e->obsProb * QaXc->obsStateProb * ubval;
00082 
00083                                         DEBUG_TRACE( cout << "val " << val << endl; );
00084 
00085                                         //26092008 SYL added - update cache for all children
00086                                         boundSet->set[e->nextState->cacheIndex.sval]->beliefCache->getRow( e->nextState->cacheIndex.row)->UB = ubval;
00087                                 }
00088                         }
00089                 }
00090         }
00091 
00092         val = Qa.immediateReward + problem->getDiscount() * val; 
00093         DEBUG_TRACE( cout << "val " << val << endl; );
00094         Qa.ubVal = val;
00095 
00096 
00097         return val;
00098 }
00099 
00100 double BackupBeliefValuePairMOMDP::getNewUBValueSimple(BeliefTreeNode& cn, int* maxUBActionP) 
00101 {
00102         DEBUG_TRACE( cout << "getNewUBValueSimple: " << endl; );
00103 
00104         double val, maxVal = -99e+20;
00105         int maxUBAction = -1;
00106         FOR(a, problem->getNumActions()) 
00107         {
00108                 DEBUG_TRACE( cout << "a: " << a << endl; );
00109                 val = getNewUBValueQ(cn, a);
00110                 DEBUG_TRACE( cout << "val: " << val << endl; );
00111                 DEBUG_TRACE( cout << "maxVal: " << maxVal << endl; );
00112 
00113 
00114                 if (val > maxVal ) 
00115                 {
00116                         maxVal = val;
00117                         maxUBAction = a;
00118                         DEBUG_TRACE( cout << "maxUBAction TO: " << maxUBAction << endl; );
00119                 }
00120         }
00121 
00122         if (NULL != maxUBActionP)
00123         {
00124                 *maxUBActionP = maxUBAction;
00125         }
00126 
00127         return maxVal;
00128 }
00129 
00130 double BackupBeliefValuePairMOMDP::getNewUBValueUseCache(BeliefTreeNode& cn, int* maxUBActionP) 
00131 {
00132         DEBUG_TRACE( cout << "getNewUBValueUseCache" <<  endl; );
00133         // cache upper bound for each action
00134         DenseVector cachedUpperBound(problem->getNumActions());
00135         
00136         for(Actions::iterator aIter = problem->actions->begin(); aIter != problem->actions->end(); aIter ++)
00137         {
00138                 int a = aIter.index();
00139                 cachedUpperBound(a) = cn.Q[a].ubVal;
00140         }
00141 
00142         // remember which Q functions we have updated on this call
00143         //std::vector<bool> updatedAction(problem->getNumActions());
00144         vector<int> updatedAction(problem->actions->size());
00145 
00146         for(Actions::iterator aIter = problem->actions->begin(); aIter != problem->actions->end(); aIter ++)
00147         {
00148                 int a = aIter.index();
00149                 updatedAction[a] = false;
00150         }
00151 
00152         double val;
00153         int maxUBAction = argmax_elt(cachedUpperBound);
00154 
00155         while (1) 
00156         {
00157                 DEBUG_TRACE( cout << "cachedUpperBound" << endl; );
00158                 DEBUG_TRACE( cachedUpperBound.write(cout) << endl; );
00159                 DEBUG_TRACE( cout << "maxUBAction " << maxUBAction << endl );
00160 
00161 
00162                 // do the backup for the best Q
00163                 val = getNewUBValueQ(cn, maxUBAction);
00164                 cachedUpperBound(maxUBAction) = val;
00165                 updatedAction[maxUBAction] = true;
00166 
00167                 // the best action may have changed after updating Q
00168                 maxUBAction = argmax_elt(cachedUpperBound);
00169 
00170                 // if the best action after the update is one that we have already
00171                 //    updated, we're done
00172                 if (updatedAction[maxUBAction]) break;
00173         }
00174 
00175         double maxVal = cachedUpperBound(maxUBAction);
00176 
00177         if (NULL != maxUBActionP) 
00178         {
00179                 *maxUBActionP = maxUBAction;
00180         }
00181         return maxVal;
00182 }
00183 
00184 double BackupBeliefValuePairMOMDP::getNewUBValue(BeliefTreeNode& cn, int* maxUBActionP)
00185 {
00186         DEBUG_TRACE( cout << "BackupUpperBoundBVpair::getNewUBValue: " <<  cn.cacheIndex.row << " : " << cn.cacheIndex.sval << endl; );
00187                     
00188         if (CB_QVAL_UNDEFINED == cn.Q[0].ubVal) 
00189         {
00190                 DEBUG_TRACE( cout << "getNewUBValue:2" << endl; );
00191                 return getNewUBValueSimple(cn, maxUBActionP);
00192         } 
00193         else 
00194         {
00195                 DEBUG_TRACE( cout << "getNewUBValue:3" << endl; );
00196                 return getNewUBValueUseCache(cn, maxUBActionP);
00197         }
00198 
00199 }


appl
Author(s): petercai
autogenerated on Tue Jan 7 2014 11:02:28