00001 #include "Const.h"
00002 #include "BackupBeliefValuePairMOMDP.h"
00003 #include "BeliefTreeNode.h"
00004 #include "BeliefCache.h"
00005 #include "MathLib.h"
00006 #include "MOMDP.h"
00007 #include "SARSOP.h"
00008 #include "BeliefValuePairPool.h"
00009 #include "exception"
00010
00011 using namespace std;
00012 using namespace momdp;
00013
00014 BackupBeliefValuePairMOMDP::BackupBeliefValuePairMOMDP(void)
00015 {
00016 }
00017
00018 BackupBeliefValuePairMOMDP::~BackupBeliefValuePairMOMDP(void)
00019 {
00020 }
00021
00022
00023 SharedPointer<BeliefValuePair> BackupBeliefValuePairMOMDP::backup(BeliefTreeNode* cn)
00024 {
00025 int maxUBAction;
00026
00027 state_val stateidx = cn->s->sval;
00028
00029 double newUBVal = getNewUBValue(*cn, &maxUBAction);
00030 SharedPointer<BeliefValuePair> result = boundSet->addPoint(cn->s, newUBVal);
00031
00032 if(maxUBAction < 0)
00033 {
00034 cout << "error" << endl;
00035 }
00036
00037
00038 double lastUbVal = boundSet->set[stateidx]->beliefCache->getRow(cn->cacheIndex.row)->UB;
00039
00040 boundSet->set[stateidx]->beliefCache->getRow( cn->cacheIndex.row)->UB = newUBVal;
00041
00042 boundSet->set[stateidx]->dataTable->set(cn->cacheIndex.row).UB_ACTION = maxUBAction;
00043
00044 DEBUG_TRACE( cout << "Set UB_ACTION: [ " << stateidx << " / " << cn->cacheIndex.row << " ] = " << maxUBAction << endl; );
00045
00046
00047
00048
00049 return result;
00050 }
00051
00052 double BackupBeliefValuePairMOMDP::getNewUBValueQ(BeliefTreeNode& cn, int a)
00053 {
00054 DEBUG_TRACE( cout << "getNewUBValueQ a " << a << endl; );
00055 DEBUG_TRACE( cout << "cn->cacheIndex " << cn.cacheIndex.row << " " << cn.cacheIndex.sval << endl; );
00056
00057 double val = 0;
00058 BeliefTreeQEntry& Qa = cn.Q[a];
00059 FOR (Xc, Qa.getNumStateOutcomes())
00060 {
00061 DEBUG_TRACE( cout << "Xc " << Xc << endl; );
00062 BeliefTreeObsState* QaXc = Qa.stateOutcomes[Xc];
00063 if (NULL != QaXc )
00064 {
00065 FOR(o, QaXc->getNumOutcomes())
00066 {
00067 DEBUG_TRACE( cout << "o " << o << endl; );
00068 BeliefTreeEdge* e = QaXc->outcomes[o];
00069 if (NULL != e)
00070 {
00071 DEBUG_TRACE( cout << "e!=NULL " << endl; );
00072 DEBUG_TRACE( cout << "e->nextState->cacheIndex " << e->nextState->cacheIndex.row << " " << e->nextState->cacheIndex.sval <<endl; );
00073
00074 double ubval = boundSet->getValue(e->nextState->s);
00075
00076 DEBUG_TRACE( cout << "Next Node: " << e->nextState->cacheIndex.row << " : " << e->nextState->cacheIndex.sval << " action: " << a << " obs: " << o << " ubval: " << ubval << endl; );
00077
00078
00079
00080 val += e->obsProb * ubval;
00081
00082
00083 DEBUG_TRACE( cout << "val " << val << endl; );
00084
00085
00086 boundSet->set[e->nextState->cacheIndex.sval]->beliefCache->getRow( e->nextState->cacheIndex.row)->UB = ubval;
00087 }
00088 }
00089 }
00090 }
00091
00092 val = Qa.immediateReward + problem->getDiscount() * val;
00093 DEBUG_TRACE( cout << "val " << val << endl; );
00094 Qa.ubVal = val;
00095
00096
00097 return val;
00098 }
00099
00100 double BackupBeliefValuePairMOMDP::getNewUBValueSimple(BeliefTreeNode& cn, int* maxUBActionP)
00101 {
00102 DEBUG_TRACE( cout << "getNewUBValueSimple: " << endl; );
00103
00104 double val, maxVal = -99e+20;
00105 int maxUBAction = -1;
00106 FOR(a, problem->getNumActions())
00107 {
00108 DEBUG_TRACE( cout << "a: " << a << endl; );
00109 val = getNewUBValueQ(cn, a);
00110 DEBUG_TRACE( cout << "val: " << val << endl; );
00111 DEBUG_TRACE( cout << "maxVal: " << maxVal << endl; );
00112
00113
00114 if (val > maxVal )
00115 {
00116 maxVal = val;
00117 maxUBAction = a;
00118 DEBUG_TRACE( cout << "maxUBAction TO: " << maxUBAction << endl; );
00119 }
00120 }
00121
00122 if (NULL != maxUBActionP)
00123 {
00124 *maxUBActionP = maxUBAction;
00125 }
00126
00127 return maxVal;
00128 }
00129
00130 double BackupBeliefValuePairMOMDP::getNewUBValueUseCache(BeliefTreeNode& cn, int* maxUBActionP)
00131 {
00132 DEBUG_TRACE( cout << "getNewUBValueUseCache" << endl; );
00133
00134 DenseVector cachedUpperBound(problem->getNumActions());
00135
00136 for(Actions::iterator aIter = problem->actions->begin(); aIter != problem->actions->end(); aIter ++)
00137 {
00138 int a = aIter.index();
00139 cachedUpperBound(a) = cn.Q[a].ubVal;
00140 }
00141
00142
00143
00144 vector<int> updatedAction(problem->actions->size());
00145
00146 for(Actions::iterator aIter = problem->actions->begin(); aIter != problem->actions->end(); aIter ++)
00147 {
00148 int a = aIter.index();
00149 updatedAction[a] = false;
00150 }
00151
00152 double val;
00153 int maxUBAction = argmax_elt(cachedUpperBound);
00154
00155 while (1)
00156 {
00157 DEBUG_TRACE( cout << "cachedUpperBound" << endl; );
00158 DEBUG_TRACE( cachedUpperBound.write(cout) << endl; );
00159 DEBUG_TRACE( cout << "maxUBAction " << maxUBAction << endl );
00160
00161
00162
00163 val = getNewUBValueQ(cn, maxUBAction);
00164 cachedUpperBound(maxUBAction) = val;
00165 updatedAction[maxUBAction] = true;
00166
00167
00168 maxUBAction = argmax_elt(cachedUpperBound);
00169
00170
00171
00172 if (updatedAction[maxUBAction]) break;
00173 }
00174
00175 double maxVal = cachedUpperBound(maxUBAction);
00176
00177 if (NULL != maxUBActionP)
00178 {
00179 *maxUBActionP = maxUBAction;
00180 }
00181 return maxVal;
00182 }
00183
00184 double BackupBeliefValuePairMOMDP::getNewUBValue(BeliefTreeNode& cn, int* maxUBActionP)
00185 {
00186 DEBUG_TRACE( cout << "BackupUpperBoundBVpair::getNewUBValue: " << cn.cacheIndex.row << " : " << cn.cacheIndex.sval << endl; );
00187
00188 if (CB_QVAL_UNDEFINED == cn.Q[0].ubVal)
00189 {
00190 DEBUG_TRACE( cout << "getNewUBValue:2" << endl; );
00191 return getNewUBValueSimple(cn, maxUBActionP);
00192 }
00193 else
00194 {
00195 DEBUG_TRACE( cout << "getNewUBValue:3" << endl; );
00196 return getNewUBValueUseCache(cn, maxUBActionP);
00197 }
00198
00199 }