00001 #include "BackupAlphaPlaneMOMDP.h"
00002 #include "BeliefCache.h"
00003 #include "AlphaPlanePool.h"
00004 #include "SARSOP.h"
00005 #include "exception"
00006 #include <list>
00007 #include <vector>
00008 #include <cfloat>
00009
00010 using namespace std;
00011 using namespace momdp;
00012
00013 BackupAlphaPlaneMOMDP::BackupAlphaPlaneMOMDP(void)
00014 {
00015 }
00016
00017 BackupAlphaPlaneMOMDP::~BackupAlphaPlaneMOMDP(void)
00018 {
00019 }
00020
00021 SharedPointer<AlphaPlane> BackupAlphaPlaneMOMDP::backup(BeliefTreeNode* cn)
00022 {
00023 SharedPointer<AlphaPlane> newPlaneP (new AlphaPlane());
00024
00025
00026 double lbVal;
00027
00028 lbVal = getNewAlphaPlane(*newPlaneP, *cn);
00029
00030
00031
00032 newPlaneP->init(solver->numBackups, cn);
00033
00034
00035 AlphaPlanePool* bound = boundSet->set[cn->s->sval];
00036 list<SharedPointer<AlphaPlane> >* alphas = bound->dataTable->get(cn->cacheIndex.row).ALPHA_PLANES;
00037
00038 if(alphas->size()>0)
00039 {
00040 SharedPointer<AlphaPlane>frontAlpha = alphas->front();
00041 SARSOPAlphaPlaneTuple *tempTuple = (SARSOPAlphaPlaneTuple *)frontAlpha->solverData;
00042 tempTuple->certed--;
00043 alphas->pop_front();
00044 }
00045 ((SARSOPAlphaPlaneTuple *)newPlaneP->solverData)->certed ++;
00046 alphas->push_front(newPlaneP);
00047
00048
00049 bound->addAlphaPlane(newPlaneP);
00050 bound->beliefCache->getRow( cn->cacheIndex.row)->LB = lbVal;
00051 bound->dataTable->set(cn->cacheIndex.row).ALPHA_TIME_STAMP = newPlaneP->timeStamp;
00052
00053 return newPlaneP;
00054 }
00055
00056
00057
00058
00059
00060
00061 double BackupAlphaPlaneMOMDP::getNewAlphaPlane(AlphaPlane& result, BeliefTreeNode& cn)
00062 {
00063 DEBUG_TRACE( cout << "getNewAlphaPlane" << endl; );
00064 if(cn.isFringe())
00065 {
00066 assert(false);
00067 cout << "Code bug" << endl;
00068 exit(EXIT_FAILURE);
00069 }
00070 else
00071 {
00072
00073
00074
00075 double maxActionLB = -DBL_MAX;
00076 int maxAction = 0;
00077
00078
00079 FOR (a, problem->getNumActions())
00080 {
00081 const BeliefTreeQEntry& Qa = cn.Q[a];
00082 double sum = 0.0;
00083
00084 DEBUG_TRACE( cout << "a " << a << endl; );
00085 FOR (Xn, Qa.getNumStateOutcomes())
00086 {
00087 DEBUG_TRACE( cout << "Xn " << Xn << endl; );
00088
00089 const BeliefTreeObsState* QaXn = Qa.stateOutcomes[Xn];
00090
00091 if (NULL != QaXn )
00092 {
00093 FOR (o, QaXn->getNumOutcomes())
00094
00095 {
00096 DEBUG_TRACE( cout << "o " << o << endl; );
00097
00098 BeliefTreeEdge* e = QaXn->outcomes[o];
00099
00100 if (NULL != e)
00101 {
00102
00103 BeliefTreeNode* childNode = e->nextState;
00104
00105
00106
00107
00108
00109
00110
00111 SharedPointer<AlphaPlane> tempAlpha = boundSet->getBestAlphaPlane(*(childNode));
00112 double childLB = inner_prod(*(tempAlpha->alpha), *(childNode->s->bvec));
00113 DEBUG_TRACE( cout << "childLB " << childLB << endl; );
00114
00115
00116 boundSet->set[childNode->cacheIndex.sval]->beliefCache->getRow( childNode->cacheIndex.row)->LB = childLB;
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126 sum += childLB * e->obsProb;
00127 DEBUG_TRACE( cout << "sum " << sum << endl; );
00128
00129 }
00130 else
00131 {
00132
00133 }
00134 }
00135 }
00136 }
00137 sum *= problem->getDiscount();
00138 sum += cn.Q[a].immediateReward;
00139 cn.Q[a].lbVal = sum;
00140 DEBUG_TRACE( cout << "sum " << sum << endl; );
00141
00142 DEBUG_TRACE( cout << "maxActionLB " << maxActionLB << endl; );
00143 if(sum > maxActionLB)
00144 {
00145 DEBUG_TRACE( cout << "maxActionLB TO " << sum << endl; );
00146 maxActionLB = sum;
00147 maxAction = a;
00148 }
00149
00150 }
00151 assert(maxActionLB != -DBL_MAX);
00152
00153
00154 getNewAlphaPlaneQ(result, cn, maxAction);
00155
00156
00157 DEBUG_TRACE(cout << "resulting alpha : " << endl);
00158 DEBUG_TRACE(result.alpha->write(cout) << endl; );
00159 return maxActionLB;
00160 }
00161 }
00162
00163
00164
00165 void BackupAlphaPlaneMOMDP::getNewAlphaPlaneQ(AlphaPlane& result, const BeliefTreeNode& cn, int a)
00166 {
00167 SharedPointer<alpha_vector> betaA (new alpha_vector(problem->getBeliefSize()));
00168 SharedPointer<alpha_vector> betaAXnO;
00169
00170 alpha_vector tmp, tmp1, tmp2;
00171
00172 bool defaultIsSet;
00173 SharedPointer<BeliefWithState> dummy_stval (new BeliefWithState());
00174
00175
00176 state_val Xc = cn.s->sval;
00177 SharedPointer<alpha_vector> defaultBetaAXnO;
00178 const BeliefTreeQEntry& Qa = cn.Q[a];
00179
00180
00181 FOR (Xn, Qa.getNumStateOutcomes())
00182 {
00183 if( !(problem->XTrans->getMatrix(a, Xc)->isColumnEmpty(Xn)))
00184 {
00185 defaultIsSet = false;
00186
00187 const BeliefTreeObsState* QaXn = Qa.stateOutcomes[Xn];
00188 if (NULL != QaXn )
00189 {
00190 FOR (o, QaXn->getNumOutcomes())
00191 {
00192 if( !(problem->obsProb->getMatrix(a, Xn)->isColumnEmpty(o)))
00193 {
00194 BeliefTreeEdge* e = QaXn->outcomes[o];
00195
00196 if (NULL != e)
00197 {
00198 betaAXnO = (boundSet->getBestAlphaPlane(*(e->nextState))->alpha);
00199 }
00200 else
00201 {
00202 if (!defaultIsSet)
00203 {
00204 dummy_stval->sval = Xn;
00205 dummy_stval->bvec = cn.s->bvec;
00206 defaultBetaAXnO = (boundSet->getBestAlphaPlane(*dummy_stval)->alpha);
00207 defaultIsSet = true;
00208 }
00209 betaAXnO = defaultBetaAXnO;
00210 }
00211 emult_column( tmp, *problem->obsProb->getMatrix(a, Xn), o, *betaAXnO );
00212 mult( tmp1, *problem->YTrans->getMatrix(a, Xc, Xn), tmp);
00213 emult_column( tmp2, *problem->XTrans->getMatrix(a, Xc), Xn, tmp1 );
00214 (*betaA) += tmp2;
00215 }
00216 }
00217 }
00218 else
00219 {
00220
00221
00222 for(Observations::iterator oIter = problem->observations->begin(); oIter != problem->observations->end(); oIter ++)
00223 {
00224 int o = oIter.index();
00225 if( !(problem->obsProb->getMatrix(a, Xn)->isColumnEmpty(o)) )
00226 {
00227 if (!defaultIsSet)
00228 {
00229 dummy_stval->sval = Xn;
00230 dummy_stval->bvec = cn.s->bvec;
00231 defaultBetaAXnO = (boundSet->getBestAlphaPlane(*dummy_stval)->alpha);
00232 defaultIsSet = true;
00233 }
00234 betaAXnO = defaultBetaAXnO;
00235
00236 emult_column( tmp, *problem->obsProb->getMatrix(a, Xn), o, *betaAXnO );
00237 mult( tmp1, *problem->YTrans->getMatrix(a, Xc, Xn), tmp);
00238 emult_column( tmp2, *problem->XTrans->getMatrix(a, Xc), Xn, tmp1 );
00239 (*betaA) += tmp2;
00240 }
00241
00242 }
00243
00244 }
00245 }
00246 }
00247 alpha_vector RaXc;
00248
00249
00250 copy_from_column( RaXc, *(problem->rewards->getMatrix(Xc)), a );
00251 (*betaA) *= problem->getDiscount();
00252 (*betaA) += RaXc;
00253
00254 result.copyFrom(betaA, a, Xc);
00255 }
00256