00001
00008 #include <assert.h>
00009 #include <stdlib.h>
00010 #ifdef _MSC_VER
00011 #else
00012 #include <unistd.h>
00013 #endif
00014
00015 #include <stdio.h>
00016 #include <math.h>
00017 #include <cfloat>
00018
00019 #include <algorithm>
00020 #include <iostream>
00021
00022 #include "FullObsUBInitializer.h"
00023
00024
00025 using namespace std;
00026
00027
00028
00029
00030 namespace momdp {
00031
00032 void FullObsUBInitializer::nextAlphaAction_unfac(DenseVector& result, int a)
00033 {
00034 DenseVector R_xa;
00035 mult(result, *(*(pomdp->pomdpT))[a], alpha);
00036 result *= pomdp->discount;
00037 copy_from_column( R_xa, *(pomdp->pomdpR), a );
00038 result += R_xa;
00039 }
00040
00041 double FullObsUBInitializer::valueIterationOneStep_unfac(void)
00042 {
00043 DenseVector nextAlpha(pomdp->XStates->size() * pomdp->YStates->size());
00044 DenseVector naa(pomdp->XStates->size() * pomdp->YStates->size());
00045 DenseVector tmp;
00046 double maxResidual;
00047
00048 nextAlphaAction_unfac(nextAlpha,0);
00049
00050 FOR (a, pomdp->actions->size())
00051 {
00052 nextAlphaAction_unfac(naa,a);
00053
00054 FOR (s, pomdp->XStates->size() * pomdp->YStates->size())
00055 {
00056 if (naa(s) > nextAlpha(s))
00057 {
00058 nextAlpha(s) = naa(s);
00059 }
00060 }
00061 }
00062
00063 tmp.resize( alpha.size() );
00064 tmp = alpha;
00065 tmp -= nextAlpha;
00066
00067 maxResidual = tmp.norm_inf();
00068 alpha = nextAlpha;
00069
00070 return maxResidual;
00071 }
00072
00073 void FullObsUBInitializer::valueIteration_unfac( SharedPointer<MOMDP> _pomdp, double eps)
00074 {
00075 pomdp = _pomdp;
00076
00077
00078 alpha.resize(pomdp->XStates->size() * pomdp->YStates->size());
00079
00080 set_to_zero(alpha);
00081
00082
00083 double residual;
00084 FOR (i, MDP_MAX_ITERS)
00085 {
00086 residual = valueIterationOneStep_unfac();
00087
00088 if (residual < eps)
00089 {
00090 return;
00091 }
00092 }
00093 cout << endl
00094 << "failed to reach desired eps of " << eps << " after "
00095 << MDP_MAX_ITERS << " iterations" << endl;
00096 cout << "residual = " << residual << endl;
00097 }
00098
00099
00100 void FullObsUBInitializer::QNextAlphaAction_unfac(DenseVector& result, int a)
00101 {
00102 DenseVector R_xa;
00103 mult( result, actionAlphas[a], *(*(pomdp->pomdpTtr))[a] );
00104 result *= pomdp->discount;
00105 copy_from_column( R_xa, *(pomdp->pomdpR), a );
00106 result += R_xa;
00107 }
00108
00109 double FullObsUBInitializer::QValueIterationOneStep_unfac(void)
00110 {
00111 DenseVector nextAlpha(pomdp->XStates->size() * pomdp->YStates->size());
00112 DenseVector naa(pomdp->XStates->size() * pomdp->YStates->size());
00113 DenseVector tmp;
00114 double maxResidual = -DBL_MAX;
00115
00116 FOR (a, pomdp->actions->size())
00117 {
00118 DenseVector nextAlpha(actionAlphas[a].size());
00119 DenseVector tmp;
00120 tmp = actionAlphas[a];
00121 QNextAlphaAction_unfac(nextAlpha,a);
00122 tmp -= nextAlpha;
00123 double residual = tmp.norm_inf();
00124 if(residual > maxResidual)
00125 {
00126 maxResidual = residual;
00127 }
00128 actionAlphas[a] = nextAlpha;
00129 }
00130
00131 return maxResidual;
00132 }
00133
00134 void FullObsUBInitializer::QMDPSolution_unfac( SharedPointer<MOMDP> _pomdp, double eps)
00135 {
00136 pomdp = _pomdp;
00137 actionAlphas.resize(pomdp->actions->size());
00138
00139 valueIteration_unfac(pomdp, eps);
00140
00141
00142 FOR(a, pomdp->actions->size())
00143 {
00144 actionAlphas[a].resize(pomdp->XStates->size() * pomdp->YStates->size());
00145 nextAlphaAction_unfac(actionAlphas[a],a);
00146
00147 }
00148
00149 }
00150
00151
00152 void FullObsUBInitializer::QValueIteration_unfac( SharedPointer<MOMDP> _pomdp, double eps)
00153 {
00154 pomdp = _pomdp;
00155 actionAlphas.resize(pomdp->actions->size());
00156 FOR(a, pomdp->actions->size())
00157 {
00158 actionAlphas[a].resize(pomdp->XStates->size() * pomdp->YStates->size());
00159 set_to_zero(actionAlphas[a]);
00160 }
00161
00162 double residual;
00163 FOR (i, MDP_MAX_ITERS)
00164 {
00165 residual = QValueIterationOneStep_unfac();
00166 if (residual < eps)
00167 {
00168 return;
00169 }
00170 }
00171 cout << endl
00172 << "failed to reach desired eps of " << eps << " after "
00173 << MDP_MAX_ITERS << " iterations" << endl;
00174 cout << "residual = " << residual << endl;
00175 }
00176
00177
00178 void FullObsUBInitializer::nextAlphaAction(std::vector<DenseVector>& resultByState, int a)
00179 {
00180
00181 DenseVector resultThisState(pomdp->YStates->size()), resultSum(pomdp->YStates->size());
00182 DenseVector tmp(pomdp->YStates->size());
00183 DenseVector R_xa;
00184
00185 FOR (state_idx, pomdp->XStates->size())
00186 {
00187 set_to_zero(resultSum);
00188
00189
00190 const vector<int>& possibleXns = pomdp->XTrans->getMatrix(a, state_idx)->nonEmptyColumns();
00191 FOREACH (int, XnIt, possibleXns)
00192 {
00193 int Xn = *XnIt;
00194
00195 mult( tmp, *pomdp->YTrans->getMatrix(a, state_idx, Xn), alphaByState[Xn]);
00196
00197
00198 emult_column( resultThisState, *pomdp->XTrans->getMatrix(a, state_idx), Xn, tmp );
00199 resultSum += resultThisState;
00200 }
00201
00202 resultSum *= pomdp->discount;
00203 copy_from_column( R_xa, *pomdp->rewards->getMatrix(state_idx), a );
00204 resultSum += R_xa;
00205 resultByState[state_idx] = resultSum;
00206 }
00207 }
00208
00209
00210 double FullObsUBInitializer::valueIterationOneStep(void)
00211 {
00212
00213 DenseVector tmp;
00214 double maxResidual;
00215
00216 std::vector<DenseVector> nextAlpha(pomdp->XStates->size()), naa(pomdp->XStates->size());
00217
00218 nextAlphaAction(nextAlpha,0);
00219
00220 for (unsigned int a=1; a < pomdp->actions->size(); a++)
00221 {
00222 nextAlphaAction(naa,a);
00223 FOR (state_idx, pomdp->XStates->size())
00224 {
00225 FOR (s, pomdp->getBeliefSize())
00226 {
00227 if (naa[state_idx](s) > nextAlpha[state_idx](s))
00228 {
00229 nextAlpha[state_idx](s) = naa[state_idx](s);
00230 }
00231 }
00232 }
00233 }
00234
00235 FOR (state_idx, pomdp->XStates->size())
00236 {
00237
00238 tmp = alphaByState[state_idx];
00239 tmp -= nextAlpha[state_idx];
00240
00241 if (state_idx == 0)
00242 maxResidual = tmp.norm_inf();
00243 else {
00244 if (tmp.norm_inf() > maxResidual)
00245 maxResidual =tmp.norm_inf();
00246 }
00247
00248 }
00249
00250 alphaByState = nextAlpha;
00251
00252 return maxResidual;
00253 }
00254
00255 void FullObsUBInitializer::valueIteration(SharedPointer<MOMDP> _pomdp, double eps)
00256 {
00257 pomdp = _pomdp;
00258
00259 alphaByState.resize(pomdp->XStates->size());
00260
00261
00262 FOR (state_idx, pomdp->XStates->size())
00263 {
00264
00265 alphaByState[state_idx].resize(pomdp->getBeliefSize());
00266
00267 }
00268
00269 double residual;
00270 FOR (i, MDP_MAX_ITERS)
00271 {
00272 residual = valueIterationOneStep();
00273 if (residual < eps)
00274 {
00275 return;
00276 }
00277 }
00278 cout << endl
00279 << "failed to reach desired eps of " << eps << " after "
00280 << MDP_MAX_ITERS << " iterations" << endl;
00281 cout << "residual = " << residual << endl;
00282 }
00283 void FullObsUBInitializer::QNextAlphaAction(std::vector<DenseVector>& resultByState, int a)
00284 {
00285
00286 DenseVector resultThisState(pomdp->YStates->size()), resultSum(pomdp->YStates->size());
00287 DenseVector tmp(pomdp->YStates->size());
00288 DenseVector R_xa;
00289
00290 FOR (state_idx, pomdp->XStates->size())
00291 {
00292 set_to_zero(resultSum);
00293
00294
00295 const vector<int>& possibleXns = pomdp->XTrans->getMatrix(a, state_idx)->nonEmptyColumns();
00296 FOREACH (int, XnIt, possibleXns)
00297 {
00298 int Xn = *XnIt;
00299
00300 mult( tmp, actionAlphaByState[a][Xn], *pomdp->YTrans->getMatrixTr(a, state_idx, Xn) );
00301 emult_column( resultThisState, *pomdp->XTrans->getMatrix(a, state_idx), Xn, tmp );
00302 resultSum += resultThisState;
00303
00304 }
00305
00306 resultSum *= pomdp->discount;
00307 copy_from_column( R_xa, *pomdp->rewards->getMatrix(state_idx), a );
00308 resultSum += R_xa;
00309 resultByState[state_idx] = resultSum;
00310 }
00311 }
00312
00313 double FullObsUBInitializer::QValueIterationOneStep(void)
00314 {
00315
00316 DenseVector tmp;
00317 double maxResidual = - DBL_MAX;
00318
00319 FOR (a, pomdp->actions->size())
00320 {
00321 vector<DenseVector> nextAlpha(pomdp->XStates->size());
00322 QNextAlphaAction(nextAlpha,a);
00323
00324
00325 FOR (state_idx, pomdp->XStates->size())
00326 {
00327 tmp = actionAlphaByState[a][state_idx];
00328 tmp -= nextAlpha[state_idx];
00329
00330 if (tmp.norm_inf() > maxResidual)
00331 {
00332 maxResidual =tmp.norm_inf();
00333 }
00334
00335
00336
00337 }
00338
00339 actionAlphaByState[a] = nextAlpha;
00340 }
00341 return maxResidual;
00342 }
00343
00344 void FullObsUBInitializer::QMDPSolution(SharedPointer<MOMDP> _pomdp, double eps)
00345 {
00346 pomdp = _pomdp;
00347
00348 actionAlphaByState.resize(pomdp->actions->size());
00349
00350 valueIteration(pomdp, eps);
00351
00352 FOR (a, pomdp->actions->size()) {
00353 actionAlphaByState[a].resize(pomdp->XStates->size());
00354 nextAlphaAction(actionAlphaByState[a], a);
00355 }
00356 }
00357
00358 void FullObsUBInitializer::QValueIteration(SharedPointer<MOMDP> _pomdp, double eps)
00359 {
00360 pomdp = _pomdp;
00361
00362 actionAlphaByState.resize(pomdp->actions->size());
00363 FOR(a, pomdp->actions->size())
00364 {
00365 actionAlphaByState[a].resize(pomdp->XStates->size());
00366 FOR (state_idx, pomdp->XStates->size())
00367 {
00368 actionAlphaByState[a][state_idx].resize(pomdp->getBeliefSize());
00369 set_to_zero(actionAlphaByState[a][state_idx]);
00370 }
00371
00372 }
00373
00374 double residual;
00375 FOR (i, MDP_MAX_ITERS)
00376 {
00377 residual = QValueIterationOneStep();
00378 if (residual < eps)
00379 {
00380 return;
00381 }
00382 }
00383 cout << endl
00384 << "failed to reach desired eps of " << eps << " after "
00385 << MDP_MAX_ITERS << " iterations" << endl;
00386 cout << "residual = " << residual << endl;
00387 }
00388
00389 void FullObsUBInitializer::FacPostProcessing(vector<alpha_vector>& alphasByState)
00390 {
00391 FOR (state_idx, pomdp->XStates->size())
00392 {
00393 FOR (i, pomdp->YStates->size())
00394 {
00395 if (pomdp->isPOMDPTerminalState[state_idx][i])
00396 {
00397 alphasByState[state_idx](i) = 0.0;
00398 }
00399 }
00400 }
00401 }
00402 void FullObsUBInitializer::UnfacPostProcessing(DenseVector& calpha, vector<alpha_vector>& alphasByState)
00403 {
00404
00405
00406
00407 int numXState = pomdp->XStates->size();
00408 int numYState = pomdp->YStates->size();
00409 FOR (i,numXState *numYState)
00410 {
00411
00412
00413 unsigned int x = (unsigned int) i/numYState;
00414 unsigned int y = i % numYState;
00415
00416 if (pomdp->isPOMDPTerminalState[x][y])
00417 {
00418 calpha(i) = 0.0;
00419 }
00420 }
00421
00422 FOR (x, numXState)
00423 {
00424 alphasByState[x].resize(numYState);
00425 }
00426 FOR (s, numXState * pomdp->YStates->size())
00427 {
00428
00429 unsigned int x = (unsigned int) s/numYState;
00430 unsigned int y = s % numYState;
00431 alphasByState[x](y) = calpha(s);
00432 }
00433
00434 }
00435
00436
00437 };
00438