appl: FullObsUBInitializer.cpp Source File

Go to the documentation of this file.
00001 
00008 #include <assert.h>
00009 #include <stdlib.h>
00010 #ifdef _MSC_VER
00011 #else
00012 #include <unistd.h>
00013 #endif
00014 
00015 #include <stdio.h>
00016 #include <math.h>
00017 #include <cfloat>
00018 
00019 #include <algorithm>
00020 #include <iostream>
00021 
00022 #include "FullObsUBInitializer.h"
00023 
00024 
00025 using namespace std;
00026 //using namespace MatrixUtils;
00027 
00028 //#define DEBUGSYL_170908 1
00029 
00030 namespace momdp {
00031 
00032         void FullObsUBInitializer::nextAlphaAction_unfac(DenseVector& result, int a) 
00033         {
00034                 DenseVector R_xa;
00035                 mult(result, *(*(pomdp->pomdpT))[a], alpha); // SYL040909 prevly mult( result, alpha, *(*(pomdp->pomdpTtr))[a] ); The current function call mult(result, matrix, vector) seems faster
00036                 result *= pomdp->discount;
00037                 copy_from_column( R_xa, *(pomdp->pomdpR), a );
00038                 result += R_xa;
00039         }
00040 
00041         double FullObsUBInitializer::valueIterationOneStep_unfac(void) 
00042         {
00043                 DenseVector nextAlpha(pomdp->XStates->size() * pomdp->YStates->size());
00044                 DenseVector naa(pomdp->XStates->size() * pomdp->YStates->size());
00045                 DenseVector tmp;
00046                 double maxResidual;
00047 
00048                 nextAlphaAction_unfac(nextAlpha,0);
00049 
00050                 FOR (a, pomdp->actions->size()) 
00051                 {
00052                         nextAlphaAction_unfac(naa,a);
00053 
00054                         FOR (s, pomdp->XStates->size() * pomdp->YStates->size()) 
00055                         {
00056                                 if (naa(s) > nextAlpha(s))
00057                                 {
00058                                         nextAlpha(s) = naa(s);
00059                                 }
00060                         }
00061                 }
00062 
00063                 tmp.resize( alpha.size() );
00064                 tmp = alpha;
00065                 tmp -= nextAlpha;
00066 
00067                 maxResidual = tmp.norm_inf();
00068                 alpha = nextAlpha;
00069 
00070                 return maxResidual;
00071         }
00072 
00073         void FullObsUBInitializer::valueIteration_unfac( SharedPointer<MOMDP> _pomdp, double eps) 
00074         {
00075                 pomdp = _pomdp;
00076 
00077 
00078                 alpha.resize(pomdp->XStates->size() * pomdp->YStates->size());
00079                 //alpha.resize(pomdp->getBeliefSize());
00080                 set_to_zero(alpha);
00081 
00082 
00083                 double residual;
00084                 FOR (i, MDP_MAX_ITERS) 
00085                 {
00086                         residual = valueIterationOneStep_unfac();
00087                         
00088                         if (residual < eps) 
00089                         {
00090                                 return;
00091                         }
00092                 }
00093                 cout << endl
00094                         << "failed to reach desired eps of " << eps << " after "
00095                         << MDP_MAX_ITERS << " iterations" << endl;
00096                 cout << "residual = " << residual << endl;
00097         }
00098 
00099 
00100         void FullObsUBInitializer::QNextAlphaAction_unfac(DenseVector& result, int a) 
00101         {
00102                 DenseVector R_xa;
00103                 mult( result, actionAlphas[a], *(*(pomdp->pomdpTtr))[a] );
00104                 result *= pomdp->discount;
00105                 copy_from_column( R_xa, *(pomdp->pomdpR), a );
00106                 result += R_xa;
00107         }
00108 
00109         double FullObsUBInitializer::QValueIterationOneStep_unfac(void) 
00110         {
00111                 DenseVector nextAlpha(pomdp->XStates->size() * pomdp->YStates->size());
00112                 DenseVector naa(pomdp->XStates->size() * pomdp->YStates->size());
00113                 DenseVector tmp;
00114                 double maxResidual = -DBL_MAX;
00115 
00116                 FOR (a, pomdp->actions->size()) 
00117                 {
00118                         DenseVector nextAlpha(actionAlphas[a].size());
00119                         DenseVector tmp;
00120                         tmp = actionAlphas[a];
00121                         QNextAlphaAction_unfac(nextAlpha,a);
00122                         tmp -= nextAlpha;
00123                         double residual = tmp.norm_inf();
00124                         if(residual > maxResidual)
00125                         {
00126                                 maxResidual = residual;
00127                         }
00128                         actionAlphas[a] = nextAlpha;
00129                 }
00130 
00131                 return maxResidual;
00132         }
00133 
00134         void FullObsUBInitializer::QMDPSolution_unfac( SharedPointer<MOMDP> _pomdp, double eps) 
00135         {
00136                 pomdp = _pomdp;
00137                 actionAlphas.resize(pomdp->actions->size());
00138 
00139                 valueIteration_unfac(pomdp, eps); // this will put the result of MDP value iteration in alpha.
00140 
00141 
00142                 FOR(a, pomdp->actions->size())
00143                 {
00144                         actionAlphas[a].resize(pomdp->XStates->size() * pomdp->YStates->size());
00145                         nextAlphaAction_unfac(actionAlphas[a],a); // onestep backup on alpha with action "a"
00146 
00147                 }
00148 
00149         }
00150 
00151 
00152         void FullObsUBInitializer::QValueIteration_unfac( SharedPointer<MOMDP> _pomdp, double eps) 
00153         {
00154                 pomdp = _pomdp;
00155                 actionAlphas.resize(pomdp->actions->size());
00156                 FOR(a, pomdp->actions->size())
00157                 {
00158                         actionAlphas[a].resize(pomdp->XStates->size() * pomdp->YStates->size());
00159                         set_to_zero(actionAlphas[a]);
00160                 }
00161 
00162                 double residual;
00163                 FOR (i, MDP_MAX_ITERS) 
00164                 {
00165                         residual = QValueIterationOneStep_unfac();
00166                         if (residual < eps) 
00167                         {
00168                                 return;
00169                         }
00170                 }
00171                 cout << endl
00172                         << "failed to reach desired eps of " << eps << " after "
00173                         << MDP_MAX_ITERS << " iterations" << endl;
00174                 cout << "residual = " << residual << endl;
00175         }
00176 
00177 
00178         void FullObsUBInitializer::nextAlphaAction(std::vector<DenseVector>& resultByState, int a) 
00179         {
00180 
00181                 DenseVector resultThisState(pomdp->YStates->size()), resultSum(pomdp->YStates->size());
00182                 DenseVector tmp(pomdp->YStates->size());
00183                 DenseVector R_xa;
00184 
00185                 FOR (state_idx, pomdp->XStates->size()) 
00186                 {
00187                         set_to_zero(resultSum); 
00188                         // total expected next value
00189                         // only iterate over possible X states
00190                         const vector<int>& possibleXns = pomdp->XTrans->getMatrix(a, state_idx)->nonEmptyColumns();
00191                         FOREACH (int, XnIt, possibleXns)
00192                         {
00193                             int Xn = *XnIt;
00194                             // expected next value for Xn
00195                             mult( tmp, *pomdp->YTrans->getMatrix(a, state_idx, Xn), alphaByState[Xn]);
00196                             // SYL260809 prevly is as below. The current function called, mult(result, matrix, vector), seems faster
00197                             //mult( tmp, alphaByState[Xn], *pomdp->XYTrans->matrixTr[a][state_idx] );
00198                             emult_column( resultThisState, *pomdp->XTrans->getMatrix(a, state_idx), Xn, tmp );
00199                             resultSum += resultThisState;
00200                         }
00201 
00202                         resultSum *= pomdp->discount;
00203                         copy_from_column( R_xa, *pomdp->rewards->getMatrix(state_idx), a );
00204                         resultSum += R_xa;
00205                         resultByState[state_idx] = resultSum;
00206                 } 
00207         }
00208 
00209 
00210         double FullObsUBInitializer::valueIterationOneStep(void) 
00211         {
00212                 //DenseVector nextAlpha(pomdp->getBeliefSize()), naa(pomdp->getBeliefSize());
00213                 DenseVector tmp; //, tmpzero(pomdp->YStates->size());
00214                 double maxResidual; // = 0;
00215 
00216                 std::vector<DenseVector> nextAlpha(pomdp->XStates->size()), naa(pomdp->XStates->size());
00217 
00218                 nextAlphaAction(nextAlpha,0);
00219                 //FOR (a, pomdp->actions->size()) {
00220                 for (unsigned int a=1; a < pomdp->actions->size(); a++) 
00221                 {
00222                         nextAlphaAction(naa,a);         
00223                         FOR (state_idx, pomdp->XStates->size()) 
00224                         {
00225                                 FOR (s, pomdp->getBeliefSize()) 
00226                                 {
00227                                         if (naa[state_idx](s) > nextAlpha[state_idx](s)) 
00228                                         {
00229                                                 nextAlpha[state_idx](s) = naa[state_idx](s);
00230                                         }
00231                                 }
00232                         }
00233                 }
00234 
00235                 FOR (state_idx, pomdp->XStates->size()) 
00236                 {
00237                         //tmp.resize( alphaByState[state_idx].size() );
00238                         tmp = alphaByState[state_idx];
00239                         tmp -= nextAlpha[state_idx];
00240 
00241                         if (state_idx == 0)
00242                                 maxResidual = tmp.norm_inf();
00243                         else {
00244                                 if (tmp.norm_inf() >  maxResidual)
00245                                         maxResidual =tmp.norm_inf();
00246                         }
00247 
00248                 }
00249 
00250                 alphaByState = nextAlpha;
00251 
00252                 return maxResidual;
00253         }
00254 
00255         void FullObsUBInitializer::valueIteration(SharedPointer<MOMDP> _pomdp, double eps) 
00256         {
00257                 pomdp = _pomdp;
00258 
00259                 alphaByState.resize(pomdp->XStates->size());
00260 
00261 
00262                 FOR (state_idx, pomdp->XStates->size()) 
00263                 {
00264                         //cout << "pomdp->getBeliefSize() " << pomdp->getBeliefSize() << endl;
00265                         alphaByState[state_idx].resize(pomdp->getBeliefSize());
00266                         //set_to_zero(alphaByState[state_idx]);
00267                 }
00268 
00269                 double residual;
00270                 FOR (i, MDP_MAX_ITERS) 
00271                 {
00272                         residual = valueIterationOneStep();
00273                         if (residual < eps) 
00274                         {
00275                                 return;
00276                         }
00277                 }
00278                 cout << endl
00279                         << "failed to reach desired eps of " << eps << " after "
00280                         << MDP_MAX_ITERS << " iterations" << endl;
00281                 cout << "residual = " << residual << endl;
00282         }
00283         void FullObsUBInitializer::QNextAlphaAction(std::vector<DenseVector>& resultByState, int a) 
00284         {
00285 
00286                 DenseVector resultThisState(pomdp->YStates->size()), resultSum(pomdp->YStates->size());
00287                 DenseVector tmp(pomdp->YStates->size());
00288                 DenseVector R_xa;
00289 
00290                 FOR (state_idx, pomdp->XStates->size()) 
00291                 {
00292                         set_to_zero(resultSum); 
00293                         // total expected next value
00294                         // only iterate over possible X states
00295                         const vector<int>& possibleXns = pomdp->XTrans->getMatrix(a, state_idx)->nonEmptyColumns();
00296                         FOREACH (int, XnIt, possibleXns)
00297                         {
00298                             int Xn = *XnIt;
00299                             // expected next value for Xn
00300                             mult( tmp, actionAlphaByState[a][Xn], *pomdp->YTrans->getMatrixTr(a, state_idx, Xn) );
00301                             emult_column( resultThisState, *pomdp->XTrans->getMatrix(a, state_idx), Xn, tmp );
00302                             resultSum += resultThisState;
00303 
00304                         }
00305 
00306                         resultSum *= pomdp->discount;
00307                         copy_from_column( R_xa, *pomdp->rewards->getMatrix(state_idx), a );
00308                         resultSum += R_xa;
00309                         resultByState[state_idx] = resultSum;
00310                 } 
00311         }
00312 
00313         double FullObsUBInitializer::QValueIterationOneStep(void) 
00314         {
00315                 //DenseVector nextAlpha(pomdp->getBeliefSize()), naa(pomdp->getBeliefSize());
00316                 DenseVector tmp; //, tmpzero(pomdp->YStates->size());
00317                 double maxResidual = - DBL_MAX;
00318 
00319                 FOR (a, pomdp->actions->size())
00320                 {
00321                         vector<DenseVector> nextAlpha(pomdp->XStates->size());
00322                         QNextAlphaAction(nextAlpha,a);
00323 
00324 
00325                         FOR (state_idx, pomdp->XStates->size()) 
00326                         {
00327                                 tmp = actionAlphaByState[a][state_idx];
00328                                 tmp -= nextAlpha[state_idx];
00329 
00330                                 if (tmp.norm_inf() >  maxResidual)
00331                                 {
00332                                         maxResidual =tmp.norm_inf();
00333                                 }
00334 
00335                                 // 16092008 changed, prevly was probably too stringent
00336                                 //maxResidual += norm_inf(tmp);         
00337                         }
00338 
00339                         actionAlphaByState[a] = nextAlpha;
00340                 }
00341                 return maxResidual;
00342         }
00343 
00344         void FullObsUBInitializer::QMDPSolution(SharedPointer<MOMDP> _pomdp, double eps) 
00345         {
00346                 pomdp = _pomdp;
00347 
00348                 actionAlphaByState.resize(pomdp->actions->size());
00349 
00350                 valueIteration(pomdp, eps); // the MDP solution is now in alphaByState
00351 
00352                 FOR (a, pomdp->actions->size()) {
00353                         actionAlphaByState[a].resize(pomdp->XStates->size());
00354                         nextAlphaAction(actionAlphaByState[a], a);  // nextAlphaAction() does onestep backup on alphaByState, with action "a".
00355                 }
00356         }
00357 
00358         void FullObsUBInitializer::QValueIteration(SharedPointer<MOMDP> _pomdp, double eps) 
00359         {
00360                 pomdp = _pomdp;
00361 
00362                 actionAlphaByState.resize(pomdp->actions->size());
00363                 FOR(a, pomdp->actions->size())
00364                 {
00365                         actionAlphaByState[a].resize(pomdp->XStates->size());
00366                         FOR (state_idx, pomdp->XStates->size()) 
00367                         {
00368                                 actionAlphaByState[a][state_idx].resize(pomdp->getBeliefSize());
00369                                 set_to_zero(actionAlphaByState[a][state_idx]);
00370                         }
00371 
00372                 } 
00373 
00374                 double residual;
00375                 FOR (i, MDP_MAX_ITERS) 
00376                 {
00377                         residual = QValueIterationOneStep();
00378                         if (residual < eps) 
00379                         {
00380                                 return;
00381                         }
00382                 }
00383                 cout << endl
00384                         << "failed to reach desired eps of " << eps << " after "
00385                         << MDP_MAX_ITERS << " iterations" << endl;
00386                 cout << "residual = " << residual << endl; 
00387         }
00388 
00389         void FullObsUBInitializer::FacPostProcessing(vector<alpha_vector>& alphasByState)
00390         {
00391                 FOR (state_idx, pomdp->XStates->size()) 
00392                 {
00393                         FOR (i, pomdp->YStates->size()) 
00394                         {
00395                                 if (pomdp->isPOMDPTerminalState[state_idx][i]) 
00396                                 {
00397                                         alphasByState[state_idx](i) = 0.0;
00398                                 }
00399                         }
00400                 }
00401         }
00402         void FullObsUBInitializer::UnfacPostProcessing(DenseVector& calpha, vector<alpha_vector>& alphasByState)
00403         {
00404                 // post-process: make sure the value for all terminal states
00405                 // is exactly 0, since that is how the ubVal field of terminal
00406                 // nodes is initialized.
00407                 int numXState = pomdp->XStates->size();
00408                 int numYState = pomdp->YStates->size();
00409                 FOR (i,numXState *numYState)
00410                 {
00411 
00412                         // convert i to x and y values
00413                         unsigned int x = (unsigned int) i/numYState;
00414                         unsigned int y = i % numYState;
00415 
00416                         if (pomdp->isPOMDPTerminalState[x][y]) 
00417                         {
00418                                 calpha(i) = 0.0;
00419                         }
00420                 }
00421                 // write dalpha entries into dalphas[state_idx] entries
00422                 FOR (x, numXState)
00423                 {
00424                         alphasByState[x].resize(numYState);
00425                 }
00426                 FOR (s, numXState * pomdp->YStates->size()) 
00427                 {
00428                         // convert i to x and y values
00429                         unsigned int x = (unsigned int) s/numYState;
00430                         unsigned int y = s % numYState;
00431                         alphasByState[x](y) = calpha(s);
00432                 }
00433 
00434         }
00435 
00436 
00437 }; // namespace zmdp
00438