appl: FastInfUBInitializer.cpp Source File

Go to the documentation of this file.
00001 
00008 #include <stdlib.h>
00009 #ifdef _MSC_VER
00010 #else
00011 #include <unistd.h>
00012 #endif
00013 
00014 #include <stdio.h>
00015 #include <assert.h>
00016 
00017 #include <iostream>
00018 #include <fstream>
00019 
00020 #include "MOMDP.h"
00021 #include "FastInfUBInitializer.h"
00022 #include "FullObsUBInitializer.h"
00023 #include "BeliefValuePairPoolSet.h"
00024 
00025 using namespace std;
00026 //using namespace MatrixUtils;
00027 
00028 //#define DEBUGSYL_290908 1
00029 //#define DEBUGSYL_160908a 1
00030 
00031 namespace momdp {
00032 
00033         FastInfUBInitializer::FastInfUBInitializer( SharedPointer<MOMDP> problem, BeliefValuePairPoolSet* _bound)
00034         {
00035                 pomdp = ( SharedPointer<MOMDP>) problem;
00036                 bound = _bound;
00037 
00038                 // pomdp->XStates->size()
00039                 //alphasByState.resize(pomdp->XStates->size());   // SYL260809 commented out
00040                 //upperBoundBVpair = _upperBoundBVpair;
00041 
00042         }
00043 
00044         FastInfUBInitializer::FastInfUBInitializer(SharedPointer<MOMDP> problem)
00045         {
00046                 pomdp = ( SharedPointer<MOMDP>) problem;
00047 
00048                 // pomdp->XStates->size()
00049                 //alphasByState.resize(pomdp->XStates->size());   // SYL260809 commented out
00050                 //upperBoundBVpair = _upperBoundBVpair;
00051 
00052         }
00053 
00054         void FastInfUBInitializer::initialize(double targetPrecision)
00055         {
00056                 if(pomdp->XStates->size() != 1 && pomdp->hasPOMDPMatrices())
00057                 {
00058                         // only does this if convert fast is called to produce pomdp version of the matrices
00059                         initFIB_unfac(targetPrecision, false); // need pomdp matrix
00060                 }
00061                 else
00062                 {
00063                         initFIB(targetPrecision, false);
00064                 }
00065 
00066                 //if (pomdp->XStates->size() == 1)
00067                 //      initFIB(targetPrecision);
00068                 //else
00069                 //      initFIB_unfac(targetPrecision);
00070         }
00071 
00072         void FastInfUBInitializer::getFIBsolution(double targetPrecision)
00073         {
00074                 if(pomdp->XStates->size() != 1 && pomdp->hasPOMDPMatrices())
00075                 {
00076                         // only does this if convert fast is called to produce pomdp version of the matrices
00077                         initFIB_unfac(targetPrecision, true); // need pomdp matrix
00078                 }
00079                 else
00080                 {
00081                         initFIB(targetPrecision, true);
00082                 }
00083 
00084                 //if (pomdp->XStates->size() == 1)
00085                 //      initFIB(targetPrecision);
00086                 //else
00087                 //      initFIB_unfac(targetPrecision);
00088         }
00089 
00090 
00091         void FastInfUBInitializer::initMDP_unfac(double targetPrecision)
00092         {
00093                 // set alpha to be the mdp upper bound
00094                 FullObsUBInitializer m;
00095                 m.valueIteration_unfac(pomdp, targetPrecision);
00096                 DenseVector calpha;
00097                 copy(calpha, m.alpha);
00098 
00099                 alphas.clear();
00100                 alphas.push_back(calpha);
00101 
00102                 // **** ADDED PRINTOUTS TO SCREEN HERE ******** 6 APRIL 2009 *******
00103                 /*  cout << pomdp->YStates->size() << endl;
00104                 FOR (i, pomdp->YStates->size()) {
00105                 cout << calpha(i) << " ";
00106                 }
00107                 cout << endl;
00108                 */
00109 
00110         }
00111 
00112         void FastInfUBInitializer::initFIB_unfac(double targetPrecision, bool getFIBvectors)
00113         {
00114                 // calculates the fast informed bound (Hauskrecht, JAIR 2000)
00115                 std::vector< DenseVector > al(pomdp->actions->size());
00116                 std::vector< DenseVector > nextAl(pomdp->actions->size());
00117                 DenseVector tmp, beta_aoi, beta_ao, diff;
00118                 double maxResidual;
00119                 alpha_vector backup;
00120 
00121                 initMDP_unfac(MDP_RESIDUAL);
00122 
00123                 // TODO:: bound->elapsed = bound->heurTimer.elapsed();
00124                 // TODO:: printf("%.2fs initMDP(MDP_RESIDUAL) done\n", bound->elapsed);
00125 
00126                 // initialize al array with weak MDP upper bound
00127                 FullObsUBInitializer m;
00128                 m.pomdp = pomdp;
00129                 alpha_vector& alpha = alphas[0];
00130                 copy(m.alpha, alpha);
00131 
00132                 FOR (a, pomdp->actions->size()) 
00133                 {
00134                         //al[a].resize(pomdp->YStates->size());
00135                         //nextAl[a].resize(pomdp->YStates->size());
00136                         al[a].resize(pomdp->XStates->size() * pomdp->YStates->size());
00137                         nextAl[a].resize(pomdp->XStates->size() * pomdp->YStates->size());
00138                         m.nextAlphaAction_unfac(al[a], a);
00139                 }
00140 
00141                 // iterate FIB update rule to approximate convergence
00142                 do {
00143                         FOR (a, pomdp->actions->size()) {
00144                                 DenseVector& beta_a = nextAl[a];
00145 
00146                                 set_to_zero( beta_a );
00147                                 
00148 
00149                                 FOR (o, pomdp->observations->size()) {
00150                                         FOR (i, pomdp->actions->size()) {
00151                                                 emult_column( tmp, *(*(pomdp->pomdpO))[a], o, al[i] );
00152                                                 //        emult_column( tmp, pomdp->pomdpO[a], o, al[i] );
00153                                                 mult( beta_aoi, tmp, *(*(pomdp->pomdpTtr))[a] );
00154                                                 //        mult( beta_aoi, tmp, pomdp->pomdpTtr[a] );
00155                                                 if (0 == i) {
00156                                                         beta_ao = beta_aoi;
00157                                                 } else {
00158                                                         max_assign( beta_ao, beta_aoi );
00159                                                 }
00160                                         }
00161                                         beta_a += beta_ao;
00162                                 }
00163 
00164                                 beta_a *= pomdp->discount;
00165                                 copy_from_column( tmp, (*(pomdp->pomdpR)), a );
00166                                 //     copy_from_column( tmp, pomdp->pomdpR, a );
00167                                 beta_a += tmp;
00168                         }
00169 
00170                         maxResidual = 0;
00171                         FOR (a, pomdp->actions->size()) {
00172                                 diff = nextAl[a];
00173                                 diff -= al[a];
00174                                 maxResidual = std::max( maxResidual, diff.norm_inf() );
00175 
00176                                 al[a] = nextAl[a];
00177                         }
00178 
00179                 } while ( maxResidual > targetPrecision );
00180 
00181                 if (!getFIBvectors)
00182                 {       DenseVector dalpha;
00183                         FOR (a, pomdp->actions->size()) 
00184                         {
00185                                 if (0 == a) 
00186                                 {
00187                                         dalpha = al[a];
00188                                 } 
00189                                 else 
00190                                 {
00191                                         max_assign(dalpha, al[a]);
00192                                 }
00193                         }
00194         
00195                         // post-process: make sure the value for all terminal states
00196                         // is exactly 0, since that is how the ubVal field of terminal
00197                         // nodes is initialized.
00198                         FOR (i, pomdp->XStates->size()*pomdp->YStates->size()) {
00199         
00200                                 // convert i to x and y values
00201                                 unsigned int x = (unsigned int) i/(pomdp->YStates->size());
00202                                 unsigned int y = i % (pomdp->YStates->size());
00203         
00204                                 if (pomdp->isPOMDPTerminalState[x][y]) {
00205         
00206                                         dalpha(i) = 0.0;
00207                                 }
00208                         }
00209         
00210                         // **** ADDED PRINTOUTS TO SCREEN HERE ******** 6 APRIL 2009 *******
00211                         /*  cout << pomdp->YStates->size() << endl;
00212                         FOR (i, pomdp->YStates->size()) {
00213                         cout << dalpha(i) << " ";
00214                         }
00215                         cout << endl;
00216                         */
00217                         // write out result
00218                         //  bound->points.clear();
00219                         //  copy(upperBoundBVpair->cornerPoints, dalpha);
00220         
00221                         // write dalpha entries into dalphas[state_idx] entries
00222                         std::vector<DenseVector> dalphas(pomdp->XStates->size());
00223         
00224                         FOR (s, pomdp->XStates->size() * pomdp->YStates->size()) {
00225                                 // convert i to x and y values
00226                                 unsigned int x = (unsigned int) s/(pomdp->YStates->size());
00227                                 unsigned int y = s % (pomdp->YStates->size());
00228         
00229                                 if (y==0) dalphas[x].resize(pomdp->YStates->size());
00230         
00231                                 dalphas[x](y) = dalpha(s);
00232                         }
00233         
00234                         // write out result - do it for each of the BoundsSet
00235                         FOR (state_idx, pomdp->XStates->size()) 
00236                         {
00237                                 bound->set[state_idx]->points.clear();
00238                                 copy(bound->set[state_idx]->cornerPoints,dalphas[state_idx]);
00239         
00240                                 /*cout << "state_idx  " << state_idx << endl;
00241                                 dalphas[state_idx].write(std::cout);
00242                                 cout << endl; */
00243         
00244                         }
00245                 } else {
00246                         actionAlphaByState.resize(pomdp->actions->size());
00247 
00248                         FOR (a, pomdp->actions->size()) 
00249                         {
00250                                 FOR (i, pomdp->XStates->size()*pomdp->YStates->size()) {
00251                 
00252                                         // convert i to x and y values
00253                                         unsigned int x = (unsigned int) i/(pomdp->YStates->size());
00254                                         unsigned int y = i % (pomdp->YStates->size());
00255                 
00256                                         if (pomdp->isPOMDPTerminalState[x][y]) {
00257                 
00258                                                 al[a](i) = 0.0;
00259                                         }
00260 
00261                                         if (x==0) actionAlphaByState[a].resize(pomdp->XStates->size());
00262                                         if (y==0) actionAlphaByState[a][x].resize(pomdp->YStates->size());
00263                                         actionAlphaByState[a][x](y) = al[a](i);
00264                                 }
00265                         }
00266 
00267                 }
00268         }
00269 
00270         void FastInfUBInitializer::initMDP(double targetPrecision, FullObsUBInitializer& m)
00271 // SYL260809 prevly:    void FastInfUBInitializer::initMDP(double targetPrecision)
00272         {
00273                 // set alpha to be the mdp upper bound
00274                 // FullObsUBInitializer m;              // SYL260809 commented out
00275                 m.valueIteration(pomdp, targetPrecision); // this puts the MDP vector at m.alphaByState 
00276 
00277                 //alphasByState.clear();  
00278 
00279                 //alphasByState = m.alphaByState; // SYL260809 commented out
00280 
00281                 /* #if DEBUGSYL_160908
00282                 //### check alphasByState contents compared to m.alphaByState contents
00283 
00284                 cout << "m.alphaByState" << endl;
00285                 for (unsigned int stateidx=0; stateidx < m.alphaByState.size(); stateidx++)
00286                 {
00287                 m.alphaByState[stateidx].write(std::cout);
00288                 cout << endl;
00289                 }
00290                 cout << "alphasByState" << endl;
00291                 for (unsigned int stateidx=0; stateidx < alphasByState.size(); stateidx++)
00292                 {
00293                 alphasByState[stateidx].write(std::cout);
00294                 cout << endl;
00295                 }
00296                 #endif */
00297 
00298 
00299 
00300                 //std::vector<DenseVector> calphas;
00301                 //copy(calpha, m.alpha);
00302 
00303 
00304                 //alphas.clear();
00305                 //alphas.push_back(calpha);
00306 
00307                 /*
00308                 if (zmdpDebugLevelG >= 1) {
00309                 cout << "initUpperBoundMDP: alpha=" << sparseRep(alphas[0]).c_str() << endl;
00310                 cout << "initUpperBoundMDP: val(b)=" << inner_prod(alphas[0], pomdp->initialBelief) << endl;
00311                 }
00312                 */
00313         }
00314 
00315         void FastInfUBInitializer::initFIB(double targetPrecision, bool getFIBvectors)
00316         {
00317                 // calculates the fast informed bound (Hauskrecht, JAIR 2000)
00318                 //std::vector< DenseVector > al(pomdp->actions->size());
00319                 //std::vector< DenseVector > nextAl(pomdp->actions->size());
00320 
00321                 std::vector< std::vector<DenseVector> > al(pomdp->actions->size());
00322                 std::vector< std::vector<DenseVector> > nextAl(pomdp->actions->size());
00323 
00324                 DenseVector tmp, tmp2, diff, beta_ao; //beta_aoi, beta_aoiSum
00325                 DenseVector beta_aoXn_unweighted, beta_aoXn;
00326                 double maxResidual;
00327                 //alpha_vector backup;
00328 
00329                 FullObsUBInitializer M;  // SYL260809 added 
00330 
00331                 initMDP(MDP_RESIDUAL, M); // SYL260809 modified
00332 
00333                 //TODO:: bound->elapsed = bound->heurTimer.elapsed();
00334                 //TODO:: printf("%.2fs initMDP(MDP_RESIDUAL) done\n", bound->elapsed);
00335 
00336 
00337                 // SYL260809 commented out - the MDP upper bound is already in M.alphaByState
00338                 /* 
00339                 // initialize al array with weak MDP upper bound
00340                 FullObsUBInitializer m;
00341                 m.pomdp = pomdp;
00342                 m.alphaByState.resize(pomdp->XStates->size());
00343 
00344                 //alpha_vector& alpha = alphas[0];
00345                 //copy(m.alpha, alpha);
00346                 FOR (state_idx, pomdp->XStates->size()) {
00347                         copy(m.alphaByState[state_idx], alphasByState[state_idx]);
00348                 }
00349                 */
00350 
00351                 /* FOR (a, pomdp->actions->size()) {
00352                 al[a].resize(pomdp->YStates->size());
00353                 nextAl[a].resize(pomdp->YStates->size());
00354                 m.nextAlphaAction(al[a], a);
00355                 } */
00356 
00357                 FOR (a, pomdp->actions->size()) {
00358                         al[a].resize(pomdp->XStates->size());
00359                         nextAl[a].resize(pomdp->XStates->size());
00360 
00361                         M.nextAlphaAction(al[a], a); // SYL260809 prevly: m.nextAlphaAction(al[a], a);
00362                 }
00363 
00364                 bool    valid_o_and_Xn; // SYL260809 to allow skipping operation for an observation o, if there are
00365                                         // no valid Xn values (for given action a) associated with it
00366                 do {
00367                         // SYL260809 keep track of maximum residual
00368                         maxResidual = 0;
00369 
00370                         FOR (a, pomdp->actions->size()) 
00371                         {
00372                                 std::vector< DenseVector >& beta_a = nextAl[a];
00373 
00374                                 FOR (Xc, pomdp->XStates->size()) 
00375                                 {
00376                                         beta_a[Xc].resize(pomdp->YStates->size());
00377 
00378                                         FOR (o, pomdp->observations->size()) 
00379                                         {
00380                                                 beta_ao.resize(pomdp->YStates->size());
00381                                                 valid_o_and_Xn = false; // SYL260809
00382 
00383                                                 // only iterate over possible X states
00384                                                 const vector<int>& possibleXns = pomdp->XTrans->getMatrix(a, Xc)->nonEmptyColumns();
00385                                                 FOREACH (int, XnIt, possibleXns)
00386                                                 {
00387                                                     int Xn = *XnIt;
00388                                                     if (!(pomdp->obsProb->getMatrix(a, Xn)->isColumnEmpty(o)))
00389                                                     {
00390                                                         valid_o_and_Xn = true;   // SYL260809
00391 
00392                                                         FOR (i, pomdp->actions->size()) 
00393                                                         {
00394                                                             emult_column( tmp, *pomdp->obsProb->getMatrix(a, Xn), o, al[i][Xn] );
00395                                                             mult( tmp2, *pomdp->YTrans->getMatrix(a, Xc, Xn), tmp); // SYL270809
00396                                                             // SYL270809 mult(vector, matrix, vector) is faster than mult(vector, vector, matrix)
00397                                                             //mult( tmp2, tmp, *pomdp->XYTrans->getMatrixTr(a, Xc) );
00398 
00399                                                             if (0 == i) {
00400                                                                 beta_aoXn_unweighted = tmp2;
00401                                                             } else {
00402                                                                 max_assign( beta_aoXn_unweighted, tmp2 );
00403                                                             }
00404 
00405                                                         }
00406 
00407                                                         emult_column(beta_aoXn, *pomdp->XTrans->getMatrix(a, Xc), Xn, beta_aoXn_unweighted);
00408                                                         beta_ao += beta_aoXn;
00409                                                     }
00410                                                 }
00411                                                                         // SYL260809
00412                                                 if (valid_o_and_Xn)     // false means there's no valid Xn (given the 
00413                                                                         //action and Xc) for this observation o
00414                                                         beta_a[Xc] += beta_ao; //beta_ao;
00415 
00416                                         }
00417 
00418                                         beta_a[Xc] *= pomdp->discount;
00419                                         
00420                                         copy_from_column( tmp, *pomdp->rewards->getMatrix(Xc), a );
00421                                         beta_a[Xc] += tmp;
00422 
00423                                         // SYL260809 keep track of maximum residual
00424                                         diff = beta_a[Xc];
00425                                         diff -= al[a][Xc];
00426                                         maxResidual = std::max( maxResidual, diff.norm_inf() );
00427                                 }
00428                         }
00429 
00430                         // SYL260809  assign the FIB vectors for next iteration
00431                         al = nextAl;
00432 
00433                         /*
00434                         if (zmdpDebugLevelG >= 1) {
00435                         cout << ".";
00436                         cout.flush();
00437                         }
00438                         */
00439 
00440                 } while ( maxResidual > targetPrecision );
00441 
00442                 //cout << "targetPrecision : " << targetPrecision << endl;
00443 
00444 
00445                 /*
00446                 if (zmdpDebugLevelG >= 1) {
00447                 cout << endl;
00448                 }
00449                 */
00450 #if DEBUGSYL_290908
00451                 cout << "targetPrecision : " << targetPrecision << endl;
00452                 cout << "maxResidual : " << maxResidual << " iterationCount : " << iterationCount << endl;
00453 #endif
00454 
00455 #if DEBUGSYL_160908
00456                 cout << "targetPrecision : " << targetPrecision << endl;
00457                 cout << "maxResidual : " << maxResidual << " iterationCount : " << iterationCount << endl;
00458                 cout << "After iteration, al " << endl;
00459                 FOR (a, pomdp->actions->size()) {
00460                         cout << "a : " << a << endl;
00461 
00462                         for (unsigned int stateidx=0; stateidx < alphasByState.size(); stateidx++)
00463                         {
00464                                 cout << "stateidx : " << stateidx << endl;
00465                                 al[a][stateidx].write(std::cout);
00466                                 cout << endl;
00467                         }
00468                 }
00469 #endif 
00470 
00471                 if (!getFIBvectors) {
00472                         // SYL260809 we only need one dalpha of size YStates.size at a time
00473                         DenseVector dalpha;
00474                         //std::vector<DenseVector> dalpha(pomdp->XStates->size());
00475                         FOR (state_idx, pomdp->XStates->size()) {
00476         
00477                                 FOR (a, pomdp->actions->size()) {
00478                                         if (0 == a) {
00479                                                 dalpha = al[a][state_idx];
00480                                         } else {
00481                                                 max_assign(dalpha, al[a][state_idx]);
00482                                         }
00483                                 }
00484         
00485                                 // at this point, the vector at dalpha[state_idx] contains the highest value, across actions
00486         
00487                                 // post-process: make sure the value for all terminal states
00488                                 // is exactly 0, since that is how the ubVal field of terminal
00489                                 // nodes is initialized.
00490                                 FOR (i, pomdp->YStates->size()) {
00491                                         if (pomdp->isPOMDPTerminalState[state_idx][i]) {
00492                                                 dalpha(i) = 0.0;
00493                                         }
00494                                 }
00495         
00496                                 // at this point, the vector at dalpha[state_idx] has taken into account terminal state
00497                                 bound->set[state_idx]->points.clear();
00498                                 copy(bound->set[state_idx]->cornerPoints,dalpha);
00499         
00500                         } 
00501 
00502                 } else {  // output one vector for each action
00503 
00504                         FOR (state_idx, pomdp->XStates->size()) {
00505         
00506                                 FOR (a, pomdp->actions->size()) {       
00507                                 // post-process: make sure the value for all terminal states
00508                                 // is exactly 0
00509                                         FOR (i, pomdp->YStates->size()) {
00510                                                 if (pomdp->isPOMDPTerminalState[state_idx][i]) {
00511                                                         al[a][state_idx](i) = 0.0;
00512                                                 }
00513                                         }
00514                                 }
00515         
00516                         } 
00517 
00518                         actionAlphaByState = al;
00519 
00520                 }
00521         }
00522 
00523 }; // namespace zmdp
00524 
00525