appl: FullObsUBInitializer.h Source File

Go to the documentation of this file.
00001 
00008 #ifndef INCFullObsUBInitializer_h
00009 #define INCFullObsUBInitializer_h
00010 
00011 #include "MOMDP.h"
00012 
00013 #define MDP_MAX_ITERS (1000000) // Not referenced anywhere in this header. NOTE(review): presumably the iteration cap for the value-iteration routines declared below -- confirm in the implementation file.
00014 
00015 namespace momdp {
00016 
00017         class FullObsUBInitializer { // NOTE(review): name and members suggest an upper-bound initializer built from fully observable (MDP / QMDP) value iteration -- confirm against the implementation file.
00018         public:
00019                 SharedPointer<MOMDP> pomdp; // Model handle. NOTE(review): presumably assigned from the _pomdp argument of the solver entry points below -- confirm.
00020 
00021                 // Data and methods for the factored version.
00022                 std::vector<DenseVector> alphaByState;
00023                 void nextAlphaAction(std::vector<DenseVector>& resultByState, int a); // resultByState is an out-parameter (non-const reference); 'a' is presumably an action index -- TODO confirm.
00024                 double valueIterationOneStep();
00025                 void valueIteration(SharedPointer<MOMDP> _pomdp, double eps); // 'eps' is presumably the convergence tolerance -- TODO confirm.
00026 
00027                 // Data and methods for the unfactored version.
00028                 DenseVector alpha;
00029                 void nextAlphaAction_unfac(DenseVector& result, int a);
00030                 double valueIterationOneStep_unfac();
00031                 void valueIteration_unfac(SharedPointer<MOMDP> _pomdp, double eps);
00032 
00033 
00034                 // FOR QMDP
00035                 // Data and methods for the factored version.
00036                 std::vector<std::vector<alpha_vector> > actionAlphaByState;
00037                 void QMDPSolution(SharedPointer<MOMDP> _pomdp, double eps);
00038                 // The three Q* routines below are not really needed: they are for computing a fixed-action policy, starting from a vector of all-zero values instead of the usual best-worst reward value.
00039                 void QNextAlphaAction(std::vector<DenseVector>& resultByState, int a);
00040                 double QValueIterationOneStep();
00041                 void QValueIteration(SharedPointer<MOMDP> _pomdp, double eps);
00042 
00043                 // Data and methods for the unfactored version.
00044                 std::vector<alpha_vector> actionAlphas;
00045                 void QMDPSolution_unfac(SharedPointer<MOMDP> _pomdp, double eps);
00046 
00047                 // The three Q*_unfac routines below are not really needed: they are for computing a fixed-action policy, starting from a vector of all-zero values instead of the usual best-worst reward value.
00048                 void QNextAlphaAction_unfac(DenseVector& result, int a);
00049                 double QValueIterationOneStep_unfac();
00050                 void QValueIteration_unfac(SharedPointer<MOMDP> _pomdp, double eps);
00051 
00052                 // Utility functions. std::vector is spelled out everywhere in this class so the header does not depend on a using-directive from an included file.
00053                 void FacPostProcessing(std::vector<alpha_vector>& alphasByState);
00054                 void UnfacPostProcessing(DenseVector& calpha, std::vector<alpha_vector>& alphasByState);
00055 
00056         };
00057 
00058 }; // namespace momdp
00059 
00060 #endif // INCFullObsUBInitializer_h