Go to the documentation of this file.00001 #ifndef SARSOP_H
00002 #define SARSOP_H
00003 #include <list>
00004 #include "PointBasedAlgorithm.h"
00005 #include "Bound.h"
00006 #include "BoundSet.h"
00007 #include "AlphaPlane.h"
00008 #include "BeliefCache.h"
00009 #include "BeliefForest.h"
00010 #include "BinManager.h"
00011 #include "BinManagerSet.h"
00012 #include "SampleBP.h"
00013 #include "AlphaPlanePool.h"
00014 #include "AlphaPlanePoolSet.h"
00015 #include "BeliefValuePair.h"
00016 #include "BeliefValuePairPool.h"
00017 #include "BeliefValuePairPoolSet.h"
00018 #include "AlphaPlaneMaxMeta.h"
00019 #include "Tuple.h"
00020 #include "FacmodelStructs.h"
00021 #include "GlobalResource.h"
00022
00023
00024 using namespace std;
00025 namespace momdp
00026 {
00027 class BinManager;
00028 class SARSOPPrune;
00029
00030 #define CHECK_INTERVAL 50
00031
00032
00033 class SARSOPAlphaPlaneTuple :public Tuple
00034 {
00035 public:
00036 int certed;
00037 vector<BeliefTreeNode*> certifiedBeliefs;
00038 vector<int> certifiedBeliefTimeStamps;
00039 vector<AlphaPlaneMaxMeta*> maxMeta;
00040 bool sanityMax;
00041
00042 };
00043
00044 class SARSOP : public PointBasedAlgorithm
00045 {
00046 private:
00047
00048
00049 public:
00050
00051 SARSOP(SharedPointer<MOMDP> problem, SolverParams * solverParams);
00052 virtual ~SARSOP(void);
00053
00054 BeliefValuePairPoolSet *upperBoundSet;
00055 AlphaPlanePoolSet *lowerBoundSet;
00056
00057 BinManagerSet* binManagerSet;
00058
00059 SARSOPPrune* pruneEngine;
00060
00061 Backup<BeliefValuePair> *upperBoundBackup;
00062 Backup<AlphaPlane> *lowerBoundBackup;
00063
00064 vector<IndexedTuple<AlphaPlanePoolDataTuple> *> lbDataTableSet;
00065 vector<IndexedTuple<BeliefValuePairPoolDataTuple> *> ubDataTableSet;
00066
00067
00068
00069 virtual void solve(SharedPointer<MOMDP> problem);
00070 virtual void writePolicy(string fileName, string problemName);
00071 void writeToFile(const std::string& outFileName, string problemName);
00072
00073 BeliefTreeNode* sample();
00074 void backup(BeliefTreeNode* node);
00075
00076
00077 static void onLowerBoundBackup (PointBasedAlgorithm *solver, BeliefTreeNode * node, SharedPointer<AlphaPlane> backupResult)
00078 {
00079
00080 }
00081
00082 static void onUpperBoundBackup (PointBasedAlgorithm *solver, BeliefTreeNode * node, SharedPointer<BeliefValuePair> backupResult)
00083 {
00084 }
00085
00086 static void onGetNode(PointBasedAlgorithm *solver, BeliefTreeNode* node, SharedPointer<BeliefWithState>& belief)
00087 {
00088 SARSOP *sarsopSolver = (SARSOP *)solver;
00089 int stateidx = belief->sval;
00090 int row = node->cacheIndex.row;
00091 int timeStamp = sarsopSolver->numBackups;
00092
00093
00094 sarsopSolver->binManagerSet->binManagerSet[stateidx]->binManagerDataTable.set(row).binned = false;
00095
00096
00097 sarsopSolver->upperBoundSet->set[stateidx]->dataTable->set(row).UB_ACTION = 0;
00098
00099
00100 list<SharedPointer<AlphaPlane> >* alphas = new list<SharedPointer<AlphaPlane> >();
00101 sarsopSolver->lowerBoundSet->set[stateidx]->dataTable->set(row).ALPHA_PLANES= alphas;
00102
00103
00104 SharedPointer<AlphaPlane>alpha = sarsopSolver->lowerBoundSet->getValueAlpha(belief);
00105
00106 REAL_VALUE lbVal = inner_prod(*alpha->alpha, *belief->bvec);
00107
00108
00109
00110
00111 SARSOPAlphaPlaneTuple *dataAttachedToAlpha = (SARSOPAlphaPlaneTuple *)(alpha->solverData);
00112
00113
00114
00115
00116 REAL_VALUE ubVal =sarsopSolver->upperBoundSet->getValue(belief);
00117
00118
00119 solver->beliefCacheSet[stateidx]->getRow( row)->REACHABLE = node;
00120 solver->beliefCacheSet[stateidx]->getRow( row)->UB = ubVal;
00121 solver->beliefCacheSet[stateidx]->getRow( row)->LB = lbVal;
00122
00123 sarsopSolver->lowerBoundSet->set[stateidx]->dataTable->set(row).ALPHA_TIME_STAMP = timeStamp;
00124
00125
00126 if(timeStamp!=-1)
00127 {
00128
00129 DEBUG_TRACE("getNode timeStamp!=-1");
00130 if(!hasMaxMetaAt(alpha, node->cacheIndex.row))
00131 {
00132 DEBUG_TRACE("!hasMaxMetaAt");
00133 AlphaPlaneMaxMeta* newMax = new AlphaPlaneMaxMeta();
00134 newMax->cacheIndex = node->cacheIndex.row;
00135 newMax->lastLB = lbVal;
00136 newMax->timestamp = GlobalResource::getInstance()->getTimeStamp();
00137 dataAttachedToAlpha->maxMeta.push_back(newMax);
00138 }
00139 }
00140
00141
00142 }
00143
00144
00145
00146 void initialize(SharedPointer<MOMDP> problem);
00147 void initSampleEngine(SharedPointer<MOMDP> problem);
00148 void initializeUpperBound(SharedPointer<MOMDP> problem);
00149 void initializeLowerBound(SharedPointer<MOMDP> problem);
00150 void initializeBounds(double _targetPrecision);
00151
00152
00153 static bool hasMaxMetaAt(SharedPointer<AlphaPlane>alpha, int index)
00154 {
00155 SARSOPAlphaPlaneTuple *attachedData = (SARSOPAlphaPlaneTuple *)alpha->solverData;
00156 FOREACH(AlphaPlaneMaxMeta* , entry, attachedData->maxMeta)
00157 {
00158 if((*entry)->cacheIndex == index)
00159 {
00160 return true;
00161 }
00162 }
00163 return false;
00164 }
00165
00166
00167 CPTimer runtimeTimer;
00168 CPTimer lapTimer;
00169
00170 double elapsed;
00171
00172
00173 int printIndex;
00174
00175 void alwaysPrint();
00176 void printHeader();
00177 void printDivider();
00178 void print();
00179 bool stopNow();
00180
00181 BeliefTreeNode& getMaxExcessUncRoot(BeliefForest& globalroot);
00182
00183 void writeIntermediatePolicyTraceToFile(int trial, double time, const string& outFileName, string problemName);
00184 void progressiveIncreasePolicyInteval(int& numPolicies);
00185 void logFilePrint(int index);
00186
00187
00188 cacherow_stval backup(list<cacherow_stval> beliefNStates);
00189 cacherow_stval backupLBonly(list<cacherow_stval> beliefNStates);
00190
00191
00192 cacherow_stval backup(cacherow_stval beliefNState);
00193 cacherow_stval backupLBonly(cacherow_stval beliefNState);
00194
00195 };
00196
00197 }
00198
00199 #endif