00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include <iostream>
00030 using namespace std;
00031
00032 #include "../../sbpl/headers.h"
00033
00034 static unsigned int g_backups;
00035 static clock_t g_runtime = 0;
00036 static double g_belldelta = INFINITECOST;
00037
00038
00039 VIPlanner::~VIPlanner()
00040 {
00041
00042
00043
00044
00045
00046 }
00047
00048
00049 void VIPlanner::Initialize_vidata(CMDPSTATE* state)
00050 {
00051 VIState* vi_data = (VIState*)state->PlannerSpecificData;
00052
00053 vi_data->bestnextaction = NULL;
00054 vi_data->iteration = 0;
00055 vi_data->v = (float)environment_->GetGoalHeuristic(state->StateID);
00056
00057
00058 }
00059
00060 CMDPSTATE* VIPlanner::CreateState(int stateID)
00061 {
00062 CMDPSTATE* state = NULL;
00063
00064 #if DEBUG
00065 if(environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND] != -1)
00066 {
00067 SBPL_ERROR("ERROR in CreateState: state already created\n");
00068 throw new SBPL_Exception();
00069 }
00070 #endif
00071
00072
00073 state = viPlanner.MDP.AddState(stateID);
00074
00075
00076 environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND] = viPlanner.MDP.StateArray.size()-1;
00077
00078 #if DEBUG
00079 if(state != viPlanner.MDP.StateArray[environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND]])
00080 {
00081 SBPL_ERROR("ERROR in CreateState: invalid state index\n");
00082 throw new SBPL_Exception();
00083 }
00084 #endif
00085
00086
00087 state->PlannerSpecificData = new VIState;
00088 Initialize_vidata(state);
00089
00090 return state;
00091
00092 }
00093
00094 CMDPSTATE* VIPlanner::GetState(int stateID)
00095 {
00096
00097 if(stateID >= (int)environment_->StateID2IndexMapping.size())
00098 {
00099 SBPL_ERROR("ERROR int GetState: stateID is invalid\n");
00100 throw new SBPL_Exception();
00101 }
00102
00103 if(environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND] == -1)
00104 return CreateState(stateID);
00105 else
00106 return viPlanner.MDP.StateArray[environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND]];
00107
00108 }
00109
00110
00111
00112 void VIPlanner::PrintVIData()
00113 {
00114 SBPL_PRINTF("iteration %d: v(start) = %f\n", viPlanner.iteration,
00115 ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v);
00116
00117 }
00118
00119 void VIPlanner::PrintStatHeader(FILE* fOut)
00120 {
00121 SBPL_FPRINTF(fOut,
00122 "iteration backups v(start)\n");
00123 }
00124
00125
00126
00127 void VIPlanner::PrintStat(FILE* fOut, clock_t starttime)
00128 {
00129 SBPL_FPRINTF(fOut, "%d %d %f %f %d\n",
00130 viPlanner.iteration, g_backups, ((double)(clock()-starttime))/CLOCKS_PER_SEC,
00131 ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v,
00132 (unsigned int)viPlanner.MDP.StateArray.size());
00133
00134 }
00135
00136
00137 void VIPlanner::PrintPolicy(FILE* fPolicy)
00138 {
00139 bool bPrintStatOnly = true;
00140 vector<CMDPSTATE*> WorkList;
00141 CMDP PolicyforEvaluation;
00142
00143 viPlanner.iteration++;
00144 WorkList.push_back(viPlanner.StartState);
00145 ((VIState*)viPlanner.StartState->PlannerSpecificData)->iteration = viPlanner.iteration;
00146 double PolVal = 0.0;
00147 double Conf = 0;
00148 bool bCycles = false;
00149 SBPL_PRINTF("Printing policy...\n");
00150 while((int)WorkList.size() > 0)
00151 {
00152
00153 CMDPSTATE* state = WorkList.at(WorkList.size()-1);
00154 WorkList.pop_back();
00155 VIState* statedata = (VIState*)state->PlannerSpecificData;
00156
00157 CMDPSTATE* polstate = PolicyforEvaluation.AddState(state->StateID);
00158
00159
00160 if(!bPrintStatOnly)
00161 {
00162 SBPL_FPRINTF(fPolicy, "%d\n", state->StateID);
00163 environment_->PrintState(state->StateID, false, fPolicy);
00164
00165 int h = environment_->GetGoalHeuristic(state->StateID);
00166 SBPL_FPRINTF(fPolicy, "h=%d\n", h);
00167 if(h > statedata->v)
00168 {
00169 SBPL_FPRINTF(fPolicy, "WARNING h overestimates exp.cost\n");
00170 }
00171 }
00172
00173 if(state->StateID == viPlanner.GoalState->StateID)
00174 {
00175
00176 if(!bPrintStatOnly)
00177 SBPL_FPRINTF(fPolicy, "0\n");
00178 Conf += ((VIState*)state->PlannerSpecificData)->Pc;
00179 }
00180 else if(statedata->bestnextaction == NULL)
00181 {
00182
00183 if(!bPrintStatOnly)
00184 {
00185
00186 SBPL_FPRINTF(fPolicy, "%d %d %d\n", 1, 0, state->StateID);
00187 }
00188 }
00189 else
00190 {
00191
00192
00193 CMDPACTION* action = statedata->bestnextaction;
00194
00195
00196 CMDPACTION* polaction = polstate->AddAction(action->ActionID);
00197
00198 if(!bPrintStatOnly)
00199 SBPL_FPRINTF(fPolicy, "%d ", (unsigned int)action->SuccsID.size());
00200
00201
00202 for(int i = 0; i < (int)action->SuccsID.size(); i++)
00203 {
00204 if(!bPrintStatOnly)
00205 SBPL_FPRINTF(fPolicy, "%d %d ", action->Costs[i], action->SuccsID[i]);
00206 polaction->AddOutcome(action->SuccsID[i], action->Costs[i], action->SuccsProb[i]);
00207
00208 CMDPSTATE* succstate = GetState(action->SuccsID[i]);
00209 if((int)((VIState*)succstate->PlannerSpecificData)->iteration != viPlanner.iteration)
00210 {
00211 ((VIState*)succstate->PlannerSpecificData)->iteration = viPlanner.iteration;
00212 WorkList.push_back(succstate);
00213
00214 ((VIState*)succstate->PlannerSpecificData)->Pc =
00215 action->SuccsProb[i]*((VIState*)state->PlannerSpecificData)->Pc;
00216 PolVal += ((VIState*)succstate->PlannerSpecificData)->Pc*action->Costs[i];
00217 }
00218 }
00219 if(!bPrintStatOnly)
00220 SBPL_FPRINTF(fPolicy, "\n");
00221 }
00222 }
00223 SBPL_PRINTF("done\n");
00224
00225
00226 double PolicyValue = -1;
00227 bool bFullPolicy = false;
00228 double Pcgoal = -1;
00229 int nMerges = 0;
00230 EvaluatePolicy(&PolicyforEvaluation, viPlanner.StartState->StateID,
00231 viPlanner.GoalState->StateID, &PolicyValue, &bFullPolicy, &Pcgoal, &nMerges,
00232 &bCycles);
00233
00234 SBPL_PRINTF("Policy value = %f FullPolicy=%d Merges=%d Cycles=%d\n",
00235 PolicyValue, bFullPolicy, nMerges, bCycles);
00236
00237 if(!bFullPolicy)
00238 SBPL_PRINTF("WARN: POLICY IS ONLY PARTIAL\n");
00239 if(fabs(PolicyValue-((VIState*)(viPlanner.StartState->PlannerSpecificData))->v) > MDP_ERRDELTA)
00240 SBPL_PRINTF("WARN: POLICY VALUE IS NOT CORRECT\n");
00241
00242
00243 if(!bPrintStatOnly)
00244 SBPL_FPRINTF(fPolicy, "backups=%d runtime=%f vstart=%f policyvalue=%f fullpolicy=%d Pc(goal)=%f nMerges=%d bCyc=%d\n",
00245 g_backups, (double)g_runtime/CLOCKS_PER_SEC,
00246 ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v,
00247 PolicyValue, bFullPolicy, Pcgoal, nMerges, bCycles);
00248 else
00249 SBPL_FPRINTF(fPolicy, "%d %f %f %f %d %f %d %d\n",
00250 g_backups, (double)g_runtime/CLOCKS_PER_SEC,
00251 ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v,
00252 PolicyValue, bFullPolicy, Pcgoal, nMerges, bCycles);
00253
00254 }
00255
00256
00257
00258 void VIPlanner::backup(CMDPSTATE* state)
00259 {
00260 int aind, oind;
00261 CMDPSTATE* succstate;
00262
00263 g_backups++;
00264
00265 if(state == viPlanner.GoalState)
00266 {
00267 ((VIState*)(state->PlannerSpecificData))->bestnextaction = NULL;
00268 ((VIState*)(state->PlannerSpecificData))->v = 0;
00269 return;
00270 }
00271
00272
00273 double minactionQ = INFINITECOST;
00274 CMDPACTION* minaction = NULL;
00275 for(aind = 0; aind < (int)state->Actions.size(); aind++)
00276 {
00277 double actionQ = 0;
00278 CMDPACTION* action = state->Actions[aind];
00279 for(oind = 0; oind < (int)action->SuccsID.size(); oind++)
00280 {
00281 succstate = GetState(action->SuccsID[oind]);
00282 actionQ += action->SuccsProb[oind]*(action->Costs[oind] +
00283 ((VIState*)(succstate->PlannerSpecificData))->v);
00284 }
00285
00286 if(minaction == NULL || actionQ < minactionQ)
00287 {
00288 minactionQ = actionQ;
00289 minaction = action;
00290 }
00291
00292 }
00293
00294 if(((VIState*)state->PlannerSpecificData)->bestnextaction == NULL)
00295 g_belldelta = INFINITECOST;
00296 else if(g_belldelta < fabs(((VIState*)state->PlannerSpecificData)->v - minactionQ))
00297 g_belldelta = fabs(((VIState*)state->PlannerSpecificData)->v - minactionQ);
00298
00299
00300 ((VIState*)state->PlannerSpecificData)->bestnextaction = minaction;
00301 ((VIState*)state->PlannerSpecificData)->v = (float)minactionQ;
00302
00303 }
00304
00305
00306 void VIPlanner::perform_iteration_backward()
00307 {
00308 CMDPSTATE* state;
00309 vector<int> Worklist;
00310 int aind, oind;
00311
00312
00313 Worklist.push_back(viPlanner.GoalState->StateID);
00314
00315
00316 while((int)Worklist.size() > 0)
00317 {
00318
00319 state = GetState(Worklist[Worklist.size()-1]);
00320 Worklist.pop_back();
00321
00322
00323 if((int)state->Actions.size() == 0)
00324 environment_->SetAllActionsandAllOutcomes(state);
00325
00326
00327 backup(state);
00328
00329
00330 for(aind = 0; aind < (int)state->Actions.size(); aind++)
00331 {
00332 CMDPACTION* action = state->Actions[aind];
00333 for(oind = 0; oind < (int)action->SuccsID.size(); oind++)
00334 {
00335 CMDPSTATE* succstate = GetState(action->SuccsID[oind]);
00336
00337
00338 if((int)((VIState*)succstate->PlannerSpecificData)->iteration != viPlanner.iteration)
00339 {
00340 Worklist.push_back(succstate->StateID);
00341
00342
00343 ((VIState*)succstate->PlannerSpecificData)->iteration = viPlanner.iteration;
00344 }
00345 }
00346 }
00347
00348
00349 if(state == viPlanner.StartState)
00350 continue;
00351
00352
00353 if((int)state->PredsID.size() == 0)
00354 environment_->SetAllPreds(state);
00355
00356 for(int pind = 0; pind < (int)state->PredsID.size(); pind++)
00357 {
00358 CMDPSTATE* PredState = GetState(state->PredsID[pind]);
00359
00360
00361 if((int)((VIState*)PredState->PlannerSpecificData)->iteration != viPlanner.iteration)
00362 {
00363 Worklist.push_back(PredState->StateID);
00364
00365
00366 ((VIState*)PredState->PlannerSpecificData)->iteration = viPlanner.iteration;
00367 }
00368 }
00369 }
00370 }
00371
00372
00373 void VIPlanner::perform_iteration_forward()
00374 {
00375 CMDPSTATE* state = NULL;
00376 vector<CMDPSTATE*> Worklist;
00377 int aind, oind;
00378
00379
00380 Worklist.push_back(viPlanner.StartState);
00381
00382
00383 while((int)Worklist.size() > 0)
00384 {
00385
00386 state = Worklist[Worklist.size()-1];
00387
00388 Worklist.pop_back();
00389
00390
00391 if((int)state->Actions.size() == 0)
00392 environment_->SetAllActionsandAllOutcomes(state);
00393
00394
00395 backup(state);
00396
00397
00398 for(aind = 0; aind < (int)state->Actions.size(); aind++)
00399 {
00400
00401 CMDPACTION* action = ((VIState*)state->PlannerSpecificData)->bestnextaction;
00402 for(oind = 0; action != NULL && oind < (int)action->SuccsID.size(); oind++)
00403 {
00404 CMDPSTATE* succstate = GetState(action->SuccsID[oind]);
00405
00406
00407 if((int)((VIState*)succstate->PlannerSpecificData)->iteration != viPlanner.iteration)
00408 {
00409 Worklist.push_back(succstate);
00410
00411
00412 ((VIState*)succstate->PlannerSpecificData)->iteration = viPlanner.iteration;
00413 }
00414 }
00415 }
00416 }
00417 }
00418
00419
00420
00421 void VIPlanner::InitializePlanner()
00422 {
00423
00424 viPlanner.iteration = 0;
00425
00426
00427 viPlanner.StartState = GetState(MDPCfg_->startstateid);
00428 viPlanner.GoalState = GetState(MDPCfg_->goalstateid);
00429
00430 }
00431
00432
00433
00434
00435 int VIPlanner::replan(double allocatedtime, vector<int>* solution_stateIDs_V)
00436 {
00437
00438 #ifndef ROS
00439 const char* policy = "policy.txt";
00440 const char* stat = "stat.txt";
00441 #endif
00442 FILE* fPolicy = SBPL_FOPEN(policy,"w");
00443 FILE* fStat = SBPL_FOPEN(stat,"w");
00444
00445
00446 InitializePlanner();
00447
00448
00449 clock_t starttime = clock();
00450
00451
00452 while(((clock()-starttime)/(double)CLOCKS_PER_SEC) < allocatedtime &&
00453 g_belldelta > MDP_ERRDELTA)
00454 {
00455 viPlanner.iteration++;
00456
00457 g_belldelta = 0;
00458 perform_iteration_forward();
00459
00460 if(viPlanner.iteration%100 == 0)
00461 {
00462 PrintStat(stdout, starttime);
00463 PrintStat(fStat, starttime);
00464 }
00465 }
00466
00467
00468 g_runtime = clock()-starttime;
00469
00470 PrintStat(stdout, starttime);
00471 PrintStat(fStat, starttime);
00472 SBPL_FFLUSH(fStat);
00473
00474 PrintPolicy(fPolicy);
00475
00476 SBPL_FCLOSE(fPolicy);
00477 SBPL_FCLOSE(fStat);
00478
00479 return 1;
00480
00481 }
00482
00483