$search
00001 /* 00002 * Copyright (c) 2008, Maxim Likhachev 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions are met: 00007 * 00008 * * Redistributions of source code must retain the above copyright 00009 * notice, this list of conditions and the following disclaimer. 00010 * * Redistributions in binary form must reproduce the above copyright 00011 * notice, this list of conditions and the following disclaimer in the 00012 * documentation and/or other materials provided with the distribution. 00013 * * Neither the name of the University of Pennsylvania nor the names of its 00014 * contributors may be used to endorse or promote products derived from 00015 * this software without specific prior written permission. 00016 * 00017 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00018 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00019 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00020 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 00021 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00022 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 00023 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00024 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 00025 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00026 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00027 * POSSIBILITY OF SUCH DAMAGE. 00028 */ 00029 #include <iostream> 00030 using namespace std; 00031 00032 #include "../../sbpl/headers.h" 00033 00034 static unsigned int g_backups; 00035 static clock_t g_runtime = 0; 00036 static double g_belldelta = INFINITECOST; 00037 00038 00039 VIPlanner::~VIPlanner() 00040 { 00041 00042 //delete the statespace 00043 00044 00045 00046 } 00047 00048 00049 void VIPlanner::Initialize_vidata(CMDPSTATE* state) 00050 { 00051 VIState* vi_data = (VIState*)state->PlannerSpecificData; 00052 00053 vi_data->bestnextaction = NULL; 00054 vi_data->iteration = 0; 00055 vi_data->v = (float)environment_->GetGoalHeuristic(state->StateID); 00056 00057 00058 } 00059 00060 CMDPSTATE* VIPlanner::CreateState(int stateID) 00061 { 00062 CMDPSTATE* state = NULL; 00063 00064 #if DEBUG 00065 if(environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND] != -1) 00066 { 00067 SBPL_ERROR("ERROR in CreateState: state already created\n"); 00068 throw new SBPL_Exception(); 00069 } 00070 #endif 00071 00072 //adds to the tail a state 00073 state = viPlanner.MDP.AddState(stateID); 00074 00075 //remember the index of the state 00076 environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND] = viPlanner.MDP.StateArray.size()-1; 00077 00078 #if DEBUG 00079 if(state != viPlanner.MDP.StateArray[environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND]]) 00080 { 00081 SBPL_ERROR("ERROR in CreateState: invalid state index\n"); 00082 throw new SBPL_Exception(); 00083 } 00084 #endif 00085 00086 //create and initialize vi_data 00087 state->PlannerSpecificData = new VIState; 00088 Initialize_vidata(state); 00089 00090 return state; 00091 00092 } 00093 00094 CMDPSTATE* VIPlanner::GetState(int stateID) 00095 { 00096 00097 if(stateID >= (int)environment_->StateID2IndexMapping.size()) 00098 { 00099 SBPL_ERROR("ERROR int GetState: stateID is invalid\n"); 00100 throw new SBPL_Exception(); 00101 } 00102 00103 if(environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND] == -1) 00104 return CreateState(stateID); 00105 else 00106 return viPlanner.MDP.StateArray[environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND]]; 00107 00108 } 00109 00110 00111 00112 void VIPlanner::PrintVIData() 00113 { 00114 SBPL_PRINTF("iteration %d: v(start) = %f\n", viPlanner.iteration, 00115 ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v); 00116 00117 } 00118 00119 void VIPlanner::PrintStatHeader(FILE* fOut) 00120 { 00121 SBPL_FPRINTF(fOut, 00122 "iteration backups v(start)\n"); 00123 } 00124 00125 00126 00127 void VIPlanner::PrintStat(FILE* fOut, clock_t starttime) 00128 { 00129 SBPL_FPRINTF(fOut, "%d %d %f %f %d\n", 00130 viPlanner.iteration, g_backups, ((double)(clock()-starttime))/CLOCKS_PER_SEC, 00131 ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v, 00132 (unsigned int)viPlanner.MDP.StateArray.size()); 00133 00134 } 00135 00136 00137 void VIPlanner::PrintPolicy(FILE* fPolicy) 00138 { 00139 bool bPrintStatOnly = true; 00140 vector<CMDPSTATE*> WorkList; 00141 CMDP PolicyforEvaluation; 00142 00143 viPlanner.iteration++; 00144 WorkList.push_back(viPlanner.StartState); 00145 ((VIState*)viPlanner.StartState->PlannerSpecificData)->iteration = viPlanner.iteration; 00146 double PolVal = 0.0; 00147 double Conf = 0; 00148 bool bCycles = false; 00149 SBPL_PRINTF("Printing policy...\n"); 00150 while((int)WorkList.size() > 0) 00151 { 00152 //pop the last state 00153 CMDPSTATE* state = WorkList.at(WorkList.size()-1); 00154 WorkList.pop_back(); 00155 VIState* statedata = (VIState*)state->PlannerSpecificData; 00156 00157 CMDPSTATE* polstate = PolicyforEvaluation.AddState(state->StateID); 00158 00159 //print state ID 00160 if(!bPrintStatOnly) 00161 { 00162 SBPL_FPRINTF(fPolicy, "%d\n", state->StateID); 00163 environment_->PrintState(state->StateID, false, fPolicy); 00164 00165 int h = environment_->GetGoalHeuristic(state->StateID); 00166 SBPL_FPRINTF(fPolicy, "h=%d\n", h); 00167 if(h > statedata->v) 00168 { 00169 SBPL_FPRINTF(fPolicy, "WARNING h overestimates exp.cost\n"); 00170 } 00171 } 00172 00173 if(state->StateID == viPlanner.GoalState->StateID) 00174 { 00175 //goal state 00176 if(!bPrintStatOnly) 00177 SBPL_FPRINTF(fPolicy, "0\n"); 00178 Conf += ((VIState*)state->PlannerSpecificData)->Pc; 00179 } 00180 else if(statedata->bestnextaction == NULL) 00181 { 00182 //unexplored 00183 if(!bPrintStatOnly) 00184 { 00185 //no outcome explored - stay in the same place 00186 SBPL_FPRINTF(fPolicy, "%d %d %d\n", 1, 0, state->StateID); 00187 } 00188 } 00189 else 00190 { 00191 00192 //get best action 00193 CMDPACTION* action = statedata->bestnextaction; 00194 00195 //add action to evaluation MDP 00196 CMDPACTION* polaction = polstate->AddAction(action->ActionID); 00197 00198 if(!bPrintStatOnly) 00199 SBPL_FPRINTF(fPolicy, "%d ", (unsigned int)action->SuccsID.size()); 00200 00201 //print successors and insert them into the list 00202 for(int i = 0; i < (int)action->SuccsID.size(); i++) 00203 { 00204 if(!bPrintStatOnly) 00205 SBPL_FPRINTF(fPolicy, "%d %d ", action->Costs[i], action->SuccsID[i]); 00206 polaction->AddOutcome(action->SuccsID[i], action->Costs[i], action->SuccsProb[i]); 00207 00208 CMDPSTATE* succstate = GetState(action->SuccsID[i]); 00209 if((int)((VIState*)succstate->PlannerSpecificData)->iteration != viPlanner.iteration) 00210 { 00211 ((VIState*)succstate->PlannerSpecificData)->iteration = viPlanner.iteration; 00212 WorkList.push_back(succstate); 00213 00214 ((VIState*)succstate->PlannerSpecificData)->Pc = 00215 action->SuccsProb[i]*((VIState*)state->PlannerSpecificData)->Pc; 00216 PolVal += ((VIState*)succstate->PlannerSpecificData)->Pc*action->Costs[i]; 00217 } 00218 } 00219 if(!bPrintStatOnly) 00220 SBPL_FPRINTF(fPolicy, "\n"); 00221 } 00222 }//while worklist not empty 00223 SBPL_PRINTF("done\n"); 00224 00225 //now evaluate the policy 00226 double PolicyValue = -1; 00227 bool bFullPolicy = false; 00228 double Pcgoal = -1; 00229 int nMerges = 0; 00230 EvaluatePolicy(&PolicyforEvaluation, viPlanner.StartState->StateID, 00231 viPlanner.GoalState->StateID, &PolicyValue, &bFullPolicy, &Pcgoal, &nMerges, 00232 &bCycles); 00233 00234 SBPL_PRINTF("Policy value = %f FullPolicy=%d Merges=%d Cycles=%d\n", 00235 PolicyValue, bFullPolicy, nMerges, bCycles); 00236 00237 if(!bFullPolicy) 00238 SBPL_PRINTF("WARN: POLICY IS ONLY PARTIAL\n"); 00239 if(fabs(PolicyValue-((VIState*)(viPlanner.StartState->PlannerSpecificData))->v) > MDP_ERRDELTA) 00240 SBPL_PRINTF("WARN: POLICY VALUE IS NOT CORRECT\n"); 00241 00242 00243 if(!bPrintStatOnly) 00244 SBPL_FPRINTF(fPolicy, "backups=%d runtime=%f vstart=%f policyvalue=%f fullpolicy=%d Pc(goal)=%f nMerges=%d bCyc=%d\n", 00245 g_backups, (double)g_runtime/CLOCKS_PER_SEC, 00246 ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v, 00247 PolicyValue, bFullPolicy, Pcgoal, nMerges, bCycles); 00248 else 00249 SBPL_FPRINTF(fPolicy, "%d %f %f %f %d %f %d %d\n", 00250 g_backups, (double)g_runtime/CLOCKS_PER_SEC, 00251 ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v, 00252 PolicyValue, bFullPolicy, Pcgoal, nMerges, bCycles); 00253 00254 } 00255 00256 00257 00258 void VIPlanner::backup(CMDPSTATE* state) 00259 { 00260 int aind, oind; 00261 CMDPSTATE* succstate; 00262 00263 g_backups++; 00264 00265 if(state == viPlanner.GoalState) 00266 { 00267 ((VIState*)(state->PlannerSpecificData))->bestnextaction = NULL; 00268 ((VIState*)(state->PlannerSpecificData))->v = 0; 00269 return; 00270 } 00271 00272 //iterate through actions 00273 double minactionQ = INFINITECOST; 00274 CMDPACTION* minaction = NULL; 00275 for(aind = 0; aind < (int)state->Actions.size(); aind++) 00276 { 00277 double actionQ = 0; 00278 CMDPACTION* action = state->Actions[aind]; 00279 for(oind = 0; oind < (int)action->SuccsID.size(); oind++) 00280 { 00281 succstate = GetState(action->SuccsID[oind]); 00282 actionQ += action->SuccsProb[oind]*(action->Costs[oind] + 00283 ((VIState*)(succstate->PlannerSpecificData))->v); 00284 } 00285 00286 if(minaction == NULL || actionQ < minactionQ) 00287 { 00288 minactionQ = actionQ; 00289 minaction = action; 00290 } 00291 00292 } 00293 00294 if(((VIState*)state->PlannerSpecificData)->bestnextaction == NULL) 00295 g_belldelta = INFINITECOST; 00296 else if(g_belldelta < fabs(((VIState*)state->PlannerSpecificData)->v - minactionQ)) 00297 g_belldelta = fabs(((VIState*)state->PlannerSpecificData)->v - minactionQ); 00298 00299 //set state values 00300 ((VIState*)state->PlannerSpecificData)->bestnextaction = minaction; 00301 ((VIState*)state->PlannerSpecificData)->v = (float)minactionQ; 00302 00303 } 00304 00305 00306 void VIPlanner::perform_iteration_backward() 00307 { 00308 CMDPSTATE* state; 00309 vector<int> Worklist; 00310 int aind, oind; 00311 00312 //initialize the worklist 00313 Worklist.push_back(viPlanner.GoalState->StateID); 00314 00315 //backup all the states 00316 while((int)Worklist.size() > 0) 00317 { 00318 //get the next state to process 00319 state = GetState(Worklist[Worklist.size()-1]); 00320 Worklist.pop_back(); 00321 00322 //add all actions to the state 00323 if((int)state->Actions.size() == 0) 00324 environment_->SetAllActionsandAllOutcomes(state); 00325 00326 //backup the state 00327 backup(state); 00328 00329 //insert all the not yet processed successors into the worklist 00330 for(aind = 0; aind < (int)state->Actions.size(); aind++) 00331 { 00332 CMDPACTION* action = state->Actions[aind]; 00333 for(oind = 0; oind < (int)action->SuccsID.size(); oind++) 00334 { 00335 CMDPSTATE* succstate = GetState(action->SuccsID[oind]); 00336 00337 //skip if already was in the queue 00338 if((int)((VIState*)succstate->PlannerSpecificData)->iteration != viPlanner.iteration) 00339 { 00340 Worklist.push_back(succstate->StateID); 00341 00342 //mark it 00343 ((VIState*)succstate->PlannerSpecificData)->iteration = viPlanner.iteration; 00344 } 00345 } 00346 } 00347 00348 //it is not necessary to process the predecessors of the start state 00349 if(state == viPlanner.StartState) 00350 continue; 00351 00352 //add all predecessor ids to the state 00353 if((int)state->PredsID.size() == 0) 00354 environment_->SetAllPreds(state); 00355 //insert all the not yet processed predecessors into the worklist 00356 for(int pind = 0; pind < (int)state->PredsID.size(); pind++) 00357 { 00358 CMDPSTATE* PredState = GetState(state->PredsID[pind]); 00359 00360 //skip if already was in the queue 00361 if((int)((VIState*)PredState->PlannerSpecificData)->iteration != viPlanner.iteration) 00362 { 00363 Worklist.push_back(PredState->StateID); 00364 00365 //mark it 00366 ((VIState*)PredState->PlannerSpecificData)->iteration = viPlanner.iteration; 00367 } 00368 } 00369 } //until empty worklist 00370 } 00371 00372 00373 void VIPlanner::perform_iteration_forward() 00374 { 00375 CMDPSTATE* state = NULL; 00376 vector<CMDPSTATE*> Worklist; 00377 int aind, oind; 00378 00379 //initialize the worklist 00380 Worklist.push_back(viPlanner.StartState); 00381 00382 //backup all the states 00383 while((int)Worklist.size() > 0) 00384 { 00385 //get the next state to process from the front 00386 state = Worklist[Worklist.size()-1]; 00387 //Env_PrintState(state->StateID); 00388 Worklist.pop_back(); 00389 00390 //add all actions to the state 00391 if((int)state->Actions.size() == 0) 00392 environment_->SetAllActionsandAllOutcomes(state); 00393 00394 //backup the state 00395 backup(state); 00396 00397 //insert all the not yet processed successors into the worklist 00398 for(aind = 0; aind < (int)state->Actions.size(); aind++) 00399 { 00400 //CMDPACTION* action = state->Actions[aind]; 00401 CMDPACTION* action = ((VIState*)state->PlannerSpecificData)->bestnextaction; 00402 for(oind = 0; action != NULL && oind < (int)action->SuccsID.size(); oind++) 00403 { 00404 CMDPSTATE* succstate = GetState(action->SuccsID[oind]); 00405 00406 //skip if already was in the queue 00407 if((int)((VIState*)succstate->PlannerSpecificData)->iteration != viPlanner.iteration) 00408 { 00409 Worklist.push_back(succstate); 00410 00411 //mark it 00412 ((VIState*)succstate->PlannerSpecificData)->iteration = viPlanner.iteration; 00413 } 00414 } 00415 } 00416 } //until empty worklist 00417 } 00418 00419 00420 00421 void VIPlanner::InitializePlanner() 00422 { 00423 00424 viPlanner.iteration = 0; 00425 00426 //create and set up goal and start states 00427 viPlanner.StartState = GetState(MDPCfg_->startstateid); 00428 viPlanner.GoalState = GetState(MDPCfg_->goalstateid); 00429 00430 } 00431 00432 00433 //the planning entry point 00434 //returns 1 if path is found, 0 otherwise 00435 int VIPlanner::replan(double allocatedtime, vector<int>* solution_stateIDs_V) 00436 { 00437 00438 #ifndef ROS 00439 const char* policy = "policy.txt"; 00440 const char* stat = "stat.txt"; 00441 #endif 00442 FILE* fPolicy = SBPL_FOPEN(policy,"w"); 00443 FILE* fStat = SBPL_FOPEN(stat,"w"); 00444 00445 //initialization 00446 InitializePlanner(); 00447 00448 //start the timer 00449 clock_t starttime = clock(); 00450 00451 //--------------iterate------------------------------- 00452 while(((clock()-starttime)/(double)CLOCKS_PER_SEC) < allocatedtime && 00453 g_belldelta > MDP_ERRDELTA) 00454 { 00455 viPlanner.iteration++; 00456 00457 g_belldelta = 0; 00458 perform_iteration_forward(); 00459 00460 if(viPlanner.iteration%100 == 0) 00461 { 00462 PrintStat(stdout, starttime); 00463 PrintStat(fStat, starttime); 00464 } 00465 } 00466 //------------------------------------------------------------------ 00467 00468 g_runtime = clock()-starttime; 00469 00470 PrintStat(stdout, starttime); 00471 PrintStat(fStat, starttime); 00472 SBPL_FFLUSH(fStat); 00473 00474 PrintPolicy(fPolicy); 00475 00476 SBPL_FCLOSE(fPolicy); 00477 SBPL_FCLOSE(fStat); 00478 00479 return 1; 00480 00481 } 00482 00483