viplanner.cpp
Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2008, Maxim Likhachev
00003  * All rights reserved.
00004  * 
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions are met:
00007  * 
00008  *     * Redistributions of source code must retain the above copyright
00009  *       notice, this list of conditions and the following disclaimer.
00010  *     * Redistributions in binary form must reproduce the above copyright
00011  *       notice, this list of conditions and the following disclaimer in the
00012  *       documentation and/or other materials provided with the distribution.
00013  *     * Neither the name of the University of Pennsylvania nor the names of its
00014  *       contributors may be used to endorse or promote products derived from
00015  *       this software without specific prior written permission.
00016  * 
00017  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00018  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00021  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00022  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00023  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00024  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00025  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00026  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00027  * POSSIBILITY OF SUCH DAMAGE.
00028  */
00029 #include <iostream>
00030 using namespace std;
00031 
00032 #include "../../sbpl/headers.h"
00033 
00034 static unsigned int g_backups;
00035 static clock_t g_runtime = 0;
00036 static double g_belldelta = INFINITECOST;
00037 
00038 
00039 VIPlanner::~VIPlanner()
00040 {
00041 
00042     //delete the statespace
00043 
00044 
00045 
00046 }
00047 
00048 
00049 void VIPlanner::Initialize_vidata(CMDPSTATE* state)
00050 {
00051         VIState* vi_data = (VIState*)state->PlannerSpecificData;
00052 
00053         vi_data->bestnextaction = NULL;
00054         vi_data->iteration = 0;
00055         vi_data->v = (float)environment_->GetGoalHeuristic(state->StateID); 
00056 
00057 
00058 }
00059 
00060 CMDPSTATE* VIPlanner::CreateState(int stateID)
00061 {       
00062         CMDPSTATE* state = NULL;
00063 
00064 #if DEBUG
00065         if(environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND] != -1)
00066         {
00067                 SBPL_ERROR("ERROR in CreateState: state already created\n");
00068                 throw new SBPL_Exception();
00069         }
00070 #endif
00071 
00072         //adds to the tail a state
00073         state = viPlanner.MDP.AddState(stateID);
00074 
00075         //remember the index of the state
00076         environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND] = viPlanner.MDP.StateArray.size()-1;
00077 
00078 #if DEBUG
00079         if(state != viPlanner.MDP.StateArray[environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND]])
00080         {
00081                 SBPL_ERROR("ERROR in CreateState: invalid state index\n");
00082                 throw new SBPL_Exception();
00083         }
00084 #endif
00085 
00086         //create and initialize vi_data
00087         state->PlannerSpecificData = new VIState;
00088         Initialize_vidata(state);
00089 
00090         return state;
00091 
00092 }
00093 
00094 CMDPSTATE* VIPlanner::GetState(int stateID)
00095 {       
00096 
00097         if(stateID >= (int)environment_->StateID2IndexMapping.size())
00098         {
00099                 SBPL_ERROR("ERROR int GetState: stateID is invalid\n");
00100                 throw new SBPL_Exception();
00101         }
00102 
00103         if(environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND] == -1)
00104                 return CreateState(stateID);
00105         else
00106                 return viPlanner.MDP.StateArray[environment_->StateID2IndexMapping[stateID][VIMDP_STATEID2IND]];
00107 
00108 }
00109 
00110 
00111 
00112 void VIPlanner::PrintVIData()
00113 {
00114         SBPL_PRINTF("iteration %d: v(start) = %f\n", viPlanner.iteration, 
00115                 ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v);
00116 
00117 }
00118 
00119 void VIPlanner::PrintStatHeader(FILE* fOut)
00120 {
00121         SBPL_FPRINTF(fOut, 
00122                 "iteration backups v(start)\n");
00123 }
00124 
00125 
00126 
00127 void VIPlanner::PrintStat(FILE* fOut, clock_t starttime)
00128 {
00129         SBPL_FPRINTF(fOut, "%d %d %f %f %d\n", 
00130                         viPlanner.iteration, g_backups, ((double)(clock()-starttime))/CLOCKS_PER_SEC, 
00131                         ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v,                     
00132                         (unsigned int)viPlanner.MDP.StateArray.size());
00133 
00134 }
00135 
00136 
00137 void VIPlanner::PrintPolicy(FILE* fPolicy)
00138 {
00139         bool bPrintStatOnly = true;
00140         vector<CMDPSTATE*> WorkList;
00141         CMDP PolicyforEvaluation;
00142 
00143         viPlanner.iteration++;  
00144         WorkList.push_back(viPlanner.StartState);
00145         ((VIState*)viPlanner.StartState->PlannerSpecificData)->iteration = viPlanner.iteration;
00146         double PolVal = 0.0;
00147         double Conf = 0;
00148         bool bCycles = false;
00149         SBPL_PRINTF("Printing policy...\n");
00150         while((int)WorkList.size() > 0)
00151         {
00152                 //pop the last state
00153                 CMDPSTATE* state = WorkList.at(WorkList.size()-1);
00154                 WorkList.pop_back();
00155                 VIState* statedata = (VIState*)state->PlannerSpecificData;
00156 
00157                 CMDPSTATE* polstate = PolicyforEvaluation.AddState(state->StateID);
00158 
00159                 //print state ID
00160                 if(!bPrintStatOnly)
00161                 {
00162                         SBPL_FPRINTF(fPolicy, "%d\n", state->StateID); 
00163                         environment_->PrintState(state->StateID, false, fPolicy);
00164 
00165                         int h = environment_->GetGoalHeuristic(state->StateID);
00166                         SBPL_FPRINTF(fPolicy, "h=%d\n", h);
00167                         if(h > statedata->v)
00168                         {
00169                                 SBPL_FPRINTF(fPolicy, "WARNING h overestimates exp.cost\n");
00170                         }
00171                 }
00172 
00173                 if(state->StateID == viPlanner.GoalState->StateID) 
00174                 {
00175                         //goal state
00176                         if(!bPrintStatOnly)
00177                                 SBPL_FPRINTF(fPolicy, "0\n");
00178                         Conf += ((VIState*)state->PlannerSpecificData)->Pc;
00179                 }
00180                 else if(statedata->bestnextaction == NULL)
00181                 {
00182                         //unexplored
00183                         if(!bPrintStatOnly)
00184                         {
00185                                 //no outcome explored - stay in the same place
00186                                 SBPL_FPRINTF(fPolicy, "%d %d %d\n", 1, 0, state->StateID);
00187                         }
00188                 }
00189                 else
00190                 {
00191 
00192                         //get best action
00193                         CMDPACTION* action = statedata->bestnextaction;
00194 
00195                         //add action to evaluation MDP
00196                         CMDPACTION* polaction = polstate->AddAction(action->ActionID);
00197 
00198                         if(!bPrintStatOnly)
00199                                 SBPL_FPRINTF(fPolicy, "%d ", (unsigned int)action->SuccsID.size());
00200 
00201                         //print successors and insert them into the list
00202                         for(int i = 0; i < (int)action->SuccsID.size(); i++)
00203                         {
00204                                 if(!bPrintStatOnly)
00205                                         SBPL_FPRINTF(fPolicy, "%d %d ", action->Costs[i], action->SuccsID[i]);
00206                                 polaction->AddOutcome(action->SuccsID[i], action->Costs[i], action->SuccsProb[i]);
00207 
00208                                 CMDPSTATE* succstate = GetState(action->SuccsID[i]);
00209                                 if((int)((VIState*)succstate->PlannerSpecificData)->iteration != viPlanner.iteration)
00210                                 {
00211                                         ((VIState*)succstate->PlannerSpecificData)->iteration = viPlanner.iteration;
00212                                         WorkList.push_back(succstate);
00213 
00214                                         ((VIState*)succstate->PlannerSpecificData)->Pc = 
00215                                                 action->SuccsProb[i]*((VIState*)state->PlannerSpecificData)->Pc;
00216                                         PolVal += ((VIState*)succstate->PlannerSpecificData)->Pc*action->Costs[i];
00217                                 }
00218                         }
00219                         if(!bPrintStatOnly)
00220                                 SBPL_FPRINTF(fPolicy, "\n");
00221                 }
00222         }//while worklist not empty
00223         SBPL_PRINTF("done\n");
00224 
00225         //now evaluate the policy
00226         double PolicyValue = -1;
00227         bool bFullPolicy = false;
00228         double Pcgoal = -1;
00229         int nMerges = 0;
00230         EvaluatePolicy(&PolicyforEvaluation, viPlanner.StartState->StateID,
00231                 viPlanner.GoalState->StateID, &PolicyValue, &bFullPolicy, &Pcgoal, &nMerges,
00232                 &bCycles);
00233 
00234         SBPL_PRINTF("Policy value = %f FullPolicy=%d Merges=%d Cycles=%d\n", 
00235                 PolicyValue, bFullPolicy, nMerges, bCycles);
00236         
00237         if(!bFullPolicy)
00238                 SBPL_PRINTF("WARN: POLICY IS ONLY PARTIAL\n");
00239         if(fabs(PolicyValue-((VIState*)(viPlanner.StartState->PlannerSpecificData))->v) > MDP_ERRDELTA)
00240                 SBPL_PRINTF("WARN: POLICY VALUE IS NOT CORRECT\n");
00241 
00242 
00243         if(!bPrintStatOnly)
00244                 SBPL_FPRINTF(fPolicy, "backups=%d runtime=%f vstart=%f policyvalue=%f fullpolicy=%d Pc(goal)=%f nMerges=%d bCyc=%d\n", 
00245                         g_backups, (double)g_runtime/CLOCKS_PER_SEC, 
00246                         ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v,             
00247                         PolicyValue, bFullPolicy, Pcgoal, nMerges, bCycles);
00248         else
00249                 SBPL_FPRINTF(fPolicy, "%d %f %f %f %d %f %d %d\n", 
00250                         g_backups, (double)g_runtime/CLOCKS_PER_SEC, 
00251                         ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v,             
00252                         PolicyValue, bFullPolicy, Pcgoal, nMerges, bCycles);
00253 
00254 }
00255 
00256 
00257 
00258 void VIPlanner::backup(CMDPSTATE* state)
00259 {
00260         int aind, oind;
00261         CMDPSTATE* succstate;
00262 
00263         g_backups++;
00264 
00265         if(state == viPlanner.GoalState)
00266         {
00267                 ((VIState*)(state->PlannerSpecificData))->bestnextaction = NULL;
00268                 ((VIState*)(state->PlannerSpecificData))->v = 0;
00269                 return;
00270         }
00271 
00272         //iterate through actions
00273         double minactionQ = INFINITECOST;
00274         CMDPACTION* minaction = NULL;
00275         for(aind = 0; aind < (int)state->Actions.size(); aind++)
00276         {
00277                 double actionQ = 0;
00278                 CMDPACTION* action = state->Actions[aind];
00279                 for(oind = 0; oind < (int)action->SuccsID.size(); oind++)
00280                 {
00281                         succstate = GetState(action->SuccsID[oind]);
00282                         actionQ += action->SuccsProb[oind]*(action->Costs[oind] + 
00283                                 ((VIState*)(succstate->PlannerSpecificData))->v);
00284                 }
00285 
00286                 if(minaction == NULL || actionQ < minactionQ)
00287                 {
00288                         minactionQ = actionQ;
00289                         minaction = action;
00290                 }
00291                 
00292         }
00293 
00294         if(((VIState*)state->PlannerSpecificData)->bestnextaction == NULL)
00295                 g_belldelta = INFINITECOST;
00296         else if(g_belldelta < fabs(((VIState*)state->PlannerSpecificData)->v - minactionQ))
00297                 g_belldelta = fabs(((VIState*)state->PlannerSpecificData)->v - minactionQ);
00298 
00299         //set state values
00300         ((VIState*)state->PlannerSpecificData)->bestnextaction = minaction;
00301         ((VIState*)state->PlannerSpecificData)->v = (float)minactionQ;
00302 
00303 }
00304 
00305 
00306 void VIPlanner::perform_iteration_backward()
00307 {
00308         CMDPSTATE* state;
00309         vector<int> Worklist;
00310         int aind, oind;
00311 
00312         //initialize the worklist
00313         Worklist.push_back(viPlanner.GoalState->StateID);
00314                 
00315         //backup all the states
00316         while((int)Worklist.size() > 0)
00317         {
00318                 //get the next state to process
00319                 state = GetState(Worklist[Worklist.size()-1]);
00320                 Worklist.pop_back();
00321 
00322                 //add all actions to the state 
00323                 if((int)state->Actions.size() == 0)
00324                         environment_->SetAllActionsandAllOutcomes(state);
00325 
00326                 //backup the state
00327                 backup(state);
00328 
00329                 //insert all the not yet processed successors into the worklist
00330                 for(aind = 0; aind < (int)state->Actions.size(); aind++)
00331                 {
00332                         CMDPACTION* action = state->Actions[aind];
00333                         for(oind = 0; oind < (int)action->SuccsID.size(); oind++)
00334                         {
00335                                 CMDPSTATE* succstate = GetState(action->SuccsID[oind]);
00336 
00337                                 //skip if already was in the queue
00338                                 if((int)((VIState*)succstate->PlannerSpecificData)->iteration != viPlanner.iteration)
00339                                 {
00340                                         Worklist.push_back(succstate->StateID);
00341                                 
00342                                         //mark it
00343                                         ((VIState*)succstate->PlannerSpecificData)->iteration = viPlanner.iteration;
00344                                 }
00345                         }
00346                 }
00347 
00348                 //it is not necessary to process the predecessors of the start state
00349                 if(state == viPlanner.StartState)
00350                         continue;
00351                 
00352                 //add all predecessor ids to the state
00353                 if((int)state->PredsID.size() == 0)
00354                         environment_->SetAllPreds(state);
00355                 //insert all the not yet processed predecessors into the worklist
00356                 for(int pind = 0; pind < (int)state->PredsID.size(); pind++)
00357                 {
00358                         CMDPSTATE* PredState = GetState(state->PredsID[pind]);
00359         
00360                         //skip if already was in the queue
00361                         if((int)((VIState*)PredState->PlannerSpecificData)->iteration != viPlanner.iteration)
00362                         {
00363                                 Worklist.push_back(PredState->StateID);
00364                                 
00365                                 //mark it
00366                                 ((VIState*)PredState->PlannerSpecificData)->iteration = viPlanner.iteration;
00367                         }
00368                 }
00369         } //until empty worklist
00370 }
00371 
00372 
00373 void VIPlanner::perform_iteration_forward()
00374 {
00375         CMDPSTATE* state = NULL;
00376         vector<CMDPSTATE*> Worklist;
00377         int aind, oind;
00378 
00379         //initialize the worklist
00380         Worklist.push_back(viPlanner.StartState);
00381                 
00382         //backup all the states
00383         while((int)Worklist.size() > 0)
00384         {
00385                 //get the next state to process from the front
00386                 state = Worklist[Worklist.size()-1];
00387                 //Env_PrintState(state->StateID);
00388                 Worklist.pop_back();
00389 
00390                 //add all actions to the state 
00391                 if((int)state->Actions.size() == 0)
00392                         environment_->SetAllActionsandAllOutcomes(state);
00393 
00394                 //backup the state
00395                 backup(state);
00396 
00397                 //insert all the not yet processed successors into the worklist
00398                 for(aind = 0; aind < (int)state->Actions.size(); aind++)
00399                 {
00400                         //CMDPACTION* action = state->Actions[aind];
00401                         CMDPACTION* action = ((VIState*)state->PlannerSpecificData)->bestnextaction;
00402                         for(oind = 0; action != NULL && oind < (int)action->SuccsID.size(); oind++)
00403                         {
00404                                 CMDPSTATE* succstate = GetState(action->SuccsID[oind]);
00405 
00406                                 //skip if already was in the queue
00407                                 if((int)((VIState*)succstate->PlannerSpecificData)->iteration != viPlanner.iteration)
00408                                 {
00409                                         Worklist.push_back(succstate);
00410                                 
00411                                         //mark it
00412                                         ((VIState*)succstate->PlannerSpecificData)->iteration = viPlanner.iteration;
00413                                 }
00414                         }
00415                 }
00416         } //until empty worklist
00417 }
00418 
00419 
00420 
00421 void VIPlanner::InitializePlanner()
00422 {
00423 
00424         viPlanner.iteration = 0;
00425 
00426         //create and set up goal and start states
00427         viPlanner.StartState = GetState(MDPCfg_->startstateid);
00428         viPlanner.GoalState = GetState(MDPCfg_->goalstateid);
00429 
00430 }
00431 
00432 
00433 //the planning entry point
00434 //returns 1 if path is found, 0 otherwise
00435 int VIPlanner::replan(double allocatedtime, vector<int>* solution_stateIDs_V)
00436 {
00437 
00438 #ifndef ROS
00439   const char* policy = "policy.txt";
00440   const char* stat = "stat.txt";
00441 #endif
00442         FILE* fPolicy = SBPL_FOPEN(policy,"w");
00443         FILE* fStat = SBPL_FOPEN(stat,"w");
00444 
00445         //initialization
00446         InitializePlanner();
00447                         
00448         //start the timer
00449         clock_t starttime = clock();
00450 
00451         //--------------iterate-------------------------------
00452         while(((clock()-starttime)/(double)CLOCKS_PER_SEC) < allocatedtime && 
00453                 g_belldelta > MDP_ERRDELTA)
00454         {
00455                 viPlanner.iteration++;
00456 
00457                 g_belldelta = 0;
00458                 perform_iteration_forward();
00459 
00460                 if(viPlanner.iteration%100 == 0)
00461                 {
00462                         PrintStat(stdout, starttime);
00463                         PrintStat(fStat, starttime);
00464                 }
00465         }
00466         //------------------------------------------------------------------    
00467 
00468         g_runtime = clock()-starttime;
00469 
00470         PrintStat(stdout, starttime);
00471         PrintStat(fStat, starttime);
00472         SBPL_FFLUSH(fStat);
00473 
00474         PrintPolicy(fPolicy);
00475 
00476         SBPL_FCLOSE(fPolicy);
00477         SBPL_FCLOSE(fStat);
00478 
00479         return 1;
00480 
00481 }
00482 
00483 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines


sbpl
Author(s): Maxim Likhachev/maximl@seas.upenn.edu
autogenerated on Fri Jan 18 2013 13:41:53