mdm_library: controller_mdp.cpp Source File

Go to the documentation of this file.
00001 
00025 #include <float.h>
00026 #include <fstream>
00027 
00028 #include <boost/numeric/ublas/io.hpp>
00029 
00030 #include <mdm_library/controller_mdp.h>
00031 #include <mdm_library/ActionSymbol.h>
00032 #include <std_msgs/Float32.h>
00033 
00034 
00035 
00036 using namespace std;
00037 using namespace mdm_library;
00038 
00039 
00040 
00041 #ifdef HAVE_MADP
00042 
00043 
00044 
00045 ControllerMDP::
00046 ControllerMDP ( const string& problem_file_path,
00047                 const string& policy_file_path,
00048                 const CONTROLLER_STATUS initial_status ) :
00049     R_ptr_(),
00050     policy_ptr_ (),
00051     loader_ ( new DecPOMDPLoader ( problem_file_path ) ),
00052     number_of_states_ (),
00053     number_of_actions_ (),
00054     state_sub_ ( nh_.subscribe ( "state", 0, &ControllerMDP::stateCallback, this ) ),
00055     action_pub_ ( nh_.advertise<ActionSymbol> ( "action", 0, true ) ),
00056     reward_pub_ ( nh_.advertise<std_msgs::Float32> ( "reward", 0, true ) )
00057 {
00061     number_of_states_ = loader_->GetDecPOMDP()->GetNrStates();
00062     number_of_actions_ = loader_->GetDecPOMDP()->GetNrJointActions();
00063     loadPolicyVector ( policy_file_path );
00064 }
00065 
00066 
00067 
00068 #endif
00069 
00070 
00071 
00072 ControllerMDP::
00073 ControllerMDP ( const string& policy_file_path,
00074                 const CONTROLLER_STATUS initial_status ) :
00075     ControlLayerBase ( initial_status ),
00076     R_ptr_(),
00077     policy_ptr_ (),
00078     loader_ (),
00079     number_of_states_ (),
00080     number_of_actions_ (),
00081     state_sub_ ( nh_.subscribe ( "state", 0, &ControllerMDP::stateCallback, this ) ),
00082     action_pub_ ( nh_.advertise<ActionSymbol> ( "action", 0, true ) ),
00083     reward_pub_ ( nh_.advertise<std_msgs::Float32> ( "reward", 0, true ) )
00084 {
00086     loadPolicyVector ( policy_file_path );
00087 }
00088 
00089 
00090 
00091 void
00092 ControllerMDP::
00093 loadPolicyVector ( const string& policy_vector_path )
00094 {
00095     try
00096     {
00097         if ( policy_ptr_ != 0 )
00098             ROS_WARN_STREAM ( "The policy for this MDP had already been loaded! Overwriting." );
00099 
00100         ifstream fp;
00101         fp.open ( policy_vector_path.c_str() );
00102 
00104         char c;
00105         int dim_count = 1;
00106         
00107         while(c !=']')
00108         {
00109             fp.get(c);
00110             if(c==',')
00111                 dim_count++;
00112         }
00113         if(dim_count > 2)
00114         {
00115             ROS_ERROR_STREAM("The policy is incorrectly formatted. It must have at most two dimensions (|A|x|S|).");
00116             abort();
00117         }
00118 
00119         fp.seekg (0, fp.beg);
00120 
00121         if(dim_count == 1)
00122         {
00123             IndexVectorPtr policy_vec ( new IndexVector() );
00124             fp >> ( *policy_vec );
00125 
00126             policy_ptr_ = boost::shared_ptr<MDPPolicy> ( new MDPPolicyVector ( policy_vec ) );
00127         }
00128         else
00129         {
00130             MatrixPtr policy_mat ( new Matrix() );
00131             fp >> ( *policy_mat );
00132 
00133             policy_ptr_ = boost::shared_ptr<MDPPolicy> ( new MDPPolicyMatrix ( policy_mat ) );
00134         }
00135     }
00136     catch ( exception& e )
00137     {
00138         ROS_ERROR_STREAM ( e.what() );
00139         abort();
00140     }
00141 }
00142 
00143 
00144 
00145 void
00146 ControllerMDP::
00147 loadRewardMatrix ( const string& reward_matrix_path )
00148 {
00149     try
00150     {
00151         if ( R_ptr_ != 0 )
00152             ROS_WARN_STREAM ( "The reward model for this MDP had already been loaded! Overwriting." );
00153         if ( loader_ != 0 )
00154         {
00155             ROS_WARN_STREAM ( "Trying to load a reward model when it has already been parsed from the problem file." );
00156             ROS_WARN_STREAM ( "This reward model will be used instead, but it may be inconsistent with the policy." );
00157         }
00158         ifstream fp;
00159         fp.open ( reward_matrix_path.c_str() );
00160         MatrixPtr reward_matrix ( new Matrix() );
00161 
00162         fp >> ( *reward_matrix );
00163 
00164         R_ptr_ = boost::shared_ptr<RewardModel> ( new RewardMatrix ( reward_matrix ) );
00165     }
00166     catch ( exception& e )
00167     {
00168         ROS_ERROR_STREAM ( e.what() );
00169         abort();
00170     }
00171 }
00172 
00173 
00174 
00175 void
00176 ControllerMDP::
00177 act ( const uint32_t state )
00178 {
00179     if ( getStatus() == STOPPED )
00180     {
00181         return;
00182     }
00183 
00184     if ( policy_ptr_ == 0 )
00185     {
00186         ROS_WARN_STREAM ( "No policy has been specified for this MDP! Idling." );
00187     }
00188 
00189     uint32_t action;
00190 
00191     try
00192     {
00193         action = ( *policy_ptr_ ) [state];
00194     }
00195     catch ( exception& e )
00196     {
00197         ROS_ERROR_STREAM ( e.what() );
00198         abort();
00199     }
00200 
00201     publishAction ( action );
00202     publishReward ( state, action );
00203 
00204     incrementDecisionEpisode();
00205 }
00206 
00207 
00208 
00209 void
00210 ControllerMDP::
00211 publishAction ( uint32_t a )
00212 {
00213     ActionSymbol aInfo;
00214     aInfo.action_symbol = a;
00215     aInfo.decision_episode = getDecisionEpisode();
00216     action_pub_.publish ( aInfo );
00217 }
00218 
00219 
00220 
00221 void
00222 ControllerMDP::
00223 publishReward ( uint32_t s, uint32_t a )
00224 {
00225     if ( R_ptr_ == 0 )
00226     {
00227         return;  
00228     }
00229 
00230     std_msgs::Float32 reward;
00231     reward.data = R_ptr_->getReward ( s, a );
00232     reward_pub_.publish ( reward );
00233 }
00234 
00235 
00236 
00237 size_t
00238 ControllerMDP::
00239 getNumberOfActions ()
00240 {
00241     return number_of_actions_;
00242 }
00243 
00244 
00245 
00246 size_t
00247 ControllerMDP::
00248 getNumberOfStates ()
00249 {
00250     return number_of_states_;
00251 }