Go to the documentation of this file.00001
00025 #include <float.h>
00026 #include <fstream>
00027
00028 #include <boost/numeric/ublas/io.hpp>
00029
00030 #include <mdm_library/controller_mdp.h>
00031 #include <mdm_library/ActionSymbol.h>
00032 #include <std_msgs/Float32.h>
00033
00034
00035
00036 using namespace std;
00037 using namespace mdm_library;
00038
00039
00040
00041 #ifdef HAVE_MADP
00042
00043
00044
00045 ControllerMDP::
00046 ControllerMDP ( const string& problem_file_path,
00047 const string& policy_file_path,
00048 const CONTROLLER_STATUS initial_status ) :
00049 R_ptr_(),
00050 policy_ptr_ (),
00051 loader_ ( new DecPOMDPLoader ( problem_file_path ) ),
00052 number_of_states_ (),
00053 number_of_actions_ (),
00054 state_sub_ ( nh_.subscribe ( "state", 0, &ControllerMDP::stateCallback, this ) ),
00055 action_pub_ ( nh_.advertise<ActionSymbol> ( "action", 0, true ) ),
00056 reward_pub_ ( nh_.advertise<std_msgs::Float32> ( "reward", 0, true ) )
00057 {
00061 number_of_states_ = loader_->GetDecPOMDP()->GetNrStates();
00062 number_of_actions_ = loader_->GetDecPOMDP()->GetNrJointActions();
00063 loadPolicyVector ( policy_file_path );
00064 }
00065
00066
00067
00068 #endif
00069
00070
00071
00072 ControllerMDP::
00073 ControllerMDP ( const string& policy_file_path,
00074 const CONTROLLER_STATUS initial_status ) :
00075 ControlLayerBase ( initial_status ),
00076 R_ptr_(),
00077 policy_ptr_ (),
00078 loader_ (),
00079 number_of_states_ (),
00080 number_of_actions_ (),
00081 state_sub_ ( nh_.subscribe ( "state", 0, &ControllerMDP::stateCallback, this ) ),
00082 action_pub_ ( nh_.advertise<ActionSymbol> ( "action", 0, true ) ),
00083 reward_pub_ ( nh_.advertise<std_msgs::Float32> ( "reward", 0, true ) )
00084 {
00086 loadPolicyVector ( policy_file_path );
00087 }
00088
00089
00090
00091 void
00092 ControllerMDP::
00093 loadPolicyVector ( const string& policy_vector_path )
00094 {
00095 try
00096 {
00097 if ( policy_ptr_ != 0 )
00098 ROS_WARN_STREAM ( "The policy for this MDP had already been loaded! Overwriting." );
00099
00100 ifstream fp;
00101 fp.open ( policy_vector_path.c_str() );
00102
00104 char c;
00105 int dim_count = 1;
00106
00107 while(c !=']')
00108 {
00109 fp.get(c);
00110 if(c==',')
00111 dim_count++;
00112 }
00113 if(dim_count > 2)
00114 {
00115 ROS_ERROR_STREAM("The policy is incorrectly formatted. It must have at most two dimensions (|A|x|S|).");
00116 abort();
00117 }
00118
00119 fp.seekg (0, fp.beg);
00120
00121 if(dim_count == 1)
00122 {
00123 IndexVectorPtr policy_vec ( new IndexVector() );
00124 fp >> ( *policy_vec );
00125
00126 policy_ptr_ = boost::shared_ptr<MDPPolicy> ( new MDPPolicyVector ( policy_vec ) );
00127 }
00128 else
00129 {
00130 MatrixPtr policy_mat ( new Matrix() );
00131 fp >> ( *policy_mat );
00132
00133 policy_ptr_ = boost::shared_ptr<MDPPolicy> ( new MDPPolicyMatrix ( policy_mat ) );
00134 }
00135 }
00136 catch ( exception& e )
00137 {
00138 ROS_ERROR_STREAM ( e.what() );
00139 abort();
00140 }
00141 }
00142
00143
00144
00145 void
00146 ControllerMDP::
00147 loadRewardMatrix ( const string& reward_matrix_path )
00148 {
00149 try
00150 {
00151 if ( R_ptr_ != 0 )
00152 ROS_WARN_STREAM ( "The reward model for this MDP had already been loaded! Overwriting." );
00153 if ( loader_ != 0 )
00154 {
00155 ROS_WARN_STREAM ( "Trying to load a reward model when it has already been parsed from the problem file." );
00156 ROS_WARN_STREAM ( "This reward model will be used instead, but it may be inconsistent with the policy." );
00157 }
00158 ifstream fp;
00159 fp.open ( reward_matrix_path.c_str() );
00160 MatrixPtr reward_matrix ( new Matrix() );
00161
00162 fp >> ( *reward_matrix );
00163
00164 R_ptr_ = boost::shared_ptr<RewardModel> ( new RewardMatrix ( reward_matrix ) );
00165 }
00166 catch ( exception& e )
00167 {
00168 ROS_ERROR_STREAM ( e.what() );
00169 abort();
00170 }
00171 }
00172
00173
00174
00175 void
00176 ControllerMDP::
00177 act ( const uint32_t state )
00178 {
00179 if ( getStatus() == STOPPED )
00180 {
00181 return;
00182 }
00183
00184 if ( policy_ptr_ == 0 )
00185 {
00186 ROS_WARN_STREAM ( "No policy has been specified for this MDP! Idling." );
00187 }
00188
00189 uint32_t action;
00190
00191 try
00192 {
00193 action = ( *policy_ptr_ ) [state];
00194 }
00195 catch ( exception& e )
00196 {
00197 ROS_ERROR_STREAM ( e.what() );
00198 abort();
00199 }
00200
00201 publishAction ( action );
00202 publishReward ( state, action );
00203
00204 incrementDecisionEpisode();
00205 }
00206
00207
00208
00209 void
00210 ControllerMDP::
00211 publishAction ( uint32_t a )
00212 {
00213 ActionSymbol aInfo;
00214 aInfo.action_symbol = a;
00215 aInfo.decision_episode = getDecisionEpisode();
00216 action_pub_.publish ( aInfo );
00217 }
00218
00219
00220
00221 void
00222 ControllerMDP::
00223 publishReward ( uint32_t s, uint32_t a )
00224 {
00225 if ( R_ptr_ == 0 )
00226 {
00227 return;
00228 }
00229
00230 std_msgs::Float32 reward;
00231 reward.data = R_ptr_->getReward ( s, a );
00232 reward_pub_.publish ( reward );
00233 }
00234
00235
00236
00237 size_t
00238 ControllerMDP::
00239 getNumberOfActions ()
00240 {
00241 return number_of_actions_;
00242 }
00243
00244
00245
00246 size_t
00247 ControllerMDP::
00248 getNumberOfStates ()
00249 {
00250 return number_of_states_;
00251 }