#include <ros/ros.h>

#include <rl_msgs/RLStateReward.h>
#include <rl_msgs/RLEnvDescription.h>
#include <rl_msgs/RLAction.h>
#include <rl_msgs/RLExperimentInfo.h>
#include <rl_msgs/RLEnvSeedExperience.h>

#include <ros/callback_queue.h>

#include <rl_common/core.hh>
#include <rl_common/Random.h>
#include <rl_common/ExperienceFile.hh>

#include <rl_agent/DiscretizationAgent.hh>
#include <rl_agent/QLearner.hh>
#include <rl_agent/ModelBasedAgent.hh>
#include <rl_agent/SavedPolicy.hh>
#include <rl_agent/Dyna.hh>
#include <rl_agent/Sarsa.hh>

#include "std_msgs/String.h"

#include <getopt.h>
#include <stdlib.h>

#define NODE "RLAgent"

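// Publishers for the agent's chosen actions and per-episode statistics.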
static ros::Publisher out_rl_action;
static ros::Publisher out_exp_info;

bool firstAction = true;
int seed = 1;

Agent* agent = NULL;
bool PRINTS = true;

rl_msgs::RLExperimentInfo info;
char* agentType;

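// Default agent parameters; most can be overridden from the command line.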
float discountfactor = 0.99;
float epsilon = 0.1;
float alpha = 0.3;
float initialvalue = 0.0;
float actrate = 10.0;
float lambda = 0.1;
int M = 5;
int model = C45TREE;
int explore = GREEDY;
int modelcombo = BEST;
int planner = PAR_ETUCT_ACTUAL;
int nmodels = 1;
bool reltrans = true;
int nstates = 0;
int k = 1000;
char *filename = NULL;
int history = 0;
float v = 0;
float n = 0;


void displayHelp(){
  cout << "\n Call agent --agent type [options]\n";
  cout << "Agent types: qlearner sarsa modelbased rmax texplore dyna savedpolicy\n";
  cout << "\n Options:\n";
  cout << "--seed value (integer seed for random number generator)\n";
  cout << "--gamma value (discount factor between 0 and 1)\n";
  cout << "--epsilon value (epsilon for epsilon-greedy exploration)\n";
  cout << "--alpha value (learning rate alpha)\n";
  cout << "--initialvalue value (initial q values)\n";
  cout << "--actrate value (action selection rate (Hz))\n";
  cout << "--lambda value (lambda for eligibility traces)\n";
  cout << "--m value (parameter for R-Max)\n";
  cout << "--k value (For Dyna: # of model based updates to do between each real world update)\n";
  cout << "--history value (# steps of history to use for planning with delay)\n";
  cout << "--filename file (file to load saved policy from for savedpolicy agent)\n";
  cout << "--model type (tabular,tree,m5tree)\n";
  cout << "--planner type (vi,pi,sweeping,uct,parallel-uct,delayed-uct,delayed-parallel-uct)\n";
  cout << "--explore type (unknown,greedy,epsilongreedy,variancenovelty)\n";
  cout << "--combo type (average,best,separate)\n";
  cout << "--nmodels value (# of models)\n";
  cout << "--nstates value (optionally discretize domain into value # of states on each feature)\n";
  cout << "--reltrans (learn relative transitions)\n";
  cout << "--abstrans (learn absolute transitions)\n";
  cout << "--v value (For TEXPLORE: b/v coefficient for rewarding state-actions where models disagree)\n";
  cout << "--n value (For TEXPLORE: n coefficient for rewarding state-actions which are novel)\n";
  cout << "--prints (turn on debug printing of actions/rewards)\n";

  cout << "\n For more info, see: http://www.ros.org/wiki/rl_agent\n";
  exit(-1);
}

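/** Process a state/reward message from the environment: update the agent
    with the latest reward, publish its next action, and publish episode
    statistics when a terminal state is reached. */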
void processState(const rl_msgs::RLStateReward::ConstPtr &stateIn){

  if (agent == NULL){
    cout << "no agent yet" << endl;
    return;
  }

  rl_msgs::RLAction a;

  if (firstAction){
    a.action = agent->first_action(stateIn->state);
    info.episode_reward = 0;
    info.number_actions = 1;
  } else {
    info.episode_reward += stateIn->reward;

    if (stateIn->terminal){
      agent->last_action(stateIn->reward);
      cout << "Episode " << info.episode_number << " reward: " << info.episode_reward << endl;

      out_exp_info.publish(info);
      info.episode_number++;
      info.episode_reward = 0;
      firstAction = true;
      return;
    } else {
      a.action = agent->next_action(stateIn->reward, stateIn->state);
      info.number_actions++;
    }
  }
  firstAction = false;

  out_rl_action.publish(a);
}

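/** Process a seed experience from the environment and pass it to the agent
    as a single-element batch of seed experiences. */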
void processSeed(const rl_msgs::RLEnvSeedExperience::ConstPtr &seedIn){

  if (agent == NULL){
    cout << "no agent yet" << endl;
    return;
  }

  std::vector<experience> seeds;
  experience seed1;
  seed1.s = seedIn->from_state;
  seed1.next = seedIn->to_state;
  seed1.act = seedIn->action;
  seed1.reward = seedIn->reward;
  seed1.terminal = seedIn->terminal;
  seeds.push_back(seed1);

  agent->seedExp(seeds);
}

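/** Process the environment description and construct the requested agent
    with the parameters parsed from the command line. */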
void processEnvDescription(const rl_msgs::RLEnvDescription::ConstPtr &envIn){

  Random rng(seed+1);
  agent = NULL;

  if (strcmp(agentType, "qlearner") == 0){
    cout << "Agent: QLearner" << endl;
    agent = new QLearner(envIn->num_actions,
                         discountfactor,
                         initialvalue,
                         alpha,
                         epsilon,
                         rng);
  }

  else if (strcmp(agentType, "rmax") == 0 || strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0){
    cout << "Agent: Model Based" << endl;
    agent = new ModelBasedAgent(envIn->num_actions,
                                discountfactor,
                                envIn->max_reward, envIn->reward_range,
                                model, explore, modelcombo,
                                nmodels,
                                planner,
                                0.1,
                                lambda,
                                (1.0/actrate),
                                M,
                                envIn->min_state_range, envIn->max_state_range,
                                nstates,
                                history, v, n, false, reltrans, 0.2,
                                envIn->stochastic, envIn->episodic,
                                rng);
  }

  else if (strcmp(agentType, "dyna") == 0){
    cout << "Agent: Dyna" << endl;
    agent = new Dyna(envIn->num_actions, discountfactor,
                     initialvalue, alpha, k, epsilon,
                     rng);
  }

  else if (strcmp(agentType, "sarsa") == 0){
    cout << "Agent: Sarsa" << endl;
    agent = new Sarsa(envIn->num_actions, discountfactor,
                      initialvalue, alpha, epsilon, lambda,
                      rng);
  }

  else if (strcmp(agentType, "savedpolicy") == 0){
    cout << "Agent: Saved Policy" << endl;
    agent = new SavedPolicy(envIn->num_actions, filename);
  }

  else {
    cout << "Invalid Agent!" << endl;
    displayHelp();
    exit(-1);
  }

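  // Optionally wrap the agent in a DiscretizationAgent, which discretizes
  // each state feature into nstates values before passing states through.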
  Agent* a2 = agent;

  if (nstates > 0 && (model != M5ALLMULTI || strcmp(agentType, "qlearner") == 0)){
    int totalStates = powf(nstates, envIn->min_state_range.size());
    if (PRINTS) cout << "Discretize with " << nstates << ", total: " << totalStates << endl;
    agent = new DiscretizationAgent(nstates, a2,
                                    envIn->min_state_range,
                                    envIn->max_state_range, PRINTS);
  }

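  // Reset episode bookkeeping for the newly created agent.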
  firstAction = true;
  info.episode_number = 0;
  info.episode_reward = 0;
}


int main(int argc, char *argv[])
{
  ros::init(argc, argv, NODE);
  ros::NodeHandle node;

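  // Agent type and seed may be given positionally as the first two
  // arguments; both can be overridden by --agent and --seed below.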
  if (argc < 3){
    cout << "--agent type option is required" << endl;
    displayHelp();
    exit(-1);
  }

  agentType = argv[1];
  seed = std::atoi(argv[2]);

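  // Scan the full argument list for --agent first, since the agent type
  // determines which of the remaining options are valid.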
  bool gotAgent = false;
  for (int i = 1; i < argc-1; i++){
    if (strcmp(argv[i], "--agent") == 0){
      gotAgent = true;
      agentType = argv[i+1];
    }
  }
  if (!gotAgent) {
    cout << "--agent type option is required" << endl;
    displayHelp();
  }

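  // Preset parameter bundles for the canonical R-Max and TEXPLORE
  // configurations; options parsed below can still override them.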
  if (strcmp(agentType, "rmax") == 0){
    model = RMAX;
    explore = EXPLORE_UNKNOWN;
    modelcombo = BEST;
    planner = VALUE_ITERATION;
    nmodels = 1;
    reltrans = false;
    M = 5;
  } else if (strcmp(agentType, "texplore") == 0){
    model = C45TREE;
    explore = DIFF_AND_NOVEL_BONUS;
    v = 0;
    n = 0;
    modelcombo = AVERAGE;
    planner = PAR_ETUCT_ACTUAL;
    nmodels = 5;
    reltrans = true;
    M = 0;
  }

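  // Long-option table for getopt_long_only; getopt requires the table to be
  // terminated by an all-zero entry.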
  int ch;
  const char* optflags = "geairlmoxpcn:";
  int option_index = 0;
  static struct option long_options[] = {
    {"gamma", 1, 0, 'g'},
    {"discountfactor", 1, 0, 'g'},
    {"epsilon", 1, 0, 'e'},
    {"alpha", 1, 0, 'a'},
    {"initialvalue", 1, 0, 'i'},
    {"actrate", 1, 0, 'r'},
    {"lambda", 1, 0, 'l'},
    {"m", 1, 0, 'm'},
    {"model", 1, 0, 'o'},
    {"explore", 1, 0, 'x'},
    {"planner", 1, 0, 'p'},
    {"combo", 1, 0, 'c'},
    {"nmodels", 1, 0, '#'},
    {"reltrans", 0, 0, 't'},
    {"abstrans", 0, 0, '0'},
    {"seed", 1, 0, 's'},
    {"agent", 1, 0, 'q'},
    {"prints", 0, 0, 'd'},
    {"nstates", 1, 0, 'w'},
    {"k", 1, 0, 'k'},
    {"filename", 1, 0, 'f'},
    {"history", 1, 0, 'y'},
    {"b", 1, 0, 'b'},
    {"v", 1, 0, 'v'},
    {"n", 1, 0, 'n'},
    {NULL, 0, NULL, 0}
  };

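  // Track which options were explicitly set, so that combinations that
  // would silently have no effect can be rejected after parsing.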
  bool epsilonChanged = false;
  bool actrateChanged = false;
  bool mChanged = false;
  bool bvnChanged = false;
  bool lambdaChanged = false;

  while(-1 != (ch = getopt_long_only(argc, argv, optflags, long_options, &option_index))) {
    switch(ch) {

    case 'g':
      discountfactor = std::atof(optarg);
      cout << "discountfactor: " << discountfactor << endl;
      break;

    case 'e':
      epsilonChanged = true;
      epsilon = std::atof(optarg);
      cout << "epsilon: " << epsilon << endl;
      break;

    case 'y':
      {
        if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0){
          history = std::atoi(optarg);
          cout << "history: " << history << endl;
        } else {
          cout << "--history is not a valid option for agent: " << agentType << endl;
          exit(-1);
        }
        break;
      }

    case 'k':
      {
        if (strcmp(agentType, "dyna") == 0){
          k = std::atoi(optarg);
          cout << "k: " << k << endl;
        } else {
          cout << "--k is only a valid option for the Dyna agent" << endl;
          exit(-1);
        }
        break;
      }

    case 'f':
      filename = optarg;
      cout << "policy filename: " << filename << endl;
      break;

    case 'a':
      {
        if (strcmp(agentType, "qlearner") == 0 || strcmp(agentType, "dyna") == 0 || strcmp(agentType, "sarsa") == 0){
          alpha = std::atof(optarg);
          cout << "alpha: " << alpha << endl;
        } else {
          cout << "--alpha option is only valid for Q-Learning, Dyna, and Sarsa" << endl;
          exit(-1);
        }
        break;
      }

    case 'i':
      {
        if (strcmp(agentType, "qlearner") == 0 || strcmp(agentType, "dyna") == 0 || strcmp(agentType, "sarsa") == 0){
          initialvalue = std::atof(optarg);
          cout << "initialvalue: " << initialvalue << endl;
        } else {
          cout << "--initialvalue option is only valid for Q-Learning, Dyna, and Sarsa" << endl;
          exit(-1);
        }
        break;
      }

    case 'r':
      {
        actrateChanged = true;
        if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0 || strcmp(agentType, "rmax") == 0){
          actrate = std::atof(optarg);
          cout << "actrate: " << actrate << endl;
        } else {
          cout << "Model-free methods do not require an action rate" << endl;
          exit(-1);
        }
        break;
      }

    case 'l':
      {
        lambdaChanged = true;
        if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0 || strcmp(agentType, "rmax") == 0 || strcmp(agentType, "sarsa") == 0){
          lambda = std::atof(optarg);
          cout << "lambda: " << lambda << endl;
        } else {
          cout << "--lambda option is invalid for this agent: " << agentType << endl;
          exit(-1);
        }
        break;
      }

    case 'm':
      {
        mChanged = true;
        if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0 || strcmp(agentType, "rmax") == 0){
          M = std::atoi(optarg);
          cout << "M: " << M << endl;
        } else {
          cout << "--M option only useful for model-based agents, not " << agentType << endl;
          exit(-1);
        }
        break;
      }

    case 'o':
      {
        if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0 || strcmp(agentType, "rmax") == 0){
          if (strcmp(optarg, "tabular") == 0) model = RMAX;
          else if (strcmp(optarg, "tree") == 0) model = C45TREE;
          else if (strcmp(optarg, "texplore") == 0) model = C45TREE;
          else if (strcmp(optarg, "c45tree") == 0) model = C45TREE;
          else if (strcmp(optarg, "m5tree") == 0) model = M5ALLMULTI;
          if (strcmp(agentType, "rmax") == 0 && model != RMAX){
            cout << "R-Max should use tabular model" << endl;
            exit(-1);
          }
        } else {
          cout << "Model-free methods do not need a model, --model option does nothing for this agent type" << endl;
          exit(-1);
        }
        cout << "model: " << modelNames[model] << endl;
        break;
      }

    case 'x':
      {
        if (strcmp(optarg, "unknown") == 0) explore = EXPLORE_UNKNOWN;
        else if (strcmp(optarg, "greedy") == 0) explore = GREEDY;
        else if (strcmp(optarg, "epsilongreedy") == 0) explore = EPSILONGREEDY;
        else if (strcmp(optarg, "unvisitedstates") == 0) explore = UNVISITED_BONUS;
        else if (strcmp(optarg, "unvisitedactions") == 0) explore = UNVISITED_ACT_BONUS;
        else if (strcmp(optarg, "variancenovelty") == 0) explore = DIFF_AND_NOVEL_BONUS;
        if (strcmp(agentType, "rmax") == 0 && explore != EXPLORE_UNKNOWN){
          cout << "R-Max should use \"--explore unknown\" exploration" << endl;
          exit(-1);
        }
        else if (strcmp(agentType, "texplore") != 0 && strcmp(agentType, "modelbased") != 0 && strcmp(agentType, "rmax") != 0 && (explore != GREEDY && explore != EPSILONGREEDY)) {
          cout << "Model free methods must use either greedy or epsilon-greedy exploration!" << endl;
          exit(-1);
        }
        cout << "explore: " << exploreNames[explore] << endl;
        break;
      }

    case 'p':
      {
        if (strcmp(optarg, "vi") == 0) planner = VALUE_ITERATION;
        else if (strcmp(optarg, "valueiteration") == 0) planner = VALUE_ITERATION;
        else if (strcmp(optarg, "policyiteration") == 0) planner = POLICY_ITERATION;
        else if (strcmp(optarg, "pi") == 0) planner = POLICY_ITERATION;
        else if (strcmp(optarg, "sweeping") == 0) planner = PRI_SWEEPING;
        else if (strcmp(optarg, "prioritizedsweeping") == 0) planner = PRI_SWEEPING;
        else if (strcmp(optarg, "uct") == 0) planner = ET_UCT_ACTUAL;
        else if (strcmp(optarg, "paralleluct") == 0) planner = PAR_ETUCT_ACTUAL;
        else if (strcmp(optarg, "realtimeuct") == 0) planner = PAR_ETUCT_ACTUAL;
        else if (strcmp(optarg, "realtime-uct") == 0) planner = PAR_ETUCT_ACTUAL;
        else if (strcmp(optarg, "parallel-uct") == 0) planner = PAR_ETUCT_ACTUAL;
        else if (strcmp(optarg, "delayeduct") == 0) planner = POMDP_ETUCT;
        else if (strcmp(optarg, "delayed-uct") == 0) planner = POMDP_ETUCT;
        else if (strcmp(optarg, "delayedparalleluct") == 0) planner = POMDP_PAR_ETUCT;
        else if (strcmp(optarg, "delayed-parallel-uct") == 0) planner = POMDP_PAR_ETUCT;
        if (strcmp(agentType, "texplore") != 0 && strcmp(agentType, "modelbased") != 0 && strcmp(agentType, "rmax") != 0){
          cout << "Model-free methods do not require planners, --planner option does nothing with this agent" << endl;
          exit(-1);
        }
        if (strcmp(agentType, "rmax") == 0 && planner != VALUE_ITERATION){
          cout << "Typical implementation of R-Max would use value iteration, but another planner type is ok" << endl;
        }
        cout << "planner: " << plannerNames[planner] << endl;
        break;
      }

    case 'c':
      {
        if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0){
          if (strcmp(optarg, "average") == 0) modelcombo = AVERAGE;
          else if (strcmp(optarg, "weighted") == 0) modelcombo = WEIGHTAVG;
          else if (strcmp(optarg, "best") == 0) modelcombo = BEST;
          else if (strcmp(optarg, "separate") == 0) modelcombo = SEPARATE;
          cout << "modelcombo: " << comboNames[modelcombo] << endl;
        } else {
          cout << "--combo is an invalid option for agent: " << agentType << endl;
          exit(-1);
        }
        break;
      }

    case '#':
      {
        if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0){
          nmodels = std::atoi(optarg);
          cout << "nmodels: " << nmodels << endl;
        } else {
          cout << "--nmodels is an invalid option for agent: " << agentType << endl;
          exit(-1);
        }
        if (nmodels < 1){
          cout << "nmodels must be > 0" << endl;
          exit(-1);
        }
        break;
      }

    case 't':
      {
        if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0){
          reltrans = true;
          cout << "reltrans: " << reltrans << endl;
        } else {
          cout << "--reltrans is an invalid option for agent: " << agentType << endl;
          exit(-1);
        }
        break;
      }

    case '0':
      {
        if (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0){
          reltrans = false;
          cout << "reltrans: " << reltrans << endl;
        } else {
          cout << "--abstrans is an invalid option for agent: " << agentType << endl;
          exit(-1);
        }
        break;
      }

    case 's':
      seed = std::atoi(optarg);
      cout << "seed: " << seed << endl;
      break;

    case 'q':
      cout << "agent: " << agentType << endl;
      break;

    case 'd':
      PRINTS = true;
      break;

    case 'w':
      nstates = std::atoi(optarg);
      cout << "nstates for discretization: " << nstates << endl;
      break;

    case 'v':
    case 'b':
      {
        bvnChanged = true;
        if (strcmp(agentType, "texplore") == 0){
          v = std::atof(optarg);
          cout << "v coefficient (variance bonus): " << v << endl;
        }
        else {
          cout << "--v and --b are invalid options for agent: " << agentType << endl;
          exit(-1);
        }
        break;
      }

    case 'n':
      {
        bvnChanged = true;
        if (strcmp(agentType, "texplore") == 0){
          n = std::atof(optarg);
          cout << "n coefficient (novelty bonus): " << n << endl;
        }
        else {
          cout << "--n is an invalid option for agent: " << agentType << endl;
          exit(-1);
        }
        break;
      }

    case 'h':
    case '?':
    case 0:
    default:
      displayHelp();
      break;
    }
  }

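  // Post-parse sanity checks: reject option combinations that would
  // otherwise have no effect. With both bonus coefficients at zero,
  // variance-and-novelty exploration is equivalent to greedy.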
  if (explore == DIFF_AND_NOVEL_BONUS && v == 0 && n == 0)
    explore = GREEDY;

  if (epsilonChanged && explore != EPSILONGREEDY){
    cout << "No reason to change epsilon when not using epsilon-greedy exploration" << endl;
    exit(-1);
  }

  if (history > 0 && (planner == VALUE_ITERATION || planner == POLICY_ITERATION || planner == PRI_SWEEPING)){
    cout << "No reason to set history higher than 0 if not using a UCT planner" << endl;
    exit(-1);
  }

  if (actrateChanged && (planner == VALUE_ITERATION || planner == POLICY_ITERATION || planner == PRI_SWEEPING)){
    cout << "No reason to set actrate if not using a UCT planner" << endl;
    exit(-1);
  }

  if (lambdaChanged && (strcmp(agentType, "texplore") == 0 || strcmp(agentType, "modelbased") == 0 || strcmp(agentType, "rmax") == 0) && (planner == VALUE_ITERATION || planner == POLICY_ITERATION || planner == PRI_SWEEPING)){
    cout << "No reason to set lambda if not using a UCT planner" << endl;
    exit(-1);
  }

  if (bvnChanged && explore != DIFF_AND_NOVEL_BONUS){
    cout << "No reason to set n or v if not doing variance & novelty exploration" << endl;
    exit(-1);
  }

  if (modelcombo != BEST && nmodels == 1){
    cout << "No reason to have model combo other than best with nmodels = 1" << endl;
    exit(-1);
  }

  if (mChanged && explore != EXPLORE_UNKNOWN){
    cout << "No reason to set M if not doing R-max style Explore Unknown exploration" << endl;
    exit(-1);
  }

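  // Advertise the agent's action and experiment-info topics.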
  int qDepth = 1;

  out_rl_action = node.advertise<rl_msgs::RLAction>("rl_agent/rl_action", qDepth, false);
  out_exp_info = node.advertise<rl_msgs::RLExperimentInfo>("rl_agent/rl_experiment_info", qDepth, false);

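  // Subscribe to the environment's topics, disabling Nagle's algorithm so
  // small state/action messages are delivered with minimal latency.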
  ros::TransportHints noDelay = ros::TransportHints().tcpNoDelay(true);

  ros::Subscriber rl_description = node.subscribe("rl_env/rl_env_description", qDepth, processEnvDescription, noDelay);
  ros::Subscriber rl_state = node.subscribe("rl_env/rl_state_reward", qDepth, processState, noDelay);
  ros::Subscriber rl_seed = node.subscribe("rl_env/rl_seed", 20, processSeed, noDelay);

  ROS_INFO(NODE ": starting main loop");

  ros::spin();

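  // After ros::spin() returns (node shutdown), save the learned policy if a
  // policy filename was given.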
  if (agent != NULL) {
    if (filename != NULL) {
      agent->savePolicy(filename);
    }
  }

  return 0;
}