main.cpp
Go to the documentation of this file.
1 // rosmon - versatile ROS node launcher & monitor
2 // Author: Max Schwarz <max.schwarz@uni-bonn.de>
3 
4 #include <ros/init.h>
5 #include <ros/master.h>
6 #include <ros/rate.h>
7 #include <ros/node_handle.h>
8 #include <ros/package.h>
9 #include <ros/console_backend.h>
10 #include <ros/this_node.h>
11 
12 #include <boost/filesystem.hpp>
13 #include <boost/lexical_cast.hpp>
14 
15 #include <unistd.h>
16 #include <getopt.h>
17 #include <csignal>
18 
19 #include <iostream>
20 
21 #include <fmt/format.h>
22 #include <fmt/chrono.h>
23 
24 #include "launch/launch_config.h"
25 #include "launch/bytes_parser.h"
26 #include "monitor/monitor.h"
27 #include "ui.h"
28 #include "ros_interface.h"
29 #include "package_registry.h"
30 #include "fd_watcher.h"
31 #include "logger.h"
32 
33 namespace fs = boost::filesystem;
34 
35 bool g_shouldStop = false;
36 bool g_flushStdout = false;
37 
38 static fs::path findFile(const fs::path& base, const std::string& name)
39 {
40  for(fs::directory_iterator it(base); it != fs::directory_iterator(); ++it)
41  {
42  if(fs::is_directory(*it))
43  {
44  fs::path p = findFile(*it, name);
45  if(!p.empty())
46  return p;
47  }
48  else if(it->path().leaf() == name)
49  return *it;
50  }
51 
52  return fs::path();
53 }
54 
55 void usage()
56 {
57  fprintf(stderr,
58  "Usage:\n"
59  " rosmon [actions] [options] some_package test.launch [arg1:=value1 ...]\n"
60  " rosmon [actions] [options] path/to/test.launch [arg1:=value1 ...]\n"
61  "\n"
62  "Actions (default is to launch the launch file):\n"
63  " --benchmark Exit after loading the launch file\n"
64  " --list-args List launch file arguments\n"
65  "\n"
66  "Options:\n"
67  " --disable-ui Disable fancy terminal UI\n"
68  " --flush-log Flush logfile after writing an entry\n"
69  " --flush-stdout Flush stdout after writing an entry\n"
70  " --help This help screen\n"
71  " --log=FILE Write log file to FILE (use 'syslog' for syslog)\n"
72  " --name=NAME Use NAME as ROS node name. By default, an anonymous\n"
73  " name is chosen.\n"
74  " --no-start Don't automatically start the nodes in the beginning\n"
75  " --stop-timeout=SECONDS\n"
76  " Kill a process if it is still running this long\n"
77  " after the initial signal is send.\n"
78  " --disable-diagnostics\n"
79  " Disable publication of ros diagnostics message about\n"
80  " monitored nodes\n"
81  " --diagnostics-prefix=PREFIX\n"
82  " Prefix for the ros diagnostics generated by this node.\n"
83  " By default this will be the node name.\n"
84  " --cpu-limit=[0-n]\n"
85  " Default CPU Usage limit of monitored process. n is the\n"
86  " number of CPU cores. This is the sum of system and user\n"
87  " CPU usage.\n"
88  " --memory-limit=15MB\n"
89  " Default memory limit usage of monitored process.\n"
90  " --output-attr=obey|ignore\n"
91  " Obey or ignore output=\"*\" attributes on node tags.\n"
92  " Default is to ignore.\n"
93  " --auto-increment-spawn-delay=SECONDS\n"
94  " Add SECONDS spawn delay to each process.\n"
95  " This is ignored for processes having 'rosmon-spawn-delay' set.\n"
96  " By default this is disabled.\n"
97  "\n"
98  "rosmon also obeys some environment variables:\n"
99  " ROSMON_COLOR_MODE Can be set to 'truecolor', '256colors', 'ansi'\n"
100  " to force a specific color mode\n"
101  " If unset, rosmon tries to detect the best\n"
102  " available color mode.\n"
103  );
104 }
105 
106 void handleSignal(int)
107 {
108  g_shouldStop = true;
109 }
110 
111 void logToStdout(const rosmon::LogEvent& event, const int max_width)
112 {
113  // Are we supposed to show stdout?
115  return;
116 
117  fmt::print("{:>{}}: {}", event.source, max_width, event.message);
118 
119  if(!event.message.empty() && event.message.back() != '\n')
120  std::cout << '\n';
121 
122  if(g_flushStdout)
123  fflush(stdout);
124 }
125 
126 // Options
127 static const struct option OPTIONS[] = {
128  {"disable-ui", no_argument, nullptr, 'd'},
129  {"benchmark", no_argument, nullptr, 'b'},
130  {"flush-log", no_argument, nullptr, 'f'},
131  {"flush-stdout", no_argument, nullptr, 'F'},
132  {"help", no_argument, nullptr, 'h'},
133  {"list-args", no_argument, nullptr, 'L'},
134  {"log", required_argument, nullptr, 'l'},
135  {"name", required_argument, nullptr, 'n'},
136  {"no-start", no_argument, nullptr, 'S'},
137  {"stop-timeout", required_argument, nullptr, 's'},
138  {"disable-diagnostics", no_argument, nullptr, 'D'},
139  {"cpu-limit", required_argument, nullptr, 'c'},
140  {"memory-limit", required_argument, nullptr, 'm'},
141  {"diagnostics-prefix", required_argument, nullptr, 'p'},
142  {"output-attr", required_argument, nullptr, 'o'},
143  {"auto-increment-spawn-delay", required_argument, nullptr, 'a'},
144  {nullptr, 0, nullptr, 0}
145 };
146 
147 enum Action {
151 };
152 
153 static int get_max_node_name(const rosmon::monitor::Monitor &monitor)
154 {
155  std::size_t max_width = 0;
156 
157  for(const auto& node : monitor.nodes())
158  {
159  max_width = std::max(max_width, node->fullName().size());
160  }
161 
162  return max_width;
163 }
164 
165 int main(int argc, char** argv)
166 {
167  std::string name;
168  std::string logFile;
169  std::string launchFilePath;
170 
171  Action action = ACTION_LAUNCH;
172  bool enableUI = true;
173  bool flushLog = false;
174  bool startNodes = true;
175  double stopTimeout = rosmon::launch::DEFAULT_STOP_TIMEOUT;
176  uint64_t memoryLimit = rosmon::launch::DEFAULT_MEMORY_LIMIT;
177  float cpuLimit = rosmon::launch::DEFAULT_CPU_LIMIT;
178  bool disableDiagnostics = false;
179  std::string diagnosticsPrefix;
180  double autoIncrementSpawnDelay = -1.;
182 
183  // Parse options
184  while(true)
185  {
186  int option_index;
187  int c = getopt_long(argc, argv, "h", OPTIONS, &option_index);
188 
189  if(c == -1)
190  break;
191 
192  switch(c)
193  {
194  case 'h':
195  usage();
196  return 0;
197  case 'n':
198  name = optarg;
199  break;
200  case 'l':
201  logFile = optarg;
202  break;
203  case 'L':
204  action = ACTION_LIST_ARGS;
205  break;
206  case 'b':
207  action = ACTION_BENCHMARK;
208  break;
209  case 'd':
210  enableUI = false;
211  break;
212  case 'f':
213  flushLog = true;
214  break;
215  case 'F':
216  g_flushStdout = true;
217  break;
218  case 'S':
219  startNodes = false;
220  break;
221  case 's':
222  try
223  {
224  stopTimeout = boost::lexical_cast<double>(optarg);
225  }
226  catch(boost::bad_lexical_cast&)
227  {
228  fmt::print(stderr, "Bad value for --stop-timeout argument: '{}'\n", optarg);
229  return 1;
230  }
231 
232  if(stopTimeout < 0)
233  {
234  fmt::print(stderr, "Stop timeout cannot be negative\n");
235  return 1;
236  }
237  break;
238  case 'D' :
239  disableDiagnostics = true;
240  break;
241  case 'c':
242  try
243  {
244  cpuLimit = boost::lexical_cast<float>(optarg);
245  }
246  catch(boost::bad_lexical_cast&)
247  {
248  fmt::print(stderr, "Bad value for --cpu-limit argument: '{}'\n", optarg);
249  return 1;
250  }
251 
252  if(cpuLimit < 0)
253  {
254  fmt::print(stderr, "CPU Limit cannot be negative\n");
255  return 1;
256  }
257  break;
258  case 'm':
259  {
260  bool ok;
261  std::tie(memoryLimit, ok) = rosmon::launch::parseMemory(optarg);
262  if(!ok)
263  {
264  fmt::print(stderr, "Bad value for --memory-limit argument: '{}'\n", optarg);
265  return 1;
266  }
267  break;
268  }
269  case 'o':
270  if(optarg == std::string("obey"))
272  else if(optarg == std::string("ignore"))
274  else
275  {
276  fmt::print(stderr, "Bad value for --output-attr argument: '{}'\n", optarg);
277  return 1;
278  }
279  break;
280  case 'p':
281  fmt::print(stderr, "Prefix : {}", optarg);
282  diagnosticsPrefix = std::string(optarg);
283  break;
284  case 'a':
285  fmt::print(stderr, "Auto increment spawn delay of : {} seconds\n", optarg);
286  try
287  {
288  autoIncrementSpawnDelay = boost::lexical_cast<double>(optarg);
289  }
290  catch(boost::bad_lexical_cast&)
291  {
292  fmt::print(stderr, "Bad value for --auto-increment-spawn-delay: '{}'\n", optarg);
293  return 1;
294  }
295 
296  if(autoIncrementSpawnDelay < 0)
297  {
298  fmt::print(stderr, "Auto increment spawn delay cannot be negative\n");
299  return 1;
300  }
301  break;
302  }
303  }
304 
305  // Parse the positional arguments
306  if(optind == argc)
307  {
308  usage();
309  return 1;
310  }
311 
312  // Find first launch file argument (must contain ':=')
313  int firstArg = optind + 1;
314  for(; firstArg < argc; ++firstArg)
315  {
316  if(strstr(argv[firstArg], ":="))
317  break;
318  }
319 
320  // From the position of the argument (or the end-of-options), we know
321  // if we were called with a) package + filename or b) just a path.
322 
323  if(firstArg - optind == 1)
324  {
325  launchFilePath = argv[optind];
326  }
327  else if(firstArg - optind == 2)
328  {
329  const char* packageName = argv[optind];
330  const char* fileName = argv[optind + 1];
331 
332  std::string package = rosmon::PackageRegistry::getPath(packageName);
333  if(package.empty())
334  {
335  fmt::print(stderr, "Could not find path of package '{}'\n", packageName);
336  return 1;
337  }
338 
339  fs::path path = findFile(package, fileName);
340  if(path.empty())
341  {
342  fmt::print(stderr, "Could not find launch file '{}' in package '{}'\n",
343  fileName, packageName
344  );
345  return 1;
346  }
347 
348  launchFilePath = path.string();
349  }
350  else
351  {
352  usage();
353  return 1;
354  }
355 
356  // Setup logging
357  boost::scoped_ptr<rosmon::Logger> logger;
358  std::string nodeLogPath;
359  {
360  // Setup a sane ROSCONSOLE_FORMAT if the user did not already
361  setenv("ROSCONSOLE_FORMAT", "[${function}]: ${message}", 0);
362 
363  // Disable direct logging to stdout
365 
366  // Open logger
367  bool envSyslog = false;
368  if(auto env = getenv("ROSMON_SYSLOG"))
369  envSyslog = (strcmp(env, "1") == 0);
370 
371  if(envSyslog || logFile == "syslog")
372  {
373  // Try systemd journal first
374  try
375  {
376  logger.reset(new rosmon::SystemdLogger(fs::basename(launchFilePath)));
377  }
379  {
380  fmt::print(stderr, "Systemd Journal not available: {}\n", e.what());
381  logger.reset(new rosmon::SyslogLogger(fs::basename(launchFilePath)));
382  }
383  }
384  else
385  {
386  if(logFile.empty())
387  {
388  fmt::print("Tip: use --log=syslog or set environment variable ROSMON_SYSLOG=1 to send log output to syslog instead of a log file in /tmp.\n");
389 
390  // Create log file in /tmp
391  std::time_t t = std::time(nullptr);
392  std::tm currentTime = fmt::localtime(t);
393 
394  logFile = fmt::format("/tmp/rosmon_{:%Y_%m_%d_%H_%M_%S}.log", currentTime);
395  }
396 
397  logger.reset(new rosmon::FileLogger(logFile, flushLog));
398 
399  // Old-style logging means we will enable per-node log files in ~/.ros as well.
400  if(auto val = getenv("ROS_LOG_DIR"))
401  nodeLogPath = val;
402  else if(auto val = getenv("ROS_HOME"))
403  nodeLogPath = fmt::format("{}/log", val);
404  else if(auto val = getenv("HOME"))
405  nodeLogPath = fmt::format("{}/.ros/log", val);
406  }
407  }
408 
410 
412  config->setDefaultStopTimeout(stopTimeout);
413  config->setDefaultCPULimit(cpuLimit);
414  config->setDefaultMemoryLimit(memoryLimit);
415  config->setOutputAttrMode(outputAttrMode);
416 
417  // Parse launch file arguments from command line
418  for(int i = firstArg; i < argc; ++i)
419  {
420  char* arg = strstr(argv[i], ":=");
421 
422  if(!arg)
423  {
424  fmt::print(stderr, "You specified a non-argument after an argument\n");
425  return 1;
426  }
427 
428  char* name = argv[i];
429 
430  *arg = 0;
431 
432  char* value = arg + 2;
433 
434  config->setArgument(name, value);
435  }
436 
437  bool onlyArguments = (action == ACTION_LIST_ARGS);
438 
439  try
440  {
441  config->parse(launchFilePath, onlyArguments);
442  config->evaluateParameters();
443  }
445  {
446  fmt::print(stderr, "Could not load launch file: {}\n", e.what());
447  return 1;
448  }
449 
450  switch(action)
451  {
452  case ACTION_BENCHMARK:
453  return 0;
454  case ACTION_LIST_ARGS:
455  for(const auto& arg : config->arguments())
456  std::cout << arg.first << std::endl;
457 
458  return 0;
459  case ACTION_LAUNCH:
460  break;
461  }
462 
463  if(autoIncrementSpawnDelay > 0.)
464  config->applyAutoIncrementSpawnDelayToAll(ros::WallDuration{autoIncrementSpawnDelay});
465 
466  if(config->disableUI())
467  enableUI = false;
468 
469  // Initialize the ROS node.
470  {
471  uint32_t init_options = ros::init_options::NoSigintHandler;
472 
473  // If the user did not specify a node name on the command line, look in
474  // the launch file
475  if(name.empty())
476  name = config->rosmonNodeName();
477 
478  // As last fallback, choose anonymous name.
479  if(name.empty())
480  {
481  name = "rosmon";
482  init_options |= ros::init_options::AnonymousName;
483  }
484 
485  // prevent ros::init from parsing the launch file arguments as remappings
486  int dummyArgc = 1;
487  ros::init(dummyArgc, argv, name, init_options);
488  }
489 
490  // Check connectivity to ROS master
491  {
492  fmt::print("ROS_MASTER_URI: '{}'\n", ros::master::getURI());
493  if(ros::master::check())
494  {
495  fmt::print("roscore is already running.\n");
496  }
497  else
498  {
499  fmt::print("roscore is not runnning.\n");
500  fmt::print("Waiting until it is up (abort with CTRL+C)...\n");
501 
502  while(!ros::master::check())
503  ros::WallDuration(0.5).sleep();
504 
505  fmt::print("roscore is running now.\n");
506  }
507  }
508 
509  ros::NodeHandle nh;
510 
511  fmt::print("Running as '{}'\n", ros::this_node::getName());
512 
513  // Create node logging path
514  if(!nodeLogPath.empty())
515  {
516  std::string runID;
517  if(nh.getParam("/run_id", runID))
518  nodeLogPath = fmt::format("{}/{}", nodeLogPath, runID);
519 
520  if(!fs::create_directories(nodeLogPath))
521  fmt::print(stderr, "Warning: Could not create log directory '{}'\n", nodeLogPath);
522  }
523  config->setNodeLogDir(nodeLogPath);
524 
525  rosmon::monitor::Monitor monitor(config, watcher);
526  monitor.logMessageSignal.connect(boost::bind(&rosmon::Logger::log, logger.get(), _1));
527 
528  fmt::print("\n\n");
529  monitor.setParameters();
530 
531  if(config->nodes().empty())
532  {
533  fmt::print("No ROS nodes to be launched. Finished...\n");
534  return 0;
535  }
536 
537  // Should we automatically start the nodes?
538  if(startNodes)
539  monitor.start();
540 
541  // Start the ncurses UI
542  boost::scoped_ptr<rosmon::UI> ui;
543  if(enableUI)
544  {
545  ui.reset(new rosmon::UI(&monitor, watcher));
546  }
547  else
548  {
549  monitor.logMessageSignal.connect(
550  boost::bind(
551  logToStdout,
552  _1,
553  get_max_node_name(monitor)));
554  }
555 
556  // ROS interface
557  rosmon::ROSInterface rosInterface(&monitor, !disableDiagnostics, diagnosticsPrefix);
558 
559  ros::WallDuration waitDuration(0.1);
560 
561  // On SIGINT, SIGTERM, SIGHUP we stop gracefully.
562  signal(SIGINT, handleSignal);
563  signal(SIGHUP, handleSignal);
564  signal(SIGTERM, handleSignal);
565 
566  // Main loop
567  while(ros::ok() && monitor.ok() && !g_shouldStop)
568  {
569  ros::spinOnce();
570  watcher->wait(waitDuration);
571 
572  if(ui)
573  ui->update();
574  }
575 
576  if(ui)
577  ui->log({"[rosmon]", "Shutting down..."});
578  monitor.shutdown();
579 
580  // Wait for graceful shutdown
582  while(!monitor.allShutdown() && ros::WallTime::now() - start < ros::WallDuration(monitor.shutdownTimeout()))
583  {
584  watcher->wait(waitDuration);
585 
586  if(ui)
587  ui->update();
588  }
589 
590  // If we timed out, force exit (kill the nodes with SIGKILL)
591  if(!monitor.allShutdown())
592  monitor.forceExit();
593 
594  rosInterface.shutdown();
595 
596  // Wait until that is finished (should always work)
597  while(!monitor.allShutdown())
598  {
599  watcher->wait(waitDuration);
600 
601  if(enableUI)
602  ui->update();
603  }
604 
605  // If coredumps are available, be helpful and display gdb commands
606  bool coredumpsAvailable = std::any_of(monitor.nodes().begin(), monitor.nodes().end(),
607  [](const rosmon::monitor::NodeMonitor::Ptr& n) { return n->coredumpAvailable(); }
608  );
609 
610  if(ui && coredumpsAvailable)
611  {
612  ui->log({"[rosmon]", "\n"});
613  ui->log({"[rosmon]", "If you want to debug one of the crashed nodes, you can use the following commands"});
614  for(auto& node : monitor.nodes())
615  {
616  if(node->coredumpAvailable())
617  {
618  ui->log({
619  "[rosmon]",
620  fmt::format("{:20}: # {}", node->name(), node->debuggerCommand())
621  });
622  }
623  }
624  }
625 
626  return 0;
627 }
virtual const char * what() const noexcept
Definition: launch_config.h:42
ROSCONSOLE_BACKEND_DECL void(* function_print)(void *, ::ros::console::Level, const char *, const char *, const char *, int)
Definition: ui.h:20
ROSCPP_DECL const std::string & getURI()
boost::signals2::signal< void(LogEvent)> logMessageSignal
Definition: monitor.h:49
ROSCPP_DECL bool check()
Write log messages into a log file.
Definition: logger.h:25
ROSCPP_DECL void start()
string package
virtual void log(const LogEvent &event)=0
int main(int argc, char **argv)
Definition: main.cpp:165
bool ok() const
Definition: monitor.h:38
ROSCPP_DECL void init(int &argc, char **argv, const std::string &name, uint32_t options=0)
void usage()
Definition: main.cpp:55
ROSCPP_DECL const std::string & getName()
Channel channel
Definition: log_event.h:64
std::shared_ptr< LaunchConfig > Ptr
static fs::path findFile(const fs::path &base, const std::string &name)
Definition: main.cpp:38
Write log messages to systemd journal.
Definition: logger.h:60
constexpr double DEFAULT_STOP_TIMEOUT
Definition: launch_config.h:30
bool getParam(const std::string &key, std::string &s) const
ROSCPP_DECL bool ok()
bool g_flushStdout
Definition: main.cpp:36
bool sleep() const
void handleSignal(int)
Definition: main.cpp:106
constexpr uint64_t DEFAULT_MEMORY_LIMIT
Definition: launch_config.h:29
Action
Definition: main.cpp:147
std::string env(const std::string &name)
Write log messages to syslog.
Definition: logger.h:46
static const struct option OPTIONS[]
Definition: main.cpp:127
bool g_shouldStop
Definition: main.cpp:35
static WallTime now()
std::string arg(const std::string &name, const ParseContext &context)
constexpr double DEFAULT_CPU_LIMIT
Definition: launch_config.h:28
static int get_max_node_name(const rosmon::monitor::Monitor &monitor)
Definition: main.cpp:153
std::tuple< uint64_t, bool > parseMemory(const std::string &memory)
ROSCPP_DECL void spinOnce()
std::string source
Definition: log_event.h:60
void logToStdout(const rosmon::LogEvent &event, const int max_width)
Definition: main.cpp:111
std::shared_ptr< NodeMonitor > Ptr
Definition: node_monitor.h:30
const std::vector< NodeMonitor::Ptr > & nodes() const
Definition: monitor.h:41
std::string message
Definition: log_event.h:61


rosmon_core
Author(s): Max Schwarz
autogenerated on Fri Jun 16 2023 02:15:06