16 #include <sys/types.h> 18 #include <sys/resource.h> 19 #include <sys/utsname.h> 20 #include <sys/prctl.h> 24 #include <boost/tokenizer.hpp> 25 #include <boost/range.hpp> 26 #include <boost/algorithm/string.hpp> 28 #include <boost/filesystem.hpp> 30 #include <fmt/format.h> 32 #define TASK_COMM_LEN 16 // from linux/sched.h 34 namespace fs = boost::filesystem;
38 template<
typename... Args>
39 std::runtime_error
error(
const char* fmt,
const Args& ... args)
41 return std::runtime_error(fmt::format(fmt, args...));
49 explicit final_act(F
f) noexcept
50 : f_(std::move(f)), invoke_(
true) {}
52 final_act(final_act&& other) noexcept
53 : f_(std::move(other.f_)),
54 invoke_(other.invoke_)
56 other.invoke_ =
false;
59 final_act(
const final_act&) =
delete;
60 final_act& operator=(
const final_act&) =
delete;
72 inline final_act<F>
finally(
const F&
f) noexcept
74 return final_act<F>(
f);
78 inline final_act<F>
finally(F&& f) noexcept
80 return final_act<F>(std::forward<F>(
f));
83 static bool g_corePatternAnalyzed =
false;
84 static bool g_coreIsRelative =
true;
85 static bool g_coreIsSystemd =
false;
94 : m_launchConfig(config)
95 , m_launchNode(launchNode)
96 , m_fdWatcher(
std::move(fdWatcher))
100 , m_muted(m_launchNode->isMuted())
105 event.type = src.severity;
114 event.type = src.severity;
126 if(!g_corePatternAnalyzed)
128 char core_pattern[256];
129 int core_fd = open(
"/proc/sys/kernel/core_pattern", O_RDONLY | O_CLOEXEC);
136 int bytes = read(core_fd, core_pattern,
sizeof(core_pattern)-1);
145 g_coreIsRelative = (core_pattern[0] !=
'/');
147 if(std::string(core_pattern).find(
"systemd-coredump") != std::string::npos)
148 g_coreIsSystemd =
true;
150 g_corePatternAnalyzed =
true;
158 kill(
m_pid, SIGKILL);
166 throw error(
"Could not find node '{}' in package '{}'",
172 std::vector<std::string> cmd =
m_launchNode->launchPrefix();
179 std::copy(args.begin(), args.end(), std::back_inserter(cmd));
194 if(!logname.empty() && logname[0] ==
'/')
195 logname = logname.substr(1);
198 std::replace(logname.begin(), logname.end(),
'/',
'-');
200 cmd.push_back(fmt::format(
"__log:={}/{}.log", logDir, logname));
205 cmd.push_back(
"__log:=/dev/null");
211 cmd.push_back(map.first +
":=" + map.second);
239 strncpy(tmpfile,
"/tmp/rosmon-node-XXXXXX",
sizeof(tmpfile));
240 tmpfile[
sizeof(tmpfile)-1] = 0;
254 std::vector<char*> args;
255 auto argsCleaner =
finally([&args](){
256 for(
char*
arg : args)
265 int stderr_master, stderr_slave;
266 std::tie(stderr_master, stderr_slave) =
createPTY();
270 args.push_back(strdup(
"rosrun"));
271 args.push_back(strdup(
"rosmon_core"));
272 args.push_back(strdup(
"_shim"));
274 args.push_back(strdup(
"--tty"));
275 args.push_back(strdup(fmt::format(
"{}", slave).c_str()));
277 args.push_back(strdup(
"--stderr"));
278 args.push_back(strdup(fmt::format(
"{}", stderr_slave).c_str()));
282 args.push_back(strdup(
"--namespace"));
283 args.push_back(strdup(
m_launchNode->namespaceString().c_str()));
288 args.push_back(strdup(
"--env"));
289 args.push_back(strdup(fmt::format(
"{}={}", pair.first, pair.second).c_str()));
294 args.push_back(strdup(
"--coredump"));
298 args.push_back(strdup(
"--coredump-relative"));
303 args.push_back(strdup(
"--run"));
306 args.push_back(strdup(c.c_str()));
308 args.push_back(
nullptr);
314 throw error(
"Could not fork(): {}", strerror(errno));
319 close(stderr_master);
321 if(execvp(
"rosrun", args.data()) != 0)
323 std::stringstream ss;
324 for(
const auto& part : cmd)
327 fmt::print(stderr,
"Could not execute '{}': {}\n", ss.str(), strerror(errno));
361 kill(-
m_pid, SIGINT);
371 kill(
m_pid, SIGKILL);
392 kill(-
m_pid, SIGINT);
400 kill(-
m_pid, SIGKILL);
426 int bytes = read(
m_stderrFD, buf,
sizeof(buf));
428 if(bytes == 0 || (bytes < 0 && errno == EIO))
440 throw error(
"{}: Could not read: {}",
fullName(), strerror(errno));
448 int bytes = read(
m_fd, buf,
sizeof(buf));
450 if(bytes == 0 || (bytes < 0 && errno == EIO))
456 if(waitpid(
m_pid, &status, 0) > 0)
459 if(errno == EINTR || errno == EAGAIN)
462 throw error(
"{}: Could not waitpid(): {}",
fullName(), strerror(errno));
468 if(WIFEXITED(status))
471 logTyped(type,
"{} exited with status {}",
fullName(), WEXITSTATUS(status));
472 ROS_INFO(
"rosmon: %s exited with status %d",
fullName().c_str(), WEXITSTATUS(status));
475 else if(WIFSIGNALED(status))
478 ROS_ERROR(
"rosmon: %s died from signal %d",
fullName().c_str(), WTERMSIG(status));
483 if(WCOREDUMP(status))
510 boost::system::error_code
error;
515 "Could not remove old working directory '{}' after process died twice: {}",
558 throw error(
"{}: Could not read: {}",
fullName(), strerror(errno));
563 template<
typename... Args>
569 template<
typename... Args>
575 static boost::iterator_range<std::string::const_iterator>
578 for(; begin != end && begin+1 != end; ++begin)
581 return {begin, begin+2};
597 char core_pattern[256];
598 int core_fd = open(
"/proc/sys/kernel/core_pattern", O_RDONLY | O_CLOEXEC);
605 int bytes = read(core_fd, core_pattern,
sizeof(core_pattern)-1);
610 log(
"Could not read /proc/sys/kernel/core_pattern: {}", strerror(errno));
614 core_pattern[bytes-1] = 0;
616 if(core_pattern[0] ==
'|')
620 strncpy(core_pattern,
"core",
sizeof(core_pattern));
623 auto formatter = [&](boost::iterator_range<std::string::const_iterator> match) -> std::string {
624 char code = *(match.begin()+1);
631 return std::to_string(
m_pid);
633 return std::to_string(getuid());
635 return std::to_string(getgid());
637 return std::to_string(signal);
643 memset(&uts, 0,
sizeof(uts));
654 boost::replace_all(executable,
"/",
"!");
660 memset(&limit, 0,
sizeof(limit));
661 getrlimit(RLIMIT_CORE, &limit);
664 return std::to_string(limit.rlim_max);
671 std::string coreGlob = boost::find_format_all_copy(std::string(core_pattern),
corePatternFormatFinder, formatter);
674 if(coreGlob[0] !=
'/')
677 log(
"Determined pattern '{}'", coreGlob);
680 memset(&results, 0,
sizeof(results));
681 int ret = glob(coreGlob.c_str(), GLOB_NOSORT,
nullptr, &results);
683 if(ret != 0 || results.gl_pathc == 0)
690 if(results.gl_pathc > 1)
697 std::string coreFile = results.gl_pathv[0];
702 std::stringstream ss;
704 ss <<
"gdb " <<
m_launchNode->executable() <<
" " << coreFile;
717 if(openpty(&master, &slave,
nullptr,
nullptr,
nullptr) == -1)
718 throw error(
"Could not open pseudo terminal for child process: {}", strerror(errno));
723 struct termios termios;
724 if(tcgetattr(slave, &termios) == -1)
725 throw error(
"Could not get PTY slave attributes: {}", strerror(errno));
727 termios.c_oflag &= ~ONLCR;
729 if(tcsetattr(slave, TCSANOW, &termios) == -1)
730 throw error(
"Could not set PTY slave attributes: {}", strerror(errno));
732 return {master,slave};
743 std::stringstream ss;
744 ss <<
"gdb -p " <<
m_pid;
748 if(!getenv(
"DISPLAY"))
754 char* envTerm = getenv(
"ROSMON_DEBUGGER_TERMINAL");
755 std::string term =
"xterm -e";
758 else if(getenv(
"KONSOLE_DBUS_SESSION"))
760 else if(getenv(
"VTE_VERSION"))
761 term =
"gnome-terminal -e";
766 if(system((term +
" '" + cmd +
"' &").c_str()) != 0)
void log(const char *format, Args &&... args)
std::shared_ptr< const Node > ConstPtr
std::string m_debuggerCommand
boost::signals2::signal< void(std::string)> exitedSignal
Signalled whenever the process exits.
void start()
Start the node.
void forceExit()
Finish shutdown sequence.
void setPeriod(const WallDuration &period, bool reset=true)
void addMemory(uint64_t memoryBytes)
void stop(bool restart=false)
Stop the node.
std::string m_processWorkingDirectory
static boost::iterator_range< std::string::const_iterator > corePatternFormatFinder(std::string::const_iterator begin, std::string::const_iterator end)
void addCPUTime(uint64_t userTime, uint64_t systemTime)
State state() const
Get process state.
std::runtime_error error(const char *fmt, const Args &... args)
bool running() const
Is the node running?
Idle (e.g. exited with code 0)
WallTimer createWallTimer(WallDuration period, void(T::*callback)(const WallTimerEvent &), T *obj, bool oneshot=false, bool autostart=true) const
void gatherCoredump(int signal)
void logTyped(LogEvent::Type type, const char *format, Args &&... args)
void setMuted(bool muted)
unsigned int m_restartCount
void launchDebugger()
Launch gdb interactively.
void setCallback(const std::function< void(Event &&)> &cb)
Waiting for automatic restart after crash.
std::pair< int, int > createPTY()
void process(const char *input, std::size_t size, const std::chrono::steady_clock::time_point &time=std::chrono::steady_clock::now())
boost::signals2::signal< void(LogEvent)> logMessageSignal
Logging signal.
bool coredumpAvailable() const
Is a core dump available from a crash under rosmon control?
FDWatcher::Ptr m_fdWatcher
std::shared_ptr< const LaunchConfig > ConstPtr
ros::WallTimer m_stopCheckTimer
void endStatUpdate(double elapsedTimeInTicks)
std::string fullName() const
Full name including namespace.
launch::LaunchConfig::ConstPtr m_launchConfig
void shutdown()
Start shutdown sequence.
std::string arg(const std::string &name, const ParseContext &context)
launch::Node::ConstPtr m_launchNode
Crashed (i.e. exited with code != 0)
std::vector< std::string > composeCommand() const
bool m_processWorkingDirectoryCreated
void restart()
Restart the node.
ros::WallTimer m_restartTimer
std::string m_lastWorkingDirectory