16 #include <sys/types.h> 18 #include <sys/resource.h> 19 #include <sys/utsname.h> 20 #include <sys/prctl.h> 24 #include <boost/tokenizer.hpp> 25 #include <boost/range.hpp> 26 #include <boost/algorithm/string.hpp> 28 #include <boost/filesystem.hpp> 30 #include <fmt/format.h> 32 #define TASK_COMM_LEN 16 // from linux/sched.h 34 namespace fs = boost::filesystem;
38 template<
typename... Args>
39 std::runtime_error
error(
const char* fmt,
const Args& ... args)
41 return std::runtime_error(fmt::format(fmt, args...));
49 explicit final_act(F
f) noexcept
50 : f_(std::move(f)), invoke_(
true) {}
52 final_act(final_act&& other) noexcept
53 : f_(std::move(other.f_)),
54 invoke_(other.invoke_)
56 other.invoke_ =
false;
59 final_act(
const final_act&) =
delete;
60 final_act& operator=(
const final_act&) =
delete;
72 inline final_act<F>
finally(
const F&
f) noexcept
74 return final_act<F>(
f);
78 inline final_act<F>
finally(F&& f) noexcept
80 return final_act<F>(std::forward<F>(
f));
83 static bool g_corePatternAnalyzed =
false;
84 static bool g_coreIsRelative =
true;
85 static bool g_coreIsSystemd =
false;
94 : m_launchNode(
std::move(launchNode))
95 , m_fdWatcher(
std::move(fdWatcher))
97 , m_stderrBuffer(4096)
100 , m_restarting(false)
101 , m_muted(m_launchNode->isMuted())
108 if(!g_corePatternAnalyzed)
110 char core_pattern[256];
111 int core_fd = open(
"/proc/sys/kernel/core_pattern", O_RDONLY | O_CLOEXEC);
118 int bytes = read(core_fd, core_pattern,
sizeof(core_pattern)-1);
127 g_coreIsRelative = (core_pattern[0] !=
'/');
129 if(std::string(core_pattern).find(
"systemd-coredump") != std::string::npos)
130 g_coreIsSystemd =
true;
132 g_corePatternAnalyzed =
true;
140 kill(
m_pid, SIGKILL);
148 throw error(
"Could not find node '{}' in package '{}'",
154 std::vector<std::string> cmd =
m_launchNode->launchPrefix();
161 std::copy(args.begin(), args.end(), std::back_inserter(cmd));
169 cmd.push_back(map.first +
":=" + map.second);
189 strncpy(tmpfile,
"/tmp/rosmon-node-XXXXXX",
sizeof(tmpfile));
190 tmpfile[
sizeof(tmpfile)-1] = 0;
204 std::vector<char*> args;
205 auto argsCleaner =
finally([&args](){
206 for(
char*
arg : args)
216 if(openpty(&master, &slave,
nullptr,
nullptr,
nullptr) == -1)
217 throw error(
"Could not open pseudo terminal for child process: {}", strerror(errno));
221 if(pipe(stderr_pipe) != 0)
222 throw error(
"Could not create stderr pipe: {}", strerror(errno));
226 args.push_back(strdup(
"rosrun"));
227 args.push_back(strdup(
"rosmon_core"));
228 args.push_back(strdup(
"_shim"));
230 args.push_back(strdup(
"--tty"));
231 args.push_back(strdup(fmt::format(
"{}", slave).c_str()));
233 args.push_back(strdup(
"--stderr"));
234 args.push_back(strdup(fmt::format(
"{}", stderr_pipe[1]).c_str()));
238 args.push_back(strdup(
"--namespace"));
239 args.push_back(strdup(
m_launchNode->namespaceString().c_str()));
244 args.push_back(strdup(
"--env"));
245 args.push_back(strdup(fmt::format(
"{}={}", pair.first, pair.second).c_str()));
250 args.push_back(strdup(
"--coredump"));
254 args.push_back(strdup(
"--coredump-relative"));
259 args.push_back(strdup(
"--run"));
262 args.push_back(strdup(c.c_str()));
264 args.push_back(
nullptr);
270 throw error(
"Could not fork(): {}", strerror(errno));
275 close(stderr_pipe[0]);
277 if(execvp(
"rosrun", args.data()) != 0)
279 std::stringstream ss;
280 for(
const auto& part : cmd)
283 fmt::print(stderr,
"Could not execute '{}': {}\n", ss.str(), strerror(errno));
292 close(stderr_pipe[1]);
317 kill(-
m_pid, SIGINT);
327 kill(
m_pid, SIGKILL);
348 kill(-
m_pid, SIGINT);
356 kill(-
m_pid, SIGKILL);
381 auto handleByte = [&](
char c){
401 int bytes = read(
m_stderrFD, buf,
sizeof(buf));
416 throw error(
"{}: Could not read: {}",
fullName(), strerror(errno));
418 for(
int i = 0; i < bytes; ++i)
426 auto handleByte = [&](
char c){
447 int bytes = read(
m_fd, buf,
sizeof(buf));
449 if(bytes == 0 || (bytes < 0 && errno == EIO))
455 if(waitpid(
m_pid, &status, 0) > 0)
458 if(errno == EINTR || errno == EAGAIN)
461 throw error(
"{}: Could not waitpid(): {}",
fullName(), strerror(errno));
468 if(WIFEXITED(status))
471 logTyped(type,
"{} exited with status {}",
fullName(), WEXITSTATUS(status));
472 ROS_INFO(
"rosmon: %s exited with status %d",
fullName().c_str(), WEXITSTATUS(status));
475 else if(WIFSIGNALED(status))
478 ROS_ERROR(
"rosmon: %s died from signal %d",
fullName().c_str(), WTERMSIG(status));
483 if(WCOREDUMP(status))
510 boost::system::error_code
error;
515 "Could not remove old working directory '{}' after process died twice: {}",
558 throw error(
"{}: Could not read: {}",
fullName(), strerror(errno));
560 for(
int i = 0; i < bytes; ++i)
566 template<
typename... Args>
572 template<
typename... Args>
578 static boost::iterator_range<std::string::const_iterator>
581 for(; begin != end && begin+1 != end; ++begin)
584 return {begin, begin+2};
600 char core_pattern[256];
601 int core_fd = open(
"/proc/sys/kernel/core_pattern", O_RDONLY | O_CLOEXEC);
608 int bytes = read(core_fd, core_pattern,
sizeof(core_pattern)-1);
613 log(
"Could not read /proc/sys/kernel/core_pattern: {}", strerror(errno));
617 core_pattern[bytes-1] = 0;
619 if(core_pattern[0] ==
'|')
623 strncpy(core_pattern,
"core",
sizeof(core_pattern));
626 auto formatter = [&](boost::iterator_range<std::string::const_iterator> match) -> std::string {
627 char code = *(match.begin()+1);
634 return std::to_string(
m_pid);
636 return std::to_string(getuid());
638 return std::to_string(getgid());
640 return std::to_string(signal);
646 memset(&uts, 0,
sizeof(uts));
657 boost::replace_all(executable,
"/",
"!");
663 memset(&limit, 0,
sizeof(limit));
664 getrlimit(RLIMIT_CORE, &limit);
667 return std::to_string(limit.rlim_max);
674 std::string coreGlob = boost::find_format_all_copy(std::string(core_pattern),
corePatternFormatFinder, formatter);
677 if(coreGlob[0] !=
'/')
680 log(
"Determined pattern '{}'", coreGlob);
683 memset(&results, 0,
sizeof(results));
684 int ret = glob(coreGlob.c_str(), GLOB_NOSORT,
nullptr, &results);
686 if(ret != 0 || results.gl_pathc == 0)
693 if(results.gl_pathc > 1)
700 std::string coreFile = results.gl_pathv[0];
705 std::stringstream ss;
707 ss <<
"gdb " <<
m_launchNode->executable() <<
" " << coreFile;
720 std::stringstream ss;
721 ss <<
"gdb -p " <<
m_pid;
725 if(!getenv(
"DISPLAY"))
731 char* envTerm = getenv(
"ROSMON_DEBUGGER_TERMINAL");
732 std::string term =
"xterm -e";
735 else if(getenv(
"KONSOLE_DBUS_SESSION"))
737 else if(getenv(
"VTE_VERSION"))
738 term =
"gnome-terminal -e";
743 if(system((term +
" '" + cmd +
"' &").c_str()) != 0)
std::shared_ptr< const Node > ConstPtr
std::string m_debuggerCommand
boost::signals2::signal< void(std::string)> exitedSignal
Signalled whenever the process exits.
void start()
Start the node.
void forceExit()
Finish shutdown sequence.
void setPeriod(const WallDuration &period, bool reset=true)
void addMemory(uint64_t memoryBytes)
void stop(bool restart=false)
Stop the node.
boost::circular_buffer< char > m_rxBuffer
boost::circular_buffer< char > m_stderrBuffer
std::string m_processWorkingDirectory
bool coredumpAvailable() const
Is a core dump available from a crash under rosmon control?
void log(const char *format, Args &&...args)
static boost::iterator_range< std::string::const_iterator > corePatternFormatFinder(std::string::const_iterator begin, std::string::const_iterator end)
void addCPUTime(uint64_t userTime, uint64_t systemTime)
Idle (e.g. exited with code 0)
void gatherCoredump(int signal)
void setMuted(bool muted)
unsigned int m_restartCount
void launchDebugger()
Launch gdb interactively.
Waiting for automatic restart after crash.
WallTimer createWallTimer(WallDuration period, void(T::*callback)(const WallTimerEvent &), T *obj, bool oneshot=false, bool autostart=true) const
std::runtime_error error(const char *fmt, const Args &...args)
boost::signals2::signal< void(LogEvent)> logMessageSignal
Logging signal.
FDWatcher::Ptr m_fdWatcher
int pid() const
Node PID.
ros::WallTimer m_stopCheckTimer
void endStatUpdate(double elapsedTimeInTicks)
std::string fullName() const
Full name including namespace.
void shutdown()
Start shutdown sequence.
std::string arg(const std::string &name, const ParseContext &context)
bool running() const
Is the node running?
std::vector< std::string > composeCommand() const
launch::Node::ConstPtr m_launchNode
Crashed (i.e. exited with code != 0)
State state() const
Get process state.
bool m_processWorkingDirectoryCreated
void logTyped(LogEvent::Type type, const char *format, Args &&...args)
void restart()
Restart the node.
ros::WallTimer m_restartTimer
std::string m_lastWorkingDirectory