1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35 from __future__ import print_function
36 from __future__ import with_statement
37
38 """
39 Process monitoring implementation for roslaunch.
40 """
41
42 import os
43 import sys
44 import time
45 import traceback
46 import logging
47 try:
48 from queue import Empty, Queue
49 except ImportError:
50 from Queue import Empty, Queue
51 import signal
52 import atexit
53 from threading import Thread, RLock, Lock
54
55 import roslib
56 from roslaunch.core import printlog, printlog_bold, printerrlog, RLException
57
58 logger = logging.getLogger("roslaunch.pmon")
59
61 """
62 Exception to indicate that a process launch has failed in a fatal
63 manner (i.e. relaunch is unlikely to succeed)
64 """
65 pass
66
67
68
69 _pmons = []
70 _pmon_counter = 0
92
def shutdown_process_monitor(process_monitor):
    """
    Shut down a process monitor and wait (up to 20 seconds) for its
    thread to exit.

    @param process_monitor: process monitor to kill
    @type  process_monitor: L{ProcessMonitor}
    @return: True if process_monitor was successfully
    shutdown. False if it could not be shutdown cleanly or if there is
    a problem with process_monitor
    parameter. shutdown_process_monitor() does not throw any exceptions
    as this is shutdown-critical code.
    @rtype: bool
    """
    try:
        # Nothing to do for a missing monitor or one already shut down.
        if process_monitor is None or process_monitor.is_shutdown:
            return False

        process_monitor.shutdown()
        # Bounded wait so a stuck monitor thread cannot block the caller forever.
        process_monitor.join(20.0)
        # Thread.isAlive() was removed in Python 3.9; is_alive() is the
        # spelling available on both Python 2.6+ and Python 3.
        if process_monitor.is_alive():
            logger.error("shutdown_process_monitor: ProcessMonitor shutdown failed!")
            return False
        else:
            logger.debug("shutdown_process_monitor: ProcessMonitor shutdown succeeded")
            return True
    except Exception as e:
        # Deliberately broad: this function must never raise during shutdown.
        print("exception in shutdown_process_monitor: %s" % e, file=sys.stderr)
        traceback.print_exc()
        return False
126
127 _shutdown_lock = Lock()
143
144 _signal_chain = {}
145 _shutting_down = False
158
159 if sys.platform in ['win32']:
160 _signal_list = [signal.SIGTERM, signal.SIGINT]
161 else:
162 _signal_list = [signal.SIGTERM, signal.SIGINT, signal.SIGHUP]
163
164 _sig_initialized = False
174
175
176
178 """
179 Basic process representation for L{ProcessMonitor}. Must be subclassed
180 to provide actual start()/stop() implementations.
181
182 Constructor *must* be called from the Python Main thread in order
183 for signal handlers to register properly.
184 """
185
186 - def __init__(self, package, name, args, env,
187 respawn=False, respawn_delay=0.0, required=False):
202
204 return "Process<%s>"%(self.name)
205
206
207
208
210 """
211 Get all data about this process in dictionary form
212 @return: dictionary of all relevant process properties
213 @rtype: dict { str: val }
214 """
215 info = {
216 'spawn_count': self.spawn_count,
217 'args': self.args,
218 'env': self.env,
219 'package': self.package,
220 'name': self.name,
221 'alive': self.is_alive(),
222 'respawn': self.respawn,
223 'respawn_delay': self.respawn_delay,
224 'required': self.required,
225 }
226 if self.exit_code is not None:
227 info['exit_code'] = self.exit_code
228 return info
229
231 self.time_of_death = None
232 self.spawn_count += 1
233
235 if self.time_of_death is None:
236 self.time_of_death = time.time()
237 return False
238
240 """
241 @return: False if process should not respawn
242 floating point seconds until respawn otherwise
243 """
244 if not self.respawn:
245 return False
246 if self.time_of_death is None:
247 if self.is_alive():
248 return False
249 return (self.time_of_death + self.respawn_delay) - time.time()
250
def stop(self, errors=None):
    """
    Terminate the process, noting any significant error messages in C{errors}.

    This base implementation performs no action.

    @param errors: error messages. stop() will record messages into this list.
    @type  errors: [str]
    """
    return None
259
261 if self.exit_code is not None:
262 if self.exit_code:
263 return 'process has died [exit code %s]'%self.exit_code
264 else:
265
266 return 'process has finished cleanly'
267 else:
268 return 'process has died'
269
271 """
272 Container class to maintain information about a process that has died. This
273 container allows us to delete the actual Process but still maintain the metadata
274 """
285 raise Exception("cannot call start on a dead process!")
288
290 """
291 Listener class for L{ProcessMonitor}
292 """
293
295 """
296 Notifies listener that process has died. This callback only
297 occurs for processes that die during normal process monitor
298 execution -- processes that are forcibly killed during
299 ProcessMonitor shutdown are not reported.
300 @param process_name: name of process
301 @type process_name: str
302 @param exit_code: exit code of process. If None, it means
303 that ProcessMonitor was unable to determine an exit code.
304 @type exit_code: int
305 """
306 pass
307
309
def __init__(self, name="ProcessMonitor"):
    """
    Create the monitor thread (not yet started) with empty bookkeeping.

    @param name: name given to the underlying thread
    @type  name: str
    """
    Thread.__init__(self, name=name)
    # processes currently registered with this monitor
    self.procs = []
    # re-entrant lock taken around accesses to self.procs / self.dead_list
    self.plock = RLock()
    # set once shutdown has been requested
    self.is_shutdown = False
    # set at the end of _post_run(); mainthread_spin() loops until then
    self.done = False
    # Thread.setDaemon() is deprecated since Python 3.10; the daemon
    # attribute is the equivalent available on Python 2.6+ and 3.
    self.daemon = True
    # signals whose handler has to be re-installed from the main thread
    self.reacquire_signals = set()
    # listeners notified when a monitored process dies
    self.listeners = []
    # records kept for processes that died and were unregistered
    self.dead_list = []

    # core processes are killed after all others, in reverse order
    self.core_procs = []

    # once True, the monitor exits when no processes are left to monitor
    self._registrations_complete = False

    logger.info("created process monitor %s"%self)
326
328 """
329 Listener for process events. MUST be called before
330 ProcessMonitor is running. See ProcessListener class.
331 @param l: listener instance
332 @type l: L{ProcessListener}
333 """
334 self.listeners.append(l)
335
337 """
338 Register process with L{ProcessMonitor}
339 @param p: Process
340 @type p: L{Process}
341 @raise RLException: if process with same name is already registered
342 """
343 logger.info("ProcessMonitor.register[%s]"%p.name)
344 e = None
345 with self.plock:
346 if self.has_process(p.name):
347 e = RLException("cannot add process with duplicate name '%s'"%p.name)
348 elif self.is_shutdown:
349 e = RLException("cannot add process [%s] after process monitor has been shut down"%p.name)
350 else:
351 self.procs.append(p)
352 if e:
353 logger.error("ProcessMonitor.register[%s] failed %s"%(p.name, e))
354 raise e
355 else:
356 logger.info("ProcessMonitor.register[%s] complete"%p.name)
357
359 """
360 Register core process with ProcessMonitor. Core processes
361 have special shutdown semantics. They are killed after all
362 other processes, in the reverse of the order in which they were added.
363 @param p: Process
364 @type p: L{Process}
365 @raise RLException: if process with same name is already registered
366 """
367 self.register(p)
368 self.core_procs.append(p)
369
371 """
372 Inform the process monitor that registrations are complete.
373 After the registrations_complete flag is set, process monitor
374 will exit if there are no processes left to monitor.
375 """
376 self._registrations_complete = True
377 logger.info("registrations completed %s"%self)
378
380 logger.info("ProcessMonitor.unregister[%s] starting"%p.name)
381 with self.plock:
382 self.procs.remove(p)
383 logger.info("ProcessMonitor.unregister[%s] complete"%p.name)
384
386 """
387 @return: True if process is still being monitored. If False, process
388 has died or was never registered with the process monitor
389 @rtype: bool
390 """
391 return len([p for p in self.procs if p.name == name]) > 0
392
394 """
395 @return: process registered under \a name, or None
396 @rtype: L{Process}
397 """
398 with self.plock:
399 v = [p for p in self.procs if p.name == name]
400 if v:
401 return v[0]
402
404 """
405 @return: True if ProcessMonitor has tasks that need to be run in the main thread
406 @rtype: bool
407 """
408 return len(self.reacquire_signals)
409
411 """
412 Execute tasks that need to be run in the main thread. Must be
413 called from main thread.
414 """
415
416 sigs = [s for s in self.reacquire_signals]
417 for s in sigs:
418 _signal_chain[s] = signal.signal(s, rl_signal)
419 self.reacquire_signals.remove(s)
420
422 """
423 Kill process that matches name. NOTE: a killed process will
424 continue to show up as active until the process monitor thread
425 has caught that it has died.
426 @param name: Process name
427 @type name: str
428 @return: True if a process named name was removed from
429 process monitor. A process is considered killed if its stop()
430 method was called.
431 @rtype: bool
432 """
433 def isstring(s):
434 """Small helper version to check an object is a string in
435 a way that works for both Python 2 and 3
436 """
437 try:
438 return isinstance(s, basestring)
439 except NameError:
440 return isinstance(s, str)
441
442 if not isstring(name):
443 raise RLException("kill_process takes in a process name but was given: %s"%name)
444 logger.debug("ProcessMonitor.kill_process[%s]"%name)
445 printlog("[%s] kill requested"%name)
446 with self.plock:
447 p = self.get_process(name)
448 if p:
449 try:
450
451 p.stop([])
452 except:
453 logger.error(traceback.format_exc())
454 return True
455 else:
456 return False
457
459 """
460 Shutdown the process monitor thread
461 """
462 logger.info("ProcessMonitor.shutdown %s"%self)
463 self.is_shutdown = True
464
466 """
467 @return [str]: list of active process names
468 """
469 with self.plock:
470 retval = [p.name for p in self.procs]
471 return retval
472
474 """
475 @return: Two lists, where the first
476 list contains active process names along with the number of times
477 each process has been spawned. The second list contains dead process names
478 and their spawn counts.
479 @rtype: [[(str, int),], [(str,int),]]
480 """
481 with self.plock:
482 actives = [(p.name, p.spawn_count) for p in self.procs]
483 deads = [(p.name, p.spawn_count) for p in self.dead_list]
484 retval = [actives, deads]
485 return retval
486
488 """
489 run() occurs in a separate thread and cannot do certain signal-related
490 work. The main thread of the application must call mainthread_spin()
491 or mainthread_spin_once() in order to perform these jobs.
492 """
493 if not self.done:
494 if self.has_main_thread_jobs():
495 self.do_main_thread_jobs()
496 return True
497 else:
498 return False
499
def mainthread_spin(self):
    """
    Block the calling thread until the process monitor is done, servicing
    main-thread jobs as they appear.

    run() executes in a separate thread and cannot do certain
    signal-related work, so the application's main thread must call
    either this method or mainthread_spin_once() to perform those jobs.
    """
    on_windows = sys.platform in ['win32']
    while not self.done:
        if not on_windows:
            time.sleep(0.1)
        else:
            # On win32 an IOError raised by the sleep is ignored
            # (presumably an interrupted sleep -- TODO confirm).
            try:
                time.sleep(0.1)
            except IOError:
                pass

        # Poll for work that may only be performed from the main thread.
        if self.has_main_thread_jobs():
            self.do_main_thread_jobs()
521
523 """
524 thread routine of the process monitor. NOTE: you must still
525 call mainthread_spin or mainthread_spin_once() from the main
526 thread in order to pick up main thread work from the process
527 monitor.
528 """
529 try:
530
531 try:
532 self._run()
533 except:
534 logger.error(traceback.format_exc())
535 traceback.print_exc()
536 finally:
537 self._post_run()
538
540 """
541 Internal run loop of ProcessMonitor
542 """
543 plock = self.plock
544 dead = []
545 respawn = []
546 while not self.is_shutdown:
547 with plock:
548 procs = self.procs[:]
549 if self.is_shutdown:
550 break
551
552
553
554 for s in _signal_list:
555 if signal.getsignal(s) != rl_signal:
556 self.reacquire_signals.add(s)
557
558 for p in procs:
559 try:
560 if not p.is_alive():
561 logger.debug("Process[%s] has died, respawn=%s, required=%s, exit_code=%s",
562 p.name,
563 "True(%f)" % p.respawn_delay if p.respawn else p.respawn,
564 p.required, p.exit_code)
565 exit_code_str = p.get_exit_description()
566 if p.required:
567 printerrlog('='*80+"REQUIRED process [%s] has died!\n%s\nInitiating shutdown!\n"%(p.name, exit_code_str)+'='*80)
568 self.is_shutdown = True
569 elif not p in respawn:
570 if p.exit_code:
571 printerrlog("[%s] %s"%(p.name, exit_code_str))
572 else:
573 printlog_bold("[%s] %s"%(p.name, exit_code_str))
574 dead.append(p)
575
576
577
578 for l in self.listeners:
579 l.process_died(p.name, p.exit_code)
580
581 except Exception as e:
582 traceback.print_exc()
583
584 dead.append(p)
585 if self.is_shutdown:
586 break
587 for d in dead:
588 try:
589
590
591 if d.should_respawn() is not False:
592 respawn.append(d)
593 else:
594 self.unregister(d)
595
596 d.stop([])
597
598 with plock:
599 self.dead_list.append(DeadProcess(d))
600 except:
601 logger.error(traceback.format_exc())
602
603
604
605 if self._registrations_complete and dead and not self.procs and not respawn:
606 printlog("all processes on machine have died, roslaunch will exit")
607 self.is_shutdown = True
608 del dead[:]
609 _respawn=[]
610 for r in respawn:
611 try:
612 if self.is_shutdown:
613 break
614 if r.should_respawn() <= 0.0:
615 printlog("[%s] restarting process" % r.name)
616
617 r.stop([])
618 r.start()
619 else:
620
621 _respawn.append(r)
622 except:
623 traceback.print_exc()
624 logger.error("Restart failed %s",traceback.format_exc())
625 respawn = _respawn
626 time.sleep(0.1)
627
628
629
def _post_run(self):
    """
    Tear down after the run loop: kill every remaining process and
    release the monitor's data structures.

    Non-core processes are queued in reverse order and killed by ten
    _ProcessKiller threads draining a shared queue. Core processes are
    then killed one at a time, in the reverse of the order in which
    they were registered. C{self.done} is set even if teardown raises,
    so that mainthread_spin() cannot loop forever.
    """
    logger.info("ProcessMonitor._post_run %s"%self)

    shutdown_errors = []
    try:
        # this is already true entering, but go ahead and make sure
        self.is_shutdown = True

        q = Queue()

        with self.plock:
            # snapshot the core processes so they can be killed last
            core_procs = self.core_procs[:]
            logger.info("ProcessMonitor._post_run %s: remaining procs are %s"%(self, self.procs))

            # queue everything that is not a core process, newest first
            for p in reversed(self.procs):
                if p not in core_procs:
                    q.put(p)

        # kill the queued processes in parallel
        killers = []
        for i in range(10):
            t = _ProcessKiller(q, i)
            killers.append(t)
            t.start()

        # wait until every queued process has been handled
        q.join()

        # gather the errors each killer thread recorded
        for t in killers:
            shutdown_errors.extend(t.errors)
        del killers[:]

        # core processes go last, in reverse registration order
        for p in reversed(core_procs):
            _kill_process(p, shutdown_errors)

        logger.info("ProcessMonitor exit: cleaning up data structures and signals")
        with self.plock:
            del core_procs[:]
            del self.procs[:]
            del self.core_procs[:]

        reacquire_signals = self.reacquire_signals
        if reacquire_signals:
            reacquire_signals.clear()
        logger.info("ProcessMonitor exit: pmon has shutdown")
    finally:
        # Always release mainthread_spin(), even when teardown failed.
        self.done = True

    if shutdown_errors:
        printerrlog("Shutdown errors:\n"+'\n'.join([" * %s"%e for e in shutdown_errors]))
684
686 """
687 Routine to kill Process p with appropriate logging to screen and logfile
688
689 @param p: process to kill
690 @type p: Process
691 @param errors: list of error messages from killed process
692 @type errors: [str]
693 """
694 try:
695 logger.info("ProcessMonitor exit: killing %s", p.name)
696 printlog("[%s] killing on exit"%p.name)
697
698 p.stop(errors)
699 except:
700 traceback.print_exc()
701 logger.error(traceback.format_exc())
702
704
706 Thread.__init__(self, name="ProcessKiller-%s"%i)
707 self.q = q
708 self.errors = []
709
711 q = self.q
712 while not q.empty():
713 try:
714 p = q.get(False)
715 _kill_process(p, self.errors)
716 q.task_done()
717 except Empty:
718 pass
719