1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35 """
36 Process monitor
37 """
38
39 from __future__ import with_statement
40
41 import atexit
42 import time
43 import traceback
44 from threading import Lock
45 from threading import RLock
46 from threading import Thread
47 try:
48 from queue import Empty, Queue
49 except ImportError:
50 from Queue import Empty, Queue
51
52 from .core import printerrlog
53 from .core import printlog
54 from .core import printlog_bold
55
56
59
60
62 """
63 Exception to indicate that a process launch has failed in a fatal
64 manner (i.e. relaunch is unlikely to succeed)
65 """
66 pass
67
68
69
70
71 _pmons = []
72 _pmon_counter = 0
73 _shutting_down = False
74
75
90
91
def shutdown_process_monitor(process_monitor):
    """
    Shut down a process monitor and wait for its thread to finish.

    @param process_monitor: process monitor to kill
    @type  process_monitor: L{ProcessMonitor}
    @return: True if process_monitor was successfully
    shutdown. False if it could not be shutdown cleanly or if there is
    a problem with process_monitor
    parameter. shutdown_process_monitor() does not throw any exceptions
    as this is shutdown-critical code.
    @rtype: bool
    """
    try:
        # nothing to do for a missing monitor or one already flagged as shut down
        if process_monitor is None or process_monitor.is_shutdown:
            return False

        process_monitor.shutdown()
        # give the monitor thread a bounded amount of time to exit
        process_monitor.join(20.0)
        # is_alive() is the portable spelling: the camelCase isAlive() alias
        # was removed in Python 3.9 and would raise AttributeError here,
        # which the handler below would silently turn into False.
        return not process_monitor.is_alive()
    except Exception:
        # shutdown-critical code: report failure instead of propagating
        return False
115
116
117 _shutdown_lock = Lock()
118
119
128
129
130 atexit.register(pmon_shutdown)
131
132
133
134
136 """
137 Basic process representation for L{ProcessMonitor}. Must be subclassed
138 to provide actual start()/stop() implementations.
139 """
140
def __init__(self, package, name, args, env, respawn=False, required=False):
    """
    Store the description of a process and reset its run-time state.

    @param package: package value, stored unchanged
    @param name: process name, stored unchanged
    @param args: process arguments, stored unchanged
    @param env: process environment, stored unchanged
    @param respawn: respawn flag, stored unchanged
    @type  respawn: bool
    @param required: required flag, stored unchanged
    @type  required: bool
    """
    # description of the process, kept exactly as supplied
    self.package, self.name = package, name
    self.args, self.env = args, env
    self.respawn, self.required = respawn, required

    # run-time state: no exit code recorded and never spawned so far
    self.exit_code = None
    self.spawn_count = 0
    self.lock = Lock()
152
154 return 'Process<%s>' % (self.name)
155
156
157
158
160 """
161 Get all data about this process in dictionary form
162 @return: dictionary of all relevant process properties
163 @rtype: dict { str: val }
164 """
165 info = {
166 'spawn_count': self.spawn_count,
167 'args': self.args,
168 'env': self.env,
169 'package': self.package,
170 'name': self.name,
171 'alive': self.is_alive(),
172 'respawn': self.respawn,
173 'required': self.required,
174 }
175 if self.exit_code is not None:
176 info['exit_code'] = self.exit_code
177 return info
178
180 self.spawn_count += 1
181
184
def stop(self, errors=None):
    """
    Stop the process. Record any significant error messages in the errors parameter

    This implementation does nothing. The default for errors is None
    rather than a list literal so that no list object is shared between
    calls that omit the argument.

    @param errors: error messages. stop() will record messages into this list.
    May be None when the caller does not want the messages.
    @type  errors: [str]
    """
    pass
193
195 if self.exit_code is not None:
196 if self.exit_code:
197 return 'process has died [exit code %s]' % self.exit_code
198 else:
199
200 return 'process has finished cleanly'
201 else:
202 return 'process has died'
203
204
206 """
207 Container class to maintain information about a process that has died. This
208 container allows us to delete the actual Process but still maintain the metadata
209 """
211 super(DeadProcess, self).__init__(p.package, p.name, p.args, p.env, p.respawn)
212 self.exit_code = p.exit_code
213 self.lock = None
214 self.spawn_count = p.spawn_count
215 self.info = p.get_info()
216
219
221 raise Exception('cannot call start on a dead process!')
222
225
226
228 """
229 Listener class for L{ProcessMonitor}
230 """
231
233 """
234 Notifies listener that process has died. This callback only
235 occurs for processes that die during normal process monitor
236 execution -- processes that are forcibly killed during
237 ProcessMonitor shutdown are not reported.
238 @param process_name: name of process
239 @type process_name: str
240 @param exit_code: exit code of process. If None, it means
241 that ProcessMonitor was unable to determine an exit code.
242 @type exit_code: int
243 """
244 pass
245
246
248
def __init__(self, name='ProcessMonitor'):
    """
    Initialise the monitor thread with empty bookkeeping structures.

    @param name: name given to the underlying thread
    @type  name: str
    """
    Thread.__init__(self, name=name)
    # processes currently registered with the monitor
    self.procs = []
    # re-entrant lock used by the methods that read or modify procs
    self.plock = RLock()
    # set to True to ask the run loop to stop
    self.is_shutdown = False
    # set to True once post-run cleanup has finished
    self.done = False
    # use the daemon attribute; setDaemon() is deprecated since Python 3.10
    self.daemon = True
    # listeners notified when a monitored process dies
    self.listeners = []
    # records kept for processes that have died
    self.dead_list = []

    # processes registered as core processes
    self.core_procs = []

    # becomes True once the caller signals that all registrations are done
    self._registrations_complete = False
262
264 """
265 Listener for process events. MUST be called before
266 ProcessMonitor is running.See ProcessListener class.
267 @param l: listener instance
268 @type l: L{ProcessListener}
269 """
270 self.listeners.append(l)
271
273 """
274 Register process with L{ProcessMonitor}
275 @param p: Process
276 @type p: L{Process}
277 @raise PmonException: if process with same name is already registered
278 """
279 e = None
280 with self.plock:
281 if self.has_process(p.name):
282 e = PmonException("cannot add process with duplicate name '%s'" % p.name)
283 elif self.is_shutdown:
284 e = PmonException('cannot add process [%s] after process monitor has been shut down' % p.name)
285 else:
286 self.procs.append(p)
287 if e:
288 raise e
289
291 """
292 Register core process with ProcessMonitor. Coreprocesses
293 have special shutdown semantics. They are killed after all
294 other processes, in reverse order in which they are added.
295 @param p Process
296 @type p: L{Process}
297 @raise PmonException: if process with same name is already registered
298 """
299 self.register(p)
300 self.core_procs.append(p)
301
303 """
304 Inform the process monitor that registrations are complete.
305 After the registrations_complete flag is set, process monitor
306 will exit if there are no processes left to monitor.
307 """
308 self._registrations_complete = True
309
311 with self.plock:
312 self.procs.remove(p)
313
315 """
316 @return: True if process is still be monitored. If False, process
317 has died or was never registered with process
318 @rtype: bool
319 """
320 return len([p for p in self.procs if p.name == name]) > 0
321
323 """
324 @return: process registered under \a name, or None
325 @rtype: L{Process}
326 """
327 with self.plock:
328 v = [p for p in self.procs if p.name == name]
329 if v:
330 return v[0]
331
333 """
334 Kill process that matches name. NOTE: a killed process will
335 continue to show up as active until the process monitor thread
336 has caught that it has died.
337 @param name: Process name
338 @type name: str
339 @return: True if a process named name was removed from
340 process monitor. A process is considered killed if its stop()
341 method was called.
342 @rtype: bool
343 """
def is_string_type(obj):
    """
    @return: True if obj is a string. Checks against basestring when
    that name is defined and against str otherwise.
    @rtype: bool
    """
    try:
        string_types = basestring
    except NameError:
        # basestring is not defined on this interpreter
        string_types = str
    return isinstance(obj, string_types)
349 if not is_string_type(name):
350 raise PmonException('kill_process takes in a process name but was given: %s' % name)
351 printlog('[%s] kill requested' % name)
352 with self.plock:
353 p = self.get_process(name)
354 if p:
355 try:
356
357 p.stop([])
358 except Exception as e:
359 printerrlog('Exception: %s' % (str(e)))
360 return True
361 else:
362 return False
363
365 """
366 Shutdown the process monitor thread
367 """
368 self.is_shutdown = True
369
371 """
372 @return [str]: list of active process names
373 """
374 with self.plock:
375 retval = [p.name for p in self.procs]
376 return retval
377
379 """
380 @return: Two lists, where first
381 list of active process names along with the number of times
382 that process has been spawned. Second list contains dead process names
383 and their spawn count.
384 @rtype: [[(str, int),], [(str,int),]]
385 """
386 with self.plock:
387 actives = [(p.name, p.spawn_count) for p in self.procs]
388 deads = [(p.name, p.spawn_count) for p in self.dead_list]
389 retval = [actives, deads]
390 return retval
391
393 """
394 thread routine of the process monitor.
395 """
396 try:
397
398 try:
399 self._run()
400 except Exception:
401 traceback.print_exc()
402 finally:
403 self._post_run()
404
406 """
407 Internal run loop of ProcessMonitor
408 """
409 plock = self.plock
410 dead = []
411 respawn = []
412 while not self.is_shutdown:
413 with plock:
414 procs = self.procs[:]
415 if self.is_shutdown:
416 break
417
418 for p in procs:
419 try:
420 if not p.is_alive():
421 exit_code_str = p.get_exit_description()
422 if p.respawn:
423 printlog_bold('[%s] %s\nrespawning...' % (p.name, exit_code_str))
424 respawn.append(p)
425 elif p.required:
426 printerrlog('=' * 80 + 'REQUIRED process [%s] has died!\n%s\nInitiating shutdown!\n' % (p.name, exit_code_str) + '=' * 80)
427 self.is_shutdown = True
428 else:
429 if p.exit_code:
430 printerrlog('[%s] %s' % (p.name, exit_code_str))
431 else:
432 printlog_bold('[%s] %s' % (p.name, exit_code_str))
433 dead.append(p)
434
435
436
437 for l in self.listeners:
438 l.process_died(p.name, p.exit_code)
439
440 except Exception:
441 traceback.print_exc()
442
443 dead.append(p)
444 if self.is_shutdown:
445 break
446 for d in dead:
447 try:
448 self.unregister(d)
449
450 d.stop([])
451
452
453 with plock:
454 self.dead_list.append(DeadProcess(d))
455 except Exception as e:
456 printerrlog('Exception: %s' % (str(e)))
457
458
459
460 if self._registrations_complete and dead and not self.procs and not respawn:
461 printlog('all processes on machine have died, roslaunch will exit')
462 self.is_shutdown = True
463 del dead[:]
464 for r in respawn:
465 try:
466 if self.is_shutdown:
467 break
468 printlog('[%s] restarting process' % r.name)
469
470 r.stop([])
471 r.start()
472 except Exception:
473 traceback.print_exc()
474 del respawn[:]
475 time.sleep(0.1)
476
477
478
def _post_run(self):
    """
    Stop every process that is still registered and clear the
    monitor's bookkeeping.

    Non-core processes are handed to a pool of worker threads through
    a queue. Core processes are stopped afterwards, one at a time, in
    the reverse of the order in which they were registered. Error
    messages collected while stopping are logged together at the end.
    dead_list is left untouched.
    """
    # make sure the flag is set no matter how this method was reached
    self.is_shutdown = True

    q = Queue()

    with self.plock:
        # snapshot of the core procs so they can be handled outside the lock
        core_procs = self.core_procs[:]

        # enqueue the non-core procs, most recently registered first
        for p in reversed(self.procs):
            if p not in core_procs:
                q.put(p)

    # start the worker threads that drain the queue
    killers = []
    for i in range(10):
        t = _ProcessKiller(q, i)
        killers.append(t)
        t.start()

    # block until every queued process has been marked as handled
    q.join()

    # gather the error messages recorded by each worker
    shutdown_errors = []
    for t in killers:
        shutdown_errors.extend(t.errors)
    del killers[:]

    # core procs are stopped sequentially, after everything else
    for p in reversed(core_procs):
        _kill_process(p, shutdown_errors)

    # drop all references to the processes
    with self.plock:
        del core_procs[:]
        del self.procs[:]
        del self.core_procs[:]

    self.done = True

    if shutdown_errors:
        printerrlog('Shutdown errors:\n' + '\n'.join([' * %s' % e for e in shutdown_errors]))
526
527
529 """
530 Routine for kill Process p with appropriate logging to screen and logfile
531
532 @param p: process to kill
533 @type p: Process
534 @param errors: list of error messages from killed process
535 @type errors: [str]
536 """
537 try:
538 printlog('[%s] killing on exit' % p.name)
539
540 p.stop(errors)
541 except Exception as e:
542 printerrlog('Exception: %s' % (str(e)))
543
544
546
548 Thread.__init__(self, name='ProcessKiller-%s' % i)
549 self.q = q
550 self.errors = []
551
553 q = self.q
554 while not q.empty():
555 try:
556 p = q.get(False)
557 _kill_process(p, self.errors)
558 q.task_done()
559 except Empty:
560 pass
561