1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35 """
36 Process monitor
37 """
38
39 from __future__ import with_statement
40
41 import os
42 import sys
43 import time
44 import traceback
45 import logging
46 import Queue
47 import atexit
48 from threading import Thread, RLock, Lock
49
50 from .core import printlog, printlog_bold, printerrlog
51
52 _logger = logging.getLogger("rosunit")
53
55
57 """
58 Exception to indicate that a process launch has failed in a fatal
59 manner (i.e. relaunch is unlikely to succeed)
60 """
61 pass
62
63
64
65 _pmons = []
66 _pmon_counter = 0
67 _shutting_down = False
85
87 """
88 @param process_monitor: process monitor to kill
89 @type process_monitor: L{ProcessMonitor}
90 @return: True if process_monitor was successfully
91 shutdown. False if it could not be shutdown cleanly or if there is
92 a problem with process_monitor
93 parameter. shutdown_process_monitor() does not throw any exceptions
94 as this is shutdown-critical code.
95 @rtype: bool
96 """
97 try:
98 if process_monitor is None or process_monitor.is_shutdown:
99 return False
100
101
102
103
104
105
106 process_monitor.shutdown()
107
108 process_monitor.join(20.0)
109 if process_monitor.isAlive():
110 _logger.error("shutdown_process_monitor: ProcessMonitor shutdown failed!")
111 return False
112 else:
113 _logger.debug("shutdown_process_monitor: ProcessMonitor shutdown succeeded")
114 return True
115 except Exception, e:
116 return False
117
118 _shutdown_lock = Lock()
127
128 atexit.register(pmon_shutdown)
129
130
131
133 """
134 Basic process representation for L{ProcessMonitor}. Must be subclassed
135 to provide actual start()/stop() implementations.
136 """
137
138 - def __init__(self, package, name, args, env, respawn=False, required=False):
139 self.package = package
140 self.name = name
141 self.args = args
142 self.env = env
143 self.respawn = respawn
144 self.required = required
145 self.lock = Lock()
146 self.exit_code = None
147
148 self.spawn_count = 0
149
151 return "Process<%s>"%(self.name)
152
153
154
155
157 """
158 Get all data about this process in dictionary form
159 @return: dictionary of all relevant process properties
160 @rtype: dict { str: val }
161 """
162 info = {
163 'spawn_count': self.spawn_count,
164 'args': self.args,
165 'env': self.env,
166 'package': self.package,
167 'name': self.name,
168 'alive': self.is_alive(),
169 'respawn': self.respawn,
170 'required': self.required,
171 }
172 if self.exit_code is not None:
173 info['exit_code'] = self.exit_code
174 return info
175
177 self.spawn_count += 1
178
181
def stop(self, errors=None):
    """
    Stop the process. Record any significant error messages in the errors parameter.

    This base implementation does nothing; subclasses are expected to
    override it with the real behaviour.

    @param errors: error messages. stop() will record messages into this list.
        The default is None rather than a list literal so that no list
        object is shared between calls.
    @type  errors: [str] or None
    """
    pass
190
192 if self.exit_code is not None:
193 if self.exit_code:
194 return 'process has died [exit code %s]'%self.exit_code
195 else:
196
197 return 'process has finished cleanly'
198 else:
199 return 'process has died'
200
202 """
203 Container class to maintain information about a process that has died. This
204 container allows us to delete the actual Process but still maintain the metadata
205 """
207 super(DeadProcess, self).__init__(p.package, p.name, p.args, p.env, p.respawn)
208 self.exit_code = p.exit_code
209 self.lock = None
210 self.spawn_count = p.spawn_count
211 self.info = p.get_info()
215 raise Exception("cannot call start on a dead process!")
218
220 """
221 Listener class for L{ProcessMonitor}
222 """
223
225 """
226 Notifies listener that process has died. This callback only
227 occurs for processes that die during normal process monitor
228 execution -- processes that are forcibly killed during
229 ProcessMonitor shutdown are not reported.
230 @param process_name: name of process
231 @type process_name: str
232 @param exit_code: exit code of process. If None, it means
233 that ProcessMonitor was unable to determine an exit code.
234 @type exit_code: int
235 """
236 pass
237
239
def __init__(self, name="ProcessMonitor"):
    """
    Create the monitor thread with empty bookkeeping structures. The
    thread is not started here.

    @param name: name given to the underlying thread
    @type  name: str
    """
    Thread.__init__(self, name=name)
    # Daemon thread: it does not keep the interpreter alive at exit.
    self.setDaemon(True)

    # Registered processes; plock is held while this list is changed.
    self.plock = RLock()
    self.procs = []
    # Processes that must be killed after all the others on shutdown.
    self.core_procs = []
    # Records of processes that have died.
    self.dead_list = []
    # Listeners notified when a monitored process dies.
    self.listeners = []

    # is_shutdown asks the run loop to stop; done is set once the
    # post-run cleanup has finished.
    self.is_shutdown = False
    self.done = False
    # While False, the run loop keeps going even with nothing to monitor.
    self._registrations_complete = False

    _logger.info("created process monitor %s" % (self,))
255
257 """
258 Listener for process events. MUST be called before
259 ProcessMonitor is running. See ProcessListener class.
260 @param l: listener instance
261 @type l: L{ProcessListener}
262 """
263 self.listeners.append(l)
264
266 """
267 Register process with L{ProcessMonitor}
268 @param p: Process
269 @type p: L{Process}
270 @raise PmonException: if process with same name is already registered
271 """
272 _logger.info("ProcessMonitor.register[%s]"%p.name)
273 e = None
274 with self.plock:
275 if self.has_process(p.name):
276 e = PmonException("cannot add process with duplicate name '%s'"%p.name)
277 elif self.is_shutdown:
278 e = PmonException("cannot add process [%s] after process monitor has been shut down"%p.name)
279 else:
280 self.procs.append(p)
281 if e:
282 _logger.error("ProcessMonitor.register[%s] failed %s"%(p.name, e))
283 raise e
284 else:
285 _logger.info("ProcessMonitor.register[%s] complete"%p.name)
286
288 """
289 Register core process with ProcessMonitor. Core processes
290 have special shutdown semantics. They are killed after all
291 other processes, in reverse order in which they are added.
292 @param p Process
293 @type p: L{Process}
294 @raise PmonException: if process with same name is already registered
295 """
296 self.register(p)
297 self.core_procs.append(p)
298
300 """
301 Inform the process monitor that registrations are complete.
302 After the registrations_complete flag is set, process monitor
303 will exit if there are no processes left to monitor.
304 """
305 self._registrations_complete = True
306 _logger.info("registrations completed %s"%self)
307
309 _logger.info("ProcessMonitor.unregister[%s] starting"%p.name)
310 with self.plock:
311 self.procs.remove(p)
312 _logger.info("ProcessMonitor.unregister[%s] complete"%p.name)
313
315 """
316 @return: True if process is still being monitored. If False, process
317 has died or was never registered with process
318 @rtype: bool
319 """
320 return len([p for p in self.procs if p.name == name]) > 0
321
323 """
324 @return: process registered under \a name, or None
325 @rtype: L{Process}
326 """
327 with self.plock:
328 v = [p for p in self.procs if p.name == name]
329 if v:
330 return v[0]
331
333 """
334 Kill process that matches name. NOTE: a killed process will
335 continue to show up as active until the process monitor thread
336 has caught that it has died.
337 @param name: Process name
338 @type name: str
339 @return: True if a process named name was removed from
340 process monitor. A process is considered killed if its stop()
341 method was called.
342 @rtype: bool
343 """
344 if not isinstance(name, basestring):
345 raise PmonException("kill_process takes in a process name but was given: %s"%name)
346 _logger.debug("ProcessMonitor.kill_process[%s]"%name)
347 printlog("[%s] kill requested"%name)
348 with self.plock:
349 p = self.get_process(name)
350 if p:
351 try:
352
353 p.stop([])
354 except:
355 _logger.error(traceback.format_exc())
356 return True
357 else:
358 return False
359
361 """
362 Shutdown the process monitor thread
363 """
364 _logger.info("ProcessMonitor.shutdown %s"%self)
365 self.is_shutdown = True
366
368 """
369 @return [str]: list of active process names
370 """
371 with self.plock:
372 retval = [p.name for p in self.procs]
373 return retval
374
376 """
377 @return: Two lists, where first
378 list of active process names along with the number of times
379 that process has been spawned. Second list contains dead process names
380 and their spawn count.
381 @rtype: [[(str, int),], [(str,int),]]
382 """
383 with self.plock:
384 actives = [(p.name, p.spawn_count) for p in self.procs]
385 deads = [(p.name, p.spawn_count) for p in self.dead_list]
386 retval = [actives, deads]
387 return retval
388
390 """
391 thread routine of the process monitor.
392 """
393 try:
394
395 try:
396 self._run()
397 except:
398 _logger.error(traceback.format_exc())
399 traceback.print_exc()
400 finally:
401 self._post_run()
402
404 """
405 Internal run loop of ProcessMonitor
406 """
407 plock = self.plock
408 dead = []
409 respawn = []
410 while not self.is_shutdown:
411 with plock:
412 procs = self.procs[:]
413 if self.is_shutdown:
414 break
415
416 for p in procs:
417 try:
418 if not p.is_alive():
419 _logger.debug("Process[%s] has died, respawn=%s, required=%s, exit_code=%s",p.name, p.respawn, p.required, p.exit_code)
420 exit_code_str = p.get_exit_description()
421 if p.respawn:
422 printlog_bold("[%s] %s\nrespawning..."%(p.name, exit_code_str))
423 respawn.append(p)
424 elif p.required:
425 printerrlog('='*80+"REQUIRED process [%s] has died!\n%s\nInitiating shutdown!\n"%(p.name, exit_code_str)+'='*80)
426 self.is_shutdown = True
427 else:
428 if p.exit_code:
429 printerrlog("[%s] %s"%(p.name, exit_code_str))
430 else:
431 printlog_bold("[%s] %s"%(p.name, exit_code_str))
432 dead.append(p)
433
434
435
436 for l in self.listeners:
437 l.process_died(p.name, p.exit_code)
438
439 except Exception, e:
440 traceback.print_exc()
441
442 dead.append(p)
443 if self.is_shutdown:
444 break
445 for d in dead:
446 try:
447 self.unregister(d)
448
449 d.stop([])
450
451
452 with plock:
453 self.dead_list.append(DeadProcess(d))
454 except:
455 _logger.error(traceback.format_exc())
456
457
458
459 if self._registrations_complete and dead and not self.procs and not respawn:
460 printlog("all processes on machine have died, roslaunch will exit")
461 self.is_shutdown = True
462 del dead[:]
463 for r in respawn:
464 try:
465 if self.is_shutdown:
466 break
467 printlog("[%s] restarting process"%r.name)
468
469 r.stop([])
470 r.start()
471 except:
472 traceback.print_exc()
473 _logger.error("Restart failed %s",traceback.format_exc())
474 del respawn[:]
475 time.sleep(0.1)
476
477
478
def _post_run(self):
    """
    Stop every process that is still registered, then clear the
    monitor's bookkeeping.

    Non-core processes are stopped first, in parallel by ten worker
    threads. Core processes are then stopped one at a time, in the
    reverse of their registration order. dead_list is left untouched.
    Any error messages collected while stopping are printed at the end.
    """
    _logger.info("ProcessMonitor._post_run %s"%self)

    # Force the flag so it is set no matter how the run loop ended.
    self.is_shutdown = True

    kill_queue = Queue.Queue()
    # Nothing has been queued yet, so this join() returns immediately.
    kill_queue.join()

    with self.plock:
        # Snapshot, so the core list can be walked after the lock is released.
        core_procs = self.core_procs[:]
        _logger.info("ProcessMonitor._post_run %s: remaining procs are %s"%(self, self.procs))

        # Queue the non-core processes, most recently registered first.
        for proc in reversed(self.procs):
            if proc not in core_procs:
                kill_queue.put(proc)

    # Ten workers drain the queue concurrently.
    killers = [_ProcessKiller(kill_queue, worker_id) for worker_id in range(10)]
    for killer in killers:
        killer.start()

    # Block until every queued process has been handled.
    kill_queue.join()

    # Gather whatever each worker recorded.
    shutdown_errors = []
    for killer in killers:
        shutdown_errors.extend(killer.errors)
    del killers[:]

    # Core processes go last and strictly one after another.
    for proc in reversed(core_procs):
        _kill_process(proc, shutdown_errors)

    _logger.info("ProcessMonitor exit: cleaning up data structures")
    with self.plock:
        del core_procs[:]
        del self.procs[:]
        del self.core_procs[:]

    _logger.info("ProcessMonitor exit: pmon has shutdown")
    self.done = True

    if shutdown_errors:
        bullets = '\n'.join(" * %s"%err for err in shutdown_errors)
        printerrlog("Shutdown errors:\n" + bullets)
530
532 """
533 Routine for killing Process p with appropriate logging to screen and logfile
534
535 @param p: process to kill
536 @type p: Process
537 @param errors: list of error messages from killed process
538 @type errors: [str]
539 """
540 try:
541 _logger.info("ProcessMonitor exit: killing %s", p.name)
542 printlog("[%s] killing on exit"%p.name)
543
544 p.stop(errors)
545 except:
546 _logger.error(traceback.format_exc())
547
549
551 Thread.__init__(self, name="ProcessKiller-%s"%i)
552 self.q = q
553 self.errors = []
554
556 q = self.q
557 while not q.empty():
558 try:
559 p = q.get(False)
560 _kill_process(p, self.errors)
561 q.task_done()
562 except Queue.Empty:
563 pass
564