00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 from __future__ import with_statement
00040
00041 PKG = 'pr2_hardware_test_monitor'
00042 import roslib
00043 roslib.load_manifest(PKG)
00044
00045 import rospy
00046
00047 from pr2_self_test_msgs.msg import TestStatus
00048
00049 from diagnostic_msgs.msg import DiagnosticArray
00050 from std_srvs.srv import *
00051 import std_msgs.msg
00052
00053 import traceback
00054 import threading
00055 import sys
00056
00057 HEARTBEAT_TIMEOUT = 60
00058 IGNORE_TIME = 30
00059
00060 def create_listener(params, listeners):
00061 """
00062 Loads listener from parameters
00063
00064 @param params {} : Must have "type", "file". "pkg" is optional
00065 @param listeners [] : Newly created listener is appended
00066 @return bool : True if listener created successfully
00067 """
00068 if not (params.has_key('type') and params.has_key('file')):
00069 rospy.logerr('Params "type" and "file" weren\'t found!')
00070 return False
00071
00072 file = params['file']
00073 type = params['type']
00074 pkg = params['pkg'] if params.has_key('pkg') else PKG
00075
00076 try:
00077 import_str = '%s.%s' % (pkg, file)
00078 __import__(import_str)
00079 pypkg = sys.modules[import_str]
00080 listener_type = getattr(pypkg, type)
00081 except Exception, e:
00082 rospy.logerr('Couldn\'t load listener %s from %s.%s.\n\nException: %s' % (type, pkg, file, traceback.format_exc()))
00083 return False
00084
00085 try:
00086 listener = listener_type()
00087 except Exception, e:
00088 rospy.logerr('Listener %s failed to construct.\nException: %s' % (type, traceback.format_exc()))
00089 return False
00090
00091 if not listener.create(params):
00092 return False
00093
00094 listeners.append(listener)
00095 return True
00096
00097 def _make_message(level, warnings, errors):
00098 """
00099 Write status message with warnings and errors
00100 """
00101 if level == 0:
00102 return 'OK'
00103 if level == TestStatus.WARNING:
00104 return ', '.join(warnings)
00105 if level > TestStatus.WARNING:
00106 if len(errors + warnings) > 0:
00107 return ', '.join(errors + warnings)
00108 else:
00109 return 'Error'
00110
00111 class TestMonitor:
00112 """
00113 TestMonitor class loads listeners, and polls them to check for status updates on the test
00114
00115 """
00116 def __init__(self):
00117 self._listeners = []
00118
00119 self._mutex = threading.Lock()
00120
00121 my_params = rospy.get_param("~")
00122
00123 self._listeners_ok = True
00124
00125 for ns, listener_param in my_params.iteritems():
00126 if not create_listener(listener_param, self._listeners):
00127 rospy.logerr('Listener failed to initialize. Namespace: %s' % ns)
00128 self._listeners_ok = False
00129
00130
00131 self._reset_state()
00132
00133 self._snapshot_pub = rospy.Publisher('snapshot_trigger', std_msgs.msg.Empty)
00134
00135 self._status_pub = rospy.Publisher('test_status', TestStatus)
00136 self._diag_pub = rospy.Publisher('/diagnostics', DiagnosticArray)
00137
00138 self.reset_srv = rospy.Service('reset_test', Empty, self._reset_test)
00139 self.halt_srv = rospy.Service('halt_test', Empty, self._halt_test)
00140
00141 self._heartbeat_halted = False
00142 self._heartbeat_time = rospy.get_time()
00143 self._heartbeat_sub = rospy.Subscriber('/heartbeat', std_msgs.msg.Empty, self._on_heartbeat)
00144
00145
00146 @property
00147 def init_ok(self):
00148 """
00149 Returns whether the monitor initialized correctly. Used in unit tests.
00150 """
00151 return len(self._listeners) > 0 and self._listeners_ok
00152
00153 def _on_heartbeat(self, msg):
00154 self._heartbeat_time = rospy.get_time()
00155
00156 def _reset_state(self):
00157 """
00158 Reset state of monitor for startup or on reset. Needs mutex
00159 """
00160 self._heartbeat_halted = False
00161 self._was_ok = True
00162 self._start_time = rospy.get_time()
00163
00164
00165 self._errors = []
00166 self._latched_lvl = TestStatus.RUNNING
00167
00168 def _reset_test(self, srv):
00169 """
00170 Service callback for "reset_test"
00171 """
00172 with self._mutex:
00173 self._reset_state()
00174 self._reset_listeners()
00175
00176 return EmptyResponse()
00177
00178 def _halt_test(self, srv):
00179 """
00180 Service callback for "halt_test"
00181 """
00182 with self._mutex:
00183 self._halt_listeners()
00184
00185 return EmptyResponse()
00186
00187 def _reset_listeners(self):
00188 """
00189 Reset all listeners. Needs mutex.
00190 """
00191 for listener in self._listeners:
00192 try:
00193 listener.reset()
00194 except Exception, e:
00195 rospy.logerr('Listener failed to reset!')
00196
00197 def _halt_listeners(self):
00198 """
00199 Halt all listeners. Needs mutex.
00200 """
00201 for listener in self._listeners:
00202 try:
00203 listener.halt()
00204 except Exception, e:
00205 rospy.logerr('Listener failed to halt!')
00206
00207 self._snapshot_pub.publish()
00208
00209 def _check_status(self):
00210 level = TestStatus.RUNNING
00211 array = DiagnosticArray()
00212 warnings = []
00213 errors = []
00214
00215 for listener in self._listeners:
00216 try:
00217 lvl, msg, diags = listener.check_ok()
00218 except Exception, e:
00219 rospy.logerr('Listener failed to check status. %s' % traceback.format_exc())
00220 lvl, msg, diags = (TestStatus.ERROR, 'Listener Error', None)
00221
00222 level = max(level, lvl)
00223 if msg is not None and msg != '':
00224 if lvl == TestStatus.WARNING:
00225 warnings.append(msg)
00226 if lvl > TestStatus.WARNING:
00227 errors.append(msg)
00228 if diags is not None:
00229 array.status.extend(diags)
00230
00231 if len(self._listeners) == 0:
00232 level = TestStatus.STALE
00233 return level, 'No listeners', array
00234
00235 if not self._listeners_ok:
00236 level = TestStatus.ERROR
00237 errors = [ 'Listener Startup Error' ]
00238
00239
00240 grace_period = rospy.get_time() - self._start_time < IGNORE_TIME
00241
00242
00243 if not grace_period and \
00244 (self._was_ok and level > TestStatus.WARNING):
00245 with self._mutex:
00246 self._halt_listeners()
00247 rospy.logerr('Halted test after failure. Failure message: %s' % ', '.join(errors))
00248 self._was_ok = False
00249
00250
00251 if not self._heartbeat_halted and rospy.get_time() - self._heartbeat_time > HEARTBEAT_TIMEOUT:
00252 rospy.logerr('No heartbeat from Test Manager received, halting test')
00253 with self._mutex:
00254 self._halt_listeners()
00255 self._heartbeat_halted = True
00256
00257
00258 if self._heartbeat_halted and rospy.get_time() - self._heartbeat_time < 5.0:
00259 with self._mutex:
00260 self._reset_state()
00261 self._reset_listeners()
00262 self._heartbeat_halted = False
00263 rospy.logwarn('Automatic reset after heartbeat topic reacquired.')
00264
00265 if self._heartbeat_halted:
00266 level = TestStatus.STALE
00267 errors = [ 'No heartbeat' ]
00268
00269
00270 if not grace_period and level > TestStatus.WARNING:
00271 self._latched_lvl = max(self._latched_lvl, level)
00272
00273
00274 level = max(self._latched_lvl, level)
00275
00276
00277 if not grace_period and len(errors) > 0:
00278 for err in errors:
00279 if not err in self._errors:
00280 self._errors.append(err)
00281
00282 if not grace_period:
00283
00284 message = _make_message(level, warnings, self._errors)
00285 else:
00286
00287 message = _make_message(level, warnings, errors)
00288
00289 return level, message, array
00290
00291
00292
00293 def publish_status(self):
00294 """
00295 Called at 1Hz. Polls listeners, publishes diagnostics, test_status.
00296 """
00297 level, message, array = self._check_status()
00298
00299 if len(array.status) > 0:
00300 array.header.stamp = rospy.get_rostime()
00301 self._diag_pub.publish(array)
00302
00303 test_stat = TestStatus()
00304 test_stat.test_ok = int(level)
00305 test_stat.message = message
00306
00307 self._status_pub.publish(test_stat)