37 from __future__
import with_statement
39 roslib.load_manifest(
'pr2_computer_monitor')
45 from threading
import Timer
47 from time
import sleep
52 from diagnostic_msgs.msg
import DiagnosticArray, DiagnosticStatus, KeyValue
56 if sys.version_info[:3] == (2, 7, 3):
58 threading._DummyThread._Thread__stop =
lambda x: 42
67 stat_dict = { 0:
'OK', 1:
'Warning', 2:
'Error' }
68 temp_dict = { 0:
'OK', 1:
'Hot', 2:
'Critical Hot' }
69 usage_dict = { 0:
'OK', 1:
'Low Disk Space', 2:
'Very Low Disk Space' }
71 REMOVABLE = [
'/dev/sda']
76 hd_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
77 hd_sock.connect((hostname, port))
80 newdat = hd_sock.recv(1024)
83 sock_data = sock_data + newdat
86 sock_vals = sock_data.split(
'|')
94 while idx + 5 < len(sock_vals):
95 this_drive = sock_vals[idx + 1]
96 this_make = sock_vals[idx + 2]
97 this_temp = sock_vals[idx + 3]
101 if this_make
in makes:
105 drives.append(this_drive)
106 makes.append(this_make)
107 temps.append(this_temp)
111 return True, drives, makes, temps
113 rospy.logerr(traceback.format_exc())
114 return False, [
'Exception' ], [ traceback.format_exc() ], [ 0 ]
117 time_since_update = rospy.get_time() - last_update_time
120 if time_since_update > 20
and time_since_update <= 35:
121 stale_status =
'Lagging' 122 if stat.level == DiagnosticStatus.OK:
123 stat.message = stale_status
124 elif stat.message.find(stale_status) < 0:
125 stat.message =
', '.join([stat.message, stale_status])
126 stat.level = max(stat.level, DiagnosticStatus.WARN)
127 if time_since_update > 35:
128 stale_status =
'Stale' 129 if stat.level == DiagnosticStatus.OK:
130 stat.message = stale_status
131 elif stat.message.find(stale_status) < 0:
132 stat.message =
', '.join([stat.message, stale_status])
133 stat.level = max(stat.level, DiagnosticStatus.ERROR)
137 stat.values.insert(0, KeyValue(key =
'Update Status', value = stale_status))
138 stat.values.insert(1, KeyValue(key =
'Time Since Update', value = str(time_since_update)))
141 def __init__(self, hostname, diag_hostname, home_dir = ''):
147 rospy.logwarn(
'Not warning for HD temperatures is deprecated. This will be removed in D-turtle')
150 self.
_diag_pub = rospy.Publisher(
'/diagnostics', DiagnosticArray, queue_size=10)
160 self._temp_stat.name =
"%s HD Temperature" % diag_hostname
161 self._temp_stat.level = DiagnosticStatus.ERROR
162 self._temp_stat.hardware_id = hostname
163 self._temp_stat.message =
'No Data' 164 self._temp_stat.values = [ KeyValue(key =
'Update Status', value =
'No Data'),
165 KeyValue(key =
'Time Since Last Update', value =
'N/A') ]
169 self._usage_stat.level = DiagnosticStatus.ERROR
170 self._usage_stat.hardware_id = hostname
171 self._usage_stat.name =
'%s HD Usage' % diag_hostname
172 self._usage_stat.values = [ KeyValue(key =
'Update Status', value =
'No Data' ),
173 KeyValue(key =
'Time Since Last Update', value =
'N/A') ]
181 self._temp_timer.cancel()
185 self._usage_timer.cancel()
189 if rospy.is_shutdown():
194 diag_strs = [ KeyValue(key =
'Update Status', value =
'OK' ) ,
195 KeyValue(key =
'Time Since Last Update', value =
'0' ) ]
196 diag_level = DiagnosticStatus.OK
201 for index
in range(0, len(drives)):
204 if not unicode(temp).isnumeric()
and drives[index]
not in REMOVABLE:
205 temp_level = DiagnosticStatus.ERROR
207 elif not unicode(temp).isnumeric()
and drives[index]
in REMOVABLE:
208 temp_level = DiagnosticStatus.OK
211 temp_level = DiagnosticStatus.OK
212 if float(temp) > hd_temp_warn:
213 temp_level = DiagnosticStatus.WARN
214 if float(temp) > hd_temp_error:
215 temp_level = DiagnosticStatus.ERROR
217 diag_level = max(diag_level, temp_level)
219 diag_strs.append(KeyValue(key =
'Disk %d Temp Status' % index, value = temp_dict[temp_level]))
220 diag_strs.append(KeyValue(key =
'Disk %d Mount Pt.' % index, value = drives[index]))
221 diag_strs.append(KeyValue(key =
'Disk %d Device ID' % index, value = makes[index]))
222 diag_strs.append(KeyValue(key =
'Disk %d Temp' % index, value = temp))
225 diag_level = DiagnosticStatus.ERROR
229 self._temp_stat.values = diag_strs
230 self._temp_stat.level = diag_level
233 self._temp_stat.message = temp_dict[diag_level]
235 self._temp_stat.message =
'Error' 238 self._temp_stat.level = DiagnosticStatus.OK
240 if not rospy.is_shutdown():
242 self._temp_timer.start()
247 if rospy.is_shutdown():
252 diag_vals = [ KeyValue(key =
'Update Status', value =
'OK' ),
253 KeyValue(key =
'Time Since Last Update', value =
'0' ) ]
254 diag_level = DiagnosticStatus.OK
258 p = subprocess.Popen([
"df",
"-P",
"--block-size=1G", self.
_home_dir],
259 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
260 stdout, stderr = p.communicate()
261 retcode = p.returncode
265 diag_vals.append(KeyValue(key =
'Disk Space Reading', value =
'OK'))
267 for row
in stdout.split(
'\n'):
268 if len(row.split()) < 2:
270 if not unicode(row.split()[1]).isnumeric()
or float(row.split()[1]) < 10:
274 g_available = row.split()[-3]
275 name = row.split()[0]
276 size = row.split()[1]
277 mount_pt = row.split()[-1]
279 if (float(g_available) > low_hd_level):
280 level = DiagnosticStatus.OK
281 elif (float(g_available) > critical_hd_level):
282 level = DiagnosticStatus.WARN
284 level = DiagnosticStatus.ERROR
286 diag_vals.append(KeyValue(
287 key =
'Disk %d Name' % row_count, value = name))
288 diag_vals.append(KeyValue(
289 key =
'Disk %d Available' % row_count, value = g_available))
290 diag_vals.append(KeyValue(
291 key =
'Disk %d Size' % row_count, value = size))
292 diag_vals.append(KeyValue(
293 key =
'Disk %d Status' % row_count, value = stat_dict[level]))
294 diag_vals.append(KeyValue(
295 key =
'Disk %d Mount Point' % row_count, value = mount_pt))
297 diag_level = max(diag_level, level)
298 diag_message = usage_dict[diag_level]
301 diag_vals.append(KeyValue(key =
'Disk Space Reading', value =
'Failed'))
302 diag_level = DiagnosticStatus.ERROR
303 diag_message = stat_dict[diag_level]
307 rospy.logerr(traceback.format_exc())
309 diag_vals.append(KeyValue(key =
'Disk Space Reading', value =
'Exception'))
310 diag_vals.append(KeyValue(key =
'Disk Space Ex', value = traceback.format_exc()))
312 diag_level = DiagnosticStatus.ERROR
313 diag_message = stat_dict[diag_level]
318 self._usage_stat.values = diag_vals
319 self._usage_stat.message = diag_message
320 self._usage_stat.level = diag_level
322 if not rospy.is_shutdown():
324 self._usage_timer.start()
333 msg = DiagnosticArray()
334 msg.header.stamp = rospy.get_rostime()
341 self._diag_pub.publish(msg)
349 if __name__ ==
'__main__':
350 hostname = socket.gethostname()
353 parser = optparse.OptionParser(usage=
"usage: hd_monitor.py [--diag-hostname=cX]")
354 parser.add_option(
"--diag-hostname", dest=
"diag_hostname",
355 help=
"Computer name in diagnostics output (ex: 'c1')",
356 metavar=
"DIAG_HOSTNAME",
357 action=
"store", default = hostname)
358 options, args = parser.parse_args(rospy.myargv())
365 rospy.init_node(
'hd_monitor_%s' % hostname)
366 except rospy.exceptions.ROSInitException:
367 print(
'HD monitor is unable to initialize node. Master may not be running.')
370 hd_monitor =
hd_monitor(hostname, options.diag_hostname, home_dir)
371 rate = rospy.Rate(1.0)
374 while not rospy.is_shutdown():
376 hd_monitor.publish_stats()
377 except KeyboardInterrupt:
379 except Exception
as e:
380 traceback.print_exc()
382 hd_monitor.cancel_timers()
def get_hddtemp_data(hostname='localhost', port=7634)
Connects to hddtemp daemon to get temp, HD make.
def cancel_timers(self)
Must have the lock to cancel everything.
def __init__(self, hostname, diag_hostname, home_dir='')
def update_status_stale(stat, last_update_time)
def check_disk_usage(self)