38 from __future__
import with_statement
40 roslib.load_manifest(
'diagnostic_common_diagnostics')
46 from threading
import Timer
47 import sys, os, time, string
48 from time
import sleep
52 from diagnostic_msgs.msg
import DiagnosticArray, DiagnosticStatus, KeyValue
60 stat_dict = { 0:
'OK', 1:
'Warning', 2:
'Error' }
61 temp_dict = { 0:
'OK', 1:
'Hot', 2:
'Critical Hot' }
62 usage_dict = { 0:
'OK', 1:
'Low Disk Space', 2:
'Very Low Disk Space' }
64 REMOVABLE = [
'/dev/sg1',
'/dev/sdb']
69 hd_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
70 hd_sock.connect((hostname, port))
73 newdat = hd_sock.recv(1024)
76 sock_data = sock_data + newdat
79 sock_vals = sock_data.split(
'|')
87 while idx + 5 < len(sock_vals):
88 this_drive = sock_vals[idx + 1]
89 this_make = sock_vals[idx + 2]
90 this_temp = sock_vals[idx + 3]
94 if this_make
in makes:
98 drives.append(this_drive)
99 makes.append(this_make)
100 temps.append(this_temp)
104 return True, drives, makes, temps
106 rospy.logerr(traceback.format_exc())
107 return False, [
'Exception' ], [ traceback.format_exc() ], [
'0' ]
110 time_since_update = rospy.get_time() - last_update_time
113 if time_since_update > 20
and time_since_update <= 35:
114 stale_status =
'Lagging' 115 if stat.level == DiagnosticStatus.OK:
116 stat.message = stale_status
117 elif stat.message.find(stale_status) < 0:
118 stat.message =
', '.join([stat.message, stale_status])
119 stat.level = max(stat.level, DiagnosticStatus.WARN)
120 if time_since_update > 35:
121 stale_status =
'Stale' 122 if stat.level == DiagnosticStatus.OK:
123 stat.message = stale_status
124 elif stat.message.find(stale_status) < 0:
125 stat.message =
', '.join([stat.message, stale_status])
126 stat.level = max(stat.level, DiagnosticStatus.ERROR)
130 stat.values.insert(0, KeyValue(key =
'Update Status', value = stale_status))
131 stat.values.insert(1, KeyValue(key =
'Time Since Update', value = str(time_since_update)))
134 def __init__(self, hostname, diag_hostname, home_dir = ''):
140 rospy.logwarn(
'Not warning for HD temperatures is deprecated. This will be removed in D-turtle')
143 self.
_diag_pub = rospy.Publisher(
'/diagnostics', DiagnosticArray, queue_size=10)
153 self._temp_stat.name =
"%s HD Temperature" % diag_hostname
154 self._temp_stat.level = DiagnosticStatus.ERROR
155 self._temp_stat.hardware_id = hostname
156 self._temp_stat.message =
'No Data' 157 self._temp_stat.values = [ KeyValue(key =
'Update Status', value =
'No Data'),
158 KeyValue(key =
'Time Since Last Update', value =
'N/A') ]
162 self._usage_stat.level = DiagnosticStatus.ERROR
163 self._usage_stat.hardware_id = hostname
164 self._usage_stat.name =
'%s HD Usage' % diag_hostname
165 self._usage_stat.values = [ KeyValue(key =
'Update Status', value =
'No Data' ),
166 KeyValue(key =
'Time Since Last Update', value =
'N/A') ]
174 self._temp_timer.cancel()
178 self._usage_timer.cancel()
182 if rospy.is_shutdown():
187 diag_strs = [ KeyValue(key =
'Update Status', value =
'OK' ) ,
188 KeyValue(key =
'Time Since Last Update', value =
'0' ) ]
189 diag_level = DiagnosticStatus.OK
194 for index
in range(0, len(drives)):
197 if not unicode(temp).isnumeric()
and drives[index]
not in REMOVABLE:
198 temp_level = DiagnosticStatus.ERROR
200 elif not unicode(temp).isnumeric()
and drives[index]
in REMOVABLE:
201 temp_level = DiagnosticStatus.OK
204 temp_level = DiagnosticStatus.OK
205 if float(temp) > hd_temp_warn:
206 temp_level = DiagnosticStatus.WARN
207 if float(temp) > hd_temp_error:
208 temp_level = DiagnosticStatus.ERROR
210 diag_level = max(diag_level, temp_level)
212 diag_strs.append(KeyValue(key =
'Disk %d Temp Status' % index, value = temp_dict[temp_level]))
213 diag_strs.append(KeyValue(key =
'Disk %d Mount Pt.' % index, value = drives[index]))
214 diag_strs.append(KeyValue(key =
'Disk %d Device ID' % index, value = makes[index]))
215 diag_strs.append(KeyValue(key =
'Disk %d Temp' % index, value = temp))
218 diag_level = DiagnosticStatus.ERROR
222 self._temp_stat.values = diag_strs
223 self._temp_stat.level = diag_level
226 self._temp_stat.message = temp_dict[diag_level]
228 self._temp_stat.message =
'Error' 231 self._temp_stat.level = DiagnosticStatus.OK
233 if not rospy.is_shutdown():
235 self._temp_timer.start()
240 if rospy.is_shutdown():
245 diag_vals = [ KeyValue(key =
'Update Status', value =
'OK' ),
246 KeyValue(key =
'Time Since Last Update', value =
'0' ) ]
247 diag_level = DiagnosticStatus.OK
251 p = subprocess.Popen([
"df",
"-P",
"--block-size=1G", self.
_home_dir],
252 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
253 stdout, stderr = p.communicate()
254 retcode = p.returncode
258 diag_vals.append(KeyValue(key =
'Disk Space Reading', value =
'OK'))
260 for row
in stdout.split(
'\n'):
261 if len(row.split()) < 2:
263 if not unicode(row.split()[1]).isnumeric()
or float(row.split()[1]) < 10:
267 g_available = row.split()[-3]
268 name = row.split()[0]
269 size = row.split()[1]
270 mount_pt = row.split()[-1]
272 if (float(g_available) > low_hd_level):
273 level = DiagnosticStatus.OK
274 elif (float(g_available) > critical_hd_level):
275 level = DiagnosticStatus.WARN
277 level = DiagnosticStatus.ERROR
279 diag_vals.append(KeyValue(
280 key =
'Disk %d Name' % row_count, value = name))
281 diag_vals.append(KeyValue(
282 key =
'Disk %d Available' % row_count, value = g_available))
283 diag_vals.append(KeyValue(
284 key =
'Disk %d Size' % row_count, value = size))
285 diag_vals.append(KeyValue(
286 key =
'Disk %d Status' % row_count, value = stat_dict[level]))
287 diag_vals.append(KeyValue(
288 key =
'Disk %d Mount Point' % row_count, value = mount_pt))
290 diag_level = max(diag_level, level)
291 diag_message = usage_dict[diag_level]
294 diag_vals.append(KeyValue(key =
'Disk Space Reading', value =
'Failed'))
295 diag_level = DiagnosticStatus.ERROR
296 diag_message = stat_dict[diag_level]
300 rospy.logerr(traceback.format_exc())
302 diag_vals.append(KeyValue(key =
'Disk Space Reading', value =
'Exception'))
303 diag_vals.append(KeyValue(key =
'Disk Space Ex', value = traceback.format_exc()))
305 diag_level = DiagnosticStatus.ERROR
306 diag_message = stat_dict[diag_level]
311 self._usage_stat.values = diag_vals
312 self._usage_stat.message = diag_message
313 self._usage_stat.level = diag_level
315 if not rospy.is_shutdown():
317 self._usage_timer.start()
326 msg = DiagnosticArray()
327 msg.header.stamp = rospy.get_rostime()
338 self._diag_pub.publish(msg)
346 if __name__ ==
'__main__':
347 hostname = socket.gethostname()
350 parser = optparse.OptionParser(usage=
"usage: hd_monitor.py [--diag-hostname=cX]")
351 parser.add_option(
"--diag-hostname", dest=
"diag_hostname",
352 help=
"Computer name in diagnostics output (ex: 'c1')",
353 metavar=
"DIAG_HOSTNAME",
354 action=
"store", default = hostname)
355 options, args = parser.parse_args(rospy.myargv())
361 hostname_clean = string.translate(hostname, string.maketrans(
'-',
'_'))
363 rospy.init_node(
'hd_monitor_%s' % hostname_clean)
364 except rospy.exceptions.ROSInitException:
365 print 'HD monitor is unable to initialize node. Master may not be running.' 368 hd_monitor =
hd_monitor(hostname, options.diag_hostname, home_dir)
369 rate = rospy.Rate(1.0)
372 while not rospy.is_shutdown():
374 hd_monitor.publish_stats()
375 except KeyboardInterrupt:
378 traceback.print_exc()
380 hd_monitor.cancel_timers()
def check_disk_usage(self)
def update_status_stale(stat, last_update_time)
def cancel_timers(self)
Must have the lock to cancel everything.
def __init__(self, hostname, diag_hostname, home_dir='')
def get_hddtemp_data(hostname='localhost', port=7634)
Connects to hddtemp daemon to get temp, HD make.