00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 from __future__ import with_statement, print_function
00019
00020 import sys, os, time
00021 import subprocess
00022 import socket
00023
00024 import rospy
00025 from diagnostic_msgs.msg import DiagnosticArray, DiagnosticStatus, KeyValue
00026
00027 stat_dict = { DiagnosticStatus.OK: 'OK', DiagnosticStatus.WARN: 'Warning', DiagnosticStatus.ERROR: 'Error' }
00028 usage_dict = { DiagnosticStatus.OK: 'OK', DiagnosticStatus.WARN: 'Low Disk Space', DiagnosticStatus.ERROR: 'Very Low Disk Space' }
00029
00030 class hd_monitor():
00031 def __init__(self, hostname, diag_hostname, home_dir = ''):
00032 self._hostname = hostname
00033 self._home_dir = home_dir
00034
00035 self.unit = 'G'
00036 self.low_hd_level = rospy.get_param('~low_hd_level', 5)
00037 self.critical_hd_level = rospy.get_param('~critical_hd_level', 1)
00038
00039 self._usage_stat = DiagnosticStatus()
00040 self._usage_stat.level = DiagnosticStatus.WARN
00041 self._usage_stat.hardware_id = hostname
00042 self._usage_stat.name = '%s HD Usage' % diag_hostname
00043 self._usage_stat.message = 'No Data'
00044 self._usage_stat.values = []
00045
00046 self._io_stat = DiagnosticStatus()
00047 self._io_stat.name = '%s HD IO' % diag_hostname
00048 self._io_stat.level = DiagnosticStatus.WARN
00049 self._io_stat.hardware_id = hostname
00050 self._io_stat.message = 'No Data'
00051 self._io_stat.values = []
00052
00053 self._diag_pub = rospy.Publisher('/diagnostics', DiagnosticArray, queue_size=1)
00054 self._publish_timer = rospy.Timer(rospy.Duration(1.0), self.publish_stats)
00055 self._usage_timer = rospy.Timer(rospy.Duration(5.0), self.check_disk_usage)
00056 self._io_timer = rospy.Timer(rospy.Duration(5.0), self.check_io_stat)
00057
00058 def check_io_stat(self, event):
00059 diag_vals = []
00060 diag_msg = 'OK'
00061 diag_level = DiagnosticStatus.OK
00062
00063 try:
00064 p = subprocess.Popen('iostat -d',
00065 stdout = subprocess.PIPE,
00066 stderr = subprocess.PIPE, shell = True)
00067 stdout, stderr = p.communicate()
00068 retcode = p.returncode
00069
00070 if retcode != 0:
00071 diag_level = DiagnosticStatus.ERROR
00072 diag_msg = 'HD IO Error'
00073 diag_vals = [ KeyValue(key = 'HD IO Error', value = stderr),
00074 KeyValue(key = 'Output', value = stdout) ]
00075 return (diag_vals, diag_msg, diag_level)
00076
00077 for index, row in enumerate(stdout.split('\n')):
00078 if index < 3:
00079 continue
00080
00081 lst = row.split()
00082
00083 device = lst[0]
00084 tps = lst[1]
00085 kB_read_s = lst[2]
00086 kB_wrtn_s = lst[3]
00087 kB_read = lst[4]
00088 kB_wrtn = lst[5]
00089
00090 diag_vals.append(KeyValue(
00091 key = '%s tps' % device, value=tps))
00092 diag_vals.append(KeyValue(
00093 key = '%s kB_read/s' % device, value=kB_read_s))
00094 diag_vals.append(KeyValue(
00095 key = '%s kB_wrtn/s' % device, value=kB_wrtn_s))
00096 diag_vals.append(KeyValue(
00097 key = '%s kB_read' % device, value=kB_read))
00098 diag_vals.append(KeyValue(
00099 key = '%s kB_wrtn' % device, value=kB_wrtn))
00100
00101 except Exception, e:
00102 diag_level = DiagnosticStatus.ERROR
00103 diag_msg = 'HD IO Exception'
00104 diag_vals = [ KeyValue(key = 'Exception', value = str(e)) ]
00105
00106 self._io_stat.values = diag_vals
00107 self._io_stat.message = diag_msg
00108 self._io_stat.level = diag_level
00109
00110 def check_disk_usage(self, event):
00111 diag_vals = []
00112 diag_message = ''
00113 diag_level = DiagnosticStatus.OK
00114 try:
00115 p = subprocess.Popen(["df", "--print-type", "--portability", "--block-size=1"+self.unit, self._home_dir],
00116 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
00117 stdout, stderr = p.communicate()
00118 retcode = p.returncode
00119
00120 if retcode != 0:
00121 diag_level = DiagnosticStatus.ERROR
00122 diag_message = 'HD Usage Error'
00123 diag_vals = [ KeyValue(key = 'HD Usage Error', value = stderr),
00124 KeyValue(key = 'Output', value = stdout) ]
00125
00126 else:
00127 diag_vals.append(KeyValue(key = 'Disk Space Reading', value = 'OK'))
00128 row_count = 0
00129 for row in stdout.split('\n'):
00130 if len(row.split()) < 2:
00131 continue
00132 if not unicode(row.split()[2]).isnumeric() or float(row.split()[2]) < 10:
00133 continue
00134
00135 row_count += 1
00136
00137 name = row.split()[0]
00138 hd_type = row.split()[1]
00139 size = row.split()[2]
00140 used = row.split()[3]
00141 available = row.split()[4]
00142 capacity = row.split()[5]
00143 mount_pt = row.split()[6]
00144
00145 if (float(available) > self.low_hd_level):
00146 level = DiagnosticStatus.OK
00147 elif (float(available) > self.critical_hd_level):
00148 level = DiagnosticStatus.WARN
00149 else:
00150 level = DiagnosticStatus.ERROR
00151
00152 diag_vals.append(KeyValue(
00153 key = 'Disk %d Name' % row_count, value = name))
00154 diag_vals.append(KeyValue(
00155 key = 'Disk %d Size' % row_count, value = size + ' ' +self.unit))
00156 diag_vals.append(KeyValue(
00157 key = 'Disk %d Used' % row_count, value = used + ' ' +self.unit))
00158 diag_vals.append(KeyValue(
00159 key = 'Disk %d Available' % row_count, value = available + ' ' +self.unit))
00160 diag_vals.append(KeyValue(
00161 key = 'Disk %d Capacity' % row_count, value = capacity))
00162 diag_vals.append(KeyValue(
00163 key = 'Disk %d Status' % row_count, value = stat_dict[level]))
00164 diag_vals.append(KeyValue(
00165 key = 'Disk %d Mount Point' % row_count, value = mount_pt))
00166
00167 diag_level = max(diag_level, level)
00168 diag_message = usage_dict[diag_level]
00169
00170 except Exception, e:
00171 diag_level = DiagnosticStatus.ERROR
00172 diag_message = 'HD Usage Exception'
00173 diag_vals = [ KeyValue(key = 'Exception', value = str(e)) ]
00174
00175 self._usage_stat.values = diag_vals
00176 self._usage_stat.message = diag_message
00177 self._usage_stat.level = diag_level
00178
00179 def publish_stats(self, event):
00180 msg = DiagnosticArray()
00181 msg.header.stamp = rospy.get_rostime()
00182 msg.status.append(self._usage_stat)
00183 self._diag_pub.publish(msg)
00184
00185
00186
00187
00188 if __name__ == '__main__':
00189 hostname = socket.gethostname()
00190
00191 import optparse
00192 parser = optparse.OptionParser(usage="usage: hd_monitor.py --diag-hostname=X --directory=/name_of_dir")
00193 parser.add_option("--diag-hostname",
00194 dest="diag_hostname",
00195 help="Computer name in diagnostics output (ex: 'b1' for the base PC, 'h32' for the head PC and so on)",
00196 metavar="DIAG_HOSTNAME",
00197 action="store",
00198 default=False)
00199 parser.add_option("--directory",
00200 dest="directory",
00201 help="Enter the directory name (ex: /directory/sub_directory)",
00202 metavar="DIR_NAME",
00203 default="/")
00204 options, args = parser.parse_args(rospy.myargv())
00205 if len(sys.argv[1:]) == 0:
00206 parser.error("argument not found.")
00207
00208 try:
00209
00210
00211 node_name = ("hd_monitor_"+hostname).replace ("-", "_")
00212 rospy.init_node(node_name)
00213 except rospy.exceptions.ROSInitException:
00214 print('HD monitor is unable to initialize node. Master may not be running.', file=sys.stderr)
00215 sys.exit(0)
00216
00217 hd_monitor = hd_monitor(hostname, options.diag_hostname, options.directory)
00218 rospy.spin()