hd_monitor.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 #
00003 # Copyright 2017 Fraunhofer Institute for Manufacturing Engineering and Automation (IPA)
00004 #
00005 # Licensed under the Apache License, Version 2.0 (the "License");
00006 # you may not use this file except in compliance with the License.
00007 # You may obtain a copy of the License at
00008 #
00009 #   http://www.apache.org/licenses/LICENSE-2.0
00010 #
00011 # Unless required by applicable law or agreed to in writing, software
00012 # distributed under the License is distributed on an "AS IS" BASIS,
00013 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014 # See the License for the specific language governing permissions and
00015 # limitations under the License.
00016 
00017 
00018 from __future__ import with_statement, print_function
00019 
00020 import sys, os, time
00021 import subprocess
00022 import socket
00023 
00024 import rospy
00025 from diagnostic_msgs.msg import DiagnosticArray, DiagnosticStatus, KeyValue
00026 
00027 stat_dict = { DiagnosticStatus.OK: 'OK', DiagnosticStatus.WARN: 'Warning', DiagnosticStatus.ERROR: 'Error' }
00028 usage_dict = { DiagnosticStatus.OK: 'OK', DiagnosticStatus.WARN: 'Low Disk Space', DiagnosticStatus.ERROR: 'Very Low Disk Space' }
00029 
00030 class hd_monitor():
00031     def __init__(self, hostname, diag_hostname, home_dir = ''):
00032         self._hostname = hostname
00033         self._home_dir = home_dir
00034 
00035         self.unit = 'G'
00036         self.low_hd_level = rospy.get_param('~low_hd_level', 5) #self.unit
00037         self.critical_hd_level = rospy.get_param('~critical_hd_level', 1) #self.unit
00038 
00039         self._usage_stat = DiagnosticStatus()
00040         self._usage_stat.level = DiagnosticStatus.WARN
00041         self._usage_stat.hardware_id = hostname
00042         self._usage_stat.name = '%s HD Usage' % diag_hostname
00043         self._usage_stat.message = 'No Data'
00044         self._usage_stat.values = []
00045 
00046         self._io_stat = DiagnosticStatus()
00047         self._io_stat.name = '%s HD IO' % diag_hostname
00048         self._io_stat.level = DiagnosticStatus.WARN
00049         self._io_stat.hardware_id = hostname
00050         self._io_stat.message = 'No Data'
00051         self._io_stat.values = []
00052 
00053         self._diag_pub = rospy.Publisher('/diagnostics', DiagnosticArray, queue_size=1)
00054         self._publish_timer = rospy.Timer(rospy.Duration(1.0), self.publish_stats)
00055         self._usage_timer = rospy.Timer(rospy.Duration(5.0), self.check_disk_usage)
00056         self._io_timer = rospy.Timer(rospy.Duration(5.0), self.check_io_stat)
00057 
00058     def check_io_stat(self, event):
00059         diag_vals = []
00060         diag_msg = 'OK'
00061         diag_level = DiagnosticStatus.OK
00062 
00063         try:
00064             p = subprocess.Popen('iostat -d',
00065                                  stdout = subprocess.PIPE,
00066                                  stderr = subprocess.PIPE, shell = True)
00067             stdout, stderr = p.communicate()
00068             retcode = p.returncode
00069 
00070             if retcode != 0:
00071                 diag_level = DiagnosticStatus.ERROR
00072                 diag_msg = 'HD IO Error'
00073                 diag_vals = [ KeyValue(key = 'HD IO Error', value = stderr),
00074                               KeyValue(key = 'Output', value = stdout) ]
00075                 return (diag_vals, diag_msg, diag_level)
00076 
00077             for index, row in enumerate(stdout.split('\n')):
00078                 if index < 3:
00079                     continue
00080 
00081                 lst = row.split()
00082                 #Device:            tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn
00083                 device = lst[0]
00084                 tps = lst[1]
00085                 kB_read_s = lst[2]
00086                 kB_wrtn_s = lst[3]
00087                 kB_read = lst[4]
00088                 kB_wrtn = lst[5]
00089 
00090                 diag_vals.append(KeyValue(
00091                         key = '%s tps' % device, value=tps))
00092                 diag_vals.append(KeyValue(
00093                         key = '%s kB_read/s' % device, value=kB_read_s))
00094                 diag_vals.append(KeyValue(
00095                         key = '%s kB_wrtn/s' % device, value=kB_wrtn_s))
00096                 diag_vals.append(KeyValue(
00097                         key = '%s kB_read' % device, value=kB_read))
00098                 diag_vals.append(KeyValue(
00099                         key = '%s kB_wrtn' % device, value=kB_wrtn))
00100 
00101         except Exception, e:
00102             diag_level = DiagnosticStatus.ERROR
00103             diag_msg = 'HD IO Exception'
00104             diag_vals = [ KeyValue(key = 'Exception', value = str(e)) ]
00105 
00106         self._io_stat.values = diag_vals
00107         self._io_stat.message = diag_msg
00108         self._io_stat.level = diag_level
00109 
00110     def check_disk_usage(self, event):
00111         diag_vals = []
00112         diag_message = ''
00113         diag_level = DiagnosticStatus.OK
00114         try:
00115             p = subprocess.Popen(["df", "--print-type", "--portability", "--block-size=1"+self.unit, self._home_dir],
00116                                  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
00117             stdout, stderr = p.communicate()
00118             retcode = p.returncode
00119 
00120             if retcode != 0:
00121                 diag_level = DiagnosticStatus.ERROR
00122                 diag_message = 'HD Usage Error'
00123                 diag_vals = [ KeyValue(key = 'HD Usage Error', value = stderr),
00124                               KeyValue(key = 'Output', value = stdout) ]
00125 
00126             else:
00127                 diag_vals.append(KeyValue(key = 'Disk Space Reading', value = 'OK'))
00128                 row_count = 0
00129                 for row in stdout.split('\n'):
00130                     if len(row.split()) < 2:
00131                         continue
00132                     if not unicode(row.split()[2]).isnumeric() or float(row.split()[2]) < 10: # Ignore small drives
00133                         continue
00134 
00135                     row_count += 1
00136                     #Filesystem     Type 1073741824-blocks  Used Available Capacity Mounted on
00137                     name = row.split()[0]
00138                     hd_type = row.split()[1]
00139                     size = row.split()[2]
00140                     used = row.split()[3]
00141                     available = row.split()[4]
00142                     capacity = row.split()[5]
00143                     mount_pt = row.split()[6]
00144 
00145                     if (float(available) > self.low_hd_level):
00146                         level = DiagnosticStatus.OK
00147                     elif (float(available) > self.critical_hd_level):
00148                         level = DiagnosticStatus.WARN
00149                     else:
00150                         level = DiagnosticStatus.ERROR
00151 
00152                     diag_vals.append(KeyValue(
00153                             key = 'Disk %d Name' % row_count, value = name))
00154                     diag_vals.append(KeyValue(
00155                             key = 'Disk %d Size' % row_count, value = size + ' ' +self.unit))
00156                     diag_vals.append(KeyValue(
00157                             key = 'Disk %d Used' % row_count, value = used + ' ' +self.unit))
00158                     diag_vals.append(KeyValue(
00159                             key = 'Disk %d Available' % row_count, value = available + ' ' +self.unit))
00160                     diag_vals.append(KeyValue(
00161                             key = 'Disk %d Capacity' % row_count, value = capacity))
00162                     diag_vals.append(KeyValue(
00163                             key = 'Disk %d Status' % row_count, value = stat_dict[level]))
00164                     diag_vals.append(KeyValue(
00165                             key = 'Disk %d Mount Point' % row_count, value = mount_pt))
00166 
00167                     diag_level = max(diag_level, level)
00168                     diag_message = usage_dict[diag_level]
00169 
00170         except Exception, e:
00171             diag_level = DiagnosticStatus.ERROR
00172             diag_message = 'HD Usage Exception'
00173             diag_vals = [ KeyValue(key = 'Exception', value = str(e)) ]
00174 
00175         self._usage_stat.values = diag_vals
00176         self._usage_stat.message = diag_message
00177         self._usage_stat.level = diag_level
00178 
00179     def publish_stats(self, event):
00180         msg = DiagnosticArray()
00181         msg.header.stamp = rospy.get_rostime()
00182         msg.status.append(self._usage_stat)
00183         self._diag_pub.publish(msg)
00184 
00185 
00186 ##\todo Need to check HD input/output too using iostat
00187 
00188 if __name__ == '__main__':
00189     hostname = socket.gethostname()
00190 
00191     import optparse
00192     parser = optparse.OptionParser(usage="usage: hd_monitor.py --diag-hostname=X --directory=/name_of_dir")
00193     parser.add_option("--diag-hostname", 
00194                       dest="diag_hostname",
00195                       help="Computer name in diagnostics output (ex: 'b1' for the base PC, 'h32' for the head PC and so on)",
00196                       metavar="DIAG_HOSTNAME",
00197                       action="store", 
00198                       default=False)
00199     parser.add_option("--directory", 
00200                       dest="directory",
00201                       help="Enter the directory name (ex: /directory/sub_directory)",
00202                       metavar="DIR_NAME",
00203                       default="/") ## taking the root directory as the default directory for checking HDD usage
00204     options, args = parser.parse_args(rospy.myargv())
00205     if len(sys.argv[1:]) == 0:
00206         parser.error("argument not found.")
00207 
00208     try:
00209         ## the hostname consists of hiphens, 
00210         ## replacing hiphens "-" with underscore "_", in order to have legal node name
00211         node_name = ("hd_monitor_"+hostname).replace ("-", "_")
00212         rospy.init_node(node_name)
00213     except rospy.exceptions.ROSInitException:
00214         print('HD monitor is unable to initialize node. Master may not be running.', file=sys.stderr)
00215         sys.exit(0)
00216 
00217     hd_monitor = hd_monitor(hostname, options.diag_hostname, options.directory)
00218     rospy.spin()


cob_monitoring
Author(s): Florian Weisshardt , Felix Messmer
autogenerated on Sun Jun 9 2019 20:20:19