hd_monitor.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #
3 # Copyright 2017 Fraunhofer Institute for Manufacturing Engineering and Automation (IPA)
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 
18 
19 
20 import sys, os, time
21 import subprocess
22 import socket
23 
24 import rospy
25 from diagnostic_msgs.msg import DiagnosticArray, DiagnosticStatus, KeyValue
26 
# Label written into the per-disk 'Disk N Status' value by check_disk_usage.
stat_dict = {
    DiagnosticStatus.OK: 'OK',
    DiagnosticStatus.WARN: 'Warning',
    DiagnosticStatus.ERROR: 'Error',
}

# Overall message of the HD usage status, keyed by the worst per-disk level.
usage_dict = {
    DiagnosticStatus.OK: 'OK',
    DiagnosticStatus.WARN: 'Low Disk Space',
    DiagnosticStatus.ERROR: 'Very Low Disk Space',
}
29 
class hd_monitor():
    """Collect hard-disk usage and I/O figures and publish usage as ROS diagnostics.

    On construction three ``rospy.Timer`` callbacks are registered:
    ``check_disk_usage`` and ``check_io_stat`` refresh the cached
    ``DiagnosticStatus`` objects every 5 s, and ``publish_stats`` publishes
    on ``/diagnostics`` every 1 s.
    """

    # The first three lines of the ``iostat -d`` output are skipped before
    # device rows are parsed (presumably banner, blank line, column header).
    _IOSTAT_HEADER_ROWS = 3
    #Device: tps kB_read/s kB_wrtn/s kB_read kB_wrtn
    _IOSTAT_COLUMNS = ('tps', 'kB_read/s', 'kB_wrtn/s', 'kB_read', 'kB_wrtn')
    #Filesystem Type 1073741824-blocks Used Available Capacity Mounted on
    _DF_FIELD_COUNT = 7
    # Filesystems whose size column is below this value are ignored.
    _MIN_DISK_SIZE = 10

    def __init__(self, hostname, diag_hostname, home_dir = ''):
        """Set up the cached statuses, the publisher and the timers.

        :param hostname: used as ``hardware_id`` of both statuses.
        :param diag_hostname: prefix of the status names
            (``'<diag_hostname> HD Usage'`` / ``'<diag_hostname> HD IO'``).
        :param home_dir: path handed to ``df`` to select the filesystem.
        """
        self._hostname = hostname
        self._home_dir = home_dir

        # Block size suffix passed to df; thresholds below are in this unit.
        self.unit = 'G'
        self.low_hd_level = rospy.get_param('~low_hd_level', 5) #self.unit
        self.critical_hd_level = rospy.get_param('~critical_hd_level', 1) #self.unit

        # Both statuses start as WARN / 'No Data' until the first check ran.
        self._usage_stat = DiagnosticStatus()
        self._usage_stat.level = DiagnosticStatus.WARN
        self._usage_stat.hardware_id = hostname
        self._usage_stat.name = '%s HD Usage' % diag_hostname
        self._usage_stat.message = 'No Data'
        self._usage_stat.values = []

        self._io_stat = DiagnosticStatus()
        self._io_stat.name = '%s HD IO' % diag_hostname
        self._io_stat.level = DiagnosticStatus.WARN
        self._io_stat.hardware_id = hostname
        self._io_stat.message = 'No Data'
        self._io_stat.values = []

        self._diag_pub = rospy.Publisher('/diagnostics', DiagnosticArray, queue_size=1)
        self._publish_timer = rospy.Timer(rospy.Duration(1.0), self.publish_stats)
        self._usage_timer = rospy.Timer(rospy.Duration(5.0), self.check_disk_usage)
        self._io_timer = rospy.Timer(rospy.Duration(5.0), self.check_io_stat)

    @staticmethod
    def _to_text(data):
        """Return subprocess output as text; values without ``decode`` pass through."""
        try:
            # Undecodable bytes are replaced instead of aborting the check.
            return data.decode('utf-8', 'replace')
        except AttributeError:
            return data

    @staticmethod
    def _parse_iostat(output, header_rows = _IOSTAT_HEADER_ROWS, columns = _IOSTAT_COLUMNS):
        """Turn ``iostat -d`` text into ``(key, value)`` string pairs.

        Rows with fewer than six fields (e.g. the blank lines terminating the
        report) are skipped instead of raising ``IndexError``.

        :return: list of ``('<device> <column>', value)`` tuples in row order.
        """
        pairs = []
        for index, row in enumerate(output.split('\n')):
            if index < header_rows:
                continue

            fields = row.split()
            if len(fields) < len(columns) + 1:
                continue

            device = fields[0]
            for column, value in zip(columns, fields[1:]):
                pairs.append(('%s %s' % (device, column), value))
        return pairs

    @staticmethod
    def _parse_df(output, min_size = _MIN_DISK_SIZE, field_count = _DF_FIELD_COUNT):
        """Turn ``df --print-type --portability`` text into per-disk tuples.

        Rows that do not have all seven columns, whose size column is not a
        number (the header row), or whose size is below ``min_size`` are
        skipped.

        :return: list of ``(name, size, used, available, capacity, mount_pt)``
            string tuples.
        """
        disks = []
        for row in output.split('\n'):
            # Limit the split so a mount point containing spaces stays intact.
            fields = row.split(None, field_count - 1)
            if len(fields) < field_count:
                continue
            try:
                if float(fields[2]) < min_size: # Ignore small drives
                    continue
            except ValueError:
                continue

            name, _hd_type, size, used, available, capacity, mount_pt = fields
            # A limited split keeps trailing whitespace on the last field.
            disks.append((name, size, used, available, capacity, mount_pt.rstrip()))
        return disks

    def check_io_stat(self, event):
        """Timer callback: run ``iostat -d`` and cache the result in ``self._io_stat``.

        :param event: timer event supplied by ``rospy.Timer``; unused.
        """
        diag_vals = []
        diag_msg = 'OK'
        diag_level = DiagnosticStatus.OK

        try:
            p = subprocess.Popen(['iostat', '-d'],
                                 stdout = subprocess.PIPE,
                                 stderr = subprocess.PIPE)
            stdout, stderr = p.communicate()
            stdout = self._to_text(stdout)
            stderr = self._to_text(stderr)

            if p.returncode != 0:
                # Fall through to the assignments below so the failure is
                # actually stored in the cached status.
                diag_level = DiagnosticStatus.ERROR
                diag_msg = 'HD IO Error'
                diag_vals = [ KeyValue(key = 'HD IO Error', value = stderr),
                              KeyValue(key = 'Output', value = stdout) ]
            else:
                diag_vals = [ KeyValue(key = key, value = value)
                              for key, value in self._parse_iostat(stdout) ]

        except Exception as e:
            diag_level = DiagnosticStatus.ERROR
            diag_msg = 'HD IO Exception'
            diag_vals = [ KeyValue(key = 'Exception', value = str(e)) ]

        self._io_stat.values = diag_vals
        self._io_stat.message = diag_msg
        self._io_stat.level = diag_level

    def check_disk_usage(self, event):
        """Timer callback: run ``df`` on ``self._home_dir`` and cache the result.

        Each reported disk is OK when its available space exceeds
        ``low_hd_level``, WARN when it exceeds ``critical_hd_level`` and ERROR
        otherwise; the overall level is the worst of all disks.

        :param event: timer event supplied by ``rospy.Timer``; unused.
        """
        diag_vals = []
        diag_message = ''
        diag_level = DiagnosticStatus.OK
        try:
            p = subprocess.Popen(["df", "--print-type", "--portability", "--block-size=1"+self.unit, self._home_dir],
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout, stderr = p.communicate()
            stdout = self._to_text(stdout)
            stderr = self._to_text(stderr)

            if p.returncode != 0:
                diag_level = DiagnosticStatus.ERROR
                diag_message = 'HD Usage Error'
                diag_vals = [ KeyValue(key = 'HD Usage Error', value = stderr),
                              KeyValue(key = 'Output', value = stdout) ]

            else:
                diag_vals.append(KeyValue(key = 'Disk Space Reading', value = 'OK'))
                for row_count, disk in enumerate(self._parse_df(stdout), 1):
                    name, size, used, available, capacity, mount_pt = disk

                    free = float(available)
                    if free > self.low_hd_level:
                        level = DiagnosticStatus.OK
                    elif free > self.critical_hd_level:
                        level = DiagnosticStatus.WARN
                    else:
                        level = DiagnosticStatus.ERROR

                    diag_vals.append(KeyValue(
                        key = 'Disk %d Name' % row_count, value = name))
                    diag_vals.append(KeyValue(
                        key = 'Disk %d Size' % row_count, value = size + ' ' +self.unit))
                    diag_vals.append(KeyValue(
                        key = 'Disk %d Used' % row_count, value = used + ' ' +self.unit))
                    diag_vals.append(KeyValue(
                        key = 'Disk %d Available' % row_count, value = available + ' ' +self.unit))
                    diag_vals.append(KeyValue(
                        key = 'Disk %d Capacity' % row_count, value = capacity))
                    diag_vals.append(KeyValue(
                        key = 'Disk %d Status' % row_count, value = stat_dict[level]))
                    diag_vals.append(KeyValue(
                        key = 'Disk %d Mount Point' % row_count, value = mount_pt))

                    diag_level = max(diag_level, level)

                # Set after the loop so the message is never left empty when
                # no filesystem passed the size filter.
                diag_message = usage_dict[diag_level]

        except Exception as e:
            diag_level = DiagnosticStatus.ERROR
            diag_message = 'HD Usage Exception'
            diag_vals = [ KeyValue(key = 'Exception', value = str(e)) ]

        self._usage_stat.values = diag_vals
        self._usage_stat.message = diag_message
        self._usage_stat.level = diag_level

    def publish_stats(self, event):
        """Timer callback: publish the cached usage status on ``/diagnostics``.

        :param event: timer event supplied by ``rospy.Timer``; unused.
        """
        msg = DiagnosticArray()
        msg.header.stamp = rospy.get_rostime()
        # NOTE(review): self._io_stat is refreshed by check_io_stat but is not
        # appended here, so it is never published - confirm whether intended.
        msg.status.append(self._usage_stat)
        self._diag_pub.publish(msg)
195 
196 
197 ##\todo Need to check HD input/output too using iostat
198 
if __name__ == '__main__':
    # Script entry point: parse the options, start the node and spin.
    hostname = socket.gethostname()

    import optparse
    parser = optparse.OptionParser(usage="usage: hd_monitor.py --diag-hostname=X --directory=/name_of_dir")
    parser.add_option("--diag-hostname",
                      dest="diag_hostname",
                      help="Computer name in diagnostics output (ex: 'b1' for the base PC, 'h32' for the head PC and so on)",
                      metavar="DIAG_HOSTNAME",
                      action="store",
                      # Default to the machine's hostname; a default of False
                      # ended up in the status names as 'False HD Usage'.
                      default=hostname)
    parser.add_option("--directory",
                      dest="directory",
                      help="Enter the directory name (ex: /directory/sub_directory)",
                      metavar="DIR_NAME",
                      default="/") ## taking the root directory as the default directory for checking HDD usage
    # optparse expects the argument list without the program name.
    options, args = parser.parse_args(rospy.myargv()[1:])
    if not sys.argv[1:]:
        parser.error("argument not found.")

    try:
        ## the hostname may contain hyphens,
        ## replacing hyphens "-" with underscores "_", in order to have a legal node name
        node_name = ("hd_monitor_"+hostname).replace ("-", "_")
        rospy.init_node(node_name)
    except rospy.exceptions.ROSInitException:
        print('HD monitor is unable to initialize node. Master may not be running.')
        # Exit status 0 is kept from the original script.
        sys.exit(0)

    # Bind the instance to its own name instead of rebinding the class name;
    # the reference keeps the monitor and its timers alive while spinning.
    monitor = hd_monitor(hostname, options.diag_hostname, options.directory)
    rospy.spin()
def check_io_stat(self, event)
Definition: hd_monitor.py:58
def publish_stats(self, event)
Definition: hd_monitor.py:190
def __init__(self, hostname, diag_hostname, home_dir='')
Definition: hd_monitor.py:31
def check_disk_usage(self, event)
Definition: hd_monitor.py:114


cob_monitoring
Author(s): Florian Weisshardt, Felix Messmer
autogenerated on Wed Apr 7 2021 03:03:11