hd_monitor.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #
3 # Copyright 2017 Fraunhofer Institute for Manufacturing Engineering and Automation (IPA)
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 
18 
19 
20 import sys, os, time
21 import traceback
22 import subprocess
23 import socket
24 
25 import rospy
26 from diagnostic_msgs.msg import DiagnosticArray, DiagnosticStatus, KeyValue
27 
# Label written into the per-disk "Disk N Status" value for each level.
stat_dict = {
    DiagnosticStatus.OK: 'OK',
    DiagnosticStatus.WARN: 'Warning',
    DiagnosticStatus.ERROR: 'Error',
}

# Overall HD usage status message for the worst level seen across all disks.
usage_dict = {
    DiagnosticStatus.OK: 'OK',
    DiagnosticStatus.WARN: 'Low Disk Space',
    DiagnosticStatus.ERROR: 'Very Low Disk Space',
}
30 
class hd_monitor(object):
    """Sample disk usage and disk I/O statistics and publish diagnostics.

    The constructor starts three rospy timers: ``check_disk_usage`` and
    ``check_io_stat`` run every 5 s and refresh the cached statuses, and
    ``publish_stats`` runs every second and publishes on ``/diagnostics``.
    """

    def __init__(self, hostname, diag_hostname, home_dir=''):
        """Create the cached statuses, the publisher and the timers.

        :param hostname: used as ``hardware_id`` of both statuses.
        :param diag_hostname: prefix of the status names
            (``'<diag_hostname> HD Usage'`` / ``'<diag_hostname> HD IO'``).
        :param home_dir: path handed to ``df`` in ``check_disk_usage``.
        """
        self._hostname = hostname
        self._home_dir = home_dir

        # Thresholds are compared against the "Available" column of df, which
        # is requested in blocks of 1<unit>.
        self.unit = 'G'
        self.low_hd_level = rospy.get_param('~low_hd_level', 5)  # self.unit
        self.critical_hd_level = rospy.get_param('~critical_hd_level', 1)  # self.unit

        # Both statuses start as WARN / 'No Data' until the first sample.
        self._usage_stat = DiagnosticStatus()
        self._usage_stat.level = DiagnosticStatus.WARN
        self._usage_stat.hardware_id = hostname
        self._usage_stat.name = '%s HD Usage' % diag_hostname
        self._usage_stat.message = 'No Data'
        self._usage_stat.values = []

        self._io_stat = DiagnosticStatus()
        self._io_stat.name = '%s HD IO' % diag_hostname
        self._io_stat.level = DiagnosticStatus.WARN
        self._io_stat.hardware_id = hostname
        self._io_stat.message = 'No Data'
        self._io_stat.values = []

        self._diag_pub = rospy.Publisher('/diagnostics', DiagnosticArray, queue_size=1)
        self._publish_timer = rospy.Timer(rospy.Duration(1.0), self.publish_stats)
        self._usage_timer = rospy.Timer(rospy.Duration(5.0), self.check_disk_usage)
        self._io_timer = rospy.Timer(rospy.Duration(5.0), self.check_io_stat)

    @staticmethod
    def _to_text(data):
        """Return subprocess output as text; ``None`` becomes ``''``.

        Byte strings are decoded as UTF-8 with undecodable bytes replaced, so
        both stdout and stderr can be stored in message string fields.
        Objects without ``decode`` (already text on Python 3) pass through.
        """
        if data is None:
            return ''
        try:
            return data.decode('utf-8', 'replace')
        except AttributeError:
            return data

    @staticmethod
    def _parse_iostat(output):
        """Turn ``iostat -d`` output into ``[(key, value), ...]`` pairs.

        The first three lines (system banner, blank line, column header) are
        not device rows. Column positions are taken from the header when it
        names all reported columns, so layouts with additional columns still
        map correctly; otherwise the classic positions 1..5 are used. Rows
        that are too short (e.g. the trailing blank lines) are skipped.
        """
        columns = ('tps', 'kB_read/s', 'kB_wrtn/s', 'kB_read', 'kB_wrtn')
        rows = output.split('\n')

        # Device: tps kB_read/s kB_wrtn/s kB_read kB_wrtn
        index_of = dict((name, pos) for pos, name in enumerate(columns, 1))
        header = rows[2].split() if len(rows) > 2 else []
        if all(name in header for name in columns):
            index_of = dict((name, header.index(name)) for name in columns)
        needed = max(index_of.values()) + 1

        pairs = []
        for row in rows[3:]:
            fields = row.split()
            if len(fields) < needed:
                continue
            device = fields[0]
            for name in columns:
                pairs.append(('%s %s' % (device, name), fields[index_of[name]]))
        return pairs

    @staticmethod
    def _parse_df_rows(output, min_size=10):
        """Return the df rows worth reporting as 7-field tuples.

        Each tuple is ``(filesystem, type, size, used, available, capacity,
        mount point)``. The row is split at most six times so a mount point
        containing spaces stays in one piece. Rows with fewer than seven
        fields, rows whose size is not a number (the header) and rows whose
        size is below ``min_size`` are dropped.
        """
        disks = []
        for row in output.split('\n'):
            fields = row.strip().split(None, 6)
            if len(fields) < 7:
                continue
            try:
                if float(fields[2]) < min_size:  # Ignore small drives
                    continue
            except ValueError:
                continue
            disks.append(tuple(fields))
        return disks

    def check_io_stat(self, event):
        """Timer callback: run ``iostat -d`` and refresh ``self._io_stat``.

        A non-zero exit status or any exception is stored in the status as an
        ERROR instead of being raised.

        :param event: timer event supplied by rospy; not used.
        """
        diag_vals = []
        diag_msg = 'OK'
        diag_level = DiagnosticStatus.OK

        try:
            p = subprocess.Popen(['iostat', '-d'],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            stdout, stderr = p.communicate()
            stdout = self._to_text(stdout)
            stderr = self._to_text(stderr)

            if p.returncode != 0:
                # Fall through to the assignments below so the failure is
                # actually recorded in the cached status.
                diag_level = DiagnosticStatus.ERROR
                diag_msg = 'HD IO Error'
                diag_vals = [KeyValue(key='HD IO Error', value=stderr),
                             KeyValue(key='Output', value=stdout)]
            else:
                diag_vals = [KeyValue(key=key, value=value)
                             for key, value in self._parse_iostat(stdout)]

        except Exception as e:
            diag_level = DiagnosticStatus.ERROR
            diag_msg = 'HD IO Exception'
            diag_vals = [KeyValue(key='Exception', value=str(e)),
                         KeyValue(key='Traceback', value=str(traceback.format_exc()))]

        self._io_stat.values = diag_vals
        self._io_stat.message = diag_msg
        self._io_stat.level = diag_level

    def check_disk_usage(self, event):
        """Timer callback: run ``df`` on the configured directory and refresh
        ``self._usage_stat``.

        Every reported disk is rated by its available space: above
        ``low_hd_level`` is OK, above ``critical_hd_level`` is WARN, anything
        else is ERROR. The status level is the worst rating found. A non-zero
        exit status or any exception is stored in the status as an ERROR.

        :param event: timer event supplied by rospy; not used.
        """
        diag_vals = []
        diag_message = ''
        diag_level = DiagnosticStatus.OK
        try:
            p = subprocess.Popen(["df", "--print-type", "--portability",
                                  "--block-size=1" + self.unit, self._home_dir],
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout, stderr = p.communicate()
            stdout = self._to_text(stdout)
            stderr = self._to_text(stderr)

            if p.returncode != 0:
                diag_level = DiagnosticStatus.ERROR
                diag_message = 'HD Usage Error'
                diag_vals = [KeyValue(key='HD Usage Error', value=stderr),
                             KeyValue(key='Output', value=stdout)]

            else:
                diag_vals.append(KeyValue(key='Disk Space Reading', value='OK'))
                unit_suffix = ' ' + self.unit
                # Filesystem Type 1073741824-blocks Used Available Capacity Mounted on
                for row_count, disk in enumerate(self._parse_df_rows(stdout), 1):
                    name, _hd_type, size, used, available, capacity, mount_pt = disk

                    free = float(available)
                    if free > self.low_hd_level:
                        level = DiagnosticStatus.OK
                    elif free > self.critical_hd_level:
                        level = DiagnosticStatus.WARN
                    else:
                        level = DiagnosticStatus.ERROR

                    entries = (
                        ('Name', name),
                        ('Size', size + unit_suffix),
                        ('Used', used + unit_suffix),
                        ('Available', available + unit_suffix),
                        ('Capacity', capacity),
                        ('Status', stat_dict[level]),
                        ('Mount Point', mount_pt),
                    )
                    for label, value in entries:
                        diag_vals.append(KeyValue(
                            key='Disk %d %s' % (row_count, label), value=value))

                    diag_level = max(diag_level, level)
                    diag_message = usage_dict[diag_level]

        except Exception as e:
            diag_level = DiagnosticStatus.ERROR
            diag_message = 'HD Usage Exception'
            diag_vals = [KeyValue(key='Exception', value=str(e)),
                         KeyValue(key='Traceback', value=str(traceback.format_exc()))]

        self._usage_stat.values = diag_vals
        self._usage_stat.message = diag_message
        self._usage_stat.level = diag_level

    def publish_stats(self, event):
        """Timer callback: publish the cached HD usage status.

        :param event: timer event supplied by rospy; not used.
        """
        msg = DiagnosticArray()
        msg.header.stamp = rospy.get_rostime()
        # NOTE(review): self._io_stat is refreshed by check_io_stat but is not
        # part of the published array - confirm whether that is intentional.
        msg.status.append(self._usage_stat)
        self._diag_pub.publish(msg)
196 
197 
198 
199 
if __name__ == '__main__':
    # Script entry point: parse the options, start the ROS node and spin.
    import optparse

    hostname = socket.gethostname()

    parser = optparse.OptionParser(usage="usage: hd_monitor.py --diag-hostname=X --directory=/name_of_dir")
    parser.add_option("--diag-hostname",
                      dest="diag_hostname",
                      help="Computer name in diagnostics output (ex: 'b1' for the base PC, 'h32' for the head PC and so on)",
                      metavar="DIAG_HOSTNAME",
                      action="store",
                      default=False)
    parser.add_option("--directory",
                      dest="directory",
                      help="Enter the directory name (ex: /directory/sub_directory)",
                      metavar="DIR_NAME",
                      default="/")
    options, args = parser.parse_args(rospy.myargv())
    if not sys.argv[1:]:
        parser.error("argument not found.")

    try:
        # The node name is derived from the hostname with '-' replaced by '_'.
        node_name = ("hd_monitor_" + hostname).replace("-", "_")
        rospy.init_node(node_name)
    except rospy.exceptions.ROSInitException:
        print('HD monitor is unable to initialize node. Master may not be running.')
        sys.exit(0)

    # Without --diag-hostname the option is False, which would end up in the
    # status names as the text "False"; use the system hostname instead.
    diag_hostname = options.diag_hostname or hostname

    # Bound to its own name so the hd_monitor class is not shadowed by the
    # instance.
    monitor = hd_monitor(hostname, diag_hostname, options.directory)
    rospy.spin()
hd_monitor.hd_monitor.check_disk_usage
def check_disk_usage(self, event)
Definition: hd_monitor.py:115
hd_monitor.hd_monitor.check_io_stat
def check_io_stat(self, event)
Definition: hd_monitor.py:59
hd_monitor.hd_monitor.unit
unit
Definition: hd_monitor.py:36
hd_monitor.hd_monitor._io_stat
_io_stat
Definition: hd_monitor.py:47
hd_monitor.hd_monitor.critical_hd_level
critical_hd_level
Definition: hd_monitor.py:38
hd_monitor.hd_monitor._hostname
_hostname
Definition: hd_monitor.py:33
hd_monitor.hd_monitor.low_hd_level
low_hd_level
Definition: hd_monitor.py:37
hd_monitor.hd_monitor._io_timer
_io_timer
Definition: hd_monitor.py:57
hd_monitor.hd_monitor._usage_timer
_usage_timer
Definition: hd_monitor.py:56
hd_monitor.hd_monitor._diag_pub
_diag_pub
Definition: hd_monitor.py:54
hd_monitor.hd_monitor._home_dir
_home_dir
Definition: hd_monitor.py:34
hd_monitor.hd_monitor._usage_stat
_usage_stat
Definition: hd_monitor.py:40
hd_monitor.hd_monitor._publish_timer
_publish_timer
Definition: hd_monitor.py:55
hd_monitor.hd_monitor.publish_stats
def publish_stats(self, event)
Definition: hd_monitor.py:191
hd_monitor.hd_monitor.__init__
def __init__(self, hostname, diag_hostname, home_dir='')
Definition: hd_monitor.py:32
hd_monitor.hd_monitor
Definition: hd_monitor.py:31


cob_monitoring
Author(s): Florian Weisshardt, Felix Messmer
autogenerated on Fri Aug 2 2024 09:45:52