nvidia_smi_util.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #
3 # Software License Agreement (BSD License)
4 #
5 # Copyright (c) 2010, Willow Garage, Inc.
6 # All rights reserved.
7 #
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions
10 # are met:
11 #
12 # * Redistributions of source code must retain the above copyright
13 # notice, this list of conditions and the following disclaimer.
14 # * Redistributions in binary form must reproduce the above
15 # copyright notice, this list of conditions and the following
16 # disclaimer in the documentation and/or other materials provided
17 # with the distribution.
18 # * Neither the name of the Willow Garage nor the names of its
19 # contributors may be used to endorse or promote products derived
20 # from this software without specific prior written permission.
21 #
22 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 # POSSIBILITY OF SUCH DAMAGE.
34 
35 ##\author Kevin Watts
36 
37 from __future__ import division
38 
39 PKG = 'pr2_computer_monitor'
40 import roslib; roslib.load_manifest(PKG)
41 
42 import rospy
43 
44 from diagnostic_msgs.msg import DiagnosticArray, DiagnosticStatus, KeyValue
45 from pr2_msgs.msg import GPUStatus
46 
47 import subprocess
48 import math
49 
50 MAX_FAN_RPM = 4500
51 
52 def _rads_to_rpm(rads):
53  return rads / (2 * math.pi) * 60
54 
55 def _rpm_to_rads(rpm):
56  return rpm * (2 * math.pi) / 60
57 
58 def gpu_status_to_diag(gpu_stat):
59  stat = DiagnosticStatus()
60  stat.name = 'GPU Status'
61  stat.message = 'OK'
62  stat.level = DiagnosticStatus.OK
63  stat.hardware_id = gpu_stat.pci_device_id
64 
65  stat.values.append(KeyValue(key='Product Name', value = gpu_stat.product_name))
66  stat.values.append(KeyValue(key='PCI Device/Vendor ID', value = gpu_stat.pci_device_id))
67  stat.values.append(KeyValue(key='PCI Location ID', value = gpu_stat.pci_location))
68  stat.values.append(KeyValue(key='Display', value = gpu_stat.display))
69  stat.values.append(KeyValue(key='Driver Version', value = gpu_stat.driver_version))
70  stat.values.append(KeyValue(key='Temperature (C)', value = '%.0f' % gpu_stat.temperature))
71  stat.values.append(KeyValue(key='Fan Speed (RPM)', value = '%.0f' % _rads_to_rpm(gpu_stat.fan_speed)))
72  stat.values.append(KeyValue(key='Usage (%)', value = '%.0f' % gpu_stat.gpu_usage))
73  stat.values.append(KeyValue(key='Memory (%)', value = '%.0f' % gpu_stat.memory_usage))
74 
75  # Check for valid data
76  if not gpu_stat.product_name or not gpu_stat.pci_device_id:
77  stat.level = DiagnosticStatus.ERROR
78  stat.message = 'No Device Data'
79  return stat
80 
81  # Check load
82  if gpu_stat.gpu_usage > 98:
83  stat.level = max(stat.level, DiagnosticStatus.WARN)
84  stat.message = 'High Load'
85 
86  # Check thresholds
87  if gpu_stat.temperature > 90:
88  stat.level = max(stat.level, DiagnosticStatus.WARN)
89  stat.message = 'High Temperature'
90  if gpu_stat.temperature > 95:
91  stat.level = max(stat.level, DiagnosticStatus.ERROR)
92  stat.message = 'Temperature Alarm'
93 
94  # Check fan
95  if gpu_stat.fan_speed == 0:
96  stat.level = max(stat.level, DiagnosticStatus.ERROR)
97  stat.message = 'No Fan Speed'
98 
99 
100 
101  return stat
102 
103 
104 def _find_val(output, word):
105  lines = output.split('\n')
106  for line in lines:
107  tple = line.split(':')
108  if not len(tple) > 1:
109  continue
110 
111  name = tple[0].strip()
112  val = ':'.join(tple[1:]).strip()
113 
114  if not name.lower() == word.lower():
115  continue
116 
117  return val.strip()
118 
119  return ''
120 
121 def parse_smi_output(output):
122  gpu_stat = GPUStatus()
123 
124 
125  gpu_stat.product_name = _find_val(output, 'Product Name')
126  gpu_stat.pci_device_id = _find_val(output, 'PCI Device/Vendor ID')
127  gpu_stat.pci_location = _find_val(output, 'PCI Location ID')
128  gpu_stat.display = _find_val(output, 'Display')
129  gpu_stat.driver_version = _find_val(output, 'Driver Version')
130 
131  temp_str = _find_val(output, 'Temperature')
132  if temp_str:
133  temp, units = temp_str.split()
134  gpu_stat.temperature = int(temp)
135 
136  fan_str = _find_val(output, 'Fan Speed')
137  if fan_str:
138  # Fan speed in RPM
139  fan_spd = float(fan_str.strip('\%').strip()) * 0.01 * MAX_FAN_RPM
140  # Convert fan speed to Hz
141  gpu_stat.fan_speed = _rpm_to_rads(fan_spd)
142 
143  usage_str = _find_val(output, 'GPU')
144  if usage_str:
145  usage = usage_str.strip('\%').strip()
146  gpu_stat.gpu_usage = int(usage)
147 
148  mem_str = _find_val(output, 'Memory')
149  if mem_str:
150  mem = mem_str.strip('\%').strip()
151  gpu_stat.memory_usage = int(mem)
152 
153  return gpu_stat
154 
156  p = subprocess.Popen('sudo nvidia-smi -a', stdout = subprocess.PIPE,
157  stderr = subprocess.PIPE, shell = True)
158  (o, e) = p.communicate()
159 
160  if not p.returncode == 0:
161  return ''
162 
163  if not o: return ''
164 
165  return o


pr2_computer_monitor
Author(s): Kevin Watts (watts@willowgarage.com)
autogenerated on Tue Jun 1 2021 02:50:51