nvidia_smi_util.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 #
00003 # Software License Agreement (BSD License)
00004 #
00005 # Copyright (c) 2010, Willow Garage, Inc.
00006 # All rights reserved.
00007 #
00008 # Redistribution and use in source and binary forms, with or without
00009 # modification, are permitted provided that the following conditions
00010 # are met:
00011 #
00012 #  * Redistributions of source code must retain the above copyright
00013 #    notice, this list of conditions and the following disclaimer.
00014 #  * Redistributions in binary form must reproduce the above
00015 #    copyright notice, this list of conditions and the following
00016 #    disclaimer in the documentation and/or other materials provided
00017 #    with the distribution.
00018 #  * Neither the name of the Willow Garage nor the names of its
00019 #    contributors may be used to endorse or promote products derived
00020 #    from this software without specific prior written permission.
00021 #
00022 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00023 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00024 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00025 # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
00026 # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00027 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00028 # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00029 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00030 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00031 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00032 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00033 # POSSIBILITY OF SUCH DAMAGE.
00034 
00035 ##\author Kevin Watts
00036 
00037 from __future__ import division
00038 
00039 PKG = 'pr2_computer_monitor'
00040 import roslib; roslib.load_manifest(PKG)
00041 
00042 import rospy
00043 
00044 from diagnostic_msgs.msg import DiagnosticArray, DiagnosticStatus, KeyValue
00045 from pr2_msgs.msg import GPUStatus
00046 
00047 import subprocess
00048 import math
00049 
00050 MAX_FAN_RPM = 4500
00051 
00052 def _rads_to_rpm(rads):
00053     return rads / (2 * math.pi) * 60
00054 
00055 def _rpm_to_rads(rpm):
00056     return rpm * (2 * math.pi) / 60
00057 
00058 def gpu_status_to_diag(gpu_stat):
00059     stat = DiagnosticStatus()
00060     stat.name = 'GPU Status'
00061     stat.message = 'OK'
00062     stat.level = DiagnosticStatus.OK
00063     stat.hardware_id = gpu_stat.pci_device_id
00064 
00065     stat.values.append(KeyValue(key='Product Name',         value = gpu_stat.product_name))
00066     stat.values.append(KeyValue(key='PCI Device/Vendor ID', value = gpu_stat.pci_device_id))
00067     stat.values.append(KeyValue(key='PCI Location ID',      value = gpu_stat.pci_location))
00068     stat.values.append(KeyValue(key='Display',              value = gpu_stat.display))
00069     stat.values.append(KeyValue(key='Driver Version',       value = gpu_stat.driver_version))
00070     stat.values.append(KeyValue(key='Temperature (C)',      value = '%.0f' % gpu_stat.temperature))
00071     stat.values.append(KeyValue(key='Fan Speed (RPM)',      value = '%.0f' % _rads_to_rpm(gpu_stat.fan_speed)))
00072     stat.values.append(KeyValue(key='Usage (%)',            value = '%.0f' % gpu_stat.gpu_usage))
00073     stat.values.append(KeyValue(key='Memory (%)',           value = '%.0f' % gpu_stat.memory_usage))
00074 
00075     # Check for valid data
00076     if not gpu_stat.product_name or not gpu_stat.pci_device_id:
00077         stat.level = DiagnosticStatus.ERROR
00078         stat.message = 'No Device Data'
00079         return stat
00080 
00081     # Check load
00082     if gpu_stat.gpu_usage > 98:
00083         stat.level = max(stat.level, DiagnosticStatus.WARN)
00084         stat.message = 'High Load'
00085 
00086     # Check thresholds
00087     if gpu_stat.temperature > 90:
00088         stat.level = max(stat.level, DiagnosticStatus.WARN)
00089         stat.message = 'High Temperature'
00090     if gpu_stat.temperature > 95:
00091         stat.level = max(stat.level, DiagnosticStatus.ERROR)
00092         stat.message = 'Temperature Alarm'
00093 
00094     # Check fan
00095     if gpu_stat.fan_speed == 0:
00096         stat.level = max(stat.level, DiagnosticStatus.ERROR)
00097         stat.message = 'No Fan Speed'
00098 
00099 
00100 
00101     return stat
00102 
00103 
00104 def _find_val(output, word):
00105     lines = output.split('\n')
00106     for line in lines:
00107         tple = line.split(':')
00108         if not len(tple) > 1:
00109             continue
00110 
00111         name = tple[0].strip()
00112         val = ':'.join(tple[1:]).strip()
00113 
00114         if not name.lower() == word.lower():
00115             continue
00116         
00117         return val.strip()
00118 
00119     return ''
00120 
00121 def parse_smi_output(output):
00122     gpu_stat = GPUStatus()
00123 
00124     
00125     gpu_stat.product_name   = _find_val(output, 'Product Name')
00126     gpu_stat.pci_device_id  = _find_val(output, 'PCI Device/Vendor ID')
00127     gpu_stat.pci_location   = _find_val(output, 'PCI Location ID')
00128     gpu_stat.display        = _find_val(output, 'Display')
00129     gpu_stat.driver_version = _find_val(output, 'Driver Version')
00130     
00131     temp_str = _find_val(output, 'Temperature')
00132     if temp_str:
00133         temp, units = temp_str.split()
00134         gpu_stat.temperature = int(temp)
00135 
00136     fan_str = _find_val(output, 'Fan Speed')
00137     if fan_str:
00138         # Fan speed in RPM
00139         fan_spd = float(fan_str.strip('\%').strip()) * 0.01 * MAX_FAN_RPM
00140         # Convert fan speed to Hz
00141         gpu_stat.fan_speed = _rpm_to_rads(fan_spd)
00142 
00143     usage_str = _find_val(output, 'GPU')
00144     if usage_str:
00145         usage = usage_str.strip('\%').strip()
00146         gpu_stat.gpu_usage = int(usage)
00147         
00148     mem_str = _find_val(output, 'Memory')
00149     if mem_str:
00150         mem = mem_str.strip('\%').strip()
00151         gpu_stat.memory_usage = int(mem)
00152 
00153     return gpu_stat
00154         
00155 def get_gpu_status():
00156     p = subprocess.Popen('sudo nvidia-smi -a', stdout = subprocess.PIPE, 
00157                          stderr = subprocess.PIPE, shell = True)
00158     (o, e) = p.communicate()
00159 
00160     if not p.returncode == 0:
00161         return ''
00162 
00163     if not o: return ''
00164 
00165     return o


pr2_computer_monitor
Author(s): Kevin Watts (watts@willowgarage.com)
autogenerated on Fri Dec 6 2013 20:23:59