30 from diagnostic_msgs.msg
import DiagnosticArray, DiagnosticStatus, KeyValue
# Maps each DiagnosticStatus level constant to the label used as the
# diagnostic message text.
stat_dict = {
    DiagnosticStatus.OK: 'OK',
    DiagnosticStatus.WARN: 'Warning',
    DiagnosticStatus.ERROR: 'Error',
    DiagnosticStatus.STALE: 'Stale',
}
56 self.
_num_cores = rospy.get_param(
'~num_cores', psutil.cpu_count())
63 self._info_stat.name =
'%s CPU Info' % diag_hostname
64 self._info_stat.level = DiagnosticStatus.WARN
65 self._info_stat.hardware_id = hostname
66 self._info_stat.message =
'No Data' 67 self._info_stat.values = []
70 self._usage_stat.name =
'%s CPU Usage' % diag_hostname
71 self._usage_stat.level = DiagnosticStatus.WARN
72 self._usage_stat.hardware_id = hostname
73 self._usage_stat.message =
'No Data' 74 self._usage_stat.values = []
77 self._memory_stat.name =
'%s Memory Usage' % diag_hostname
78 self._memory_stat.level = DiagnosticStatus.WARN
79 self._memory_stat.hardware_id = hostname
80 self._memory_stat.message =
'No Data' 81 self._memory_stat.values = []
83 self.
_diag_pub = rospy.Publisher(
'/diagnostics', DiagnosticArray, queue_size=1)
93 diag_level = DiagnosticStatus.OK
96 p = subprocess.Popen(
'sudo ipmitool sdr',
97 stdout = subprocess.PIPE,
98 stderr = subprocess.PIPE, shell =
True)
99 stdout, stderr = p.communicate()
100 retcode = p.returncode
102 stdout = stdout.decode()
103 except (UnicodeDecodeError, AttributeError):
107 diag_level = DiagnosticStatus.ERROR
108 diag_msgs = [
'ipmitool Error' ]
109 diag_vals = [ KeyValue(key =
'IPMI Error', value = stderr) ,
110 KeyValue(key =
'Output', value = stdout) ]
111 return diag_vals, diag_msgs, diag_level
113 lines = stdout.split(
'\n')
115 diag_vals = [ KeyValue(key =
'ipmitool status', value =
'No output') ]
117 diag_msgs = [
'No ipmitool response' ]
118 diag_level = DiagnosticStatus.ERROR
120 return diag_vals, diag_msgs, diag_level
126 words = ln.split(
'|')
130 name = words[0].strip()
131 ipmi_val = words[1].strip()
135 if words[0].startswith(
'CPU')
and words[0].strip().endswith(
'Temp'):
136 if words[1].strip().endswith(
'degrees C'):
137 tmp = ipmi_val.rstrip(
' degrees C').lstrip()
139 temperature = float(tmp)
140 diag_vals.append(KeyValue(key = name +
' (C)', value = tmp))
144 diag_level = max(diag_level, DiagnosticStatus.ERROR)
145 diag_msgs.append(
'CPU Hot')
147 diag_level = max(diag_level, DiagnosticStatus.WARN)
148 diag_msgs.append(
'CPU Warm')
150 diag_level = max(diag_level, DiagnosticStatus.ERROR)
151 diag_msgs.append(
'Error: temperature not numeric')
153 diag_vals.append(KeyValue(key = name, value = words[1]))
157 if name ==
'MB Temp' or name ==
'BP Temp' or name ==
'FP Temp':
158 if ipmi_val.endswith(
'degrees C'):
159 tmp = ipmi_val.rstrip(
' degrees C').lstrip()
160 diag_vals.append(KeyValue(key = name +
' (C)', value = tmp))
162 dev_name = name.split()[0]
164 temperature = float(tmp)
166 if temperature >= 60
and temperature < 75:
167 diag_level = max(diag_level, DiagnosticStatus.WARN)
168 diag_msgs.append(
'%s Warm' % dev_name)
170 if temperature >= 75:
171 diag_level = max(diag_level, DiagnosticStatus.ERROR)
172 diag_msgs.append(
'%s Hot' % dev_name)
174 diag_level = max(diag_level, DiagnosticStatus.ERROR)
175 diag_msgs.append(
'%s Error: temperature not numeric' % dev_name)
177 diag_vals.append(KeyValue(key = name, value = ipmi_val))
180 if (name.startswith(
'CPU')
and name.endswith(
'Fan'))
or name ==
'MB Fan':
181 if ipmi_val.endswith(
'RPM'):
182 rpm = ipmi_val.rstrip(
' RPM').lstrip()
185 diag_level = max(diag_level, DiagnosticStatus.ERROR)
186 diag_msgs.append(
'CPU Fan Off')
188 diag_vals.append(KeyValue(key = name +
' RPM', value = rpm))
190 diag_vals.append(KeyValue(key = name, value = ipmi_val))
194 if name.startswith(
'CPU')
and name.endswith(
'hot'):
195 if ipmi_val ==
'0x01':
196 diag_vals.append(KeyValue(key = name, value =
'OK'))
198 diag_vals.append(KeyValue(key = name, value =
'Hot'))
199 diag_level = max(diag_level, DiagnosticStatus.ERROR)
200 diag_msgs.append(
'CPU Hot Alarm')
202 except Exception
as e:
203 diag_level = DiagnosticStatus.ERROR
204 diag_msgs = [
'IPMI Exception' ]
205 diag_vals = [ KeyValue(key =
'Exception', value = str(e)) ]
207 return diag_vals, diag_msgs, diag_level
215 diag_level = DiagnosticStatus.OK
218 for device_type, devices
in list(self._temp_vals.items()):
220 cmd =
'cat %s' % dev[1]
221 p = subprocess.Popen(cmd, stdout = subprocess.PIPE,
222 stderr = subprocess.PIPE, shell =
True)
223 stdout, stderr = p.communicate()
224 retcode = p.returncode
226 stdout = stdout.decode()
227 except (UnicodeDecodeError, AttributeError):
231 diag_level = DiagnosticStatus.ERROR
232 diag_msgs = [
'Core Temp Error' ]
233 diag_vals = [ KeyValue(key =
'Core Temp Error', value = stderr),
234 KeyValue(key =
'Output', value = stdout) ]
235 return diag_vals, diag_msgs, diag_level
238 if device_type ==
'platform':
240 temp = float(tmp) / 1000
241 diag_vals.append(KeyValue(key =
'Temp '+dev[0], value = str(temp)))
244 diag_level = max(diag_level, DiagnosticStatus.OK)
245 diag_msgs.append(
'CPU Hot')
247 diag_level = max(diag_level, DiagnosticStatus.OK)
248 diag_msgs.append(
'CPU Warm')
250 diag_level = max(diag_level, DiagnosticStatus.ERROR)
251 diag_vals.append(KeyValue(key =
'Temp '+dev[0], value = tmp))
253 diag_vals.append(KeyValue(key =
'Temp '+dev[0], value = tmp))
255 except Exception
as e:
256 diag_level = DiagnosticStatus.ERROR
257 diag_msgs = [
'Core Temp Exception' ]
258 diag_vals = [ KeyValue(key =
'Exception', value = str(e)) ]
260 return diag_vals, diag_msgs, diag_level
266 diag_level = DiagnosticStatus.OK
270 p = subprocess.Popen(
'cat /proc/cpuinfo | grep MHz',
271 stdout = subprocess.PIPE,
272 stderr = subprocess.PIPE, shell =
True)
273 stdout, stderr = p.communicate()
274 retcode = p.returncode
276 stdout = stdout.decode()
277 except (UnicodeDecodeError, AttributeError):
281 diag_level = DiagnosticStatus.ERROR
282 diag_msgs = [
'Clock Speed Error' ]
283 diag_vals = [ KeyValue(key =
'Clock Speed Error', value = stderr),
284 KeyValue(key =
'Output', value = stdout) ]
285 return (diag_vals, diag_msgs, diag_level)
287 for index, ln
in enumerate(stdout.split(
'\n')):
288 words = ln.split(
':')
292 speed = words[1].strip().split(
'.')[0]
293 diag_vals.append(KeyValue(key =
'Core %d MHz' % index, value = speed))
297 diag_level = max(diag_level, DiagnosticStatus.ERROR)
298 diag_msgs = [
'Clock speed not numeric' ]
301 p = subprocess.Popen(
'lscpu | grep "max MHz"',
302 stdout = subprocess.PIPE,
303 stderr = subprocess.PIPE, shell =
True)
304 stdout, stderr = p.communicate()
305 retcode = p.returncode
307 stdout = stdout.decode()
308 except (UnicodeDecodeError, AttributeError):
312 diag_level = DiagnosticStatus.ERROR
313 diag_msgs = [
'Clock Speed Error' ]
314 diag_vals = [ KeyValue(key =
'Clock Speed Error', value = stderr),
315 KeyValue(key =
'Output', value = stdout) ]
316 return (diag_vals, diag_msgs, diag_level)
318 diag_vals.append(KeyValue(key = stdout.split(
':')[0].strip(), value = str(stdout.split(
':')[1].strip())))
321 p = subprocess.Popen(
'lscpu | grep "min MHz"',
322 stdout = subprocess.PIPE,
323 stderr = subprocess.PIPE, shell =
True)
324 stdout, stderr = p.communicate()
325 retcode = p.returncode
327 stdout = stdout.decode()
328 except (UnicodeDecodeError, AttributeError):
332 diag_level = DiagnosticStatus.ERROR
333 diag_msgs = [
'Clock Speed Error' ]
334 diag_vals = [ KeyValue(key =
'Clock Speed Error', value = stderr),
335 KeyValue(key =
'Output', value = stdout) ]
336 return (diag_vals, diag_msgs, diag_level)
338 diag_vals.append(KeyValue(key = stdout.split(
':')[0].strip(), value = str(stdout.split(
':')[1].strip())))
340 except Exception
as e:
341 diag_level = DiagnosticStatus.ERROR
342 diag_msgs = [
'Clock Speed Exception' ]
343 diag_vals = [ KeyValue(key =
'Exception', value = str(e)) ]
345 return diag_vals, diag_msgs, diag_level
351 diag_level = DiagnosticStatus.OK
353 load_dict = { DiagnosticStatus.OK:
'OK', DiagnosticStatus.WARN:
'High Load', DiagnosticStatus.ERROR:
'Very High Load' }
356 p = subprocess.Popen(
'uptime', stdout = subprocess.PIPE,
357 stderr = subprocess.PIPE, shell =
True)
358 stdout, stderr = p.communicate()
359 retcode = p.returncode
361 stdout = stdout.decode()
362 except (UnicodeDecodeError, AttributeError):
366 diag_level = DiagnosticStatus.ERROR
367 diag_msg =
'Uptime Error' 368 diag_vals = [ KeyValue(key =
'Uptime Error', value = stderr),
369 KeyValue(key =
'Output', value = stdout) ]
370 return (diag_vals, diag_msg, diag_level)
372 upvals = stdout.split()
373 load1 = upvals[-3].rstrip(
',').replace(
',',
'.')
374 load5 = upvals[-2].rstrip(
',').replace(
',',
'.')
375 load15 = upvals[-1].replace(
',',
'.')
376 num_users = upvals[-7]
380 diag_level = DiagnosticStatus.WARN
382 diag_vals.append(KeyValue(key =
'Load Average Status', value = load_dict[diag_level]))
383 diag_vals.append(KeyValue(key =
'1 min Load Average', value = load1))
384 diag_vals.append(KeyValue(key =
'1 min Load Average Threshold', value = str(self.
_load1_threshold)))
385 diag_vals.append(KeyValue(key =
'5 min Load Average', value = load5))
386 diag_vals.append(KeyValue(key =
'5 min Load Average Threshold', value = str(self.
_load5_threshold)))
387 diag_vals.append(KeyValue(key =
'15 min Load Average', value = load15))
388 diag_vals.append(KeyValue(key =
'Number of Users', value = num_users))
390 diag_msg = load_dict[diag_level]
392 except Exception
as e:
393 diag_level = DiagnosticStatus.ERROR
394 diag_msg =
'Uptime Exception' 395 diag_vals = [ KeyValue(key =
'Exception', value = str(e)) ]
397 return diag_vals, diag_msg, diag_level
403 diag_level = DiagnosticStatus.OK
405 mem_dict = { DiagnosticStatus.OK:
'OK', DiagnosticStatus.WARN:
'Low Memory', DiagnosticStatus.ERROR:
'Very Low Memory' }
408 p = subprocess.Popen(
'free -m',
409 stdout = subprocess.PIPE,
410 stderr = subprocess.PIPE, shell =
True)
411 stdout, stderr = p.communicate()
412 retcode = p.returncode
414 stdout = stdout.decode()
415 except (UnicodeDecodeError, AttributeError):
419 diag_level = DiagnosticStatus.ERROR
420 diag_msg =
'Memory Usage Error' 421 diag_vals = [ KeyValue(key =
'Memory Usage Error', value = stderr),
422 KeyValue(key =
'Output', value = stdout) ]
423 return (diag_vals, diag_msg, diag_level)
425 rows = stdout.split(
'\n')
428 data = rows[1].split()
434 available_mem = data[6]
436 diag_level = DiagnosticStatus.OK
438 diag_level = DiagnosticStatus.WARN
440 diag_level = DiagnosticStatus.ERROR
442 diag_vals.append(KeyValue(key =
'Mem Status', value = mem_dict[diag_level]))
443 diag_vals.append(KeyValue(key =
'Mem Total', value = total_mem))
444 diag_vals.append(KeyValue(key =
'Mem Used', value = used_mem))
445 diag_vals.append(KeyValue(key =
'Mem Free', value = free_mem))
446 diag_vals.append(KeyValue(key =
'Mem Shared', value = shared_mem))
447 diag_vals.append(KeyValue(key =
'Mem Buff/Cache', value = cache_mem))
448 diag_vals.append(KeyValue(key =
'Mem Available', value = available_mem))
451 data = rows[2].split()
456 diag_vals.append(KeyValue(key =
'Swap Total', value = total_mem))
457 diag_vals.append(KeyValue(key =
'Swap Used', value = used_mem))
458 diag_vals.append(KeyValue(key =
'Swap Free', value = free_mem))
460 diag_msg = mem_dict[diag_level]
462 except Exception
as e:
463 diag_level = DiagnosticStatus.ERROR
464 diag_msg =
'Memory Usage Exception' 465 diag_vals = [ KeyValue(key =
'Exception', value = str(e)) ]
467 return diag_vals, diag_msg, diag_level
473 diag_level = DiagnosticStatus.OK
475 load_dict = { DiagnosticStatus.OK:
'OK', DiagnosticStatus.WARN:
'High Load', DiagnosticStatus.ERROR:
'Error' }
478 p = subprocess.Popen(
'mpstat -P ALL 1 1',
479 stdout = subprocess.PIPE,
480 stderr = subprocess.PIPE, shell =
True)
481 stdout, stderr = p.communicate()
482 retcode = p.returncode
484 stdout = stdout.decode()
485 except (UnicodeDecodeError, AttributeError):
489 diag_level = DiagnosticStatus.ERROR
490 diag_msg =
'CPU Usage Error' 491 diag_vals = [ KeyValue(key =
'CPU Usage Error', value = stderr),
492 KeyValue(key =
'Output', value = stdout) ]
493 return (diag_vals, diag_msg, diag_level)
497 rows = stdout.split(
'\n')
498 col_names = rows[2].split()
499 idle_col = -1
if (len(col_names) > 2
and col_names[-1] ==
'%idle')
else -2
503 for index, row
in enumerate(stdout.split(
'\n')):
508 if row.find(
'all') > -1:
516 if lst[0].startswith(
'Average'):
519 cpu_name =
'%d' % (num_cores)
520 idle = lst[idle_col].replace(
',',
'.')
521 user = lst[3].replace(
',',
'.')
522 nice = lst[4].replace(
',',
'.')
523 system = lst[5].replace(
',',
'.')
525 core_level = DiagnosticStatus.OK
526 usage = float(user) + float(nice)
529 core_level = DiagnosticStatus.WARN
531 core_level = DiagnosticStatus.ERROR
533 diag_vals.append(KeyValue(key =
'CPU %s Status' % cpu_name, value = load_dict[core_level]))
534 diag_vals.append(KeyValue(key =
'CPU %s User' % cpu_name, value = user))
535 diag_vals.append(KeyValue(key =
'CPU %s Nice' % cpu_name, value = nice))
536 diag_vals.append(KeyValue(key =
'CPU %s System' % cpu_name, value = system))
537 diag_vals.append(KeyValue(key =
'CPU %s Idle' % cpu_name, value = idle))
542 if num_cores - cores_loaded <= 2
and num_cores > 2:
543 diag_level = DiagnosticStatus.WARN
547 diag_level = DiagnosticStatus.ERROR
# The % operator binds tighter than the comma, so both values have to be
# passed as a single tuple. Without the parentheses only self._num_cores is
# handed to the format string, which raises
# "TypeError: not enough arguments for format string".
diag_msg = ('Incorrect number of CPU cores: Expected %d, got %d. '
            'Computer may have not booted properly.'
            % (self._num_cores, num_cores))
549 return diag_vals, diag_msg, diag_level
551 diag_msg = load_dict[diag_level]
553 except Exception
as e:
554 diag_level = DiagnosticStatus.ERROR
555 diag_msg =
'CPU Usage Exception' 556 diag_vals = [ KeyValue(key =
'Exception', value = str(e)) ]
558 return diag_vals, diag_msg, diag_level
562 NETDATA_URI =
'http://127.0.0.1:19999/api/v1/data?chart=%s&format=json&after=-%d' 563 url = NETDATA_URI % (chart, int(after))
566 r = requests.get(url)
567 except requests.ConnectionError
as ex:
568 rospy.logerr(
"NetData ConnectionError %r", ex)
571 if r.status_code != 200:
572 rospy.logerr(
"NetData request not successful with status_code %d", r.status_code)
577 sdata = list(zip(*rdata[
'data']))
580 for idx, label
in enumerate(rdata[
'labels']):
581 np_array = np.array(sdata[idx])
# np.object was only an alias for the builtin `object`; it was deprecated in
# NumPy 1.20 and removed in 1.24, where accessing it raises AttributeError.
# Comparing the dtype against the builtin works on every NumPy version.
if np_array.dtype == object:
    rospy.logwarn("Data from NetData malformed")
591 throt_dict = {DiagnosticStatus.OK:
'OK', DiagnosticStatus.WARN:
'High Thermal Throttling Events',
592 DiagnosticStatus.ERROR:
'No valid Data from NetData'}
594 throt_level = DiagnosticStatus.OK
598 netdata = self.
query_netdata(
'cpu.core_throttling', interval)
601 lbl =
'CPU %d Thermal Throttling Events' % i
602 netdata_key =
'cpu%d' % i
605 if netdata
is not None and netdata_key
in netdata:
606 core_data = netdata[netdata_key]
607 if core_data
is not None:
608 core_mean = np.mean(core_data)
611 throt_level = DiagnosticStatus.WARN
613 throt_level = DiagnosticStatus.ERROR
615 vals.append(KeyValue(key=lbl, value=
'%r' % core_mean))
617 vals.insert(0, KeyValue(key=
'Thermal Throttling Status', value=throt_dict[throt_level]))
620 return throt_level, throt_dict[throt_level], vals
624 jitter_dict = {DiagnosticStatus.OK:
'OK', DiagnosticStatus.WARN:
'High IDLE Jitter',
625 DiagnosticStatus.ERROR:
'No valid Data from NetData'}
627 jitter_level = DiagnosticStatus.OK
639 for metric_label, metric_key, metric_threshold, aggregate_fnc
in metric_list:
640 metric_aggreagte =
'N/A' 641 if netdata
is not None and metric_key
in netdata:
642 metric_data = netdata[metric_key]
643 if metric_data
is not None:
644 metric_aggreagte = aggregate_fnc(metric_data)
646 if metric_aggreagte > metric_threshold:
647 jitter_level = DiagnosticStatus.WARN
649 jitter_level = DiagnosticStatus.ERROR
651 vals.append(KeyValue(key=metric_label, value=str(metric_aggreagte)))
652 vals.append(KeyValue(key=metric_label +
' Threshold', value=str(metric_threshold)))
654 vals.insert(0, KeyValue(key=
'IDLE Jitter Status', value=jitter_dict[jitter_level]))
656 return jitter_level, jitter_dict[jitter_level], vals
666 p = subprocess.Popen(
'find /sys/devices/platform -name temp*_input',
667 stdout = subprocess.PIPE,
668 stderr = subprocess.PIPE, shell =
True)
669 stdout, stderr = p.communicate()
670 retcode = p.returncode
672 stdout = stdout.decode()
673 except (UnicodeDecodeError, AttributeError):
677 rospy.logerr(
'Error find core temp locations: %s' % stderr)
680 for ln
in stdout.split(
'\n'):
682 device_path, device_file = os.path.split(ln.strip())
683 device_label = device_path+
'/'+device_file.split(
'_')[0]+
'_label' 684 name = open(device_label,
'r').read() 685 pair = (name.strip(), ln.strip()) 686 platform_vals.append(pair) 689 p = subprocess.Popen(
'find /sys/devices/virtual -name temp*_input',
690 stdout = subprocess.PIPE,
691 stderr = subprocess.PIPE, shell =
True)
692 stdout, stderr = p.communicate()
693 retcode = p.returncode
695 stdout = stdout.decode()
696 except (UnicodeDecodeError, AttributeError):
700 rospy.logerr(
'Error find core temp locations: %s' % stderr)
703 for ln
in stdout.split(
'\n'):
705 device_path, device_file = os.path.split(ln.strip())
706 name = open(device_path+
'/name',
'r').read() 707 pair = (name.strip(), ln.strip()) 708 virtual_vals.append(pair) 710 devices['platform'] = platform_vals
711 devices[
'virtual'] = virtual_vals
713 except Exception
as e:
714 rospy.logerr(
'Exception finding temp vals: {}'.format(e))
721 diag_level = DiagnosticStatus.OK
724 ipmi_vals, ipmi_msgs, ipmi_level = self.
check_ipmi()
725 diag_vals.extend(ipmi_vals)
726 diag_msgs.extend(ipmi_msgs)
727 diag_level = max(diag_level, ipmi_level)
731 diag_vals.extend(core_vals)
732 diag_msgs.extend(core_msgs)
733 diag_level = max(diag_level, core_level)
736 diag_vals.extend(clock_vals)
737 diag_msgs.extend(clock_msgs)
738 diag_level = max(diag_level, clock_level)
740 diag_log = set(diag_msgs)
741 if len(diag_log) > DiagnosticStatus.OK:
742 message =
', '.join(diag_log)
744 message = stat_dict[diag_level]
746 self._info_stat.values = diag_vals
747 self._info_stat.message = message
748 self._info_stat.level = diag_level
753 diag_level = DiagnosticStatus.OK
757 diag_vals.extend(mp_vals)
758 if mp_level > DiagnosticStatus.OK:
759 diag_msgs.append(mp_msg)
760 diag_level = max(diag_level, mp_level)
764 interval = math.ceil(self._usage_timer._period.to_sec())
766 diag_vals.extend(throt_vals)
768 diag_msgs.append(throt_msg)
769 diag_level = max(diag_level, throt_level)
773 interval = math.ceil(self._usage_timer._period.to_sec())
774 jitter_level, jitter_msg, jitter_vals = self.
check_idlejitter(interval=interval)
775 diag_vals.extend(jitter_vals)
777 diag_msgs.append(jitter_msg)
778 diag_level = max(diag_level, jitter_level)
782 diag_vals.extend(up_vals)
783 if up_level > DiagnosticStatus.OK:
784 diag_msgs.append(up_msg)
785 diag_level = max(diag_level, up_level)
787 if diag_msgs
and diag_level > DiagnosticStatus.OK:
788 usage_msg =
', '.join(set(diag_msgs))
790 usage_msg = stat_dict[diag_level]
792 self._usage_stat.values = diag_vals
793 self._usage_stat.message = usage_msg
794 self._usage_stat.level = diag_level
799 diag_level = DiagnosticStatus.OK
803 diag_vals.extend(mem_vals)
804 if mem_level > DiagnosticStatus.OK:
805 diag_msgs.append(mem_msg)
806 diag_level = max(diag_level, mem_level)
808 if diag_msgs
and diag_level > DiagnosticStatus.OK:
809 memory_msg =
', '.join(set(diag_msgs))
811 memory_msg = stat_dict[diag_level]
813 self._memory_stat.values = diag_vals
814 self._memory_stat.message = memory_msg
815 self._memory_stat.level = diag_level
818 msg = DiagnosticArray()
819 msg.header.stamp = rospy.get_rostime()
823 self._diag_pub.publish(msg)
826 if __name__ ==
'__main__':
827 hostname = socket.gethostname()
830 parser = optparse.OptionParser(usage=
"usage: cpu_monitor.py [--diag-hostname=cX]")
831 parser.add_option(
"--diag-hostname", dest=
"diag_hostname",
832 help=
"Computer name in diagnostics output (ex: 'c1')",
833 metavar=
"DIAG_HOSTNAME",
834 action=
"store", default = hostname)
835 options, args = parser.parse_args(rospy.myargv())
838 rospy.init_node(
'cpu_monitor_%s' % hostname)
839 except rospy.exceptions.ROSInitException:
840 print(
'CPU monitor is unable to initialize node. Master may not be running.')
def check_clock_speed(self)
Checks clock speed by reading CPU info.
def check_memory(self, event)
def check_usage(self, event)
def check_core_throttling(self, interval=1)
def check_free_memory(self)
Uses 'free -m' to check free memory.
def check_ipmi(self)
Output entire IPMI data set.
def get_core_temp_names(self)
Returns names for core temperature files.
_idlejitter_max_threshold
def query_netdata(self, chart, after)
def check_uptime(self)
Uses 'uptime' to see load average.
def __init__(self, hostname, diag_hostname)
_idlejitter_average_threshold
_thermal_throttling_threshold
def publish_stats(self, event)
def check_info(self, event)
def check_idlejitter(self, interval=1)
_idlejitter_min_threshold
_check_thermal_throttling_events
def check_mpstat(self)
Use mpstat to find CPU usage.
def check_core_temps(self)
Check CPU core temps.