centreon-plugins/cloud/prometheus/exporters/nodeexporter/mode/cpudetailed.pm

361 lines
14 KiB
Perl

#
# Copyright 2021 Centreon (http://www.centreon.com/)
#
# Centreon is a full-fledged industry-strength solution that meets
# the needs in IT infrastructure and application monitoring for
# service performance.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
package cloud::prometheus::exporters::nodeexporter::mode::cpudetailed;
use base qw(centreon::plugins::templates::counter);
use strict;
use warnings;
sub set_counters {
my ($self, %options) = @_;
$self->{maps_counters_type} = [
{ name => 'nodes', type => 3, cb_prefix_output => 'prefix_node_output', cb_long_output => 'node_long_output',
message_multiple => 'All nodes usage are ok', indent_long_output => ' ',
group => [
{ name => 'global_cpu', cb_prefix_output => 'prefix_global_cpu_output', type => 0, skipped_code => { -10 => 1 } },
{ name => 'cpu', display_long => 1, cb_prefix_output => 'prefix_cpu_output',
message_multiple => 'All CPUs usage are ok', type => 1, skipped_code => { -10 => 1 } },
]
}
];
$self->{maps_counters}->{global_cpu} = [
{ label => 'node-wait', nlabel => 'node.cpu.wait.utilization.percentage', set => {
key_values => [ { name => 'iowait' } ],
output_template => 'Wait: %.2f %%',
perfdatas => [
{ label => 'wait', value => 'iowait', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'node-user', nlabel => 'node.cpu.user.utilization.percentage', set => {
key_values => [ { name => 'user' } ],
output_template => 'User: %.2f %%',
perfdatas => [
{ label => 'user', value => 'user', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'node-softirq', nlabel => 'node.cpu.softirq.utilization.percentage', set => {
key_values => [ { name => 'softirq' } ],
output_template => 'Soft Irq: %.2f %%',
perfdatas => [
{ label => 'softirq', value => 'softirq', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'node-interrupt', nlabel => 'node.cpu.interrupt.utilization.percentage', set => {
key_values => [ { name => 'irq' } ],
output_template => 'Interrupt: %.2f %%',
perfdatas => [
{ label => 'interrupt', value => 'irq', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'node-idle', nlabel => 'node.cpu.idle.utilization.percentage', set => {
key_values => [ { name => 'idle' } ],
output_template => 'Idle: %.2f %%',
perfdatas => [
{ label => 'idle', value => 'idle', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'node-steal', nlabel => 'node.cpu.steal.utilization.percentage', set => {
key_values => [ { name => 'steal' } ],
output_template => 'Steal: %.2f %%',
perfdatas => [
{ label => 'steal', value => 'steal', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'node-system', nlabel => 'node.cpu.system.utilization.percentage', set => {
key_values => [ { name => 'system' } ],
output_template => 'System: %.2f %%',
perfdatas => [
{ label => 'system', value => 'system', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'node-nice', nlabel => 'node.cpu.nice.utilization.percentage', set => {
key_values => [ { name => 'nice' } ],
output_template => 'Nice: %.2f %%',
perfdatas => [
{ label => 'nice', value => 'nice', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
];
$self->{maps_counters}->{cpu} = [
{ label => 'cpu-wait', nlabel => 'core.cpu.wait.utilization.percentage', set => {
key_values => [ { name => 'iowait' }, { name => 'display' } ],
output_template => 'Wait: %.2f %%',
perfdatas => [
{ label => 'wait', value => 'iowait', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'cpu-user', nlabel => 'core.cpu.user.utilization.percentage', set => {
key_values => [ { name => 'user' }, { name => 'display' } ],
output_template => 'User: %.2f %%',
perfdatas => [
{ label => 'user', value => 'user', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'cpu-softirq', nlabel => 'core.cpu.softirq.utilization.percentage', set => {
key_values => [ { name => 'softirq' }, { name => 'display' } ],
output_template => 'Soft Irq: %.2f %%',
perfdatas => [
{ label => 'softirq', value => 'softirq', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'cpu-interrupt', nlabel => 'core.cpu.interrupt.utilization.percentage', set => {
key_values => [ { name => 'irq' }, { name => 'display' } ],
output_template => 'Interrupt: %.2f %%',
perfdatas => [
{ label => 'interrupt', value => 'irq', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'cpu-idle', nlabel => 'core.cpu.idle.utilization.percentage', set => {
key_values => [ { name => 'idle' }, { name => 'display' } ],
output_template => 'Idle: %.2f %%',
perfdatas => [
{ label => 'idle', value => 'idle', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'cpu-steal', nlabel => 'core.cpu.steal.utilization.percentage', set => {
key_values => [ { name => 'steal' }, { name => 'display' } ],
output_template => 'Steal: %.2f %%',
perfdatas => [
{ label => 'steal', value => 'steal', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'cpu-system', nlabel => 'core.cpu.system.utilization.percentage', set => {
key_values => [ { name => 'system' }, { name => 'display' } ],
output_template => 'System: %.2f %%',
perfdatas => [
{ label => 'system', value => 'system', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
{ label => 'cpu-nice', nlabel => 'core.cpu.nice.utilization.percentage', set => {
key_values => [ { name => 'nice' }, { name => 'display' } ],
output_template => 'Nice: %.2f %%',
perfdatas => [
{ label => 'nice', value => 'nice', template => '%.2f',
min => 0, max => 100, unit => '%', label_extra_instance => 1 },
],
}
},
];
}
sub prefix_node_output {
my ($self, %options) = @_;
return "Node '" . $options{instance_value}->{display} . "' ";
}
sub node_long_output {
my ($self, %options) = @_;
return "Checking node '" . $options{instance_value}->{display} . "'";
}
sub prefix_global_cpu_output {
my ($self, %options) = @_;
return "CPU Average ";
}
sub prefix_cpu_output {
my ($self, %options) = @_;
return "CPU '" . $options{instance_value}->{display} . "' ";
}
sub new {
my ($class, %options) = @_;
my $self = $class->SUPER::new(package => __PACKAGE__, %options);
bless $self, $class;
$options{options}->add_options(arguments => {
"instance:s" => { name => 'instance', default => 'instance=~".*"' },
"cpu:s" => { name => 'cpu', default => 'cpu=~".*"' },
"type:s" => { name => 'type', default => 'mode=~".*"' },
"extra-filter:s@" => { name => 'extra_filter' },
"metric-overload:s@" => { name => 'metric_overload' },
});
return $self;
}
sub check_options {
my ($self, %options) = @_;
$self->SUPER::check_options(%options);
$self->{metrics} = {
'cpu' => "^node_cpu_seconds_total.*",
};
foreach my $metric (@{$self->{option_results}->{metric_overload}}) {
next if ($metric !~ /(.*),(.*)/);
$self->{metrics}->{$1} = $2 if (defined($self->{metrics}->{$1}));
}
$self->{labels} = {};
foreach my $label (('instance', 'cpu', 'type')) {
if ($self->{option_results}->{$label} !~ /^(\w+)[!~=]+\".*\"$/) {
$self->{output}->add_option_msg(short_msg => "Need to specify --" . $label . " option as a PromQL filter.");
$self->{output}->option_exit();
}
$self->{labels}->{$label} = $1;
}
$self->{extra_filter} = '';
foreach my $filter (@{$self->{option_results}->{extra_filter}}) {
$self->{extra_filter} .= ',' . $filter;
}
$self->{prom_timeframe} = defined($self->{option_results}->{timeframe}) ? $self->{option_results}->{timeframe} : 900;
$self->{prom_step} = defined($self->{option_results}->{step}) ? $self->{option_results}->{step} : "5m";
}
sub manage_selection {
my ($self, %options) = @_;
$self->{nodes} = {};
my $results = $options{custom}->query_range(
queries => [
'(irate({__name__=~"' . $self->{metrics}->{cpu} . '",' .
$self->{option_results}->{instance} . ',' .
$self->{option_results}->{cpu} . ',' .
$self->{option_results}->{type} .
$self->{extra_filter} . '}[' . $self->{prom_step} . '])) * 100'
],
timeframe => $self->{prom_timeframe}, step => $self->{prom_step}
);
foreach my $result (@{$results}) {
my $average = $options{custom}->compute(aggregation => 'average', values => $result->{values});
$self->{nodes}->{$result->{metric}->{$self->{labels}->{instance}}}->{display} = $result->{metric}->{$self->{labels}->{instance}};
$self->{nodes}->{$result->{metric}->{$self->{labels}->{instance}}}->{global_cpu}->{$result->{metric}->{$self->{labels}->{type}}} += $average;
$self->{nodes}->{$result->{metric}->{$self->{labels}->{instance}}}->{cpu}->{$result->{metric}->{$self->{labels}->{cpu}}}->{display} = $result->{metric}->{$self->{labels}->{cpu}};
$self->{nodes}->{$result->{metric}->{$self->{labels}->{instance}}}->{cpu}->{$result->{metric}->{$self->{labels}->{cpu}}}->{$result->{metric}->{$self->{labels}->{type}}} = $average;
}
foreach my $node (keys %{$self->{nodes}}) {
foreach my $metric (keys %{$self->{nodes}->{$node}->{global_cpu}}) {
next if ($metric =~ /cpu|display/);
$self->{nodes}->{$node}->{global_cpu}->{$metric} /= scalar(keys %{$self->{nodes}->{$node}->{cpu}});
}
}
if (scalar(keys %{$self->{nodes}}) <= 0) {
$self->{output}->add_option_msg(short_msg => "No nodes found.");
$self->{output}->option_exit();
}
}
1;
__END__
=head1 MODE
Check CPU detailed usage for nodes and each of their cores.
=over 8
=item B<--instance>
Filter on a specific instance (Must be a PromQL filter, Default: 'instance=~".*"')
=item B<--cpu>
Filter on a specific cpu (Must be a PromQL filter, Default: 'cpu=~".*"')
=item B<--type>
Filter on a specific type (Must be a PromQL filter, Default: 'mode=~".*"')
=item B<--warning-*>
Threshold warning.
Can be: 'node-idle', 'node-wait', 'node-interrupt', 'node-nice',
'node-softirq', 'node-steal', 'node-system', 'node-user',
'cpu-idle', 'cpu-wait', 'cpu-interrupt', 'cpu-nice', 'cpu-softirq',
'cpu-steal', 'cpu-system', 'cpu-user'.
=item B<--critical-*>
Threshold critical.
Can be: 'node-idle', 'node-wait', 'node-interrupt', 'node-nice',
'node-softirq', 'node-steal', 'node-system', 'node-user',
'cpu-idle', 'cpu-wait', 'cpu-interrupt', 'cpu-nice', 'cpu-softirq',
'cpu-steal', 'cpu-system', 'cpu-user'.
=item B<--extra-filter>
Add a PromQL filter (Can be multiple)
Example : --extra-filter='name=~".*pretty.*"'
=item B<--metric-overload>
Overload default metrics name (Can be multiple)
Example : --metric-overload='metric,^my_metric_name$'
Default :
- cpu: ^node_cpu_seconds_total.*
=item B<--filter-counters>
Only display some counters (regexp can be used).
Example: --filter-counters='wait'
=back
=cut