enh(checkpoint/snmp): hardware mode - add threshold options

This commit is contained in:
garnier-quentin 2021-07-26 10:33:54 +02:00
parent a361355644
commit 82b6dc7636
6 changed files with 176 additions and 74 deletions

View File

@ -26,13 +26,13 @@ use warnings;
my %map_states_fan = (
0 => 'false',
1 => 'true',
2 => 'reading error',
2 => 'reading error'
);
my $mapping = {
fanSpeedSensorName => { oid => '.1.3.6.1.4.1.2620.1.6.7.8.2.1.2' },
fanSpeedSensorValue => { oid => '.1.3.6.1.4.1.2620.1.6.7.8.2.1.3' },
fanSpeedSensorStatus => { oid => '.1.3.6.1.4.1.2620.1.6.7.8.2.1.6', map => \%map_states_fan },
fanSpeedSensorStatus => { oid => '.1.3.6.1.4.1.2620.1.6.7.8.2.1.6', map => \%map_states_fan }
};
my $oid_fanSpeedSensorEntry = '.1.3.6.1.4.1.2620.1.6.7.8.2.1';
@ -53,28 +53,49 @@ sub check {
next if ($oid !~ /^$mapping->{fanSpeedSensorStatus}->{oid}\.(.*)$/);
my $instance = $1;
my $result = $self->{snmp}->map_instance(mapping => $mapping, results => $self->{results}->{$oid_fanSpeedSensorEntry}, instance => $instance);
next if ($self->check_filter(section => 'fan', instance => $instance));
next if ($self->check_filter(section => 'fan', instance => $instance, name => $result->{fanSpeedSensorName}));
# can be SysFAN(J4)
next if ($result->{fanSpeedSensorName} !~ /^[\(\)0-9a-zA-Z ]+$/); # sometimes there is some wrong values in hex
$self->{components}->{fan}->{total}++;
$self->{output}->output_add(long_msg => sprintf("Fan '%s' sensor out of range status is '%s'",
$result->{fanSpeedSensorName}, $result->{fanSpeedSensorStatus}));
my $exit = $self->get_severity(section => 'fan', value => $result->{fanSpeedSensorStatus});
$self->{output}->output_add(
long_msg => sprintf(
"fan '%s' sensor out of range status is '%s'",
$result->{fanSpeedSensorName},
$result->{fanSpeedSensorStatus}
)
);
my $exit = $self->get_severity(section => 'fan', instance => $instance, value => $result->{fanSpeedSensorStatus});
if (!$self->{output}->is_status(value => $exit, compare => 'ok', litteral => 1)) {
$self->{output}->output_add(severity => $exit,
short_msg => sprintf("Fan '%s' sensor out of range status is '%s'", $result->{fanSpeedSensorName}, $result->{fanSpeedSensorStatus}));
}
if (defined($result->{fanSpeedSensorValue}) && $result->{fanSpeedSensorValue} =~ /^[0-9\.]+$/) {
$self->{output}->perfdata_add(
label => 'fan_speed', unit => 'rpm',
nlabel => 'hardware.fan.speed.rpm',
instances => [$result->{fanSpeedSensorName}, $instance],
value => sprintf("%d", $result->{fanSpeedSensorValue})
$self->{output}->output_add(
severity => $exit,
short_msg => sprintf(
"Fan '%s' sensor out of range status is '%s'",
$result->{fanSpeedSensorName},
$result->{fanSpeedSensorStatus}
)
);
}
next if (defined($result->{fanSpeedSensorValue}) && $result->{fanSpeedSensorValue} !~ /^[0-9\.]+$/);
my ($exit2, $warn, $crit, $checked) = $self->get_severity_numeric(section => 'fan', instance => $instance, name => $result->{fanSpeedSensorName}, value => $result->{fanSpeedSensorValue});
if (!$self->{output}->is_status(value => $exit2, compare => 'ok', litteral => 1)) {
$self->{output}->output_add(
severity => $exit2,
short_msg => sprintf("Fan '%s' sensor is %s rpm", $result->{fanSpeedSensorName}, $result->{fanSpeedSensorValue})
);
}
$self->{output}->perfdata_add(
label => 'fan_speed', unit => 'rpm',
nlabel => 'hardware.fan.speed.rpm',
instances => [$result->{fanSpeedSensorName}, $instance],
value => sprintf('%d', $result->{fanSpeedSensorValue}),
warning => $warn,
critical => $crit
);
}
}

View File

@ -30,7 +30,7 @@ my $oid_powerSupplyStatus = '.1.3.6.1.4.1.2620.1.6.7.9.1.1.2';
# Append the power supply status OID to the list of SNMP requests
# held in $self->{request}.
sub load {
    my $self = shift;

    my $request = { oid => $oid_powerSupplyStatus };
    push @{ $self->{request} }, $request;
}
@ -40,21 +40,32 @@ sub check {
$self->{output}->output_add(long_msg => "Checking power supplies");
$self->{components}->{psu} = {name => 'psus', total => 0, skip => 0};
return if ($self->check_filter(section => 'psu'));
foreach my $oid ($self->{snmp}->oid_lex_sort(keys %{$self->{results}->{$oid_powerSupplyStatus}})) {
next if ($oid !~ /^$mapping->{powerSupplyStatus}->{oid}\.(.*)$/);
my $instance = $1;
my $result = $self->{snmp}->map_instance(mapping => $mapping, results => $self->{results}->{$oid_powerSupplyStatus}, instance => $instance);
next if ($self->check_filter(section => 'psu', instance => $instance));
$self->{components}->{psu}->{total}++;
$self->{output}->output_add(long_msg => sprintf("Power supply '%s' status is '%s'",
$instance, $result->{powerSupplyStatus}));
my $exit = $self->get_severity(section => 'psu', value => $result->{powerSupplyStatus});
$self->{output}->output_add(
long_msg => sprintf(
"Power supply '%s' status is '%s'",
$instance,
$result->{powerSupplyStatus}
)
);
my $exit = $self->get_severity(section => 'psu', instance => $instance, value => $result->{powerSupplyStatus});
if (!$self->{output}->is_status(value => $exit, compare => 'ok', litteral => 1)) {
$self->{output}->output_add(severity => $exit,
short_msg => sprintf("Power supply '%s' status is '%s'", $instance, $result->{powerSupplyStatus}));
$self->{output}->output_add(
severity => $exit,
short_msg => sprintf(
"Power supply '%s' status is '%s'",
$instance,
$result->{powerSupplyStatus}
)
);
}
}
}

View File

@ -40,17 +40,17 @@ my %map_states_disk = (
12 => 'rebuild',
13 => 'failed',
14 => 'copyback',
255 => 'other_offline',
255 => 'other_offline'
);
my $mapping = {
raidDiskProductID => { oid => '.1.3.6.1.4.1.2620.1.6.7.7.2.1.6' },
raidDiskState => { oid => '.1.3.6.1.4.1.2620.1.6.7.7.2.1.9', map => \%map_states_disk },
raidDiskState => { oid => '.1.3.6.1.4.1.2620.1.6.7.7.2.1.9', map => \%map_states_disk }
};
# Append one SNMP request per RAID disk column to $self->{request}:
# the product ID OID first, then the state OID.
sub load {
    my $self = shift;

    my @requests = map { +{ oid => $mapping->{$_}->{oid} } }
        qw(raidDiskProductID raidDiskState);
    push @{ $self->{request} }, @requests;
}
@ -65,20 +65,34 @@ sub check {
foreach my $oid ($self->{snmp}->oid_lex_sort(keys %{$self->{results}->{$mapping->{raidDiskProductID}->{oid}}})) {
$oid =~ /^$mapping->{raidDiskProductID}->{oid}\.(.*)$/;
my $instance = $1;
my $result = $self->{snmp}->map_instance(mapping => $mapping, results =>
$self->{results}->{$mapping->{raidDiskState}->{oid}}, instance => $instance);
next if ($self->check_filter(section => 'raiddisk', instance => $instance));
my $result = $self->{snmp}->map_instance(
mapping => $mapping, results =>
$self->{results}->{ $mapping->{raidDiskState}->{oid} },
instance => $instance
);
my $name = centreon::plugins::misc::trim($self->{results}->{ $mapping->{raidDiskProductID}->{oid} }->{$oid});
next if ($self->check_filter(section => 'raiddisk', instance => $instance, name => $name));
my $name = centreon::plugins::misc::trim($self->{results}->{$mapping->{raidDiskProductID}->{oid}}->{$oid});
$self->{components}->{raiddisk}->{total}++;
$self->{output}->output_add(long_msg => sprintf("raid disk '%s' status is '%s'",
$name, $result->{raidDiskState}));
my $exit = $self->get_severity(section => 'raiddisk', value => $result->{raidDiskState});
$self->{output}->output_add(
long_msg => sprintf(
"raid disk '%s' status is '%s'",
$name,
$result->{raidDiskState}
)
);
my $exit = $self->get_severity(section => 'raiddisk', instance => $instance, value => $result->{raidDiskState});
if (!$self->{output}->is_status(value => $exit, compare => 'ok', litteral => 1)) {
$self->{output}->output_add(severity => $exit,
short_msg => sprintf("Raid disk '%s' status is '%s'",
$name, $result->{raidDiskState}));
$self->{output}->output_add(
severity => $exit,
short_msg => sprintf(
"Raid disk '%s' status is '%s'",
$name,
$result->{raidDiskState}
)
);
}
}
}

View File

@ -26,13 +26,13 @@ use warnings;
my %map_states_temperature = (
0 => 'false',
1 => 'true',
2 => 'reading error',
2 => 'reading error'
);
my $mapping = {
tempertureSensorName => { oid => '.1.3.6.1.4.1.2620.1.6.7.8.1.1.2' },
tempertureSensorValue => { oid => '.1.3.6.1.4.1.2620.1.6.7.8.1.1.3' },
tempertureSensorStatus => { oid => '.1.3.6.1.4.1.2620.1.6.7.8.1.1.6', map => \%map_states_temperature },
tempertureSensorStatus => { oid => '.1.3.6.1.4.1.2620.1.6.7.8.1.1.6', map => \%map_states_temperature }
};
my $oid_tempertureSensorEntry = '.1.3.6.1.4.1.2620.1.6.7.8.1.1';
@ -54,26 +54,48 @@ sub check {
my $instance = $1;
my $result = $self->{snmp}->map_instance(mapping => $mapping, results => $self->{results}->{$oid_tempertureSensorEntry}, instance => $instance);
next if ($self->check_filter(section => 'temperature', instance => $instance));
next if ($self->check_filter(section => 'temperature', instance => $instance, name => $result->{tempertureSensorName}));
next if ($result->{tempertureSensorName} !~ /^[0-9a-zA-Z ]+$/); # sometimes there is some wrong values in hex
$self->{components}->{temperature}->{total}++;
$self->{output}->output_add(long_msg => sprintf("Temperature '%s' sensor out of range status is '%s' [instance: %s]",
$result->{tempertureSensorName}, $result->{tempertureSensorStatus}, $instance));
my $exit = $self->get_severity(section => 'temperature', value => $result->{tempertureSensorStatus});
$self->{output}->output_add(
long_msg => sprintf(
"temperature '%s' sensor out of range status is '%s' [instance: %s]",
$result->{tempertureSensorName},
$result->{tempertureSensorStatus},
$instance
)
);
my $exit = $self->get_severity(section => 'temperature', instance => $instance, value => $result->{tempertureSensorStatus});
if (!$self->{output}->is_status(value => $exit, compare => 'ok', litteral => 1)) {
$self->{output}->output_add(severity => $exit,
short_msg => sprintf("Temperature '%s/%s' sensor out of range status is '%s'", $result->{tempertureSensorName}, $instance, $result->{tempertureSensorStatus}));
}
if (defined($result->{tempertureSensorValue}) && $result->{tempertureSensorValue} =~ /^[0-9\.]+$/) {
$self->{output}->perfdata_add(
label => 'temp', unit => 'C',
nlabel => 'hardware.temperature.celsius',
instances => [$result->{tempertureSensorName}, $instance],
value => sprintf("%.2f", $result->{tempertureSensorValue})
$self->{output}->output_add(
severity => $exit,
short_msg => sprintf(
"Temperature '%s/%s' sensor out of range status is '%s'",
$result->{tempertureSensorName},
$instance,
$result->{tempertureSensorStatus})
);
}
next if (defined($result->{tempertureSensorValue}) && $result->{tempertureSensorValue} !~ /^[0-9\.]+$/);
my ($exit2, $warn, $crit, $checked) = $self->get_severity_numeric(section => 'temperature', instance => $instance, name => $result->{tempertureSensorName}, value => $result->{tempertureSensorValue});
if (!$self->{output}->is_status(value => $exit2, compare => 'ok', litteral => 1)) {
$self->{output}->output_add(
severity => $exit2,
short_msg => sprintf("Temperature '%s/%s' sensor is %.2f C", $result->{tempertureSensorName}, $instance, $result->{tempertureSensorValue})
);
}
$self->{output}->perfdata_add(
label => 'temp', unit => 'C',
nlabel => 'hardware.temperature.celsius',
instances => [$result->{tempertureSensorName}, $instance],
value => sprintf('%.2f', $result->{tempertureSensorValue}),
warning => $warn,
critical => $crit
);
}
}

View File

@ -26,13 +26,13 @@ use warnings;
my %map_states_voltage = (
0 => 'false',
1 => 'true',
2 => 'reading error',
2 => 'reading error'
);
my $mapping = {
voltageSensorName => { oid => '.1.3.6.1.4.1.2620.1.6.7.8.3.1.2' },
voltageSensorValue => { oid => '.1.3.6.1.4.1.2620.1.6.7.8.3.1.3' },
voltageSensorStatus => { oid => '.1.3.6.1.4.1.2620.1.6.7.8.3.1.6', map => \%map_states_voltage },
voltageSensorStatus => { oid => '.1.3.6.1.4.1.2620.1.6.7.8.3.1.6', map => \%map_states_voltage }
};
my $oid_voltageSensorEntry = '.1.3.6.1.4.1.2620.1.6.7.8.3.1';
@ -53,27 +53,50 @@ sub check {
next if ($oid !~ /^$mapping->{voltageSensorStatus}->{oid}\.(.*)$/);
my $instance = $1;
my $result = $self->{snmp}->map_instance(mapping => $mapping, results => $self->{results}->{$oid_voltageSensorEntry}, instance => $instance);
next if ($self->check_filter(section => 'voltage', instance => $instance));
next if ($self->check_filter(section => 'voltage', instance => $instance, name => $result->{voltageSensorName}));
next if ($result->{voltageSensorName} !~ /^[0-9a-zA-Z ]+$/); # sometimes there is some wrong values in hex
$self->{components}->{voltage}->{total}++;
$self->{output}->output_add(long_msg => sprintf("Voltage '%s' sensor out of range status is '%s' [instance: %s]",
$result->{voltageSensorName}, $result->{voltageSensorStatus}, $instance));
my $exit = $self->get_severity(section => 'voltage', value => $result->{voltageSensorStatus});
$self->{output}->output_add(
long_msg => sprintf(
"voltage '%s' sensor out of range status is '%s' [instance: %s]",
$result->{voltageSensorName},
$result->{voltageSensorStatus},
$instance
)
);
my $exit = $self->get_severity(section => 'voltage', instance => $instance, value => $result->{voltageSensorStatus});
if (!$self->{output}->is_status(value => $exit, compare => 'ok', litteral => 1)) {
$self->{output}->output_add(severity => $exit,
short_msg => sprintf("Voltage '%s/%s' sensor out of range status is '%s'", $result->{voltageSensorName}, $instance, $result->{voltageSensorStatus}));
}
if (defined($result->{voltageSensorValue}) && $result->{voltageSensorValue} =~ /^[0-9\.]+$/) {
$self->{output}->perfdata_add(
label => 'volt', unit => 'V',
nlabel => 'hardware.voltage.volt',
instances => [$result->{voltageSensorName}, $instance],
value => sprintf("%.2f", $result->{voltageSensorValue})
$self->{output}->output_add(
severity => $exit,
short_msg => sprintf(
"Voltage '%s/%s' sensor out of range status is '%s'",
$result->{voltageSensorName},
$instance,
$result->{voltageSensorStatus}
)
);
}
next if (defined($result->{voltageSensorValue}) && $result->{voltageSensorValue} !~ /^[0-9\.]+$/);
my ($exit2, $warn, $crit, $checked) = $self->get_severity_numeric(section => 'voltage', instance => $instance, name => $result->{voltageSensorName}, value => $result->{voltageSensorValue});
if (!$self->{output}->is_status(value => $exit2, compare => 'ok', litteral => 1)) {
$self->{output}->output_add(
severity => $exit2,
short_msg => sprintf("Voltage '%s/%s' sensor is %.2f V", $result->{voltageSensorName}, $instance, $result->{voltageSensorValue})
);
}
$self->{output}->perfdata_add(
label => 'volt', unit => 'V',
nlabel => 'hardware.voltage.volt',
instances => [$result->{voltageSensorName}, $instance],
value => sprintf('%.2f', $result->{voltageSensorValue}),
warning => $warn,
critical => $crit
);
}
}

View File

@ -28,6 +28,8 @@ use warnings;
sub set_system {
my ($self, %options) = @_;
$self->{regexp_threshold_numeric_check_section_option} = '^(?:fan|temperature|voltage)$';
$self->{cb_hook2} = 'snmp_execute';
$self->{thresholds} = {
@ -85,7 +87,7 @@ sub snmp_execute {
sub new {
my ($class, %options) = @_;
my $self = $class->SUPER::new(package => __PACKAGE__, %options, no_performance => 1, no_absent => 1);
my $self = $class->SUPER::new(package => __PACKAGE__, %options, no_absent => 1);
bless $self, $class;
$options{options}->add_options(arguments => {});
@ -108,6 +110,10 @@ Check hardware (fans, power supplies, temperatures, voltages).
Which component to check (Default: '.*').
Can be: 'psu', 'fan', 'temperature', 'voltage', 'raiddisk'.
=item B<--add-name-instance>
Add literal description for instance value (used in filter and threshold options).
=item B<--filter>
Exclude some parts (comma separated list) (Example: --filter=fan --filter=psu)
@ -124,6 +130,11 @@ Set to overload default threshold values (syntax: section,[instance,]status,rege
It is used before default thresholds (order stays).
Example: --threshold-overload='fan,CRITICAL,^(?!(false)$)'
=item B<--warning> B<--critical>
Set thresholds for 'fan', 'temperature', 'voltage' (syntax: type,regexp,threshold)
Example: --warning='temperature,.*,40' --critical='temperature,.*,45'
=back
=cut