break(pacemaker): refactoring crm mode (#2747)
This commit is contained in:
parent
98fc713cb4
commit
667dd3bec8
|
@ -20,36 +20,268 @@
|
|||
|
||||
package apps::pacemaker::local::mode::crm;
|
||||
|
||||
use base qw(centreon::plugins::mode);
|
||||
use base qw(centreon::plugins::templates::counter);
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use centreon::plugins::templates::catalog_functions qw(catalog_status_threshold_ng);
|
||||
|
||||
# Threshold callback of the 'resource-status' counter.
# The status is first computed by catalog_status_threshold_ng. When an expected node
# is registered for this resource in instance_mode->{resources_check} and the resource
# is reported on a different node, the computed status is combined with 'warning'
# through the output object's get_most_critical.
sub custom_resource_threshold {
    my ($self, %options) = @_;

    my $severity = catalog_status_threshold_ng($self, %options);
    my $expected_node = $self->{instance_mode}->{resources_check}->{ $self->{result_values}->{name} };

    # No expected node configured, or resource located where expected: keep the computed status.
    return $severity if (!defined($expected_node));
    return $severity if ($expected_node eq $self->{result_values}->{node});

    return $self->{output}->get_most_critical(status => [ $severity, 'warning' ]);
}
|
||||
|
||||
# Output callback of the 'connection-status' counter: renders the connection
# status followed by the associated error text.
sub custom_connection_status_output {
    my ($self, %options) = @_;

    my $values = $self->{result_values};
    return 'connection status: ' . $values->{connection_status}
        . ' [error: ' . $values->{connection_error} . ']';
}
|
||||
|
||||
# Output callback of the 'quorum-status' counter: renders the quorum status.
sub custom_quorum_status_output {
    my ($self, %options) = @_;

    my $quorum_status = $self->{result_values}->{quorum_status};
    return 'quorum status: ' . $quorum_status;
}
|
||||
|
||||
# Output callback of the 'nodes-online' counter: number of online nodes
# followed by their names between brackets.
sub custom_nodes_online_output {
    my ($self, %options) = @_;

    my $values = $self->{result_values};
    return join('', 'online: ', $values->{online}, ' [', $values->{online_names}, ']');
}
|
||||
|
||||
# Output callback of the 'nodes-offline' counter: number of offline nodes
# followed by their names between brackets.
sub custom_nodes_offline_output {
    my ($self, %options) = @_;

    my $values = $self->{result_values};
    return join('', 'offline: ', $values->{offline}, ' [', $values->{offline_names}, ']');
}
|
||||
|
||||
# Output callback of the 'nodes-standby' counter: number of standby nodes
# followed by their names between brackets.
sub custom_nodes_standby_output {
    my ($self, %options) = @_;

    my $values = $self->{result_values};
    return join('', 'standby: ', $values->{standby}, ' [', $values->{standby_names}, ']');
}
|
||||
|
||||
# Output callback of the 'resource-status' counter: status of the resource,
# the node it is reported on and its unmanaged flag.
sub custom_resource_status_output {
    my ($self, %options) = @_;

    my $values = $self->{result_values};
    return 'status: ' . $values->{status}
        . ' [node: ' . $values->{node} . ']'
        . ' [unmanaged: ' . $values->{is_unmanaged} . ']';
}
|
||||
|
||||
# Output callback of the 'clone-resource-status' counter: status of the clone
# resource, its master and slave node names and its unmanaged flag.
sub custom_clone_resource_status_output {
    my ($self, %options) = @_;

    my $values = $self->{result_values};
    return 'status: ' . $values->{status}
        . ' [masters: ' . $values->{masters_nodes_name} . ']'
        . ' [slaves: ' . $values->{slaves_nodes_name} . ']'
        . ' [unmanaged: ' . $values->{is_unmanaged} . ']';
}
|
||||
|
||||
# Prefix callback of the 'resources' group: quoted resource name followed by a space.
sub prefix_rsc_output {
    my ($self, %options) = @_;

    my $name = $options{instance_value}->{name};
    return "resource '" . $name . "' ";
}
|
||||
|
||||
# Prefix callback of the 'clone_resources' group: quoted clone resource name followed by a space.
sub prefix_clone_rsc_output {
    my ($self, %options) = @_;

    my $name = $options{instance_value}->{name};
    return "clone resource '" . $name . "' ";
}
|
||||
|
||||
# Prefix callback of the 'nodes' group: constant text placed before the node counters.
sub prefix_nodes_output {
    my ($self, %options) = @_;

    my $prefix = 'nodes ';
    return $prefix;
}
|
||||
|
||||
# Long-output callback of the 'cluster' group: constant header text.
sub cluster_long_output {
    my ($self, %options) = @_;

    my $header = 'checking cluster';
    return $header;
}
|
||||
|
||||
# Declare the counters of the mode:
#   $self->{maps_counters_type}       the counter groups (cluster, resources, clone_resources)
#   $self->{maps_counters}->{<group>} the counters attached to each group
# Status counters use a custom output callback and a perfdata callback that only
# returns 0; count counters carry a '%s' perfdata template with a minimum of 0.
sub set_counters {
    my ($self, %options) = @_;

    # Perfdata callback used by every status counter.
    my $no_perfdata = sub { return 0; };
    # Factories returning a fresh perfdata specification on each call.
    my $global_perfdatas = sub { return [ { template => '%s', min => 0 } ]; };
    my $instance_perfdatas = sub { return [ { template => '%s', min => 0, label_extra_instance => 1 } ]; };

    $self->{maps_counters_type} = [
        {
            name => 'cluster', type => 3, cb_long_output => 'cluster_long_output', indent_long_output => ' ',
            group => [
                { name => 'connection', type => 0, display_short => 0, skipped_code => { -10 => 1 } },
                { name => 'quorum', type => 0, display_short => 0, skipped_code => { -10 => 1 } },
                { name => 'nodes', type => 0, display_short => 0, cb_prefix_output => 'prefix_nodes_output', skipped_code => { -10 => 1 } },
                { name => 'actions', type => 0, display_short => 0, skipped_code => { -10 => 1 } },
            ]
        },
        {
            name => 'resources', type => 1, display_short => 0,
            cb_prefix_output => 'prefix_rsc_output', skipped_code => { -10 => 1 }
        },
        {
            name => 'clone_resources', type => 1, display_short => 0,
            cb_prefix_output => 'prefix_clone_rsc_output', skipped_code => { -10 => 1 }
        },
    ];

    $self->{maps_counters}->{connection} = [
        {
            label => 'connection-status', type => 2,
            critical_default => '%{connection_status} =~ /failed/i',
            set => {
                key_values => [ { name => 'connection_status' }, { name => 'connection_error' } ],
                closure_custom_output => $self->can('custom_connection_status_output'),
                closure_custom_perfdata => $no_perfdata,
                closure_custom_threshold_check => \&catalog_status_threshold_ng
            }
        }
    ];

    $self->{maps_counters}->{quorum} = [
        {
            label => 'quorum-status', type => 2,
            critical_default => '%{quorum_status} =~ /noQuorum/i',
            set => {
                key_values => [ { name => 'quorum_status' } ],
                closure_custom_output => $self->can('custom_quorum_status_output'),
                closure_custom_perfdata => $no_perfdata,
                closure_custom_threshold_check => \&catalog_status_threshold_ng
            }
        }
    ];

    $self->{maps_counters}->{nodes} = [
        {
            label => 'nodes-online', nlabel => 'cluster.nodes.online.count',
            set => {
                key_values => [ { name => 'online' }, { name => 'online_names' } ],
                closure_custom_output => $self->can('custom_nodes_online_output'),
                perfdatas => $global_perfdatas->()
            }
        },
        {
            label => 'nodes-offline', nlabel => 'cluster.nodes.offline.count',
            set => {
                key_values => [ { name => 'offline' }, { name => 'offline_names' } ],
                closure_custom_output => $self->can('custom_nodes_offline_output'),
                perfdatas => $global_perfdatas->()
            }
        },
        {
            label => 'nodes-standby', nlabel => 'cluster.nodes.standby.count',
            set => {
                key_values => [ { name => 'standby' }, { name => 'standby_names' } ],
                closure_custom_output => $self->can('custom_nodes_standby_output'),
                perfdatas => $global_perfdatas->()
            }
        }
    ];

    $self->{maps_counters}->{actions} = [
        {
            label => 'cluster-actions-failed', nlabel => 'cluster.actions.failed.count',
            set => {
                key_values => [ { name => 'failed' } ],
                output_template => 'actions failed: %s',
                perfdatas => $global_perfdatas->()
            }
        }
    ];

    $self->{maps_counters}->{resources} = [
        {
            label => 'resource-status', type => 2,
            critical_default => '%{status} =~ /stopped|failed/i',
            set => {
                key_values => [ { name => 'status' }, { name => 'is_unmanaged' }, { name => 'node' }, { name => 'name' } ],
                closure_custom_output => $self->can('custom_resource_status_output'),
                closure_custom_perfdata => $no_perfdata,
                closure_custom_threshold_check => $self->can('custom_resource_threshold')
            }
        },
        {
            label => 'resource-actions-failed', nlabel => 'resource.actions.failed.count',
            set => {
                key_values => [ { name => 'failed_actions' } ],
                output_template => 'actions failed: %s',
                perfdatas => $instance_perfdatas->()
            }
        },
        {
            label => 'resource-migration-failed', nlabel => 'resource.migration.failed.count',
            set => {
                key_values => [ { name => 'failed_migration' } ],
                output_template => 'migration failed: %s',
                perfdatas => $instance_perfdatas->()
            }
        }
    ];

    $self->{maps_counters}->{clone_resources} = [
        {
            label => 'clone-resource-status', type => 2,
            critical_default => '%{status} =~ /stopped|failed/i',
            set => {
                key_values => [
                    { name => 'name' }, { name => 'status' }, { name => 'is_unmanaged' },
                    { name => 'masters_nodes_name' }, { name => 'slaves_nodes_name' }
                ],
                closure_custom_output => $self->can('custom_clone_resource_status_output'),
                closure_custom_perfdata => $no_perfdata,
                closure_custom_threshold_check => \&catalog_status_threshold_ng
            }
        },
        {
            label => 'clone-resource-actions-failed', nlabel => 'clone_resource.actions.failed.count',
            set => {
                key_values => [ { name => 'failed_actions' } ],
                output_template => 'actions failed: %s',
                perfdatas => $instance_perfdatas->()
            }
        },
        {
            label => 'clone-resource-migration-failed', nlabel => 'clone_resource.migration.failed.count',
            set => {
                key_values => [ { name => 'failed_migration' } ],
                output_template => 'migration failed: %s',
                perfdatas => $instance_perfdatas->()
            }
        }
    ];
}
|
||||
|
||||
# Constructor of the mode.
# Calls the parent constructor once (with force_new_perfdata => 1), registers the
# mode-specific command-line options and initializes the per-instance state read
# by the other methods of this package.
sub new {
    my ($class, %options) = @_;
    # Single parent-constructor call: declaring $self twice in this scope would
    # build the object twice and mask the first declaration.
    my $self = $class->SUPER::new(package => __PACKAGE__, %options, force_new_perfdata => 1);
    bless $self, $class;

    # Each option name appears once: a repeated key in the hash would silently
    # override the earlier entry.
    $options{options}->add_options(arguments => {
        'warning'                  => { name => 'warning' },
        'standbyignore'            => { name => 'standbyignore' },
        'ignore-stopped-clone:s'   => { name => 'ignore_stopped_clone' },
        'filter-resource-name:s'   => { name => 'filter_resource_name' },
        'resources:s'              => { name => 'resources' }, # legacy
        'ignore-failed-actions:s@' => { name => 'ignore_failed_actions' }
    });

    $self->{threshold} = 'CRITICAL';
    # Filled from --resources: resource name => node.
    $self->{resources_check} = {};
    return $self;
}
|
||||
|
||||
sub check_options {
|
||||
my ($self, %options) = @_;
|
||||
$self->SUPER::init(%options);
|
||||
$self->SUPER::check_options(%options);
|
||||
|
||||
if (defined($self->{option_results}->{warning})) {
|
||||
$self->{threshold} = 'WARNING';
|
||||
}
|
||||
if (defined($self->{option_results}->{resources})) {
|
||||
foreach (split(/,/, $self->{option_results}->{resources})) {
|
||||
my ($rsc_name, $node) = split(/:/, $_);
|
||||
|
@ -61,115 +293,151 @@ sub check_options {
|
|||
}
|
||||
}
|
||||
|
||||
# Parse the crm_mon text given in $options{crm_out} and store the result on the mode:
#   $self->{cluster}->{global}  connection status, quorum status, node counts/names
#                               and number of failed actions
#   $self->{resources}          one entry per resource line reported as Started or Stopped
#   $self->{clone_resources}    one entry per "Master/Slave Set" block
# Resources whose name does not match the filter_resource_name option are skipped.
# Failed actions matching one of the ignore_failed_actions patterns are not counted.
# Nothing is returned; an undefined crm_out is handled like empty output.
sub parse_crm {
    my ($self, %options) = @_;

    $self->{cluster} = {
        global => {
            connection => {
                connection_status => 'ok',
                connection_error => '-',
            },
            quorum => {
                quorum_status => '-'
            },
            nodes => {
                online => 0,
                online_names => '',
                offline => 0,
                offline_names => '',
                standby => 0,
                standby_names => '',
            },
            actions => {
                failed => 0
            }
        }
    };
    $self->{resources} = {};
    $self->{clone_resources} = {};

    my $global = $self->{cluster}->{global};
    my $filter = $self->{option_results}->{filter_resource_name};

    # Returns 1 (and logs a debug message) when the resource name is excluded by the filter.
    my $is_filtered = sub {
        my ($name) = @_;

        return 0 if (!defined($filter) || $filter eq '' || $name =~ /$filter/);
        $self->{output}->output_add(long_msg => "skipping '" . $name . "': no matching filter.", debug => 1);
        return 1;
    };

    my @lines = split(/\n/, defined($options{crm_out}) ? $options{crm_out} : '');
    my $num_lines = scalar(@lines);
    for (my $i = 0; $i < $num_lines; $i++) {
        my $line = $lines[$i];

        if ($line =~ /Connection to cluster failed\:(.*)/i) {
            $global->{connection}->{connection_status} = 'failed';
            $global->{connection}->{connection_error} = $1;
        } elsif ($line =~ /Current DC:/) {
            $global->{quorum}->{quorum_status} = ($line =~ /partition with quorum$/) ? 'ok' : 'noQuorum';
        } elsif ($line =~ /^(offline|online):\s*\[\s*(.*?)\s*\]/i) {
            # Captures are copied before any other regex operation runs.
            my ($state, $list) = (lc($1), $2);
            my @nodes = split(/\s+/, $list);
            $global->{nodes}->{$state} = scalar(@nodes);
            $global->{nodes}->{$state . '_names'} = ' ' . join(' ', @nodes);
        } elsif ($line =~ /^node\s+(\S+?):\s*standby/i) {
            $global->{nodes}->{standby}++;
            $global->{nodes}->{standby_names} .= ' ' . $1;
        } elsif ($line =~ /\s*([0-9a-zA-Z_\-]+)\s+\(\S+\)\:\s+Started\s+([0-9a-zA-Z_\-]+)/) {
            my ($name, $node) = ($1, $2);
            next if ($is_filtered->($name) == 1);

            $self->{resources}->{$name} = { name => $name, failed_actions => 0, failed_migration => 0, status => 'started', node => $node, is_unmanaged => 'no' };
            $self->{resources}->{$name}->{is_unmanaged} = 'yes' if ($line =~ /unmanaged/);
            $self->{resources}->{$name}->{status} = 'failed' if ($line =~ /FAILED/i);
        } elsif ($line =~ /\s*([0-9a-zA-Z_\-]+)\s+\(\S+\)\:\s+Stopped/ || $line =~ /\s*([0-9a-zA-Z_\-]+)\s+\(\S+\)\:\s+\(\S+\)\s+Stopped/) {
            my $name = $1;
            next if ($is_filtered->($name) == 1);

            $self->{resources}->{$name} = { name => $name, failed_actions => 0, failed_migration => 0, status => 'stopped', node => '-', is_unmanaged => 'no' };
            $self->{resources}->{$name}->{is_unmanaged} = 'yes' if ($line =~ /unmanaged/);
            $self->{resources}->{$name}->{status} = 'failed' if ($line =~ /FAILED/i);
        } elsif ($line =~ /Master\/Slave.*\[(.*)\]/i) {
            #Master/Slave Set: ms_mysql-master [ms_mysql]
            #     ms_mysql   (ocf::heartbeat:mysql-centreon):    FAILED node-db-passive
            #     Masters: [ node-db-active ]
            #     Stopped: [ node-map-active node-map-passive ]

            #Master/Slave Set: ms_mysql-master [ms_mysql]
            #     Masters: [ node-db-active ]
            #     Slaves: [ node-db-passive ]
            #     Stopped: [ node-map-active node-map-passive ]

            #Master/Slave Set: ms_mysql-master [ms_mysql]
            #     ms_mysql   (ocf::heartbeat:mysql-centreon):    Master node-db-active (unmanaged)
            #     ms_mysql   (ocf::heartbeat:mysql-centreon):    Slave node-db-passive (unmanaged)
            #     Stopped: [ cps-map-active cps-map-passive ]
            my $name = $1;
            next if ($is_filtered->($name) == 1);

            my $clone = {
                name => $name,
                failed_actions => 0,
                failed_migration => 0,
                status => 'ok',
                is_unmanaged => 'no',
                masters_nodes_name => '',
                slaves_nodes => 0,
                slaves_nodes_name => '',
                stopped_nodes => 0,
                stopped_nodes_name => ''
            };
            $self->{clone_resources}->{$name} = $clone;

            # Consume the lines that follow the set header. The bound on $i + 1
            # keeps the look-ahead inside the input.
            for (; $i + 1 < $num_lines; $i++) {
                my $next_line = $lines[$i + 1];

                if ($next_line =~ /^\s+(masters|slaves|stopped):\s*\[\s*(.*?)\s*\]/i) {
                    my ($type, $list) = (lc($1), $2);
                    my @nodes = split(/\s+/, $list);
                    $clone->{$type . '_nodes'} = scalar(@nodes);
                    $clone->{$type . '_nodes_name'} = join(' ', @nodes);
                    next;
                }

                # The resource name is matched literally (\Q...\E). A single line may
                # carry both the unmanaged and the FAILED markers.
                my $unmanaged = ($next_line =~ /^\s+\Q$name\E\s+.*unmanaged/) ? 1 : 0;
                my $failed = ($next_line =~ /^\s+\Q$name\E\s+.*FAILED/i) ? 1 : 0;
                last if ($unmanaged == 0 && $failed == 0);

                $clone->{is_unmanaged} = 'yes' if ($unmanaged == 1);
                $clone->{status} = 'failed' if ($failed == 1);
            }
        } elsif ($line =~ /^Failed\s+(?:(Resource|Fencing)\s+)?actions:/i) {
            # Each failed action is a line starting with '* ' after the header.
            for (; $i + 1 < $num_lines; $i++) {
                my $action = $lines[$i + 1];
                last if ($action !~ /^\*\s+/);

                my $skip = 0;
                foreach my $pattern (@{ $self->{option_results}->{ignore_failed_actions} || [] }) {
                    if ($action =~ /$pattern/) {
                        $skip = 1;
                        last;
                    }
                }
                next if ($skip == 1);

                if ($action =~ /^\*\s+(\S+?)_(start|stop|status|monitor|promote|demote)_/) {
                    my $rsc_name = $1;
                    $self->{clone_resources}->{$rsc_name}->{failed_actions}++
                        if (defined($self->{clone_resources}->{$rsc_name}));
                    $self->{resources}->{$rsc_name}->{failed_actions}++
                        if (defined($self->{resources}->{$rsc_name}));
                }

                $global->{actions}->{failed}++;
            }
        } elsif ($line =~ /\s*(\S+?):.*migration.*fail-count=(\d+)/i) {
            my ($rsc_name, $fail_count) = ($1, $2);
            $self->{clone_resources}->{$rsc_name}->{failed_migration} += $fail_count
                if (defined($self->{clone_resources}->{$rsc_name}));
            $self->{resources}->{$rsc_name}->{failed_migration} += $fail_count
                if (defined($self->{resources}->{$rsc_name}));
        }
    }
}
|
||||
|
||||
sub run {
|
||||
sub manage_selection {
|
||||
my ($self, %options) = @_;
|
||||
|
||||
my ($stdout) = $options{custom}->execute_command(
|
||||
|
@ -177,11 +445,9 @@ sub run {
|
|||
command_path => '/usr/sbin',
|
||||
command_options => '-1 -r -f 2>&1'
|
||||
);
|
||||
$self->parse_crm(crm_out => $stdout);
|
||||
|
||||
$self->parse_output(crm_out => $stdout);
|
||||
|
||||
$self->{output}->display();
|
||||
$self->{output}->exit();
|
||||
$self->{output}->output_add(short_msg => 'Cluster is ok');
|
||||
}
|
||||
|
||||
1;
|
||||
|
@ -190,34 +456,73 @@ __END__
|
|||
|
||||
=head1 MODE
|
||||
|
||||
Check Cluster Resource Manager (need 'crm_mon' command).
|
||||
Check cluster resource manager (needs the 'crm_mon' command).
|
||||
Should be executed on a cluster node.
|
||||
|
||||
Command used: /usr/sbin/crm_mon -1 -r -f 2>&1
|
||||
|
||||
=over 8
|
||||
|
||||
=item B<--warning>
|
||||
=item B<--filter-resource-name>
|
||||
|
||||
If failed Nodes, stopped Resources detected or Standby Nodes sends Warning instead of Critical (default)
|
||||
as long as there are no other errors and there is Quorum.
|
||||
Filter resource (also clone resource) by name (can be a regexp).
|
||||
|
||||
=item B<--standbyignore>
|
||||
=item B<--warning-connection-status>
|
||||
|
||||
Ignore any node(s) in standby; by default, the chosen threshold is returned.
|
||||
Set warning threshold for status.
|
||||
Can use special variables like: %{connection_status}, %{connection_error}
|
||||
|
||||
=item B<--critical-connection-status>
|
||||
|
||||
Set critical threshold for status (Default: '%{connection_status} =~ /failed/i').
|
||||
Can use special variables like: %{connection_status}, %{connection_error}
|
||||
|
||||
=item B<--warning-quorum-status>
|
||||
|
||||
Set warning threshold for status.
|
||||
Can use special variables like: %{quorum_status}
|
||||
|
||||
=item B<--critical-quorum-status>
|
||||
|
||||
Set critical threshold for status (Default: '%{quorum_status} =~ /noQuorum/i').
|
||||
Can use special variables like: %{quorum_status}
|
||||
|
||||
=item B<--warning-resource-status>
|
||||
|
||||
Set warning threshold for status.
|
||||
Can use special variables like: %{name}, %{status}, %{node}, %{is_unmanaged}
|
||||
|
||||
=item B<--critical-resource-status>
|
||||
|
||||
Set critical threshold for status (Default: '%{status} =~ /stopped|failed/i').
|
||||
Can use special variables like: %{name}, %{status}, %{node}, %{is_unmanaged}
|
||||
|
||||
=item B<--warning-clone-resource-status>
|
||||
|
||||
Set warning threshold for status.
|
||||
Can use special variables like: %{name}, %{status}, %{masters_nodes_name}, %{slaves_nodes_name}, %{is_unmanaged}
|
||||
|
||||
=item B<--critical-clone-resource-status>
|
||||
|
||||
Set critical threshold for status (Default: '%{status} =~ /stopped|failed/i').
|
||||
Can use special variables like: %{name}, %{status}, %{masters_nodes_name}, %{slaves_nodes_name}, %{is_unmanaged}
|
||||
|
||||
=item B<--ignore-failed-actions>
|
||||
|
||||
Failed actions errors (that match) are skipped.
|
||||
|
||||
=item B<--resources>
|
||||
|
||||
If resources not started on the node specified, send a warning message:
|
||||
(format: <rsc_name>:<node>,<rsc_name>:<node>,...)
|
||||
|
||||
=item B<--ignore-stopped-clone>
|
||||
=item B<--warning-*> B<--critical-*>
|
||||
|
||||
Stopped clone resource on nodes (that match) are skipped.
|
||||
|
||||
=item B<--ignore-failed-actions>
|
||||
|
||||
Failed actions errors (that match) are skipped.
|
||||
Thresholds.
|
||||
Can be: 'cluster-actions-failed',
|
||||
'clone-resource-actions-failed', 'clone-resource-migration-failed',
|
||||
'nodes-online', 'nodes-offline', 'nodes-standby',
|
||||
'resource-actions-failed', 'resource-migration-failed'.
|
||||
|
||||
=back
|
||||
|
||||
|
|
|
@ -315,6 +315,7 @@ sub run_instances {
|
|||
return undef if (defined($options{config}->{cb_init}) && $self->call_object_callback(method_name => $options{config}->{cb_init}) == 1);
|
||||
my $cb_init_counters = $self->get_callback(method_name => $options{config}->{cb_init_counters});
|
||||
my $display_status_lo = defined($options{display_status_long_output}) && $options{display_status_long_output} == 1 ? 1 : 0;
|
||||
my $display_short = (!defined($options{config}->{display_short}) || $options{config}->{display_short} != 0) ? 1 : 0;
|
||||
my $resume = defined($options{resume}) && $options{resume} == 1 ? 1 : 0;
|
||||
my $no_message_multiple = 1;
|
||||
|
||||
|
@ -402,12 +403,14 @@ sub run_instances {
|
|||
}
|
||||
|
||||
if ($self->{multiple} == 0) {
|
||||
$self->{output}->output_add(short_msg => $prefix_output . $long_msg . $suffix_output);
|
||||
$self->{output}->output_add(short_msg => $prefix_output . $long_msg . $suffix_output)
|
||||
if ($display_short == 1);
|
||||
}
|
||||
}
|
||||
|
||||
if ($no_message_multiple == 0 && $self->{multiple} == 1 && $resume == 0) {
|
||||
$self->{output}->output_add(short_msg => $options{config}->{message_multiple});
|
||||
$self->{output}->output_add(short_msg => $options{config}->{message_multiple})
|
||||
if ($display_short == 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue