#
# Copyright 2017 Centreon (http://www.centreon.com/)
#
# Centreon is a full-fledged industry-strength solution that meets
# the needs in IT infrastructure and application monitoring for
# service performance.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
package database::mysql::mode::replicationmasterslave ;
use base qw( centreon::plugins::mode ) ;
use strict ;
use warnings ;
# Constructor for the master/slave replication mode.
#
# Delegates object creation to the parent class (passing this package name
# along with the caller's options), sets the mode version and registers the
# two command-line thresholds handled by this mode: --warning and --critical
# (slave latency, in seconds, according to the POD at the end of this file).
#
# Expects $options{options} to be an object providing add_options().
# Returns the blessed mode object.
sub new {
    my ($class, %options) = @_;

    my $self = bless(
        $class->SUPER::new(package => __PACKAGE__, %options),
        $class
    );
    $self->{version} = '1.0';

    # Option spec => storage name in $self->{option_results}.
    my %arguments = (
        'warning:s'  => { name => 'warning' },
        'critical:s' => { name => 'critical' },
    );
    $options{options}->add_options(arguments => \%arguments);

    return $self;
}
# Validate the command-line options of this mode.
#
# Runs the parent class initialisation, then checks the 'warning' and
# 'critical' thresholds (in that order) with the perfdata helper. When a
# threshold is rejected (threshold_validate() returns 0), an option error
# message is registered and option_exit() is called.
sub check_options {
    my ($self, %options) = @_;
    $self->SUPER::init(%options);

    foreach my $label ('warning', 'critical') {
        my $threshold = $self->{option_results}->{$label};
        next if ($self->{perfdata}->threshold_validate(label => $label, value => $threshold) != 0);

        $self->{output}->add_option_msg(short_msg => "Wrong " . $label . " threshold '" . $threshold . "'.");
        $self->{output}->option_exit();
    }
}
# Read the state of the replication threads of one server.
#
# Arguments:
#   $sql       - SQL connection object (must provide query() and fetchrow_hashref())
#   $connected - true when the connection to that server succeeded
#
# Returns ($io_thread_state, $sql_thread_state, $last_error) where each state is
#   0   => thread reported as running ("Yes"),
#   1   => thread not running (or the server returned no slave status at all),
#   100 => server not checked because the connection failed.
# $last_error is the 'Last_Error' column (undef when unavailable).
sub _slave_threads_state {
    my ($sql, $connected) = @_;

    return (100, 100, undef) if (!$connected);

    $sql->query(query => q{
        SHOW SLAVE STATUS
    });
    # No row at all is handled like a row whose columns are all undefined.
    my $result = $sql->fetchrow_hashref() || {};

    my @states = map {
        (defined($result->{$_}) && $result->{$_} =~ /^yes$/i) ? 0 : 1
    } ('Slave_IO_Running', 'Slave_SQL_Running');

    return (@states, $result->{Last_Error});
}

# Entry point of the mode: check a MySQL master/slave pair.
#
# Expects $options{sql} to be an array reference holding (at least) two SQL
# connection objects (obtained with --multiple). The check is done in three steps:
#   1. connect to both servers and report each connection status;
#   2. find out which server is the slave (the one whose replication threads
#      are at least partly running) and report the "Slave Thread Status";
#   3. compare the master binlog position with the position read by the slave,
#      check the slave latency against the thresholds and report the
#      "Position Status".
# The result is displayed and the plugin exits through the output object.
sub run {
    my ($self, %options) = @_;

    if (ref($options{sql}) ne 'ARRAY') {
        $self->{output}->add_option_msg(short_msg => "Need to use --multiple options.");
        $self->{output}->option_exit();
    }
    if (scalar(@{$options{sql}}) < 2) {
        $self->{output}->add_option_msg(short_msg => "Need to specify two MySQL Server.");
        $self->{output}->option_exit();
    }

    my ($sql_one, $sql_two) = @{$options{sql}};
    my ($slave_status, $slave_status_error) = (0, "");
    my ($position_status, $position_status_error) = (0, "");
    my ($connection_status_name_srv1, $connection_status_name_srv2) = ($sql_one->get_id(), $sql_two->get_id());
    my ($master_save, $slave_save);

    # dontquit => 1: a failed connection is reported through the returned
    # exit value (-1) and message, which are handled below.
    my ($exit1, $msg_error1) = $sql_one->connect(dontquit => 1);
    my ($exit2, $msg_error2) = $sql_two->connect(dontquit => 1);
    $self->{output}->output_add(severity => 'OK',
                                short_msg => "No problems. Replication is ok.");
    foreach my $server ([$sql_one, $exit1, $msg_error1], [$sql_two, $exit2, $msg_error2]) {
        my ($sql, $exit, $msg_error) = @{$server};
        if ($exit == -1) {
            $self->{output}->output_add(severity => 'CRITICAL',
                                        short_msg => "Connection Status '" . $sql->get_id() . "': " . $msg_error);
        } else {
            $self->{output}->output_add(long_msg => "Connection Status '" . $sql->get_id() . "' [OK]");
        }
    }

    #####
    # Find SLAVE
    #####
    my ($io_thread_status_srv1, $sql_thread_status_srv1, $last_error1) = _slave_threads_state($sql_one, $exit1 != -1);
    my ($io_thread_status_srv2, $sql_thread_status_srv2, $last_error2) = _slave_threads_state($sql_two, $exit2 != -1);

    # Per-server total: 0 = both threads running, 1 = one thread down,
    # 2 = no thread running (server acts as master), 200 = server not checked.
    my $total_srv1 = $io_thread_status_srv1 + $sql_thread_status_srv1;
    my $total_srv2 = $io_thread_status_srv2 + $sql_thread_status_srv2;

    # Check if there are two slaves
    if ($total_srv1 < 2 && $total_srv2 < 2) {
        $slave_status = 1;
        $slave_status_error = "Two slave. Need to have only one.";
    } else {
        # Check if a thread is down
        if ($total_srv1 == 1) {
            $slave_status = -1;
            $slave_status_error = "A Replication thread is down on '" . $sql_one->get_id() . "'.";
            if ($sql_thread_status_srv1 != 0 && defined($last_error1) && $last_error1 ne "") {
                $slave_status = 1;
                $slave_status_error .= " SQL Thread is stopped because of an error (error='" . $last_error1 . "').";
            }
        }
        if ($total_srv2 == 1) {
            $slave_status = -1;
            $slave_status_error = "A Replication thread is down on '" . $sql_two->get_id() . "'.";
            if ($sql_thread_status_srv2 != 0 && defined($last_error2) && $last_error2 ne "") {
                $slave_status = 1;
                $slave_status_error .= " SQL Thread is stopped because of an error (error='" . $last_error2 . "').";
            }
        }

        # Check if we need to SKIP
        if ($io_thread_status_srv1 == 100) {
            $slave_status = -1;
            $slave_status_error .= " Skip check on '" . $sql_one->get_id() . "'.";
        }
        if ($io_thread_status_srv2 == 100) {
            $slave_status = -1;
            $slave_status_error .= " Skip check on '" . $sql_two->get_id() . "'.";
        }

        # Save Slave
        if ($total_srv1 < 2) {
            $slave_save = $sql_one;
            $master_save = $sql_two;
        }
        if ($total_srv2 < 2) {
            $slave_save = $sql_two;
            $master_save = $sql_one;
        }

        if ($total_srv2 > 1 && $total_srv1 > 1) {
            $slave_status = 1;
            $slave_status_error .= " No slave (maybe because we cannot check a server).";
        }
    }

    ####
    # Check Slave position
    ####
    if (!defined($slave_save)) {
        $position_status = -2;
        $position_status_error = "Skip because we can't identify a unique slave.";
    } elsif (($master_save->get_id() eq $connection_status_name_srv1 && $exit1 == -1) ||
             ($master_save->get_id() eq $connection_status_name_srv2 && $exit2 == -1)) {
        # The server identified as master could not be reached.
        $position_status = -1;
        $position_status_error = "Can't get master position on '" . $master_save->get_id() . "'.";
    } else {
        # Get Master Position
        $master_save->query(query => q{
            SHOW MASTER STATUS
        });
        my $result = $master_save->fetchrow_hashref() || {};
        my $master_file = $result->{File};
        my $master_position = $result->{Position};

        $slave_save->query(query => q{
            SHOW SLAVE STATUS
        });
        my $result2 = $slave_save->fetchrow_hashref() || {};
        my $slave_file = $result2->{Master_Log_File};
        my $slave_position = $result2->{Read_Master_Log_Pos};
        # NOTE(review): this column may come back undefined (NULL); it is then
        # used as 0 in the numeric contexts below - confirm this is intended.
        my $num_sec_lates = $result2->{Seconds_Behind_Master};

        my $exit_code_sec = $self->{perfdata}->threshold_check(
            value => $num_sec_lates,
            threshold => [ { label => 'critical', exit_litteral => 'critical' }, { label => 'warning', exit_litteral => 'warning' } ]
        );
        if (!$self->{output}->is_status(value => $exit_code_sec, compare => 'ok', litteral => 1)) {
            $self->{output}->output_add(severity => $exit_code_sec,
                                        short_msg => sprintf("Slave has %d seconds latency behind master", $num_sec_lates));
        }
        $self->{output}->perfdata_add(label => 'slave_latency', unit => 's',
                                      value => $num_sec_lates,
                                      warning => $self->{perfdata}->get_perfdata_for_output(label => 'warning'),
                                      critical => $self->{perfdata}->get_perfdata_for_output(label => 'critical'),
                                      min => 0);

        # Flags are inverted: 0 means "a thread in that state was seen".
        my $slave_sql_thread_ko = 1;
        my $slave_sql_thread_warning = 1;
        my $slave_sql_thread_ok = 1;

        $slave_save->query(query => q{
            SHOW FULL PROCESSLIST
        });
        while ((my $row = $slave_save->fetchrow_hashref())) {
            my $state = $row->{State};
            next if (!defined($state));

            $slave_sql_thread_ko = 0 if ($state =~ /^(Waiting to reconnect after a failed binlog dump request|Connecting to master|Reconnecting after a failed binlog dump request|Waiting to reconnect after a failed master event read|Waiting for the slave SQL thread to free enough relay log space)$/i);
            # The alternation is grouped so that both anchors apply to both
            # states (ungrouped, '^' only bound to the first alternative and
            # '$' only to the second one).
            $slave_sql_thread_warning = 0 if ($state =~ /^(?:Waiting for the next event in relay log|Reading event from the relay log)$/i);
            $slave_sql_thread_ok = 0 if ($state =~ /^Has read all relay log; waiting for the slave I\/O thread to update it$/i);
        }

        if ($slave_sql_thread_ko == 0) {
            $position_status = 1;
            $position_status_error .= " Slave replication has connection issue with the master.";
        } elsif (($master_file ne $slave_file || $master_position != $slave_position) &&
                 ($slave_sql_thread_warning == 0 || $slave_sql_thread_ok == 0)) {
            $position_status = -1;
            $position_status_error .= " Slave replication is late but it's progressing.";
        } else {
            # Extract the numeric suffix of both binlog file names. Captures are
            # taken in list context so that a failed match yields undef instead
            # of silently reusing the capture of a previous match.
            my ($master_bin_num) = defined($master_file) ? ($master_file =~ /(\d+)$/) : ();
            my ($slave_bin_num) = defined($slave_file) ? ($slave_file =~ /(\d+)$/) : ();

            # Binlog files far apart while the slave claims (almost) no latency:
            # surely a misconfiguration of the plugin. Only evaluated when both
            # binlog numbers could be extracted.
            if (defined($master_bin_num) && defined($slave_bin_num)) {
                my $diff_binlog = abs($master_bin_num - $slave_bin_num);
                if ($diff_binlog > 1 && $num_sec_lates < 10) {
                    $position_status = -3;
                    $position_status_error .= " Surely a configuration problem of the plugin (not good master and slave server used)";
                }
            }
        }
    }

    $self->replication_add($slave_status, "Slave Thread Status", $slave_status_error);
    $self->replication_add($position_status, "Position Status", $position_status_error);

    $self->{output}->display();
    $self->{output}->exit();
}
# Report the result of one replication check on the plugin output.
#
# Arguments (after $self):
#   $lstate      - numeric state of the check:
#                    0  => OK
#                   -1  => WARNING
#                   -2  => CRITICAL, displayed as "SKIP"
#                   -3  => UNKNOWN
#                   any other value => CRITICAL
#   $str_display - name of the check, used as message prefix
#   $lerr        - optional detail appended between brackets when not empty
#
# The message is always added as long output; it is also added as short
# output (with the matching severity) when the status is not OK.
sub replication_add {
    my ($self, $lstate, $str_display, $lerr) = @_;

    my $status = $lstate == 0  ? 'OK'
               : $lstate == -1 ? 'WARNING'
               : $lstate == -3 ? 'UNKNOWN'
               :                 'CRITICAL';
    # Only the "skip" state is displayed with a label that differs from its severity.
    my $shown = ($lstate == -2) ? 'SKIP' : $status;

    my $output = $str_display . " [" . $shown . "]";
    $output .= " [" . $lerr . "]" if (defined($lerr) && $lerr ne "");

    unless ($self->{output}->is_status(value => $status, compare => 'ok', litteral => 1)) {
        $self->{output}->output_add(severity => $status,
                                    short_msg => $output);
    }

    $self->{output}->output_add(long_msg => $output);
}
1 ;
__END__

=head1 MODE

Check MySQL replication master/slave (need to use --multiple).

=over 8

=item B<--warning>

Threshold warning in seconds (slave latency).

=item B<--critical>

Threshold critical in seconds (slave latency).

=back

=cut