From f31d177eb1677a0b1fba030540f9135ed0e5ffe5 Mon Sep 17 00:00:00 2001 From: tcharles Date: Thu, 25 May 2023 10:19:25 +0200 Subject: [PATCH] apps::monitoring::splunk - improve splunk api handling (#4407) --- src/apps/monitoring/splunk/custom/api.pm | 60 +++++++++++++++++------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/src/apps/monitoring/splunk/custom/api.pm b/src/apps/monitoring/splunk/custom/api.pm index eee5369be..4badfc23a 100644 --- a/src/apps/monitoring/splunk/custom/api.pm +++ b/src/apps/monitoring/splunk/custom/api.pm @@ -54,7 +54,9 @@ sub new { 'timeout:s' => { name => 'timeout' }, 'unknown-http-status:s' => { name => 'unknown_http_status' }, 'warning-http-status:s' => { name => 'warning_http_status' }, - 'critical-http-status:s' => { name => 'critical_http_status' } + 'critical-http-status:s' => { name => 'critical_http_status' }, + 'splunk-retries:s' => { name => 'splunk_retries' }, + 'splunk-wait:s' => { name => 'splunk_wait' } }); } $options{options}->add_help(package => __PACKAGE__, sections => 'XMLAPI OPTIONS', once => 1); @@ -86,6 +88,8 @@ sub check_options { $self->{unknown_http_status} = (defined($self->{option_results}->{unknown_http_status})) ? $self->{option_results}->{unknown_http_status} : '%{http_code} < 200 or %{http_code} >= 300'; $self->{warning_http_status} = (defined($self->{option_results}->{warning_http_status})) ? $self->{option_results}->{warning_http_status} : ''; $self->{critical_http_status} = (defined($self->{option_results}->{critical_http_status})) ? $self->{option_results}->{critical_http_status} : ''; + $self->{splunk_retries} = (defined($self->{option_results}->{splunk_retries})) ? $self->{option_results}->{splunk_retries} : 5; + $self->{splunk_wait} = (defined($self->{option_results}->{splunk_wait})) ? $self->{option_results}->{splunk_wait} : 2; if ($self->{hostname} eq '') { $self->{output}->add_option_msg(short_msg => 'Need to specify hostname option.'); @@ -200,7 +204,7 @@ sub get_access_token { $self->{output}->add_option_msg(short_msg => 'error retrieving session_token'); $self->{output}->option_exit(); } - + $session_token = $xml_result->{sessionKey}; my $datas = { session_token => $session_token }; @@ -251,7 +255,6 @@ sub get_splunkd_health { } return \@splunkd_features_health; - } sub query_count { @@ -270,27 +273,44 @@ sub query_count { $self->{output}->option_exit(); } - sleep(1.5); + my $retries = 0; + my $is_done = 0; - my $query_status = $self->request_api( - method => 'GET', - endpoint => '/services/search/jobs/' . $query_sid->{sid}, - ); + while ($retries < $self->{http}->{options}->{splunk_retries}) { + my $query_status = $self->request_api( + method => 'GET', + endpoint => '/services/search/jobs/' . $query_sid->{sid} + ); - foreach (@{$query_status->{content}->{'s:dict'}->{'s:key'}}) { - if ($_->{name} eq 'isDone' && $_->{content} == 0){ - $self->{output}->add_option_msg(short_msg => "Search command wasn't completed."); - $self->{output}->option_exit(); - } elsif ($_->{name} eq 'isFailed' && $_->{content} == 1) { - $self->{output}->add_option_msg(short_msg => "Search command failed."); - $self->{output}->option_exit(); + foreach (@{$query_status->{content}->{'s:dict'}->{'s:key'}}) { + if ($_->{name} eq 'isDone' && $_->{content} == 1){ + $is_done = 1; + last; + } elsif ($_->{name} eq 'isFailed' && $_->{content} == 1) { + $self->{output}->add_option_msg(short_msg => "Search command failed."); + $self->{output}->option_exit(); + } } + + if ($is_done) { + last; + } + + $retries++; + sleep($self->{http}->{options}->{splunk_wait}); + } + + # it took too long to run query + if (!$is_done) { + $self->{output}->add_option_msg(short_msg => "Search command didn't finish in time. Considere tweaking --splunk-wait and --splunk-retries if the search is just slow"); + $self->{output}->option_exit(); } my $query_res = $self->request_api( method => 'GET', - endpoint => '/services/search/jobs/' . $query_sid->{sid} . '/results', + endpoint => '/services/search/jobs/' . $query_sid->{sid} . '/results' ); + my $query_count = $query_res->{result}->{field}->{value}->{text}; return $query_count; @@ -387,6 +407,14 @@ Specify api password. Set HTTP timeout. +=item B<--splunk-retries> + +How many times we should retry queries to splunk. To use in par with the --splunk-wait paramater (Default: 5) + +=item B<--splunk-wait> + +How long (in seconds) should we wait between each retry. To use in par with the --splunk-retries paramater (Default: 2) + =back =head1 DESCRIPTION