Add -t | --timezone and TimeZone configuration directive to be able to change the timezone. When set, SquidAnalyzer will read time from log file as UTC time and will add the hours specified in the timezone option. This is useful if the log file is not parsed on a computer with the same timezone as the squid server. Thanks to Anderson - BR Suporte for the feature request.

This commit is contained in:
Darold Gilles 2015-12-13 16:31:14 +01:00
parent 7878251edc
commit bcc2e7a0f2
5 changed files with 92 additions and 17 deletions

15
README
View File

@ -167,6 +167,10 @@ USAGE
Default /tmp/
-r | --rebuild : use this option to rebuild all html and graphs
output from all data files.
-t, --timezone +/-HH : set number of hours from GMT of the timezone.
Use this to adjust date/time of SquidAnalyzer
output when it is run on a different timezone
than the squid server.
-v | version : show version and exit.
--no-year-stat : disable years statistics, reports will start
from month level only.
@ -474,6 +478,17 @@ CONFIGURATION
corrupted line before the next run. This can be useful if you have
special characters in some fields like mime type.
TimeZone
Set timezone to use when SquidAnalyzer is run on a different server
than the one running squid and the two machines are in different
timezones. The value must follow the format: +/-HH. Default is
to use local time. For example:
TimeZone +01
for a log file generated on zone Europe/Paris with UTC+0100 and
parsed on a computer with different timezone.
SUPPORT
Release announcement
Please follow us on twitter to receive release announcements and latest

View File

@ -20,7 +20,7 @@ BEGIN {
use IO::File;
use Socket;
use Time::HiRes qw/ualarm/;
use Time::Local 'timelocal_nocheck';
use Time::Local qw/timelocal_nocheck timegm_nocheck/;
use Fcntl qw(:flock);
use IO::Handle;
use FileHandle;
@ -417,14 +417,14 @@ my $ug_format_regex1 = qr/^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) .* (B
sub new
{
my ($class, $conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile) = @_;
my ($class, $conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile, $timezone) = @_;
# Construct the class
my $self = {};
bless $self, $class;
# Initialize all variables
$self->_init($conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile);
$self->_init($conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile, $timezone);
# Return the instance
return($self);
@ -533,6 +533,7 @@ sub look_for_timestamp
my ($self, $line) = @_;
my $time = 0;
my $tz = ((0-$self->{TimeZone})*3600);
# Squid native format
if ( $line =~ $native_format_regex1 ) {
$time = $1;
@ -542,19 +543,31 @@ sub look_for_timestamp
} elsif ( $line =~ $common_format_regex1 ) {
$time = $4;
$time =~ /(\d+)\/(...)\/(\d+):(\d+):(\d+):(\d+)\s/;
$time = timelocal_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900);
if (!$self->{TimeZone}) {
$time = timelocal_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900);
} else {
$time = timegm_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900) + $tz;
}
$self->{is_squidguard_log} = 0;
$self->{is_ufdbguard_log} = 0;
# SquidGuard log format
} elsif ( $line =~ $sg_format_regex1 ) {
$self->{is_squidguard_log} = 1;
$self->{is_ufdbguard_log} = 0;
$time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900);
if (!$self->{TimeZone}) {
$time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900);
} else {
$time = timegm_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $tz;
}
# ufdbGuard log format
} elsif ( $line =~ $ug_format_regex1 ) {
$self->{is_ufdbguard_log} = 1;
$self->{is_squidguard_log} = 0;
$time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900);
if (!$self->{TimeZone}) {
$time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900);
} else {
$time = timegm_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $tz;
}
}
return $time;
@ -1153,6 +1166,9 @@ sub _parse_file_part
}
}
# Set timezone in seconds
my $tz = ((0-$self->{TimeZone})*3600);
# The log file format must be :
# time elapsed client code/status bytes method URL rfc931 peerstatus/peerhost type
# This is the default format of squid access log file.
@ -1193,6 +1209,7 @@ sub _parse_file_part
my $format = 'native';
if ( $line =~ $native_format_regex1 ) {
$time = $1;
$time += $tz;
$elapsed = abs($2);
$client_ip = $3;
$code = $4;
@ -1213,7 +1230,11 @@ sub _parse_file_part
$code = $11;
$mime_type = $12;
$time =~ /(\d+)\/(...)\/(\d+):(\d+):(\d+):(\d+)\s/;
$time = timelocal_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900);
if (!$self->{TimeZone}) {
$time = timelocal_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900);
} else {
$time = timegm_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900) + $tz;
}
# Some site has corrupted mime_type, try to remove nasty characters
$mime_type =~ s/[^\-\/\.\(\)\+\_,\=a-z0-9]+//igs;
} elsif ($line =~ $sg_format_regex1) {
@ -1229,7 +1250,11 @@ sub _parse_file_part
$bytes = 0;
$code = $12 . ':';
$mime_type = '';
$time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900);
if (!$self->{TimeZone}) {
$time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900);
} else {
$time = timegm_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $tz;
}
# Log format for ufdbGuard logs: timestamp [pid] BLOCK user clienthost aclname category url method
} elsif ($line =~ $ug_format_regex1) {
$format = 'ufdbguard';
@ -1244,7 +1269,11 @@ sub _parse_file_part
$bytes = 0;
$code = 'REDIRECT:';
$mime_type = '';
$time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900);
if (!$self->{TimeZone}) {
$time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900);
} else {
$time = timegm_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $tz;
}
} else {
next;
}
@ -1444,7 +1473,7 @@ sub _clear_stats
sub _init
{
my ($self, $conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile) = @_;
my ($self, $conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile, $timezone) = @_;
# Set path to pid file
$pidfile = $pid_dir . '/' . $pidfile;
@ -1505,6 +1534,7 @@ sub _init
$self->{child_count} = 0;
$self->{rebuild} = $rebuild || 0;
$self->{is_squidguard_log} = 0;
$self->{TimeZone} = $options{TimeZone} || $timezone || 0;
# Cleanup old temporary files
foreach my $tmp_file ('last_parsed.tmp', 'sg_last_parsed.tmp') {
@ -5435,6 +5465,10 @@ sub parse_config
$self->localdie("ERROR: unknown image format. See option: ImgFormat\n");
}
if ($opt{TimeZone} && $opt{TimeZone} !~ /^[+\-]\d{1,2}$/) {
$self->localdie("ERROR: timezone format: +/-HH, ex: +01. See option: TimeZone\n");
}
return %opt;
}

View File

@ -171,6 +171,10 @@ Usage: squid-analyzer [ -c squidanalyzer.conf ] [logfile(s)]
Default /tmp/
-r | --rebuild : use this option to rebuild all html and graphs
output from all data files.
-t, --timezone +/-HH : set number of hours from GMT of the timezone.
Use this to adjust date/time of SquidAnalyzer
output when it is run on a different timezone
than the squid server.
-v | version : show version and exit.
--no-year-stat : disable years statistics, reports will start
from month level only.
@ -180,7 +184,6 @@ Log files to parse can be given as command line arguments or as a comma separated
list of files for the LogFile configuration directive. By default SquidAnalyzer will
use file: /var/log/squid/access.log
There are special options like --rebuild that force SquidAnalyzer to rebuild all
HTML reports, useful after a new feature or a bug fix. If you want to limit the
rebuild to a single day, a single month or year, you can use the --build_date
@ -506,6 +509,18 @@ You can force it to wait for a certain number of errors before exiting. Of
course you might want to remove the corrupted line before the next run. This
can be useful if you have special characters in some fields like mime type.
=item TimeZone
Set timezone to use when SquidAnalyzer is run on a different server than
the one running squid and the two machines are in different timezones.
The value must follow the format: +/-HH. Default is to use local time.
For example:
TimeZone +01
for a log file generated on zone Europe/Paris with UTC+0100 and parsed on a
computer with different timezone.
=back
=head1 SUPPORT

View File

@ -154,3 +154,8 @@ TopUrlUser 10
# can be useful if you have special characters in some fields like mime type.
#MaxFormatError 0
# Set timezone to use when SquidAnalyzer is run on a different server than
# the one running squid and the two machines are in different timezones.
# The value must follow the format: +/-HH. Default is to use local time.
#TimeZone +01

View File

@ -4,7 +4,7 @@
#
use strict;
use SquidAnalyzer;
use Getopt::Long qw(:config no_ignore_case bundling);;
use Getopt::Long qw(:config no_ignore_case bundling);
use Benchmark;
use POSIX ":sys_wait_h";
@ -21,11 +21,12 @@ my $preserve = '';
my $debug = 0;
my $version = 0;
my $build_date = '';
my $pid_dir = '/tmp';
my $pidfile = 'squid-analyzer.pid';
my $queue_size = 0;
my $timezone = '';
my $no_year_stat = 0;
my $no_week_stat = 0;
my $pid_dir = '/tmp';
my $pidfile = 'squid-analyzer.pid';
my $queue_size = 0;
my $t0 = Benchmark->new;
# get the command line parameters
@ -36,9 +37,10 @@ my $result = GetOptions (
"h|help" => \$help,
"j|jobs=i" => \$queue_size,
"l|logfile" => \$obsolete,
"r|rebuild!" => \$rebuild,
"p|preserve=i" => \$preserve,
"P|pid_dir=s" => \$pid_dir,
"r|rebuild!" => \$rebuild,
"t|timezone=s" => \$timezone,
"v|version!" => \$version,
"no-year-stat!" => \$no_year_stat,
"no-week-stat!" => \$no_week_stat,
@ -90,7 +92,7 @@ close(OUT);
unlink("$pid_dir/last_parsed.tmp");
# Instantiate SquidAnalyzer.pm perl module
my $sa = new SquidAnalyzer($configfile, join(',', @logfile), $debug, $rebuild, $pid_dir, $pidfile);
my $sa = new SquidAnalyzer($configfile, join(',', @logfile), $debug, $rebuild, $pid_dir, $pidfile, $timezone);
$sa->{no_year_stat} = $no_year_stat;
$sa->{no_week_stat} = $no_week_stat;
$sa->{queue_size} = $queue_size;
@ -193,6 +195,10 @@ Usage: squid-analyzer [ -c squidanalyzer.conf ] [logfile(s)]
Default /tmp/
-r | --rebuild : use this option to rebuild all html and graphs
output from all data files.
-t, --timezone +/-HH : set number of hours from GMT of the timezone.
Use this to adjust date/time of SquidAnalyzer
output when it is run on a different timezone
than the squid server.
-v | version : show version and exit.
--no-year-stat : disable years statistics, reports will start
from month level only.