diff --git a/README b/README index 107c0ce..45dc324 100644 --- a/README +++ b/README @@ -1,5 +1,5 @@ NAME - SquidAnalyzer v4.3 - Squid access log report generation tool + SquidAnalyzer v4.4 - Squid access log report generation tool DESCRIPTION SquidAnalyzer parse native access log format of the Squid proxy and @@ -215,9 +215,23 @@ CONFIGURATION Default is top 10. Exclude exclusion_file - Used to set client ip addresses, network addresses and/or auth login - name to exclude from report. Format of the file is a login or an ip - address by line. + Used to set client ip addresses, network addresses, auth login or + uri to exclude from report. + + You can define one exclusion per line by specifying first the type of + the exclusion (USER, CLIENT or URI) and a space separated list of + valid regex. + + See example below: + + CLIENT 192\.168\.1\.2 + CLIENT 10\.169\.1\.\d+ 192\.168\.10\..* + USER myloginstr + USER guestlogin\d+ guestdemo + URI http:\/\/myinternetdomain.dom.* + URI .*\.webmail\.com\/.*\/login\.php.* + + You can have multiple lines of the same exclusion type. Lang language_file Used to set the translation file to be used. Value must be set to a diff --git a/SquidAnalyzer.pm b/SquidAnalyzer.pm index 71ceff7..625e28e 100644 --- a/SquidAnalyzer.pm +++ b/SquidAnalyzer.pm @@ -223,12 +223,46 @@ sub parseFile $id = $login; } next if (!$id || !$bytes); - # Skip exclusion of client - next if (grep(/^($client_ip|$login)$/, @{$self->{Exclude}})); - # Skip exclusion of network if client is not a dns name - if ($client_ip =~ /^(\d{1,3}\.\d{1,3}\.\d{1,3}\.)/) { - my $netex = quotemeta($1 . 
'0'); - next if (grep(/^$netex$/, @{$self->{Exclude}})); + # check for client/user exclusion in old syntax + my $found = 0; + if (exists $self->{Exclude}{all}) { + foreach my $e (@{$self->{Exclude}{all}}) { + if ( ($client_ip =~ m#^$e$#i) || ($login =~ m#^$e$#i)) { + $found = 1; + last; + } + } + next if ($found); + } + # check for user exclusion + if (exists $self->{Exclude}{users}) { + foreach my $e (@{$self->{Exclude}{users}}) { + if ($login =~ m#^$e$#i) { + $found = 1; + last; + } + } + next if ($found); + } + # check for client exclusion + if (exists $self->{Exclude}{clients}) { + foreach my $e (@{$self->{Exclude}{clients}}) { + if ($client_ip =~ m#^$e$#i) { + $found = 1; + last; + } + } + next if ($found); + } + # check for URL exclusion + if (exists $self->{Exclude}{uris}) { + foreach my $e (@{$self->{Exclude}{uris}}) { + if ($url =~ m#^$e$#i) { + $found = 1; + last; + } + } + next if ($found); } # Anonymize all users if ($self->{AnonymizeLogin} && ($client_ip ne $id)) { @@ -425,7 +459,7 @@ sub _init } $self->{NetworkAlias} = &parse_network_aliases($options{NetworkAlias} || ''); $self->{UserAlias} = &parse_user_aliases($options{UserAlias} || ''); - $self->{Exclude} = &parse_user_exclusion($options{Exclude} || ''); + %{$self->{Exclude}} = &parse_exclusion($options{Exclude} || ''); $self->{CostPrice} = $options{CostPrice} || 0; $self->{Currency} = $options{Currency} || '€'; @@ -2910,22 +2944,31 @@ sub parse_user_aliases return \%alias; } -sub parse_user_exclusion +sub parse_exclusion { my ($file) = @_; return if (!$file || !-f $file); - my @exclusion = (); - open(EXCLUDED, $file) or die "ERROR: can't open client exclusion file $file, $!\n"; + my %exclusion = (); + open(EXCLUDED, $file) or die "ERROR: can't open exclusion file $file, $!\n"; while (my $l = <EXCLUDED>) { chomp($l); next if (!$l || ($l =~ /^[\s\t]*#/)); - push(@exclusion, $l); + if ($l =~ m#^USER[\s\t]+(.*)#) { + push(@{$exclusion{users}}, split(m#[\s\t]+#, $1)); + } elsif ($l =~ m#^CLIENT[\s\t]+(.*)#) { + 
push(@{$exclusion{clients}}, split(m#[\s\t]+#, $1)); + } elsif ($l =~ m#^URI[\s\t]+(.*)#) { + push(@{$exclusion{uris}}, split(m#[\s\t]+#, $1)); + } else { + # backward compatibility + push(@{$exclusion{all}}, $l); + } } close(EXCLUDED); - return \@exclusion; + return %exclusion; } # User URL-encode diff --git a/doc/SquidAnalyzer.pod b/doc/SquidAnalyzer.pod index 3b2b574..bd10f91 100644 --- a/doc/SquidAnalyzer.pod +++ b/doc/SquidAnalyzer.pod @@ -1,6 +1,6 @@ =head1 NAME -SquidAnalyzer v4.3 - Squid access log report generation tool +SquidAnalyzer v4.4 - Squid access log report generation tool =head1 DESCRIPTION @@ -234,9 +234,22 @@ Default is top 10. =item Exclude exclusion_file -Used to set client ip addresses, network addresses and/or auth login -name to exclude from report. Format of the file is a login or an ip -address by line. +Used to set client ip addresses, network addresses, auth login or +uri to exclude from report. + +You can define one exclusion per line by specifying first the type of the +exclusion (USER, CLIENT or URI) and a space separated list of valid regex. + +See example below: + + CLIENT 192\.168\.1\.2 + CLIENT 10\.169\.1\.\d+ 192\.168\.10\..* + USER myloginstr + USER guestlogin\d+ guestdemo + URI http:\/\/myinternetdomain.dom.* + URI .*\.webmail\.com\/.*\/login\.php.* + +You can have multiple lines of the same exclusion type. =item Lang language_file diff --git a/doc/squidanalyzer.3 b/doc/squidanalyzer.3 index 32c0b8d..c6f14fa 100644 --- a/doc/squidanalyzer.3 +++ b/doc/squidanalyzer.3 @@ -124,7 +124,7 @@ .\" ======================================================================== .\" .IX Title "SQUIDANALYZER 1" -.TH SQUIDANALYZER 1 "2012-04-26" "perl v5.10.1" "User Contributed Perl Documentation" +.TH SQUIDANALYZER 1 "2012-07-05" "perl v5.10.1" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l @@ -363,9 +363,24 @@ Used to set the number of top url and second level domain to show. Default is top 10. .IP "Exclude exclusion_file" 4 .IX Item "Exclude exclusion_file" -Used to set client ip addresses, network addresses and/or auth login -name to exclude from report. Format of the file is a login or an ip -address by line. +Used to set client ip addresses, network addresses, auth login or +uri to exclude from report. +.Sp +You can define one exclusion per line by specifying first the type of the +exclusion (\s-1USER\s0, \s-1CLIENT\s0 or \s-1URI\s0) and a space separated list of valid regex. +.Sp +See example below: +.Sp +.Vb 6 +\& CLIENT 192\e.168\e.1\e.2 +\& CLIENT 10\e.169\e.1\e.\ed+ 192\e.168\e.10\e..* +\& USER myloginstr +\& USER guestlogin\ed+ guestdemo +\& URI http:\e/\e/myinternetdomain.dom.* +\& URI .*\e.webmail\e.com\e/.*\e/login\e.php.* +.Ve +.Sp +You can have multiple lines of the same exclusion type. .IP "Lang language_file" 4 .IX Item "Lang language_file" Used to set the translation file to be used. Value must be set to diff --git a/excluded b/excluded index 3204700..4974b69 100644 --- a/excluded +++ b/excluded @@ -1,10 +1,15 @@ #------------------------------------------------------------------------------ -# File used to defined which client ip address, network regex address and/or -# auth login to exclude from report. Typically if your boss or you don't -# want to appears in the statistic report add your ip address or login to -# this file. One by line. See example bellow: +# File used to define which client ip address, network regex address, auth +# login and URI to exclude from report. +# You can define one exclusion per line by specifying first the type of the +# exclusion (USER, CLIENT or URI) and a space separated list of valid regex. +# +# See example below: #------------------------------------------------------------------------------ -#192.168.1.2 -#myloginstr -#^10\.169\.1\. 
-#guestlogin\d+ +#CLIENT 192\.168\.1\.2 +#CLIENT 10\.169\.1\.\d+ 192\.168\.10\..* +#USER myloginstr +#USER guestlogin\d+ guestdemo +#URI http:\/\/myinternetdomain.dom.* +#URI .*\.webmail\.com\/.*\/login\.php.* +