package SquidAnalyzer;
#------------------------------------------------------------------------------
# Project : Squid Log Analyzer
# Name : SquidAnalyzer.pm
# Language : Perl 5
# OS : All
# Copyright: Copyright (c) 2001-2019 Gilles Darold - All rights reserved.
# Licence : This program is free software; you can redistribute it
# and/or modify it under the same terms as Perl itself.
# Author : Gilles Darold, gilles _AT_ darold _DOT_ net
# Function : Main perl module for Squid Log Analyzer
# Usage : See documentation.
#------------------------------------------------------------------------------
use strict qw/vars/;
BEGIN {
use Exporter();
use vars qw($VERSION $COPYRIGHT $AUTHOR @ISA @EXPORT $ZCAT_PROG $BZCAT_PROG $XZCAT_PROG $RM_PROG);
use POSIX qw/ strftime sys_wait_h /;
use IO::File;
use Socket ();
use Time::HiRes qw/ualarm usleep/;
use Time::Local qw/timelocal_nocheck timegm_nocheck timegm timelocal/;
use Fcntl qw(:flock);
use IO::Handle;
use FileHandle;
use POSIX qw(locale_h);
setlocale(LC_NUMERIC, '');
setlocale(LC_ALL, 'C');
# Set all internal variables
$VERSION = '6.6';
$COPYRIGHT = 'Copyright (c) 2001-2019 Gilles Darold - All rights reserved.';
$AUTHOR = "Gilles Darold - gilles _AT_ darold _DOT_ net";
@ISA = qw(Exporter);
@EXPORT = qw//;
$| = 1;
}
$ZCAT_PROG = "/usr/bin/zcat";
$BZCAT_PROG = "/usr/bin/bzcat";
$RM_PROG = "/bin/rm";
$XZCAT_PROG = "/usr/bin/xzcat";
# DNS Cache
my %CACHE = ();
# Colors used to draw graphs
my @GRAPH_COLORS = ('#6e9dc9', '#f4ab3a', '#ac7fa8', '#8dbd0f');
# Default translation strings
my %Translate = (
'CharSet' => 'utf-8',
'01' => 'Jan',
'02' => 'Feb',
'03' => 'Mar',
'04' => 'Apr',
'05' => 'May',
'06' => 'Jun',
'07' => 'Jul',
'08' => 'Aug',
'09' => 'Sep',
'10' => 'Oct',
'11' => 'Nov',
'12' => 'Dec',
'KB' => 'Kilo bytes',
'MB' => 'Mega bytes',
'GB' => 'Giga bytes',
'Bytes' => 'Bytes',
'Total' => 'Total',
'Years' => 'Years',
'Users' => 'Users',
'Sites' => 'Sites',
'Cost' => 'Cost',
'Requests' => 'Requests',
'Megabytes' => 'Mega bytes',
'Months' => 'Months',
'Days' => 'Days',
'Hit' => 'Hit',
'Miss' => 'Miss',
'Denied' => 'Denied',
'Domains' => 'Domains',
'Requests_graph' => 'Requests',
'Megabytes_graph' => 'Mega bytes',
'Months_graph' => 'Months',
'Days_graph' => 'Days',
'Hit_graph' => 'Hit',
'Miss_graph' => 'Miss',
'Denied_graph' => 'Denied',
'Total_graph' => 'Total',
'Domains_graph' => 'Domains',
'Users_help' => 'Total number of different users for this period',
'Sites_help' => 'Total number of different visited sites for this period',
'Domains_help' => 'Total number of different second level domains visited for this period',
'Hit_help' => 'Objects found in cache',
'Miss_help' => 'Objects not found in cache',
'Denied_help' => 'Objects with denied access',
'Cost_help' => '1 Mega byte =',
'Generation' => 'Report generated on',
'Generation_from' => 'From %s ',
'Generation_to' => 'Upto %s',
'Main_cache_title' => 'Cache Statistics',
'Cache_title' => 'Cache Statistics on',
'Stat_label' => 'Stat',
'Mime_link' => 'Mime Types',
'Network_link' => 'Networks',
'User_link' => 'Users',
'Top_url_link' => 'Top Urls',
'Top_domain_link' => 'Top Domains',
'Back_link' => 'Back',
'Graph_cache_hit_title' => '%s Requests statistics on',
'Graph_cache_byte_title' => '%s Mega Bytes statistics on',
'Hourly' => 'Hourly',
'Hours' => 'Hours',
'Daily' => 'Daily',
'Days' => 'Days',
'Monthly' => 'Monthly',
'Months' => 'Months',
'Mime_title' => 'Mime Type Statistics on',
'Mime_number' => 'Number of mime type',
'Network_title' => 'Network Statistics on',
'Network_number' => 'Number of network',
'Duration' => 'Duration',
'Time' => 'Time',
'Largest' => 'Largest',
'Url' => 'Url',
'User_title' => 'User Statistics on',
'User_number' => 'Number of user',
'Url_title' => 'Top %d site on',
'Url_Hits_title' => 'Top %d Url hits on',
'Url_Bytes_title' => 'Top %d Url bytes on',
'Url_Duration_title' => 'Top %d Url duration on',
'Url_number' => 'Number of Url',
'Domain_Hits_title' => 'Top %d Domain hits on',
'Domain_Bytes_title' => 'Top %d Domain bytes on',
'Domain_Duration_title' => 'Top %d Domain duration on',
'Domain_number' => 'Number of domain',
'Domain_graph_hits_title' => 'Domain Hits Statistics on',
'Domain_graph_bytes_title' => 'Domain Bytes Statistics on',
'Second_domain_graph_hits_title' => 'Second level Hits Statistics on',
'Second_domain_graph_bytes_title' => 'Second level Bytes Statistics on',
'First_visit' => 'First visit',
'Last_visit' => 'Last visit',
'Globals_Statistics' => 'Global Statistics',
'Legend' => 'Legend',
'File_Generated' => 'File generated by',
'Up_link' => 'Up',
'Click_year_stat' => 'Click on year\'s statistics link for details',
'Mime_graph_hits_title' => 'Mime Type Hits Statistics on',
'Mime_graph_bytes_title' => 'Mime Type MBytes Statistics on',
'User' => 'User',
'Count' => 'Count',
'WeekDay' => 'Su Mo Tu We Th Fr Sa',
'Week' => 'Week',
'Top_denied_link' => 'Top Denied',
'Blocklist_acl_title' => 'Blocklist ACL use',
'Throughput' => 'Throughput',
'Graph_throughput_title' => '%s throughput on',
'Throughput_graph' => 'Bytes/sec',
'User_Ip' => 'User Ip',
);
my @TLD1 = (
'\.com\.ac','\.net\.ac','\.gov\.ac','\.org\.ac','\.mil\.ac','\.co\.ae',
'\.net\.ae','\.gov\.ae','\.ac\.ae','\.sch\.ae','\.org\.ae','\.mil\.ae','\.pro\.ae',
'\.name\.ae','\.com\.af','\.edu\.af','\.gov\.af','\.net\.af','\.org\.af','\.com\.al',
'\.edu\.al','\.gov\.al','\.mil\.al','\.net\.al','\.org\.al','\.ed\.ao','\.gv\.ao',
'\.og\.ao','\.co\.ao','\.pb\.ao','\.it\.ao','\.com\.ar','\.edu\.ar','\.gob\.ar',
'\.gov\.ar','\.gov\.ar','\.int\.ar','\.mil\.ar','\.net\.ar','\.org\.ar','\.tur\.ar',
'\.gv\.at','\.ac\.at','\.co\.at','\.or\.at','\.com\.au','\.net\.au','\.org\.au',
'\.edu\.au','\.gov\.au','\.csiro\.au','\.asn\.au','\.id\.au','\.org\.ba','\.net\.ba',
'\.edu\.ba','\.gov\.ba','\.mil\.ba','\.unsa\.ba','\.untz\.ba','\.unmo\.ba','\.unbi\.ba',
'\.unze\.ba','\.co\.ba','\.com\.ba','\.rs\.ba','\.co\.bb','\.com\.bb','\.net\.bb',
'\.org\.bb','\.gov\.bb','\.edu\.bb','\.info\.bb','\.store\.bb','\.tv\.bb','\.biz\.bb',
'\.com\.bh','\.info\.bh','\.cc\.bh','\.edu\.bh','\.biz\.bh','\.net\.bh','\.org\.bh',
'\.gov\.bh','\.com\.bn','\.edu\.bn','\.gov\.bn','\.net\.bn','\.org\.bn','\.com\.bo',
'\.net\.bo','\.org\.bo','\.tv\.bo','\.mil\.bo','\.int\.bo','\.gob\.bo','\.gov\.bo',
'\.edu\.bo','\.adm\.br','\.adv\.br','\.agr\.br','\.am\.br','\.arq\.br','\.art\.br',
'\.ato\.br','\.b\.br','\.bio\.br','\.blog\.br','\.bmd\.br','\.cim\.br','\.cng\.br',
'\.cnt\.br','\.com\.br','\.coop\.br','\.ecn\.br','\.edu\.br','\.eng\.br','\.esp\.br',
'\.etc\.br','\.eti\.br','\.far\.br','\.flog\.br','\.fm\.br','\.fnd\.br','\.fot\.br',
'\.fst\.br','\.g12\.br','\.ggf\.br','\.gov\.br','\.imb\.br','\.ind\.br','\.inf\.br',
'\.jor\.br','\.jus\.br','\.lel\.br','\.mat\.br','\.med\.br','\.mil\.br','\.mus\.br',
'\.net\.br','\.nom\.br','\.not\.br','\.ntr\.br','\.odo\.br','\.org\.br','\.ppg\.br',
'\.pro\.br','\.psc\.br','\.psi\.br','\.qsl\.br','\.rec\.br','\.slg\.br','\.srv\.br',
'\.tmp\.br','\.trd\.br','\.tur\.br','\.tv\.br','\.vet\.br','\.vlog\.br','\.wiki\.br',
'\.zlg\.br','\.com\.bs','\.net\.bs','\.org\.bs','\.edu\.bs','\.gov\.bs','com\.bz',
'edu\.bz','gov\.bz','net\.bz','org\.bz','\.ab\.ca','\.bc\.ca','\.mb\.ca','\.nb\.ca',
'\.nf\.ca','\.nl\.ca','\.ns\.ca','\.nt\.ca','\.nu\.ca','\.on\.ca','\.pe\.ca','\.qc\.ca',
'\.sk\.ca','\.yk\.ca','\.co\.ck','\.org\.ck','\.edu\.ck','\.gov\.ck','\.net\.ck',
'\.gen\.ck','\.biz\.ck','\.info\.ck','\.ac\.cn','\.com\.cn','\.edu\.cn','\.gov\.cn',
'\.mil\.cn','\.net\.cn','\.org\.cn','\.ah\.cn','\.bj\.cn','\.cq\.cn','\.fj\.cn','\.gd\.cn',
'\.gs\.cn','\.gz\.cn','\.gx\.cn','\.ha\.cn','\.hb\.cn','\.he\.cn','\.hi\.cn','\.hl\.cn',
'\.hn\.cn','\.jl\.cn','\.js\.cn','\.jx\.cn','\.ln\.cn','\.nm\.cn','\.nx\.cn','\.qh\.cn',
'\.sc\.cn','\.sd\.cn','\.sh\.cn','\.sn\.cn','\.sx\.cn','\.tj\.cn','\.tw\.cn','\.xj\.cn',
'\.xz\.cn','\.yn\.cn','\.zj\.cn','\.com\.co','\.org\.co','\.edu\.co','\.gov\.co',
'\.net\.co','\.mil\.co','\.nom\.co','\.ac\.cr','\.co\.cr','\.ed\.cr','\.fi\.cr','\.go\.cr',
'\.com\.cu','\.edu\.cu','\.gov\.cu','\.net\.cu','\.org\.cu',
'\.or\.cr','\.sa\.cr','\.cr','\.ac\.cy','\.net\.cy','\.gov\.cy','\.org\.cy',
'\.pro\.cy','\.name\.cy','\.ekloges\.cy','\.tm\.cy','\.ltd\.cy','\.biz\.cy','\.press\.cy',
'\.parliament\.cy','\.com\.cy','\.edu\.do','\.gob\.do','\.gov\.do','\.com\.do','\.sld\.do',
'\.org\.do','\.net\.do','\.web\.do','\.mil\.do','\.art\.do','\.com\.dz','\.org\.dz',
'\.net\.dz','\.gov\.dz','\.edu\.dz','\.asso\.dz','\.pol\.dz','\.art\.dz','\.com\.ec',
'\.info\.ec','\.net\.ec','\.fin\.ec','\.med\.ec','\.pro\.ec','\.org\.ec','\.edu\.ec',
'\.gov\.ec','\.mil\.ec','\.com\.eg','\.edu\.eg','\.eun\.eg','\.gov\.eg','\.mil\.eg',
'\.name\.eg','\.net\.eg','\.org\.eg','\.sci\.eg','\.com\.er','\.edu\.er','\.gov\.er',
'\.mil\.er','\.net\.er','\.org\.er','\.ind\.er','\.rochest\.er','\.w\.er','\.com\.es',
'\.nom\.es','\.org\.es','\.gob\.es','\.edu\.es','\.com\.et','\.gov\.et','\.org\.et',
'\.edu\.et','\.net\.et','\.biz\.et','\.name\.et','\.info\.et','\.ac\.fj','\.biz\.fj',
'\.com\.fj','\.info\.fj','\.mil\.fj','\.name\.fj','\.net\.fj','\.org\.fj','\.pro\.fj',
'\.co\.fk','\.org\.fk','\.gov\.fk','\.ac\.fk','\.nom\.fk','\.net\.fk','\.fr','\.tm\.fr',
'\.asso\.fr','\.nom\.fr','\.prd\.fr','\.presse\.fr','\.com\.fr','\.gouv\.fr','\.co\.gg',
'\.net\.gg','\.org\.gg','\.com\.gh','\.edu\.gh','\.gov\.gh','\.org\.gh','\.mil\.gh',
'\.com\.gn','\.ac\.gn','\.gov\.gn','\.org\.gn','\.net\.gn','\.com\.gr','\.edu\.gr','\.net\.gr',
'\.org\.gr','\.gov\.gr','\.mil\.gr','\.com\.gt','\.edu\.gt','\.net\.gt','\.gob\.gt',
'\.org\.gt','\.mil\.gt','\.ind\.gt','\.com\.gu','\.net\.gu','\.gov\.gu','\.org\.gu','\.edu\.gu',
'\.com\.hk','\.edu\.hk','\.gov\.hk','\.idv\.hk','\.net\.hk','\.org\.hk','\.ac\.id','\.co\.id',
'\.net\.id','\.or\.id','\.web\.id','\.sch\.id','\.mil\.id','\.go\.id','\.war\.net\.id','\.ac\.il',
'\.co\.il','\.org\.il','\.net\.il','\.k12\.il','\.gov\.il','\.muni\.il','\.idf\.il','\.in',
'\.co\.in','\.firm\.in','\.net\.in','\.org\.in','\.gen\.in','\.ind\.in','\.ac\.in','\.edu\.in',
'\.res\.in','\.ernet\.in','\.gov\.in','\.mil\.in','\.nic\.in','\.nic\.in','\.iq','\.gov\.iq',
'\.edu\.iq','\.com\.iq','\.mil\.iq','\.org\.iq','\.net\.iq','\.ir','\.ac\.ir','\.co\.ir',
'\.gov\.ir','\.id\.ir','\.net\.ir','\.org\.ir','\.sch\.ir','\.dnssec\.ir','\.gov\.it',
'\.edu\.it','\.co\.je','\.net\.je','\.org\.je','\.com\.jo','\.net\.jo','\.gov\.jo','\.edu\.jo',
'\.org\.jo','\.mil\.jo','\.name\.jo','\.sch\.jo','\.ac\.jp','\.ad\.jp','\.co\.jp','\.ed\.jp',
'\.go\.jp','\.gr\.jp','\.lg\.jp','\.ne\.jp','\.or\.jp','\.co\.ke','\.or\.ke','\.ne\.ke','\.go\.ke',
'\.ac\.ke','\.sc\.ke','\.me\.ke','\.mobi\.ke','\.info\.ke','\.per\.kh','\.com\.kh','\.edu\.kh',
'\.gov\.kh','\.mil\.kh','\.net\.kh','\.org\.kh','\.com\.ki','\.biz\.ki','\.de\.ki','\.net\.ki',
'\.info\.ki','\.org\.ki','\.gov\.ki','\.edu\.ki','\.mob\.ki','\.tel\.ki','\.km','\.com\.km',
'\.coop\.km','\.asso\.km','\.nom\.km','\.presse\.km','\.tm\.km','\.medecin\.km','\.notaires\.km',
'\.pharmaciens\.km','\.veterinaire\.km','\.edu\.km','\.gouv\.km','\.mil\.km','\.net\.kn',
'\.org\.kn','\.edu\.kn','\.gov\.kn','\.kr','\.co\.kr','\.ne\.kr','\.or\.kr','\.re\.kr','\.pe\.kr',
'\.go\.kr','\.mil\.kr','\.ac\.kr','\.hs\.kr','\.ms\.kr','\.es\.kr','\.sc\.kr','\.kg\.kr',
'\.seoul\.kr','\.busan\.kr','\.daegu\.kr','\.incheon\.kr','\.gwangju\.kr','\.daejeon\.kr',
'\.ulsan\.kr','\.gyeonggi\.kr','\.gangwon\.kr','\.chungbuk\.kr','\.chungnam\.kr','\.jeonbuk\.kr',
'\.jeonnam\.kr','\.gyeongbuk\.kr','\.gyeongnam\.kr','\.jeju\.kr','\.edu\.kw','\.com\.kw',
'\.net\.kw','\.org\.kw','\.gov\.kw','\.com\.ky','\.org\.ky','\.net\.ky','\.edu\.ky','\.gov\.ky',
'\.com\.kz','\.edu\.kz','\.gov\.kz','\.mil\.kz','\.net\.kz','\.org\.kz','\.com\.lb','\.edu\.lb',
'\.gov\.lb','\.net\.lb','\.org\.lb','\.gov\.lk','\.sch\.lk','\.net\.lk','\.int\.lk','\.com\.lk',
'\.org\.lk','\.edu\.lk','\.ngo\.lk','\.soc\.lk','\.web\.lk','\.ltd\.lk','\.assn\.lk','\.grp\.lk',
'\.hotel\.lk','\.com\.lr','\.edu\.lr','\.gov\.lr','\.org\.lr','\.net\.lr','\.com\.lv','\.edu\.lv',
'\.gov\.lv','\.org\.lv','\.mil\.lv','\.id\.lv','\.net\.lv','\.asn\.lv','\.conf\.lv','\.com\.ly',
'\.net\.ly','\.gov\.ly','\.plc\.ly','\.edu\.ly','\.sch\.ly','\.med\.ly','\.org\.ly','\.id\.ly',
'\.ma','\.net\.ma','\.ac\.ma','\.org\.ma','\.gov\.ma','\.press\.ma','\.co\.ma','\.tm\.mc',
'\.asso\.mc','\.co\.me','\.net\.me','\.org\.me','\.edu\.me','\.ac\.me','\.gov\.me','\.its\.me',
'\.priv\.me','\.org\.mg','\.nom\.mg','\.gov\.mg','\.prd\.mg','\.tm\.mg','\.edu\.mg','\.mil\.mg',
'\.com\.mg','\.com\.mk','\.org\.mk','\.net\.mk','\.edu\.mk','\.gov\.mk','\.inf\.mk','\.name\.mk',
'\.pro\.mk','\.com\.ml','\.net\.ml','\.org\.ml','\.edu\.ml','\.gov\.ml','\.presse\.ml','\.gov\.mn',
'\.edu\.mn','\.org\.mn','\.com\.mo','\.edu\.mo','\.gov\.mo','\.net\.mo','\.org\.mo','\.com\.mt',
'\.org\.mt','\.net\.mt','\.edu\.mt','\.gov\.mt','\.aero\.mv','\.biz\.mv','\.com\.mv','\.coop\.mv',
'\.edu\.mv','\.gov\.mv','\.info\.mv','\.int\.mv','\.mil\.mv','\.museum\.mv','\.name\.mv','\.net\.mv',
'\.org\.mv','\.pro\.mv','\.ac\.mw','\.co\.mw','\.com\.mw','\.coop\.mw','\.edu\.mw','\.gov\.mw',
'\.int\.mw','\.museum\.mw','\.net\.mw','\.org\.mw','\.com\.mx','\.net\.mx','\.org\.mx','\.edu\.mx',
'\.gob\.mx','\.com\.my','\.net\.my','\.org\.my','\.gov\.my','\.edu\.my','\.sch\.my','\.mil\.my',
'\.name\.my','\.com\.nf','\.net\.nf','\.arts\.nf','\.store\.nf','\.web\.nf','\.firm\.nf',
'\.info\.nf','\.other\.nf','\.per\.nf','\.rec\.nf','\.com\.ng','\.org\.ng','\.gov\.ng','\.edu\.ng',
'\.net\.ng','\.sch\.ng','\.name\.ng','\.mobi\.ng','\.biz\.ng','\.mil\.ng','\.gob\.ni','\.co\.ni',
'\.com\.ni','\.ac\.ni','\.edu\.ni','\.org\.ni','\.nom\.ni','\.net\.ni','\.mil\.ni','\.com\.np',
'\.edu\.np','\.gov\.np','\.org\.np','\.mil\.np','\.net\.np','\.edu\.nr','\.gov\.nr','\.biz\.nr',
'\.info\.nr','\.net\.nr','\.org\.nr','\.com\.nr','\.com\.om','\.co\.om','\.edu\.om','\.ac\.om',
'\.sch\.om','\.gov\.om','\.net\.om','\.org\.om','\.mil\.om','\.museum\.om','\.biz\.om','\.pro\.om',
'\.med\.om','\.edu\.pe','\.gob\.pe','\.nom\.pe','\.mil\.pe','\.sld\.pe','\.org\.pe','\.com\.pe',
'\.net\.pe','\.com\.ph','\.net\.ph','\.org\.ph','\.mil\.ph','\.ngo\.ph','\.i\.ph','\.gov\.ph',
'\.edu\.ph','\.com\.pk','\.net\.pk','\.edu\.pk','\.org\.pk','\.fam\.pk','\.biz\.pk','\.web\.pk',
'\.gov\.pk','\.gob\.pk','\.gok\.pk','\.gon\.pk','\.gop\.pk','\.gos\.pk','\.pwr\.pl','\.com\.pl',
'\.biz\.pl','\.net\.pl','\.art\.pl','\.edu\.pl','\.org\.pl','\.ngo\.pl','\.gov\.pl','\.info\.pl',
'\.mil\.pl','\.waw\.pl','\.warszawa\.pl','\.wroc\.pl','\.wroclaw\.pl','\.krakow\.pl','\.katowice\.pl',
'\.poznan\.pl','\.lodz\.pl','\.gda\.pl','\.gdansk\.pl','\.slupsk\.pl','\.radom\.pl','\.szczecin\.pl',
'\.lublin\.pl','\.bialystok\.pl','\.olsztyn\.pl','\.torun\.pl','\.gorzow\.pl','\.zgora\.pl',
'\.biz\.pr','\.com\.pr','\.edu\.pr','\.gov\.pr','\.info\.pr','\.isla\.pr','\.name\.pr','\.net\.pr',
'\.org\.pr','\.pro\.pr','\.est\.pr','\.prof\.pr','\.ac\.pr','\.com\.ps','\.net\.ps','\.org\.ps',
'\.edu\.ps','\.gov\.ps','\.plo\.ps','\.sec\.ps','\.co\.pw','\.ne\.pw','\.or\.pw','\.ed\.pw','\.go\.pw',
'\.belau\.pw','\.arts\.ro','\.com\.ro','\.firm\.ro','\.info\.ro','\.nom\.ro','\.nt\.ro','\.org\.ro',
'\.rec\.ro','\.store\.ro','\.tm\.ro','\.www\.ro','\.co\.rs','\.org\.rs','\.edu\.rs','\.ac\.rs',
'\.gov\.rs','\.in\.rs','\.com\.sb','\.net\.sb','\.edu\.sb','\.org\.sb','\.gov\.sb','\.com\.sc',
'\.net\.sc','\.edu\.sc','\.gov\.sc','\.org\.sc','\.co\.sh','\.com\.sh','\.org\.sh','\.gov\.sh',
'\.edu\.sh','\.net\.sh','\.nom\.sh','\.com\.sl','\.net\.sl','\.org\.sl','\.edu\.sl','\.gov\.sl',
'\.gov\.st','\.saotome\.st','\.principe\.st','\.consulado\.st','\.embaixada\.st','\.org\.st',
'\.edu\.st','\.net\.st','\.com\.st','\.store\.st','\.mil\.st','\.co\.st','\.edu\.sv','\.gob\.sv',
'\.com\.sv','\.org\.sv','\.red\.sv','\.co\.sz','\.ac\.sz','\.org\.sz','\.com\.tr','\.gen\.tr',
'\.org\.tr','\.biz\.tr','\.info\.tr','\.av\.tr','\.dr\.tr','\.pol\.tr','\.bel\.tr','\.tsk\.tr',
'\.bbs\.tr','\.k12\.tr','\.edu\.tr','\.name\.tr','\.net\.tr','\.gov\.tr','\.web\.tr','\.tel\.tr',
'\.tv\.tr','\.co\.tt','\.com\.tt','\.org\.tt','\.net\.tt','\.biz\.tt','\.info\.tt','\.pro\.tt',
'\.int\.tt','\.coop\.tt','\.jobs\.tt','\.mobi\.tt','\.travel\.tt','\.museum\.tt','\.aero\.tt',
'\.cat\.tt','\.tel\.tt','\.name\.tt','\.mil\.tt','\.edu\.tt','\.gov\.tt','\.edu\.tw','\.gov\.tw',
'\.mil\.tw','\.com\.tw','\.net\.tw','\.org\.tw','\.idv\.tw','\.game\.tw','\.ebiz\.tw','\.club\.tw',
'\.com\.mu','\.gov\.mu','\.net\.mu','\.org\.mu','\.ac\.mu','\.co\.mu','\.or\.mu','\.ac\.mz',
'\.co\.mz','\.edu\.mz','\.org\.mz','\.gov\.mz','\.com\.na','\.co\.na','\.ac\.nz','\.co\.nz',
'\.cri\.nz','\.geek\.nz','\.gen\.nz','\.govt\.nz','\.health\.nz','\.iwi\.nz','\.maori\.nz',
'\.mil\.nz','\.net\.nz','\.org\.nz','\.parliament\.nz','\.school\.nz','\.abo\.pa','\.ac\.pa',
'\.com\.pa','\.edu\.pa','\.gob\.pa','\.ing\.pa','\.med\.pa','\.net\.pa','\.nom\.pa','\.org\.pa',
'\.sld\.pa','\.com\.pt','\.edu\.pt','\.gov\.pt','\.int\.pt','\.net\.pt','\.nome\.pt','\.org\.pt',
'\.publ\.pt','\.com\.py','\.edu\.py','\.gov\.py','\.mil\.py','\.net\.py','\.org\.py','\.com\.qa',
'\.edu\.qa','\.gov\.qa','\.mil\.qa','\.net\.qa','\.org\.qa','\.asso\.re','\.com\.re','\.nom\.re',
'\.ac\.ru','\.adygeya\.ru','\.altai\.ru','\.amur\.ru','\.arkhangelsk\.ru','\.astrakhan\.ru',
'\.bashkiria\.ru','\.belgorod\.ru','\.bir\.ru','\.bryansk\.ru','\.buryatia\.ru','\.cbg\.ru',
'\.chel\.ru','\.chelyabinsk\.ru','\.chita\.ru','\.chita\.ru','\.chukotka\.ru','\.chuvashia\.ru',
'\.com\.ru','\.dagestan\.ru','\.e-burg\.ru','\.edu\.ru','\.gov\.ru','\.grozny\.ru','\.int\.ru',
'\.irkutsk\.ru','\.ivanovo\.ru','\.izhevsk\.ru','\.jar\.ru','\.joshkar-ola\.ru','\.kalmykia\.ru',
'\.kaluga\.ru','\.kamchatka\.ru','\.karelia\.ru','\.kazan\.ru','\.kchr\.ru','\.kemerovo\.ru',
'\.khabarovsk\.ru','\.khakassia\.ru','\.khv\.ru','\.kirov\.ru','\.koenig\.ru','\.komi\.ru',
'\.kostroma\.ru','\.kranoyarsk\.ru','\.kuban\.ru','\.kurgan\.ru','\.kursk\.ru','\.lipetsk\.ru',
'\.magadan\.ru','\.mari\.ru','\.mari-el\.ru','\.marine\.ru','\.mil\.ru','\.mordovia\.ru',
'\.mosreg\.ru','\.msk\.ru','\.murmansk\.ru','\.nalchik\.ru','\.net\.ru','\.nnov\.ru','\.nov\.ru',
'\.novosibirsk\.ru','\.nsk\.ru','\.omsk\.ru','\.orenburg\.ru','\.org\.ru','\.oryol\.ru','\.penza\.ru',
'\.perm\.ru','\.pp\.ru','\.pskov\.ru','\.ptz\.ru','\.rnd\.ru','\.ryazan\.ru','\.sakhalin\.ru','\.samara\.ru',
'\.saratov\.ru','\.simbirsk\.ru','\.smolensk\.ru','\.spb\.ru','\.stavropol\.ru','\.stv\.ru',
'\.surgut\.ru','\.tambov\.ru','\.tatarstan\.ru','\.tom\.ru','\.tomsk\.ru','\.tsaritsyn\.ru',
'\.tsk\.ru','\.tula\.ru','\.tuva\.ru','\.tver\.ru','\.tyumen\.ru','\.udm\.ru','\.udmurtia\.ru','\.ulan-ude\.ru',
'\.vladikavkaz\.ru','\.vladimir\.ru','\.vladivostok\.ru','\.volgograd\.ru','\.vologda\.ru',
'\.voronezh\.ru','\.vrn\.ru','\.vyatka\.ru','\.yakutia\.ru','\.yamal\.ru','\.yekaterinburg\.ru',
'\.yuzhno-sakhalinsk\.ru','\.ac\.rw','\.co\.rw','\.com\.rw','\.edu\.rw','\.gouv\.rw','\.gov\.rw',
'\.int\.rw','\.mil\.rw','\.net\.rw','\.com\.sa','\.edu\.sa','\.gov\.sa','\.med\.sa','\.net\.sa',
'\.org\.sa','\.pub\.sa','\.sch\.sa','\.com\.sd','\.edu\.sd','\.gov\.sd','\.info\.sd','\.med\.sd',
'\.net\.sd','\.org\.sd','\.tv\.sd','\.a\.se','\.ac\.se','\.b\.se','\.bd\.se','\.c\.se','\.d\.se',
'\.e\.se','\.f\.se','\.g\.se','\.h\.se','\.i\.se','\.k\.se','\.l\.se','\.m\.se','\.n\.se','\.o\.se',
'\.org\.se','\.p\.se','\.parti\.se','\.pp\.se','\.press\.se','\.r\.se','\.s\.se','\.t\.se','\.tm\.se',
'\.u\.se','\.w\.se','\.x\.se','\.y\.se','\.z\.se','\.com\.sg','\.edu\.sg','\.gov\.sg','\.idn\.sg',
'\.net\.sg','\.org\.sg','\.per\.sg','\.art\.sn','\.com\.sn','\.edu\.sn','\.gouv\.sn','\.org\.sn',
'\.perso\.sn','\.univ\.sn','\.com\.sy','\.edu\.sy','\.gov\.sy','\.mil\.sy','\.net\.sy','\.news\.sy',
'\.org\.sy','\.ac\.th','\.co\.th','\.go\.th','\.in\.th','\.mi\.th','\.net\.th','\.or\.th','\.ac\.tj',
'\.biz\.tj','\.co\.tj','\.com\.tj','\.edu\.tj','\.go\.tj','\.gov\.tj','\.info\.tj','\.int\.tj',
'\.mil\.tj','\.name\.tj','\.net\.tj','\.nic\.tj','\.org\.tj','\.test\.tj','\.web\.tj','\.agrinet\.tn',
'\.com\.tn','\.defense\.tn','\.edunet\.tn','\.ens\.tn','\.fin\.tn','\.gov\.tn','\.ind\.tn','\.info\.tn',
'\.intl\.tn','\.mincom\.tn','\.nat\.tn','\.net\.tn','\.org\.tn','\.perso\.tn','\.rnrt\.tn','\.rns\.tn',
'\.rnu\.tn','\.tourism\.tn','\.ac\.tz','\.co\.tz','\.go\.tz','\.ne\.tz','\.or\.tz','\.biz\.ua',
'\.cherkassy\.ua','\.chernigov\.ua','\.chernovtsy\.ua','\.ck\.ua','\.cn\.ua','\.co\.ua','\.com\.ua',
'\.crimea\.ua','\.cv\.ua','\.dn\.ua','\.dnepropetrovsk\.ua','\.donetsk\.ua','\.dp\.ua','\.edu\.ua',
'\.gov\.ua','\.if\.ua','\.in\.ua','\.ivano-frankivsk\.ua','\.kh\.ua','\.kharkov\.ua','\.kherson\.ua',
'\.khmelnitskiy\.ua','\.kiev\.ua','\.kirovograd\.ua','\.km\.ua','\.kr\.ua','\.ks\.ua','\.kv\.ua',
'\.lg\.ua','\.lugansk\.ua','\.lutsk\.ua','\.lviv\.ua','\.me\.ua','\.mk\.ua','\.net\.ua','\.nikolaev\.ua',
'\.od\.ua','\.odessa\.ua','\.org\.ua','\.pl\.ua','\.poltava\.ua','\.pp\.ua','\.rovno\.ua','\.rv\.ua',
'\.sebastopol\.ua','\.sumy\.ua','\.te\.ua','\.ternopil\.ua','\.uzhgorod\.ua','\.vinnica\.ua','\.vn\.ua',
'\.zaporizhzhe\.ua','\.zhitomir\.ua','\.zp\.ua','\.zt\.ua','\.ac\.ug','\.co\.ug','\.go\.ug','\.ne\.ug',
'\.or\.ug','\.org\.ug','\.sc\.ug','\.ac\.uk','\.bl\.uk','\.british-library\.uk','\.co\.uk','\.cym\.uk',
'\.gov\.uk','\.govt\.uk','\.icnet\.uk','\.jet\.uk','\.lea\.uk','\.ltd\.uk','\.me\.uk','\.mil\.uk',
'\.mod\.uk','\.mod\.uk','\.national-library-scotland\.uk','\.nel\.uk','\.net\.uk','\.nhs\.uk',
'\.nhs\.uk','\.nic\.uk','\.nls\.uk','\.org\.uk','\.orgn\.uk','\.parliament\.uk','\.parliament\.uk',
'\.plc\.uk','\.police\.uk','\.sch\.uk','\.scot\.uk','\.soc\.uk','\.dni\.us','\.fed\.us','\.isa\.us',
'\.kids\.us','\.nsn\.us','\.com\.uy','\.edu\.uy','\.gub\.uy','\.mil\.uy','\.net\.uy','\.org\.uy',
'\.co\.ve','\.com\.ve','\.edu\.ve','\.gob\.ve','\.info\.ve','\.mil\.ve','\.net\.ve','\.org\.ve',
'\.web\.ve','\.co\.vi','\.com\.vi','\.k12\.vi','\.net\.vi','\.org\.vi','\.ac\.vn','\.biz\.vn',
'\.com\.vn','\.edu\.vn','\.gov\.vn','\.health\.vn','\.info\.vn','\.int\.vn','\.name\.vn','\.net\.vn',
'\.org\.vn','\.pro\.vn','\.co\.ye','\.com\.ye','\.gov\.ye','\.ltd\.ye','\.me\.ye','\.net\.ye',
'\.org\.ye','\.plc\.ye','\.ac\.yu','\.co\.yu','\.edu\.yu','\.gov\.yu','\.org\.yu','\.ac\.za',
'\.agric\.za','\.alt\.za','\.bourse\.za','\.city\.za','\.co\.za','\.cybernet\.za','\.db\.za',
'\.ecape\.school\.za','\.edu\.za','\.fs\.school\.za','\.gov\.za','\.gp\.school\.za','\.grondar\.za',
'\.iaccess\.za','\.imt\.za','\.inca\.za','\.kzn\.school\.za','\.landesign\.za','\.law\.za',
'\.lp\.school\.za','\.mil\.za','\.mpm\.school\.za','\.ncape\.school\.za','\.net\.za','\.ngo\.za',
'\.nis\.za','\.nom\.za','\.nw\.school\.za','\.olivetti\.za','\.org\.za','\.pix\.za','\.school\.za',
'\.tm\.za','\.wcape\.school\.za','\.web\.za','\.ac\.zm','\.co\.zm','\.com\.zm','\.edu\.zm','\.gov\.zm',
'\.net\.zm','\.org\.zm','\.sch\.zm'
);
my @TLD2 = (
'\.ac','\.ad','\.ae','\.af','\.ag','\.ai','\.al','\.am','\.ao','\.aq',
'\.ar','\.as','\.at','\.au','\.aw','\.ax','\.az','\.ba','\.bb','\.bd',
'\.be','\.bf','\.bg','\.bh','\.bi','\.bj','\.bm','\.bn','\.bo','\.br',
'\.bs','\.bt','\.bw','\.by','\.bz','\.ca','\.cc','\.cd','\.cf','\.cg',
'\.ch','\.ci','\.ck','\.cl','\.cm','\.cn','\.co','\.cr','\.cu','\.cv',
'\.cw','\.cx','\.cy','\.cz','\.de','\.dj','\.dk','\.dm','\.do','\.dz',
'\.ec','\.ee','\.eg','\.er','\.es','\.et','\.eu','\.fi','\.fj','\.fk',
'\.fm','\.fo','\.fr','\.ga','\.gd','\.ge','\.gf','\.gg','\.gh','\.gi',
'\.gl','\.gm','\.gn','\.gp','\.gq','\.gr','\.gs','\.gt','\.gu','\.gw',
'\.gy','\.hk','\.hm','\.hn','\.hr','\.ht','\.hu','\.id','\.ie','\.il',
'\.im','\.in','\.io','\.iq','\.ir','\.is','\.it','\.je','\.jm','\.jo',
'\.jp','\.ke','\.kg','\.kh','\.ki','\.km','\.kn','\.kp','\.kr','\.kw',
'\.ky','\.kz','\.la','\.lb','\.lc','\.li','\.lk','\.lr','\.ls','\.lt',
'\.lu','\.lv','\.ly','\.ma','\.mc','\.md','\.me','\.mg','\.mh','\.mk',
'\.ml','\.mm','\.mn','\.mo','\.mp','\.mq','\.mr','\.ms','\.mt','\.mu',
'\.mv','\.mw','\.mx','\.my','\.mz','\.na','\.nc','\.ne','\.nf','\.ng',
'\.ni','\.nl','\.no','\.np','\.nr','\.nu','\.nz','\.om','\.pa','\.pe',
'\.pf','\.pg','\.ph','\.pk','\.pl','\.pm','\.pn','\.pr','\.ps','\.pt',
'\.pw','\.py','\.qa','\.re','\.ro','\.rs','\.ru','\.rw','\.sa','\.sb',
'\.sc','\.sd','\.se','\.sg','\.sh','\.si','\.sk','\.sl','\.sm','\.sn',
'\.so','\.sr','\.ss','\.st','\.su','\.sv','\.sx','\.sy','\.sz','\.tc',
'\.td','\.tf','\.tg','\.th','\.tj','\.tk','\.tl','\.tm','\.tn','\.to',
'\.tr','\.tt','\.tv','\.tw','\.tz','\.ua','\.ug','\.uk','\.us','\.uy',
'\.uz','\.va','\.vc','\.ve','\.vg','\.vi','\.vn','\.vu','\.wf','\.ws',
'\.ye','\.za','\.zm','\.zw','\.com','\.info','\.net','\.org','\.biz',
'\.name','\.pro','\.xxx','\.aero','\.asia','\.bzh','\.cat','\.coop',
'\.edu','\.gov','\.int','\.jobs','\.mil','\.mobi','\.museum','\.paris',
'\.sport','\.tel','\.travel','\.kids','\.mail','\.post','\.arpa','\.example',
'\.invalid','\.localhost','\.test','\.bitnet','\.csnet','\.lan','\.local',
'\.onion','\.root','\.uucp','\.tld','\.nato'
);
my %month_number = (
'Jan' => '01',
'Feb' => '02',
'Mar' => '03',
'Apr' => '04',
'May' => '05',
'Jun' => '06',
'Jul' => '07',
'Aug' => '08',
'Sep' => '09',
'Oct' => '10',
'Nov' => '11',
'Dec' => '12',
);
# Regexes to match IPv4 and IPv6 addresses
my $ip_regexp = qr/^([a-fA-F0-9\.\:]+)$/;
my $cidr_regex = qr/^[a-fA-F0-9\.\:]+\/\d+$/;
# Native log format squid %ts.%03tu %6tr %>a %Ss/%03>Hs %<st %rm %ru %un %Sh/%<A %mt
my $native_format_regex1 = qr/^(\d{10}\.\d{3})\s+(\d+)\s+([^\s]+)\s+([^\s]+)\s+(\d+)\s+([^\s]+)\s+(.*)/;
my $native_format_regex2 = qr/^([^\s]+?)\s+([^\s]+)\s+([^\s]+\/[^\s]*)\s+([^\s]+)\s*/;
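# Illustrative native-format line matched by the two regexes above (all values
# are made up for the example):
#   1286536309.586    737 192.168.0.68 TCP_MISS/200 2326 GET http://www.example.com/ - DIRECT/93.184.216.34 text/html
# $native_format_regex1 captures the timestamp, elapsed time, client, code/status,
# bytes and method; $native_format_regex2 then splits the remainder into URL,
# login, peerstatus/peerhost and mime type.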
#logformat common %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st %Ss:%Sh
#logformat combined %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st "%{Referer}>h" "%{User-Agent}>h" %Ss:%Sh
my $common_format_regex1 = qr/([^\s]+)\s([^\s]+)\s([^\s]+)\s\[(\d+\/...\/\d+:\d+:\d+:\d+\s[\d\+\-]+)\]\s"([^\s]+)\s([^\s]+)\s([^\s]+)"\s(\d+)\s+(\d+)(.*)\s([^\s:]+:[^\s]+)\s*([^\/]+\/[^\s]+|-)?$/;
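# Illustrative common-format line matched by $common_format_regex1 (all values
# are made up for the example):
#   192.168.0.68 - jdoe [10/Oct/2019:13:55:36 +0200] "GET http://www.example.com/ HTTP/1.1" 200 2326 TCP_MISS:HIER_DIRECT
# With the combined format, the quoted Referer and User-Agent fields simply end
# up in the free-text capture placed before the %Ss:%Sh field.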
# Log format for SquidGuard logs
my $sg_format_regex1 = qr/^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) .* Request\(([^\/]+\/[^\/]+)\/[^\)]*\) ([^\s]+) ([^\s\\]+)\/[^\s]+ ([^\s]+) ([^\s]+) ([^\s]+)/;
my $sg_format_regex2 = qr/^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) .* (New setting|Added User|init domainlist|Going into emergency mode|ending emergency mode)/;
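# Illustrative squidGuard block line matched by $sg_format_regex1 (all values
# are made up for the example):
#   2019-10-10 13:55:36 [12345] Request(default/porn/-) http://blocked.example.com/ 192.168.0.68/- jdoe GET REDIRECT
# which yields the ACL (default/porn), URL, client IP, login, method and redirect code.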
# Log format for ufdbGuard logs: BLOCK user clienthost aclname category url method
my $ug_format_regex1 = qr/^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) .*(BLOCK) ([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+(.*)$/;
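# Illustrative ufdbGuard BLOCK line matched by $ug_format_regex1 (all values
# are made up for the example):
#   2019-10-10 13:55:36 [12345] BLOCK jdoe 192.168.0.68 blockrule porn http://blocked.example.com/ GET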
sub new
{
my ($class, $conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile, $timezone, $skip_history, $refresh) = @_;
# Construct the class
my $self = {};
bless $self, $class;
# Initialize all variables
$self->_init($conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile, $timezone, $skip_history, $refresh);
# Return the instance
return($self);
}
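# Minimal usage sketch for the constructor above (configuration and log paths
# are illustrative examples only):
#
#	my $sa = new SquidAnalyzer('/etc/squidanalyzer/squidanalyzer.conf', '/var/log/squid/access.log');
#	$sa->parseFile();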
sub localdie
{
my ($self, $msg) = @_;
print STDERR "$msg";
unlink("$self->{pidfile}");
# Cleanup old temporary files
foreach my $tmp_file ('last_parsed.tmp', 'sg_last_parsed.tmp', 'ug_last_parsed.tmp') {
unlink("$self->{pid_dir}/$tmp_file");
}
exit 1;
}
####
# method used to fork as many children as wanted
##
sub spawn
{
my $self = shift;
my $coderef = shift;
unless (@_ == 0 && $coderef && ref($coderef) eq 'CODE') {
print "usage: spawn CODEREF";
exit 0;
}
my $pid;
if (!defined($pid = fork)) {
print STDERR "Error: cannot fork: $!\n";
return;
} elsif ($pid) {
$self->{running_pids}{$pid} = 1;
return; # the parent
}
# the child -- go spawn
$< = $>;
$( = $); # suid progs only
exit &$coderef();
}
sub wait_all_childs
{
my $self = shift;
while (scalar keys %{$self->{running_pids}} > 0) {
my $kid = waitpid(-1, WNOHANG);
if ($kid > 0) {
delete $self->{running_pids}{$kid};
}
usleep(50000);
}
}
sub manage_queue_size
{
my ($self, $child_count) = @_;
while ($child_count >= $self->{queue_size}) {
my $kid = waitpid(-1, WNOHANG);
if ($kid > 0) {
$child_count--;
delete $self->{running_pids}{$kid};
}
usleep(50000);
}
return $child_count;
}
sub save_current_line
{
my $self = shift;
if ($self->{end_time} and (!$self->{history_time} or $self->{end_time} > $self->{history_time})) {
my $current = new IO::File;
$current->open(">$self->{Output}/SquidAnalyzer.current") or $self->localdie("FATAL: Can't write to file $self->{Output}/SquidAnalyzer.current, $!\n");
print $current "$self->{end_time}\t$self->{end_offset}";
$current->close;
}
if ($self->{sg_end_time} and (!$self->{sg_history_time} or $self->{sg_end_time} > $self->{sg_history_time})) {
my $current = new IO::File;
$current->open(">$self->{Output}/SquidGuard.current") or $self->localdie("FATAL: Can't write to file $self->{Output}/SquidGuard.current, $!\n");
print $current "$self->{sg_end_time}\t$self->{sg_end_offset}";
$current->close;
}
if ($self->{ug_end_time} and (!$self->{ug_history_time} or $self->{ug_end_time} > $self->{ug_history_time})) {
my $current = new IO::File;
$current->open(">$self->{Output}/ufdbGuard.current") or $self->localdie("FATAL: Can't write to file $self->{Output}/ufdbGuard.current, $!\n");
print $current "$self->{ug_end_time}\t$self->{ug_end_offset}";
$current->close;
}
}
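# For reference, each *.current file written above contains a single line of the
# form "<end_time>\t<end_offset>", e.g. "1286536309\t1275637" (illustrative
# values): the last timestamp parsed and the byte offset reached in the log file.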
# Extract number of seconds since epoch from timestamp in log line
sub look_for_timestamp
{
my ($self, $line) = @_;
my $time = 0;
# Squid native format
if ( $line =~ $native_format_regex1 ) {
$time = $1 + $self->{TimeZone};
$self->{is_squidguard_log} = 0;
$self->{is_ufdbguard_log} = 0;
# Squid common HTTP format
} elsif ( $line =~ $common_format_regex1 ) {
$time = $4;
$time =~ /(\d+)\/(...)\/(\d+):(\d+):(\d+):(\d+)\s/;
$time = timelocal_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900) + $self->{TimeZone};
$self->{is_squidguard_log} = 0;
$self->{is_ufdbguard_log} = 0;
# SquidGuard log format
} elsif (( $line =~ $sg_format_regex1 ) || ( $line =~ $sg_format_regex2 )) {
$self->{is_squidguard_log} = 1;
$self->{is_ufdbguard_log} = 0;
$time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $self->{TimeZone};
# ufdbGuard log format
} elsif ( $line =~ $ug_format_regex1 ) {
$self->{is_ufdbguard_log} = 1;
$self->{is_squidguard_log} = 0;
$time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $self->{TimeZone};
}
return $time;
}
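# For example, a native-format line starting with "1286536309.586 ..." returns
# that epoch value plus the configured TimeZone offset, while common, squidGuard
# and ufdbGuard lines have their textual date converted with timelocal_nocheck().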
# Detect whether the log file is a squid, squidGuard or ufdbGuard log
sub get_log_format
{
my ($self, $file) = @_;
my $logfile = new IO::File;
$logfile->open($file) || $self->localdie("ERROR: Unable to open log file $file. $!\n");
my $max_line = 10000;
my $i = 0;
while (my $line = <$logfile>) {
chomp($line);
# SquidGuard log format
if (( $line =~ $sg_format_regex1 ) || ( $line =~ $sg_format_regex2 )) {
$self->{is_squidguard_log} = 1;
$self->{is_ufdbguard_log} = 0;
last;
# ufdbGuard log format
} elsif ( $line =~ $ug_format_regex1 ) {
$self->{is_ufdbguard_log} = 1;
$self->{is_squidguard_log} = 0;
last;
# Squid native format
} elsif ( $line =~ $native_format_regex1 ) {
$self->{is_squidguard_log} = 0;
$self->{is_ufdbguard_log} = 0;
last;
# Squid common HTTP format
} elsif ( $line =~ $common_format_regex1 ) {
$self->{is_squidguard_log} = 0;
$self->{is_ufdbguard_log} = 0;
last;
} else {
last if ($i > $max_line);
}
$i++;
}
$logfile->close();
}
sub parseFile
{
my ($self) = @_;
my $line_count = 0;
my $line_processed_count = 0;
my $line_stored_count = 0;
my $saved_queue_size = $self->{queue_size};
my $history_offset = $self->{end_offset};
foreach my $lfile (@{$self->{LogFile}}) {
# Detect if log file is from squid, squidguard or ufdbguard
$self->get_log_format($lfile);
if ($self->{is_ufdbguard_log}) {
$history_offset = $self->{ug_end_offset};
} elsif ($self->{is_squidguard_log}) {
$history_offset = $self->{sg_end_offset};
} else {
$history_offset = $self->{end_offset};
}
print STDERR "Starting to parse logfile $lfile.\n" if (!$self->{QuietMode});
if ((!-f $lfile) || (-z $lfile)) {
print STDERR "DEBUG: bad or empty log file $lfile.\n" if (!$self->{QuietMode});
next;
}
# Restore the configured multiprocess queue size
$self->{queue_size} = $saved_queue_size;
# Compressed files do not allow multiprocess parsing
if ($lfile =~ /\.(gz|bz2)$/) {
$self->{queue_size} = 1;
}
# Search the last position in logfile
if ($history_offset) {
# Initialize start offset for each file
if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
$self->{end_offset} = $history_offset;
} elsif (!$self->{is_squidguard_log}) {
$self->{ug_end_offset} = $history_offset;
} else {
$self->{sg_end_offset} = $history_offset;
}
# Compressed files are always read from the beginning
if ($lfile =~ /\.(gz|bz2)$/i) {
if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
$self->{end_offset} = 0;
} elsif (!$self->{is_squidguard_log}) {
$self->{ug_end_offset} = 0;
} else {
$self->{sg_end_offset} = 0;
}
} else {
# Look at the first line to see if the file should be parsed from the beginning.
my $logfile = new IO::File;
$logfile->open($lfile) || $self->localdie("ERROR: Unable to open log file $lfile. $!\n");
my $line = <$logfile>;
chomp($line);
# Remove syslog header and mark the format
if ($line =~ s/^... \d+ \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ [^\s]+ (\d{10}\.\d{3}) /$1 /) {
print STDERR "DEBUG: log was generated through syslog, the header will be removed.\n" if (!$self->{QuietMode});
$self->{Syslog} = 1;
}
my $curtime = $self->look_for_timestamp($line);
my $hist_time = $self->{history_time};
if ($self->{is_squidguard_log}) {
$hist_time = $self->{sg_history_time};
} elsif ($self->{is_ufdbguard_log}) {
$hist_time = $self->{ug_history_time};
}
# if the first timestamp is higher than the history time, start from the beginning
if ($curtime > $hist_time) {
print STDERR "DEBUG: new file: $lfile, start from the beginning.\n" if (!$self->{QuietMode});
if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
$self->{end_offset} = 0;
} elsif (!$self->{is_squidguard_log}) {
$self->{ug_end_offset} = 0;
} else {
$self->{sg_end_offset} = 0;
}
# If the size of the file is lower than the history offset, parse this file from the beginning
} elsif ((lstat($lfile))[7] <= $history_offset) {
# move to the beginning of the file to see if this is a new one
$logfile->seek(0, 0);
for (my $i = 1; $i <= 10000; $i++) {
$line = <$logfile>;
chomp($line);
# Remove syslog header and mark the format
if ($self->{Syslog}) {
$line =~ s/^[A-Z][a-z]{2} \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ \[[^\]]+\] (\d{10}\.\d{3})/$1/;
}
$curtime = $self->look_for_timestamp($line);
if ($curtime) {
# If the timestamp found at startup is older than the history time,
# the file will not be parsed at all.
if ($hist_time > $curtime) {
print STDERR "DEBUG: this file will not be parsed: $lfile, size is lower than expected.\n" if (!$self->{QuietMode});
print STDERR "DEBUG: exploring $lfile, timestamp found at startup, $curtime, is lower than history time $hist_time.\n" if (!$self->{QuietMode});
$line = 'NOK';
last;
}
}
}
$logfile->close;
# This file should be omitted, jump to the next file
next if ($line eq 'NOK');
print STDERR "DEBUG: new file: $lfile, start from the beginning.\n" if (!$self->{QuietMode});
if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
$self->{end_offset} = 0;
} elsif (!$self->{is_squidguard_log}) {
$self->{ug_end_offset} = 0;
} else {
$self->{sg_end_offset} = 0;
}
} else {
# move to the offset and see if the next line is older than the history time
$logfile->seek($history_offset, 0);
for (my $i = 1; $i <= 10; $i++) {
$line = <$logfile>;
chomp($line);
# Remove syslog header and mark the format
if ($self->{Syslog}) {
$line =~ s/^[A-Z][a-z]{2} \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ \[[^\]]+\] (\d{10}\.\d{3})/$1/;
}
$curtime = $self->look_for_timestamp($line);
if ($curtime) {
if ($curtime < $hist_time) {
my $tmp_time = CORE::localtime($curtime);
print STDERR "DEBUG: this file will not be parsed: $lfile, line after offset is older than expected: $curtime < $hist_time.\n" if (!$self->{QuietMode});
$line = 'NOK';
last;
}
}
}
$logfile->close;
# This file should be omitted, jump to the next file
next if ($line eq 'NOK');
}
$logfile->close;
}
} else {
print STDERR "DEBUG: this file will be parsed, no history found.\n" if (!$self->{QuietMode});
# Initialise start offset for each file
if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
$self->{end_offset} = 0;
} elsif (!$self->{is_squidguard_log}) {
$self->{ug_end_offset} = 0;
} else {
$self->{sg_end_offset} = 0;
}
}
if ($self->{queue_size} <= 1) {
if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
$self->_parse_file_part($lfile, $self->{end_offset});
} elsif (!$self->{is_squidguard_log}) {
$self->_parse_file_part($lfile, $self->{ug_end_offset});
} else {
$self->_parse_file_part($lfile, $self->{sg_end_offset});
}
} else {
# Create multiple processes to parse one log file by chunks of data
my @chunks = $self->split_logfile($lfile);
my $child_count = 0;
for (my $i = 0; $i < $#chunks; $i++) {
if ($self->{interrupt}) {
print STDERR "FATAL: Abort signal received when processing to next chunk\n";
return;
}
$self->spawn(sub {
$self->_parse_file_part($lfile, $chunks[$i], $chunks[$i+1], $i);
});
$child_count = $self->manage_queue_size(++$child_count);
}
}
}
# Wait for the last child to stop
$self->wait_all_childs() if ($self->{queue_size} > 1);
# Get the last information parsed in this file part
foreach my $tmp_file ('last_parsed.tmp', 'sg_last_parsed.tmp', 'ug_last_parsed.tmp')
{
if (-e "$self->{pid_dir}/$tmp_file") {
if (open(IN, "$self->{pid_dir}/$tmp_file")) {
my %history_tmp = ();
while (my $l = <IN>) {
chomp($l);
my @data = split(/\s/, $l);
$history_tmp{$data[3]}{$data[4]} = join(' ', @data);
$line_stored_count += $data[5];
$line_processed_count += $data[6];
$line_count += $data[7];
if (!$self->{first_year} || ("$data[8]$data[9]" lt "$self->{first_year}$self->{first_month}{$data[8]}") ) {
$self->{first_year} = $data[8];
$self->{first_month}{$data[8]} = $data[9];
}
my @tmp = split(/,/, $data[10]);
foreach my $w (@tmp) {
if (!grep(/^$w$/, @{$self->{week_parsed}})) {
push(@{$self->{week_parsed}}, $w);
}
}
}
close(IN);
foreach my $ts (sort {$b <=> $a} keys %history_tmp) {
foreach my $offset (sort {$b <=> $a} keys %{$history_tmp{$ts}}) {
my @data = split(/\s/, $history_tmp{$ts}{$offset});
$self->{last_year} = $data[0];
$self->{last_month}{$data[0]} = $data[1];
$self->{last_day}{$data[0]} = $data[2];
if ($tmp_file eq 'last_parsed.tmp') {
$self->{end_time} = $data[3];
$self->{end_offset} = $data[4];
} elsif ($tmp_file eq 'ug_last_parsed.tmp') {
$self->{ug_end_time} = $data[3];
$self->{ug_end_offset} = $data[4];
} elsif ($tmp_file eq 'sg_last_parsed.tmp') {
$self->{sg_end_time} = $data[3];
$self->{sg_end_offset} = $data[4];
}
last;
}
last;
}
} else {
print STDERR "ERROR: can't read last parsed line from $self->{pid_dir}/$tmp_file, $!\n";
}
}
}
if (!$self->{last_year}) {
print STDERR "No new log registered...\n" if (!$self->{QuietMode});
} else {
if (!$self->{QuietMode}) {
print STDERR "SQUID LOG END TIME : ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{end_time}+$self->{TimeZone})), "\n" if ($self->{end_time});
print STDERR "SQUIGUARD LOG END TIME : ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{sg_end_time}+$self->{TimeZone})), "\n" if ($self->{sg_end_time});
print STDERR "UFDBGUARD LOG END TIME : ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{ug_end_time}+$self->{TimeZone})), "\n" if ($self->{ug_end_time});
print STDERR "Read $line_count lines, matched $line_processed_count and found $line_stored_count new lines\n";
}
# Save the current end time and offset into the history file
$self->save_current_line() if (!$self->{SkipHistory} || $self->{OverrideHistory});
# Force reordering and unique sorting of data files
my $child_count = 0;
if (!$self->{rebuild}) {
if (!$self->{QuietMode}) {
print STDERR "Reordering daily data files now...\n";
}
for my $date ("$self->{first_year}$self->{first_month}{$self->{first_year}}" .. "$self->{last_year}$self->{last_month}{$self->{last_year}}") {
$date =~ /^(\d{4})(\d{2})$/;
my $y = $1;
my $m = $2;
next if (($m < 1) || ($m > 12));
if ($self->{interrupt}) {
print STDERR "FATAL: Abort signal received\n";
return;
}
if (-d "$self->{Output}/$y/$m") {
foreach my $d ("01" .. "31") {
if (-d "$self->{Output}/$y/$m/$d") {
if ($self->{queue_size} > 1) {
$self->spawn(sub {
$self->_save_stat($y, $m, $d);
});
$child_count = $self->manage_queue_size(++$child_count);
} else {
$self->_save_stat($y, $m, $d);
}
$self->_clear_stats();
}
}
}
}
# Wait for the last child to stop
$self->wait_all_childs() if ($self->{queue_size} > 1);
$child_count = 0;
}
# Compute week statistics
if (!$self->{no_week_stat}) {
if (!$self->{QuietMode}) {
print STDERR "Generating weekly data files now...\n";
}
foreach my $week (@{$self->{week_parsed}}) {
my ($y, $m, $wn) = split(/\//, $week);
my @wd = &get_wdays_per_month($wn, "$y-$m");
$wn++;
print STDERR "Compute and dump weekly statistics for week $wn on $y\n" if (!$self->{QuietMode});
if ($self->{queue_size} > 1) {
$self->spawn(sub {
$self->_save_data($y, $m, undef, sprintf("%02d", $wn), @wd);
});
$child_count = $self->manage_queue_size(++$child_count);
} else {
$self->_save_data($y, $m, undef, sprintf("%02d", $wn), @wd);
}
$self->_clear_stats();
}
}
# Wait for the last child to stop
$self->wait_all_childs() if ($self->{queue_size} > 1);
$child_count = 0;
# Compute month statistics
if (!$self->{QuietMode}) {
print STDERR "Generating monthly data files now...\n";
}
for my $date ("$self->{first_year}$self->{first_month}{$self->{first_year}}" .. "$self->{last_year}$self->{last_month}{$self->{last_year}}") {
$date =~ /^(\d{4})(\d{2})$/;
my $y = $1;
my $m = $2;
next if (($m < 1) || ($m > 12));
if ($self->{interrupt}) {
print STDERR "FATAL: Abort signal received\n";
return;
}
if (-d "$self->{Output}/$y/$m") {
print STDERR "Compute and dump month statistics for $y/$m\n" if (!$self->{QuietMode});
if ($self->{queue_size} > 1) {
$self->spawn(sub {
$self->_save_data("$y", "$m");
});
$child_count = $self->manage_queue_size(++$child_count);
} else {
$self->_save_data("$y", "$m");
}
$self->_clear_stats();
}
}
# Wait for the last child to stop
$self->wait_all_childs() if ($self->{queue_size} > 1);
# Compute year statistics
$child_count = 0;
if (!$self->{QuietMode}) {
print STDERR "Generating yearly data files now...\n";
}
for my $year ($self->{first_year} .. $self->{last_year}) {
if ($self->{interrupt}) {
print STDERR "FATAL: Abort signal received\n";
return;
}
if (-d "$self->{Output}/$year") {
print STDERR "Compute and dump year statistics for $year\n" if (!$self->{QuietMode});
if ($self->{queue_size} > 1) {
$self->spawn(sub {
$self->_save_data("$year");
});
$child_count = $self->manage_queue_size(++$child_count);
} else {
$self->_save_data("$year");
}
$self->_clear_stats();
}
}
# Wait for the last child to stop
$self->wait_all_childs() if ($self->{queue_size} > 1);
}
}
sub split_logfile
{
my ($self, $logf) = @_;
my @chunks = (0);
# get file size
my $totalsize = (stat("$logf"))[7] || 0;
my $offsplit = $self->{end_offset};
if ($self->{is_squidguard_log}) {
$offsplit = $self->{sg_end_offset};
} elsif ($self->{is_ufdbguard_log}) {
$offsplit = $self->{ug_end_offset};
}
# If the file is very small, many jobs actually make the parsing take longer
if ( ($totalsize <= 16777216) || ($totalsize <= $offsplit)) { #16MB
push(@chunks, $totalsize);
return @chunks;
}
# Split the file and find the right start position for each chunk according to the number of jobs
my $i = 1;
if ($offsplit && ($offsplit < $totalsize)) {
$chunks[0] = $offsplit;
}
my $lfile = undef;
open($lfile, $logf) || die "FATAL: cannot read log file $logf. $!\n";
while ($i < $self->{queue_size}) {
my $pos = int((($totalsize-$offsplit)/$self->{queue_size}) * $i);
$pos += $offsplit;
if ($pos > $chunks[0]) {
$lfile->seek($pos, 0);
# Move the offset to a line boundary, because the logic in _parse_file_part requires it
$pos = $pos + length(<$lfile>) - 1;
push(@chunks, $pos) if ($pos < $totalsize);
}
last if ($pos >= $totalsize);
$i++;
}
$lfile->close();
push(@chunks, $totalsize);
return @chunks;
}
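# As an illustration: with queue_size = 4, no history offset and a log file of
# roughly 64 MB, the list returned above would contain five offsets (0, then
# roughly 16, 32 and 48 MB, then the file size), each intermediate boundary
# moved forward to a line ending so a child never starts parsing mid-line.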
sub check_exclusions
{
my ($self, $login, $client_ip, $url) = @_;
return 0 if (!exists $self->{Exclude}{users} && !exists $self->{Exclude}{clients} && !exists $self->{Exclude}{networks} && !exists $self->{Exclude}{uris});
# check for user exclusion
if (exists $self->{Exclude}{users} && $login) {
return 1 if (grep(/^$login$/i, @{ $self->{UserExcludeCache} }));
foreach my $e (@{$self->{Exclude}{users}}) {
# look for users using the following format: user@domain.tld, domain\user and user
if ( ($login =~ m#^$e$#i) || ($login =~ m#^$e\@#i) || ($login =~ m#\\$e$#i) ) {
push(@{ $self->{UserExcludeCache} }, $login);
return 1;
}
}
}
# If login is a client IP, check it against the clients and networks filters
if (!$client_ip && ($login =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|[0-9a-fA-F:]+$/)) {
$client_ip = $login;
}
# check for client exclusion
if (exists $self->{Exclude}{clients} && $client_ip) {
return 1 if (grep(/^$client_ip$/i, @{ $self->{ClientExcludeCache} }));
foreach my $e (@{$self->{Exclude}{clients}}) {
if ($client_ip =~ m#^$e$#i) {
push(@{ $self->{ClientExcludeCache} }, $client_ip);
return 1;
}
}
}
# check for Network exclusion
if (exists $self->{Exclude}{networks} && $client_ip) {
return 1 if (grep(/^$client_ip$/, @{ $self->{ClientExcludeCache} }));
foreach my $e (@{$self->{Exclude}{networks}}) {
if (&check_ip($client_ip, $e)) {
push(@{ $self->{ClientExcludeCache} }, $client_ip);
return 1;
}
}
}
# check for URL exclusion
if (exists $self->{Exclude}{uris} && $url) {
if (exists $self->{UrlExcludeCache}{$url}) {
%{ $self->{UrlExcludeCache} } = () if (scalar keys %{ $self->{UrlExcludeCache} } > 10000);
return 1;
}
map { return 1 if ($url =~ m#^\Q$_\E$#i); } @{$self->{Exclude}{uris}};
}
return 0;
}
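# Illustrative example (hypothetical configuration values):
#	$self->{Exclude}{users}    = [ 'backup', 'svc-.*' ];
#	$self->{Exclude}{networks} = [ '192.168.10.0/24' ];
# With this, $self->check_exclusions('svc-mirror', '192.168.10.5', $url) should
# return 1 (the login matches the 'svc-.*' pattern), and the matching login or
# client IP is pushed onto the corresponding cache array so later lines from the
# same user or address are excluded without rescanning every pattern.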
sub check_inclusions
{
my ($self, $login, $client_ip) = @_;
return 1 if (!exists $self->{Include}{users} && !exists $self->{Include}{clients} && !exists $self->{Include}{networks});
# check for user inclusion
if (exists $self->{Include}{users} && $login) {
return 1 if (grep(/^$login$/i, @{ $self->{UserIncludeCache} }));
foreach my $e (@{$self->{Include}{users}}) {
# look for users using the following format: user@domain.tld, domain\user and user
if ( ($login =~ m#^$e$#i) || ($login =~ m#^$e\@#i) || ($login =~ m#\\$e$#i) ) {
push(@{ $self->{UserIncludeCache} }, $login);
return 1;
}
}
}
# If login is a client IP, check it against the clients and networks filters
if (!$client_ip && ($login =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|[0-9a-fA-F:]+$/)) {
$client_ip = $login;
}
# check for client inclusion
if (exists $self->{Include}{clients} && $client_ip) {
return 1 if (grep(/^$client_ip$/i, @{ $self->{ClientIncludeCache} }));
foreach my $e (@{$self->{Include}{clients}}) {
if ($client_ip =~ m#^$e$#i) {
push(@{ $self->{ClientIncludeCache} }, $client_ip);
return 1;
}
}
}
# check for Network inclusion
if (exists $self->{Include}{networks} && $client_ip) {
return 1 if (grep(/^$client_ip$/i, @{ $self->{ClientIncludeCache} }));
foreach my $e (@{$self->{Include}{networks}}) {
if (&check_ip($client_ip, $e)) {
push(@{ $self->{ClientIncludeCache} }, $client_ip);
return 1;
}
}
}
return 0;
}
sub _parse_file_part
{
my ($self, $file, $start_offset, $stop_offset) = @_;
print STDERR "Reading file $file from offset $start_offset to ", ($stop_offset||'end'), ".\n" if (!$self->{QuietMode});
# Open logfile
my $logfile = new IO::File;
if ($file =~ /\.gz/) {
# Open a pipe to zcat program for compressed log
$logfile->open("$ZCAT_PROG $file |") || $self->localdie("ERROR: cannot read from pipe to $ZCAT_PROG $file. $!\n");
} elsif ($file =~ /\.bz2/) {
# Open a pipe to bzcat program for compressed log
$logfile->open("$BZCAT_PROG $file |") || $self->localdie("ERROR: cannot read from pipe to $BZCAT_PROG $file. $!\n");
} elsif ($file =~ /\.xz/) {
# Open a pipe to xzcat program for compressed log
$logfile->open("$XZCAT_PROG $file |") || $self->localdie("ERROR: cannot read from pipe to $XZCAT_PROG $file. $!\n");
} else {
$logfile->open($file) || $self->localdie("ERROR: Unable to open Squid access.log file $file. $!\n");
}
my $line = '';
my $time = 0;
my $elapsed = 0;
my $client_ip = '';
my $client_name = '';
my $code = '';
my $bytes = 0;
my $method = '';
my $url = '';
my $login = '';
my $status = '';
my $mime_type = '';
$self->{Syslog} = 0;
my $acl = '';
my $line_count = 0;
my $line_processed_count = 0;
my $line_stored_count = 0;
# Move directly to the start position
if ($start_offset) {
$logfile->seek($start_offset, 0);
if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
$self->{end_offset} = $start_offset;
} elsif (!$self->{is_squidguard_log}) {
$self->{ug_end_offset} = $start_offset;
} else {
$self->{sg_end_offset} = $start_offset;
}
}
# The log file format must be:
# time elapsed client code/status bytes method URL rfc931 peerstatus/peerhost type
# This is the default format of the squid access log file.
# Read and parse each line of the access log file
while ($line = <$logfile>) {
# quit this log if we reach the ending offset
if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
last if ($stop_offset && ($self->{end_offset}>= $stop_offset));
# Store the current position in logfile
$self->{end_offset} += length($line);
} elsif (!$self->{is_squidguard_log}) {
last if ($stop_offset && ($self->{ug_end_offset}>= $stop_offset));
# Store the current position in logfile
$self->{ug_end_offset} += length($line);
} else {
last if ($stop_offset && ($self->{sg_end_offset}>= $stop_offset));
# Store the current position in logfile
$self->{sg_end_offset} += length($line);
}
chomp($line);
next if (!$line);
# Immediately skip lines that squid was not able to tag.
next if ($line =~ / TAG_NONE(_ABORTED)?\//);
# Remove syslog header and mark the format
if ($self->{Syslog} == 1) {
$line =~ s/^... \d+ \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ [^\s]+ (\d{10}\.\d{3}) /$1 /;
# Remove syslog header and mark the format
} elsif (!$self->{Syslog} && ($line =~ s/^... \d+ \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ [^\s]+ (\d{10}\.\d{3}) /$1 /)) {
print STDERR "DEBUG: log was generated through syslog, the header will be removed.\n" if (!$self->{QuietMode});
$self->{Syslog} = 1;
} else {
$self->{Syslog} = 2;
}
# Number of log lines parsed
$line_count++;
# SquidAnalyzer supports the following squid log formats:
#logformat squid %ts.%03tu %6tr %>a %Ss/%03>Hs %<st %rm %ru %un %Sh/%<A %mt
#logformat squidmime %ts.%03tu %6tr %>a %Ss/%03>Hs %<st %rm %ru %un %Sh/%<A %mt [%>h] [%<h]
#logformat common %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st %Ss:%Sh
#logformat combined %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st "%{Referer}>h" "%{User-Agent}>h" %Ss:%Sh
# Parse log with format: time elapsed client code/status bytes method URL rfc931 peerstatus/peerhost mime_type
my $format = 'native';
if ( !$self->{is_squidguard_log} && !$self->{is_ufdbguard_log} && ($line =~ $native_format_regex1) ) {
$time = $1 + $self->{TimeZone};
$elapsed = abs($2);
$client_ip = $3;
$code = $4;
$bytes = $5;
$method = $6;
$line = $7;
if ($self->{TimeStart} || $self->{TimeStop}) {
my $hour = strftime("%H:%M", CORE::localtime($time));
next if ($self->{TimeStart} && $hour lt $self->{TimeStart});
last if ($self->{TimeStop} && $hour gt $self->{TimeStop});
}
} elsif ( !$self->{is_squidguard_log} && !$self->{is_ufdbguard_log} && ($line =~ $common_format_regex1) ) {
$format = 'http';
$client_ip = $1;
$elapsed = abs($2);
$login = lc($3);
$time = $4;
$method = $5;
$url = lc($6);
$status = $8;
$bytes = $9;
$line = $10;
$code = $11;
$mime_type = $12;
$time =~ /(\d+)\/(...)\/(\d+):(\d+):(\d+):(\d+)\s/;
next if ($self->{TimeStart} && "$4:$5" lt $self->{TimeStart});
last if ($self->{TimeStop} && "$4:$5" gt $self->{TimeStop});
$time = timelocal_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900) + $self->{TimeZone};
# Some sites have a corrupted mime_type, try to remove nasty characters
if ($mime_type =~ s/[^\-\/\.\(\)\+\_,\=a-z0-9]+//igs) {
$mime_type = 'invalid/type';
}
} elsif ( !$self->{is_ufdbguard_log} && ($line =~ $sg_format_regex1) ) {
$format = 'squidguard';
$self->{is_squidguard_log} = 1;
$acl = $7;
$client_ip = $9;
$elapsed = 0;
$login = lc($10);
$method = $11;
$url = lc($8);
$status = 301;
$bytes = 0;
$code = $12 . ':';
$mime_type = '';
next if ($self->{TimeStart} && "$4:$5" lt $self->{TimeStart});
last if ($self->{TimeStop} && "$4:$5" gt $self->{TimeStop});
$time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $self->{TimeZone};
# Log format for ufdbGuard logs: timestamp [pid] BLOCK user clienthost aclname category url method
} elsif ($line =~ $ug_format_regex1) {
$format = 'ufdbguard';
$self->{is_ufdbguard_log} = 1;
$acl = "$10/$11";
$client_ip = $9;
$elapsed = 0;
$login = lc($8);
$method = $13;
$url = lc($12);
$status = 301;
$bytes = 0;
$code = 'REDIRECT:';
$mime_type = '';
next if ($self->{TimeStart} && "$4:$5" lt $self->{TimeStart});
last if ($self->{TimeStop} && "$4:$5" gt $self->{TimeStop});
$time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $self->{TimeZone};
} else {
next;
}
if ($time) {
# end parsing if time range exceeded
last if ($self->{history_endtime} && ($time > $self->{history_endtime}));
# Do not parse some unwanted methods
my $qm_method = quotemeta($method) || '';
next if (($#{$self->{ExcludedMethods}} >= 0) && grep(/^$qm_method$/, @{$self->{ExcludedMethods}}));
# Do not parse some unwanted codes; e.g. TCP_DENIED/403
my $qm_code = quotemeta($code) || '';
next if (($#{$self->{ExcludedCodes}} >= 0) && grep(m#^$qm_code$#, @{$self->{ExcludedCodes}}));
# Go to last parsed date (incremental mode)
if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
next if ($self->{history_time} && ($time <= $self->{history_time}));
} elsif (!$self->{is_squidguard_log}) {
next if ($self->{ug_history_time} && ($time <= $self->{ug_history_time}));
} else {
next if ($self->{sg_history_time} && ($time <= $self->{sg_history_time}));
}
# Register the last parsing time and last offset position in logfile
if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
$self->{end_time} = $time if ($self->{end_time} < $time);
# Register the first parsing time
if (!$self->{begin_time} || ($self->{begin_time} > $time)) {
$self->{begin_time} = $time;
print STDERR "SQUID LOG SET START TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($time+$self->{TimeZone})), "\n" if (!$self->{QuietMode});
}
} elsif (!$self->{is_squidguard_log}) {
$self->{ug_end_time} = $time if ($self->{ug_end_time} < $time);
# Register the first parsing time
if (!$self->{ug_begin_time} || ($self->{ug_begin_time} > $time)) {
$self->{ug_begin_time} = $time;
print STDERR "UFDBGUARD LOG SET START TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($time+$self->{TimeZone})), "\n" if (!$self->{QuietMode});
}
} else {
$self->{sg_end_time} = $time if ($self->{sg_end_time} < $time);
# Register the first parsing time
if (!$self->{sg_begin_time} || ($self->{sg_begin_time} > $time)) {
$self->{sg_begin_time} = $time;
print STDERR "SQUIDGUARD LOG SET START TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($time+$self->{TimeZone})), "\n" if (!$self->{QuietMode});
}
}
# Only store (HIT|UNMODIFIED)/(MISS|MODIFIED|TUNNEL)/(DENIED|REDIRECT) status
# and peer CD_SIBLING_HIT/ as well as peer SIBLING_HIT/...
if ( ($code =~ m#(HIT|UNMODIFIED)[:/]#) || ($self->{SiblingHit} && ($line =~ / (CD_)?SIBLING_HIT/)) ) {
$code = 'HIT';
} elsif ($code =~ m#(MISS|MODIFIED|TUNNEL)[:/]#) {
$code = 'MISS';
} elsif ($code =~ m#(DENIED|DENIED_REPLY|REDIRECT)[:/]#) {
if ($code =~ m#DENIED[:/]407#) {
$code = 'DENIED_407';
} else {
$code = 'DENIED';
}
} else {
next;
}
# With the common and combined log formats these fields have already been parsed
if (($format eq 'native') && ($line =~ $native_format_regex2) ) {
$url = lc($1);
$login = lc($2);
$status = lc($3);
$mime_type = lc($4);
# Some sites have a corrupted mime_type, try to remove nasty characters
if ($mime_type =~ s/[^\-\/\.\(\)\+\_,\=a-z0-9]+//igs) {
$mime_type = 'invalid/type';
}
}
if ($url) {
if (!$mime_type || ($mime_type eq '-')) {
$mime_type = 'none';
}
# Do not parse some unwanted mime types
next if (($#{$self->{ExcludedMimes}} >= 0) && map {$mime_type =~ m#^$_$#} @{$self->{ExcludedMimes}});
# Remove URL-encoded space characters (%20) from the username
$login =~ s/\%20//g;
my $id = $client_ip || '';
if ($login ne '-') {
$id = $login;
}
next if (!$id || (!$bytes && ($code !~ /DENIED/)));
#####
# If there's some mandatory inclusion, check the entry against the definitions
# The entry is skipped directly if it is not in an inclusion list
#####
next if (!$self->check_inclusions($login, $client_ip));
#####
# Check the entry against the exclusion definitions. The entry
# is skipped directly when it match an exclusion definition.
#####
next if ($self->check_exclusions($login, $client_ip, $url));
# Set default user login to client ip address
# Anonymize all users
if ($self->{AnonymizeLogin} && ($client_ip ne $id)) {
if (!exists $self->{AnonymizedId}{$id}) {
$self->{AnonymizedId}{$id} = &anonymize_id();
}
$id = $self->{AnonymizedId}{$id};
}
# With code TCP_DENIED/407 we need to store it until
# we are sure that no 200 response is found later
if ($code eq 'DENIED_407')
{
$code = 'DENIED';
push(@{ $self->{'Kerberos_Challenge'}{$client_ip}{$url}{$time} }, $elapsed, $client_ip, $code, $bytes, $url, $id, $mime_type, $acl, $method);
$self->{has_407} = 1;
}
else
{
# Now parse data and generate statistics
$self->_parseData($time, $elapsed, $client_ip, $code, $bytes, $url, $id, $mime_type, $acl, $method);
$line_stored_count++;
}
}
$line_processed_count++;
}
}
$logfile->close();
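# Flush the remaining TCP_DENIED/407 challenges: they were never followed by an
# authenticated (non-407) request for the same client/url, so count them as DENIED now.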
foreach my $ip (keys %{ $self->{'Kerberos_Challenge'} })
{
foreach my $url (keys %{ $self->{'Kerberos_Challenge'}{$ip} })
{
foreach my $time (keys %{ $self->{'Kerberos_Challenge'}{$ip}{$url} })
{
# Now parse data and generate statistics
$self->_parseData($time, @{ $self->{'Kerberos_Challenge'}{$ip}{$url}{$time} });
$line_stored_count++;
}
}
}
$self->{'Kerberos_Challenge'} = ();
$self->{has_407} = 0;
if ($self->{cur_year}) {
# Save last parsed data
$self->_append_data($self->{cur_year}, $self->{cur_month}, $self->{cur_day});
# Stats can be cleared
$self->_clear_stats();
# Stores last week to process
my $wn = &get_week_number($self->{cur_year}, $self->{cur_month}, $self->{cur_day});
if (!grep(/^$self->{cur_year}\/$self->{cur_month}\/$wn$/, @{$self->{week_parsed}})) {
push(@{$self->{week_parsed}}, "$self->{cur_year}/$self->{cur_month}/$wn");
}
# Save the last information parsed in this file part
my $tmp_file = 'last_parsed.tmp';
if ($self->{is_squidguard_log}) {
$tmp_file = 'sg_last_parsed.tmp';
} elsif ($self->{is_ufdbguard_log}) {
$tmp_file = 'ug_last_parsed.tmp';
}
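# The line appended below is a space separated record of the parser state, i.e.:
# last_year last_month last_day end_time end_offset lines_stored lines_processed
# lines_read first_year first_month, followed by the comma separated list of parsed weeks.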
if (open(OUT, ">>$self->{pid_dir}/$tmp_file")) {
flock(OUT, 2) || die "FATAL: can't acquire lock on file $tmp_file, $!\n";
if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
print OUT "$self->{last_year} $self->{last_month}{$self->{last_year}} $self->{last_day}{$self->{last_year}} $self->{end_time} $self->{end_offset} $line_stored_count $line_processed_count $line_count $self->{first_year} $self->{first_month}{$self->{first_year}} ", join(',', @{$self->{week_parsed}}), "\n";
} elsif (!$self->{is_squidguard_log}) {
print OUT "$self->{last_year} $self->{last_month}{$self->{last_year}} $self->{last_day}{$self->{last_year}} $self->{ug_end_time} $self->{ug_end_offset} $line_stored_count $line_processed_count $line_count $self->{first_year} $self->{first_month}{$self->{first_year}} ", join(',', @{$self->{week_parsed}}), "\n";
} else {
print OUT "$self->{last_year} $self->{last_month}{$self->{last_year}} $self->{last_day}{$self->{last_year}} $self->{sg_end_time} $self->{sg_end_offset} $line_stored_count $line_processed_count $line_count $self->{first_year} $self->{first_month}{$self->{first_year}} ", join(',', @{$self->{week_parsed}}), "\n";
}
close(OUT);
} else {
print STDERR "ERROR: can't save last parsed line into $self->{pid_dir}/$tmp_file, $!\n";
}
}
}
sub _clear_stats
{
my $self = shift;
# Hashes to store user statistics
$self->{stat_user_hour} = ();
$self->{stat_user_day} = ();
$self->{stat_user_month} = ();
$self->{stat_usermax_hour} = ();
$self->{stat_usermax_day} = ();
$self->{stat_usermax_month} = ();
$self->{stat_user_url_hour} = ();
$self->{stat_user_url_day} = ();
$self->{stat_user_url_month} = ();
# Hashes to store network statistics
$self->{stat_network_hour} = ();
$self->{stat_network_day} = ();
$self->{stat_network_month} = ();
$self->{stat_netmax_hour} = ();
$self->{stat_netmax_day} = ();
$self->{stat_netmax_month} = ();
# Hashes to store user / network statistics
$self->{stat_netuser_hour} = ();
$self->{stat_netuser_day} = ();
$self->{stat_netuser_month} = ();
# Hashes to store cache status (hit/miss)
$self->{stat_code_hour} = ();
$self->{stat_code_day} = ();
$self->{stat_code_month} = ();
# Hashes to store throughput stats
$self->{stat_throughput_hour} = ();
$self->{stat_throughput_day} = ();
$self->{stat_throughput_month} = ();
# Hashes to store mime type
$self->{stat_mime_type_hour} = ();
$self->{stat_mime_type_day} = ();
$self->{stat_mime_type_month} = ();
}
sub get_history_time
{
my ($self, $file, $type) = @_;
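# The first line of a *.current history file is expected to hold two tab separated
# fields: the unix timestamp of the last parsed log entry and the last byte offset.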
my $current = new IO::File;
unless($current->open($file)) {
print STDERR "ERROR: Can't read file $file, $!\n" if (!$self->{QuietMode});
print STDERR "Starting at the first line of ", ucfirst($type), " log file.\n" if (!$self->{QuietMode});
} else {
my $tmp = <$current>;
chomp($tmp);
my ($history_time, $end_offset) = split(/[\t]/, $tmp);
if ($history_time) {
my $htime = 0;
if ($type eq 'SQUID') {
$self->{history_time} = $history_time;
$self->{end_offset} = $end_offset;
$self->{begin_time} = $history_time;
} elsif ($type eq 'SQUIDGUARD') {
$self->{sg_history_time} = $history_time;
$self->{sg_end_offset} = $end_offset;
$self->{sg_begin_time} = $history_time;
} elsif ($type eq 'UFDBGUARD') {
$self->{ug_history_time} = $history_time;
$self->{ug_end_offset} = $end_offset;
$self->{ug_begin_time} = $history_time;
}
print STDERR "$type LOG HISTORY TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($history_time+$self->{TimeZone})), " - HISTORY OFFSET: $self->{end_offset}\n" if (!$self->{QuietMode});
}
}
}
sub _init
{
my ($self, $conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile, $timezone, $skip_history, $refresh_time) = @_;
# Set path to pid file
$pidfile = $pid_dir . '/' . $pidfile;
# Prevent for a call without instance
if (!ref($self)) {
print STDERR "ERROR - init : Unable to call init without an object instance.\n";
unlink("$pidfile");
exit(0);
}
$self->{pidfile} = $pidfile || '/tmp/squid-analyzer.pid';
# Load configuration information
if (!$conf_file) {
if (-f '/etc/squidanalyzer/squidanalyzer.conf') {
$conf_file = '/etc/squidanalyzer/squidanalyzer.conf';
} elsif (-f '/etc/squidanalyzer.conf') {
$conf_file = '/etc/squidanalyzer.conf';
} elsif (-f 'squidanalyzer.conf') {
$conf_file = 'squidanalyzer.conf';
}
}
my %options = $self->parse_config($conf_file, $log_file, $rebuild);
# Configuration options
$self->{MinPie} = $options{MinPie} || 2;
$self->{QuietMode} = $options{QuietMode} || 0;
$self->{UrlReport} = $options{UrlReport} || 0;
$self->{UrlHitsOnly} = $options{UrlHitsOnly} || 0;
$self->{MaxFormatError} = $options{MaxFormatError} || 0;
if (defined $options{UserReport}) {
$self->{UserReport} = $options{UserReport};
} else {
# Assure backward compatibility after update, otherwise
# data files would lose users information if the directive
# is not found in the configuration file
$self->{UserReport} = 1;
}
$self->{Output} = $options{Output} || '';
$self->{WebUrl} = $options{WebUrl} || '';
$self->{WebUrl} .= '/' if ($self->{WebUrl} && ($self->{WebUrl} !~ /\/$/));
$self->{DateFormat} = $options{DateFormat} || '%y-%m-%d';
$self->{Lang} = $options{Lang} || '';
$self->{AnonymizeLogin} = $options{AnonymizeLogin} || 0;
$self->{SiblingHit} = $options{SiblingHit} || 1;
$self->{ImgFormat} = $options{ImgFormat} || 'png';
$self->{Locale} = $options{Locale} || '';
$self->{TopUrlUser} = $options{TopUrlUser} || 0;
$self->{no_year_stat} = 0;
$self->{with_month_stat} = 0;
$self->{no_week_stat} = 0;
$self->{UseClientDNSName} = $options{UseClientDNSName} || 0;
$self->{DNSLookupTimeout} = $options{DNSLookupTimeout} || 100;
$self->{DNSLookupTimeout} = int($self->{DNSLookupTimeout} * 1000);
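# DNSLookupTimeout is given in milliseconds in the configuration file and converted
# to microseconds here because ualarm() (used in _gethostbyaddr) expects microseconds.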
$self->{LogFile} = ();
$self->{queue_size} = 1;
$self->{running_pids} = ();
$self->{pid_dir} = $pid_dir || '/tmp';
$self->{child_count} = 0;
$self->{rebuild} = $rebuild || 0;
$self->{is_squidguard_log} = 0;
$self->{Syslog} = 0;
$self->{UseUrlPort} = 1;
$self->{TimeStart} = $options{TimeStart} || '';
$self->{TimeStop} = $options{TimeStop} || '';
$self->{SkipHistory} = $skip_history || 0;
$self->{OverrideHistory} = 0;
$self->{StoreUserIp} = $options{StoreUserIp} || 0;
# Set default timezone
$self->{TimeZone} = 0;
if ($options{TimeZone} eq 'auto') {
my @lt = localtime();
# compute the local UTC offset, including Daylight Saving Time
$self->{TimeZone} = timelocal(@lt) - timegm(@lt);
print STDERR "DEBUG: using autodetected timezone $self->{TimeZone}\n" if ($debug);
} elsif ($options{TimeZone} || $timezone) {
$self->{TimeZone} = (0-($options{TimeZone} || $timezone || 0))*3600;
print STDERR "DEBUG: using timezone $self->{TimeZone}\n" if ($debug);
}
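# Illustrative example: with "TimeZone -2" in the configuration the stored offset is
# (0 - (-2)) * 3600 = +7200 seconds, which is later added to log timestamps before
# calling localtime() for display.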
# Cleanup old temporary files
foreach my $tmp_file ('last_parsed.tmp', 'sg_last_parsed.tmp', 'ug_last_parsed.tmp') {
unlink("$self->{pid_dir}/$tmp_file");
}
$self->{CustomHeader} = $options{CustomHeader} || qq{<a href="$self->{WebUrl}"><img src="$self->{WebUrl}images/logo-squidanalyzer.png" title="SquidAnalyzer $VERSION" border="0"></a> SquidAnalyzer};
$self->{CustomTitle} = $options{CustomTitle} || qq{SquidAnalyzer $VERSION Report};
$self->{ExcludedMethods} = ();
if ($options{ExcludedMethods}) {
push(@{$self->{ExcludedMethods}}, split(/\s*,\s*/, $options{ExcludedMethods}));
}
$self->{ExcludedCodes} = ();
if ($options{ExcludedCodes}) {
push(@{$self->{ExcludedCodes}}, split(/\s*,\s*/, $options{ExcludedCodes}));
}
$self->{ExcludedMimes} = ();
if ($options{ExcludedMimes}) {
push(@{$self->{ExcludedMimes}}, split(/\s*,\s*/, $options{ExcludedMimes}));
}
if ($self->{Lang}) {
open(IN, "$self->{Lang}") or die "ERROR: can't open translation file $self->{Lang}, $!\n";
while (my $l = <IN>) {
chomp($l);
$l =~ s/\r//gs;
next if ($l =~ /^\s*#/);
next if (!$l);
my ($key, $str) = split(/\t+/, $l);
$Translate{$key} = $str;
}
close(IN);
}
if (!$self->{Output}) {
die "ERROR: 'Output' configuration option must be set.\n";
}
if (! -d $self->{Output}) {
die "ERROR: 'Output' directory $self->{Output} doesn't exists.\n";
}
if (!$self->{rebuild}) {
push(@{$self->{LogFile}}, @{$options{LogFile}});
if ($#{$self->{LogFile}} < 0) {
die "ERROR: 'LogFile' configuration directive must be set or a log file given at command line.\n";
}
}
$self->{RefreshTime} = $refresh_time || $options{RefreshTime} || 0;
$self->{OrderUser} = lc($options{OrderUser}) || 'bytes';
$self->{OrderNetwork} = lc($options{OrderNetwork}) || 'bytes';
$self->{OrderUrl} = lc($options{OrderUrl}) || 'bytes';
$self->{OrderMime} = lc($options{OrderMime}) || 'bytes';
if ($self->{OrderUser} !~ /^(hits|bytes|duration)$/) {
die "ERROR: OrderUser must be one of these values: hits, bytes or duration\n";
}
if ($self->{OrderNetwork} !~ /^(hits|bytes|duration)$/) {
die "ERROR: OrderNetwork must be one of these values: hits, bytes or duration\n";
}
if ($self->{OrderUrl} !~ /^(hits|bytes|duration)$/) {
die "ERROR: OrderUrl must be one of these values: hits, bytes or duration\n";
}
if ($self->{OrderMime} !~ /^(hits|bytes)$/) {
die "ERROR: OrderMime must be one of these values: hits or bytes\n";
}
%{$self->{UrlAliasName}} = ();
%{$self->{NetworkAlias}} = $self->parse_network_aliases($options{NetworkAlias} || '');
%{$self->{UserAlias}} = $self->parse_user_aliases($options{UserAlias} || '');
%{$self->{Exclude}} = $self->parse_exclusion($options{Exclude} || '');
%{$self->{Include}} = $self->parse_inclusion($options{Include} || '');
%{$self->{UrlAlias}} = $self->parse_url_aliases($options{UrlAlias} || '');
# Some caches to improve parsing speed
%{$self->{NetworkAliasCache}} = ();
%{$self->{UserAliasCache}} = ();
%{$self->{UrlAliasCache}} = ();
@{$self->{UserExcludeCache}} = ();
@{$self->{ClientExcludeCache}} = ();
%{$self->{UrlExcludeCache}} = ();
@{$self->{UserIncludeCache}} = ();
@{$self->{ClientIncludeCache}} = ();
$self->{has_network_alias} = scalar keys %{$self->{NetworkAlias}};
$self->{has_user_alias} = scalar keys %{$self->{UserAlias}};
$self->{has_url_alias} = scalar keys %{$self->{UrlAlias}};
$self->{CostPrice} = $options{CostPrice} || 0;
$self->{Currency} = $options{Currency} || '&euro;';
$self->{TopNumber} = $options{TopNumber} || 10;
$self->{TopDenied} = $options{TopDenied} || 10;
$self->{TopStorage} = $options{TopStorage} || 0;
$self->{TransfertUnit} = $options{TransfertUnit} || 'BYTES';
if (!grep(/^$self->{TransfertUnit}$/i, 'BYTES', 'KB', 'MB', 'GB')) {
die "ERROR: TransfertUnit must be one of these values: KB, MB or GB\n";
} else {
if (uc($self->{TransfertUnit}) eq 'BYTES') {
$self->{TransfertUnitValue} = 1;
$self->{TransfertUnit} = 'Bytes';
} elsif (uc($self->{TransfertUnit}) eq 'KB') {
$self->{TransfertUnitValue} = 1024;
} elsif (uc($self->{TransfertUnit}) eq 'MB') {
$self->{TransfertUnitValue} = 1024*1024;
} elsif (uc($self->{TransfertUnit}) eq 'GB') {
$self->{TransfertUnitValue} = 1024*1024*1024;
}
}
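# Illustrative example: with "TransfertUnit MB" the value stored in TransfertUnitValue
# is 1048576; byte counters are presumably divided by it when reports are rendered
# (the rendering code lives elsewhere in this module).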
# Init statistics storage hashes
$self->_clear_stats();
# Used to store the first and last date parsed
$self->{last_year} = 0;
$self->{last_month} = ();
$self->{last_day} = ();
$self->{cur_year} = 0;
$self->{cur_month} = 0;
$self->{cur_day} = 0;
$self->{first_year} = 0;
$self->{first_month} = ();
$self->{begin_time} = 0;
$self->{end_time} = 0;
$self->{end_offset} = 0;
$self->{week_parsed} = ();
# Used to stored command line parameters from squid-analyzer
$self->{history_time} = 0;
$self->{history_endtime} = 0;
$self->{sg_history_time} = 0;
$self->{ug_history_time} = 0;
$self->{preserve} = 0;
$self->{sg_end_time} = 0;
$self->{sg_end_offset} = 0;
$self->{ug_end_time} = 0;
$self->{ug_end_offset} = 0;
$self->{'Kerberos_Challenge'} = ();
$self->{has_407} = 0;
# Override verbose mode
$self->{QuietMode} = 0 if ($debug);
# Enable local date format if defined, else strftime will be used. The limitation of
# this behavior is that all dates in HTML files will be the same, for performance reasons.
if ($self->{Locale}) {
my $lang = 'LANG=' . $self->{Locale};
$self->{start_date} = `$lang date | iconv -t $Translate{CharSet} 2>/dev/null`;
chomp($self->{start_date});
}
# Get the last parsing date for Squid log incremental parsing
if (!$rebuild && !$self->{SkipHistory} && -e "$self->{Output}/SquidAnalyzer.current") {
$self->get_history_time("$self->{Output}/SquidAnalyzer.current", 'SQUID');
}
# Get the last parsing date for SquidGuard log incremental parsing
if (!$rebuild && !$self->{SkipHistory} && -e "$self->{Output}/SquidGuard.current") {
$self->get_history_time("$self->{Output}/SquidGuard.current", 'SQUIDGUARD');
}
# Get the last parsing date for ufdbGuard log incremental parsing
if (!$rebuild && !$self->{SkipHistory} && -e "$self->{Output}/ufdbGuard.current") {
$self->get_history_time("$self->{Output}/ufdbGuard.current", 'UFDBGUARD');
}
$self->{menu} = qq{
<div id="menu">
<ul>
<li><a href="../index.html"><span class="iconArrow">$Translate{'Back_link'}</span></a></li>
};
if ($self->{UrlReport}) {
$self->{menu} .= qq{
<li><a href="domain.html"><span class="iconDomain">$Translate{'Top_domain_link'}</span></a></li>
<li><a href="url.html"><span class="iconUrl">$Translate{'Top_url_link'}</span></a></li>
<li><a href="denied.html"><span class="iconUrl">$Translate{'Top_denied_link'}</span></a></li>
};
}
if ($self->{UserReport}) {
$self->{menu} .= qq{
<li><a href="user.html"><span class="iconUser">$Translate{'User_link'}</span></a></li>
};
}
$self->{menu} .= qq{
<li><a href="network.html"><span class="iconNetwork">$Translate{'Network_link'}</span></a></li>
<li><a href="mime_type.html"><span class="iconMime">$Translate{'Mime_link'}</span></a></li>
</ul>
</div>
};
$self->{menu2} = qq{
<div id="menu">
<ul>
<li><a href="../../index.html"><span class="iconArrow">$Translate{'Back_link'}</span></a></li>
};
if ($self->{UrlReport}) {
$self->{menu2} .= qq{
<li><a href="../../domain.html"><span class="iconDomain">$Translate{'Top_domain_link'}</span></a></li>
<li><a href="../../url.html"><span class="iconUrl">$Translate{'Top_url_link'}</span></a></li>A
<li><a href="../../denied.html"><span class="iconUrl">$Translate{'Top_denied_link'}</span></a></li>A
};
}
if ($self->{UserReport}) {
$self->{menu2} .= qq{
<li><a href="../../user.html"><span class="iconUser">$Translate{'User_link'}</span></a></li>
};
}
$self->{menu2} .= qq{
<li><a href="../../network.html"><span class="iconNetwork">$Translate{'Network_link'}</span></a></li>
<li><a href="../../mime_type.html"><span class="iconMime">$Translate{'Mime_link'}</span></a></li>
</ul>
</div>
};
$self->{menu3} = qq{
<div id="menu">
<ul>
<li><a href="../index.html"><span class="iconArrow">$Translate{'Back_link'}</span></a></li>
</ul>
</div>
};
}
sub _gethostbyaddr
{
my ($self, $ip) = @_;
my $host = undef;
my $err = '';
unless(exists $CACHE{$ip}) {
eval {
local $SIG{ALRM} = sub { die "DNS lookup timeout.\n"; };
ualarm $self->{DNSLookupTimeout};
my @addrs = ();
if ($] < 5.014) {
$host = gethostbyaddr(Socket::inet_aton($ip), Socket::AF_INET);
} else {
# We also need to resolve IPV6 addresses
if ($ip =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/) {
($err, @addrs) = Socket::getaddrinfo( $ip, 0, { 'protocol' => Socket::IPPROTO_TCP, 'family' => Socket::AF_INET } );
} else {
($err, @addrs) = Socket::getaddrinfo( $ip, 0, { 'protocol' => Socket::IPPROTO_TCP, 'family' => Socket::AF_INET6 } );
}
}
for my $addr (@addrs) {
($err, $host) = Socket::getnameinfo( $addr->{addr});
last;
}
ualarm 0;
};
if ($@) {
delete $CACHE{$ip};
if (!$self->{QuietMode}) {
warn "_gethostbyaddr timeout reach for ip: $ip, timeout can be adjusted with directive DNSLookupTimeout\n";
}
} elsif ($err) {
delete $CACHE{$ip};
if (!$self->{QuietMode}) {
warn "_gethostbyaddr error resolving ip: $ip, $err\n";
}
}
else {
$CACHE{$ip} = $host;
#printf "_gethostbyaddr success : %s (%s)\n", $ip, $host;
}
}
return $CACHE{$ip} || $ip;
}
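# Illustrative example (assuming a "NetworkAlias" entry mapping 192.168.1.0/24 to
# "LAN-Office"): apply_network_alias('192.168.1.42') returns 'LAN-Office', while an
# address matching no alias, e.g. 10.0.3.7, falls back to its class C network 10.0.3.0.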
sub apply_network_alias
{
my ($self, $ip) = @_;
return $self->{NetworkAliasCache}{$ip} if (exists $self->{NetworkAliasCache}{$ip});
my $found = 0;
foreach my $r (keys %{$self->{NetworkAlias}}) {
if ($r =~ $cidr_regex) {
if (&check_ip($ip, $r)) {
$self->{NetworkAliasCache}{$ip} = $self->{NetworkAlias}->{$r};
$ip = $self->{NetworkAlias}->{$r};
$found = 1;
last;
}
} elsif ($ip =~ /^$r/) {
$self->{NetworkAliasCache}{$ip} = $self->{NetworkAlias}->{$r};
$ip = $self->{NetworkAlias}->{$r};
$found = 1;
last;
}
}
if (!$found) {
# Set default to a class C network
if ($ip =~ /^(.*)([:\.]+)\d+$/) {
$self->{NetworkAliasCache}{$ip} = "$1$2". "0";
$ip = "$1$2". "0";
} else {
$self->{NetworkAliasCache}{$ip} = $ip;
}
}
return $ip;
}
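# Illustrative example (assuming a "UserAlias" entry mapping the regexp jdoe\d* to
# "John Doe"): apply_user_alias('jdoe42') returns 'John Doe'; unmatched logins are
# cached and returned unchanged.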
sub apply_user_alias
{
my ($self, $id) = @_;
return $self->{UserAliasCache}{$id} if (exists $self->{UserAliasCache}{$id});
my $found = 0;
foreach my $u (keys %{$self->{UserAlias}}) {
if ( $id =~ /^$u$/i ) {
$self->{UserAliasCache}{$id} = $self->{UserAlias}->{$u};
$id = $self->{UserAlias}->{$u};
$found = 1;
last;
}
}
$self->{UserAliasCache}{$id} = $id if (!$found);
return $id;
}
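# Illustrative example (assuming a "UrlAlias" entry mapping www.example.com to
# "Example"): apply_url_alias('www.example.com') returns 'Example'; URLs matching no
# alias are returned unchanged.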
sub apply_url_alias
{
my ($self, $url) = @_;
return $self->{UrlAliasCache}{$url} if (exists $self->{UrlAliasCache}{$url});
foreach my $r (keys %{$self->{UrlAlias}})
{
if ($url =~ /^$r/)
{
$self->{UrlAliasCache}{$url} = $self->{UrlAlias}->{$r};
$url = $self->{UrlAlias}->{$r};
last;
}
}
return $url;
}
sub _parseData
{
my ($self, $time, $elapsed, $client, $code, $bytes, $url, $id, $type, $acl, $method) = @_;
# Save original IP address for dns resolving
my $client_ip_addr = $client;
# Get the current year and month
my ($sec,$min,$hour,$day,$month,$year,$wday,$yday,$isdst) = CORE::localtime($time);
$year += 1900;
$month = sprintf("%02d", $month + 1);
$day = sprintf("%02d", $day);
# Store data when the hour changes, to save memory
if ($self->{cur_year} && ($self->{cur_hour} ne '') && ($hour != $self->{cur_hour}) ) {
# If the hour (and possibly the day) has changed, save the stats of the previous one
$self->_append_data($self->{cur_year}, $self->{cur_month}, $self->{cur_day});
# Stats can be cleared
print STDERR "Clearing statistics storage hashes, for $self->{cur_year}-$self->{cur_month}-$self->{cur_day} ", sprintf("%02d", $self->{cur_hour}), ":00:00.\n" if (!$self->{QuietMode});
$self->_clear_stats();
}
# Stores weeks to process
if (!$self->{no_week_stat}) {
if ("$year$month$day" ne "$self->{cur_year}$self->{cur_month}$self->{cur_day}") {
my $wn = &get_week_number($year, $month, $day);
if (!grep(/^$year\/$month\/$wn$/, @{$self->{week_parsed}})) {
push(@{$self->{week_parsed}}, "$year/$month/$wn");
}
}
}
# Extract the domainname part of the URL
$url =~ s/:\d+.*// if (!$self->{UseUrlPort});
$url =~ m/^(?:[^\/]+\/\/|)([^\/:]+)/;
my $dest = $1 || $url;
# Replace the username by its DNS name if there is no username
# (i.e. the login equals the client IP) and the client is an IP address
if ( ($id eq $client) && $self->{UseClientDNSName}) {
if ($client =~ $ip_regexp) {
my $dnsname = $self->_gethostbyaddr($client);
if ($dnsname) {
$id = $dnsname;
}
}
}
# Replace the network by its alias if any
my $network = (!$self->{has_network_alias}) ? '' : $self->apply_network_alias($client);
if (!$network) {
# set network to a default class C
$client =~ /^(.*)([:\.]+)\d+$/;
$network = "$1$2" . "0";
}
# Replace the username by its alias if any
$id = (!$self->{has_user_alias}) ? $id : $self->apply_user_alias($id);
# Replace the url by its alias if any
$dest = (!$self->{has_url_alias}) ? $dest : $self->apply_url_alias($dest);
# Stores last parsed date part
if (!$self->{last_year} || ("$year$month$day" gt "$self->{last_year}$self->{last_month}{$self->{last_year}}$self->{last_day}{$self->{last_year}}")) {
$self->{last_year} = $year;
$self->{last_month}{$self->{last_year}} = $month;
$self->{last_day}{$self->{last_year}} = $day;
}
# Stores first parsed date part
if (!$self->{first_year} || ("$self->{first_year}$self->{first_month}{$self->{first_year}}" gt "$year$month")) {
$self->{first_year} = $year;
$self->{first_month}{$self->{first_year}} = $month;
}
# Stores current processed values
$self->{cur_year} = $year;
$self->{cur_month} = $month;
$self->{cur_day} = $day;
$self->{cur_hour} = $hour;
$hour = sprintf("%02d", $hour);
#### Store access denied statistics
if ($code eq 'DENIED')
{
$self->{stat_code_hour}{$code}{$hour}{hits}++;
$self->{stat_code_hour}{$code}{$hour}{bytes} += $bytes;
$self->{stat_code_day}{$code}{$self->{last_day}}{hits}++;
$self->{stat_code_day}{$code}{$self->{last_day}}{bytes} += $bytes;
$self->{stat_throughput_hour}{$code}{$hour}{bytes} += $bytes;
$self->{stat_throughput_day}{$code}{$self->{last_day}}{bytes} += $bytes;
$self->{stat_throughput_hour}{$code}{$hour}{elapsed} += $elapsed;
$self->{stat_throughput_day}{$code}{$self->{last_day}}{elapsed} += $elapsed;
#### Store url statistics
if ($self->{UrlReport}) {
$self->{stat_denied_url_hour}{$id}{$dest}{hits}++;
$self->{stat_denied_url_hour}{$id}{$dest}{firsthit} = $time if (!$self->{stat_denied_url_hour}{$id}{$dest}{firsthit} || ($time < $self->{stat_denied_url_hour}{$id}{$dest}{firsthit}));
$self->{stat_denied_url_hour}{$id}{$dest}{lasthit} = $time if (!$self->{stat_denied_url_hour}{$id}{$dest}{lasthit} || ($time > $self->{stat_denied_url_hour}{$id}{$dest}{lasthit}));
$self->{stat_denied_url_hour}{$id}{$dest}{blacklist}{$acl}++ if ($acl);
$self->{stat_denied_url_day}{$id}{$dest}{hits}++;
$self->{stat_denied_url_day}{$id}{$dest}{firsthit} = $time if (!$self->{stat_denied_url_day}{$id}{$dest}{firsthit} || ($time < $self->{stat_denied_url_day}{$id}{$dest}{firsthit}));
$self->{stat_denied_url_day}{$id}{$dest}{lasthit} = $time if (!$self->{stat_denied_url_day}{$id}{$dest}{lasthit} || ($time > $self->{stat_denied_url_day}{$id}{$dest}{lasthit}));
$self->{stat_denied_url_day}{$id}{$dest}{blacklist}{$acl}++ if ($acl);
$self->{stat_user_hour}{$id}{$hour}{hits} += 0;
$self->{stat_user_hour}{$id}{$hour}{bytes} += 0;
$self->{stat_user_hour}{$id}{$hour}{duration} += 0;
$self->{stat_user_day}{$id}{$self->{last_day}}{hits} += 0;
$self->{stat_user_day}{$id}{$self->{last_day}}{bytes} += 0;
$self->{stat_user_day}{$id}{$self->{last_day}}{duration} += 0;
}
return;
}
elsif ($self->{has_407} && exists $self->{'Kerberos_Challenge'}{$client}{$url})
{
delete $self->{'Kerberos_Challenge'}{$client}{$url};
delete $self->{'Kerberos_Challenge'}{$client} if (scalar keys %{ $self->{'Kerberos_Challenge'}{$client} } == 0);
}
#### Store client statistics
if ($self->{UserReport}) {
$self->{stat_user_hour}{$id}{$hour}{hits}++;
$self->{stat_user_hour}{$id}{$hour}{bytes} += $bytes;
$self->{stat_user_hour}{$id}{$hour}{duration} += $elapsed;
$self->{stat_user_day}{$id}{$self->{last_day}}{hits}++;
$self->{stat_user_day}{$id}{$self->{last_day}}{bytes} += $bytes;
$self->{stat_user_day}{$id}{$self->{last_day}}{duration} += $elapsed;
if ($bytes > $self->{stat_usermax_hour}{$id}{largest_file_size}) {
$self->{stat_usermax_hour}{$id}{largest_file_size} = $bytes;
$self->{stat_usermax_hour}{$id}{largest_file_url} = $url;
}
if ($bytes > $self->{stat_usermax_day}{$id}{largest_file_size}) {
$self->{stat_usermax_day}{$id}{largest_file_size} = $bytes;
$self->{stat_usermax_day}{$id}{largest_file_url} = $url;
}
}
#### Store networks statistics
$self->{stat_network_hour}{$network}{$hour}{hits}++;
$self->{stat_network_hour}{$network}{$hour}{bytes} += $bytes;
$self->{stat_network_hour}{$network}{$hour}{duration} += $elapsed;
$self->{stat_network_day}{$network}{$self->{last_day}}{hits}++;
$self->{stat_network_day}{$network}{$self->{last_day}}{bytes} += $bytes;
$self->{stat_network_day}{$network}{$self->{last_day}}{duration} += $elapsed;
if ($bytes > $self->{stat_netmax_hour}{$network}{largest_file_size}) {
$self->{stat_netmax_hour}{$network}{largest_file_size} = $bytes;
$self->{stat_netmax_hour}{$network}{largest_file_url} = $url;
}
if ($bytes > $self->{stat_netmax_day}{$network}{largest_file_size}) {
$self->{stat_netmax_day}{$network}{largest_file_size} = $bytes;
$self->{stat_netmax_day}{$network}{largest_file_url} = $url;
}
#### Store HIT/MISS/DENIED statistics
$self->{stat_code_hour}{$code}{$hour}{hits}++;
$self->{stat_code_hour}{$code}{$hour}{bytes} += $bytes;
$self->{stat_code_hour}{$code}{$hour}{elapsed} += $elapsed;
$self->{stat_code_day}{$code}{$self->{last_day}}{hits}++;
$self->{stat_code_day}{$code}{$self->{last_day}}{bytes} += $bytes;
$self->{stat_code_day}{$code}{$self->{last_day}}{elapsed} += $elapsed;
$self->{stat_throughput_hour}{$code}{$hour}{bytes} += $bytes;
$self->{stat_throughput_day}{$code}{$self->{last_day}}{bytes} += $bytes;
$self->{stat_throughput_hour}{$code}{$hour}{elapsed} += $elapsed;
$self->{stat_throughput_day}{$code}{$self->{last_day}}{elapsed} += $elapsed;
#### Store url statistics
if ($self->{UrlReport}) {
$self->{stat_user_url_hour}{$id}{$dest}{duration} += $elapsed;
$self->{stat_user_url_hour}{$id}{$dest}{hits}++;
$self->{stat_user_url_hour}{$id}{$dest}{bytes} += $bytes;
$self->{stat_user_url_hour}{$id}{$dest}{firsthit} = $time if (!$self->{stat_user_url_hour}{$id}{$dest}{firsthit} || ($time < $self->{stat_user_url_hour}{$id}{$dest}{firsthit}));
$self->{stat_user_url_hour}{$id}{$dest}{lasthit} = $time if (!$self->{stat_user_url_hour}{$id}{$dest}{lasthit} || ($time > $self->{stat_user_url_hour}{$id}{$dest}{lasthit}));
if (!exists $self->{stat_user_url_hour}{$id}{$dest}{arr_last} || ($#{$self->{stat_user_url_hour}{$id}{$dest}{arr_last}} < 9) || ($time > ($self->{stat_user_url_hour}{$id}{$dest}{arr_last}[-1]+300))) {
push(@{$self->{stat_user_url_hour}{$id}{$dest}{arr_last}}, $time);
shift(@{$self->{stat_user_url_hour}{$id}{$dest}{arr_last}}) if ($#{$self->{stat_user_url_hour}{$id}{$dest}{arr_last}} > 9);
}
$self->{stat_user_url_day}{$id}{$dest}{duration} += $elapsed;
$self->{stat_user_url_day}{$id}{$dest}{hits}++;
$self->{stat_user_url_day}{$id}{$dest}{firsthit} = $time if (!$self->{stat_user_url_day}{$id}{$dest}{firsthit} || ($time < $self->{stat_user_url_day}{$id}{$dest}{firsthit}));
$self->{stat_user_url_day}{$id}{$dest}{lasthit} = $time if (!$self->{stat_user_url_day}{$id}{$dest}{lasthit} || ($time > $self->{stat_user_url_day}{$id}{$dest}{lasthit}));
$self->{stat_user_url_day}{$id}{$dest}{bytes} += $bytes;
if ($code eq 'HIT') {
$self->{stat_user_url_day}{$id}{$dest}{cache_hit}++;
$self->{stat_user_url_day}{$id}{$dest}{cache_bytes} += $bytes;
}
if (!exists $self->{stat_user_url_day}{$id}{$dest}{arr_last} || ($#{$self->{stat_user_url_day}{$id}{$dest}{arr_last}} < 9) || ($time > ($self->{stat_user_url_day}{$id}{$dest}{arr_last}[-1]+1800))) {
push(@{$self->{stat_user_url_day}{$id}{$dest}{arr_last}}, $time);
shift(@{$self->{stat_user_url_day}{$id}{$dest}{arr_last}}) if ($#{$self->{stat_user_url_day}{$id}{$dest}{arr_last}} > 9);
}
if ($self->{StoreUserIp} and $id and ($id ne $client)
and !grep(/^$client$/, @{$self->{stat_user_url_day}{$id}{$dest}{user_ip}}))
{
push(@{$self->{stat_user_url_hour}{$id}{$dest}{user_ip}}, $client);
push(@{$self->{stat_user_url_day}{$id}{$dest}{user_ip}}, $client);
}
}
#### Store user per networks statistics
if ($self->{UserReport}) {
$self->{stat_netuser_hour}{$network}{$id}{duration} += $elapsed;
$self->{stat_netuser_hour}{$network}{$id}{bytes} += $bytes;
$self->{stat_netuser_hour}{$network}{$id}{hits}++;
if ($bytes > $self->{stat_netuser_hour}{$network}{$id}{largest_file_size}) {
$self->{stat_netuser_hour}{$network}{$id}{largest_file_size} = $bytes;
$self->{stat_netuser_hour}{$network}{$id}{largest_file_url} = $url;
}
$self->{stat_netuser_day}{$network}{$id}{duration} += $elapsed;
$self->{stat_netuser_day}{$network}{$id}{bytes} += $bytes;
$self->{stat_netuser_day}{$network}{$id}{hits}++;
if ($bytes > $self->{stat_netuser_day}{$network}{$id}{largest_file_size}) {
$self->{stat_netuser_day}{$network}{$id}{largest_file_size} = $bytes;
$self->{stat_netuser_day}{$network}{$id}{largest_file_url} = $url;
}
}
#### Store mime type statistics
$self->{stat_mime_type_hour}{"$type"}{hits}++;
$self->{stat_mime_type_hour}{"$type"}{bytes} += $bytes;
$self->{stat_mime_type_day}{"$type"}{hits}++;
$self->{stat_mime_type_day}{"$type"}{bytes} += $bytes;
}
sub _load_history
{
my ($self, $type, $year, $month, $day, $path, $kind, $wn, @wd) = @_;
#### Load history
if ($type eq 'day') {
foreach my $d ("01" .. "31") {
$self->_read_stat($year, $month, $d, 'day', $kind);
}
} elsif ($type eq 'week') {
$path = "$year/week$wn";
foreach my $wdate (@wd) {
$wdate =~ /^(\d+)-(\d+)-(\d+)$/;
$self->_read_stat($1, $2, $3, 'day', $kind, $wn);
}
$type = 'day';
} elsif ($type eq 'month') {
foreach my $m ("01" .. "12") {
$self->_read_stat($year, $m, $day, 'month', $kind);
}
} else {
$self->_read_stat($year, $month, $day, '', $kind);
}
}
sub _append_stat
{
my ($self, $year, $month, $day) = @_;
my $read_type = '';
my $type = 'hour';
if (!$day) {
$type = 'day';
}
if (!$month) {
$type = 'month';
}
$read_type = $type;
my $path = join('/', $year, $month, $day);
$path =~ s/[\/]+$//;
print STDERR "Appending data into $self->{Output}/$path\n" if (!$self->{QuietMode});
#### Save cache statistics
my $dat_file_code = new IO::File;
$dat_file_code->open(">>$self->{Output}/$path/stat_code.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_code.dat, $!\n");
flock($dat_file_code, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_code, $type, 'stat_code');
$dat_file_code->close();
#### With huge log files we only store global statistics in year and month views
if ( $self->{no_year_stat} && ($type ne 'hour') ) {
# unless month view is explicitly wanted
return if (!$self->{with_month_stat} || ($type ne 'day'));
}
#### Save url statistics per user
if ($self->{UrlReport}) {
my $dat_file_user_url = new IO::File;
$dat_file_user_url->open(">>$self->{Output}/$path/stat_user_url.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_user_url.dat, $!\n");
flock($dat_file_user_url, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_user_url, $type, 'stat_user_url');
$dat_file_user_url->close();
# Denied URL
my $dat_file_denied_url = new IO::File;
$dat_file_denied_url->open(">>$self->{Output}/$path/stat_denied_url.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_denied_url.dat, $!\n");
flock($dat_file_denied_url, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_denied_url, $type, 'stat_denied_url');
$dat_file_denied_url->close();
}
#### Save user statistics
if ($self->{UserReport}) {
my $dat_file_user = new IO::File;
$dat_file_user->open(">>$self->{Output}/$path/stat_user.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_user.dat, $!\n");
flock($dat_file_user, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_user, $type, 'stat_user');
$dat_file_user->close();
}
#### Save network statistics
my $dat_file_network = new IO::File;
$dat_file_network->open(">>$self->{Output}/$path/stat_network.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_network.dat, $!\n");
flock($dat_file_network, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_network, $type, 'stat_network');
$dat_file_network->close();
#### Save user per network statistics
if ($self->{UserReport}) {
my $dat_file_netuser = new IO::File;
$dat_file_netuser->open(">>$self->{Output}/$path/stat_netuser.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_netuser.dat, $!\n");
flock($dat_file_netuser, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_netuser, $type, 'stat_netuser');
$dat_file_netuser->close();
}
#### Save mime statistics
my $dat_file_mime_type = new IO::File;
$dat_file_mime_type->open(">>$self->{Output}/$path/stat_mime_type.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_mime_type.dat, $!\n");
flock($dat_file_mime_type, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_mime_type, $type, 'stat_mime_type');
$dat_file_mime_type->close();
}
sub _save_stat
{
my ($self, $year, $month, $day, $wn, @wd) = @_;
my $path = join('/', $year, $month, $day);
$path =~ s/[\/]+$//;
my $read_type = '';
my $type = 'hour';
if (!$day) {
$type = 'day';
}
if ($wn) {
$type = 'week';
$path = "$year/week$wn";
}
if (!$month) {
$type = 'month';
}
$read_type = $type;
print STDERR "Saving data into $self->{Output}/$path\n" if (!$self->{QuietMode});
#### Save cache statistics
my $dat_file_code = new IO::File;
$self->_load_history($read_type, $year, $month, $day, $path, 'stat_code', $wn, @wd);
$dat_file_code->open(">$self->{Output}/$path/stat_code.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_code.dat, $!\n");
flock($dat_file_code, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_code, $type, 'stat_code');
$dat_file_code->close();
#### With huge log files we only store global statistics in year and month views
if ( $self->{no_year_stat} && (($type ne 'hour') && !$wn) ) {
# unless month view is explicitly wanted
return if (!$self->{with_month_stat} || ($type ne 'day'));
}
#### Save url statistics per user
if ($self->{UrlReport}) {
my $dat_file_user_url = new IO::File;
$self->_load_history($read_type, $year, $month, $day, $path, 'stat_user_url', $wn, @wd);
$dat_file_user_url->open(">$self->{Output}/$path/stat_user_url.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_user_url.dat, $!\n");
flock($dat_file_user_url, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_user_url, $type, 'stat_user_url');
$dat_file_user_url->close();
# Denied URL
my $dat_file_denied_url = new IO::File;
$self->_load_history($read_type, $year, $month, $day, $path, 'stat_denied_url', $wn, @wd);
$dat_file_denied_url->open(">$self->{Output}/$path/stat_denied_url.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_denied_url.dat, $!\n");
flock($dat_file_denied_url, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_denied_url, $type, 'stat_denied_url');
$dat_file_denied_url->close();
}
#### Save user statistics
if ($self->{UserReport}) {
my $dat_file_user = new IO::File;
$self->_load_history($read_type, $year, $month, $day, $path, 'stat_user', $wn, @wd);
$dat_file_user->open(">$self->{Output}/$path/stat_user.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_user.dat, $!\n");
flock($dat_file_user, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_user, $type, 'stat_user');
$dat_file_user->close();
}
#### Save network statistics
my $dat_file_network = new IO::File;
$self->_load_history($read_type, $year, $month, $day, $path, 'stat_network', $wn, @wd);
$dat_file_network->open(">$self->{Output}/$path/stat_network.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_network.dat, $!\n");
flock($dat_file_network, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_network, $type, 'stat_network');
$dat_file_network->close();
#### Save user per network statistics
if ($self->{UserReport}) {
my $dat_file_netuser = new IO::File;
$self->_load_history($read_type, $year, $month, $day, $path, 'stat_netuser', $wn, @wd);
$dat_file_netuser->open(">$self->{Output}/$path/stat_netuser.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_netuser.dat, $!\n");
flock($dat_file_netuser, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_netuser, $type, 'stat_netuser');
$dat_file_netuser->close();
}
#### Save mime statistics
my $dat_file_mime_type = new IO::File;
$self->_load_history($read_type, $year, $month, $day, $path, 'stat_mime_type', $wn, @wd);
$dat_file_mime_type->open(">$self->{Output}/$path/stat_mime_type.dat")
or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_mime_type.dat, $!\n");
flock($dat_file_mime_type, 2) || die "FATAL: can't acquire lock on file, $!\n";
$self->_write_stat_data($dat_file_mime_type, $type, 'stat_mime_type');
$dat_file_mime_type->close();
}
sub _write_stat_data
{
my ($self, $fh, $type, $kind) = @_;
$type = 'day' if ($type eq 'week');
#### Save cache statistics
if ($kind eq 'stat_code') {
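# Each line written here looks like (illustrative, note the trailing commas):
# HIT hits_hour=08:12,09:5,;bytes_hour=08:34567,09:1234,;thp_bytes_hour=08:34567,09:1234,;thp_duration_hour=08:950,09:210,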
foreach my $code (sort {$a cmp $b} keys %{$self->{"stat_code_$type"}}) {
$fh->print("$code " . "hits_$type=");
foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_code_$type"}{$code}}) {
$fh->print("$tmp:" . $self->{"stat_code_$type"}{$code}{$tmp}{hits} . ",");
}
$fh->print(";bytes_$type=");
foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_code_$type"}{$code}}) {
$fh->print("$tmp:" . $self->{"stat_code_$type"}{$code}{$tmp}{bytes} . ",");
}
$fh->print(";thp_bytes_$type=");
foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_throughput_$type"}{$code}}) {
$fh->print("$tmp:" . $self->{"stat_throughput_$type"}{$code}{$tmp}{bytes} . ",");
}
$fh->print(";thp_duration_$type=");
foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_throughput_$type"}{$code}}) {
$fh->print("$tmp:" . $self->{"stat_throughput_$type"}{$code}{$tmp}{elapsed} . ",");
}
$fh->print("\n");
}
$self->{"stat_code_$type"} = ();
}
#### Save denied url statistics per user
if ($kind eq 'stat_denied_url') {
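# Each line written here looks like (illustrative):
# jdoe hits=3;first=1546300800;last=1546304400;url=ads.example.com;blacklist=ads,3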
foreach my $id (sort {$a cmp $b} keys %{$self->{"stat_denied_url_$type"}}) {
foreach my $dest (keys %{$self->{"stat_denied_url_$type"}{$id}}) {
next if (!$dest);
my $u = $id;
$u = '-' if (!$self->{UserReport});
my $bl = '';
if (exists $self->{"stat_denied_url_$type"}{$id}{$dest}{blacklist}) {
foreach my $b (keys %{$self->{"stat_denied_url_$type"}{$id}{$dest}{blacklist}}) {
$bl .= $b . ',' . $self->{"stat_denied_url_$type"}{$id}{$dest}{blacklist}{$b} . ',';
}
$bl =~ s/,$//;
}
$fh->print(
"$id hits=" . $self->{"stat_denied_url_$type"}{$id}{$dest}{hits} . ";" .
"first=" . $self->{"stat_denied_url_$type"}{$id}{$dest}{firsthit} . ";" .
"last=" . $self->{"stat_denied_url_$type"}{$id}{$dest}{lasthit} . ";" .
"url=$dest" . ";" .
"blacklist=" . $bl .
"\n");
}
}
$self->{"stat_denied_url_$type"} = ();
}
#### Save url statistics per user
if ($kind eq 'stat_user_url') {
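# Each line written here looks like (illustrative, user_ip only when StoreUserIp is enabled):
# jdoe hits=42;bytes=123456;duration=789;first=1546300800;last=1546304400;url=www.example.com;cache_hit=10;cache_bytes=2048;arr_last=1546300800,1546302600;user_ip=192.168.1.42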
foreach my $id (sort {$a cmp $b} keys %{$self->{"stat_user_url_$type"}}) {
my $i = 0;
foreach my $dest (sort {
$self->{"stat_user_url_$type"}{$id}{$b}{$self->{OrderUrl}} <=> $self->{"stat_user_url_$type"}{$id}{$a}{$self->{OrderUrl}}
} keys %{$self->{"stat_user_url_$type"}{$id}}) {
last if ($self->{TopStorage} && ($i > $self->{TopStorage}));
my $u = $id;
$u = '-' if (!$self->{UserReport});
$i++;
my $user_ip = '';
if ($self->{StoreUserIp} && $#{$self->{"stat_user_url_$type"}{$id}{$dest}{user_ip}} >= 0) {
$user_ip = ";user_ip=" . join(',', @{$self->{"stat_user_url_$type"}{$id}{$dest}{user_ip}});
}
$fh->print(
"$id hits=" . $self->{"stat_user_url_$type"}{$id}{$dest}{hits} . ";" .
"bytes=" . $self->{"stat_user_url_$type"}{$id}{$dest}{bytes} . ";" .
"duration=" . $self->{"stat_user_url_$type"}{$id}{$dest}{duration} . ";" .
"first=" . $self->{"stat_user_url_$type"}{$id}{$dest}{firsthit} . ";" .
"last=" . $self->{"stat_user_url_$type"}{$id}{$dest}{lasthit} . ";" .
"url=$dest;" .
"cache_hit=" . ($self->{"stat_user_url_$type"}{$id}{$dest}{cache_hit}||0) . ";" .
"cache_bytes=" . ($self->{"stat_user_url_$type"}{$id}{$dest}{cache_bytes}||0) . ";" .
"arr_last=" . join(',', @{$self->{"stat_user_url_$type"}{$id}{$dest}{arr_last}}) .
$user_ip .
"\n");
}
}
$self->{"stat_user_url_$type"} = ();
}
#### Save user statistics
if ($kind eq 'stat_user') {
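# Each line written here looks like (illustrative, note the trailing commas):
# jdoe hits_day=01:42,02:7,;bytes_day=01:123456,02:7890,;duration_day=01:789,02:55,;largest_file_size=65536;largest_file_url=www.example.com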
foreach my $id (sort {$a cmp $b} keys %{$self->{"stat_user_$type"}}) {
my $name = $id;
$name =~ s/\s+//g;
$fh->print("$name hits_$type=");
foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_user_$type"}{$id}}) {
$fh->print("$tmp:" . $self->{"stat_user_$type"}{$id}{$tmp}{hits} . ",");
}
$fh->print(";bytes_$type=");
foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_user_$type"}{$id}}) {
$fh->print("$tmp:" . $self->{"stat_user_$type"}{$id}{$tmp}{bytes} . ",");
}
$fh->print(";duration_$type=");
foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_user_$type"}{$id}}) {
$fh->print("$tmp:" . $self->{"stat_user_$type"}{$id}{$tmp}{duration} . ",");
}
$fh->print(";largest_file_size=" . $self->{"stat_usermax_$type"}{$id}{largest_file_size});
$fh->print(";largest_file_url=" . $self->{"stat_usermax_$type"}{$id}{largest_file_url});
$fh->print("\n");
}
$self->{"stat_user_$type"} = ();
$self->{"stat_usermax_$type"} = ();
}
#### Save network statistics
if ($kind eq 'stat_network') {
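# Each line written here looks like (illustrative, the network and the data are tab separated):
# 192.168.1.0<TAB>hits_day=01:42,;bytes_day=01:123456,;duration_day=01:789,;largest_file_size=65536;largest_file_url=www.example.com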
foreach my $net (sort {$a cmp $b} keys %{$self->{"stat_network_$type"}}) {
$fh->print("$net\thits_$type=");
foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_network_$type"}{$net}}) {
$fh->print("$tmp:" . $self->{"stat_network_$type"}{$net}{$tmp}{hits} . ",");
}
$fh->print(";bytes_$type=");
foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_network_$type"}{$net}}) {
$fh->print("$tmp:" . $self->{"stat_network_$type"}{$net}{$tmp}{bytes} . ",");
}
$fh->print(";duration_$type=");
foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_network_$type"}{$net}}) {
$fh->print("$tmp:" . $self->{"stat_network_$type"}{$net}{$tmp}{duration} . ",");
}
$fh->print(";largest_file_size=" . $self->{"stat_netmax_$type"}{$net}{largest_file_size});
$fh->print(";largest_file_url=" . $self->{"stat_netmax_$type"}{$net}{largest_file_url} . "\n");
}
$self->{"stat_network_$type"} = ();
$self->{"stat_netmax_$type"} = ();
}
#### Save user per network statistics
if ($kind eq 'stat_netuser') {
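# Each line written here looks like (illustrative, fields are tab separated):
# 192.168.1.0<TAB>jdoe<TAB>hits=42;bytes=123456;duration=789;largest_file_size=65536;largest_file_url=www.example.com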
foreach my $net (sort {$a cmp $b} keys %{$self->{"stat_netuser_$type"}}) {
foreach my $id (sort {$a cmp $b} keys %{$self->{"stat_netuser_$type"}{$net}}) {
$fh->print("$net\t$id\thits=" . $self->{"stat_netuser_$type"}{$net}{$id}{hits} . ";" .
"bytes=" . $self->{"stat_netuser_$type"}{$net}{$id}{bytes} . ";" .
"duration=" . $self->{"stat_netuser_$type"}{$net}{$id}{duration} . ";");
$fh->print("largest_file_size=" .
$self->{"stat_netuser_$type"}{$net}{$id}{largest_file_size} . ";" .
"largest_file_url=" . $self->{"stat_netuser_$type"}{$net}{$id}{largest_file_url} . "\n");
}
}
$self->{"stat_netuser_$type"} = ();
}
#### Save mime statistics
if ($kind eq 'stat_mime_type') {
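# Each line written here looks like (illustrative):
# text/html hits=42;bytes=123456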
foreach my $mime (sort {$a cmp $b} keys %{$self->{"stat_mime_type_$type"}}) {
$fh->print("$mime hits=" . $self->{"stat_mime_type_$type"}{$mime}{hits} . ";" .
"bytes=" . $self->{"stat_mime_type_$type"}{$mime}{bytes} . "\n");
}
$self->{"stat_mime_type_$type"} = ();
}
}
sub _read_stat
{
my ($self, $year, $month, $day, $sum_type, $kind, $wn) = @_;
my $type = 'hour';
if (!$day) {
$type = 'day';
}
if (!$month) {
$type = 'month';
}
my $path = join('/', $year, $month, $day);
$path =~ s/[\/]+$//;
return if (! -d "$self->{Output}/$path");
my $k = '';
my $key = '';
$key = $day if ($sum_type eq 'day');
$key = $month if ($sum_type eq 'month');
$sum_type ||= $type;
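# When a summary view is rebuilt (e.g. a month view from daily files), $key forces every
# value read from the finer grained keys to be accumulated under a single day or month
# key; otherwise the original hour/day/month key ($tmp) is kept as is.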
#### Read previous cache statistics
if (!$kind || ($kind eq 'stat_code')) {
my $dat_file_code = new IO::File;
if ($dat_file_code->open("$self->{Output}/$path/stat_code.dat")) {
my $i = 1;
my $error = 0;
while (my $l = <$dat_file_code>) {
chomp($l);
if ($l =~ s/^([^\s]+)\s+hits_$type=([^;]+);bytes_$type=([^;]+)//) {
my $code = $1;
my $hits = $2 || '';
my $bytes = $3 || '';
$hits =~ s/,$//;
$bytes =~ s/,$//;
my %hits_tmp = split(/[:,]/, $hits);
foreach my $tmp (sort {$a <=> $b} keys %hits_tmp) {
if ($key ne '') { $k = $key; } else { $k = $tmp; }
$self->{"stat_code_$sum_type"}{$code}{$k}{hits} += $hits_tmp{$tmp};
}
my %bytes_tmp = split(/[:,]/, $bytes);
foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) {
if ($key ne '') { $k = $key; } else { $k = $tmp; }
$self->{"stat_code_$sum_type"}{$code}{$k}{bytes} += $bytes_tmp{$tmp};
}
if ($l =~ s/thp_bytes_$type=([^;]+);thp_duration_$type=([^;]+)//) {
$bytes = $1 || '';
my $elapsed = $2 || '';
$elapsed =~ s/,$//;
my %bytes_tmp = split(/[:,]/, $bytes);
foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) {
if ($key ne '') { $k = $key; } else { $k = $tmp; }
$self->{"stat_throughput_$sum_type"}{$code}{$k}{bytes} += $bytes_tmp{$tmp};
}
my %elapsed_tmp = split(/[:,]/, $elapsed);
foreach my $tmp (sort {$a <=> $b} keys %elapsed_tmp) {
if ($key ne '') { $k = $key; } else { $k = $tmp; }
$self->{"stat_throughput_$sum_type"}{$code}{$k}{elapsed} += $elapsed_tmp{$tmp};
}
}
} else {
print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_code.dat\n";
print STDERR "$l\n";
if ($error > $self->{MaxFormatError}) {
unlink($self->{pidfile});
exit 0;
}
$error++;
}
$i++;
}
$dat_file_code->close();
}
}
#### With huge log files we only store global statistics in year and month views
if ( $self->{no_year_stat} && ($type ne 'hour') ) {
# unless month view is explicitly wanted
return if (!$self->{with_month_stat} || ($type ne 'day'));
}
#### Read previous client statistics
if (!$kind || ($kind eq 'stat_user')) {
my $dat_file_user = new IO::File;
if ($dat_file_user->open("$self->{Output}/$path/stat_user.dat")) {
my $i = 1;
my $error = 0;
while (my $l = <$dat_file_user>) {
chomp($l);
if ($l =~ s/^([^\s]+)\s+hits_$type=([^;]+);bytes_$type=([^;]+);duration_$type=([^;]+);largest_file_size=([^;]*);largest_file_url=(.*)$//) {
my $id = $1;
my $hits = $2 || '';
my $bytes = $3 || '';
my $duration = $4 || '';
my $lsize = $5 || 0;
my $lurl = $6 || 0;
my @user_ips = ();
if ($self->{rebuild}) {
next if (!$self->check_inclusions($id));
next if ($self->check_exclusions($id));
}
# Anonymize all users
if ($self->{AnonymizeLogin} && ($id !~ /^Anon[a-zA-Z0-9]{16}$/)) {
if (!exists $self->{AnonymizedId}{$id}) {
$self->{AnonymizedId}{$id} = &anonymize_id();
}
$id = $self->{AnonymizedId}{$id};
}
if ($lsize > $self->{"stat_usermax_$sum_type"}{$id}{largest_file_size}) {
$self->{"stat_usermax_$sum_type"}{$id}{largest_file_size} = $lsize;
$self->{"stat_usermax_$sum_type"}{$id}{largest_file_url} = $lurl;
}
$hits =~ s/,$//;
$bytes =~ s/,$//;
$duration =~ s/,$//;
my %hits_tmp = split(/[:,]/, $hits);
foreach my $tmp (sort {$a <=> $b} keys %hits_tmp) {
if ($key ne '') { $k = $key; } else { $k = $tmp; }
$self->{"stat_user_$sum_type"}{$id}{$k}{hits} += $hits_tmp{$tmp};
}
my %bytes_tmp = split(/[:,]/, $bytes);
foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) {
if ($key ne '') { $k = $key; } else { $k = $tmp; }
$self->{"stat_user_$sum_type"}{$id}{$k}{bytes} += $bytes_tmp{$tmp};
}
my %duration_tmp = split(/[:,]/, $duration);
foreach my $tmp (sort {$a <=> $b} keys %duration_tmp) {
if ($key ne '') { $k = $key; } else { $k = $tmp; }
$self->{"stat_user_$sum_type"}{$id}{$k}{duration} += $duration_tmp{$tmp};
}
} else {
print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_user.dat:\n";
print STDERR "$l\n";
if ($error > $self->{MaxFormatError}) {
unlink($self->{pidfile});
exit 0;
}
$error++;
}
$i++;
}
$dat_file_user->close();
}
}
#### Read previous url statistics
if ($self->{UrlReport}) {
if (!$kind || ($kind eq 'stat_user_url')) {
my $dat_file_user_url = new IO::File;
if ($dat_file_user_url->open("$self->{Output}/$path/stat_user_url.dat")) {
my $i = 1;
my $error = 0;
while (my $l = <$dat_file_user_url>) {
chomp($l);
my $id = '';
if ($l =~ /^([^\s]+)\s+hits=/) {
$id = $1;
}
$id = '-' if (!$self->{UserReport});
if ($self->{rebuild}) {
next if (!$self->check_inclusions($id));
next if ($self->check_exclusions($id));
}
# Anonymize all users
if ($self->{AnonymizeLogin} && ($id !~ /^Anon[a-zA-Z0-9]{16}$/)) {
if (!exists $self->{AnonymizedId}{$id}) {
$self->{AnonymizedId}{$id} = &anonymize_id();
}
$id = $self->{AnonymizedId}{$id};
}
if ($l =~ s/^([^\s]+)\s+hits=(\d+);bytes=(\d+);duration=([\-\d]+);first=([^;]*);last=([^;]*);url=(.*?);cache_hit=(\d*);cache_bytes=(\d*)//) {
my $url = $7;
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{hits} += $2;
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{bytes} += $3;
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{duration} += abs($4);
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit} = $5 if (!$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit} || ($5 < $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit}));
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit} = $6 if (!$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit} || ($6 > $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit}));
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{cache_hit} += $8;
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{cache_bytes} += $9;
if ($self->{rebuild}) {
if ($self->check_exclusions('', '', $url)) {
delete $self->{"stat_user_url_$sum_type"}{$id}{"$url"};
next;
}
}
if ($l =~ s/;user_ip=(.*)//) {
my @ips = split(/,/, $1);
foreach my $ip (@ips) {
push(@{$self->{"stat_user_url_$sum_type"}{$id}{$url}{user_ip}}, $ip) if (!grep(/^$ip$/, @{$self->{"stat_user_url_$sum_type"}{$id}{$url}{user_ip}}));
}
}
if ($l =~ s/^;arr_last=(.*)//) {
my $incr = 1800;
$incr = 300 if ($sum_type eq 'hour');
$incr = 86400 if ($sum_type eq 'month');
foreach my $tm (split(/,/, $1)) {
if (!exists $self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last} || ($#{$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}} < 9) || ($tm > ${$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}}[-1] + $incr)) {
push(@{$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}}, $tm);
shift(@{$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}}) if ($#{$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}} > 9);
}
}
}
} elsif ($l =~ s/^([^\s]+)\s+hits=(\d+);bytes=(\d+);duration=([\-\d]+);first=([^;]*);last=([^;]*);url=(.*)$//) {
my $url = $7;
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{hits} += $2;
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{bytes} += $3;
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{duration} += abs($4);
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit} = $5 if (!$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit} || ($5 < $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit}));
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit} = $6 if (!$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit} || ($6 > $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit}));
if ($self->{rebuild}) {
if ($self->check_exclusions('', '', $url)) {
delete $self->{"stat_user_url_$sum_type"}{$id}{"$url"};
next;
}
}
} elsif ($l =~ s/^([^\s]+)\s+hits=(\d+);bytes=(\d+);duration=([\-\d]+);url=(.*)$//) {
my $url = $5;
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{hits} += $2;
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{bytes} += $3;
$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{duration} += abs($4);
if ($self->{rebuild}) {
if ($self->check_exclusions('', '', $url)) {
delete $self->{"stat_user_url_$sum_type"}{$id}{"$url"};
next;
}
}
} else {
print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_user_url.dat\n";
print STDERR "$l\n";
if ($error > $self->{MaxFormatError}) {
unlink($self->{pidfile});
exit 0;
}
$error++;
}
$i++;
}
$dat_file_user_url->close();
}
}
if (!$kind || ($kind eq 'stat_denied_url')) {
my $dat_file_denied_url = new IO::File;
if ($dat_file_denied_url->open("$self->{Output}/$path/stat_denied_url.dat")) {
my $i = 1;
my $error = 0;
while (my $l = <$dat_file_denied_url>) {
chomp($l);
my $id = '';
if ($l =~ /^([^\s]+)\s+hits=/) {
$id = $1;
}
$id = '-' if (!$self->{UserReport});
if ($self->{rebuild}) {
next if (!$self->check_inclusions($id));
next if ($self->check_exclusions($id));
}
# Anonymize all denieds
if ($self->{AnonymizeLogin} && ($id !~ /^Anon[a-zA-Z0-9]{16}$/)) {
if (!exists $self->{AnonymizedId}{$id}) {
$self->{AnonymizedId}{$id} = &anonymize_id();
}
$id = $self->{AnonymizedId}{$id};
}
if ($l =~ s/^([^\s]+)\s+hits=(\d+);first=([^;]*);last=([^;]*);url=(.*);blacklist=(.*)//) {
if ($self->{rebuild}) {
next if ($self->check_exclusions('', '', $5));
}
$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{hits} += $2;
$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{firsthit} = $3 if (!$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{firsthit} || ($3 < $self->{"stat_denied_url_$sum_type"}{$id}{"$7"}{firsthit}));
$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit} = $4 if (!$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit} || ($4 > $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit}));
if ($6) {
my %tmp = split(/,/, $6);
foreach my $k (keys %tmp) {
$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{blacklist}{$k} += $tmp{$k};
}
}
} elsif ($l =~ s/^([^\s]+)\s+hits=(\d+);first=([^;]*);last=([^;]*);url=(.*)//) {
if ($self->{rebuild}) {
next if ($self->check_exclusions('', '', $5));
}
$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{hits} += $2;
$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{firsthit} = $3 if (!$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{firsthit} || ($3 < $self->{"stat_denied_url_$sum_type"}{$id}{"$7"}{firsthit}));
$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit} = $4 if (!$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit} || ($4 > $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit}));
} elsif ($l =~ /^([^\s]+)\s+hits=;first=;last=;url=/) {
# do nothing, this should not appear, but fixes issue #81
} else {
print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_denied_url.dat\n";
print STDERR "$l\n";
if ($error > $self->{MaxFormatError}) {
unlink($self->{pidfile});
exit 0;
}
$error++;
}
$i++;
}
$dat_file_denied_url->close();
}
}
}
#### Read previous network statistics
if (!$kind || ($kind eq 'stat_network')) {
my $dat_file_network = new IO::File;
if ($dat_file_network->open("$self->{Output}/$path/stat_network.dat")) {
my $i = 1;
my $error = 0;
while (my $l = <$dat_file_network>) {
chomp($l);
my ($net, $data) = split(/\t/, $l);
if (!$data) {
# Assume backward compatibility
$l =~ s/^(.*)\shits_$type=/hits_$type=/;
$net = $1;
$data = $l;
}
if ($self->{rebuild} && !exists $self->{NetworkAlias}->{$net}) {
next if (!$self->check_inclusions('', $net));
next if ($self->check_exclusions('', $net));
}
if ($self->{UpdateAlias}) {
# Replace the network by its alias if any
$net = (!$self->{has_network_alias}) ? $net : $self->apply_network_alias($net);
}
if ($data =~ s/^hits_$type=([^;]+);bytes_$type=([^;]+);duration_$type=([^;]+);largest_file_size=([^;]*);largest_file_url=(.*)$//) {
my $hits = $1 || '';
my $bytes = $2 || '';
my $duration = $3 || '';
if ($4 > $self->{"stat_netmax_$sum_type"}{$net}{largest_file_size}) {
$self->{"stat_netmax_$sum_type"}{$net}{largest_file_size} = $4;
$self->{"stat_netmax_$sum_type"}{$net}{largest_file_url} = $5;
}
$hits =~ s/,$//;
$bytes =~ s/,$//;
$duration =~ s/,$//;
my %hits_tmp = split(/[:,]/, $hits);
foreach my $tmp (sort {$a <=> $b} keys %hits_tmp) {
if ($key ne '') { $k = $key; } else { $k = $tmp; }
$self->{"stat_network_$sum_type"}{$net}{$k}{hits} += $hits_tmp{$tmp};
}
my %bytes_tmp = split(/[:,]/, $bytes);
foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) {
if ($key ne '') { $k = $key; } else { $k = $tmp; }
$self->{"stat_network_$sum_type"}{$net}{$k}{bytes} += $bytes_tmp{$tmp};
}
my %duration_tmp = split(/[:,]/, $duration);
foreach my $tmp (sort {$a <=> $b} keys %duration_tmp) {
if ($key ne '') { $k = $key; } else { $k = $tmp; }
$self->{"stat_network_$sum_type"}{$net}{$k}{duration} += $duration_tmp{$tmp};
}
} else {
print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_network.dat\n";
print STDERR "$l\n";
if ($error > $self->{MaxFormatError}) {
unlink($self->{pidfile});
exit 0;
}
$error++;
}
$i++;
}
$dat_file_network->close();
}
}
#### Read previous user per network statistics
if ($self->{UserReport}) {
if (!$kind || ($kind eq 'stat_netuser')) {
my $dat_file_netuser = new IO::File;
if ($dat_file_netuser->open("$self->{Output}/$path/stat_netuser.dat")) {
my $i = 1;
my $error = 0;
while (my $l = <$dat_file_netuser>) {
chomp($l);
my ($net, $id, $data) = split(/\t/, $l);
if (!$data) {
# Assume backward compatibility
$l =~ s/^(.*)\s([^\s]+)\shits=/hits=/;
$net = $1;
$id = $2;
$data = $l;
}
if ($self->{rebuild}) {
next if (!$self->check_inclusions($id, $net));
next if ($self->check_exclusions($id, $net));
}
# Replace network by its alias if any
$net = (!$self->{has_network_alias}) ? $net : $self->apply_network_alias($net);
# Anonymize all users
if ($self->{AnonymizeLogin} && ($id !~ /^Anon[a-zA-Z0-9]{16}$/)) {
if (!exists $self->{AnonymizedId}{$id}) {
$self->{AnonymizedId}{$id} = &anonymize_id();
}
$id = $self->{AnonymizedId}{$id};
}
if ($data =~ s/^hits=(\d+);bytes=(\d+);duration=([\-\d]+);largest_file_size=([^;]*);largest_file_url=(.*)$//) {
$self->{"stat_netuser_$sum_type"}{$net}{$id}{hits} += $1;
$self->{"stat_netuser_$sum_type"}{$net}{$id}{bytes} += $2;
$self->{"stat_netuser_$sum_type"}{$net}{$id}{duration} += abs($3);
if ($4 > $self->{"stat_netuser_$sum_type"}{$net}{$id}{largest_file_size}) {
$self->{"stat_netuser_$sum_type"}{$net}{$id}{largest_file_size} = $4;
$self->{"stat_netuser_$sum_type"}{$net}{$id}{largest_file_url} = $5;
}
} else {
print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_netuser.dat\n";
print STDERR "$l\n";
if ($error > $self->{MaxFormatError}) {
unlink($self->{pidfile});
exit 0;
}
$error++;
}
$i++;
}
$dat_file_netuser->close();
}
}
}
#### Read previous mime statistics
if (!$kind || ($kind eq 'stat_mime_type')) {
my $dat_file_mime_type = new IO::File;
if ($dat_file_mime_type->open("$self->{Output}/$path/stat_mime_type.dat")) {
my $i = 1;
my $error = 0;
while (my $l = <$dat_file_mime_type>) {
chomp($l);
if ($l =~ s/^([^\s]+)\s+hits=(\d+);bytes=(\d+)//) {
my $mime = $1;
$self->{"stat_mime_type_$sum_type"}{$mime}{hits} += $2;
$self->{"stat_mime_type_$sum_type"}{$mime}{bytes} += $3;
} else {
print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_mime_type.dat\n";
print STDERR "$l\n";
if ($error > $self->{MaxFormatError}) {
unlink($self->{pidfile});
exit 0;
}
$error++;
}
$i++;
}
$dat_file_mime_type->close();
}
}
}
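# Create the per-period directory tree (year/month/day and optional week)
# and dump the in-memory statistics to disk.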
sub _save_data
{
my ($self, $year, $month, $day, $wn, @wd) = @_;
#### Create directory structure
if (!-d "$self->{Output}/$year") {
mkdir("$self->{Output}/$year", 0755) || $self->localdie("ERROR: can't create directory $self->{Output}/$year, $!\n");
}
if ($month && !-d "$self->{Output}/$year/$month") {
mkdir("$self->{Output}/$year/$month", 0755) || $self->localdie("ERROR: can't create directory $self->{Output}/$year/$month, $!\n");
}
if ($day && !-d "$self->{Output}/$year/$month/$day") {
mkdir("$self->{Output}/$year/$month/$day", 0755) || $self->localdie("ERROR: can't create directory $self->{Output}/$year/$month/$day, $!\n");
}
if (!$self->{no_week_stat}) {
if ($wn && !-d "$self->{Output}/$year/week$wn") {
mkdir("$self->{Output}/$year/week$wn", 0755) || $self->localdie("ERROR: can't create directory $self->{Output}/$year/week$wn, $!\n");
}
}
# Dumping data
$self->_save_stat($year, $month, $day, $wn, @wd);
}
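# Create the per-period directory tree and append the current statistics
# to the existing data files instead of rewriting them.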
sub _append_data
{
my ($self, $year, $month, $day) = @_;
#### Create directory structure
if (!-d "$self->{Output}/$year") {
mkdir("$self->{Output}/$year", 0755) || $self->localdie("ERROR: can't create directory $self->{Output}/$year, $!\n");
}
if ($month && !-d "$self->{Output}/$year/$month") {
mkdir("$self->{Output}/$year/$month", 0755) || $self->localdie("ERROR: can't create directory $self->{Output}/$year/$month, $!\n");
}
if ($day && !-d "$self->{Output}/$year/$month/$day") {
mkdir("$self->{Output}/$year/$month/$day", 0755) || $self->localdie("ERROR: can't create directory $self->{Output}/$year/$month/$day, $!\n");
}
# Dumping data
$self->_append_stat($year, $month, $day);
}
sub _print_main_header
{
my ($self, $fileout, $menu, $calendar) = @_;
$self->_print_header($fileout, $menu, $calendar, undef, 1);
}
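# Print the common HTML header: meta tags, CSS, sorttable and flotr2 javascript.
# When $dorefresh is set, a meta refresh tag is added according to RefreshTime.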
sub _print_header
{
my ($self, $fileout, $menu, $calendar, $sortpos, $dorefresh) = @_;
my $now = $self->{start_date} || strftime("%a %b %e %H:%M:%S %Y", CORE::localtime);
$sortpos ||= 2;
my $sorttable = '';
$sorttable = "var myTH = document.getElementById('contenu').getElementsByTagName('th')[$sortpos]; sorttable.innerSortFunction.apply(myTH, []);";
my $reportrange = '';
if ($self->{report_starttime} || $self->{report_endtime}) {
$reportrange = '<br>';
if ($self->{report_starttime}) {
my $t1 = $Translate{'Generation_from'};
$t1 =~ s/\%s/$self->{report_starttime}/;
$reportrange .= $t1;
}
if ($self->{report_endtime}) {
my $t1 = $Translate{'Generation_to'};
$t1 =~ s/\%s/$self->{report_endtime}/;
$reportrange .= $t1;
}
$reportrange .= '.';
}
my $refresh_tag = '';
if ($self->{RefreshTime}) {
$refresh_tag = '<meta HTTP-EQUIV="refresh" CONTENT="' . $self->{RefreshTime}*60 . '">';
}
print $$fileout qq{
<html>
<head>
<meta NAME="robots" CONTENT="noindex,nofollow" />
<meta HTTP-EQUIV="Pragma" CONTENT="no-cache" />
<meta HTTP-EQUIV="Cache-Control" content="no-cache" />
<meta HTTP-EQUIV="Expires" CONTENT="$now" />
<meta HTTP-EQUIV="Generator" CONTENT="SquidAnalyzer $VERSION" />
<meta HTTP-EQUIV="Date" CONTENT="$now" />
<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=$Translate{'CharSet'}" />
$refresh_tag
<title>$self->{CustomTitle}</title>
<link rel="stylesheet" type="text/css" href="$self->{WebUrl}squidanalyzer.css" media="screen" />
<!-- javascript to sort table -->
<script type="text/javascript" src="$self->{WebUrl}sorttable.js"></script>
<!-- javascript to draw graphics -->
<script type="text/javascript" src="$self->{WebUrl}flotr2.js"></script>
<script type="text/javascript" >sortpos = $sortpos;</script>
</head>
<body onload="$sorttable">
<div id="conteneur">
<a name="atop"></a>
<div id="header">
<div id="alignLeft">
<h1>
$self->{CustomHeader}
</h1>
<p class="sous-titre">
$Translate{'Generation'} $now.$reportrange
</p>
</div>
$calendar
</div>
$menu
<div id="contenu">
};
}
sub _print_footer
{
my ($self, $fileout) = @_;
print $$fileout qq{
</div>
<div id="footer">
<h4>
$Translate{'File_Generated'} <a href="http://squidanalyzer.darold.net/">SquidAnalyzer v$VERSION</a>
</h4>
</div>
</div>
</body>
</html>
};
}
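# Return 1 when the given year/month/day does not match the requested build
# date ($self->{build_date}) and must be skipped, 0 otherwise.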
sub check_build_date
{
my ($self, $year, $month, $day) = @_;
return 0 if (!$self->{build_date});
my ($y, $m, $d) = split(/\-/, $self->{build_date});
return 1 if ($year ne $y);
if ($m) {
return 1 if ($month && ($month ne $m));
if ($d) {
return 1 if ($day && ($day ne $d));
}
}
return 0;
}
sub buildHTML
{
my ($self, $outdir) = @_;
# No new log registered, no HTML build required
if (!$self->{rebuild}) {
if (!$self->{last_year} && !$self->{last_month} && !$self->{last_day}) {
print STDERR "Skipping HTML build.\n" if (!$self->{QuietMode});
return;
}
}
$outdir ||= $self->{Output};
print STDERR "Building HTML output into $outdir\n" if (!$self->{QuietMode});
# Load history data for incremental scan
my $old_year = 0;
my $old_month = 0;
my $old_day = 0;
my $p_month = 0;
my $p_year = 0;
my $p_week = 0;
if ($self->{history_time} || $self->{sg_history_time} || $self->{ug_history_time} || $self->{begin_time}) {
my @ltime = CORE::localtime($self->{history_time} || $self->{sg_history_time} || $self->{ug_history_time} || $self->{begin_time});
if ($self->{is_squidguard_log}) {
@ltime = CORE::localtime($self->{sg_history_time} || $self->{begin_time});
} elsif ($self->{is_ufdbguard_log}) {
@ltime = CORE::localtime($self->{ug_history_time} || $self->{begin_time});
}
$old_year = $ltime[5]+1900;
$old_month = $ltime[4]+1;
$old_month = "0$old_month" if ($old_month < 10);
$old_day = $ltime[3];
$old_day = "0$old_day" if ($old_day < 10);
# Set oldest stat to preserve based on history time, not current time
if ($self->{preserve} > 0) {
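# The preserve threshold is expressed in months, approximated here as 30 days
# (2592000 seconds) each.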
if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
my $histtime = $self->{history_time} || time();
@ltime = CORE::localtime($histtime-($self->{preserve}*2592000));
} elsif (!$self->{is_squidguard_log}) {
my $histtime = $self->{ug_history_time} || time();
@ltime = CORE::localtime($histtime-($self->{preserve}*2592000));
} else {
my $histtime = $self->{sg_history_time} || time();
@ltime = CORE::localtime($histtime-($self->{preserve}*2592000));
}
$p_year = $ltime[5]+1900;
$p_month = $ltime[4]+1;
$p_month = sprintf("%02d", $p_month);
$p_week = &get_week_number($p_year, $p_month, "01");
print STDERR "Obsolete statistics before $p_year-$p_month, week $p_year-$p_week\n" if (!$self->{QuietMode});
}
}
# Remove obsolete directories first
opendir(DIR, $outdir) || die "Error: can't opendir $outdir: $!";
my @years = grep { /^\d{4}$/ && -d "$outdir/$_"} readdir(DIR);
closedir DIR;
if ($self->{preserve} && $p_year) {
foreach my $y (sort {$a <=> $b} @years) {
# Remove the full year directory if it is older than the last year to preserve
if ($y < $p_year) {
print STDERR "Removing obsolete statistics for year $y\n" if (!$self->{QuietMode});
system ($RM_PROG, "-rf", "$outdir/$y");
next;
}
# Remove the full month directory if it is older than the last month to preserve
opendir(DIR, "$outdir/$y") || $self->localdie("FATAL: can't opendir $outdir/$y: $!");
my @months = grep { /^\d{2}$/ && -d "$outdir/$y/$_"} readdir(DIR);
closedir DIR;
foreach my $m (sort {$a <=> $b} @months) {
if ("$y$m" < "$p_year$p_month") {
print STDERR "Removing obsolete statistics for month $y-$m\n" if (!$self->{QuietMode});
system ($RM_PROG, "-rf", "$outdir/$y/$m");
}
}
# Remove the full week directory if it is older than the last week to preserve
opendir(DIR, "$outdir/$y") || $self->localdie("FATAL: can't opendir $outdir/$y: $!");
my @weeks = grep { -d "$outdir/$y/$_" && /^week\d{2}/ } readdir(DIR);
closedir DIR;
map { s/^week(\d{2})/$1/; } @weeks;
foreach my $w (sort {$a <=> $b} @weeks) {
# Remove the full week directory if it is older than the last date to preserve
if ("$y$w" < "$p_year$p_week") {
print STDERR "Removing obsolete statistics for week $y-week$w\n" if (!$self->{QuietMode});
system ($RM_PROG, "-rf", "$outdir/$y/week$w");
}
}
}
}
# Generate all HTML output
opendir(DIR, $outdir) || die "Error: can't opendir $outdir: $!";
@years = grep { /^\d{4}$/ && -d "$outdir/$_"} readdir(DIR);
closedir DIR;
$self->{child_count} = 0;
my @years_cal = ();
my @months_cal = ();
my @weeks_cal = ();
my @array_count = ();
foreach my $y (sort {$a <=> $b} @years) {
next if (!$y || ($y < $self->{first_year}));
next if ($self->check_build_date($y));
next if ($y < $old_year);
opendir(DIR, "$outdir/$y") || $self->localdie("FATAL: can't opendir $outdir/$y: $!");
my @months = grep { /^\d{2}$/ && -d "$outdir/$y/$_"} readdir(DIR);
my @weeks = grep { /^week\d{2}$/ && -d "$outdir/$y/$_"} readdir(DIR);
closedir DIR;
my @weeks_to_build = ();
foreach my $m (sort {$a <=> $b} @months) {
next if (!$m || ($m < $self->{first_month}{$y}));
next if ($self->check_build_date($y, $m));
next if ("$y$m" < "$old_year$old_month");
opendir(DIR, "$outdir/$y/$m") || $self->localdie("FATAL: can't opendir $outdir/$y/$m: $!");
my @days = grep { /^\d{2}$/ && -d "$outdir/$y/$m/$_"} readdir(DIR);
closedir DIR;
foreach my $d (sort {$a <=> $b} @days) {
next if ($self->check_build_date($y, $m, $d));
next if ("$y$m$d" < "$old_year$old_month$old_day");
print STDERR "Generating statistics for day $y-$m-$d\n" if (!$self->{QuietMode});
$self->gen_html_output($outdir, $y, $m, $d);
push(@array_count, "$outdir/$y/$m/$d");
my $wn = &get_week_number($y,$m,$d);
push(@weeks_to_build, $wn) if (!grep(/^$wn$/, @weeks_to_build));
}
print STDERR "Generating statistics for month $y-$m\n" if (!$self->{QuietMode});
push(@months_cal, "$outdir/$y/$m");
$self->gen_html_output($outdir, $y, $m);
}
if (!$self->{no_week_stat}) {
foreach my $w (sort @weeks_to_build) {
$w = sprintf("%02d", $w+1);
push(@array_count, "$outdir/$y/week$w");
print STDERR "Generating statistics for week $w on year $y\n" if (!$self->{QuietMode});
$self->gen_html_output($outdir, $y, '', '', $w);
}
}
print STDERR "Generating statistics for year $y\n" if (!$self->{QuietMode});
$self->gen_html_output($outdir, $y);
push(@years_cal, "$outdir/$y");
}
# Wait for last child stop
$self->wait_all_childs() if ($self->{queue_size} > 1);
# Set the calendar in each new file by replacing SA_CALENDAR_SA with the right HTML code.
# Do the same for the number of users, URLs and domains.
foreach my $p (@years_cal) {
$p =~ /\/(\d+)$/;
my $stat_date = $self->set_date($1);
my $cal = $self->_get_calendar($stat_date, $1, '', 'month', $p); # year-level page, no month to pass
my $nuser = '-';
my $nurl = '-';
my $ndomain = '-';
# Search for item count
if (-e "$p/stat_count.dat") {
open(IN, "$p/stat_count.dat") or die "FATAL: can't read file $p/stat_count.dat, $!\n";
while (my $l = <IN>) {
chomp($l);
if ($l =~ /^users:(\d+)/) {
$nuser = $1;
} elsif ($l =~ /^urls:(\d+)/) {
$nurl = $1;
} elsif ($l =~ /^domains:(\d+)/) {
$ndomain = $1;
}
}
close(IN);
unlink("$p/stat_count.dat");
}
opendir(DIR, "$p") || $self->localdie("FATAL: can't opendir $p: $!\n");
my @html = grep { /\.html$/ } readdir(DIR);
closedir DIR;
foreach my $f (@html) {
open(IN, "$p/$f") or $self->localdie("FATAL: can't read file $p/$f\n");
my @content = <IN>;
close IN;
map { s/SA_CALENDAR_SA/$cal/ } @content;
map { s/SA_NUSERS_SA/$nuser/ } @content;
map { s/SA_NURLS_SA/$nurl/ } @content;
map { s/SA_NDOMAINS_SA/$ndomain/ } @content;
open(OUT, ">$p/$f") or $self->localdie("FATAL: can't write to file $p/$f\n");
print OUT @content;
close OUT;
}
}
foreach my $p (@months_cal) {
$p =~ /\/(\d+)\/(\d+)$/;
my $stat_date = $self->set_date($1, $2);
my $cal = $self->_get_calendar($stat_date, $1, $2, 'day', $p);
my $nuser = '-';
my $nurl = '-';
my $ndomain = '-';
# Search for item count
if (-e "$p/stat_count.dat") {
open(IN, "$p/stat_count.dat") or die "FATAL: can't read file $p/stat_count.dat, $!\n";
while (my $l = <IN>) {
chomp($l);
if ($l =~ /^users:(\d+)/) {
$nuser = $1;
} elsif ($l =~ /^urls:(\d+)/) {
$nurl = $1;
} elsif ($l =~ /^domains:(\d+)/) {
$ndomain = $1;
}
}
close(IN);
unlink("$p/stat_count.dat");
}
opendir(DIR, "$p") || $self->localdie("FATAL: can't opendir $p: $!\n");
my @html = grep { /\.html$/ } readdir(DIR);
closedir DIR;
foreach my $f (@html) {
open(IN, "$p/$f") or $self->localdie("FATAL: can't read file $p/$f\n");
my @content = <IN>;
close IN;
map { s/SA_CALENDAR_SA/$cal/ } @content;
map { s/SA_NUSERS_SA/$nuser/ } @content;
map { s/SA_NURLS_SA/$nurl/ } @content;
map { s/SA_NDOMAINS_SA/$ndomain/ } @content;
open(OUT, ">$p/$f") or $self->localdie("FATAL: can't write to file $p/$f\n");
print OUT @content;
close OUT;
}
}
foreach my $p (@array_count) {
my $nuser = '-';
my $nurl = '-';
my $ndomain = '-';
my $cal = '';
if ($p =~ /^(.*)\/(\d{4})\/(\d{2})\/\d{2}/) {
my $stat_date = $self->set_date($2, $3);
$cal = $self->_get_calendar($stat_date, $2, $3, 'day', "$1/$2/$3", '../');
}
# Search for item count
if (-e "$p/stat_count.dat") {
open(IN, "$p/stat_count.dat") or die "FATAL: can't read file $p/stat_count.dat, $!\n";
while (my $l = <IN>) {
chomp($l);
if ($l =~ /^users:(\d+)/) {
$nuser = $1;
} elsif ($l =~ /^urls:(\d+)/) {
$nurl = $1;
} elsif ($l =~ /^domains:(\d+)/) {
$ndomain = $1;
}
}
close(IN);
unlink("$p/stat_count.dat");
}
opendir(DIR, "$p") || $self->localdie("FATAL: can't opendir $p: $!\n");
my @html = grep { /\.html$/ } readdir(DIR);
closedir DIR;
foreach my $f (@html) {
open(IN, "$p/$f") or $self->localdie("FATAL: can't read file $p/$f\n");
my @content = <IN>;
close IN;
map { s/SA_CALENDAR_SA/$cal/ } @content;
map { s/SA_NUSERS_SA/$nuser/ } @content;
map { s/SA_NURLS_SA/$nurl/ } @content;
map { s/SA_NDOMAINS_SA/$ndomain/ } @content;
open(OUT, ">$p/$f") or $self->localdie("FATAL: can't write to file $p/$f\n");
print OUT @content;
close OUT;
}
}
$self->_gen_summary($outdir);
}
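# Generate the HTML reports (user, mime, network, top url/denied/domain and
# cache pages) for the given period, forking children when queue_size > 1.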
sub gen_html_output
{
my ($self, $outdir, $year, $month, $day, $week) = @_;
my $dir = "$outdir";
if ($year) {
$dir .= "/$year";
}
if ($month) {
$dir .= "/$month";
}
if ($day) {
$dir .= "/$day";
}
my $stat_date = $self->set_date($year, $month, $day);
if ($week) {
$dir .= "/week$week";
$stat_date = "$Translate{Week} $week - $year";
}
#### With huge log files we do not store detailed statistics
if ( (!$self->{no_year_stat} || $self->{with_month_stat}) || ($self->{no_year_stat} && ($day || $week)) ) {
if ($self->{queue_size} <= 1) {
if ($self->{UserReport}) {
$self->_print_user_stat($dir, $year, $month, $day, $week);
}
$self->_print_mime_stat($dir, $year, $month, $day, $week);
$self->_print_network_stat($dir, $year, $month, $day, $week);
if ($self->{UrlReport}) {
$self->_print_top_url_stat($dir, $year, $month, $day, $week);
$self->_print_top_denied_stat($dir, $year, $month, $day, $week);
$self->_print_top_domain_stat($dir, $year, $month, $day, $week);
}
} else {
if ($self->{UserReport}) {
$self->spawn(sub {
$self->_print_user_stat($dir, $year, $month, $day, $week);
});
$self->{child_count} = $self->manage_queue_size(++$self->{child_count});
}
$self->spawn(sub {
$self->_print_mime_stat($dir, $year, $month, $day, $week);
});
$self->{child_count} = $self->manage_queue_size(++$self->{child_count});
$self->spawn(sub {
$self->_print_network_stat($dir, $year, $month, $day, $week);
});
$self->{child_count} = $self->manage_queue_size(++$self->{child_count});
if ($self->{UrlReport}) {
$self->spawn(sub {
$self->_print_top_url_stat($dir, $year, $month, $day, $week);
});
$self->{child_count} = $self->manage_queue_size(++$self->{child_count});
$self->spawn(sub {
$self->_print_top_denied_stat($dir, $year, $month, $day, $week);
});
$self->{child_count} = $self->manage_queue_size(++$self->{child_count});
$self->spawn(sub {
$self->_print_top_domain_stat($dir, $year, $month, $day, $week);
});
$self->{child_count} = $self->manage_queue_size(++$self->{child_count});
}
}
}
$self->_print_cache_stat($dir, $year, $month, $day, $week);
}
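# Convert a number of seconds into a zero-padded HH:MM:SS string.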
sub parse_duration
{
my ($secondes) = @_;
my $hours = int($secondes/3600);
$hours = "0$hours" if ($hours < 10);
$secondes = $secondes - ($hours*3600);
my $minutes = int($secondes/60);
$minutes = "0$minutes" if ($minutes < 10);
$secondes = $secondes - ($minutes*60);
$secondes = "0$secondes" if ($secondes < 10);
return "$hours:$minutes:$secondes";
}
sub _print_cache_stat
{
my ($self, $outdir, $year, $month, $day, $week) = @_;
print STDERR "\tCache statistics in $outdir...\n" if (!$self->{QuietMode});
$0 = "squid-analyzer: Printing cache statistics in $outdir";
my $stat_date = $self->set_date($year, $month, $day);
my $type = 'hour';
if (!$day) {
$type = 'day';
}
if (!$month) {
$type = 'month';
}
if ($week) {
$type = 'day';
}
# Load code statistics
my %code_stat = ();
my %throughput_stat = ();
my %detail_code_stat = ();
my $infile = new IO::File;
if ($infile->open("$outdir/stat_code.dat")) {
while (my $l = <$infile>) {
chomp($l);
my ($code, $data) = split(/\s/, $l);
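# Per-period counters are stored as comma-separated key:value pairs
# (the key is the hour, day or month depending on the report level).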
$data =~ /hits_$type=([^;]+);bytes_$type=([^;]+)/;
my $hits = $1 || '';
my $bytes = $2 || '';
$hits =~ s/,$//;
$bytes =~ s/,$//;
my %hits_tmp = split(/[:,]/, $hits);
foreach my $tmp (keys %hits_tmp) {
$detail_code_stat{$code}{$tmp}{request} = $hits_tmp{$tmp};
$code_stat{$code}{request} += $hits_tmp{$tmp};
}
my %bytes_tmp = split(/[:,]/, $bytes);
foreach my $tmp (keys %bytes_tmp) {
$detail_code_stat{$code}{$tmp}{bytes} = $bytes_tmp{$tmp};
$code_stat{$code}{bytes} += $bytes_tmp{$tmp};
}
if ($data =~ /thp_bytes_$type=([^;]+);thp_duration_$type=([^;]+)/) {
$bytes = $1 || '';
my $elapsed = $2 || '';
$bytes =~ s/,$//;
$elapsed =~ s/,$//;
my %bytes_tmp = split(/[:,]/, $bytes);
foreach my $tmp (keys %bytes_tmp) {
$detail_code_stat{throughput}{"$tmp"}{bytes} = $bytes_tmp{"$tmp"};
$throughput_stat{$code}{bytes} += $bytes_tmp{$tmp};
}
my %elapsed_tmp = split(/[:,]/, $elapsed);
foreach my $tmp (keys %elapsed_tmp) {
$detail_code_stat{throughput}{"$tmp"}{elapsed} = $elapsed_tmp{"$tmp"};
$throughput_stat{$code}{elapsed} += $elapsed_tmp{$tmp};
}
}
}
$infile->close();
}
my $total_request = ($code_stat{MISS}{request} + $code_stat{HIT}{request}) || 1;
my $total_bytes = ($code_stat{HIT}{bytes} + $code_stat{MISS}{bytes}) || 1;
my $total_elapsed = ($throughput_stat{HIT}{elapsed} + $throughput_stat{MISS}{elapsed}) || 1;
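# Elapsed times are accumulated in milliseconds, hence the division by 1000
# to obtain a throughput in bytes per second.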
my $total_throughput = int(($throughput_stat{HIT}{bytes} + $throughput_stat{MISS}{bytes}) / (($total_elapsed/1000) || 1));
my $total_all_request = ($code_stat{DENIED}{request} + $code_stat{MISS}{request} + $code_stat{HIT}{request}) || 1;
my $total_all_bytes = ($code_stat{DENIED}{bytes} + $code_stat{HIT}{bytes} + $code_stat{MISS}{bytes}) || 1;
if ($week && !-d "$outdir") {
return;
}
my $file = $outdir . '/index.html';
my $out = new IO::File;
$out->open(">$file.tmp") || $self->localdie("ERROR: Unable to open $file.tmp. $!\n");
# Print the HTML header
my $cal = 'SA_CALENDAR_SA';
$cal = '' if ($week);
if ( (!$self->{no_year_stat} || $self->{with_month_stat}) || ($type ne 'month') ) {
$self->_print_main_header(\$out, $self->{menu}, $cal);
} else {
$self->_print_main_header(\$out, $self->{menu3}, $cal);
}
print $out $self->_print_title($Translate{'Cache_title'}, $stat_date, $week);
my $total_cost = sprintf("%2.2f", int(($code_stat{HIT}{bytes} + $code_stat{MISS}{bytes})/1000000) * $self->{CostPrice});
my $comma_bytes = $self->format_bytes($total_bytes);
my $comma_throughput = $self->format_bytes($total_throughput);
my $hit_bytes = $self->format_bytes($code_stat{HIT}{bytes});
my $miss_bytes = $self->format_bytes($code_stat{MISS}{bytes});
my $denied_bytes = $self->format_bytes($code_stat{DENIED}{bytes});
my $colspn = 6;
$colspn = 7 if ($self->{CostPrice});
my $title = $Translate{'Hourly'} || 'Hourly';
my $unit = $Translate{'Hours'} || 'Hours';
my @xaxis = ();
my @xstick = ();
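# Build the x-axis ticks: hours for a daily report, days of the month for a
# monthly report, the days of the selected week for a weekly report, or
# months for a yearly report.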
if ($type eq 'day') {
if (!$week) {
$title = $Translate{'Daily'} || 'Daily';
for ("01" .. "31") {
push(@xaxis, "$_");
}
} else {
@xaxis = &get_wdays_per_year($week - 1, $year, $month);
foreach my $x (@xaxis) {
push(@xstick, POSIX::strftime("%F", CORE::localtime($x/1000)));
}
map { s/\d{4}-\d{2}-//; } @xstick;
$title = $Translate{'Weekly'} || 'Weekly';
$type = 'week';
$type = '[' . join(',', @xstick) . ']';
}
$unit = $Translate{'Days'} || 'Days';
} elsif ($type eq 'month') {
$title = $Translate{'Monthly'} || 'Monthly';
$unit = $Translate{'Months'} || 'Months';
for ("01" .. "12") {
push(@xaxis, "$_");
}
} else {
for ("00" .. "23") {
push(@xaxis, "$_");
}
}
my @hit = ();
my @miss = ();
my @denied = ();
my @throughput = ();
my @total = ();
for (my $i = 0; $i <= $#xaxis; $i++) {
my $ddate = $xaxis[$i];
$ddate = $xstick[$i] if ($#xstick >= 0);
my $tot = 0;
if (exists $detail_code_stat{HIT}{$ddate}{request}) {
push(@hit, "[ $xaxis[$i], $detail_code_stat{HIT}{$ddate}{request} ]");
$tot += $detail_code_stat{HIT}{$ddate}{request};
} else {
push(@hit, "[ $xaxis[$i], 0 ]");
}
if (exists $detail_code_stat{MISS}{$ddate}{request}) {
push(@miss, "[ $xaxis[$i], $detail_code_stat{MISS}{$ddate}{request} ]");
$tot += $detail_code_stat{MISS}{$ddate}{request};
} else {
push(@miss, "[ $xaxis[$i], 0 ]");
}
if (exists $detail_code_stat{DENIED}{$ddate}{request}) {
push(@denied, "[ $xaxis[$i], $detail_code_stat{DENIED}{$ddate}{request} ]");
} else {
push(@denied, "[ $xaxis[$i], 0 ]");
}
if (exists $detail_code_stat{throughput}{$ddate}{bytes}) {
$detail_code_stat{throughput}{$ddate}{elapsed} ||= 1;
push(@throughput, "[ $xaxis[$i], " . int($detail_code_stat{throughput}{$ddate}{bytes}/($detail_code_stat{throughput}{$ddate}{elapsed}/1000)) . " ]");
} else {
push(@throughput, "[ $xaxis[$i], 0 ]");
}
push(@total, "[ $xaxis[$i], $tot ]");
delete $detail_code_stat{HIT}{$ddate}{request};
delete $detail_code_stat{MISS}{$ddate}{request};
delete $detail_code_stat{DENIED}{$ddate}{request};
delete $detail_code_stat{throughput}{$ddate};
}
my $t1 = $Translate{'Graph_cache_hit_title'};
$t1 =~ s/\%s/$title/;
$t1 = "$t1 $stat_date";
my $xlabel = $unit || '';
my $ylabel = $Translate{'Requests_graph'} || 'Requests';
my $code_requests = $self->flotr2_bargraph(1, 'code_requests', $type, $t1, $xlabel, $ylabel,
join(',', @total), $Translate{'Total_graph'} . " ($Translate{'Hit_graph'}+$Translate{'Miss_graph'})",
join(',', @hit), $Translate{'Hit_graph'},
join(',', @miss), $Translate{'Miss_graph'},
join(',', @denied), $Translate{'Denied_graph'} );
@hit = ();
@miss = ();
@denied = ();
@total = ();
for (my $i = 0; $i <= $#xaxis; $i++) {
my $ddate = $xaxis[$i];
$ddate = $xstick[$i] if ($#xstick >= 0);
my $tot = 0;
if (exists $detail_code_stat{HIT}{$ddate}{bytes}) {
push(@hit, "[ $xaxis[$i], " . int($detail_code_stat{HIT}{$ddate}{bytes}/1000000) . " ]");
$tot += $detail_code_stat{HIT}{$ddate}{bytes};
} else {
push(@hit, "[ $xaxis[$i], 0 ]");
}
if (exists $detail_code_stat{MISS}{$ddate}{bytes}) {
push(@miss, "[ $xaxis[$i], " . int($detail_code_stat{MISS}{$ddate}{bytes}/1000000) . " ]");
$tot += $detail_code_stat{MISS}{$ddate}{bytes};
} else {
push(@miss, "[ $xaxis[$i], 0 ]");
}
if (exists $detail_code_stat{DENIED}{$ddate}{bytes}) {
push(@denied, "[ $xaxis[$i], " . int($detail_code_stat{DENIED}{$ddate}{bytes}/1000000) . " ]");
} else {
push(@denied, "[ $xaxis[$i], 0 ]");
}
push(@total, "[ $xaxis[$i], " . int($tot/1000000) . " ]");
}
%detail_code_stat = ();
$t1 = $Translate{'Graph_cache_byte_title'};
$t1 =~ s/\%s/$title/;
$t1 = "$t1 $stat_date";
$ylabel = $Translate{'Megabytes_graph'} || $Translate{'Megabytes'};
my $code_bytes = $self->flotr2_bargraph(2, 'code_bytes', $type, $t1, $xlabel, $ylabel,
join(',', @total), $Translate{'Total_graph'} . " ($Translate{'Hit_graph'}+$Translate{'Miss_graph'})",
join(',', @hit), $Translate{'Hit_graph'},
join(',', @miss), $Translate{'Miss_graph'},
join(',', @denied), $Translate{'Denied_graph'});
@hit = ();
@miss = ();
@denied = ();
@total = ();
$t1 = $Translate{'Graph_throughput_title'};
$t1 =~ s/\%s/$title/;
$t1 = "$t1 $stat_date";
$ylabel = $Translate{'Bytes_graph'} || $Translate{'Bytes'};
my $throughput_bytes = $self->flotr2_bargraph(3, 'throughput_bytes', $type, $t1, $xlabel, $ylabel,
join(',', @throughput), $Translate{'Throughput_graph'});
@throughput = ();
print $out qq{
<table class="stata">
<tr>
<th colspan="3" class="headerBlack">$Translate{'Requests'}</th>
<th colspan="3" class="headerBlack">$Translate{$self->{TransfertUnit}}</th>
<th colspan="$colspn" class="headerBlack">$Translate{'Total'}</th>
</tr>
<tr>
<th>$Translate{'Hit'}</th>
<th>$Translate{'Miss'}</th>
<th>$Translate{'Denied'}</th>
<th>$Translate{'Hit'}</th>
<th>$Translate{'Miss'}</th>
<th>$Translate{'Denied'}</th>
<th>$Translate{'Requests'}</th>
<th>$Translate{$self->{TransfertUnit}}</th>
<th>$Translate{'Throughput'}</th>
<th>$Translate{'Users'}</th>
<th>$Translate{'Sites'}</th>
<th>$Translate{'Domains'}</th>
};
print $out qq{
<th>$Translate{'Cost'} $self->{Currency}</th>
} if ($self->{CostPrice});
my $percent_hit = sprintf("%.2f", ($code_stat{HIT}{request}/$total_all_request)*100);
my $percent_miss = sprintf("%.2f", ($code_stat{MISS}{request}/$total_all_request)*100);
my $percent_denied = sprintf("%.2f", ($code_stat{DENIED}{request}/$total_all_request)*100);
my $percent_bhit = sprintf("%.2f", ($code_stat{HIT}{bytes}/$total_all_bytes)*100);
my $percent_bmiss = sprintf("%.2f", ($code_stat{MISS}{bytes}/$total_all_bytes)*100);
my $percent_bdenied = sprintf("%.2f", ($code_stat{DENIED}{bytes}/$total_all_bytes)*100);
my $trfunit = $self->{TransfertUnit} || 'B';
$trfunit = 'B' if ($trfunit eq 'BYTE');
print $out qq{
</tr>
<tr>
<td title="$percent_hit %">$code_stat{HIT}{request}</td>
<td title="$percent_miss %">$code_stat{MISS}{request}</td>
<td title="$percent_denied %">$code_stat{DENIED}{request}</td>
<td title="$percent_bhit %">$hit_bytes</td>
<td title="$percent_bmiss %">$miss_bytes</td>
<td title="$percent_bdenied %">$denied_bytes</td>
<td>$total_request</td>
<td>$comma_bytes</td>
<td>$comma_throughput $trfunit/s</td>
<td>SA_NUSERS_SA</td>
<td>SA_NURLS_SA</td>
<td>SA_NDOMAINS_SA</td>
};
print $out qq{
<td class="cacheValues">$total_cost</td>
} if ($self->{CostPrice});
print $out qq{
</tr>
</table>
<style>
#container {
display: table;
}
#row {
display: table-row;
}
#code_requests, #code_bytes {
display: table-cell;
}
#code_requests { z-index: 999; }
</style>
<table class="graphs">
<tr><td>
<div id="container">
$code_requests
</div>
</td><td>
<div id="container">
$code_bytes
</div>
</td></tr>
<tr><td colspan="2" align="center">$throughput_bytes</td></tr>
</table>
<h4>$Translate{'Legend'}</h4>
<div class="line-separator"></div>
<div class="displayLegend">
<span class="legendeTitle">$Translate{'Hit'}:</span> <span class="descLegend">$Translate{'Hit_help'}</span><br/>
<span class="legendeTitle">$Translate{'Miss'}:</span> <span class="descLegend">$Translate{'Miss_help'}</span><br/>
<span class="legendeTitle">$Translate{'Denied'}:</span> <span class="descLegend">$Translate{'Denied_help'}</span><br/>
<span class="legendeTitle">$Translate{'Users'}:</span> <span class="descLegend">$Translate{'Users_help'}</span><br/>
<span class="legendeTitle">$Translate{'Sites'}:</span> <span class="descLegend">$Translate{'Sites_help'}</span><br/>
<span class="legendeTitle">$Translate{'Domains'}:</span> <span class="descLegend">$Translate{'Domains_help'}</span><br/>
};
print $out qq{
<span class="legendeTitle">$Translate{'Cost'}:</span> <span class="descLegend">$Translate{'Cost_help'} $self->{CostPrice} $self->{Currency}</span><br/>
} if ($self->{CostPrice});
print $out qq{
</div>
};
%code_stat = ();
$self->_print_footer(\$out);
$out->close();
rename("$file.tmp", "$file");
}
sub _print_mime_stat
{
my ($self, $outdir, $year, $month, $day, $week) = @_;
print STDERR "\tMime type statistics in $outdir...\n" if (!$self->{QuietMode});
$0 = "squid-analyzer: Printing mime statistics in $outdir";
my $stat_date = $self->set_date($year, $month, $day);
my $type = 'hour';
if (!$day) {
$type = 'day';
}
if (!$month) {
$type = 'month';
}
if ($week) {
$type = 'day';
}
# Load code statistics
my $infile = new IO::File;
$infile->open("$outdir/stat_mime_type.dat") || return;
my %mime_stat = ();
my $total_count = 0;
my $total_bytes = 0;
while(my $l = <$infile>) {
chomp($l);
my ($code, $data) = split(/\s/, $l);
$data =~ /hits=(\d+);bytes=(\d+)/;
$mime_stat{$code}{hits} = $1;
$mime_stat{$code}{bytes} = $2;
$total_count += $1;
$total_bytes += $2;
}
$infile->close();
my $ntype = scalar keys %mime_stat;
my $file = $outdir . '/mime_type.html';
my $out = new IO::File;
$out->open(">$file") || $self->localdie("ERROR: Unable to open $file. $!\n");
my $sortpos = 1;
$sortpos = 2 if ($self->{OrderMime} eq 'bytes');
$sortpos = 3 if ($self->{OrderMime} eq 'duration');
# Print the HTML header
my $cal = 'SA_CALENDAR_SA';
$cal = '' if ($week);
$self->_print_header(\$out, $self->{menu}, $cal, $sortpos);
# Print title and calendar view
print $out $self->_print_title($Translate{'Mime_title'}, $stat_date, $week);
my %data = ();
$total_count ||= 1;
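# Pie chart data: mime types whose share is below the MinPie percentage are
# merged into the 'others' slice.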
foreach my $mime (keys %mime_stat) {
if (($mime_stat{$mime}{hits}/$total_count)*100 > $self->{MinPie}) {
$data{$mime} = $mime_stat{$mime}{hits};
} else {
$data{'others'} += $mime_stat{$mime}{hits};
}
}
my $title = "$Translate{'Mime_graph_hits_title'} $stat_date";
my $mime_hits = $self->flotr2_piegraph(1, 'mime_hits', $title, $Translate{'Mime_graph'}, '', %data);
print $out qq{
<style>
#container {
display: table;
}
#row {
display: table-row;
}
#mime_hits, #mime_bytes {
display: table-cell;
}
#mime_hits { z-index: 999; }
</style>
<table class="graphs"><tr><td>
<div id="container">
$mime_hits
</div>
</td><td>
};
$mime_hits = '';
%data = ();
$total_bytes ||= 1;
foreach my $mime (keys %mime_stat) {
if (($mime_stat{$mime}{bytes}/$total_bytes)*100 > $self->{MinPie}) {
$data{$mime} = int($mime_stat{$mime}{bytes}/1000000);
} else {
$data{'others'} += $mime_stat{$mime}{bytes};
}
}
$data{'others'} = int($data{'others'}/1000000);
$title = "$Translate{'Mime_graph_bytes_title'} $stat_date";
my $mime_bytes = $self->flotr2_piegraph(1, 'mime_bytes', $title, $Translate{'Mime_graph'}, '', %data);
print $out qq{
<div id="container">
$mime_bytes
</div>
</td></tr></table>
};
$mime_bytes = '';
%data = ();
print $out "<h3>$Translate{'Mime_number'}: $ntype</h3>\n";
print $out qq{
<table class="sortable stata">
<thead>
<tr>
<th>$Translate{'Mime_link'}</th>
<th>$Translate{'Requests'} (%)</th>
<th>$Translate{$self->{TransfertUnit}} (%)</th>
};
print $out qq{
<th>$Translate{'Cost'} $self->{Currency}</th>
} if ($self->{CostPrice});
print $out qq{
</tr>
</thead>
<tbody>
};
foreach (sort { $mime_stat{$b}{"$self->{OrderMime}"} <=> $mime_stat{$a}{"$self->{OrderMime}"} } keys %mime_stat) {
my $c_percent = '0.0';
$c_percent = sprintf("%2.2f", ($mime_stat{$_}{hits}/$total_count) * 100) if ($total_count);
my $b_percent = '0.0';
$b_percent = sprintf("%2.2f", ($mime_stat{$_}{bytes}/$total_bytes) * 100) if ($total_bytes);
my $total_cost = sprintf("%2.2f", int($mime_stat{$_}{bytes}/1000000) * $self->{CostPrice});
my $comma_bytes = $self->format_bytes($mime_stat{$_}{bytes});
print $out qq{
<tr>
<td>$_</td>
<td>$mime_stat{$_}{hits} <span class="italicPercent">($c_percent)</span></td>
<td>$comma_bytes <span class="italicPercent">($b_percent)</span></td>
};
print $out qq{
<td>$total_cost</td>
} if ($self->{CostPrice});
print $out qq{
</tr>};
}
$sortpos = 1;
$sortpos = 2 if ($self->{OrderMime} eq 'bytes');
print $out qq{
</tbody>
</table>
};
print $out qq{
<div class="uplink">
<a href="#atop"><span class="iconUpArrow">$Translate{'Up_link'}</span></a>
</div>
};
$self->_print_footer(\$out);
$out->close();
}
sub _print_network_stat
{
my ($self, $outdir, $year, $month, $day, $week) = @_;
print STDERR "\tNetwork statistics in $outdir...\n" if (!$self->{QuietMode});
$0 = "squid-analyzer: Printing network statistics in $outdir";
my $stat_date = $self->set_date($year, $month, $day);
my $type = 'hour';
if (!$day) {
$type = 'day';
}
if (!$month) {
$type = 'month';
}
if ($week) {
$type = 'day';
}
# Load code statistics
my $infile = new IO::File;
$infile->open("$outdir/stat_network.dat") || return;
my %network_stat = ();
my %detail_network_stat = ();
my %total_net_detail = ();
my $total_hit = 0;
my $total_bytes = 0;
my $total_duration = 0;
while (my $l = <$infile>) {
chomp($l);
my ($network, $data) = split(/\t/, $l);
if (!$data) {
# Assume backward compatibility
$l =~ s/^(.*)\shits_$type=/hits_$type=/;
$network = $1;
$data = $l;
}
$data =~ /^hits_$type=([^;]+);bytes_$type=([^;]+);duration_$type=([^;]+);largest_file_size=([^;]*);largest_file_url=(.*)/;
if ($self->{rebuild} && !exists $self->{NetworkAlias}->{$network}) {
next if (!$self->check_inclusions('', $network));
next if ($self->check_exclusions('', $network));
}
if ($self->{UpdateAlias}) {
# Replace network by its alias if any
$network = (!$self->{has_network_alias}) ? $network : $self->apply_network_alias($network);
}
my $hits = $1 || '';
my $bytes = $2 || '';
my $duration = $3 || '';
$network_stat{$network}{largest_file} = $4;
$network_stat{$network}{url} = $5;
$hits =~ s/,$//;
$bytes =~ s/,$//;
$duration =~ s/,$//;
my %hits_tmp = split(/[:,]/, $hits);
foreach my $tmp (sort {$a <=> $b} keys %hits_tmp) {
$detail_network_stat{$network}{$tmp}{hits} = $hits_tmp{$tmp};
$total_net_detail{$tmp}{hits} += $hits_tmp{$tmp};
$network_stat{$network}{hits} += $hits_tmp{$tmp};
$total_hit += $hits_tmp{$tmp};
}
my %bytes_tmp = split(/[:,]/, $bytes);
foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) {
$detail_network_stat{$network}{$tmp}{bytes} = $bytes_tmp{$tmp};
$total_net_detail{$tmp}{bytes} += $bytes_tmp{$tmp};
$network_stat{$network}{bytes} += $bytes_tmp{$tmp};
$total_bytes += $bytes_tmp{$tmp};
}
my %duration_tmp = split(/[:,]/, $duration);
foreach my $tmp (sort {$a <=> $b} keys %duration_tmp) {
$detail_network_stat{$network}{$tmp}{duration} = $duration_tmp{$tmp};
$total_net_detail{$tmp}{duration} += $duration_tmp{$tmp};
$network_stat{$network}{duration} += $duration_tmp{$tmp};
$total_duration += $duration_tmp{$tmp};
}
}
$infile->close();
my $nnet = scalar keys %network_stat;
my $sortpos = 1;
$sortpos = 2 if ($self->{OrderNetwork} eq 'bytes');
$sortpos = 3 if ($self->{OrderNetwork} eq 'duration');
my $file = $outdir . '/network.html';
my $out = new IO::File;
$out->open(">$file") || $self->localdie("ERROR: Unable to open $file. $!\n");
# Print the HTML header
my $cal = 'SA_CALENDAR_SA';
$cal = '' if ($week);
$self->_print_header(\$out, $self->{menu}, $cal, $sortpos);
print $out $self->_print_title($Translate{'Network_title'}, $stat_date, $week);
my $last = '23';
my $first = '00';
my $title = $Translate{'Hourly'} || 'Hourly';
my $unit = $Translate{'Hours'} || 'Hours';
if ($type eq 'day') {
$last = '31';
$first = '01';
$title = $Translate{'Daily'} || 'Daily';
$unit = $Translate{'Days'} || 'Days';
} elsif ($type eq 'month') {
$last = '12';
$first = '01';
$title = $Translate{'Monthly'} || 'Monthly';
$unit = $Translate{'Months'} || 'Months';
}
my $trfunit = $self->{TransfertUnit} || 'B';
$trfunit = 'B' if ($trfunit eq 'BYTE');
print $out "<h3>$Translate{'Network_number'}: $nnet</h3>\n";
print $out qq{
<table class="sortable stata">
<thead>
<tr>
<th>$Translate{'Network_link'}</th>
<th>$Translate{'Requests'} (%)</th>
<th>$Translate{$self->{TransfertUnit}} (%)</th>
<th>$Translate{'Duration'} (%)</th>
<th>$Translate{'Throughput'} ($trfunit/s)</th>
};
print $out qq{
<th>$Translate{'Cost'} $self->{Currency}</th>
} if ($self->{CostPrice});
print $out qq{
<th>$Translate{'Users'}</th>
} if ($self->{UserReport});
print $out qq{
<th>$Translate{'Largest'}</th>
<th style="text-align: left;">$Translate{'Url'}</th>
</tr>
</thead>
<tbody>
};
if (!-d "$outdir/networks") {
mkdir("$outdir/networks", 0755) || return;
}
$total_duration = abs($total_duration);
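# One summary row and one detailed page (networks/<net>/<net>.html) per
# network, sorted by the OrderNetwork setting (hits, bytes or duration).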
foreach my $net (sort { $network_stat{$b}{"$self->{OrderNetwork}"} <=> $network_stat{$a}{"$self->{OrderNetwork}"} } keys %network_stat) {
my $h_percent = '0.0';
$h_percent = sprintf("%2.2f", ($network_stat{$net}{hits}/$total_hit) * 100) if ($total_hit);
my $b_percent = '0.0';
$b_percent = sprintf("%2.2f", ($network_stat{$net}{bytes}/$total_bytes) * 100) if ($total_bytes);
my $d_percent = '0.0';
$d_percent = sprintf("%2.2f", ($network_stat{$net}{duration}/$total_duration) * 100) if ($total_duration);
my $total_cost = sprintf("%2.2f", int($network_stat{$net}{bytes}/1000000) * $self->{CostPrice});
my $total_throughput = int($network_stat{$net}{bytes} / (($network_stat{$net}{duration}/1000) || 1) );
my $comma_throughput = $self->format_bytes($total_throughput);
$network_stat{$net}{duration} = &parse_duration(int($network_stat{$net}{duration}/1000));
my $show = (!$self->{has_network_alias}) ? $net : $self->apply_network_alias($net);
my $comma_bytes = $self->format_bytes($network_stat{$net}{bytes});
print $out qq{
<tr>
<td><a href="networks/$net/$net.html">$show</a></td>
<td>$network_stat{$net}{hits} <span class="italicPercent">($h_percent)</span></td>
<td>$comma_bytes <span class="italicPercent">($b_percent)</span></td>
<td>$network_stat{$net}{duration} <span class="italicPercent">($d_percent)</span></td>
<td>$comma_throughput</td>
};
print $out qq{
<td>$total_cost</td>
} if ($self->{CostPrice});
if (!-d "$outdir/networks/$net") {
mkdir("$outdir/networks/$net", 0755) || return;
}
my $outnet = new IO::File;
$outnet->open(">$outdir/networks/$net/$net.html") || return;
# Print the HTML header
my $cal = '';
$self->_print_header(\$outnet, $self->{menu2}, $cal, $sortpos);
print $outnet $self->_print_title("$Translate{'Network_title'} $show -", $stat_date, $week);
my @hits = ();
my @bytes = ();
for ("$first" .. "$last") {
if (exists $detail_network_stat{$net}{$_}{hits}) {
push(@hits, "[ $_, " . $detail_network_stat{$net}{$_}{hits} . " ]");
} else {
push(@hits, "[ $_, 0 ]");
}
if (exists $detail_network_stat{$net}{$_}{bytes}) {
push(@bytes, "[ $_, " . int($detail_network_stat{$net}{$_}{bytes}/1000000) . " ]");
} else {
push(@bytes, "[ $_, 0 ]");
}
}
delete $detail_network_stat{$net};
my $t1 = $Translate{'Graph_cache_hit_title'};
$t1 =~ s/\%s/$title $show/;
$t1 = "$t1 $stat_date";
my $xlabel = $unit || '';
my $ylabel = $Translate{'Requests_graph'} || 'Requests';
my $network_hits = $self->flotr2_bargraph(1, 'network_hits', $type, $t1, $xlabel, $ylabel,
join(',', @hits), $Translate{'Hit_graph'} );
@hits = ();
print $outnet qq{
<style>
#container {
display: table;
}
#row {
display: table-row;
}
#network_hits, #network_bytes {
display: table-cell;
}
#network_hits { z-index: 999; }
</style>
<table class="graphs"><tr><td>
<div id="container">
$network_hits
</div>
</td><td>
};
$network_hits = '';
$t1 = $Translate{'Graph_cache_byte_title'};
$t1 =~ s/\%s/$title $show/;
$t1 = "$t1 $stat_date";
$xlabel = $unit || '';
$ylabel = $Translate{'Megabytes_graph'} || $Translate{'Megabytes'};
my $network_bytes = $self->flotr2_bargraph(1, 'network_bytes', $type, $t1, $xlabel, $ylabel,
join(',', @bytes), $Translate{'Bytes'} );
@bytes = ();
print $outnet qq{
<div id="container">
$network_bytes
</div>
</td></tr></table>
};
$network_bytes = '';
my $retuser = $self->_print_netuser_stat($outdir, \$outnet, $net);
my $comma_largest = $self->format_bytes($network_stat{$net}{largest_file});
$retuser = '-' if ($retuser eq '');
$comma_largest = '-' if ($comma_largest eq '');
$network_stat{$net}{url} = '-' if ($network_stat{$net}{url} eq '');
print $out qq{
<td>$retuser</td>
} if ($self->{UserReport});
print $out qq{
<td>$comma_largest</td>
<td style="text-align: left;">$network_stat{$net}{url}</td>
</tr>
};
$sortpos = 1;
$sortpos = 2 if ($self->{OrderNetwork} eq 'bytes');
$sortpos = 3 if ($self->{OrderNetwork} eq 'duration');
print $outnet qq{
<div class="uplink">
<a href="#atop"><span class="iconUpArrow">$Translate{'Up_link'}</span></a>
</div>
};
$self->_print_footer(\$outnet);
$outnet->close();
}
print $out "</tbody></table>\n";
$sortpos = 1;
$sortpos = 2 if ($self->{OrderNetwork} eq 'bytes');
$sortpos = 3 if ($self->{OrderNetwork} eq 'duration');
print $out qq{
<div class="uplink">
<a href="#atop"><span class="iconUpArrow">$Translate{'Up_link'}</span></a>
</div>
};
$self->_print_footer(\$out);
$out->close();
}
sub _print_user_stat
{
my ($self, $outdir, $year, $month, $day, $week) = @_;
print STDERR "\tUser statistics in $outdir...\n" if (!$self->{QuietMode});
$0 = "squid-analyzer: Printing user statistics in $outdir";
my $stat_date = $self->set_date($year, $month, $day);
my $type = 'hour';
if (!$day) {
$type = 'day';
}
if (!$month) {
$type = 'month';
}
if ($week) {
$type = 'day';
}
# Load code statistics
my $infile = new IO::File;
$infile->open("$outdir/stat_user.dat") || return;
my %user_stat = ();
my %detail_user_stat = ();
my %total_user_detail = ();
my $total_hit = 0;
my $total_bytes = 0;
my $total_duration = 0;
while (my $l = <$infile>) {
chomp($l);
my ($user, $data) = split(/\s/, $l);
if ($self->{rebuild}) {
next if (!$self->check_inclusions($user));
next if ($self->check_exclusions($user));
}
# Anonymize all users
if ($self->{AnonymizeLogin} && ($user !~ /^Anon[a-zA-Z0-9]{16}$/)) {
if (!exists $self->{AnonymizedId}{$user}) {
$self->{AnonymizedId}{$user} = &anonymize_id();
}
$user = $self->{AnonymizedId}{$user};
}
my ($hits,$bytes,$duration,$largest_file,$url) = ($data =~ /hits_$type=([^;]+);bytes_$type=([^;]+);duration_$type=([^;]+);largest_file_size=([^;]*);largest_file_url=(.*)/);
$user_stat{$user}{largest_file} = $largest_file;
$user_stat{$user}{url} = $url;
$hits =~ s/,$//;
$bytes =~ s/,$//;
$duration =~ s/,$//;
my %hits_tmp = split(/[:,]/, $hits);
foreach my $tmp (sort {$a <=> $b} keys %hits_tmp) {
$detail_user_stat{$user}{$tmp}{hits} = $hits_tmp{$tmp};
$total_user_detail{$tmp}{hits} += $hits_tmp{$tmp};
$user_stat{$user}{hits} += $hits_tmp{$tmp};
$total_hit += $hits_tmp{$tmp};
}
my %bytes_tmp = split(/[:,]/, $bytes);
foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) {
$detail_user_stat{$user}{$tmp}{bytes} = $bytes_tmp{$tmp};
$total_user_detail{$tmp}{bytes} += $bytes_tmp{$tmp};
$user_stat{$user}{bytes} += $bytes_tmp{$tmp};
$total_bytes += $bytes_tmp{$tmp};
}
my %duration_tmp = split(/[:,]/, $duration);
foreach my $tmp (sort {$a <=> $b} keys %duration_tmp) {
$detail_user_stat{$user}{$tmp}{duration} = $duration_tmp{$tmp};
$total_user_detail{$tmp}{duration} += $duration_tmp{$tmp};
$user_stat{$user}{duration} += $duration_tmp{$tmp};
$total_duration += $duration_tmp{$tmp};
}
}
$infile->close();
# Store number of users
my $nuser = scalar keys %user_stat;
my $outf = new IO::File;
$outf->open(">>$outdir/stat_count.dat") || return;
flock($outf, 2) || die "FATAL: can't acquire lock on file $outdir/stat_count.dat, $!\n";
$outf->print("users:$nuser\n");
$outf->close;
my $file = $outdir . '/user.html';
my $out = new IO::File;
$out->open(">$file") || $self->localdie("ERROR: Unable to open $file. $!\n");
my $sortpos = 1;
$sortpos = 2 if ($self->{OrderUser} eq 'bytes');
$sortpos = 3 if ($self->{OrderUser} eq 'duration');
# Print the HTML header
my $cal = 'SA_CALENDAR_SA';
$cal = '' if ($week);
$self->_print_header(\$out, $self->{menu}, $cal, $sortpos);
my $last = '23';
my $first = '00';
my $title = $Translate{'Hourly'} || 'Hourly';
my $unit = $Translate{'Hours'} || 'Hours';
if ($type eq 'day') {
$last = '31';
$first = '01';
$title = $Translate{'Daily'} || 'Daily';
$unit = $Translate{'Days'} || 'Days';
} elsif ($type eq 'month') {
$last = '12';
$first = '01';
$title = $Translate{'Monthly'} || 'Monthly';
$unit = $Translate{'Months'} || 'Months';
}
%total_user_detail = ();
print $out $self->_print_title($Translate{'User_title'}, $stat_date, $week);
print $out "<h3>$Translate{'User_number'}: $nuser</h3>\n";
my $trfunit = $self->{TransfertUnit} || 'B';
$trfunit = 'B' if ($trfunit eq 'BYTE');
my $user_ip_title = '';
$user_ip_title = "<th>$Translate{'User_Ip'}</th>" if ($self->{StoreUserIp});
print $out qq{
<table class="sortable stata" >
<thead>
<tr>
<th>$Translate{'Users'}</th>
<th>$Translate{'Requests'} (%)</th>
<th>$Translate{$self->{TransfertUnit}} (%)</th>
<th>$Translate{'Duration'} (%)</th>
<th>$Translate{'Throughput'} ($trfunit/s)</th>
};
print $out qq{
<th>$Translate{'Cost'} $self->{Currency}</th>
} if ($self->{CostPrice});
print $out qq{
<th>$Translate{'Largest'}</th>
<th style="text-align: left;">$Translate{'Url'}</th>
$user_ip_title
</tr>
</thead>
<tbody>
};
if (!-d "$outdir/users") {
mkdir("$outdir/users", 0755) || return;
}
$total_duration = abs($total_duration);
foreach my $usr (sort { $user_stat{$b}{"$self->{OrderUser}"} <=> $user_stat{$a}{"$self->{OrderUser}"} } keys %user_stat) {
next if (!$user_stat{$usr}{hits}); # Entries with no hits may be rejected requests
my $h_percent = '0.0';
$h_percent = sprintf("%2.2f", ($user_stat{$usr}{hits}/$total_hit) * 100) if ($total_hit);
my $b_percent = '0.0';
$b_percent = sprintf("%2.2f", ($user_stat{$usr}{bytes}/$total_bytes) * 100) if ($total_bytes);
my $d_percent = '0.0';
$d_percent = sprintf("%2.2f", ($user_stat{$usr}{duration}/$total_duration) * 100) if ($total_duration);
my $total_cost = sprintf("%2.2f", int($user_stat{$usr}{bytes}/1000000) * $self->{CostPrice});
my $total_throughput = int($user_stat{$usr}{bytes} / (($user_stat{$usr}{duration}/1000) || 1));
my $comma_throughput = $self->format_bytes($total_throughput);
$user_stat{$usr}{duration} = &parse_duration(int($user_stat{$usr}{duration}/1000));
my $show = (!$self->{has_user_alias}) ? $usr : $self->apply_user_alias($usr);
$show =~ s/_SPC_/ /g;
my $upath = &escape($usr);
my $comma_bytes = $self->format_bytes($user_stat{$usr}{bytes});
if ($self->{UrlReport}) {
print $out qq{
<tr>
<td><a href="users/$upath/$upath.html">$show</a></td>
};
} else {
print $out qq{
<tr>
<td>$show</td>
};
}
print $out qq{
<td>$user_stat{$usr}{hits} <span class="italicPercent">($h_percent)</span></td>
<td>$comma_bytes <span class="italicPercent">($b_percent)</span></td>
<td>$user_stat{$usr}{duration} <span class="italicPercent">($d_percent)</span></td>
<td>$comma_throughput</td>
};
print $out qq{
<td>$total_cost</td>
} if ($self->{CostPrice});
my $comma_largest = $self->format_bytes($user_stat{$usr}{largest_file});
print $out qq{
<td>$comma_largest</td>
<td style="text-align: left;">$user_stat{$usr}{url}</td>
};
if (!-d "$outdir/users/$upath") {
mkdir("$outdir/users/$upath", 0755) || return;
}
my $outusr = new IO::File;
$outusr->open(">$outdir/users/$upath/$upath.html") || return;
# Print the HTML header
my $cal = '';
$self->_print_header(\$outusr, $self->{menu2}, $cal, $sortpos);
my $usr_lbl = $usr;
$usr_lbl =~ s/_SPC_/ /g;
print $outusr $self->_print_title("$Translate{'User_title'} $usr_lbl -", $stat_date, $week);
my @hits = ();
my @bytes = ();
for my $d ("$first" .. "$last") {
if (exists $detail_user_stat{$usr}{$d}{hits}) {
push(@hits, "[ $d, $detail_user_stat{$usr}{$d}{hits} ]");
} else {
push(@hits, "[ $d, 0 ]");
}
if (exists $detail_user_stat{$usr}{$d}{bytes}) {
push(@bytes, "[ $d, " . int($detail_user_stat{$usr}{$d}{bytes}/1000000) . " ]");
} else {
push(@bytes, "[ $d, 0 ]");
}
}
delete $detail_user_stat{$usr};
my $t1 = $Translate{'Graph_cache_hit_title'};
$t1 =~ s/\%s/$title $show/;
$t1 = "$t1 $stat_date";
my $xlabel = $unit || '';
my $ylabel = $Translate{'Requests_graph'} || 'Requests';
my $user_hits = $self->flotr2_bargraph(1, 'user_hits', $type, $t1, $xlabel, $ylabel,
join(',', @hits), $Translate{'Hit_graph'});
@hits = ();
print $outusr qq{
<style>
#container {
display: table;
}
#row {
display: table-row;
}
#user_hits, #user_bytes {
display: table-cell;
}
#user_hits { z-index: 999; }
</style>
<table class="graphs"><tr><td>
<div id="container">
$user_hits
</div>
</td><td>
};
$user_hits = '';
$t1 = $Translate{'Graph_cache_byte_title'};
$t1 =~ s/\%s/$title $show/;
$t1 = "$t1 $stat_date";
$xlabel = $unit || '';
$ylabel = $Translate{'Megabytes_graph'} || $Translate{'Megabytes'};
my $user_bytes = $self->flotr2_bargraph(1, 'user_bytes', $type, $t1, $xlabel, $ylabel,
join(',', @bytes), $Translate{'Bytes'});
@bytes = ();
print $outusr qq{
<div id="container">
$user_bytes
</div>
</td></tr></table>
};
$user_bytes = '';
delete $user_stat{$usr};
my %user_ips = ();
if ($self->{UrlReport}) {
$self->_print_user_denied_detail(\$outusr, $outdir, $usr, $type);
%user_ips = $self->_print_user_detail(\$outusr, $outdir, $usr, $type);
}
$self->_print_footer(\$outusr);
$outusr->close();
if ($self->{StoreUserIp}) {
print $out "<td>", join(',', sort { $user_ips{$b} <=> $user_ips{$a} } keys %user_ips), "</td>\n";
}
print $out qq{
</tr>
};
}
$sortpos = 1;
$sortpos = 2 if ($self->{OrderUser} eq 'bytes');
$sortpos = 3 if ($self->{OrderUser} eq 'duration');
print $out qq{
</tbody>
</table>
};
print $out qq{
<div class="uplink">
<a href="#atop"><span class="iconUpArrow">$Translate{'Up_link'}</span></a>
</div>
};
$self->_print_footer(\$out);
$out->close();
return $nuser;
}
sub _print_netuser_stat
{
my ($self, $outdir, $out, $usrnet) = @_;
$0 = "squid-analyzer: Printing network user statistics in $outdir";
# Load code statistics
my $infile = new IO::File;
$infile->open("$outdir/stat_netuser.dat") || return;
my %netuser_stat = ();
my $total_hit = 0;
my $total_bytes = 0;
my $total_duration = 0;
while(my $l = <$infile>) {
chomp($l);
my ($network, $user, $data) = split(/\t/, $l);
if (!$data) {
# Assume backward compatibility
$l =~ s/^(.*)\s([^\s]+)\shits=/hits=/;
$network = $1;
$user = $2;
$data = $l;
}
next if ($network ne $usrnet);
if ($self->{rebuild}) {
next if (!$self->check_inclusions($user, $network));
next if ($self->check_exclusions($user, $network));
}
# Anonymize all users
if ($self->{AnonymizeLogin} && ($user !~ /^Anon[a-zA-Z0-9]{16}$/)) {
if (!exists $self->{AnonymizedId}{$user}) {
$self->{AnonymizedId}{$user} = &anonymize_id();
}
$user = $self->{AnonymizedId}{$user};
}
$data =~ /^hits=(\d+);bytes=(\d+);duration=([\-\d]+);largest_file_size=([^;]*);largest_file_url=(.*)/;
$netuser_stat{$user}{hits} = $1;
$netuser_stat{$user}{bytes} = $2;
$netuser_stat{$user}{duration} = abs($3);
$netuser_stat{$user}{largest_file} = $4;
$total_hit += $1;
$total_bytes += $2;
$total_duration += $3;
$netuser_stat{$user}{url} = $5;
}
$infile->close();
my $nuser = scalar keys %netuser_stat;
my $trfunit = $self->{TransfertUnit} || 'B';
$trfunit = 'B' if ($trfunit eq 'BYTE');
print $$out qq{
<h3>$Translate{'User_number'}: $nuser</h3>
};
print $$out qq{
<table class="sortable stata">
<thead>
<tr>
<th>$Translate{'Users'}</th>
<th>$Translate{'Requests'} (%)</th>
<th>$Translate{$self->{TransfertUnit}} (%)</th>
<th>$Translate{'Duration'} (%)</th>
<th>$Translate{'Throughput'} ($trfunit/s)</th>
};
print $$out qq{
<th>$Translate{'Cost'} $self->{Currency}</th>
} if ($self->{CostPrice});
print $$out qq{
<th>$Translate{'Largest'}</th>
<th style="text-align: left;">$Translate{'Url'}</th>
</tr>
</thead>
<tbody>
};
$total_duration = abs($total_duration);
foreach my $usr (sort { $netuser_stat{$b}{"$self->{OrderUser}"} <=> $netuser_stat{$a}{"$self->{OrderUser}"} } keys %netuser_stat) {
next if (!$netuser_stat{$usr}{hits});
my $h_percent = '0.0';
$h_percent = sprintf("%2.2f", ($netuser_stat{$usr}{hits}/$total_hit) * 100) if ($total_hit);
my $b_percent = '0.0';
$b_percent = sprintf("%2.2f", ($netuser_stat{$usr}{bytes}/$total_bytes) * 100) if ($total_bytes);
my $d_percent = '0.0';
$d_percent = sprintf("%2.2f", ($netuser_stat{$usr}{duration}/$total_duration) * 100) if ($total_duration);
my $total_cost = sprintf("%2.2f", int($netuser_stat{$usr}{bytes}/1000000) * $self->{CostPrice});
my $total_throughput = int($netuser_stat{$usr}{bytes} / (($netuser_stat{$usr}{duration}/1000) || 1) );
my $comma_throughput = $self->format_bytes($total_throughput);
$netuser_stat{$usr}{duration} = &parse_duration(int($netuser_stat{$usr}{duration}/1000));
my $show = (!$self->{has_user_alias}) ? $usr : $self->apply_user_alias($usr);
$show =~ s/_SPC_/ /g;
my $url = &escape($usr);
my $comma_bytes = $self->format_bytes($netuser_stat{$usr}{bytes});
if ($self->{UrlReport}) {
print $$out qq{
<tr>
<td><a href="../../users/$url/$url.html">$show</a></td>
};
} else {
print $$out qq{
<tr>
<td>$show</td>
};
}
print $$out qq{
<td>$netuser_stat{$usr}{hits} <span class="italicPercent">($h_percent)</span></td>
<td>$comma_bytes <span class="italicPercent">($b_percent)</span></td>
<td>$netuser_stat{$usr}{duration} <span class="italicPercent">($d_percent)</span></td>
<td>$comma_throughput</td>
};
print $$out qq{
<td>$total_cost</td>
} if ($self->{CostPrice});
my $comma_largest = $self->format_bytes($netuser_stat{$usr}{largest_file});
print $$out qq{
<td>$comma_largest</td>
<td style="text-align: left;">$netuser_stat{$usr}{url}</td>
</tr>};
}
print $$out qq{
</tbody>
</table>
};
return $nuser;
}
sub _print_user_detail
{
my ($self, $out, $outdir, $usr, $type) = @_;
$0 = "squid-analyzer: Printing user details statistics in $outdir";
# Load user statistics
my $infile = new IO::File;
$infile->open("$outdir/stat_user_url.dat") || return;
my %url_stat = ();
my $total_hit = 0;
my $total_bytes = 0;
my $total_duration = 0;
my $total_cache_hit = 0;
my $total_cache_bytes = 0;
my $ok = 0;
while(my $l = <$infile>) {
chomp($l);
my ($user, $data) = split(/\s/, $l);
last if (($user ne $usr) && $ok);
next if ($user ne $usr);
$ok = 1;
if ($self->{rebuild}) {
next if (!$self->check_inclusions($user));
next if ($self->check_exclusions($user));
}
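# stat_user_url.dat lines come in three formats kept for backward
# compatibility: with cache_hit/cache_bytes counters, with only first/last
# visit times, or with plain hits/bytes/duration.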
if ($data =~ /hits=(\d+);bytes=(\d+);duration=([\-\d]+);first=([^;]*);last=([^;]*);url=(.*?);cache_hit=(\d*);cache_bytes=(\d*)/) {
my $url = $6;
$url_stat{$url}{hits} += $1;
$url_stat{$url}{bytes} += $2;
$url_stat{$url}{duration} += abs($3);
$url_stat{$url}{firsthit} = $4 if (!$url_stat{$url}{firsthit} || ($4 < $url_stat{$url}{firsthit}));
$url_stat{$url}{lasthit} = $5 if (!$url_stat{$url}{lasthit} || ($5 > $url_stat{$url}{lasthit}));
$url_stat{$url}{cache_hit} += $7;
$url_stat{$url}{cache_bytes} += $8;
if ($self->check_exclusions('','',$url)) {
delete $url_stat{$url};
next;
}
$total_hit += $url_stat{$url}{hits} || 0;
$total_bytes += $url_stat{$url}{bytes} || 0;
$total_duration += $url_stat{$url}{duration} || 0;
$total_cache_hit += $url_stat{$url}{cache_hit} || 0;
$total_cache_bytes += $url_stat{$url}{cache_bytes} || 0;
if ($data =~ s/;user_ip=(.*)//) {
push(@{$url_stat{$url}{user_ip}}, split(/,/, $1));
}
if ($data =~ /;arr_last=(.*)/) {
push(@{$url_stat{$url}{arr_last}}, split(/,/, $1));
map { $_ = ucfirst(strftime("%b %d %T", CORE::localtime($_))); } @{$url_stat{$url}{arr_last}};
}
} elsif ($data =~ /hits=(\d+);bytes=(\d+);duration=([\-\d]+);first=([^;]*);last=([^;]*);url=(.*)/) {
my $url = $6;
$url_stat{$url}{hits} += $1;
$url_stat{$url}{bytes} += $2;
$url_stat{$url}{duration} += abs($3);
$url_stat{$url}{firsthit} = $4 if (!$url_stat{$url}{firsthit} || ($4 < $url_stat{$url}{firsthit}));
$url_stat{$url}{lasthit} = $5 if (!$url_stat{$url}{lasthit} || ($5 > $url_stat{$url}{lasthit}));
if ($self->{rebuild}) {
if ($self->check_exclusions('','',$url)) {
delete $url_stat{$url};
next;
}
}
$total_hit += $url_stat{$url}{hits} || 0;
$total_bytes += $url_stat{$url}{bytes} || 0;
$total_duration += $url_stat{$url}{duration} || 0;
} elsif ($data =~ /hits=(\d+);bytes=(\d+);duration=([\-\d]+);url=(.*)/) {
my $url = $4;
$url_stat{$url}{hits} += $1;
$url_stat{$url}{bytes} += $2;
$url_stat{$url}{duration} = abs($3);
if ($self->{rebuild}) {
if ($self->check_exclusions('','',$url)) {
delete $url_stat{$url};
next;
}
}
$total_hit += $url_stat{$url}{hits} || 0;
$total_bytes += $url_stat{$url}{bytes} || 0;
$total_duration += $url_stat{$url}{duration} || 0;
}
}
$infile->close();
my $trfunit = $self->{TransfertUnit} || 'B';
$trfunit = 'B' if ($trfunit eq 'BYTE');
my $nurl = scalar keys %url_stat;
my $t1 = $Translate{"Url_title"};
$t1 =~ s/\%d/$self->{TopNumber}\/$nurl/;
my $ip_user_title = '';
$ip_user_title = "<th>$Translate{'User_Ip'}</th>" if ($self->{StoreUserIp});
print $$out qq{
<h3>$t1</h3>
<table class="sortable stata">
<thead>
<tr>
<th>$Translate{'Url'}</th>
<th>$Translate{'Requests'} (%)</th>
<th>$Translate{$self->{TransfertUnit}} (%)</th>
<th>$Translate{'Duration'} (%)</th>
<th>$Translate{'Throughput'} ($trfunit/s)</th>
<th>$Translate{'Last_visit'}</th>
};
print $$out qq{
<th>$Translate{'First_visit'}</th>
} if ($type eq 'hour');
print $$out qq{
<th>$Translate{'Cost'} $self->{Currency}</th>
} if ($self->{CostPrice});
print $$out qq{$ip_user_title
</tr>
</thead>
<tbody>
};
$total_duration = abs($total_duration);
my %all_ips = ();
my $i = 0;
foreach my $url (sort { $url_stat{$b}{"$self->{OrderUrl}"} <=> $url_stat{$a}{"$self->{OrderUrl}"} } keys %url_stat) {
my $h_percent = '0.0';
$h_percent = sprintf("%2.2f", ($url_stat{$url}{hits}/$total_hit) * 100) if ($total_hit);
my $b_percent = '0.0';
$b_percent = sprintf("%2.2f", ($url_stat{$url}{bytes}/$total_bytes) * 100) if ($total_bytes);
my $d_percent = '0.0';
$d_percent = sprintf("%2.2f", ($url_stat{$url}{duration}/$total_duration) * 100) if ($total_duration);
my $total_cost = sprintf("%2.2f", int($url_stat{$url}{bytes}/1000000) * $self->{CostPrice});
my $comma_bytes = $self->format_bytes($url_stat{$url}{bytes});
my $total_throughput = int($url_stat{$url}{bytes} / (($url_stat{$url}{duration}/1000) || 1) );
my $comma_throughput = $self->format_bytes($total_throughput);
$url_stat{$url}{duration} = &parse_duration(int($url_stat{$url}{duration}/1000));
my $firsthit = '-';
if ($url_stat{$url}{firsthit}) {
$firsthit = ucfirst(strftime("%b %d %T", CORE::localtime($url_stat{$url}{firsthit})));
}
my $lasthit = '-';
if ($url_stat{$url}{lasthit}) {
$lasthit = ucfirst(strftime("%b %d %T", CORE::localtime($url_stat{$url}{lasthit})));
}
if ($type eq 'hour') {
if ($url_stat{$url}{firsthit}) {
$firsthit = ucfirst(strftime("%T", CORE::localtime($url_stat{$url}{firsthit})));
} else {
$firsthit = '-';
}
if ($url_stat{$url}{lasthit}) {
$lasthit = ucfirst(strftime("%T", CORE::localtime($url_stat{$url}{lasthit})));
} else {
				$lasthit = '-';
}
}
if (exists $url_stat{$url}{arr_last}) {
$lasthit = qq{
<div class="tooltipLink"><span class="information">$lasthit</span><div class="tooltip">
<table><tr><th>$Translate{'Last_visit'}</th></tr>
};
foreach my $tm (reverse @{$url_stat{$url}{arr_last}}) {
$lasthit .= "<tr><td>$tm</td></tr>\n";
}
$lasthit .= "</table>\n</div></div>\n";
}
		my $ip_user = '';
		$ip_user = '<td>-</td>' if ($self->{StoreUserIp});
		$ip_user = "<td>" . join(',', @{$url_stat{$url}{user_ip}}) . "</td>" if ($self->{StoreUserIp} && $#{$url_stat{$url}{user_ip}} >= 0);
foreach my $ip (@{$url_stat{$url}{user_ip}}) {
$all_ips{$ip}++;
}
my $show = "<a href=\"http://$url/\" target=\"_blank\" class=\"domainLink\">$url</a>";
$show = $url if (exists $self->{UrlAliasName}{$url});
print $$out qq{
<tr>
<td>$show</td>
<td>$url_stat{$url}{hits} <span class="italicPercent">($h_percent)</span></td>
<td>$comma_bytes <span class="italicPercent">($b_percent)</span></td>
<td>$url_stat{$url}{duration} <span class="italicPercent">($d_percent)</span></td>
<td>$comma_throughput</td>
<td>$lasthit</td>
};
print $$out qq{
<td>$firsthit</td>
} if ($type eq 'hour');
print $$out qq{
<td>$total_cost</td>
} if ($self->{CostPrice});
print $$out qq{$ip_user
</tr>};
$i++;
last if ($i > $self->{TopNumber});
}
my $sortpos = 1;
print $$out qq{
</tbody>
</table>
};
return %all_ips;
}
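# Print the denied URLs table for a single user, read from stat_denied_url.dat
# in the given directory.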
sub _print_user_denied_detail
{
my ($self, $out, $outdir, $usr, $type) = @_;
$0 = "squid-analyzer: Printing user denied statistics in $outdir";
	# Load user denied URL statistics
my $infile = new IO::File;
$infile->open("$outdir/stat_denied_url.dat") || return;
my %denied_stat = ();
my $total_hits = 0;
while (my $l = <$infile>) {
chomp($l);
my ($user, $data) = split(/\s/, $l);
next if ($user ne $usr);
if ($self->{rebuild}) {
next if (!$self->check_inclusions($user));
next if ($self->check_exclusions($user));
}
if ($data =~ /hits=(\d+);first=([^;]*);last=([^;]*);url=(.*);blacklist=(.*)/) {
my $hits = $1;
my $firsthit = $2;
my $lasthit = $3;
my $url = $4;
my $blacklist = $5;
if ($self->{rebuild}) {
next if ($self->check_exclusions('','',$url));
}
$total_hits += $hits;
$denied_stat{$url}{hits} += $hits;
if ($lasthit =~ /^(\d{10})\d+/) {
$lasthit = $1;
}
if ($firsthit =~ /^(\d{10})\d+/) {
$firsthit = $1;
}
$denied_stat{$url}{firsthit} = $firsthit if (!$denied_stat{$url}{firsthit} || ($firsthit < $denied_stat{$url}{firsthit}));
$denied_stat{$url}{lasthit} = $lasthit if (!$denied_stat{$url}{lasthit} || ($lasthit > $denied_stat{$url}{lasthit}));
$denied_stat{$url}{users}{$user}++ if ($self->{TopUrlUser} && $self->{UserReport});
if ($blacklist) {
my %tmp = split(/,/, $blacklist);
foreach my $k (keys %tmp) {
$denied_stat{$url}{blacklist}{$k} += $tmp{$k};
$denied_stat{$url}{users}{$user}{blacklist}{$k} += $tmp{$k} if ($self->{TopUrlUser} && $self->{UserReport});
}
}
}
}
$infile->close();
return if (!$total_hits);
print $$out qq{
<h3>$Translate{'Top_denied_link'}</h3>
<table class="sortable stata">
<thead>
<tr>
<th>$Translate{'Url'}</th>
<th>Blocklist ACLs</th>
<th>$Translate{'Requests'} (%)</th>
<th>$Translate{'Last_visit'}</th>
};
print $$out qq{
<th>$Translate{'First_visit'}</th>
} if ($type eq 'hour');
print $$out qq{
</tr>
</thead>
<tbody>
};
my $i = 0;
foreach my $u (sort { $denied_stat{$b}{hits} <=> $denied_stat{$a}{hits} } keys %denied_stat) {
my $h_percent = '0.0';
$h_percent = sprintf("%2.2f", ($denied_stat{$u}{hits}/$total_hits) * 100) if ($total_hits);
my $firsthit = '-';
if ($denied_stat{$u}{firsthit} && ($denied_stat{$u}{firsthit} =~ /^\d{10}(\.\d{3})?$/)) {
$firsthit = ucfirst(strftime("%b %d %T", CORE::localtime($denied_stat{$u}{firsthit})));
}
my $lasthit = '-';
if ($denied_stat{$u}{lasthit} && ($denied_stat{$u}{lasthit} =~ /^\d{10}(\.\d{3})?$/)) {
$lasthit = ucfirst(strftime("%b %d %T", CORE::localtime($denied_stat{$u}{lasthit})));
}
if ($type eq 'hour') {
if ($denied_stat{$u}{firsthit} && ($denied_stat{$u}{firsthit} =~ /^\d{10}(\.\d{3})?$/)) {
$firsthit = ucfirst(strftime("%T", CORE::localtime($denied_stat{$u}{firsthit})));
} else {
$firsthit = '-';
}
if ($denied_stat{$u}{lasthit} && ($denied_stat{$u}{lasthit} =~ /^\d{10}(\.\d{3})?$/)) {
$lasthit = ucfirst(strftime("%T", CORE::localtime($denied_stat{$u}{lasthit})));
} else {
				$lasthit = '-';
}
}
my $bl = '-';
if (exists $denied_stat{$u}{blacklist}) {
$bl = '';
foreach my $k (sort keys %{$denied_stat{$u}{blacklist}}) {
$bl .= $k . '=' . $denied_stat{$u}{blacklist}{$k} . ' ';
}
}
print $$out qq{
<tr><td>
<a href="http://$u/" target="_blank" class="domainLink">$u</a>
</td>
<td>$bl</td>
<td>$denied_stat{$u}{hits} <span class="italicPercent">($h_percent)</span></td>
<td>$lasthit</td>
};
print $$out qq{
<td>$firsthit</td>
} if ($type eq 'hour');
$i++;
last if ($i > $self->{TopNumber});
}
print $$out qq{
</tbody>
</table>
};
}
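# Generate url.html: top URLs tables sorted by hits, bytes and duration for the
# given period. Returns the number of distinct URLs found.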
sub _print_top_url_stat
{
my ($self, $outdir, $year, $month, $day, $week) = @_;
print STDERR "\tTop URL statistics in $outdir...\n" if (!$self->{QuietMode});
$0 = "squid-analyzer: Printing top urls statistics in $outdir";
my $stat_date = $self->set_date($year, $month, $day);
my $type = 'hour';
if (!$day) {
$type = 'day';
}
if (!$month) {
$type = 'month';
}
if ($week) {
$type = 'day';
}
# Load user URL statistics
my $infile = new IO::File;
$infile->open("$outdir/stat_user_url.dat") || return;
my %url_stat = ();
my $total_hits = 0;
my $total_bytes = 0;
my $total_duration = 0;
my $total_cache_hit = 0;
my $total_cache_bytes = 0;
while (my $l = <$infile>) {
chomp($l);
my ($user, $data) = split(/\s/, $l);
if ($self->{rebuild}) {
next if (!$self->check_inclusions($user));
next if ($self->check_exclusions($user));
}
		# Anonymize user logins when AnonymizeLogin is enabled, or drop them when user reports are disabled
if ($self->{UserReport}) {
if ($self->{AnonymizeLogin} && ($user !~ /^Anon[a-zA-Z0-9]{16}$/)) {
if (!exists $self->{AnonymizedId}{$user}) {
$self->{AnonymizedId}{$user} = &anonymize_id();
}
$user = $self->{AnonymizedId}{$user};
}
} else {
$user = '-';
}
my $url = '';
my $hits = 0;
my $bytes = 0;
my $duration = 0;
if ($data =~ /hits=(\d+);bytes=(\d+);duration=([\-\d]+);first=([^;]*);last=([^;]*);url=(.*?);cache_hit=(\d*);cache_bytes=(\d*)/) {
$url = $6;
$hits = $1;
$bytes = $2;
$duration = abs($3);
$url_stat{$url}{firsthit} = $4 if (!$url_stat{$url}{firsthit} || ($4 < $url_stat{$url}{firsthit}));
$url_stat{$url}{lasthit} = $5 if (!$url_stat{$url}{lasthit} || ($5 > $url_stat{$url}{lasthit}));
$url_stat{$url}{cache_hit} += $7;
$url_stat{$url}{cache_bytes} += $8;
if ($self->{rebuild}) {
if ($self->check_exclusions('','',$url)) {
delete $url_stat{$url};
next;
}
}
$total_cache_hit += $url_stat{$url}{cache_hit} || 0;
$total_cache_bytes += $url_stat{$url}{cache_bytes} || 0;
} elsif ($data =~ /hits=(\d+);bytes=(\d+);duration=([\-\d]+);first=([^;]*);last=([^;]*);url=(.*)/) {
$url = $6;
$hits = $1;
$bytes = $2;
$duration = abs($3);
$url_stat{$url}{firsthit} = $4 if (!$url_stat{$url}{firsthit} || ($4 < $url_stat{$url}{firsthit}));
$url_stat{$url}{lasthit} = $5 if (!$url_stat{$url}{lasthit} || ($5 > $url_stat{$url}{lasthit}));
if ($self->{rebuild}) {
if ($self->check_exclusions('','',$url)) {
delete $url_stat{$url};
next;
}
}
} elsif ($data =~ /hits=(\d+);bytes=(\d+);duration=([\-\d]+);url=(.*)/) {
$url = $4;
$hits = $1;
$bytes = $2;
$duration = abs($3);
if ($self->{rebuild}) {
if ($self->check_exclusions('','',$url)) {
delete $url_stat{$url};
next;
}
}
}
$url_stat{$url}{hits} += $hits;
$url_stat{$url}{bytes} += $bytes;
$url_stat{$url}{duration} += $duration;
$total_hits += $url_stat{$url}{hits} || 0;
$total_bytes += $url_stat{$url}{bytes} || 0;
$total_duration += $url_stat{$url}{duration} || 0;
if ($url && $self->{TopUrlUser} && $self->{UserReport}) {
$url_stat{$url}{users}{$user}{hits} += $hits;
$url_stat{$url}{users}{$user}{bytes} += $bytes;
			$url_stat{$url}{users}{$user}{duration} += $duration;
}
}
$infile->close();
# Store number of urls
my $nurl = scalar keys %url_stat;
my $outf = new IO::File;
$outf->open(">>$outdir/stat_count.dat") || return;
flock($outf, 2) || die "FATAL: can't acquire lock on file $outdir/stat_count.dat, $!\n";
$outf->print("urls:$nurl\n");
$outf->close;
my $file = $outdir . '/url.html';
my $out = new IO::File;
$out->open(">$file") || $self->localdie("ERROR: Unable to open $file. $!\n");
my $sortpos = 1;
my $trfunit = $self->{TransfertUnit} || 'B';
$trfunit = 'B' if ($trfunit eq 'BYTE');
# Print the HTML header
my $cal = 'SA_CALENDAR_SA';
$cal = '' if ($week);
$self->_print_header(\$out, $self->{menu}, $cal, $sortpos);
print $out "<h3>$Translate{'Url_number'}: $nurl</h3>\n";
for my $tpe ('Hits', 'Bytes', 'Duration') {
my $t1 = $Translate{"Url_${tpe}_title"};
$t1 =~ s/\%d/$self->{TopNumber}/;
if ($tpe eq 'Hits') {
print $out $self->_print_title($t1, $stat_date, $week);
} else {
print $out "<h4>$t1 $stat_date</h4><div class=\"line-separator\"></div>\n";
}
print $out qq{
<table class="sortable stata">
<thead>
<tr>
<th>$Translate{'Url'}</th>
};
if ($tpe eq 'Hits') {
print $out qq{
<th>$Translate{'Requests'} (%)</th>
<th>$Translate{$self->{TransfertUnit}} (%)</th>
<th>$Translate{'Duration'} (%)</th>
}
} elsif ($tpe eq 'Bytes') {
print $out qq{
<th>$Translate{$self->{TransfertUnit}} (%)</th>
<th>$Translate{'Requests'} (%)</th>
<th>$Translate{'Duration'} (%)</th>
}
} else {
print $out qq{
<th>$Translate{'Duration'} (%)</th>
<th>$Translate{'Requests'} (%)</th>
<th>$Translate{$self->{TransfertUnit}} (%)</th>
}
}
print $out qq{
<th>$Translate{'Throughput'} ($trfunit/s)</th>
<th>$Translate{'Last_visit'}</th>
};
print $out qq{
<th>$Translate{'First_visit'}</th>
} if ($type eq 'hour');
print $out qq{
<th>$Translate{'Cost'} $self->{Currency}</th>
} if ($self->{CostPrice});
print $out qq{
</tr>
</thead>
<tbody>
};
$total_duration = abs($total_duration);
my $i = 0;
foreach my $u (sort { $url_stat{$b}{"\L$tpe\E"} <=> $url_stat{$a}{"\L$tpe\E"} } keys %url_stat) {
next if (!$url_stat{$u}{hits});
my $h_percent = '0.0';
$h_percent = sprintf("%2.2f", ($url_stat{$u}{hits}/$total_hits) * 100) if ($total_hits);
my $b_percent = '0.0';
$b_percent = sprintf("%2.2f", ($url_stat{$u}{bytes}/$total_bytes) * 100) if ($total_bytes);
my $d_percent = '0.0';
$d_percent = sprintf("%2.2f", ($url_stat{$u}{duration}/$total_duration) * 100) if ($total_duration);
my $total_cost = sprintf("%2.2f", int($url_stat{$u}{bytes}/1000000) * $self->{CostPrice});
my $duration = &parse_duration(int($url_stat{$u}{duration}/1000));
my $comma_bytes = $self->format_bytes($url_stat{$u}{bytes});
my $total_throughput = int($url_stat{$u}{bytes} / (($url_stat{$u}{duration}/1000) || 1));
my $comma_throughput = $self->format_bytes($total_throughput);
my $firsthit = '-';
if ($url_stat{$u}{firsthit}) {
$firsthit = ucfirst(strftime("%b %d %T", CORE::localtime($url_stat{$u}{firsthit})));
}
my $lasthit = '-';
if ($url_stat{$u}{lasthit}) {
$lasthit = ucfirst(strftime("%b %d %T", CORE::localtime($url_stat{$u}{lasthit})));
}
if ($type eq 'hour') {
if ($url_stat{$u}{firsthit}) {
$firsthit = ucfirst(strftime("%T", CORE::localtime($url_stat{$u}{firsthit})));
} else {
$firsthit = '-';
}
if ($url_stat{$u}{lasthit}) {
$lasthit = ucfirst(strftime("%T", CORE::localtime($url_stat{$u}{lasthit})));
} else {
				$lasthit = '-';
}
}
print $out "<tr><td>\n";
if (exists $url_stat{$u}{users} && $self->{UserReport}) {
my $label = 'Duration';
if ($tpe eq 'Bytes') {
$label = 'Megabytes';
} elsif ($tpe eq 'Hits') {
$label = 'Requests';
}
my $show = "<a href=\"http://$u/\" target=\"_blank\" class=\"domainLink\">$u</a>";
$show = $u if (exists $self->{UrlAliasName}{$u});
print $out "<div class=\"tooltipLink\"><span class=\"information\">$u</span><div class=\"tooltip\"><table><tr><th>$Translate{'User'}</th><th>$Translate{$label}</th></tr>\n";
my $k = 1;
foreach my $user (sort { $url_stat{$u}{users}{$b}{lc($tpe)} <=> $url_stat{$u}{users}{$a}{lc($tpe)} } keys %{$url_stat{$u}{users}}) {
my $value = $url_stat{$u}{users}{$user}{lc($tpe)};
if ($tpe eq 'Bytes') {
$value = $self->format_bytes($value);
} elsif ($tpe eq 'Duration') {
$value = &parse_duration(int($value/1000));
}
my $usr_lbl = $user;
$usr_lbl =~ s/_SPC_/ /g;
print $out "<tr><td>$usr_lbl</td><td>$value</td></tr>\n";
$k++;
last if ($k > $self->{TopUrlUser});
}
print $out "</table>\n</div></div>\n";
} else {
print $out "<a href=\"http://$u/\" target=\"_blank\" class=\"domainLink\">$u</a>\n";
}
print $out qq{</td>};
if ($tpe eq 'Hits') {
print $out qq{
<td>$url_stat{$u}{hits} <span class="italicPercent">($h_percent)</span></td>
<td>$comma_bytes <span class="italicPercent">($b_percent)</span></td>
<td>$duration <span class="italicPercent">($d_percent)</span></td>
};
} elsif ($tpe eq 'Bytes') {
print $out qq{
<td>$comma_bytes <span class="italicPercent">($b_percent)</span></td>
<td>$url_stat{$u}{hits} <span class="italicPercent">($h_percent)</span></td>
<td>$duration <span class="italicPercent">($d_percent)</span></td>
};
} else {
print $out qq{
<td>$duration <span class="italicPercent">($d_percent)</span></td>
<td>$url_stat{$u}{hits} <span class="italicPercent">($h_percent)</span></td>
<td>$comma_bytes <span class="italicPercent">($b_percent)</span></td>
};
}
print $out qq{
	<td>$comma_throughput</td>
<td>$lasthit</td>
};
print $out qq{
<td>$firsthit</td>
} if ($type eq 'hour');
print $out qq{
<td>$total_cost</td>
} if ($self->{CostPrice});
print $out qq{</tr>};
$i++;
last if ($i > $self->{TopNumber});
}
print $out qq{</tbody></table>};
# Do not show other tables if required
last if ($self->{UrlHitsOnly});
}
print $out qq{
<div class="uplink">
<a href="#atop"><span class="iconUpArrow">$Translate{'Up_link'}</span></a>
</div>
};
$self->_print_footer(\$out);
$out->close();
return $nurl;
}
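# Generate denied.html: blocklist ACL pie chart and top denied URLs table for
# the given period. Returns the number of distinct denied URLs.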
sub _print_top_denied_stat
{
my ($self, $outdir, $year, $month, $day, $week) = @_;
print STDERR "\tTop denied URL statistics in $outdir...\n" if (!$self->{QuietMode});
$0 = "squid-analyzer: Printing top denied url statistics in $outdir";
my $stat_date = $self->set_date($year, $month, $day);
my $type = 'hour';
if (!$day) {
$type = 'day';
}
if (!$month) {
$type = 'month';
}
if ($week) {
$type = 'day';
}
	# Load denied URL statistics
my $infile = new IO::File;
$infile->open("$outdir/stat_denied_url.dat") || return;
my %denied_stat = ();
my $total_hits = 0;
my $total_acl = 0;
while (my $l = <$infile>) {
chomp($l);
my ($user, $data) = split(/\s/, $l);
if ($self->{rebuild}) {
next if (!$self->check_inclusions($user));
next if ($self->check_exclusions($user));
}
		# Anonymize user logins when AnonymizeLogin is enabled, or drop them when user reports are disabled
if ($self->{UserReport}) {
if ($self->{AnonymizeLogin} && ($user !~ /^Anon[a-zA-Z0-9]{16}$/)) {
if (!exists $self->{AnonymizedId}{$user}) {
$self->{AnonymizedId}{$user} = &anonymize_id();
}
$user = $self->{AnonymizedId}{$user};
}
} else {
$user = '-';
}
if ($data =~ /hits=(\d+);first=([^;]*);last=([^;]*);url=(.*);blacklist=(.*)/) {
my $hits = $1;
my $firsthit = $2;
my $lasthit = $3;
my $url = $4;
my $blacklist = $5;
if ($self->{rebuild}) {
next if ($self->check_exclusions('','',$url));
}
$total_hits += $hits;
$denied_stat{$url}{hits} += $hits;
if ($lasthit =~ /^(\d{10})\d+/) {
$lasthit = $1;
}
if ($firsthit =~ /^(\d{10})\d+/) {
$firsthit = $1;
}
$denied_stat{$url}{firsthit} = $firsthit if (!$denied_stat{$url}{firsthit} || ($firsthit < $denied_stat{$url}{firsthit}));
$denied_stat{$url}{lasthit} = $lasthit if (!$denied_stat{$url}{lasthit} || ($lasthit > $denied_stat{$url}{lasthit}));
$denied_stat{$url}{users}{$user}++ if ($self->{TopUrlUser} && $self->{UserReport});
if ($blacklist) {
my %tmp = split(/,/, $blacklist);
foreach my $k (keys %tmp) {
$denied_stat{$url}{blacklist}{$k} += $tmp{$k};
$denied_stat{$url}{users}{$user}{blacklist}{$k} += $tmp{$k} if ($self->{TopUrlUser} && $self->{UserReport});
$total_acl += $tmp{$k};
}
}
}
}
$infile->close();
	# Store number of denied URLs
my $ndenied = scalar keys %denied_stat;
my $outf = new IO::File;
$outf->open(">>$outdir/stat_count.dat") || return;
flock($outf, 2) || die "FATAL: can't acquire lock on file $outdir/stat_count.dat, $!\n";
$outf->print("denied:$ndenied\n");
$outf->close;
my $file = $outdir . '/denied.html';
my $out = new IO::File;
$out->open(">$file") || $self->localdie("ERROR: Unable to open $file. $!\n");
my $sortpos = 1;
# Print the HTML header
my $cal = 'SA_CALENDAR_SA';
$cal = '' if ($week);
$self->_print_header(\$out, $self->{menu}, $cal, $sortpos);
print $out "<h3>$Translate{'Url_number'}: $ndenied</h3>\n";
my %data_acl = ();
$total_acl ||= 1;
foreach my $u (sort { $denied_stat{$b}{hits} <=> $denied_stat{$a}{hits} } keys %denied_stat) {
next if (!exists $denied_stat{$u}{blacklist});
foreach my $k (sort keys %{$denied_stat{$u}{blacklist}}) {
$data_acl{$k} += $denied_stat{$u}{blacklist}{$k};
}
}
foreach my $k (keys %data_acl) {
if (($data_acl{$k}/$total_acl)*100 < $self->{MinPie}) {
$data_acl{'others'} += $data_acl{$k};
delete $data_acl{$k};
}
}
if (scalar keys %data_acl) {
print $out $self->_print_title($Translate{"Blocklist_acl_title"}, $stat_date, $week);
my $squidguard_acl = $self->flotr2_piegraph(1, 'squidguard_acl', $Translate{"Blocklist_acl_title"}, $Translate{'Blocklist_acl_graph'}, '', %data_acl);
print $out qq{<table class="graphs"><tr><td>$squidguard_acl</td></tr></table>};
}
my $t1 = $Translate{"Url_Hits_title"};
$t1 =~ s/\%d/$self->{TopDenied}/;
print $out $self->_print_title($t1, $stat_date, $week);
print $out qq{
<table class="sortable stata">
<thead>
<tr>
<th>$Translate{'Url'}</th>
<th>$Translate{'Requests'} (%)</th>
<th>$Translate{'Last_visit'}</th>
};
print $out qq{
<th>$Translate{'First_visit'}</th>
} if ($type eq 'hour');
print $out qq{<th>Blocklist ACLs</th>};
print $out qq{
</tr>
</thead>
<tbody>
};
my $i = 0;
foreach my $u (sort { $denied_stat{$b}{hits} <=> $denied_stat{$a}{hits} } keys %denied_stat) {
my $h_percent = '0.0';
$h_percent = sprintf("%2.2f", ($denied_stat{$u}{hits}/$total_hits) * 100) if ($total_hits);
my $firsthit = '-';
if ($denied_stat{$u}{firsthit} && ($denied_stat{$u}{firsthit} =~ /^\d{10}(\.\d{3})?$/)) {
$firsthit = ucfirst(strftime("%b %d %T", CORE::localtime($denied_stat{$u}{firsthit})));
}
my $lasthit = '-';
if ($denied_stat{$u}{lasthit} && ($denied_stat{$u}{lasthit} =~ /^\d{10}(\.\d{3})?$/)) {
$lasthit = ucfirst(strftime("%b %d %T", CORE::localtime($denied_stat{$u}{lasthit})));
}
if ($type eq 'hour') {
if ($denied_stat{$u}{firsthit} && ($denied_stat{$u}{firsthit} =~ /^\d{10}(\.\d{3})?$/)) {
$firsthit = ucfirst(strftime("%T", CORE::localtime($denied_stat{$u}{firsthit})));
} else {
$firsthit = '-';
}
if ($denied_stat{$u}{lasthit} && ($denied_stat{$u}{lasthit} =~ /^\d{10}(\.\d{3})?$/)) {
$lasthit = ucfirst(strftime("%T", CORE::localtime($denied_stat{$u}{lasthit})));
} else {
				$lasthit = '-';
}
}
print $out "<tr><td>\n";
if (exists $denied_stat{$u}{users} && $self->{UserReport}) {
print $out qq{
<div class="tooltipLink"><span class="information"><a href="http://$u/" target="_blank" class="domainLink">$u</a></span><div class="tooltip">
<table><tr><th>$Translate{'User'}</th><th>$Translate{'Count'}</th></tr>
};
my $k = 1;
foreach my $user (sort { $denied_stat{$u}{users}{$b} <=> $denied_stat{$u}{users}{$a} } keys %{$denied_stat{$u}{users}}) {
my $usr_lbl = $user;
$usr_lbl =~ s/_SPC_/ /g;
print $out "<tr><td>$usr_lbl</td><td>$denied_stat{$u}{users}{$user}</td></tr>\n";
$k++;
last if ($k > $self->{TopUrlUser});
}
print $out "</table>\n</div></div>";
} else {
print $out "<a href=\"http://$u/\" target=\"_blank\" class=\"domainLink\">$u</a>\n";
}
print $out qq{
</td>
<td>$denied_stat{$u}{hits} <span class="italicPercent">($h_percent)</span></td>
<td>$lasthit</td>
};
print $out qq{
<td>$firsthit</td>
} if ($type eq 'hour');
my $bl = '-';
if (exists $denied_stat{$u}{blacklist}) {
$bl = '';
foreach my $k (sort keys %{$denied_stat{$u}{blacklist}}) {
$bl .= $k . '=' . $denied_stat{$u}{blacklist}{$k} . ' ';
}
}
print $out qq{
<td>$bl</td>
</tr>};
$i++;
last if ($i > $self->{TopDenied});
}
print $out qq{</tbody></table>};
print $out qq{
<div class="uplink">
<a href="#atop"><span class="iconUpArrow">$Translate{'Up_link'}</span></a>
</div>
};
$self->_print_footer(\$out);
$out->close();
return $ndenied;
}
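# Generate domain.html: per-TLD and second-level domain pie charts plus top
# domains tables for the given period. Returns the number of distinct domains.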
sub _print_top_domain_stat
{
my ($self, $outdir, $year, $month, $day, $week) = @_;
print STDERR "\tTop domain statistics in $outdir...\n" if (!$self->{QuietMode});
$0 = "squid-analyzer: Printing top domain statistics in $outdir";
my $stat_date = $self->set_date($year, $month, $day);
my $type = 'hour';
if (!$day) {
$type = 'day';
}
if (!$month) {
$type = 'month';
}
if ($week) {
$type = 'day';
}
	# Load user URL statistics
my $infile = new IO::File;
$infile->open("$outdir/stat_user_url.dat") || return;
my %url_stat = ();
my %domain_stat = ();
my $total_hits = 0;
my $total_bytes = 0;
my $total_duration = 0;
my %perdomain = ();
my $url = '';
my $hits = 0;
my $bytes = 0;
my $cache_hit = 0;
my $cache_bytes = 0;
my $total_cache_hit = 0;
my $total_cache_bytes = 0;
my $duration = 0;
my $first = 0;
my $last = 0;
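	# Build patterns that split a hostname into its second-level label and
	# top-level suffix, using the @TLD1 and @TLD2 lists defined elsewhere in this module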
my $tld_pattern1 = '(' . join('|', @TLD1) . ')';
$tld_pattern1 = qr/([^\.]+)$tld_pattern1$/;
my $tld_pattern2 = '(' . join('|', @TLD2) . ')';
$tld_pattern2 = qr/([^\.]+)$tld_pattern2$/;
while (my $l = <$infile>) {
chomp($l);
my ($user, $data) = split(/\s/, $l);
$user = '-' if (!$self->{UserReport});
if ($self->{rebuild}) {
next if (!$self->check_inclusions($user));
next if ($self->check_exclusions($user));
}
if ($data =~ /hits=(\d+);bytes=(\d+);duration=([\-\d]+);first=([^;]*);last=([^;]*);url=(.*?);cache_hit=(\d*);cache_bytes=(\d*)/) {
$url = lc($6);
$hits = $1;
$bytes = $2;
$duration = abs($3);
$first = $4;
$last = $5;
$cache_hit = $7;
$cache_bytes= $8;
} elsif ($data =~ /hits=(\d+);bytes=(\d+);duration=([\-\d]+);first=([^;]*);last=([^;]*);url=(.*)/) {
$url = lc($6);
$hits = $1;
$bytes = $2;
$duration = abs($3);
$first = $4;
$last = $5;
} elsif ($data =~ /hits=(\d+);bytes=(\d+);duration=([\-\d]+);url=(.*)/) {
$url = $4;
$hits = $1;
$bytes = $2;
$duration = abs($3);
}
if ($self->{rebuild}) {
next if ($self->check_exclusions('','',$url));
}
$url =~ s/:\d+//;
my $done = 0;
if ($url !~ /\.\d+$/) {
if ($url =~ $tld_pattern1) {
$domain_stat{"$1$2"}{hits} += $hits;
$domain_stat{"$1$2"}{bytes} += $bytes;
$domain_stat{"$1$2"}{duration} += $duration;
$domain_stat{"$1$2"}{firsthit} = $first if (!$domain_stat{"$1$2"}{firsthit} || ($first < $domain_stat{"$1$2"}{firsthit}));
$domain_stat{"$1$2"}{lasthit} = $last if (!$domain_stat{"$1$2"}{lasthit} || ($last > $domain_stat{"$1$2"}{lasthit}));
$domain_stat{"$1$2"}{cache_hit} += $cache_hit;
$domain_stat{"$1$2"}{cache_bytes} += $cache_bytes;
$perdomain{"$2"}{hits} += $hits;
$perdomain{"$2"}{bytes} += $bytes;
$perdomain{"$2"}{cache_hit} += $cache_hit;
$perdomain{"$2"}{cache_bytes} += $cache_bytes;
$done = 1;
} elsif ($url =~ $tld_pattern2) {
$domain_stat{"$1$2"}{hits} += $hits;
$domain_stat{"$1$2"}{bytes} += $bytes;
$domain_stat{"$1$2"}{duration} += $duration;
$domain_stat{"$1$2"}{firsthit} = $first if (!$domain_stat{"$1$2"}{firsthit} || ($first < $domain_stat{"$1$2"}{firsthit}));
$domain_stat{"$1$2"}{lasthit} = $last if (!$domain_stat{"$1$2"}{lasthit} || ($last > $domain_stat{"$1$2"}{lasthit}));
$domain_stat{"$1$2"}{cache_hit} += $cache_hit;
$domain_stat{"$1$2"}{cache_bytes} += $cache_bytes;
$perdomain{"$2"}{hits} += $hits;
$perdomain{"$2"}{bytes} += $bytes;
$perdomain{"$2"}{cache_hit} += $cache_hit;
$perdomain{"$2"}{cache_bytes} += $cache_bytes;
$done = 1;
}
			# Only record per-user statistics when a domain was actually matched,
			# otherwise $1/$2 still hold captures from a previous regex
			if ($done && $self->{TopUrlUser} && $self->{UserReport}) {
$domain_stat{"$1$2"}{users}{$user}{hits} += $hits;
$domain_stat{"$1$2"}{users}{$user}{bytes}+= $bytes;
$domain_stat{"$1$2"}{users}{$user}{duration}+= $duration;
}
}
if (!$done) {
my $unknown = 'unknown';
my $others = 'others';
foreach my $u (keys %{$self->{UrlAliasName}}) {
if ($u =~ m|^$url$|i) {
$unknown = $u;
$others = $u;
last;
}
}
$domain_stat{$unknown}{hits} += $hits;
$domain_stat{$unknown}{bytes} += $bytes;
$domain_stat{$unknown}{duration} += $duration;
$domain_stat{$unknown}{firsthit} = $first if (!$domain_stat{$unknown}{firsthit} || ($first < $domain_stat{$unknown}{firsthit}));
$domain_stat{$unknown}{lasthit} = $last if (!$domain_stat{$unknown}{lasthit} || ($last > $domain_stat{$unknown}{lasthit}));
if ($self->{TopUrlUser} && $self->{UserReport}) {
$domain_stat{$unknown}{users}{$user}{hits} += $hits;
$domain_stat{$unknown}{users}{$user}{bytes}+= $bytes;
$domain_stat{$unknown}{users}{$user}{duration}+= $duration;
}
$domain_stat{$unknown}{cache_hit} += $cache_hit;
$domain_stat{$unknown}{cache_bytes} += $cache_bytes;
$perdomain{$others}{hits} += $hits;
$perdomain{$others}{bytes} += $bytes;
$perdomain{$others}{cache_hit} += $cache_hit;
$perdomain{$others}{cache_bytes} += $cache_bytes;
}
$total_hits += $hits;
$total_bytes += $bytes;
$total_duration += $duration;
$total_cache_hit += $cache_hit;
$total_cache_bytes += $cache_bytes;
}
$infile->close();
	# Store number of domains
my $ndom = scalar keys %domain_stat;
my $outf = new IO::File;
$outf->open(">>$outdir/stat_count.dat") || return;
flock($outf, 2) || die "FATAL: can't acquire lock on file $outdir/stat_count.dat, $!\n";
$outf->print("domains:$ndom\n");
$outf->close;
my $file = $outdir . '/domain.html';
my $out = new IO::File;
$out->open(">$file") || $self->localdie("ERROR: Unable to open $file. $!\n");
my $sortpos = 1;
# Print the HTML header
my $cal = 'SA_CALENDAR_SA';
$cal = '' if ($week);
$self->_print_header(\$out, $self->{menu}, $cal, $sortpos);
print $out "<h3>$Translate{'Domain_number'}: $ndom</h3>\n";
$total_hits ||= 1;
$total_bytes ||= 1;
for my $tpe ('Hits', 'Bytes', 'Duration') {
my $t1 = $Translate{"Domain_${tpe}_title"};
$t1 =~ s/\%d/$self->{TopNumber}/;
if ($tpe eq 'Hits') {
print $out $self->_print_title($t1, $stat_date, $week);
my %data = ();
foreach my $dom (keys %perdomain) {
if (($perdomain{$dom}{hits}/$total_hits)*100 > $self->{MinPie}) {
$data{$dom} = $perdomain{$dom}{hits};
} else {
$data{'others'} += $perdomain{$dom}{hits};
}
}
my $title = "$Translate{'Domain_graph_hits_title'} $stat_date";
my $domain_hits = $self->flotr2_piegraph(1, 'domain_hits', $title, $Translate{'Domains_graph'}, '', %data);
%data = ();
foreach my $dom (keys %domain_stat) {
if (($domain_stat{$dom}{hits}/$total_hits)*100 > $self->{MinPie}) {
$data{$dom} = $domain_stat{$dom}{hits};
} else {
$data{'others'} += $domain_stat{$dom}{hits};
}
}
my $title2 = "$Translate{'Second_domain_graph_hits_title'} $stat_date";
my $domain2_hits = $self->flotr2_piegraph(1, 'second_domain_hits', $title2, $Translate{'Domains_graph'}, '', %data);
print $out qq{
<style>
#container {
display: table;
}
#row {
display: table-row;
}
#domain_hits, #second_domain_hits {
display: table-cell;
}
#domain_hits { z-index: 999; }
</style>
<table class="graphs"><tr><td>
<div id="container">
$domain_hits
</div>
</td><td>
<div id="container">
$domain2_hits
</div>
</td></tr>
};
$domain_hits = '';
$domain2_hits = '';
%data = ();
foreach my $dom (keys %perdomain) {
if (($perdomain{$dom}{bytes}/$total_bytes)*100 > $self->{MinPie}) {
$data{$dom} = $perdomain{$dom}{bytes};
} else {
$data{'others'} += $perdomain{$dom}{bytes};
}
}
$title = "$Translate{'Domain_graph_bytes_title'} $stat_date";
my $domain_bytes = $self->flotr2_piegraph(1, 'domain_bytes', $title, $Translate{'Domains_graph'}, '', %data);
%data = ();
foreach my $dom (keys %domain_stat) {
if (($domain_stat{$dom}{bytes}/$total_bytes)*100 > $self->{MinPie}) {
$data{$dom} = $domain_stat{$dom}{bytes};
} else {
$data{'others'} += $domain_stat{$dom}{bytes};
}
}
$title2 = "$Translate{'Second_domain_graph_bytes_title'} $stat_date";
my $domain2_bytes = $self->flotr2_piegraph(1, 'second_domain_bytes', $title2, $Translate{'Domains_graph'}, '', %data);
print $out qq{<tr><td>
<style>
#container {
display: table;
}
#row {
display: table-row;
}
#domain_bytes, #second_domain_bytes {
display: table-cell;
}
#domain_bytes { z-index: 999; }
</style>
<div id="container">
$domain_bytes
</div>
</td><td>
<div id="container">
$domain2_bytes
</div>
</td></tr></table>};
$domain_bytes = '';
$domain2_bytes = '';
%data = ();
} else {
print $out "<h4>$t1 $stat_date</h4><div class=\"line-separator\"></div>\n";
}
my $trfunit = $self->{TransfertUnit} || 'B';
$trfunit = 'B' if ($trfunit eq 'BYTE');
print $out qq{
<table class="sortable stata">
<thead>
<tr>
<th>$Translate{'Url'}</th>
};
if ($tpe eq 'Hits') {
print $out qq{
<th>$Translate{'Requests'} (%)</th>
<th>$Translate{$self->{TransfertUnit}} (%)</th>
<th>$Translate{'Duration'} (%)</th>
};
} elsif ($tpe eq 'Bytes') {
print $out qq{
<th>$Translate{$self->{TransfertUnit}} (%)</th>
<th>$Translate{'Requests'} (%)</th>
<th>$Translate{'Duration'} (%)</th>
};
} else {
print $out qq{
<th>$Translate{'Duration'} (%)</th>
<th>$Translate{'Requests'} (%)</th>
<th>$Translate{$self->{TransfertUnit}} (%)</th>
};
}
print $out qq{
<th>$Translate{'Throughput'} ($trfunit/s)</th>
<th>$Translate{'Last_visit'}</th>
};
print $out qq{
<th>$Translate{'First_visit'}</th>
} if ($type eq 'hour');
print $out qq{
<th>$Translate{'Cost'} $self->{Currency}</th>
} if ($self->{CostPrice});
print $out qq{
</tr>
</thead>
<tbody>
};
$total_duration = abs($total_duration);
my $i = 0;
foreach my $u (sort { $domain_stat{$b}{"\L$tpe\E"} <=> $domain_stat{$a}{"\L$tpe\E"} } keys %domain_stat) {
next if (!$domain_stat{$u}{hits});
my $h_percent = '0.0';
$h_percent = sprintf("%2.2f", ($domain_stat{$u}{hits}/$total_hits) * 100) if ($total_hits);
my $b_percent = '0.0';
$b_percent = sprintf("%2.2f", ($domain_stat{$u}{bytes}/$total_bytes) * 100) if ($total_bytes);
my $d_percent = '0.0';
$d_percent = sprintf("%2.2f", ($domain_stat{$u}{duration}/$total_duration) * 100) if ($total_duration);
my $total_cost = sprintf("%2.2f", int($domain_stat{$u}{bytes}/1000000) * $self->{CostPrice});
my $duration = &parse_duration(int($domain_stat{$u}{duration}/1000));
my $comma_bytes = $self->format_bytes($domain_stat{$u}{bytes});
my $total_throughput = int($domain_stat{$u}{bytes} / (($domain_stat{$u}{duration}/1000) || 1));
my $comma_throughput = $self->format_bytes($total_throughput);
my $firsthit = '-';
if ($domain_stat{$u}{firsthit}) {
$firsthit = ucfirst(strftime("%b %d %T", CORE::localtime($domain_stat{$u}{firsthit})));
}
my $lasthit = '-';
if ($domain_stat{$u}{lasthit}) {
$lasthit = ucfirst(strftime("%b %d %T", CORE::localtime($domain_stat{$u}{lasthit})));
}
if ($type eq 'hour') {
if ($domain_stat{$u}{firsthit}) {
$firsthit = ucfirst(strftime("%T", CORE::localtime($domain_stat{$u}{firsthit})));
} else {
$firsthit = '-';
}
if ($domain_stat{$u}{lasthit}) {
$lasthit = ucfirst(strftime("%T", CORE::localtime($domain_stat{$u}{lasthit})));
} else {
$lasthit = '-';
}
}
print $out "<tr><td>\n";
if (exists $domain_stat{$u}{users} && $self->{UserReport}) {
my $dname = "*.$u";
			$dname = $u if (grep(/^$u$/i, 'localhost', 'unknown') || exists $self->{UrlAliasName}{$u});
my $label = 'Duration';
if ($tpe eq 'Bytes') {
$label = 'Megabytes';
} elsif ($tpe eq 'Hits') {
$label = 'Requests';
}
print $out "<div class=\"tooltipLink\"><span class=\"information\">$dname</span><div class=\"tooltip\"><table><tr><th>$Translate{'User'}</th><th>$Translate{$label}</th></tr>\n";
my $k = 1;
foreach my $user (sort { $domain_stat{$u}{users}{$b}{lc($tpe)} <=> $domain_stat{$u}{users}{$a}{lc($tpe)} } keys %{$domain_stat{$u}{users}}) {
my $value = $domain_stat{$u}{users}{$user}{lc($tpe)};
if ($tpe eq 'Bytes') {
$value = $self->format_bytes($value);
} elsif ($tpe eq 'Duration') {
$value = &parse_duration(int($value/1000));
}
my $usr_lbl = $user;
$usr_lbl =~ s/_SPC_/ /g;
print $out "<tr><td>$usr_lbl</td><td>$value</td></td></tr>\n";
$k++;
last if ($k > $self->{TopUrlUser});
}
print $out "</table>\n";
} else {
print $out "*.$u\n";
}
print $out qq{
</div></div>
</td>
};
if ($tpe eq 'Hits') {
print $out qq{
<td>$domain_stat{$u}{hits} <span class="italicPercent">($h_percent)</span></td>
<td>$comma_bytes <span class="italicPercent">($b_percent)</span></td>
<td>$duration <span class="italicPercent">($d_percent)</span></td>
};
} elsif ($tpe eq 'Bytes') {
print $out qq{
<td>$comma_bytes <span class="italicPercent">($b_percent)</span></td>
<td>$domain_stat{$u}{hits} <span class="italicPercent">($h_percent)</span></td>
<td>$duration <span class="italicPercent">($d_percent)</span></td>
};
} else {
print $out qq{
<td>$duration <span class="italicPercent">($d_percent)</span></td>
<td>$domain_stat{$u}{hits} <span class="italicPercent">($h_percent)</span></td>
<td>$comma_bytes <span class="italicPercent">($b_percent)</span></td>
};
}
print $out qq{
<td>$comma_throughput</td>
<td>$lasthit</td>
};
print $out qq{
<td>$firsthit</td>
} if ($type eq 'hour');
print $out qq{
<td>$total_cost</td>
} if ($self->{CostPrice});
print $out qq{
</tr>};
$i++;
last if ($i > $self->{TopNumber});
}
print $out qq{</tbody></table>};
# Do not show other tables if required
last if ($self->{UrlHitsOnly});
}
print $out qq{
<div class="uplink">
<a href="#atop"><span class="iconUpArrow">$Translate{'Up_link'}</span></a>
</div>
};
$self->_print_footer(\$out);
$out->close();
return $ndom;
}
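# Rebuild the top-level index.html with per-year request, traffic and
# throughput totals read from each year's stat_code.dat.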
sub _gen_summary
{
my ($self, $outdir) = @_;
	# Get all year subdirectories
opendir(DIR, "$outdir") or $self->localdie("ERROR: Can't read directory $outdir, $!\n");
my @dirs = grep { /^\d{4}$/ && -d "$outdir/$_" } readdir(DIR);
closedir DIR;
my %code_stat = ();
my %throughput_stat = ();
my %total_request = ();
my %total_bytes = ();
my %total_elapsed = ();
my %total_throughput = ();
foreach my $d (@dirs) {
# Load code statistics
my $infile = new IO::File;
$infile->open("$outdir/$d/stat_code.dat") || return;
while (my $l = <$infile>) {
chomp($l);
my ($code, $data) = split(/\s/, $l);
$data =~ /hits_month=([^;]+);bytes_month=([^;]+)/;
my $hits = $1 || '';
my $bytes = $2 || '';
$hits =~ s/,$//;
$bytes =~ s/,$//;
my %hits_tmp = split(/[:,]/, $hits);
foreach my $tmp (sort {$a <=> $b} keys %hits_tmp) {
$code_stat{$d}{$code}{request} += $hits_tmp{$tmp};
}
my %bytes_tmp = split(/[:,]/, $bytes);
foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) {
$code_stat{$d}{$code}{bytes} += $bytes_tmp{$tmp};
}
if ($data =~ /thp_bytes_month=([^;]+);thp_duration_month=([^;]+)/) {
$bytes = $1 || '';
my $elapsed = $2 || '';
$elapsed =~ s/,$//;
my %bytes_tmp = split(/[:,]/, $bytes);
foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) {
$throughput_stat{$d}{$code}{bytes} += $bytes_tmp{$tmp};
}
my %elapsed_tmp = split(/[:,]/, $elapsed);
foreach my $tmp (sort {$a <=> $b} keys %elapsed_tmp) {
$throughput_stat{$d}{$code}{elapsed} += $elapsed_tmp{$tmp};
}
}
}
$infile->close();
$total_request{$d} = $code_stat{$d}{HIT}{request} + $code_stat{$d}{MISS}{request};
		$total_bytes{$d} = $code_stat{$d}{HIT}{bytes} + $code_stat{$d}{MISS}{bytes};
$total_throughput{$d} = $throughput_stat{$d}{HIT}{bytes} + $throughput_stat{$d}{MISS}{bytes};
		$total_elapsed{$d} = $throughput_stat{$d}{HIT}{elapsed} + $throughput_stat{$d}{MISS}{elapsed};
}
my $file = $outdir . '/index.html';
my $out = new IO::File;
$out->open(">$file.tmp") || $self->localdie("ERROR: Unable to open $file. $!\n");
# Print the HTML header
$self->_print_main_header(\$out);
my $colspn = 3;
$colspn = 4 if ($self->{CostPrice});
print $out qq{
<h4>$Translate{'Globals_Statistics'}</h4>
<div class="line-separator"></div>
<table class="stata">
<thead>
<tr>
<th class="nobg"></th>
<th colspan="3" scope="col" class="headerBlack">$Translate{'Requests'}</th>
<th colspan="3" scope="col" class="headerBlack">$Translate{$self->{TransfertUnit}}</th>
<th colspan="$colspn" scope="col" class="headerBlack">$Translate{'Total'}</th>
</tr>
<tr>
<th scope="col">$Translate{'Years'}</th>
<th scope="col">$Translate{'Hit'}</th>
<th scope="col">$Translate{'Miss'}</th>
<th scope="col">$Translate{'Denied'}</th>
<th scope="col">$Translate{'Hit'}</th>
<th scope="col">$Translate{'Miss'}</th>
<th scope="col">$Translate{'Denied'}</th>
<th scope="col">$Translate{'Requests'}</th>
<th scope="col">$Translate{$self->{TransfertUnit}}</th>
<th scope="col">$Translate{'Throughput'}</th>
};
print $out qq{
<th scope="col">$Translate{'Cost'} $self->{Currency}</th>
} if ($self->{CostPrice});
print $out qq{
</tr>
</thead>
<tbody>
};
foreach my $year (sort {$b <=> $a} keys %code_stat) {
my $comma_bytes = $self->format_bytes($total_bytes{$year});
my $hit_bytes = $self->format_bytes($code_stat{$year}{HIT}{bytes});
my $miss_bytes = $self->format_bytes($code_stat{$year}{MISS}{bytes});
my $denied_bytes = $self->format_bytes($code_stat{$year}{DENIED}{bytes});
my $total_cost = sprintf("%2.2f", int($total_bytes{$year}/1000000) * $self->{CostPrice});
my $subtotal = ($throughput_stat{$year}{MISS}{elapsed}+$throughput_stat{$year}{HIT}{elapsed}) || 1;
my $total_throughputs = int($total_throughput{$year}/(($subtotal/1000) || 1));
my $comma_throughput = $self->format_bytes($total_throughputs);
my $trfunit = $self->{TransfertUnit} || 'B';
$trfunit = 'B' if ($trfunit eq 'BYTE');
print $out qq{
<tr>
<td><a href="$year/index.html">$Translate{'Stat_label'} $year *</a></td>
<td>$code_stat{$year}{HIT}{request}</td>
<td>$code_stat{$year}{MISS}{request}</td>
<td>$code_stat{$year}{DENIED}{request}</td>
<td>$hit_bytes</td>
<td>$miss_bytes</td>
<td>$denied_bytes</td>
<td>$total_request{$year}</td>
<td>$comma_bytes</td>
<td>$comma_throughput $trfunit/s</td>
};
print $out qq{<td>$total_cost</td>} if ($self->{CostPrice});
print $out qq{</tr>};
}
print $out qq{
</tbody>
</table>
<blockquote class="notification">(*) $Translate{'Click_year_stat'}</blockquote>
<h4>$Translate{'Legend'}</h4>
<div class="line-separator"></div>
<div class="displayLegend">
<span class="legendeTitle">$Translate{'Hit'}</span>: <span class="descLegend">$Translate{'Hit_help'}</span><br/>
<span class="legendeTitle">$Translate{'Miss'}</span>: <span class="descLegend">$Translate{'Miss_help'}</span><br/>
};
print $out qq{<span class="legendeTitle">$Translate{'Cost'}</span>: <span class="descLegend">$Translate{'Cost_help'} $self->{CostPrice} $self->{Currency}</span><br/>} if ($self->{CostPrice});
print $out qq{
</div>
};
$self->_print_footer(\$out);
$out->close();
rename("$file.tmp", "$file");
}
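# Read the squidanalyzer configuration file, apply the command line log file
# override and validate the main settings. Returns the options as a hash.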
sub parse_config
{
my ($self, $file, $log_file, $rebuild) = @_;
$self->localdie("FATAL: no configuration file!\n") if (!-e $file);
my %opt = ();
open(CONF, $file) or $self->localdie("ERROR: can't open file $file, $!\n");
while (my $l = <CONF>) {
chomp($l);
$l =~ s/\r//;
next if (!$l || ($l =~ /^\s*#/));
my ($key, $val) = split(/\s+/, $l, 2);
if ($key ne 'LogFile') {
$opt{$key} = $val;
} else {
push(@{$opt{LogFile}}, split(/[,]+/, $val));
}
}
close(CONF);
# Set logfile from command line if any.
@{$opt{LogFile}} = split(/[,]+/, $log_file) if ($log_file);
# Check config
if (!exists $opt{Output} || !-d $opt{Output}) {
$self->localdie("ERROR: you must give a valid output directory. See option: Output\n");
}
if ( ($#{$opt{LogFile}} < 0) && !$rebuild) {
$self->localdie("ERROR: you must give a Squid log file to parse. See LogFile or option -l\n");
} elsif (!$rebuild) {
foreach my $f (@{$opt{LogFile}}) {
if (!-f $f) {
$self->localdie("ERROR: you must give a valid path to the Squid log file, \"$f\" is not valid.\n");
}
}
}
if (exists $opt{DateFormat}) {
if ( ($opt{DateFormat} !~ m#\%y#) || (($opt{DateFormat} !~ m#\%m#) && ($opt{DateFormat} !~ m#\%M#) )|| ($opt{DateFormat} !~ m#\%d#) ) {
$self->localdie("ERROR: bad date format: $opt{DateFormat}, must have \%y, \%m or \%M, \%d. See DateFormat option.\n");
}
}
if ($opt{Lang} && !-e $opt{Lang}) {
$self->localdie("ERROR: can't find translation file $opt{Lang}. See option: Lang\n");
}
if ($opt{ImgFormat} && !grep(/^$opt{ImgFormat}$/, 'png','jpg')) {
$self->localdie("ERROR: unknown image format. See option: ImgFormat\n");
}
if (defined $opt{TimeZone} && $opt{TimeZone} ne 'auto' && $opt{TimeZone} !~ /^[+\-]\d{1,2}$/) {
$self->localdie("ERROR: timezone format: +/-HH, ex: +01. See configuration directive: TimeZone\n");
}
return %opt;
}
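# Load the network aliases file: each line maps an alias name to a
# tab-separated list of CIDR networks or regexes.
# Returns a (network-or-regex => alias name) hash.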
sub parse_network_aliases
{
my ($self, $file) = @_;
return if (!$file || !-f $file);
my %alias = ();
open(ALIAS, $file) or $self->localdie("ERROR: can't open network aliases file $file, $!\n");
my $i = 0;
while (my $l = <ALIAS>) {
chomp($l);
$i++;
next if (!$l || ($l =~ /^\s*#/));
$l =~ s/\s*#.*//;
my @data = split(/\t+/, $l, 2);
if ($#data == 1) {
my @rg = split(/(?<!\{\d)[\s,;](?!\d+\})/, $data[1]);
foreach my $r (@rg) {
$r =~ s/^\^//;
				# If this is not a CIDR notation
if ($r !~ /^\d+\.\d+\.\d+\.\d+\/\d+$/) {
$self->check_regex($r, "$file at line $i");
}
$alias{"$r"} = $data[0];
}
} else {
$self->localdie("ERROR: wrong format in network aliases file $file, line $i\n");
}
}
close(ALIAS);
return %alias;
}
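# Load the user aliases file: each line maps an alias name to a list of user
# name regexes. Returns a (regex => alias name) hash.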
sub parse_user_aliases
{
my ($self, $file) = @_;
return if (!$file || !-f $file);
my %alias = ();
open(ALIAS, $file) or $self->localdie("ERROR: can't open user aliases file $file, $!\n");
my $i = 0;
while (my $l = <ALIAS>) {
chomp($l);
$i++;
next if (!$l || ($l =~ /^\s*#/));
my @data = split(/\t+/, $l, 2);
		$data[0] =~ s/\s+/_SPC_/g; # Replace spaces, they are not allowed
if ($#data == 1) {
my @rg = split(/(?<!\{\d)[\s,;](?!\d+\})/, $data[1]);
foreach my $r (@rg) {
$r =~ s/^\^//;
$r =~ s/([^\\])\$$/$1/;
$self->check_regex($r, "$file at line $i");
$alias{"$r"} = $data[0];
}
} else {
$self->localdie("ERROR: wrong format in user aliases file $file, line $i\n");
}
}
close(ALIAS);
return %alias;
}
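# Load the url aliases file: each line maps an alias name to a comma-separated
# list of URL regexes. Returns a (regex => alias name) hash.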
sub parse_url_aliases
{
my ($self, $file) = @_;
return if (!$file || !-f $file);
my %alias = ();
open(ALIAS, $file) or $self->localdie("ERROR: can't open url aliases file $file, $!\n");
my $i = 0;
while (my $l = <ALIAS>) {
chomp($l);
$i++;
next if (!$l || ($l =~ /^\s*#/));
$l =~ s/\s*#.*//;
my @data = split(/\t+/, $l, 2);
if ($#data == 1) {
my @rg = split(/,/, $data[1]);
foreach my $r (@rg) {
$r =~ s/^\^//;
$self->check_regex($r, "$file at line $i");
$alias{"$r"} = $data[0];
}
$self->{UrlAliasName}{$data[0]} = 1;
} else {
$self->localdie("ERROR: wrong format in url aliases file $file, line $i\n");
}
}
close(ALIAS);
return %alias;
}
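# Load the exclusion file; lines are of the form "USER|CLIENT|URI|NETWORK pattern ...".
# Returns a hash of patterns grouped by type.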
sub parse_exclusion
{
my ($self, $file) = @_;
return if (!$file || !-f $file);
my %exclusion = ();
open(EXCLUDED, $file) or $self->localdie("ERROR: can't open exclusion file $file, $!\n");
my $i = 0;
while (my $l = <EXCLUDED>) {
chomp($l);
$i++;
next if (!$l || ($l =~ /^\s*#/));
# remove comments at end of line
$l =~ s/\s*#.*//;
if ($l =~ m#^(USER|CLIENT|URI|NETWORK)\s+(.*)#) {
my $lbl = lc($1) . 's';
my @rg = split(m#\s+#, $2);
foreach my $r (@rg) {
next if ($lbl eq 'networks');
$self->check_regex($r, "$file at line $i");
}
push(@{$exclusion{$lbl}}, @rg);
} else {
			# backward compatibility is no longer supported
$self->localdie("ERROR: wrong line format in file $file at line $i => $l\n");
}
}
close(EXCLUDED);
return %exclusion;
}
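# Load the inclusion file; lines are of the form "USER|CLIENT|NETWORK pattern ...".
# Returns a hash of patterns grouped by type.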
sub parse_inclusion
{
my ($self, $file) = @_;
return if (!$file || !-f $file);
my %inclusion = ();
open(INCLUDED, $file) or $self->localdie("ERROR: can't open inclusion file $file, $!\n");
my $i = 0;
while (my $l = <INCLUDED>) {
chomp($l);
$i++;
next if (!$l || ($l =~ /^\s*#/));
# remove comments at end of line
$l =~ s/\s*#.*//;
if ($l =~ m#^(USER|CLIENT|NETWORK)\s+(.*)#) {
my $lbl = lc($1) . 's';
my @rg = split(m#\s+#, $2);
foreach my $r (@rg) {
next if ($lbl eq 'networks');
$self->check_regex($r, "$file at line $i");
}
push(@{$inclusion{$lbl}}, @rg);
} else {
			# backward compatibility is no longer supported
$self->localdie("ERROR: wrong line format in file $file at line $i\n");
}
}
close(INCLUDED);
return %inclusion;
}
# User URL-encode
sub escape
{
my ($toencode) = @_;
return undef unless defined($toencode);
$toencode =~ s/[^a-zA-Z0-9_.-]/_/g;
return $toencode;
}
# Set date to user format
sub set_date
{
my ($self, $year, $month, $day) = @_;
my $date_format = $self->{DateFormat};
$date_format =~ s/\%y/$year/;
$date_format =~ s/\%m/$month/;
$date_format =~ s/\%d/$day/;
$date_format =~ s/\%M/$Translate{$month}/;
$date_format =~ s/([^\p{Letter}\p{Digit}]){2,3}/$1/;
$date_format =~ s/^[^\p{Letter}\p{Digit}]+//;
$date_format =~ s/[^\p{Letter}\p{Digit}]+$//;
return $date_format;
}
# Format bytes with comma for better reading
sub format_bytes
{
my ($self, $text) = @_;
if ($self->{TransfertUnitValue} > 1) {
$text = sprintf("%.2f", $text / $self->{TransfertUnitValue});
}
$text = reverse $text;
$text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
return scalar reverse $text;
}
sub _print_title
{
my ($self, $title, $stat_date, $week) = @_;
my $week_title = '';
$week_title = " $Translate{Week} $week" if ($week);
my $para = qq{
<h4>$title $stat_date$week_title</h4>
<div class="line-separator"></div>
};
return $para;
}
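# Build the HTML calendar block: a day-of-month grid with week numbers for
# 'day' reports or a month grid for 'month' reports, linking to the periods
# that have an index.html.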
sub _get_calendar
{
my ($self, $stat_date, $year, $month, $type, $outdir, $rewind) = @_;
my $para = "<div id=\"calendar\">\n";
if ($type eq 'day') {
$para .= "<table><tr><th colspan=\"8\">$stat_date</th></tr>\n";
my @wday = qw(Mo Tu We Th Fr Sa Su);
my @std_day = qw(Su Mo Tu We Th Fr Sa);
my %day_lbl = ();
if (exists $Translate{WeekDay}) {
my @tmpwday = split(/\s+/, $Translate{WeekDay});
for (my $i = 0; $i <= $#std_day; $i++) {
$day_lbl{$std_day[$i]} = $tmpwday[$i];
}
} else {
for (my $i = 0; $i <= $#wday; $i++) {
$day_lbl{$wday[$i]} = $wday[$i];
}
}
$para .= "<tr><td>&nbsp;</td>";
map { $para .= '<td align="center">' . $day_lbl{$_} . '</td>'; } @wday;
$para .= "</tr>\n";
my @currow = ('','','','','','','');
my %weeks_num = ();
my $wn = '';
my $wn_ok = '';
my $wd = '';
for my $d ("01" .. "31") {
$wn = &get_week_number($year,$month,$d);
next if ($wn == -1);
$wd = &get_day_of_week($year,$month,$d);
next if ($wd == -1);
$wn_ok = $wn;
if (-f "$outdir/$d/index.html") {
$currow[$wd-1] = "<td><a href=\"$rewind$d/index.html\">$d</a></td>";
} else {
$currow[$wd-1] = "<td>$d</td>";
}
if ($wd == 7) {
map { $_ = "<td>&nbsp;</td>" if ($_ eq ''); } @currow;
@{$weeks_num{$wn_ok}} = @currow;
@currow = ('','','','','','','');
}
}
if ( ($wd < 7) && ($wd != -1) && ($wn_ok != -1) ) {
map { $_ = "<td>&nbsp;</td>" if ($_ eq ''); } @currow;
@{$weeks_num{$wn_ok}} = @currow;
}
my $path = $outdir;
$path =~ s/(\/\d{4})\/\d{2}.*/$1/;
my $prefix = $self->{WebUrl} || '';
$prefix .= '/' if ( $self->{WebUrl} && ($self->{WebUrl} !~ m#\/$#) );
foreach my $w (sort { $a <=> $b } keys %weeks_num) {
my $ww = sprintf("%02d", $w+1);
my $week = "<tr><th>$ww</th>";
if (-d "$path/week$ww") {
$week = "<tr><th><a href=\"$prefix$year/week$ww\">$ww</a></th>";
}
$para .= $week . join('', @{$weeks_num{$w}}) . "</tr>\n";
}
$para .= "</table>\n";
} elsif ($type eq 'month') {
$para .= "<table><tr><th colspan=\"4\">$stat_date</th></tr>\n";
for my $i ('01' .. '12') {
$para .= "<tr>" if (grep(/^$i$/, '01', '04', '07','10'));
if (-f "$outdir/$i/index.html") {
$para .= "<td><a href=\"$i/index.html\">$Translate{$i}</a></td>";
} else {
$para .= "<td>$Translate{$i}</td>";
}
$para .= "</tr>\n" if (grep(/^$i$/, '03', '06', '09', '12'));
}
$para .= "</table>\n";
}
$para .= "</div>\n";
return $para;
}
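# Return a random 16-character alphanumeric identifier prefixed with 'Anon'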
sub anonymize_id
{
my $u_id = '';
while (length($u_id) < 16) {
my $c = chr(int(rand(127)));
if ($c =~ /[a-zA-Z0-9]/) {
$u_id .= $c;
}
}
return 'Anon' . $u_id;
}
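# Return the HTML/JavaScript snippet drawing a Flotr2 bar graph with up to four
# data series; the x axis is labelled by hour, day or month depending on $xtype.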
sub flotr2_bargraph
{
my ($self, $buttonid, $divid, $xtype, $title, $xtitle, $ytitle, $data1, $legend1, $data2, $legend2, $data3, $legend3, $data4, $legend4) = @_;
my @legend = ();
my @data = ();
my $i = 0;
push(@data, "var d1 = [$data1];\n") if ($data1);
push(@legend, "{ data: d1, label: \"$legend1\", color: \"$GRAPH_COLORS[$i++]\", mouse:{track:true} },\n") if ($data1);
push(@data, "var d2 = [$data2];\n") if ($data2);
push(@legend, "{ data: d2, label: \"$legend2\", color: \"$GRAPH_COLORS[$i++]\", mouse:{track:true} },\n") if ($data2);
push(@data, "var d3 = [$data3];\n") if ($data3);
push(@legend, "{ data: d3, label: \"$legend3\", color: \"$GRAPH_COLORS[$i++]\", mouse:{track:true} },\n") if ($data3);
push(@data, "var d4 = [$data4];\n") if ($data4);
push(@legend, "{ data: d4, label: \"$legend4\", color: \"$GRAPH_COLORS[$i++]\", mouse:{track:true} },\n") if ($data4);
my $month_array = '';
my $xlabel = '';
my $numticks = 0;
my $xmode = 'normal';
if ($xtype eq 'month') {
$month_array = qq{var months = [ '$Translate{"01"}', '$Translate{"02"}', '$Translate{"03"}', '$Translate{"04"}', '$Translate{"05"}', '$Translate{"06"}', '$Translate{"07"}', '$Translate{"08"}', '$Translate{"09"}', '$Translate{"10"}', '$Translate{"11"}', '$Translate{"12"}' ];
};
$xlabel = qq{return months[(pos -1) % 12];};
$numticks = 12;
} elsif ($xtype eq 'day') {
$xlabel = qq{var days = [01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31];
return days[(pos - 1) % 31];
};
$numticks = 31;
} elsif ($xtype =~ /\[.*\]/) {
$xmode = 'time';
$xlabel = qq{var days = $xtype;
return days[(pos - 1) % 7];
};
$numticks = 7;
} else {
$xlabel = qq{var hours = [00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,21,22,23];
return hours[pos % 24];
};
$numticks = 24;
}
my $tickFormatter = qq{
tickFormatter: function(x) {
var pos = parseInt(x);
$xlabel
},
};
$tickFormatter = '' if ($xmode eq 'time');
my $dateTracker_lblopts = '';
map { if (/label: "([^"]+)"/) { $dateTracker_lblopts .= "'$1',"; } } @legend;
$dateTracker_lblopts =~ s/,$//;
$dateTracker_lblopts = "[$dateTracker_lblopts]";
my $dateTracker_dataopts = '';
map { if (/var (d\d+) =/) { $dateTracker_dataopts .= "$1,"; } } @data;
$dateTracker_dataopts =~ s/,$//;
$dateTracker_dataopts = "[$dateTracker_dataopts]";
return <<EOF;
<div id="$divid"></div>
<script type="text/javascript">
$month_array
(function mouse_zoom(container) {
//document.writeln('<table class="tbbutton"><tr><td><input type="button" class="dldButton" value="To Image" id="toimage$buttonid" onclick="return false;">'+
// '<input type="button" class="dldButton" value="Download" id="download$buttonid" onclick="return false;">' +
// '<input type="button" class="dldButton" value="Reset" id="reset$buttonid" onclick="return false;"></td></tr><tr><td>&nbsp;</td></tr></table>'
// );
@data
var options = {
mouse: {
track: true,
relative: true,
trackFormatter: function(obj){ return dateTracker(obj,'$xtype',$dateTracker_lblopts,$dateTracker_dataopts) },
},
yaxis: {
min: 0,
mode: "normal",
autoscaleMargin: 1,
title: "$ytitle",
},
xaxis: {
mode: "$xmode",
noTicks: $numticks,
$tickFormatter
title: "$xtitle",
},
title: "$title",
legend: {
position: "nw",
backgroundColor: "#D2E8FF",
backgroundOpacity: 0.4
},
HtmlText: false,
};
function drawGraph(opts) {
var o = Flotr._.extend(Flotr._.clone(options), opts );
return Flotr.draw(
container,
[
@legend
],
o
);
}
var graph = drawGraph();
Flotr.EventAdapter.observe(container, "flotr:select", function(area) {
f = drawGraph({
xaxis: {
min: area.x1,
max: area.x2
},
yaxis: {
min: area.y1,
max: area.y2
}
});
});
Flotr.EventAdapter.observe(container, "flotr:click", function() {
drawGraph();
});
/*
document.getElementById('reset$buttonid').onclick = function() {
graph.download.restoreCanvas();
};
document.getElementById('download$buttonid').onclick = function(){
if (Flotr.isIE && Flotr.isIE < 9) {
alert(
"Your browser doesn't allow you to get a bitmap image from the plot, " +
"you can only get a VML image that you can use in Microsoft Office.<br />"
);
}
graph.download.saveImage('$self->{ImgFormat}');
};
document.getElementById('toimage$buttonid').onclick = function() {
if (Flotr.isIE && Flotr.isIE < 9) {
alert(
"Your browser doesn't allow you to get a bitmap image from the plot, " +
"you can only get a VML image that you can use in Microsoft Office.<br />"
);
}
graph.download.saveImage('$self->{ImgFormat}', null, null, true);
};
*/
})(document.getElementById("$divid"));
</script>
EOF
}
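# Return the HTML/JavaScript snippet drawing a Flotr2 pie chart from a
# (label => value) hash.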
sub flotr2_piegraph
{
my ($self, $buttonid, $divid, $title, $xlabel, $ylabel, %data) = @_;
my @datadef = ();
my @contdef = ();
my $i = 1;
foreach my $k (sort {$data{$b} <=> $data{$a} } keys %data) {
push(@datadef, "var d$i = [ [0,$data{$k}] ];\n");
push(@contdef, "{ data: d$i, label: \"$k\" },\n");
$i++;
}
return <<EOF;
<div id="$divid"></div>
<script type="text/javascript">
(function basic_pie(container) {
//document.writeln('<input type="button" class="dldButton" value="To Image" id="toimage$buttonid" onclick="return false;">'+
// '<input type="button" class="dldButton" value="Download" id="download$buttonid" onclick="return false;">' +
// '<input type="button" class="dldButton" value="Reset" id="reset$buttonid" onclick="return false;">'
// );
@datadef
var graph = Flotr.draw(container, [
@contdef
], {
title: "$title",
HtmlText: false,
grid: {
verticalLines: false,
horizontalLines: false,
outline: '',
},
xaxis: {
showLabels: false,
title: "$xlabel"
},
yaxis: {
showLabels: false,
title: "$ylabel"
},
pie: {
show: true,
explode: 6
},
mouse: {
track: true,
trackFormatter: function(obj){ return pieTracker(obj) },
relative: true
},
legend: {
position: "sw",
backgroundColor: "#D2E8FF",
backgroundOpacity: 0.4
}
});
/*
document.getElementById('reset$buttonid').onclick = function() {
graph.download.restoreCanvas();
};
document.getElementById('download$buttonid').onclick = function(){
if (Flotr.isIE && Flotr.isIE < 9) {
alert(
"Your browser doesn't allow you to get a bitmap image from the plot, " +
"you can only get a VML image that you can use in Microsoft Office.<br />"
);
}
graph.download.saveImage('$self->{ImgFormat}');
};
document.getElementById('toimage$buttonid').onclick = function() {
if (Flotr.isIE && Flotr.isIE < 9) {
alert(
"Your browser doesn't allow you to get a bitmap image from the plot, " +
"you can only get a VML image that you can use in Microsoft Office.<br />"
);
}
graph.download.saveImage('$self->{ImgFormat}', null, null, true);
};
*/
})(document.getElementById("$divid"));
</script>
EOF
}
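# Abort with an explicit error message when a user-supplied pattern does not
# compile as a regular expression.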
sub check_regex
{
my ($self, $pattern, $label) = @_;
eval { $pattern =~ m/^$pattern$/i;};
if ($@) {
$self->localdie("FATAL: $label invalid regex '$pattern', $!\n");
}
}
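# Return 1 when an IPv4 address belongs to the given CIDR block, 0 otherwise;
# non-IP client identifiers are matched against $block as a regex instead.
# Illustrative example: check_ip('192.168.1.10', '192.168.1.0/24') returns 1.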
sub check_ip
{
my ($ip, $block) = @_;
# When $client_ip is not an ip address proceed to regex search
if ($ip !~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|[0-9a-fA-F:]+$/) {
if ( $ip =~ /$block/ ) {
return 1;
} else {
return 0;
}
}
my @ip = split(/\./, $ip);
my $ip1 = $ip[0] * 2**24 + $ip[1] * 2**16 + $ip[2] * 2**8 + $ip[3];
my @submask = split(/\//, $block);
my $ip2 = $submask[0];
my $netmask = $submask[1];
my @ip2 = split(/\./, $ip2);
$ip2 = $ip2[0] * 2**24 + $ip2[1] * 2**16 + $ip2[2] * 2**8 + $ip2[3];
if ( $ip1 >> (32-$netmask) == $ip2 >> (32-$netmask)) {
return 1;
}
return 0;
}
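# Rebuild a minimal top-level index.html that only lists the available per-year
# report links (lighter variant of _gen_summary).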
sub _gen_year_summary
{
my ($self, $outdir) = @_;
	# Get all year subdirectories
opendir(DIR, "$outdir") or $self->localdie("ERROR: Can't read directory $outdir, $!\n");
my @dirs = grep { /^\d{4}$/ && -d "$outdir/$_" } readdir(DIR);
closedir DIR;
my %code_stat = ();
my %total_request = ();
my %total_bytes = ();
foreach my $d (@dirs) {
$code_stat{$d} = 1;
}
my $file = $outdir . '/index.html';
my $out = new IO::File;
$out->open(">$file.tmp") || $self->localdie("ERROR: Unable to open $file. $!\n");
# Print the HTML header
$self->_print_main_header(\$out);
my $colspn = 2;
$colspn = 3 if ($self->{CostPrice});
print $out qq{
<h4>$Translate{'Globals_Statistics'}</h4>
<div class="line-separator"></div>
<table class="stata">
<thead>
<tr>
<th scope="col">$Translate{'Years'}</th>
</tr>
</thead>
<tbody>
};
foreach my $year (sort {$b <=> $a} keys %code_stat) {
print $out qq{
<tr>
<td><a href="$year/index.html">$Translate{'Stat_label'} $year *</a></td>
</tr>
};
}
print $out qq{
</tbody>
</table>
<blockquote class="notification">(*) $Translate{'Click_year_stat'}</blockquote>
<h4>$Translate{'Legend'}</h4>
<div class="line-separator"></div>
<div class="displayLegend">
<span class="legendeTitle">$Translate{'Hit'}</span>: <span class="descLegend">$Translate{'Hit_help'}</span><br/>
<span class="legendeTitle">$Translate{'Miss'}</span>: <span class="descLegend">$Translate{'Miss_help'}</span><br/>
};
print $out qq{<span class="legendeTitle">$Translate{'Cost'}</span>: <span class="descLegend">$Translate{'Cost_help'} $self->{CostPrice} $self->{Currency}</span><br/>} if ($self->{CostPrice});
print $out qq{
</div>
};
$self->_print_footer(\$out);
$out->close();
rename("$file.tmp", "$file");
}
####
# Get the week day of a date
####
sub get_day_of_week
{
my ($year, $month, $day) = @_;
# %u The day of the week as a decimal, range 1 to 7, Monday being 1.
# %w The day of the week as a decimal, range 0 to 6, Sunday being 0.
#my $weekDay = POSIX::strftime("%u", gmtime timelocal_nocheck(0,0,0,$day,--$month,$year));
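	# e.g. get_day_of_week(2019, 7, 14) should return 7 (2019-07-14 was a Sunday)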
my $weekDay = POSIX::strftime("%u", 1,1,1,$day,--$month,$year-1900);
return $weekDay;
}
####
# Get week number
####
sub get_week_number
{
my ($year, $month, $day) = @_;
# %U The week number of the current year as a decimal number, range 00 to 53, starting with the first
# Sunday as the first day of week 01.
# %V The ISO 8601 week number (see NOTES) of the current year as a decimal number, range 01 to 53,
# where week 1 is the first week that has at least 4 days in the new year.
# %W The week number of the current year as a decimal number, range 00 to 53, starting with the first
# Monday as the first day of week 01.
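	# e.g. get_week_number(2019, '01', '07') should return '01' (the first Monday of 2019)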
	# Check if the date is valid first
my $datefmt = POSIX::strftime("%F", 1, 1, 1, $day, $month - 1, $year - 1900);
if ($datefmt ne "$year-$month-$day") {
return -1;
}
my $weekNumber = POSIX::strftime("%W", 1, 1, 1, $day, $month - 1, $year - 1900);
return $weekNumber;
}
# Returns all days (YYYY-MM-DD) belonging to the given week number, built from the given month and its neighbouring months
sub get_wdays_per_month
{
my $wn = shift;
my ($year, $month) = split(/\-/, shift);
my @months = ();
my @retdays = ();
$month ||= '01';
push(@months, "$year$month");
if ($month eq '01') {
unshift(@months, ($year - 1) . "12");
} else {
unshift(@months, $year . sprintf("%02d", $month - 1));
}
if ($month == 12) {
push(@months, ($year+1) . "01");
} else {
push(@months, $year . sprintf("%02d", $month + 1));
}
foreach my $d (@months) {
$d =~ /^(\d{4})(\d{2})$/;
my $y = $1;
my $m = $2;
foreach my $day ("01" .. "31") {
			# Check if the date is valid first
my $datefmt = POSIX::strftime("%F", 1, 1, 1, $day, $m - 1, $y - 1900);
if ($datefmt ne "$y-$m-$day") {
next;
}
my $weekNumber = POSIX::strftime("%W", 1, 1, 1, $day, $m - 1, $y - 1900);
if ( ($weekNumber == $wn) || ( ($weekNumber eq '00') && (($wn == 1) || ($wn >= 52)) ) ) {
push(@retdays, "$year-$m-$day");
return @retdays if ($#retdays == 6);
}
next if ($weekNumber > $wn);
}
}
return @retdays;
}
# Returns all days belonging to the given week number of the given year, starting from the given month
sub get_wdays_per_year
{
my ($wn, $year, $mon) = @_;
$mon ||= '01';
my @months = ("$year$mon");
my @retdays = ();
foreach my $a ($months[0] .. "${year}12") {
push(@months, $a) if (!grep(/^$a$/, @months));
}
if ($mon == 1) {
unshift(@months, ($year - 1) . "12");
} else {
my $d = $year . sprintf("%02d", $mon - 1);
unshift(@months, $d) if (!grep(/^$d$/, @months));
}
if ($mon == 12) {
push(@months, ($year+1) . "01");
} else {
my $d = $year . sprintf("%02d", $mon + 1);
push(@months, $d) if (!grep(/^$d$/, @months));
}
foreach my $d (@months) {
$d =~ /^(\d{4})(\d{2})$/;
my $y = $1;
my $m = $2;
foreach my $day ("01" .. "31") {
			# Check if the date is valid first
my $datefmt = POSIX::strftime("%F", 1, 1, 1, $day, $m - 1, $y - 1900);
if ($datefmt ne "$y-$m-$day") {
next;
}
my $weekNumber = POSIX::strftime("%W", 1, 1, 1, $day, $m - 1, $y - 1900);
#if ( ($weekNumber == $wn) || ( (($weekNumber eq '00') || ($weekNumber == 53) ) && (($wn == 1) || ($wn >= 52)) ) ) {
if ( ($weekNumber == $wn) || (($weekNumber == 0) && ($wn == 52)) || (($weekNumber == 52) && ($wn == 0)) ) {
my $time = timelocal_nocheck(0, 0, 0, $day, $m - 1, $y - 1900);
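				# Convert epoch seconds to milliseconds (javascript-style timestamps used by the graphs)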
push(@retdays, $time*1000);
return @retdays if ($#retdays == 6);
}
}
}
return @retdays;
}
1;
__END__