#!/usr/bin/perl

use Date::Manip;
use strict;

# Regex alternation of domains that are always dropped from the report.
# An empty string means no built-in exclusions.
my $ALWAYS_EXCLUDE = q{};

# The first command-line argument is the day expression.  It is run through
# Date::Manip's UnixDate() with the "%D" format; the validation further down
# treats a false result as an invalid expression.
my $TIME_SPEC = do {
    my $day_expr = $ARGV[0];
    UnixDate($day_expr, "%D");
};

# Help text shown for -h/--help and after an invalid day expression.
# $0 (the script name) is interpolated into the text.
my $USAGE = <<"END_USAGE";
Usage:   $0 day_spec [-n N] [-e domain1,domain2,...,domainN] \\
                     [-s NNN] [-m EXPR] [domain.one ... domain.nnn]
Where:
    day_spec: Date::Manip compatible date expression
              (Examples: 'today', 'yesterday', '12/15/2003')
          -n: Limit number of sites output to N
          -h: This screen
          -e: Exclude comma-separated list of domain names
              from output.
          -m: Limit count to URLs that match EXPR
          -s: Limit count to URLs that generated a status NNN

#  Top 10 hit site hit counts for yesterday
$0 yesterday -n 10

#  Show #s of 404 counts for all sites except foo.com and bar.org
$0 today -s 404 -e foo.com,bar.org

END_USAGE

# Honor an explicit help request before validating the day expression.
# "-h" or "--help" given as the first argument is not a parsable date, so
# checking the date first would report it as an invalid time specification
# instead of simply showing the usage screen.
if (grep { /^(?:-h|--help)$/ } @ARGV) {
    print $USAGE;
    exit 1;
}

# $TIME_SPEC is false when the day expression could not be converted.
if (! $TIME_SPEC) {
    print "Invalid time specification `$ARGV[0]'!\n";
    print $USAGE;
    exit 1;
}

# Option values are extracted from the space-joined argument string.  Every
# pattern requires whitespace in front of the flag, so a flag is only
# recognized after the first argument (the day expression).
my $ARGS = join(' ', @ARGV);

my ($LIMIT)  = $ARGS =~ m/\s+-n\s*(\d+)/;               # -n N
my ($STATUS) = $ARGS =~ m/\s+-s\s*(\S+)/;               # -s NNN
my $MATCH    = ($ARGS =~ m/\s+-m\s+(\S+)/)[0] || '.';   # -m EXPR, default '.'

# -e takes a comma-separated list of domain names.  Each name is escaped so
# that "." is matched literally, and empty entries (e.g. from "-e ,foo.com")
# are dropped because an empty alternative would match every domain.
my ($exclude_arg) = $ARGS =~ m/\s+-e\s*(\S+)/;
my $EXCLUDE = join('|',
                   map  { quotemeta($_) }
                   grep { length($_) }
                   split(/,/, defined $exclude_arg ? $exclude_arg : ''));

# Positional domain names restrict the report to those domains.  Walk the
# arguments after the day expression and skip every option together with its
# value; otherwise a dotted option value such as "-e foo.com,bar.org" or
# "-m index.html" would be mistaken for a domain filter and the report would
# come out empty.
my @only_domains;
my @pending = @ARGV[1 .. $#ARGV];
while (@pending) {
    my $arg = shift @pending;
    if ($arg =~ /^-[nsme](.*)$/) {
        # A bare flag ("-n 10") consumes the following word as its value;
        # an attached value ("-n10") is already part of this word.
        shift @pending if $1 eq '';
        next;
    }
    next if $arg =~ /^-/;                  # any other switch, e.g. -h
    push @only_domains, quotemeta($arg) if $arg =~ /\S+\.\S+/;
}
my $ONLY = join('|', @only_domains);

# Fold the built-in exclusions into whatever the user asked for.
# $ALWAYS_EXCLUDE is used as-is, so it may itself be a regex alternation.
if ($ALWAYS_EXCLUDE) {
    $EXCLUDE = $EXCLUDE ? "$EXCLUDE|$ALWAYS_EXCLUDE" : $ALWAYS_EXCLUDE;
}

# Build the log-search command as an argument list rather than a shell
# string.  $STATUS and $MATCH come straight from the command line; passing
# them as separate list elements keeps quotes and shell metacharacters in
# those values from breaking or altering the command.
my @CMD_ARGV = (
    '/usr/local/sbin/vsearch_access_logs',
    '--hide-action', '--hide-status',
    '--hide-host', '--hide-date', '--hide-match',
    '--on-day', $TIME_SPEC,
);
push @CMD_ARGV, '--status', $STATUS if $STATUS;
push @CMD_ARGV, $MATCH              if $MATCH;

# Printable form of the command, used only for the error message.
my $CMD = join(' ', @CMD_ARGV);

# List-form pipe open runs the program directly, without a shell.  The
# bareword handle CMD is kept because the read loop below reads from it.
open(CMD, '-|', @CMD_ARGV) ||
   die "Can't open read pipe with $CMD: $!\n";

# %HITS   maps a domain name to its hit count.
# $DOMAIN is the domain named by the most recent "Query" line.
# $TOTAL  accumulates every accepted hit count.
my (%HITS, $DOMAIN, $TOTAL);

print <<"END_HEADER";
===============================================================
                   REPORT FOR DATE: $TIME_SPEC
DOMAIN                                            HITS    PCT %
===============================================================
END_HEADER

# Read the command output line by line.  A line containing "Query" names
# the current domain in its fifth whitespace-separated field; a line made up
# of a single field containing a digit is taken as that domain's hit count.
while (my $line = <CMD>) {
    # Commas are removed before splitting (presumably thousands separators
    # in the counts -- confirm against the tool's output format).
    $line =~ s/,//g;
    my @fields = split(' ', $line);

    $DOMAIN = $fields[4] if $line =~ /Query/;

    next unless @fields == 1 && $fields[0] =~ /\d+/;

    # Apply the "only these domains" and "exclude these domains" filters.
    next if $ONLY    && $DOMAIN !~ /$ONLY/;
    next if $EXCLUDE && $DOMAIN =~ /$EXCLUDE/;

    $HITS{$DOMAIN} = $fields[0];
    $TOTAL += $fields[0];
}

close(CMD);

# Running sums over the rows actually printed: percentage and hit count.
my $PCT = 0;
my $REPORT_HITS = 0;

# Numeric site limit from -n; 0 means "no limit" (option absent or zero).
my $SITE_LIMIT = defined $LIMIT ? $LIMIT + 0 : 0;
my $rows_left  = $SITE_LIMIT;

# Rule line used to frame the report sections.
my $RULE = "===============================================================\n";

# One row per domain, busiest first.  Ties are broken by name so the output
# order does not depend on hash ordering.
for my $site (sort { $HITS{$b} <=> $HITS{$a} || $a cmp $b } keys %HITS) {
    # Guard the division: when every collected count is 0 (for example a
    # status filter that matched nothing) $TOTAL is 0 and dividing by it
    # would abort the script with "Illegal division by zero".
    my $pct = $TOTAL ? ($HITS{$site} / $TOTAL) * 100.00 : 0;
    printf "%-40s    %10d   %6.2f\n", $site, $HITS{$site}, $pct;
    $PCT += $pct;
    $REPORT_HITS += $HITS{$site};
    last if $SITE_LIMIT && --$rows_left == 0;
}

print $RULE;

# With a site limit in effect, show how much of the total the listed rows
# account for.
if ($SITE_LIMIT) {
    printf "%-40s    %10d   %6.2f\n",
           "NUMBER OF HITS/% TOTAL HITS THIS REPORT",
           $REPORT_HITS, $PCT;
    print "---------------------------------------------------------------\n";
}

# $TOTAL is undefined when no count line was accepted; print 0 in that case.
printf "%-40s    %10d   %6.2f\n",
       "TOTAL HITS/TOTAL %", $TOTAL || 0, 100.00;

print $RULE;

exit 0;