#!/usr/bin/perl -w

# Usage: scanlog.pl <ASCII-single-day's-log>

# David Harris, Version of 17 Aug 2005 pm 5:30

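# Goes through raw log lines, grouping hits by IP address and recording each
#  hit's delay since that address's first hit.  The intended goal is to isolate
#  "visits": all hits by a single IP address, each within 30 minutes of the
#  previous hit from that address.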


use strict;
use warnings;

# Where is the single log?:
# This hard-coded location is the only line that needs to be changed for a
# new location.  Each day's log lives in a dated sub-folder of this directory.
my $BASEDIR = "C:\\Documents and Settings\\User 2\\My Documents\\DDLogs";

#-------------------------------------------------------------------------

# parse_log_line($line)
#   Splits one raw log line (already chomped) into the fields kept per hit.
#   Returns the list ($ipadr, $hittime, $hitsec, $hitloading, $referrer),
#   or an empty list when the line does not start with a dotted-quad address
#   or carries no usable "[day:hh:mm:ss +zzzz]" timestamp.
sub parse_log_line {
    my ($line) = @_;

    # IP value [hash index], rewritten as ip_a_b_c_d:
    my @octets = $line =~ /^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+).+$/
        or return;
    my $ipadr = 'ip_' . join('_', @octets);

    # Clock/calendar time of hit.  Any numeric UTC offset is accepted, not
    # just -0400, so logs written under a different offset still parse.
    my ($hittime) = $line =~ /^.+\[(.+) [+-][0-9]{4}\].+$/
        or return;
    my (undef, $hr, $min, $sec) = split /:/, $hittime;
    return unless defined $sec;
    return if grep { !/^[0-9]+$/ } $hr, $min, $sec;

    # Time of hit in seconds since midnight = 0:
    my $hitsec = (3600 * $hr) + (60 * $min) + $sec;

    # The request ("method path "), trailing space included.  When the
    # pattern does not match, the whole line is used instead.
    my $hitloading = $line =~ /^[^"]+"([^ ]+ [^ ]+ ).+$/ ? $1 : $line;

    # Last quoted field on the line; whole line when the pattern fails.
    # NOTE(review): the leading .+ is greedy, so on a line with three quoted
    # fields (request, referrer, user agent) this captures the user agent
    # rather than the referrer -- confirm against the actual log format.
    my $referrer = $line =~ /^.+".+".+"(.+)".*$/ ? $1 : $line;

    return ($ipadr, $hittime, $hitsec, $hitloading, $referrer);
}

#-------------------------------------------------------------------------

# MAIN ROUTINE starts here.

# Check the number of arguments on the command line.  The dated folder name
# is taken from the LAST argument: the documented usage passes one argument,
# while earlier versions insisted on exactly two and used the second, so
# both call styles are accepted.
if (@ARGV < 1 or @ARGV > 2) {
    die "scanlog.pl needs dated folder name as argument";
}
my $folder = $ARGV[-1];

# Dated folder contains darwinday.org:
my $log_path = "$BASEDIR\\$folder\\darwinday.org";
# Check existence of the named log file:
open(my $log_fh, '<', $log_path)
    or die "scanlog.pl $log_path not found.\n";

my %first_hit_sec;  # ip key => seconds since midnight of that IP's first hit
my %events;         # ip key => list of [hittime, hitsec, delay, hitloading, referrer]

# Read log lines, putting them into a hash of arrays:
while (my $line = <$log_fh>) {
    chomp $line;

    my @fields = parse_log_line($line);
    if (!@fields) {
        # A line without an address or timestamp cannot be attributed to a
        # user or placed in time, so it is reported and left out.
        warn "scanlog.pl: skipping unparseable line $. of $log_path\n";
        next;
    }
    my ($ipadr, $hittime, $hitsec, $hitloading, $referrer) = @fields;

    # The delay is measured from the first hit seen for this IP address.
    $first_hit_sec{$ipadr} = $hitsec unless exists $first_hit_sec{$ipadr};
    my $delay = $hitsec - $first_hit_sec{$ipadr};

    push @{ $events{$ipadr} },
        [ $hittime, $hitsec, $delay, $hitloading, $referrer ];

    print "For $ipadr, at $hittime, a delay of $delay sec.,\n";
    print "command was $hitloading \n";
} # end of while loop

close $log_fh;  # Whole log is now read

my $sorted_path = "$BASEDIR\\$folder\\darwinday.sorted";
open(my $sorted_fh, '>', $sorted_path)
    or die "scanlog.pl cannot write $sorted_path: $!\n";

# Every record goes both to the screen and to the sorted file.
for my $ipadr (sort keys %events) {
    for my $event (@{ $events{$ipadr} }) {
        my $record = "$ipadr: @{$event} \n";
        print $record;
        print {$sorted_fh} $record;
    }
    # Separate the users (could print a per-user summary here).
    print {$sorted_fh} "\n";
    print "\n";
}

# Summary over all the IP groups (roughly 1 user each?).  The count is
# computed outside the string: "(scalar keys %events)" inside double quotes
# is printed literally instead of being evaluated.
my $summary = scalar(keys %events) . " IPadrs occurred.\n";
print {$sorted_fh} $summary;
print $summary;

# Buffered write errors only surface at close, so check it.
close($sorted_fh)
    or die "scanlog.pl cannot finish writing $sorted_path: $!\n";
exit;
# End of program
