View Single Post
  #6  
Old 02-08-2006, 11:07 AM
Syzygies Syzygies is offline
Registered User
 
Join Date: Jan 2006
Posts: 23
Sure, most forums are 99% lurkers, and the code took a bit of work, just curious. I see how many people read my post, and they can study source to figure out who I am.

Here's a simple Perl script that can extract this kind of information from my apache server. No idea how portable the code is, but it works for me:

Code:
#!/usr/bin/perl
use warnings;
use strict;

# apachelog.pl  --  Perl script for extracting reports from /usr/local/apache/logs/access_log
#
# Copyright (c) 2006 Dave Bayer
# Subject to the terms and conditions of the MIT License.

(my $help = <<'EOF') =~ s/^: ?//mg;
:
: Usage:
:
: 	apachelog.pl count path
:		count distinct accesses
:		(in all, how many distinct visitors?)
:
: 	apachelog.pl who path
:		list distinct accesses by caller
:		(who were the visitors?)
:		
: 	apachelog.pl when path
:		list distinct accesses by count for each date
:		(for each date, how many distinct visitors?)
:		
: 	apachelog.pl what path
:		list distinct accesses by ascending count
:		(for each distinct visitor, what did they access?)
:
:	apachelog.pl first path
:		list distinct first accesses by ascending count
:		(for each distinct visitor, what did they access first?)
:
: Examples:
:
:	apachelog.pl what ''
:	apachelog.pl when bayer/coffee
:
EOF

my $get = ' "GET /(?:~|%[0-9a-fA-F]{2})*';

sub openlog {
	open LOG, "</usr/local/apache/logs/access_log" or die "unable to read /usr/local/apache/logs/access_log\n";
}

sub convertdate {
	local $_;
	($_) = @_;
	
	s/(J|j)an[a-z]*/01/;
	s/(F|f)eb[a-z]*/02/;
	s/(M|m)ar[a-z]*/03/;
	s/(A|a)pr[a-z]*/04/;
	s/(M|m)ay[a-z]*/05/;
	s/(J|j)un[a-z]*/06/;
	s/(J|j)ul[a-z]*/07/;
	s/(A|a)ug[a-z]*/08/;
	s/(S|s)ep[a-z]*/09/;
	s/(O|o)ct[a-z]*/10/;
	s/(N|n)ov[a-z]*/11/;
	s/(D|d)ec[a-z]*/12/;
	s:(\d*)/(\d*)/(\d*):$3-$2-$1:;
	$_;
}

sub who {
	my ($match) = @_;
	my (%who, $addr);

	openlog();
	while (<LOG>) {
		/$get$match/ or next;
		($addr = $_) =~ s/^(\S*)\s.*\n/$1/;
		$who{$addr}++;
	}
	close LOG;
	%who;
}

sub when {
	my ($match) = @_;
	my (%when, $date, $addr);

	openlog();
	while (<LOG>) {
		/$get$match/ or next;
		($date = $_) =~ s/^[^[]*\[([^:]*):.*\n/$1/;
		($addr = $_) =~ s/^(\S*)\s.*\n/$1/;
		$when{convertdate($date)}{$addr}++;
	}
	close LOG;
	%when;
}

sub what {
	my ($match) = @_;
	my (%what, $file, $addr);

	openlog();
	while (<LOG>) {
		/$get$match/ or next;
		($file = $_) =~ s/^.*?$get($match\/?[^&#%\?\/\s]*).*\n/$1/;
		$file =~ s/\/$//;
		($addr = $_) =~ s/^(\S*)\s.*\n/$1/;
		$what{$file}{$addr}++;
	}
	close LOG;
	%what;
}

sub first {
	my ($match) = @_;
	my (%start, %first, $file, $addr);

	openlog();
	while (<LOG>) {
		/$get$match/ or next;
		($file = $_) =~ s/^.*?$get($match\/?[^&#%\?\/\s]*).*\n/$1/;
		$file =~ s/\/$//;
		($addr = $_) =~ s/^(\S*)\s.*\n/$1/;
		if (defined $start{$addr}) { next; }
		$start{$addr} = $file;
	}
	close LOG;
	foreach $addr (keys %start) {
		$first{$start{$addr}}++;
	}
	%first;
}

sub count_cmd {
	my %who = who(@_);
	printf "%d\n", scalar keys %who;
}

sub who_cmd {
	my %who = who(@_);
	foreach my $addr (sort keys %who) {
		print "$addr\n";
	}
}

sub when_cmd {
	my %when = when(@_);
	foreach my $date (sort keys %when) {
		printf "%8d  %s\n", scalar keys %{$when{$date}}, $date;
	}
}

sub what_cmd {
	my %what = what(@_);
	my @list;
	foreach my $file (sort keys %what) {
		push @list, sprintf "%8d  %s\n", scalar keys %{$what{$file}}, $file;
	}
	print sort @list;
}

sub first_cmd {
	my %first = first(@_);
	my @list;
	foreach my $file (sort keys %first) {
		push @list, sprintf "%8d  %s\n", $first{$file}, $file;
	}
	print sort @list;
}

my %run = (
	'count' => \&count_cmd,
	'who' => \&who_cmd,
	'when' => \&when_cmd,
	'what' => \&what_cmd,
	'first' => \&first_cmd,
);

my ($do, $match) = @ARGV;
if ($#ARGV != 1 or not $run{$do}) { 
		print $help;
		exit(1);
}
$run{$do}($match);
Reply With Quote