#!/usr/bin/perl
#
# Referrer report for a web server access log.
#
# Usage: script [LOGFILE]
#   LOGFILE  path of the access log to read; when omitted (or given as "-")
#            the log is read from standard input.
#
# For every requested path the script prints the total number of hits that
# arrived with an external referrer, followed by the individual referrers
# and their hit counts.  Paths are listed by descending total, referrers by
# descending count; ties are broken alphabetically.
#
# Referrers that are "-", that point at semicomplete.com itself, or that
# match the ignore list (feed readers) are not counted.  Referrers that look
# like search-engine result pages are collapsed to "<Engine>: <query>".

use strict;
use warnings;

use URI::Escape;

# Referrers coming from the site itself are not interesting.
my $OWN_SITE_RE = qr/http:\/\/(www\.)?semicomplete.com/i;

# Search engines whose query string is extracted from the referrer.  The
# rewrites are applied in this order, each one to the result of the previous.
my @SEARCH_ENGINES = (
    [ 'Google', qr/google.*?q=([^&]+)/ ],
    [ 'Yahoo',  qr/search\.yahoo.*?p=([^&]+)/ ],
    [ 'MSN',    qr/search\.msn.*?q=([^&]+)/ ],
);

# Referrers dropped from the report (checked after decoding).
my $IGNORED_REFERRER_RE = qr/(bloglines|livejournal|google.com\/reader)/;

# Pick the input: a named file, or STDIN when no file (or "-") was given.
# Three-argument open keeps a hostile file name from being interpreted as
# an open mode or a pipe.
my $log_path = $ARGV[0];
my $log_fh;
if (defined $log_path && length $log_path && $log_path ne '-') {
    open($log_fh, '<', $log_path)
        or die "Cannot open '$log_path' for reading: $!\n";
}
else {
    $log_fh = \*STDIN;
}

my %hits_for;     # $hits_for{$path}{$referrer} = number of hits
my %total_for;    # $total_for{$path}           = hits over all referrers

LINE:
while (my $line = <$log_fh>) {

    # client address, then the quoted request, then the next quoted field
    # (the referrer).
    next LINE
        if $line !~ m/([0-9.]+) .*?"(?:GET|POST|HEAD)([^"]+)".*?"([^"]+)"/;
    my ($request, $referrer) = ($2, $3);

    # Drop the protocol suffix from the request.
    $request =~ s,HTTP/1.[01]$,,;

    next LINE if $referrer eq '-';
    next LINE if $referrer =~ $OWN_SITE_RE;

    for my $engine (@SEARCH_ENGINES) {
        my ($label, $pattern) = @{$engine};
        $referrer = "$label: $1" if $referrer =~ $pattern;
    }

    # Decode %XX escapes, then turn form-encoded '+' into spaces.
    $referrer = uri_unescape($referrer);
    $referrer =~ s/\+/ /g;

    next LINE if $referrer =~ $IGNORED_REFERRER_RE;

    $request =~ s/^\s+//;
    $request =~ s,^/blog,,;

    # Totals are kept in their own hash so that no referrer string can
    # collide with a bookkeeping key.
    $hits_for{$request}{$referrer}++;
    $total_for{$request}++;
}

if (defined $log_path && length $log_path && $log_path ne '-') {
    close($log_fh);
}

# Most-hit paths first; alphabetical among equals.
my @paths = sort { $total_for{$b} <=> $total_for{$a} || $a cmp $b }
    keys %total_for;

for my $path (@paths) {
    print "\n";
    print "$path [" . $total_for{$path} . "]\n";

    my $count_of  = $hits_for{$path};
    my @referrers = sort { $count_of->{$b} <=> $count_of->{$a} || $a cmp $b }
        keys %{$count_of};

    for my $referrer (@referrers) {
        print " [" . $count_of->{$referrer} . "] $referrer\n";
    }
}