use strict;
use Getopt::Long;

# Shared script state:
#   $fmt        - output format string given with -f / --fmt
#   $uniq       - true when -u / --uniq was given
#   %distinct   - formatted lines seen so far (used by the main loop for --uniq)
#   $log_format - name of the log layout given with -l
#   $log_regex  - compiled pattern parse_row() matches each line against
#   @failed     - raw lines parse_row() could not match
our ($fmt, $uniq, %distinct, $log_format, $log_regex);
our @failed = ();

# Format-directive letter => row field name, consulted by make_formatter().
my $fields = {
	i => 'ip',
	d => 'date',
	m => 'method',
	f => 'file',
	p => 'proto',
	c => 'result',
	b => 'bytes',
	r => 'referer',
	a => 'agent',
	v => 'vhost',
};

# Known log layouts, keyed by the name accepted by -l. The capture groups
# line up, in order, with the fields parse_row() assigns ($1 => ip,
# $2 => date, ...); 'common' simply has fewer groups than 'extended'.
my $log_fmts = {
	extended => qr'(.+) - - \[([^\]]+)\] "(\w+) (\S+) (\S+)" (\d+) (\S+) "([^"]+)" "([^"]+)" \d+ (\S+)',
	common =>   qr'(.+) - - \[([^\]]+)\] "(\w+) (\S+) (\S+)" (\d+) (\d+)'
};

# Default layout; replaced below when -l names another known layout.
$log_regex = $log_fmts->{'extended'};

my %options = (
	'f=s' => \$fmt,
	'fmt=s' => \$fmt,
	'l=s' => \$log_format,
	'u' => \$uniq,
	'uniq' => \$uniq,
);

# GetOptions returns false after reporting a bad option; stop here instead of
# carrying on with a half-understood command line.
GetOptions(%options)
	or die "Usage: $0 [-f|--fmt FORMAT] [-l extended|common] [-u|--uniq] [FILE ...]\n";

# An unknown or missing layout name silently keeps the 'extended' default.
$log_regex = $log_fmts->{$log_format}
	if $log_format and defined $log_fmts->{$log_format};

# Formatter closure built once from the command-line format string.
my $f = make_formatter($fmt, $uniq);

# Stream the input (files named on the command line, or STDIN) one line at a
# time, printing the formatted view of every row that parses.
while (my $line = <>) {
	# parse_row() records unparseable lines in @failed and returns undef;
	# there is nothing to format for those, so skip them.
	my $row = parse_row($line);
	next unless defined $row;

	my $out = $f->($row);

	# With --uniq, print each distinct formatted line only the first time it
	# appears. The post-increment both tests and records the line.
	next if $uniq and $distinct{$out}++;

	print $out, "\n";
}

# Summarise how many input lines did not match the selected log layout.
if (@failed) {
	print "Failed to parse " . scalar(@failed) .  " rows\n";
}

# Parse one access-log line into a hash of named fields.
#
# The line is matched against the file-level $log_regex.
#
# Arguments:
#   $text - one raw log line.
#
# Returns a hashref with the keys ip, date, method, file, proto, result,
# bytes, referer, agent and vhost, filled from capture groups 1..10 (keys
# whose group does not exist in the active pattern are undef). When the line
# does not match, it is appended to the file-level @failed and undef is
# returned.
sub parse_row {
	my $text = shift;

	unless ($text =~ m/$log_regex/) {
		push @failed, $text;
		return undef;
	}

	my $r = {
		ip=>$1,date=>$2,method=>$3,file=>$4,proto=>$5,
		result=>$6,bytes=>$7,referer=>$8,agent=>$9,vhost=>$10
	};

	# deal w/ squid induced weirdness: when the client field contains a comma
	# (presumably a list of addresses added by the proxy), keep only the first
	# dotted-quad address. index() returns -1 when there is no comma, so it
	# must be compared against 0 rather than used as a boolean, and the field
	# is only overwritten when an address was actually found.
	if (index($r->{ip}, ',') >= 0 and $r->{ip} =~ m/(\d+\.\d+\.\d+\.\d+)/) {
		$r->{ip} = $1;
	}

	return $r;
}



# Build a closure that renders one parsed row according to a format string.
#
# The format is literal text mixed with one-letter directives (%i, %d, %m,
# %f, %p, %c, %b, %r, %a, %v) that are looked up in the file-level $fields
# table. Each directive is rewritten to a sprintf "%s" and its field name is
# remembered, so the returned sub only has to take a hash slice of the row.
#
# Arguments:
#   $fmt - format string; "%i | %f" is used when it is undefined or empty.
#   Any further arguments are accepted and ignored.
#
# Returns a code ref; calling it with a row hashref gives the formatted
# string.
sub make_formatter {
	my ($fmt) = @_;
	$fmt = "%i | %f" unless defined $fmt and length $fmt;

	my @fields = ();

	# %u is still recognised, as it always was, but $fields has no entry for
	# it. It keeps rendering as an empty string; it just no longer pushes an
	# undefined key onto the slice list.
	$fmt =~ s{%([idmfpcbravu])}{
		my $name = $fields->{$1};
		defined $name ? do { push @fields, $name; '%s' } : '';
	}ge;

	my $sub = sub {
		my $r = shift;
		return sprintf($fmt, @$r{@fields});
	};
	return $sub;
}
