Newer
Older
Digital_Repository / Repositories / Maps / css_map.pl
#!/usr/bin/env perl
use strict;
use Time::HiRes qw( gettimeofday );
use CGI;
use DBI;
use Geo::IP;
use Geo::Proj4;

# Wall-clock start of the request in whole milliseconds; compared with the
# finish time at the bottom of the script to report the generation time.
my ( $start_sec, $start_micro ) = gettimeofday();
my $start_time = $start_sec * 1000 + round( $start_micro / 1000 );

# CGI request object.
my $page;

# Database connection settings and query state.
# NOTE(review): the credentials are hard-coded in the source; consider
# loading them from a protected configuration file instead.
my $dsn       = "DBI:mysql:database=eprintstats;host=localhost";
my $user_name = "eprintstatspriv";
my $password  = "AuldGrizzel";
my $connect;     # database handle
my $query;       # SQL text
my %types;       # mapped hit totals, keyed by view type
my %unmapped;    # hits whose IP address could not be geolocated, by view type
my $stat;        # statement handle
my $row;         # current result row
my $num_rows;    # row count of the result set
my $vtype;       # view type of the current row
my $where = '';  # WHERE clause under construction

# Geolocation database and map projection handles.
my $gi;
my $proj;
my $gidb = '/usr/local/share/GeoIP/GeoLiteCity.dat';

# Map geometry.  The offsets shift projected coordinates so they start at
# zero, and the maxima are the full projected extents used to scale them to
# the requested pixel size.
# NOTE(review): the constants look like the half-extents of the Robinson
# projection in projected units -- confirm.
my $width;
my $height;
my $x_offset = 16986796.16;
my $y_offset = 8615499.05;
my $max_x    = $x_offset * 2;
my $max_y    = $y_offset * 2;

# Accumulators and per-row scratch variables.
my %cities;              # per-city coordinates and hit counts
my %IPs;                 # distinct IP addresses seen
my $num_entries = -1;    # include all entries from database
my $num_hits    = 0;
my $ip;
my $count;
my $location;
my $city = 0;
my $lat  = 0;
my $long = '';
my $x    = 0;
my $y    = 0;

# Request options and the current dot colour.
my $show_only = 'both';    # include both abstracts & downloads
my $eprint    = '';        # optional list of eprints to restrict the query to
my $tc        = '#000000';

# Read the request, emit the HTTP header and open the external resources
# (GeoIP database, map projection and statistics database).
$page = CGI->new;

# The Cache-Control key must be quoted: written bare, -Cache-Control is
# parsed as the subtraction (-Cache) - 'Control', which evaluates to 0, so
# the intended Cache-Control header was never sent.
print $page->header(
	-type            => "text/html",
	-Pragma          => 'no-cache',
	'-Cache-Control' => 'no-cache'
);

# Optional parameters; the defaults assigned at declaration stay in force
# when a parameter is absent.
$num_entries = $page->param('top')    if ( defined $page->param('top') );
$show_only   = $page->param('show')   if ( defined $page->param('show') );
$eprint      = $page->param('eprint') if ( defined $page->param('eprint') );

$gi = Geo::IP->open( $gidb, GEOIP_STANDARD )
  or die "Unable to open GeoIP database $gidb\n";

$proj = Geo::Proj4->new( proj => "robin", ellps => "sphere", lon_0 => 10 )
  or die "parameter error: " . Geo::Proj4->error . "\n";

# Pixel size of the map the dots are laid over.  There is no default: when a
# parameter is missing the value is undefined and the coordinate scaling
# treats it as 0.
$width  = $page->param('width');
$height = $page->param('height');

# RaiseError makes every later DBI failure die instead of returning undef.
$connect = DBI->connect( $dsn, $user_name, $password, { RaiseError => 1 } );

# Start both view types at zero so the summary always has a number to print.
$types{'download'}    = $types{'abstract'}    = 0;
$unmapped{'download'} = $unmapped{'abstract'} = 0;

# Set up query.
#
# Counts hits per (ip, view_type), optionally restricted to one view type, to
# a list of eprints and to the top N rows.  $show_only, $eprint and
# $num_entries come straight from the request, so they are never
# interpolated into the SQL text: the first two are passed as bound
# parameters and the row limit is only used once it has been validated as a
# plain positive integer.
my @bind;

if ( $show_only eq 'both' )
{
	$where = "view_type IN ('download', 'abstract')";
}
else
{
	$where = "view_type = ?";
	push @bind, $show_only;
}

unless ( $eprint eq '' )
{
	# The parameter is a comma-separated list of archive IDs; each one gets
	# its own placeholder.
	# NOTE(review): assumes the IDs are plain unquoted values -- confirm
	# against the callers of this script.
	my @ids;
	foreach my $field ( split /,/, $eprint )
	{
		my $id = $field;
		$id =~ s/\A\s+//;
		$id =~ s/\s+\z//;
		push @ids, $id if ( $id ne '' );
	}

	if (@ids)
	{
		$where .= ' AND archiveid IN (' . join( ', ', ('?') x @ids ) . ')';
		push @bind, @ids;
	}
	else
	{
		# Only separators were supplied.  Match nothing rather than
		# silently dropping the requested restriction.
		$where .= ' AND 1 = 0';
	}
}

# Anything other than a positive integer means "no limit".
my $limit = '';
if ( $num_entries =~ /\A[0-9]+\z/ && $num_entries > 0 )
{
	$limit = " LIMIT $num_entries";
}

$query = "SELECT ip, view_type, COUNT(*) AS count
	 FROM view
	 WHERE $where
	 GROUP BY ip, view_type
	 ORDER BY count DESC" . $limit;

$stat = $connect->prepare($query);
$stat->execute(@bind);
$num_rows = $stat->rows;

# Tally the hits per city.
#
# Each result row is one (ip, view_type) pair with its hit count.  The IP
# address is geolocated; hits that resolve to a location are added to that
# city's totals and to the per-type totals, the rest are counted as unmapped.
#
# The loop is driven by the fetch itself and the rows are counted as they
# arrive: the DBI documentation advises against relying on rows() for SELECT
# statements, because some drivers cannot report the size of the result set
# until everything has been fetched.
my $rows_fetched = 0;

while ( $row = $stat->fetchrow_hashref() )
{
	$rows_fetched++;

	$ip    = $row->{'ip'};
	$count = $row->{'count'};
	$vtype = $row->{'view_type'};

	$IPs{$ip} = 1;

	$location = $gi->record_by_addr($ip);

	if ( !defined($location) )
	{
		$unmapped{$vtype} += $count;
		next;
	}

	$lat  = $location->latitude;
	$long = $location->longitude;

	# The coordinates are part of the key so that same-named cities in
	# different places stay separate.
	$city = (
			  ( $location->city eq '' )
			  ? 'Unknown'
			  : $location->city
	) . " ($lat, $long)";

	if ( !defined( $cities{$city} ) )
	{
		# First hit for this city.  Project it onto the map once: the
		# key embeds the coordinates, so later rows for the same key
		# would produce the same pixel position.
		( $x, $y ) = $proj->forward( $lat, $long );
		$x = round( ( $x + $x_offset ) / $max_x * $width );
		$y = round( ( $y_offset - $y ) / $max_y * $height );

		$cities{$city}{'lat'}      = $lat;
		$cities{$city}{'long'}     = $long;
		$cities{$city}{'abstract'} = 0;
		$cities{$city}{'download'} = 0;
		$cities{$city}{'count'}    = 0;
		$cities{$city}{'x'}        = $x;
		$cities{$city}{'y'}        = $y;
	}

	$cities{$city}{$vtype}  += $count;
	$cities{$city}{'count'} += $count;
	$types{$vtype}          += $count;
}

# When no usable limit was requested, record how many rows were actually
# processed; the value labels the timing log written at the end of the run.
$num_rows = $rows_fetched;
$num_entries = $num_rows if ( $num_rows > 0 && $num_entries < 1 );

$stat->finish();
$connect->disconnect();

# Generate dots for each city.
#
# Every city becomes a 3x3 pixel absolutely-positioned <div> centred on its
# projected position.  When a single view type is shown the dot has that
# type's fixed colour and cities with no hits of that type are skipped; when
# both are shown the red and blue channels carry the download and abstract
# shares of the city's hits.  Any other value of $show_only draws nothing.
my %solid_colour = ( 'download' => '#ff0000', 'abstract' => '#0000ff' );

if ( $show_only eq 'both' || exists $solid_colour{$show_only} )
{
	foreach my $place ( keys %cities )
	{
		my $tally = $cities{$place};

		if ( $show_only eq 'both' )
		{
			# Blend colour according to the ratio of abstracts to downloads.
			my $red  = round( $tally->{'download'} / $tally->{'count'} * 255 );
			my $blue = round( $tally->{'abstract'} / $tally->{'count'} * 255 );
			$tc = sprintf( '#%02x00%02x', $red, $blue );
		}
		else
		{
			next if ( $tally->{$show_only} == 0 );
			$tc = $solid_colour{$show_only};
		}

		# Shift by one pixel so the 3px square is centred on the point.
		my $left = $tally->{'x'} - 1;
		my $top  = $tally->{'y'} - 1;

		print '<div style="position:absolute; width:3px; height:3px; left:'
		  . $left
		  . 'px; top:'
		  . $top
		  . 'px; background-color:'
		  . $tc
		  . ';"></div>' . "\n";
	}
}

# Output summary data.
#
# Prints a box in the top-left corner with one coloured line per view type
# being shown (mapped hits, plus the number of unmappable hits when there
# are any), followed by the number of distinct cities and IP addresses.  The
# line break after each view type is printed even when that type is hidden.
print '<div style="position:absolute; left:1px; top:1px; font-size:small;">';

foreach my $legend ( [ 'download', 'red' ], [ 'abstract', 'blue' ] )
{
	my ( $kind, $colour ) = @{$legend};

	if ( $show_only eq 'both' || $show_only eq $kind )
	{
		my $extra = '';
		$extra = " (+$unmapped{$kind} unmappable)" if ( $unmapped{$kind} > 0 );

		print '<span style="color:' . $colour . ';">'
		  . $types{$kind} . ' ' . $kind . 's'
		  . $extra
		  . '</span>';
	}

	print "<br />\n";
}

printf "from %d cities<br />(%d IP addresses)</div>\n",
  scalar( keys %cities ), scalar( keys %IPs );

# Report how long the map took to generate, both on the page and in a
# per-size timing log under /tmp.
my ( $finish_sec, $finish_micro ) = gettimeofday();
my ($finish_time) = ( $finish_sec * 1000 ) + round( $finish_micro / 1000 );
my $elapsed_ms = $finish_time - $start_time;

print '<div style="position:absolute; left:1px; bottom:1px; font-size:small;">';
print 'Map generated in ' . $elapsed_ms . ' ms</div>' . "\n";

# $num_entries can still hold the raw 'top' request parameter, so it is only
# allowed into the file name when it is a plain integer.  The file is opened
# with three-argument open and a lexical handle so the name can never be
# interpreted as anything but a path.
my $results_tag  = ( $num_entries =~ /\A-?[0-9]+\z/ ) ? $num_entries : 'invalid';
my $results_file = "/tmp/css_results_$results_tag.txt";

open( my $results_fh, '>>', $results_file )
  or die "Unable to open $results_file for appending: $!\n";
print {$results_fh} "$elapsed_ms\n"
  or die "Unable to write to $results_file: $!\n";
close($results_fh)
  or die "Unable to close $results_file: $!\n";

# Round a number to the nearest integer, with halves rounded away from zero
# (2.5 becomes 3, -2.5 becomes -3).
#
# Takes the number to round as its only argument and returns the rounded
# value.
sub round
{
	my ($value) = @_;

	# Nudge half a unit away from zero, then let int() truncate back
	# towards zero.  Zero itself is left alone.
	my $nudge = ( $value > 0 ) ?  0.5
	          : ( $value < 0 ) ? -0.5
	          :                   0;

	return int( $value + $nudge );
}