Newer
Older
Digital_Repository / Repositories / Maps / gd_map.pl
#!/usr/bin/env perl
use strict;
use Time::HiRes qw( gettimeofday );
use CGI;
use DBI;
use GD;
use Geo::IP;
use Geo::Proj4;

# Record when processing started, in milliseconds, so that the total map
# generation time can be reported at the end of the script.
my ( $start_sec, $start_micro ) = gettimeofday;
my $start_time = $start_sec * 1000 + round( $start_micro / 1000 );

# CGI request object.
my $page;

# Database connection settings, plus the handles and per-row state used
# while querying.
# NOTE(review): the credentials are hard-coded in the script source; consider
# moving them to a protected configuration file.
my $dsn       = 'DBI:mysql:database=eprintstats;host=localhost';
my $user_name = 'eprintstatspriv';
my $password  = 'AuldGrizzel';
my $connect;
my $query;
my ( %types, %unmapped );
my ( $stat, $row, $num_rows, $vtype );
my $where = '';

# GD image object, the base map file it is loaded from, the colours
# allocated on it and the requested image dimensions.
my $mapimage;
my $mapimagefile;
my ( $red, $white, $black, $blue, $tc );
my ( $width, $height );

# Geolocation lookup handle, projection object and the GeoIP database file.
my ( $gi, $proj );
my $gidb = '/usr/local/share/GeoIP/GeoLiteCity.dat';

# Offsets added to projected coordinates, and the corresponding full extents
# (twice the offsets) used to scale positions to the image size.
my $x_offset = 16986796.16;
my $y_offset = 8615499.05;
my $max_x    = 2 * $x_offset;
my $max_y    = 2 * $y_offset;

# Per-city tallies and the set of IP addresses seen.
my %cities;
my %IPs;

# Request options and scratch variables.
my $num_entries = -1;    # include all entries from database
my $num_hits    = 0;
my ( $ip, $count, $location );
my $city      = 0;
my $lat       = 0;
my $long      = '';
my $x         = 0;
my $y         = 0;
my $show_only = 'both';    # include both abstracts & downloads
my $eprint    = '';

# Create the CGI request object and send the HTTP header for a JPEG image
# that clients are asked not to cache.
#
# The Cache-Control key must be quoted: written as a bareword,
# -Cache-Control => ... parses as the numeric subtraction (-Cache) - 'Control',
# so the intended header name is never sent.
$page = CGI->new;
print $page->header(
	-type            => 'image/jpeg',
	-Pragma          => 'no-cache',
	'-Cache-Control' => 'no-cache',
);

# Optional request parameters override the defaults set at declaration:
#   top    - maximum number of database rows to include
#   show   - which view type(s) to include
#   eprint - restrict the query to the given archive identifier(s)
for my $override (
	[ 'top',    \$num_entries ],
	[ 'show',   \$show_only ],
	[ 'eprint', \$eprint ]
  )
{
	my ( $param_name, $target ) = @{$override};

	# Scalar assignment, so a repeated parameter yields only its first value.
	my $value = $page->param($param_name);
	${$target} = $value if ( defined $value );
}

# Open the GeoIP database used to look up a location for each IP address.
$gi = Geo::IP->open( $gidb, GEOIP_STANDARD )
  or die "Unable to open GeoIP database $gidb\n";

# Projection object ("robin" projection, lon_0 => 10) used to turn
# latitude/longitude into map coordinates.
$proj = Geo::Proj4->new( proj => "robin", ellps => "sphere", lon_0 => 10 )
  or die "parameter error: " . Geo::Proj4->error . "\n";

# The requested dimensions select which pre-rendered base map to load. They
# come straight from the request and are interpolated into a file path, so
# accept only plain positive integers.
$width  = $page->param('width');
$height = $page->param('height');
for my $dimension ( $width, $height )
{
	die "Map width and height must be positive integers\n"
	  unless ( defined $dimension && $dimension =~ /\A[1-9][0-9]*\z/ );
}

# Load the base map; the second argument (1) asks GD for a truecolor image.
# newFromPng returns undef on failure, so check before calling methods on it.
$mapimagefile = "/Users/nstanger/Sites/maps/map_${width}x${height}.png";
$mapimage     = GD::Image->newFromPng( $mapimagefile, 1 )
  or die "Unable to load base map image $mapimagefile\n";

# Fixed colours used for dots and text.
$white = $mapimage->colorAllocate( 255, 255, 255 );
$black = $mapimage->colorAllocate( 0,   0,   0 );
$red   = $mapimage->colorAllocate( 255, 0,   0 );
$blue  = $mapimage->colorAllocate( 0,   0,   255 );

# Allocate the intermediate blue-to-red blends so that colorClosest() can
# find them later. Only the last index is left in $tc; it is not used.
for my $level ( 1 .. 254 )
{
	$tc = $mapimage->colorAllocate( $level, 0, ( 255 - $level ) );
}

# Connect to the statistics database; RaiseError makes any DBI failure die.
$connect = DBI->connect( $dsn, $user_name, $password, { RaiseError => 1 } );

# Running totals of mapped and unmappable hits per view type.
$types{'download'}    = $types{'abstract'}    = 0;
$unmapped{'download'} = $unmapped{'abstract'} = 0;

# Set up query.
# Request-supplied values are never interpolated into the SQL text: the view
# type and archive identifiers are passed as bind values, and the row limit
# is only used once it is known to be a plain integer.
my @bind_values;

if ( $show_only eq 'both' )
{
	$where = "view_type IN ('download', 'abstract')";
}
else
{
	$where = "view_type = ?";
	push @bind_values, $show_only;
}

if ( $eprint ne '' )
{
	# Comma-separated list of archive identifiers, one placeholder each.
	my @eprint_ids =
	  grep { $_ ne '' }
	  map { my $id = $_; $id =~ s/\A\s+|\s+\z//g; $id }
	  split( /,/, $eprint );
	die "No eprint identifiers supplied\n" unless (@eprint_ids);

	$where .=
	  ' AND archiveid IN (' . join( ', ', ('?') x @eprint_ids ) . ')';
	push @bind_values, @eprint_ids;
}

# Anything other than a plain integer means "no limit" (the default of -1).
$num_entries = -1 unless ( $num_entries =~ /\A-?\d{1,9}\z/ );
my $limit_clause =
  ( $num_entries > 0 ) ? ' LIMIT ' . ( $num_entries + 0 ) : '';

$query = "SELECT ip, view_type, COUNT(*) AS count
	 FROM view
	 WHERE $where
	 GROUP BY ip, view_type
	 ORDER BY count DESC" . $limit_clause;

$stat = $connect->prepare($query);
$stat->execute(@bind_values);
$num_rows = $stat->rows;

# Walk the result set, tallying hits per city (keyed by city name plus
# coordinates) and counting hits whose IP address cannot be located.
if ( $num_rows > 0 )
{
	# With no explicit limit requested, use the number of rows returned.
	if ( $num_entries < 1 )
	{
		$num_entries = $num_rows;
	}

	while ( my $record = $stat->fetchrow_hashref() )
	{
		my ( $address, $hits, $kind ) =
		  @{$record}{ 'ip', 'count', 'view_type' };

		$IPs{$address} = 1;

		my $place = $gi->record_by_addr($address);
		unless ( defined $place )
		{
			# No geolocation record for this address.
			$unmapped{$kind} += $hits;
			next;
		}

		my $latitude  = $place->latitude;
		my $longitude = $place->longitude;

		my $town = $place->city;
		$town = 'Unknown' if ( $town eq '' );
		my $key = "$town ($latitude, $longitude)";

		# Project to map coordinates, then shift and scale to image pixels.
		my ( $east, $north ) = $proj->forward( $latitude, $longitude );
		my $pixel_x = round( ( $east + $x_offset ) / $max_x * $width );
		my $pixel_y = round( ( $y_offset - $north ) / $max_y * $height );

		# First sighting of this city: start its counters at zero.
		unless ( defined $cities{$key} )
		{
			$cities{$key} = {
				'lat'      => $latitude,
				'long'     => $longitude,
				'abstract' => 0,
				'download' => 0,
				'count'    => 0,
			};
		}

		my $entry = $cities{$key};
		$entry->{$kind}   += $hits;
		$entry->{'count'} += $hits;
		$types{$kind}     += $hits;

		$entry->{'x'} = $pixel_x;
		$entry->{'y'} = $pixel_y;
	}
}

# Release the statement handle and close the database connection.
$stat->finish();
$connect->disconnect();

# Draw a 3x3 pixel dot for every city. The colour depends on the display
# mode: a blend by download/abstract ratio for 'both', solid red for
# 'download', solid blue for 'abstract'.
CITY: for my $place ( keys %cities )
{
	my $tally = $cities{$place};
	my $dot_colour;

	if ( $show_only eq 'both' )
	{
		# Red share follows downloads, blue share follows abstracts.
		my $total      = $tally->{'count'};
		my $red_level  = round( $tally->{'download'} / $total * 255 );
		my $blue_level = round( $tally->{'abstract'} / $total * 255 );
		$dot_colour = $mapimage->colorClosest( $red_level, 0, $blue_level );
	}
	elsif ( $show_only eq 'download' )
	{
		# Skip cities with no downloads.
		next CITY if ( $tally->{'download'} == 0 );
		$dot_colour = $red;
	}
	elsif ( $show_only eq 'abstract' )
	{
		# Skip cities with no abstract views.
		next CITY if ( $tally->{'abstract'} == 0 );
		$dot_colour = $blue;
	}
	else
	{
		# Unrecognised display mode: draw nothing at all.
		last CITY;
	}

	my ( $centre_x, $centre_y ) = ( $tally->{'x'}, $tally->{'y'} );
	$mapimage->filledRectangle( $centre_x - 1, $centre_y - 1,
								$centre_x + 1, $centre_y + 1,
								$dot_colour );
}

# Output summary data in the top-left corner of the map: one line per
# displayed view type (with the unmappable count when non-zero), followed by
# the number of distinct cities and IP addresses.
for my $summary_line (
	[ 'download', 'downloads', 3,  $red ],
	[ 'abstract', 'abstracts', 15, $blue ]
  )
{
	my ( $kind, $plural, $text_y, $text_colour ) = @{$summary_line};

	# Only shown when this view type is part of the current display mode.
	next unless ( $show_only eq 'both' || $show_only eq $kind );

	my $label = "$types{$kind} $plural";
	if ( $unmapped{$kind} > 0 )
	{
		$label .= " (+$unmapped{$kind} unmappable)";
	}

	$mapimage->string( gdSmallFont, 3, $text_y, $label, $text_colour );
}

$mapimage->string( gdSmallFont, 3, 27,
				   sprintf( 'from %d cities', scalar keys %cities ), $black );
$mapimage->string( gdSmallFont, 3, 39,
				   sprintf( '(%d IP addresses)', scalar keys %IPs ), $black );

# Work out how long map generation took (in milliseconds) and stamp it near
# the bottom-left corner of the image.
my ( $finish_sec, $finish_micro ) = gettimeofday();
my $finish_time = $finish_sec * 1000 + round( $finish_micro / 1000 );
my $elapsed_ms  = $finish_time - $start_time;
$mapimage->string( gdSmallFont, 3, $height - 15,
				   "Map generated in $elapsed_ms ms", $black );

# Append the elapsed time to a per-entry-count results file.
# $num_entries can originate from the request, so only a plain integer is
# allowed into the file name; anything else is logged under 'invalid'.
# Three-argument open with a lexical handle keeps the name from being
# interpreted as an open mode.
my ($results_tag) = ( $num_entries =~ /\A(-?\d+)\z/ );
$results_tag = 'invalid' unless ( defined $results_tag );
my $results_file = "/tmp/gd_results_$results_tag.txt";

open( my $results_fh, '>>', $results_file )
  or die "Unable to append to $results_file: $!\n";
print {$results_fh} "$elapsed_ms\n";

# Buffered write errors surface at close; report them without losing the map.
close($results_fh)
  or warn "Unable to close $results_file: $!\n";

# The image is binary data, so switch STDOUT to binary mode before sending it.
binmode(STDOUT);

print $mapimage->jpeg();

# Round a number to the nearest integer, with halves rounded away from zero.
#
# Takes one numeric argument and returns the rounded integer value.
sub round
{
	my ($value) = @_;

	# Nudge half a unit away from zero, then let int() truncate toward zero.
	return int( $value - 0.5 ) if ( $value < 0 );
	return int( $value + 0.5 ) if ( $value > 0 );
	return int($value);
}