Newer
Older
Digital_Repository / Repositories / Maps / google_map_generate_data.pl
#!/usr/bin/env perl
use strict;
use Time::HiRes qw( gettimeofday );
use CGI;
use DBI;
use Geo::IP;

use warnings;

# ---------------------------------------------------------------------------
# CGI script that summarises rows of the "view" table by GeoIP city and
# prints the result as an XML <markers> document.
#
# Query parameters (all optional):
#   top    - maximum number of (ip, view_type) groups to read; must be a
#            plain non-negative integer, anything else is ignored and all
#            groups are read.
#   show   - 'both' (default) to include 'download' and 'abstract' views,
#            otherwise the single view_type value to select.
#   eprint - comma-separated list of integer archive ids to restrict to.
#
# After the response has been written, the elapsed wall-clock time in
# milliseconds is appended to /tmp/google_results_<N>.txt.
# ---------------------------------------------------------------------------

# Wall-clock start time in milliseconds, used for the timing log at the end.
my ( $start_sec, $start_micro ) = gettimeofday;
my $start_time = ( $start_sec * 1000 ) + round( $start_micro / 1000 );

# Database connection.
# NOTE(review): the credentials are hard-coded in the source; consider
# moving them to a configuration file that is not under version control.
my $dsn       = "DBI:mysql:database=eprintstats;host=localhost";
my $user_name = "eprintstatspriv";
my $password  = "AuldGrizzel";

# Geolocation database.
my $gidb = '/usr/local/share/GeoIP/GeoLiteCity.dat';

# Request parameters and their defaults.
my $num_entries = -1;        # include all entries from database
my $show_only   = 'both';    # include both abstracts & downloads
my $eprint      = '';

my $page = CGI->new;

# 'top' ends up in both the LIMIT clause and the name of the timing log, so
# only a plain run of ASCII digits is accepted; anything else is ignored.
my $top_param = $page->param('top');
if ( defined $top_param && $top_param =~ /\A([0-9]+)\z/ )
{
	$num_entries = $1;
}

my $show_param = $page->param('show');
$show_only = $show_param if ( defined $show_param );

my $eprint_param = $page->param('eprint');
$eprint = $eprint_param if ( defined $eprint_param );

# Split the archive id list into individual values so each can be bound as
# a placeholder instead of being pasted into the SQL text.
# NOTE(review): assumes archiveid is an integer column -- the original code
# interpolated the list unquoted; confirm against the schema.
my @eprint_ids;
if ( $eprint ne '' )
{
	@eprint_ids = split /,/, $eprint, -1;
	foreach my $id (@eprint_ids)
	{
		$id =~ s/\A\s+//;
		$id =~ s/\s+\z//;
	}

	my @bad_ids = grep { $_ !~ /\A-?[0-9]+\z/ } @eprint_ids;
	if (@bad_ids)
	{
		# Reject malformed input up front rather than letting it reach the
		# database.
		print $page->header( -status => '400 Bad Request', -type => 'text/plain' );
		print "Invalid eprint parameter\n";
		exit 0;
	}
}

# Set up query. User-supplied values are passed via @bind, never
# interpolated.
my ( @conditions, @bind );
if ( $show_only eq 'both' )
{
	push @conditions, "view_type IN ('download', 'abstract')";
}
else
{
	push @conditions, 'view_type = ?';
	push @bind,       $show_only;
}

if (@eprint_ids)
{
	push @conditions,
	  'archiveid IN (' . join( ', ', ('?') x @eprint_ids ) . ')';
	push @bind, @eprint_ids;
}

my $query = "SELECT ip, view_type, COUNT(*) AS count
	 FROM view
	 WHERE " . join( ' AND ', @conditions ) . "
	 GROUP BY ip, view_type
	 ORDER BY count DESC";

# $num_entries is either -1 or digits-only at this point, so it is safe to
# place in the statement text (a bound string would be quoted by the driver,
# which LIMIT does not accept).
$query .= " LIMIT $num_entries" if ( $num_entries > 0 );

# Open the external resources and run the query before any output is
# produced, so that a failure is not reported after a success header.
my $gi = Geo::IP->open( $gidb, GEOIP_STANDARD )
  or die "Unable to open GeoIP database $gidb\n";

my $connect = DBI->connect( $dsn, $user_name, $password, { RaiseError => 1 } );

my $stat = $connect->prepare($query);
$stat->execute(@bind);

# Fetch everything and count the rows ourselves: DBI does not guarantee
# that $sth->rows is meaningful for SELECT statements.
my $rows     = $stat->fetchall_arrayref( {} );
my $num_rows = scalar @{$rows};

$stat->finish();
$connect->disconnect();

# The header name is spelled with an underscore: written as -Cache-Control,
# Perl parses a subtraction and no Cache-Control header is produced. CGI.pm
# turns the underscore into a hyphen.
print $page->header(
	-type          => "text/xml",
	-Pragma        => 'no-cache',
	-Cache_Control => 'no-cache'
);

# Totals per view type: %types counts hits that could be geolocated,
# %unmapped counts hits whose IP had no GeoIP record.
my ( %cities, %IPs, %types, %unmapped );
$types{'download'}    = $types{'abstract'}    = 0;
$unmapped{'download'} = $unmapped{'abstract'} = 0;

# NOTE(review): when the query returns no rows nothing at all is written
# after the header (behaviour kept from the original); consider emitting an
# empty <markers/> element instead.
if ( $num_rows > 0 )
{
	# With no explicit limit, the timing log is named after the row count.
	$num_entries = $num_rows if ( $num_entries < 1 );

	foreach my $row ( @{$rows} )
	{
		my $ip    = $row->{'ip'};
		my $count = $row->{'count'};
		my $vtype = $row->{'view_type'};

		$IPs{$ip} = 1;

		my $location = $gi->record_by_addr($ip);
		if ( !defined $location )
		{
			$unmapped{$vtype} += $count;
			next;
		}

		my $lat  = $location->latitude;
		my $long = $location->longitude;

		my $name = $location->city;
		$name = 'Unknown' if ( !defined $name || $name eq '' );

		# The coordinates are part of the key so that same-named cities
		# at different positions get separate markers.
		my $city = $name . " ($lat, $long)";

		if ( !exists $cities{$city} )
		{
			$cities{$city} = {
				'lat'      => $lat,
				'long'     => $long,
				'abstract' => 0,
				'download' => 0,
			};
		}
		$cities{$city}{$vtype} += $count;
		$types{$vtype}         += $count;
	}

	# Need to wait until we have all the counts before writing the data out.
	print '<?xml version="1.0"?>' . "\n";
	print '<markers abs="'
	  . $types{'abstract'}
	  . '" dl="'
	  . $types{'download'}
	  . '" ips="'
	  . scalar( keys %IPs )
	  . '" ua="'
	  . $unmapped{'abstract'}
	  . '" ud="'
	  . $unmapped{'download'} . '">' . "\n";

	# Generate markers for each city. The city label comes from the GeoIP
	# data and may contain XML metacharacters, so it is escaped.
	foreach my $city ( keys %cities )
	{
		print '<marker city="'
		  . _xml_attr_escape($city)
		  . '" lat="'
		  . $cities{$city}{'lat'}
		  . '" long="'
		  . $cities{$city}{'long'}
		  . '" abs="'
		  . $cities{$city}{'abstract'}
		  . '" dl="'
		  . $cities{$city}{'download'} . '" />' . "\n";
	}

	print "</markers>\n";
}

# Append the elapsed time (ms) to the timing log for this entry count.
my ( $finish_sec, $finish_micro ) = gettimeofday();
my $finish_time = ( $finish_sec * 1000 ) + round( $finish_micro / 1000 );
my $elapsed_ms  = $finish_time - $start_time;

my $results_file = "/tmp/google_results_$num_entries.txt";
open my $results, '>>', $results_file
  or die "Cannot append to $results_file: $!\n";
print {$results} $elapsed_ms, "\n";
close $results
  or die "Cannot close $results_file: $!\n";

# Escapes the characters that are not allowed verbatim inside a
# double-quoted XML attribute value and returns the escaped copy.
sub _xml_attr_escape
{
	my ($text) = @_;
	$text =~ s/&/&amp;/g;    # must come first so later entities survive
	$text =~ s/</&lt;/g;
	$text =~ s/>/&gt;/g;
	$text =~ s/"/&quot;/g;
	return $text;
}

# Rounds a number to the nearest integer, with halves rounded away from
# zero (2.5 -> 3, -2.5 -> -3).
#
# Takes the number to round and returns the rounded value.
sub round
{
	my ($value) = @_;

	# -1, 0 or 1 depending on the sign of the input; the half is pushed in
	# that direction because int() truncates towards zero.
	my $sign = ( $value <=> 0 );

	return int( $value + 0.5 * $sign );
}