Newer
Older
Digital_Repository / Repositories / Maps / google_earth_generate_kml_bars.pl
#!/usr/bin/env perl
use strict;
# use CGI;
use DBI;
use Geo::IP;
use POSIX qw(floor);

# my ($page);

# Database connection.
my ($dsn)       = "DBI:mysql:database=eprintstats;host=localhost";
my ($user_name) = "eprintstatspub";
my ($password)  = "AuldGrizzel";
my ( $connect, $query, %types, %unmapped, $stat, $row, $num_rows, $vtype );

# Geolocation database.
my ($gi);
my ($gidb) = '/usr/local/share/GeoIP/GeoLiteCity.dat';

# Longitude adjustment lookup table. The size of a degree of longitude
# shrinks as we approach the pole, so we need to scale the size of the
# bars that we draw to compensate. The values are derived from simple
# trigonometry. One degree latitude intervals is probably overkill, but
# it was easy to generate and is probably also easier to process (we just
# take the absolute value and round to the nearest integer).
my (@long_adjust) = (
	1, 1.000149936, 1.000625031, 1.001401156, 1.002504822, 1.003938193, 1.005527222, 1.007577665, 1.009814805, 1.012548385,
	1.015451362, 1.018863602, 1.022455549, 1.026414998, 1.030723667, 1.035416774, 1.040342076, 1.045806114, 1.051493576, 1.057774136,
	1.064328263, 1.071308119, 1.07855449, 1.086459634, 1.094691064, 1.103413101, 1.112646298, 1.122444401, 1.132640933, 1.143416279,
	1.154798315, 1.166647234, 1.179193306, 1.192438327, 1.206420497, 1.220995149, 1.236155295, 1.252205744, 1.269161249, 1.286871805,
	1.30559452, 1.325133265, 1.34585379, 1.367512898, 1.390218486, 1.414290258, 1.439883436, 1.466366234, 1.494622451, 1.52427924,
	1.555922715, 1.589286423, 1.624573794, 1.661877673, 1.701368815, 1.743769606, 1.788337802, 1.836254359, 1.887432209, 1.94210986,
	2.000049973, 2.062828574, 2.130469499, 2.203424356, 2.281552845, 2.366264633, 2.459472746, 2.560324974, 2.669801881, 2.7913935,
	2.924802689, 3.072783109, 3.236797412, 3.421061629, 3.629874841, 3.865462623, 4.133753357, 4.448482828, 4.811613369, 5.242729893,
	5.759533746, 6.395493768, 7.190621631, 8.209846154, 9.579463858, 11.47777459, 14.35030477, 19.13145315, 28.71090387, 57.42180775,
	57.42180775,
);

# Miscellaneous variables.
my ( %cities, %IPs );
my ($num_entries) = -1;
my ($num_hits)    = 0;
my ( $ip,  $count, $location, $country );
my ( $lat, $long,  $city, $key ) = ( 0, 0, '', '' );
my ( $maxcolour, $red, $blue );
# A degree of latitude is always of constant length, so define it globally.
# The length of a degree of longitude changes depending on the latitude.
my ( $latsize ) = 0.075;


# $page = new CGI;
# print $page->header( -type => "text/xml", -Pragma => 'no-cache' );
$num_entries = -1;#$page->param('top');

$gi = Geo::IP->open( $gidb, GEOIP_STANDARD )
  or die "Unable to open GeoIP database $gidb\n";

$connect = DBI->connect( $dsn, $user_name, $password, { RaiseError => 1 } );

$types{'download'}    = $types{'abstract'}    = 0;
$unmapped{'download'} = $unmapped{'abstract'} = 0;
$query = "SELECT ip, view_type, country_name, COUNT(*) AS count
	 FROM view
	 GROUP BY ip, view_type, country_name
	 ORDER BY count DESC" . ( ( $num_entries > 0 ) ? " LIMIT $num_entries" : '' );

$stat = $connect->prepare($query);
$stat->execute();
$num_rows = $stat->rows;

if ( $num_rows > 0 )
{
	$num_entries = $num_rows if ( $num_entries < 1 );

	while ( $row = $stat->fetchrow_hashref() )
	{
		$ip    = $row->{'ip'};
		$count = $row->{'count'};
		$country = $row->{'country_name'};
		$country = 'New Zealand' if ( ( $country eq 'Otago Intranet' ) || ( $country eq 'Repository Admin' ) );
		$vtype = $row->{'view_type'};

		$IPs{$ip} = 1;

		$location = $gi->record_by_addr($ip);

		if ( defined($location) )
		{
			$lat  = $location->latitude;
			$long = $location->longitude;
			if ( $location->city eq '' )
			{
				$city = $country . ' (unidentified)';
				$key  = sprintf( "!%s (%f, %f)", $country, $lat, $long );
			}
			else
			{
				$city = $location->city;
				$key  = sprintf( "%s %s", $city, $country );
			}
			
			
# 			print "$key\t$city\t$country\t$lat\t$long\t$count\t$vtype\n";
			
			# If there are multiple points for the same city in
			# the same country, we accumulate the lats and longs for these
			# points and keep a track of how many points there are in total,
			# so that we can work out a weighted average latlong for the
			# city. Note that this assumes that each city name only exists
			# once within a country, i.e., it will break if there are
			# multiple cities with the same name in the same country!
			if ( !defined( $cities{$key} ) )
			{
				$cities{$key}{'name'}       = $city;
				$cities{$key}{'lat'}        = 0;
				$cities{$key}{'long'}       = 0;
				$cities{$key}{'abstract'}   = 0;
				$cities{$key}{'download'}   = 0;
				$cities{$key}{'num_points'} = 0;
			}
			$cities{$key}{'lat'}  += $lat;
			$cities{$key}{'long'} += $long;
			$cities{$key}{$vtype} += $count;
			$types{$vtype}        += $count;
			$cities{$key}{'num_points'}++;
		}
		else
		{
			$unmapped{$vtype} += $count;
		}
	}

# 	exit 0;
	
	# Average the location of multiple points for the same city, weighted
	# by the number of hits for each point.
	foreach $city ( keys %cities )
	{
		$cities{$city}{'lat'} = $cities{$city}{'lat'} / $cities{$city}{'num_points'};
		$cities{$city}{'long'} = $cities{$city}{'long'} / $cities{$city}{'num_points'};
	}

	# Need to wait until we have all the counts before writing the data out.
	print '<?xml version="1.0"?>
<kml xmlns="http://earth.google.com/kml/2.1">
	<Document>
		<name>Otago Eprints Repository: Usage Statistics</name>
		<description>Visualisation of usage statistics for the EPrints repository at the School of Business, University of Otago, Dunedin, New Zealand. Statistics are cumulative since November 17 2005.</description>
		<Folder>
			<name>Locations (identified)</name>
			<description>Locations from which the repository has been accessed. The icon is colour-coded according to the relative proportion of abstract views (blue) versus document downloads (red). Only identified locations are displayed.</description>
			<visibility>0</visibility>';

	# Plot known location icons as a separate folder so that we can turn them
	# on and off as a group.
	foreach $city ( keys %cities )
	{
		# Exclude unknown locations.
		if ( $city !~ /^!/ )
		{
			make_icon(
				$cities{$city}{'name'},
				$cities{$city}{'download'},
				$cities{$city}{'abstract'},
				$cities{$city}{'lat'},
				$cities{$city}{'long'}
			);
		}
	}
	print '
		</Folder>
		<Folder>
			<name>Locations (unidentified)</name>
			<description>Locations from which the repository has been accessed. The icon is colour-coded according to the relative proportion of abstract views (blue) versus document downloads (red). Only unidentified locations are displayed.</description>
			<visibility>0</visibility>';

	# Plot known location icons as a separate folder so that we can turn them
	# on and off as a group.
	foreach $city ( keys %cities )
	{
		# Exclude known locations.
		if ( $city =~ /^!/ )
		{
			make_icon(
				$cities{$city}{'name'},
				$cities{$city}{'download'},
				$cities{$city}{'abstract'},
				$cities{$city}{'lat'},
				$cities{$city}{'long'}
			);
		}
	}
	print '
		</Folder>
		<Folder>
			<name>Volume (known locations)</name>
			<description>The volume of abstract views (blue) and document downloads (red), represented as a bar chart. Each 1,000 metres of height represents one abstract view or document download. Only known locations are displayed.</description>
			<visibility>0</visibility>';

	# Plot volume bars for known locations as a separate folder so that
	# we can turn them on and off as a group.
	foreach $city ( keys %cities )
	{
		# Exclude unknown locations because they mess up the display somewhat.
		if ( $city !~ /^!/ )
		{
			make_bar(
				$cities{$city}{'name'},
				$cities{$city}{'download'},
				$cities{$city}{'lat'},
				$cities{$city}{'long'},
				'download'
			);
			make_bar(
				$cities{$city}{'name'},
				$cities{$city}{'abstract'},
				$cities{$city}{'lat'},
				$cities{$city}{'long'},
				'abstract'
			);
		}
	}

	print '
		</Folder>
		<Folder>
			<name>Volume (unknown locations)</name>
			<description>The volume of abstract views (blue) and document downloads (red), represented as a bar chart. Each 1,000 metres of height represents one abstract view or document download. Only unknown locations are displayed.</description>
			<visibility>0</visibility>';

	# Plot volume bars for unknown locations as a separate folder so that
	# we can turn them on and off as a group.
	foreach $city ( keys %cities )
	{
		# Include unknown locations only.
		if ( $city =~ /^!/ )
		{
			make_bar(
				$cities{$city}{'name'},
				$cities{$city}{'download'},
				$cities{$city}{'lat'},
				$cities{$city}{'long'},
				'download'
			);
			make_bar(
				$cities{$city}{'name'},
				$cities{$city}{'abstract'},
				$cities{$city}{'lat'},
				$cities{$city}{'long'},
				'abstract'
			);
		}
	}

	print '
		</Folder>
	</Document>
</kml>
';
}

$stat->finish();
$connect->disconnect();

sub round
{
	my ( $n ) = shift;
	return int( $n + 0.5 * ( $n <=> 0 ) );
}


sub make_icon
{
	my ( $name, $download, $abstract, $lat, $long ) = @_;
	my ($maxcolour) = $download + $abstract;
	my ($red)       = round( $download / $maxcolour * 255 );
	my ($blue)      = round( $abstract / $maxcolour * 255 );

	print '
			<Placemark>
				<name>'
					. $name
					. '</name>
				<visibility>0</visibility>
				<description>
					<![CDATA[<span style="color:red;">'
					. $download
					. ' download'
					. ( ( $download != 1 ) ? 's' : '' )
					. '</span>, <span style="color:blue;">'
					. $abstract
					. ' abstract'
					. ( ( $abstract != 1 ) ? 's' : '' )
					. '</span>]]>
				</description>
				<Style>
					<IconStyle>
						<color>'
							. sprintf( 'ff%02x00%02x', $blue, $red )
							. '</color>
						<Icon>
							<href>root://icons/palette-4.png</href>
							<x>96</x>
							<y>128</y>
							<w>32</w>
							<h>32</h>
						</Icon>
					</IconStyle>
				</Style>
				<LookAt>
					<longitude>'
						. $long
						. '</longitude>
					<latitude>'
						. $lat
						. '</latitude>
					<range>540.68</range>
					<tilt>0</tilt>
					<heading>3</heading>
				</LookAt>
				<Point>
					<coordinates>'
						. $long . ','
						. $lat
						. ',0</coordinates>
				</Point>
			</Placemark>';
}


sub make_bar
{
	my ( $name, $count, $lat, $long, $type ) = @_;
	
	# Work out the longitudinal width of the bar adjusted for the latitude.
	# The latitude size is constant.
	my ( $longsize ) = latitude_adjust( $lat, $latsize * 2 );
	
	# A download bar starts $longsize degrees to the west of $long.
	# An abstract bar starts at $long.
	$long -= $longsize if ( $type eq 'download' );
	
	print '
			<Placemark>
				<name>'
					. $name
					. '</name>
				<visibility>0</visibility>
				<description>
					<![CDATA[<span style="color:'
					. ( ( $type eq 'download' ) ? 'red' : 'blue' )
					. ';">'
					. $count
					. ' '
					. $type
					. ( ( $count != 1 ) ? 's' : '' )
					. '</span>]]>
				</description>
				<Style>
					<PolyStyle>
						<color>c0'
							. ( ( $type eq 'abstract' ) ? 'ff' : '00' ) 
							. '00'
							. ( ( $type eq 'download' ) ? 'ff' : '00' )
							. '</color>
					</PolyStyle>
				</Style>
				<Polygon>
					<extrude>1</extrude>
					<altitudeMode>relativeToGround</altitudeMode>
					<outerBoundaryIs>
						<LinearRing>
							<coordinates>
								' . $long               . ',' . ($lat - $latsize) . ',' . ($count * 1000) . '
								' . $long               . ',' . ($lat + $latsize) . ',' . ($count * 1000) . '
								' . ($long + $longsize) . ',' . ($lat + $latsize) . ',' . ($count * 1000) . '
								' . ($long + $longsize) . ',' . ($lat - $latsize) . ',' . ($count * 1000) . '
								' . $long               . ',' . ($lat - $latsize) . ',' . ($count * 1000) . '
							</coordinates>
						</LinearRing>
					</outerBoundaryIs>
				</Polygon>
			</Placemark>';
}


sub latitude_adjust
{
	my ( $lat, $angle ) = @_;
	
	$lat = round( abs( $lat ) );
	return ( $long_adjust[$lat] * $angle );
}