Newer
Older
Digital_Repository / Repositories / Maps / Timeline / Eprints_Data_Google.pl
#!/usr/bin/env perl
use strict;

# use CGI;
use DBI;
use Geo::IP;
use POSIX qw(floor);

# my ($page);
# Database connection.
my ($dsn)       = "DBI:mysql:database=eprintstats;host=127.0.0.1";
my ($user_name) = "eprintstatspriv";
my ($password)  = "AuldGrizzel";
my ( $connect, $query, %types, %unmapped, $stat, $row, $num_rows, $vtype );

# Geolocation database.
my ($gi);
my ($gidb) = '/usr/local/share/GeoIP/GeoLiteCity.dat';

# Longitude adjustment lookup table. The size of a degree of longitude
# shrinks as we approach the pole, so we need to scale the size of the
# bars that we draw to compensate. The values are derived from simple
# trigonometry. One degree latitude intervals is probably overkill, but
# it was easy to generate and is probably also easier to process (we just
# take the absolute value and round to the nearest integer).
my (@long_adjust) = (
					  1,           1.000149936, 1.000625031, 1.001401156,
					  1.002504822, 1.003938193, 1.005527222, 1.007577665,
					  1.009814805, 1.012548385, 1.015451362, 1.018863602,
					  1.022455549, 1.026414998, 1.030723667, 1.035416774,
					  1.040342076, 1.045806114, 1.051493576, 1.057774136,
					  1.064328263, 1.071308119, 1.07855449,  1.086459634,
					  1.094691064, 1.103413101, 1.112646298, 1.122444401,
					  1.132640933, 1.143416279, 1.154798315, 1.166647234,
					  1.179193306, 1.192438327, 1.206420497, 1.220995149,
					  1.236155295, 1.252205744, 1.269161249, 1.286871805,
					  1.30559452,  1.325133265, 1.34585379,  1.367512898,
					  1.390218486, 1.414290258, 1.439883436, 1.466366234,
					  1.494622451, 1.52427924,  1.555922715, 1.589286423,
					  1.624573794, 1.661877673, 1.701368815, 1.743769606,
					  1.788337802, 1.836254359, 1.887432209, 1.94210986,
					  2.000049973, 2.062828574, 2.130469499, 2.203424356,
					  2.281552845, 2.366264633, 2.459472746, 2.560324974,
					  2.669801881, 2.7913935,   2.924802689, 3.072783109,
					  3.236797412, 3.421061629, 3.629874841, 3.865462623,
					  4.133753357, 4.448482828, 4.811613369, 5.242729893,
					  5.759533746, 6.395493768, 7.190621631, 8.209846154,
					  9.579463858, 11.47777459, 14.35030477, 19.13145315,
					  28.71090387, 57.42180775, 57.42180775,
);

# Miscellaneous variables.
my ( %cities, %IPs );
my ($num_entries) = -1;
my ($num_hits)    = 0;
my ( $ip, $count, $location, $country, $year, $month );
my ( $lat, $long, $city, $key ) = ( 0, 0, '', '' );
my ( $maxcolour, $red, $blue );

# A degree of latitude is always of constant length, so define it globally.
# The length of a degree of longitude changes depending on the latitude.
my ($latsize) = 0.075;

# $page = new CGI;
# print $page->header( -type => "text/xml", -Pragma => 'no-cache' );
$num_entries = -1;                             #$page->param('top');
$gi = Geo::IP->open( $gidb, GEOIP_STANDARD )
  or die "Unable to open GeoIP database $gidb\n";
$connect = DBI->connect( $dsn, $user_name, $password, { RaiseError => 1 } );
$types{'download'}    = $types{'abstract'}    = 0;
$unmapped{'download'} = $unmapped{'abstract'} = 0;
$query = "SELECT ip, view_type, country_name, COUNT(*) AS count
	 FROM view
	 WHERE archive_name = 'otago_eprints' AND country_code NOT LIKE 'X@%'
	 GROUP BY ip, view_type, country_name
	 ORDER BY count DESC" . ( ( $num_entries > 0 ) ? " LIMIT $num_entries" : '' );
$stat = $connect->prepare($query);
$stat->execute();
$num_rows = $stat->rows;

if ( $num_rows > 0 ) {
	$num_entries = $num_rows if ( $num_entries < 1 );
	while ( $row = $stat->fetchrow_hashref() ) {
		$ip      = $row->{'ip'};
		$count   = $row->{'count'};
		$country = $row->{'country_name'};
		$country = 'New Zealand'
		  if (    ( $country eq 'Otago Intranet' )
			   || ( $country eq 'Repository Admin' ) );
		$vtype    = $row->{'view_type'};
		$IPs{$ip} = 1;
		$location = $gi->record_by_addr($ip);
		if ( defined($location) ) {
			$lat  = $location->latitude;
			$long = $location->longitude;
			if ( $location->city eq '' ) {
				$city = $country . ' (unidentified)';
				$key = sprintf( "!%s (%f, %f)", $country, $lat, $long );
			} else {
				$city = $location->city;
				$key = sprintf( "%s %s", $city, $country );
			}

			# 			print "$key\t$city\t$country\t$lat\t$long\t$count\t$vtype\n";
			# If there are multiple points for the same city in
			# the same country, we accumulate the lats and longs for these
			# points and keep a track of how many points there are in total,
			# so that we can work out a weighted average latlong for the
			# city. Note that this assumes that each city name only exists
			# once within a country, i.e., it will break if there are
			# multiple cities with the same name in the same country!
			if ( !defined( $cities{$key} ) ) {
				$cities{$key}{'name'}       = $city;
				$cities{$key}{'lat'}        = 0;
				$cities{$key}{'long'}       = 0;
				$cities{$key}{'abstract'}   = 0;
				$cities{$key}{'download'}   = 0;
				$cities{$key}{'num_points'} = 0;
			}
			$cities{$key}{'lat'}  += $lat;
			$cities{$key}{'long'} += $long;
			$cities{$key}{$vtype} += $count;
			$types{$vtype}        += $count;
			$cities{$key}{'num_points'}++;
		} else {
			$unmapped{$vtype} += $count;
		}
	}

	# 	exit 0;
	# Average the location of multiple points for the same city, weighted
	# by the number of hits for each point.
	foreach $city ( keys %cities ) {
		$cities{$city}{'lat'} =
		  $cities{$city}{'lat'} / $cities{$city}{'num_points'};
		$cities{$city}{'long'} =
		  $cities{$city}{'long'} / $cities{$city}{'num_points'};
	}

	# Need to wait until we have all the counts before writing the data out.
	print '<?xml version="1.0"?>
<kml xmlns="http://earth.google.com/kml/2.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://earth.google.com/kml/2.1 KMLSchema.xsd">
	<Document>
		<name>Otago Eprints Repository: Usage Statistics</name>
		<visibility>1</visibility>
		<open>1</open>
		<description>Visualisation of usage statistics for the EPrints repository at the School of Business, University of Otago, Dunedin, New Zealand. Statistics are cumulative since November 17 2005.</description>
		<Style>
			<ListStyle>
      			<listItemType>radioFolder</listItemType>
    		</ListStyle>
    	</Style>
    	<Folder>
    		<name>Points</name>
    		<description>Select this folder to display points from which the repository has been accessed</description>
			<Folder>
				<name>Locations (identified)</name>
				<description>Locations from which the repository has been accessed. The icon is colour-coded according to the relative proportion of abstract views (blue) versus document downloads (red). Only identified locations are displayed.</description>';

	# Plot known location icons as a separate folder so that we can turn them
	# on and off as a group.
	foreach $city ( sort( keys(%cities) ) ) {

		# Exclude unknown locations.
		if ( $city !~ /^!/ ) {
			make_icon(
					   $cities{$city}{'name'},     $cities{$city}{'download'},
					   $cities{$city}{'abstract'}, $cities{$city}{'lat'},
					   $cities{$city}{'long'}
			);
		}
	}
	print '
			</Folder>
			<Folder>
				<name>Locations (unidentified)</name>
				<description>Locations from which the repository has been accessed. The icon is colour-coded according to the relative proportion of abstract views (blue) versus document downloads (red). Only unidentified locations are displayed.</description>';

	# Plot known location icons as a separate folder so that we can turn them
	# on and off as a group.
	foreach $city ( sort( keys(%cities) ) ) {

		# Exclude known locations.
		if ( $city =~ /^!/ ) {
			make_icon(
					   $cities{$city}{'name'},     $cities{$city}{'download'},
					   $cities{$city}{'abstract'}, $cities{$city}{'lat'},
					   $cities{$city}{'long'}
			);
		}
	}
	print '
			</Folder>
		</Folder>
		<Folder>
    		<name>Volume (Total)</name>
    		<description>Select this folder to display the volume with which the repository has been accessed</description>
			<Folder>
				<name>Volume (known locations)</name>
				<visibility>1</visibility>
				<description>The volume of abstract views (blue) and document downloads (red), represented as a bar chart. Each 1,000 metres of height represents one abstract view or document download. Only known locations are displayed.</description>';

	# Plot volume bars for known locations as a separate folder so that
	# we can turn them on and off as a group.
	foreach $city ( sort( keys(%cities) ) ) {

		# Exclude unknown locations because they mess up the display somewhat.
		if ( $city !~ /^!/ ) {
			make_bar( $cities{$city}{'name'},
					  $cities{$city}{'download'},
					  $cities{$city}{'lat'},
					  $cities{$city}{'long'}, 'download' );
			make_bar( $cities{$city}{'name'},
					  $cities{$city}{'abstract'},
					  $cities{$city}{'lat'},
					  $cities{$city}{'long'}, 'abstract' );
		}
	}
	print '
			</Folder>
			<Folder>
				<name>Volume (unknown locations)</name>
				<visibility>1</visibility>
				<description>The volume of abstract views (blue) and document downloads (red), represented as a bar chart. Each 1,000 metres of height represents one abstract view or document download. Only unknown locations are displayed.</description>';

	# Plot volume bars for unknown locations as a separate folder so that
	# we can turn them on and off as a group.
	foreach $city ( sort( keys(%cities) ) ) {

		# Include unknown locations only.
		if ( $city =~ /^!/ ) {
			make_bar( $cities{$city}{'name'},
					  $cities{$city}{'download'},
					  $cities{$city}{'lat'},
					  $cities{$city}{'long'}, 'download' );
			make_bar( $cities{$city}{'name'},
					  $cities{$city}{'abstract'},
					  $cities{$city}{'lat'},
					  $cities{$city}{'long'}, 'abstract' );
		}
	}
	print '
			</Folder>
		</Folder>
		<Folder>
			<name>Volume (absolute, by Month)</name>
			<visibility>1</visibility>
			<description>Select this folder to display the volume with which the repository has been accessed each month</description>';
}

# Miscellaneous variables.
undef %cities;
undef %IPs;
$num_entries = -1;
$num_hits    = 0;
$lat         = $long = 0;
$city        = $key = '';
$num_entries = -1;
$types{'download'}    = $types{'abstract'}    = 0;
$unmapped{'download'} = $unmapped{'abstract'} = 0;
$query                =
"SELECT ip, view_type, country_name, year(request_date) as year, month(request_date) as month, COUNT(*) AS count 
FROM view 
WHERE archive_name = 'otago_eprints' AND country_code NOT LIKE 'X@%'
GROUP BY ip, view_type, country_name, year, month
ORDER BY year, month"
  . ( ( $num_entries > 0 ) ? " LIMIT $num_entries" : '' );
$stat = $connect->prepare($query);
$stat->execute();
$num_rows = $stat->rows;

if ( $num_rows > 0 ) {
	$num_entries = $num_rows if ( $num_entries < 1 );
	while ( $row = $stat->fetchrow_hashref() ) {
		$ip      = $row->{'ip'};
		$count   = $row->{'count'};
		$country = $row->{'country_name'};
		$country = 'New Zealand'
		  if (    ( $country eq 'Otago Intranet' )
			   || ( $country eq 'Repository Admin' ) );
		$vtype    = $row->{'view_type'};
		$year     = $row->{'year'};
		$month    = $row->{'month'};
		$month    = sprintf( "%02d", $month );
		$IPs{$ip} = 1;
		$location = $gi->record_by_addr($ip);

		if ( defined($location) ) {
			$lat  = $location->latitude;
			$long = $location->longitude;
			if ( $location->city eq '' ) {
				$city = $country . ' (unidentified)';
				$key  = sprintf( "!%s (%f, %f) %s %s",
								$year, $month, $country, $lat, $long );
			} else {
				$city = $location->city;
				$key = sprintf( "%s %s %s %s", $year, $month, $city, $country );
			}

			# 			print "$key\t$city\t$country\t$lat\t$long\t$count\t$vtype\n";
			# If there are multiple points for the same city in
			# the same country, we accumulate the lats and longs for these
			# points and keep a track of how many points there are in total,
			# so that we can work out a weighted average latlong for the
			# city. Note that this assumes that each city name only exists
			# once within a country, i.e., it will break if there are
			# multiple cities with the same name in the same country!
			if ( !defined( $cities{$key} ) ) {
				$cities{$key}{'name'}       = $city;
				$cities{$key}{'year'}       = $year;
				$cities{$key}{'month'}      = $month;
				$cities{$key}{'lat'}        = 0;
				$cities{$key}{'long'}       = 0;
				$cities{$key}{'abstract'}   = 0;
				$cities{$key}{'download'}   = 0;
				$cities{$key}{'num_points'} = 0;
			}
			$cities{$key}{'lat'}  += $lat;
			$cities{$key}{'long'} += $long;
			$cities{$key}{$vtype} += $count;
			$types{$vtype}        += $count;
			$cities{$key}{'num_points'}++;
		} else {
			$unmapped{$vtype} += $count;
		}
	}

	# 	exit 0;
	# Average the location of multiple points for the same city, weighted
	# by the number of hits for each point.
	foreach $city ( keys %cities ) {
		$cities{$city}{'lat'} =
		  $cities{$city}{'lat'} / $cities{$city}{'num_points'};
		$cities{$city}{'long'} =
		  $cities{$city}{'long'} / $cities{$city}{'num_points'};
	}
	foreach $city ( sort( keys(%cities) ) ) {

		# Exclude unknown locations because they mess up the display somewhat.
		make_sim(
				  $cities{$city}{'name'}, $cities{$city}{'download'},
				  $cities{$city}{'lat'},  $cities{$city}{'long'},
				  'download',             $cities{$city}{'year'},
				  $cities{$city}{'month'}
		);
		make_sim(
				  $cities{$city}{'name'}, $cities{$city}{'abstract'},
				  $cities{$city}{'lat'},  $cities{$city}{'long'},
				  'abstract',             $cities{$city}{'year'},
				  $cities{$city}{'month'}
		);

		
	}
	print '
		</Folder>
	</Document>
</kml>
';
}
$stat->finish();
$connect->disconnect();

sub round {
	my ($n) = shift;
	return int( $n + 0.5 * ( $n <=> 0 ) );
}

sub make_icon {
	my ( $name, $download, $abstract, $lat, $long ) = @_;
	my ($maxcolour) = $download + $abstract;
	my ($red)       = round( $download / $maxcolour * 255 );
	my ($blue)      = round( $abstract / $maxcolour * 255 );
	print '
			<Placemark>
				<name>'
	  . $name . '</name>
				<visibility>0</visibility>
				<description>
					<![CDATA[<span style="color:red;">'
	  . $download
	  . ' download'
	  . ( ( $download != 1 ) ? 's' : '' )
	  . '</span>, <span style="color:blue;">'
	  . $abstract
	  . ' abstract'
	  . ( ( $abstract != 1 ) ? 's' : '' )
	  . '</span>]]>
				</description>
				<LookAt>
					<longitude>'
	  . $long . '</longitude>
					<latitude>'
	  . $lat . '</latitude>
					<range>60000</range>
					<tilt>45</tilt>
					<heading>3</heading>
				</LookAt>
				<Style>
					<IconStyle>
						<color>'
	  . sprintf( 'ff%02x00%02x', $blue, $red ) . '</color>
						<Icon>
							<href>root://icons/palette-4.png</href>
						</Icon>
						<hotSpot x="0.5" y="0.5" xunits="fraction" yunits="fraction" />
					</IconStyle>
				</Style>
				<Point>
					<coordinates>'
	  . $long . ',' . $lat . ',0</coordinates>
				</Point>
			</Placemark>';
}

sub make_bar {
	my ( $name, $count, $lat, $long, $type ) = @_;

	# Work out the longitudinal width of the bar adjusted for the latitude.
	# The latitude size is constant.
	my ($longsize) = latitude_adjust( $lat, $latsize * 2 );

	# A download bar starts $longsize degrees to the west of $long.
	# An abstract bar starts at $long.
	$long -= $longsize if ( $type eq 'download' );
	print '
			<Placemark>
				<name>'
	  . $name . '</name>
				<visibility>0</visibility>
				<description>
					<![CDATA[<span style="color:'
	  . ( ( $type eq 'download' ) ? 'red' : 'blue' ) . ';">' . $count . ' '
	  . $type
	  . ( ( $count != 1 ) ? 's' : '' )
	  . '</span>]]>
				</description>
				<LookAt>
					<longitude>'
	  . $long . '</longitude>
					<latitude>'
	  . $lat . '</latitude>
					<range>60000</range>
					<tilt>45</tilt>
					<heading>3</heading>
				</LookAt>
				<Style>
					<PolyStyle>
						<color>c0'
	  . ( ( $type eq 'abstract' ) ? 'ff' : '00' ) . '00'
	  . ( ( $type eq 'download' ) ? 'ff' : '00' )
	  . '</color>
					</PolyStyle>
				</Style>
				<Polygon>
					<extrude>1</extrude>
					<altitudeMode>relativeToGround</altitudeMode>
					<outerBoundaryIs>
						<LinearRing>
							<coordinates>
								' . $long . ',' . ( $lat - $latsize ) . ',' . ( $count * 1000 ) . '
								' . $long . ',' . ( $lat + $latsize ) . ',' . ( $count * 1000 ) . '
								'
	  . ( $long + $longsize ) . ','
	  . ( $lat + $latsize ) . ','
	  . ( $count * 1000 ) . '
								'
	  . ( $long + $longsize ) . ','
	  . ( $lat - $latsize ) . ','
	  . ( $count * 1000 ) . '
								' . $long . ',' . ( $lat - $latsize ) . ',' . ( $count * 1000 ) . '
							</coordinates>
						</LinearRing>
					</outerBoundaryIs>
				</Polygon>
			</Placemark>';
}

sub make_sim {
	my ( $name, $count, $lat, $long, $type, $year, $month ) = @_;
	my ($endmonth) = $month + 1;
	my ($endyear)  = $year;
	if ( $endmonth == 13 ) {
		$endyear += 1;
		$endmonth = 1;
	}
	$endmonth = sprintf( "%02d", $endmonth );

	# Work out the longitudinal width of the bar adjusted for the latitude.
	# The latitude size is constant.
	my ($longsize) = latitude_adjust( $lat, $latsize * 2 );

	# A download bar starts $longsize degrees to the west of $long.
	# An abstract bar starts at $long.
	$long -= $longsize if ( $type eq 'download' );
	print '
				<Placemark>
					<name>'
	  . $name . '</name>
					<visibility>0</visibility>
					<description>
						<![CDATA[<span style="color:'
	  . ( ( $type eq 'download' ) ? 'red' : 'blue' ) . ';">' . $count . ' '
	  . $type
	  . ( ( $count != 1 ) ? 's' : '' )
	  . '</span>]]>
					</description>
					<LookAt>
						<longitude>'
	  . $long . '</longitude>
						<latitude>'
	  . $lat . '</latitude>
					<range>60000</range>
						<tilt>45</tilt>
						<heading>3</heading>
					</LookAt>
					<TimeSpan>
					<begin>'
	  . $year . '-' . $month . '</begin>
					<end>' . $endyear . '-' . ($endmonth) . '</end>
					</TimeSpan>
					<Style>
						<PolyStyle>
							<color>c0'
	  . ( ( $type eq 'abstract' ) ? 'ff' : '00' ) . '00'
	  . ( ( $type eq 'download' ) ? 'ff' : '00' )
	  . '</color>
						</PolyStyle>
					</Style>
					<Polygon>
						<extrude>1</extrude>
						<altitudeMode>relativeToGround</altitudeMode>
						<outerBoundaryIs>
							<LinearRing>
								<coordinates>
									' . $long . ',' . ( $lat - $latsize ) . ',' . ( $count * 1000 ) . '
									' . $long . ',' . ( $lat + $latsize ) . ',' . ( $count * 1000 ) . '
									'
	  . ( $long + $longsize ) . ','
	  . ( $lat + $latsize ) . ','
	  . ( $count * 1000 ) . '
									'
	  . ( $long + $longsize ) . ','
	  . ( $lat - $latsize ) . ','
	  . ( $count * 1000 ) . '
									' . $long . ',' . ( $lat - $latsize ) . ',' . ( $count * 1000 ) . '
								</coordinates>
							</LinearRing>
						</outerBoundaryIs>
					</Polygon>
				</Placemark>';
}

sub latitude_adjust {
	my ( $lat, $angle ) = @_;
	$lat = round( abs($lat) );
	return ( $long_adjust[$lat] * $angle );
}