#!/usr/bin/env perl use strict; # use CGI; use DBI; use Geo::IP; use POSIX qw(floor); # my ($page); # Database connection. my ($dsn) = "DBI:mysql:database=eprintstats;host=127.0.0.1"; my ($user_name) = "eprintstatspriv"; my ($password) = "AuldGrizzel"; my ( $connect, $query, %types, %unmapped, $stat, $row, $num_rows, $vtype ); # Geolocation database. my ($gi); my ($gidb) = '/usr/local/share/GeoIP/GeoLiteCity.dat'; # Longitude adjustment lookup table. The size of a degree of longitude # shrinks as we approach the pole, so we need to scale the size of the # bars that we draw to compensate. The values are derived from simple # trigonometry. One degree latitude intervals is probably overkill, but # it was easy to generate and is probably also easier to process (we just # take the absolute value and round to the nearest integer). my (@long_adjust) = ( 1, 1.000149936, 1.000625031, 1.001401156, 1.002504822, 1.003938193, 1.005527222, 1.007577665, 1.009814805, 1.012548385, 1.015451362, 1.018863602, 1.022455549, 1.026414998, 1.030723667, 1.035416774, 1.040342076, 1.045806114, 1.051493576, 1.057774136, 1.064328263, 1.071308119, 1.07855449, 1.086459634, 1.094691064, 1.103413101, 1.112646298, 1.122444401, 1.132640933, 1.143416279, 1.154798315, 1.166647234, 1.179193306, 1.192438327, 1.206420497, 1.220995149, 1.236155295, 1.252205744, 1.269161249, 1.286871805, 1.30559452, 1.325133265, 1.34585379, 1.367512898, 1.390218486, 1.414290258, 1.439883436, 1.466366234, 1.494622451, 1.52427924, 1.555922715, 1.589286423, 1.624573794, 1.661877673, 1.701368815, 1.743769606, 1.788337802, 1.836254359, 1.887432209, 1.94210986, 2.000049973, 2.062828574, 2.130469499, 2.203424356, 2.281552845, 2.366264633, 2.459472746, 2.560324974, 2.669801881, 2.7913935, 2.924802689, 3.072783109, 3.236797412, 3.421061629, 3.629874841, 3.865462623, 4.133753357, 4.448482828, 4.811613369, 5.242729893, 5.759533746, 6.395493768, 7.190621631, 8.209846154, 9.579463858, 11.47777459, 14.35030477, 19.13145315, 28.71090387, 57.42180775, 57.42180775, ); # Miscellaneous variables. my ( %cities, %IPs ); my ($num_entries) = -1; my ($num_hits) = 0; my ( $ip, $count, $location, $country, $year, $month ); my ( $lat, $long, $city, $key ) = ( 0, 0, '', '' ); my ( $maxcolour, $red, $blue ); # A degree of latitude is always of constant length, so define it globally. # The length of a degree of longitude changes depending on the latitude. my ($latsize) = 0.075; # $page = new CGI; # print $page->header( -type => "text/xml", -Pragma => 'no-cache' ); $num_entries = -1; #$page->param('top'); $gi = Geo::IP->open( $gidb, GEOIP_STANDARD ) or die "Unable to open GeoIP database $gidb\n"; $connect = DBI->connect( $dsn, $user_name, $password, { RaiseError => 1 } ); $types{'download'} = $types{'abstract'} = 0; $unmapped{'download'} = $unmapped{'abstract'} = 0; $query = "SELECT ip, view_type, country_name, COUNT(*) AS count FROM view WHERE archive_name = 'otago_eprints' AND country_code NOT LIKE 'X@%' GROUP BY ip, view_type, country_name ORDER BY count DESC" . ( ( $num_entries > 0 ) ? " LIMIT $num_entries" : '' ); $stat = $connect->prepare($query); $stat->execute(); $num_rows = $stat->rows; if ( $num_rows > 0 ) { $num_entries = $num_rows if ( $num_entries < 1 ); while ( $row = $stat->fetchrow_hashref() ) { $ip = $row->{'ip'}; $count = $row->{'count'}; $country = $row->{'country_name'}; $country = 'New Zealand' if ( ( $country eq 'Otago Intranet' ) || ( $country eq 'Repository Admin' ) ); $vtype = $row->{'view_type'}; $IPs{$ip} = 1; $location = $gi->record_by_addr($ip); if ( defined($location) ) { $lat = $location->latitude; $long = $location->longitude; if ( $location->city eq '' ) { $city = $country . ' (unidentified)'; $key = sprintf( "!%s (%f, %f)", $country, $lat, $long ); } else { $city = $location->city; $key = sprintf( "%s %s", $city, $country ); } # print "$key\t$city\t$country\t$lat\t$long\t$count\t$vtype\n"; # If there are multiple points for the same city in # the same country, we accumulate the lats and longs for these # points and keep a track of how many points there are in total, # so that we can work out a weighted average latlong for the # city. Note that this assumes that each city name only exists # once within a country, i.e., it will break if there are # multiple cities with the same name in the same country! if ( !defined( $cities{$key} ) ) { $cities{$key}{'name'} = $city; $cities{$key}{'lat'} = 0; $cities{$key}{'long'} = 0; $cities{$key}{'abstract'} = 0; $cities{$key}{'download'} = 0; $cities{$key}{'num_points'} = 0; } $cities{$key}{'lat'} += $lat; $cities{$key}{'long'} += $long; $cities{$key}{$vtype} += $count; $types{$vtype} += $count; $cities{$key}{'num_points'}++; } else { $unmapped{$vtype} += $count; } } # exit 0; # Average the location of multiple points for the same city, weighted # by the number of hits for each point. foreach $city ( keys %cities ) { $cities{$city}{'lat'} = $cities{$city}{'lat'} / $cities{$city}{'num_points'}; $cities{$city}{'long'} = $cities{$city}{'long'} / $cities{$city}{'num_points'}; } # Need to wait until we have all the counts before writing the data out. print '<?xml version="1.0"?> <kml xmlns="http://earth.google.com/kml/2.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://earth.google.com/kml/2.1 KMLSchema.xsd"> <Document> <name>Otago Eprints Repository: Usage Statistics</name> <visibility>1</visibility> <open>1</open> <description>Visualisation of usage statistics for the EPrints repository at the School of Business, University of Otago, Dunedin, New Zealand. Statistics are cumulative since November 17 2005.</description> <Style> <ListStyle> <listItemType>radioFolder</listItemType> </ListStyle> </Style> <Folder> <name>Points</name> <description>Select this folder to display points from which the repository has been accessed</description> <Folder> <name>Locations (identified)</name> <description>Locations from which the repository has been accessed. The icon is colour-coded according to the relative proportion of abstract views (blue) versus document downloads (red). Only identified locations are displayed.</description>'; # Plot known location icons as a separate folder so that we can turn them # on and off as a group. foreach $city ( sort( keys(%cities) ) ) { # Exclude unknown locations. if ( $city !~ /^!/ ) { make_icon( $cities{$city}{'name'}, $cities{$city}{'download'}, $cities{$city}{'abstract'}, $cities{$city}{'lat'}, $cities{$city}{'long'} ); } } print ' </Folder> <Folder> <name>Locations (unidentified)</name> <description>Locations from which the repository has been accessed. The icon is colour-coded according to the relative proportion of abstract views (blue) versus document downloads (red). Only unidentified locations are displayed.</description>'; # Plot known location icons as a separate folder so that we can turn them # on and off as a group. foreach $city ( sort( keys(%cities) ) ) { # Exclude known locations. if ( $city =~ /^!/ ) { make_icon( $cities{$city}{'name'}, $cities{$city}{'download'}, $cities{$city}{'abstract'}, $cities{$city}{'lat'}, $cities{$city}{'long'} ); } } print ' </Folder> </Folder> <Folder> <name>Volume (Total)</name> <description>Select this folder to display the volume with which the repository has been accessed</description> <Folder> <name>Volume (known locations)</name> <visibility>1</visibility> <description>The volume of abstract views (blue) and document downloads (red), represented as a bar chart. Each 1,000 metres of height represents one abstract view or document download. Only known locations are displayed.</description>'; # Plot volume bars for known locations as a separate folder so that # we can turn them on and off as a group. foreach $city ( sort( keys(%cities) ) ) { # Exclude unknown locations because they mess up the display somewhat. if ( $city !~ /^!/ ) { make_bar( $cities{$city}{'name'}, $cities{$city}{'download'}, $cities{$city}{'lat'}, $cities{$city}{'long'}, 'download' ); make_bar( $cities{$city}{'name'}, $cities{$city}{'abstract'}, $cities{$city}{'lat'}, $cities{$city}{'long'}, 'abstract' ); } } print ' </Folder> <Folder> <name>Volume (unknown locations)</name> <visibility>1</visibility> <description>The volume of abstract views (blue) and document downloads (red), represented as a bar chart. Each 1,000 metres of height represents one abstract view or document download. Only unknown locations are displayed.</description>'; # Plot volume bars for unknown locations as a separate folder so that # we can turn them on and off as a group. foreach $city ( sort( keys(%cities) ) ) { # Include unknown locations only. if ( $city =~ /^!/ ) { make_bar( $cities{$city}{'name'}, $cities{$city}{'download'}, $cities{$city}{'lat'}, $cities{$city}{'long'}, 'download' ); make_bar( $cities{$city}{'name'}, $cities{$city}{'abstract'}, $cities{$city}{'lat'}, $cities{$city}{'long'}, 'abstract' ); } } print ' </Folder> </Folder> <Folder> <name>Volume (absolute, by Month)</name> <visibility>1</visibility> <description>Select this folder to display the volume with which the repository has been accessed each month</description>'; } # Miscellaneous variables. undef %cities; undef %IPs; $num_entries = -1; $num_hits = 0; $lat = $long = 0; $city = $key = ''; $num_entries = -1; $types{'download'} = $types{'abstract'} = 0; $unmapped{'download'} = $unmapped{'abstract'} = 0; $query = "SELECT ip, view_type, country_name, year(request_date) as year, month(request_date) as month, COUNT(*) AS count FROM view WHERE archive_name = 'otago_eprints' AND country_code NOT LIKE 'X@%' GROUP BY ip, view_type, country_name, year, month ORDER BY year, month" . ( ( $num_entries > 0 ) ? " LIMIT $num_entries" : '' ); $stat = $connect->prepare($query); $stat->execute(); $num_rows = $stat->rows; if ( $num_rows > 0 ) { $num_entries = $num_rows if ( $num_entries < 1 ); while ( $row = $stat->fetchrow_hashref() ) { $ip = $row->{'ip'}; $count = $row->{'count'}; $country = $row->{'country_name'}; $country = 'New Zealand' if ( ( $country eq 'Otago Intranet' ) || ( $country eq 'Repository Admin' ) ); $vtype = $row->{'view_type'}; $year = $row->{'year'}; $month = $row->{'month'}; $month = sprintf( "%02d", $month ); $IPs{$ip} = 1; $location = $gi->record_by_addr($ip); if ( defined($location) ) { $lat = $location->latitude; $long = $location->longitude; if ( $location->city eq '' ) { $city = $country . ' (unidentified)'; $key = sprintf( "!%s (%f, %f) %s %s", $year, $month, $country, $lat, $long ); } else { $city = $location->city; $key = sprintf( "%s %s %s %s", $year, $month, $city, $country ); } # print "$key\t$city\t$country\t$lat\t$long\t$count\t$vtype\n"; # If there are multiple points for the same city in # the same country, we accumulate the lats and longs for these # points and keep a track of how many points there are in total, # so that we can work out a weighted average latlong for the # city. Note that this assumes that each city name only exists # once within a country, i.e., it will break if there are # multiple cities with the same name in the same country! if ( !defined( $cities{$key} ) ) { $cities{$key}{'name'} = $city; $cities{$key}{'year'} = $year; $cities{$key}{'month'} = $month; $cities{$key}{'lat'} = 0; $cities{$key}{'long'} = 0; $cities{$key}{'abstract'} = 0; $cities{$key}{'download'} = 0; $cities{$key}{'num_points'} = 0; } $cities{$key}{'lat'} += $lat; $cities{$key}{'long'} += $long; $cities{$key}{$vtype} += $count; $types{$vtype} += $count; $cities{$key}{'num_points'}++; } else { $unmapped{$vtype} += $count; } } # exit 0; # Average the location of multiple points for the same city, weighted # by the number of hits for each point. foreach $city ( keys %cities ) { $cities{$city}{'lat'} = $cities{$city}{'lat'} / $cities{$city}{'num_points'}; $cities{$city}{'long'} = $cities{$city}{'long'} / $cities{$city}{'num_points'}; } foreach $city ( sort( keys(%cities) ) ) { # Exclude unknown locations because they mess up the display somewhat. make_sim( $cities{$city}{'name'}, $cities{$city}{'download'}, $cities{$city}{'lat'}, $cities{$city}{'long'}, 'download', $cities{$city}{'year'}, $cities{$city}{'month'} ); make_sim( $cities{$city}{'name'}, $cities{$city}{'abstract'}, $cities{$city}{'lat'}, $cities{$city}{'long'}, 'abstract', $cities{$city}{'year'}, $cities{$city}{'month'} ); } print ' </Folder> </Document> </kml> '; } $stat->finish(); $connect->disconnect(); sub round { my ($n) = shift; return int( $n + 0.5 * ( $n <=> 0 ) ); } sub make_icon { my ( $name, $download, $abstract, $lat, $long ) = @_; my ($maxcolour) = $download + $abstract; my ($red) = round( $download / $maxcolour * 255 ); my ($blue) = round( $abstract / $maxcolour * 255 ); print ' <Placemark> <name>' . $name . '</name> <visibility>0</visibility> <description> <![CDATA[<span style="color:red;">' . $download . ' download' . ( ( $download != 1 ) ? 's' : '' ) . '</span>, <span style="color:blue;">' . $abstract . ' abstract' . ( ( $abstract != 1 ) ? 's' : '' ) . '</span>]]> </description> <LookAt> <longitude>' . $long . '</longitude> <latitude>' . $lat . '</latitude> <range>60000</range> <tilt>45</tilt> <heading>3</heading> </LookAt> <Style> <IconStyle> <color>' . sprintf( 'ff%02x00%02x', $blue, $red ) . '</color> <Icon> <href>root://icons/palette-4.png</href> </Icon> <hotSpot x="0.5" y="0.5" xunits="fraction" yunits="fraction" /> </IconStyle> </Style> <Point> <coordinates>' . $long . ',' . $lat . ',0</coordinates> </Point> </Placemark>'; } sub make_bar { my ( $name, $count, $lat, $long, $type ) = @_; # Work out the longitudinal width of the bar adjusted for the latitude. # The latitude size is constant. my ($longsize) = latitude_adjust( $lat, $latsize * 2 ); # A download bar starts $longsize degrees to the west of $long. # An abstract bar starts at $long. $long -= $longsize if ( $type eq 'download' ); print ' <Placemark> <name>' . $name . '</name> <visibility>0</visibility> <description> <![CDATA[<span style="color:' . ( ( $type eq 'download' ) ? 'red' : 'blue' ) . ';">' . $count . ' ' . $type . ( ( $count != 1 ) ? 's' : '' ) . '</span>]]> </description> <LookAt> <longitude>' . $long . '</longitude> <latitude>' . $lat . '</latitude> <range>60000</range> <tilt>45</tilt> <heading>3</heading> </LookAt> <Style> <PolyStyle> <color>c0' . ( ( $type eq 'abstract' ) ? 'ff' : '00' ) . '00' . ( ( $type eq 'download' ) ? 'ff' : '00' ) . '</color> </PolyStyle> </Style> <Polygon> <extrude>1</extrude> <altitudeMode>relativeToGround</altitudeMode> <outerBoundaryIs> <LinearRing> <coordinates> ' . $long . ',' . ( $lat - $latsize ) . ',' . ( $count * 1000 ) . ' ' . $long . ',' . ( $lat + $latsize ) . ',' . ( $count * 1000 ) . ' ' . ( $long + $longsize ) . ',' . ( $lat + $latsize ) . ',' . ( $count * 1000 ) . ' ' . ( $long + $longsize ) . ',' . ( $lat - $latsize ) . ',' . ( $count * 1000 ) . ' ' . $long . ',' . ( $lat - $latsize ) . ',' . ( $count * 1000 ) . ' </coordinates> </LinearRing> </outerBoundaryIs> </Polygon> </Placemark>'; } sub make_sim { my ( $name, $count, $lat, $long, $type, $year, $month ) = @_; my ($endmonth) = $month + 1; my ($endyear) = $year; if ( $endmonth == 13 ) { $endyear += 1; $endmonth = 1; } $endmonth = sprintf( "%02d", $endmonth ); # Work out the longitudinal width of the bar adjusted for the latitude. # The latitude size is constant. my ($longsize) = latitude_adjust( $lat, $latsize * 2 ); # A download bar starts $longsize degrees to the west of $long. # An abstract bar starts at $long. $long -= $longsize if ( $type eq 'download' ); print ' <Placemark> <name>' . $name . '</name> <visibility>0</visibility> <description> <![CDATA[<span style="color:' . ( ( $type eq 'download' ) ? 'red' : 'blue' ) . ';">' . $count . ' ' . $type . ( ( $count != 1 ) ? 's' : '' ) . '</span>]]> </description> <LookAt> <longitude>' . $long . '</longitude> <latitude>' . $lat . '</latitude> <range>60000</range> <tilt>45</tilt> <heading>3</heading> </LookAt> <TimeSpan> <begin>' . $year . '-' . $month . '</begin> <end>' . $endyear . '-' . ($endmonth) . '</end> </TimeSpan> <Style> <PolyStyle> <color>c0' . ( ( $type eq 'abstract' ) ? 'ff' : '00' ) . '00' . ( ( $type eq 'download' ) ? 'ff' : '00' ) . '</color> </PolyStyle> </Style> <Polygon> <extrude>1</extrude> <altitudeMode>relativeToGround</altitudeMode> <outerBoundaryIs> <LinearRing> <coordinates> ' . $long . ',' . ( $lat - $latsize ) . ',' . ( $count * 1000 ) . ' ' . $long . ',' . ( $lat + $latsize ) . ',' . ( $count * 1000 ) . ' ' . ( $long + $longsize ) . ',' . ( $lat + $latsize ) . ',' . ( $count * 1000 ) . ' ' . ( $long + $longsize ) . ',' . ( $lat - $latsize ) . ',' . ( $count * 1000 ) . ' ' . $long . ',' . ( $lat - $latsize ) . ',' . ( $count * 1000 ) . ' </coordinates> </LinearRing> </outerBoundaryIs> </Polygon> </Placemark>'; } sub latitude_adjust { my ( $lat, $angle ) = @_; $lat = round( abs($lat) ); return ( $long_adjust[$lat] * $angle ); }