Newer
Older
Digital_Repository / Repositories / Maps / css_map.pl
  1. #!/usr/bin/env perl
  2. use strict;
  3. use Time::HiRes qw( gettimeofday );
  4. use CGI;
  5. use DBI;
  6. use Geo::IP;
  7. use Geo::Proj4;
  8.  
  9. my ( $start_sec, $start_micro ) = gettimeofday;
  10. my ($start_time) = ( $start_sec * 1000 ) + round( $start_micro / 1000 );
  11.  
  12. my ($page);
  13.  
  14. # Database connection.
  15. my ($dsn) = "DBI:mysql:database=eprintstats;host=localhost";
  16. my ($user_name) = "eprintstatspriv";
  17. my ($password) = "AuldGrizzel";
  18. my ( $connect, $query, %types, %unmapped, $stat, $row, $num_rows, $vtype );
  19. my ($where) = '';
  20.  
  21. # Geolocation database.
  22. my ( $gi, $proj );
  23. my ($gidb) = '/usr/local/share/GeoIP/GeoLiteCity.dat';
  24.  
  25. # Miscellaneous variables.
  26. my ( $width, $height );
  27. my ($x_offset) = 16986796.16;
  28. my ($y_offset) = 8615499.05;
  29. my ($max_x) = $x_offset * 2;
  30. my ($max_y) = $y_offset * 2;
  31. my ( %cities, %IPs );
  32. my ($num_entries) = -1; # include all entries from database
  33. my ($num_hits) = 0;
  34. my ( $ip, $count, $location );
  35. my ( $city, $lat, $long, $x, $y ) = ( 0, 0, '', 0, 0 );
  36. my ($show_only) = 'both'; # include both abstracts & downloads
  37. my ($eprint) = '';
  38. my ($tc) = '#000000';
  39.  
  40. $page = new CGI;
  41. print $page->header( -type => "text/html", -Pragma => 'no-cache', -Cache-Control => 'no-cache' );
  42. $num_entries = $page->param('top') if ( defined $page->param('top') );
  43. $show_only = $page->param('show') if ( defined $page->param('show') );
  44. $eprint = $page->param('eprint') if ( defined $page->param('eprint') );
  45.  
  46. $gi = Geo::IP->open( $gidb, GEOIP_STANDARD )
  47. or die "Unable to open GeoIP database $gidb\n";
  48.  
  49. $proj = Geo::Proj4->new( proj => "robin", ellps => "sphere", lon_0 => 10 )
  50. or die "parameter error: " . Geo::Proj4->error . "\n";
  51.  
  52. $width = $page->param('width');
  53. $height = $page->param('height');
  54.  
  55. $connect = DBI->connect( $dsn, $user_name, $password, { RaiseError => 1 } );
  56.  
  57. $types{'download'} = $types{'abstract'} = 0;
  58. $unmapped{'download'} = $unmapped{'abstract'} = 0;
  59.  
  60. # Set up query.
  61. if ( $show_only eq 'both')
  62. {
  63. $where = "view_type IN ('download', 'abstract')";
  64. }
  65. else
  66. {
  67. $where = "view_type = '$show_only'";
  68. }
  69.  
  70. $where .= " AND archiveid IN ($eprint)" unless ( $eprint eq '' );
  71.  
  72. $query = "SELECT ip, view_type, COUNT(*) AS count
  73. FROM view
  74. WHERE $where
  75. GROUP BY ip, view_type
  76. ORDER BY count DESC" . ( ( $num_entries > 0 ) ? " LIMIT $num_entries" : '' );
  77.  
  78. $stat = $connect->prepare($query);
  79. $stat->execute();
  80. $num_rows = $stat->rows;
  81.  
  82. if ( $num_rows > 0 )
  83. {
  84. $num_entries = $num_rows if ( $num_entries < 1 );
  85.  
  86. while ( $row = $stat->fetchrow_hashref() )
  87. {
  88. $ip = $row->{'ip'};
  89. $count = $row->{'count'};
  90. $vtype = $row->{'view_type'};
  91.  
  92. $IPs{$ip} = 1;
  93.  
  94. $location = $gi->record_by_addr($ip);
  95.  
  96. if ( defined($location) )
  97. {
  98. $lat = $location->latitude;
  99. $long = $location->longitude;
  100. $city = (
  101. ( $location->city eq '' )
  102. ? 'Unknown'
  103. : $location->city
  104. ) . " ($lat, $long)";
  105. ( $x, $y ) = $proj->forward( $lat, $long );
  106. $x = round( ( $x + $x_offset ) / $max_x * $width );
  107. $y = round( ( $y_offset - $y ) / $max_y * $height );
  108.  
  109. if ( !defined( $cities{$city} ) )
  110. {
  111. $cities{$city}{'lat'} = $lat;
  112. $cities{$city}{'long'} = $long;
  113. $cities{$city}{'abstract'} = 0;
  114. $cities{$city}{'download'} = 0;
  115. $cities{$city}{'count'} = 0;
  116. }
  117. $cities{$city}{$vtype} += $count;
  118. $cities{$city}{'count'} += $count;
  119. $types{$vtype} += $count;
  120.  
  121. $cities{$city}{'x'} = $x;
  122. $cities{$city}{'y'} = $y;
  123. }
  124. else
  125. {
  126. $unmapped{$vtype} += $count;
  127. }
  128. }
  129. }
  130.  
  131. $stat->finish();
  132. $connect->disconnect();
  133.  
  134. # Generate dots for each city.
  135. CITY: foreach $city ( keys %cities )
  136. {
  137. if ( $show_only eq 'both' )
  138. {
  139. # Blend colour according to the ratio of abstracts to downloads.
  140. $tc = sprintf( '#%02x00%02x',
  141. round( $cities{$city}{'download'} / $cities{$city}{'count'} * 255 ),
  142. round( $cities{$city}{'abstract'} / $cities{$city}{'count'} * 255 )
  143. );
  144. }
  145. elsif ( $show_only eq 'download' )
  146. {
  147. next CITY if ( $cities{$city}{'download'} == 0 );
  148. $tc = '#ff0000';
  149. }
  150. elsif ( $show_only eq 'abstract' )
  151. {
  152. next CITY if ( $cities{$city}{'abstract'} == 0 );
  153. $tc = '#0000ff';
  154. }
  155. else # ack, boom
  156. {
  157. last CITY;
  158. }
  159. print
  160. '<div style="position:absolute; width:3px; height:3px; left:'
  161. . ( $cities{$city}{'x'} - 1 )
  162. . 'px; top:'
  163. . ( $cities{$city}{'y'} - 1 )
  164. . 'px; background-color:'
  165. . $tc
  166. . ';"></div>' . "\n";
  167. }
  168.  
  169. # Output summary data.
  170. print '<div style="position:absolute; left:1px; top:1px; font-size:small;">';
  171. if ( ( $show_only eq 'both' ) || ( $show_only eq 'download' ) )
  172. {
  173. print '<span style="color:red;">'
  174. . $types{'download'}
  175. . ' downloads'
  176. . (
  177. ( $unmapped{'download'} > 0 ) ? " (+$unmapped{'download'} unmappable)"
  178. : ''
  179. )
  180. . '</span>';
  181. }
  182. print "<br />\n";
  183. if ( ( $show_only eq 'both' ) || ( $show_only eq 'abstract' ) )
  184. {
  185. print '<span style="color:blue;">'
  186. . $types{'abstract'}
  187. . ' abstracts'
  188. . (
  189. ( $unmapped{'abstract'} > 0 ) ? " (+$unmapped{'abstract'} unmappable)"
  190. : ''
  191. )
  192. . '</span>';
  193. }
  194. print "<br />\nfrom "
  195. . scalar( keys %cities )
  196. . ' cities<br />('
  197. . scalar( keys %IPs )
  198. . ' IP addresses)</div>' . "\n";
  199.  
  200. my ( $finish_sec, $finish_micro ) = gettimeofday();
  201. my ($finish_time) = ( $finish_sec * 1000 ) + round( $finish_micro / 1000 );
  202.  
  203. print '<div style="position:absolute; left:1px; bottom:1px; font-size:small;">';
  204. print 'Map generated in ' . ( $finish_time - $start_time ) . ' ms</div>' . "\n";
  205.  
  206. open RESULTS, ">>/tmp/css_results_$num_entries.txt" or die "Argh!\n";
  207. print RESULTS ( $finish_time - $start_time ) . "\n";
  208. close RESULTS;
  209.  
  210. sub round
  211. {
  212. my ($n) = shift;
  213. return int( $n + 0.5 * ( $n <=> 0 ) );
  214. }