Newer
Older
Digital_Repository / Repositories / Maps / google_map_generate_data.pl
  1. #!/usr/bin/env perl
  2. use strict;
  3. use Time::HiRes qw( gettimeofday );
  4. use CGI;
  5. use DBI;
  6. use Geo::IP;
  7.  
  # Main script body: emits XML marker data for a map display.
  #
  # Hit counts from the "view" table are grouped by IP address and view type,
  # each IP is geolocated through the GeoIP city database, and the counts are
  # aggregated per city.  The elapsed time of the run (in milliseconds) is
  # appended to a log file under /tmp.
  #
  # CGI parameters (all optional):
  #   top    - integer; when greater than zero, only that many
  #            (ip, view_type) rows, ordered by descending hit count, are
  #            read.  Anything that is not a plain integer is ignored.
  #   show   - 'both' (the default) selects abstracts and downloads; any
  #            other value is matched against view_type as-is.
  #   eprint - comma-separated list of archive ids to restrict the query to.
  #
  # Dies (after the HTTP header has been sent) if the GeoIP database cannot
  # be opened, if 'eprint' is non-empty but contains no ids, on any DBI error
  # (RaiseError is on), or if the timing log cannot be written.
  use warnings;

  # Wall-clock start in milliseconds, for the timing log written at the end.
  my ( $start_sec, $start_micro ) = gettimeofday;
  my $start_time = ( $start_sec * 1000 ) + round( $start_micro / 1000 );

  # Database connection.
  # NOTE(review): the credentials are hard-coded; consider loading them from
  # a configuration file that is kept out of version control.
  my $dsn       = "DBI:mysql:database=eprintstats;host=localhost";
  my $user_name = "eprintstatspriv";
  my $password  = "AuldGrizzel";

  # Geolocation database.
  my $gidb = '/usr/local/share/GeoIP/GeoLiteCity.dat';

  # Request defaults.
  my $num_entries = -1;        # include all entries from database
  my $show_only   = 'both';    # include both abstracts & downloads
  my $eprint      = '';

  # Escapes the characters that are special inside an XML attribute value.
  my %xml_entity = ( '&' => 'amp', '<' => 'lt', '>' => 'gt', '"' => 'quot' );
  my $xml_escape = sub
  {
      my ($text) = @_;
      $text =~ s/([&<>"])/&$xml_entity{$1};/g;
      return $text;
  };

  my $page = CGI->new;

  # The header names are quoted on purpose: a bare -Cache-Control followed by
  # "=>" is parsed as the subtraction (-Cache) - 'Control', which yields 0
  # rather than a header name.
  print $page->header(
      '-type'          => "text/xml",
      '-Pragma'        => 'no-cache',
      '-Cache-Control' => 'no-cache',
  );

  # 'top' ends up in the LIMIT clause and in the name of the timing log, so
  # only a plain integer is accepted; anything else keeps the default.
  my $top_param = $page->param('top');
  if ( defined $top_param && $top_param =~ /\A\s*(-?\d+)\s*\z/ )
  {
      $num_entries = $1;
  }

  my $show_param = $page->param('show');
  $show_only = $show_param if defined $show_param;

  my $eprint_param = $page->param('eprint');
  $eprint = $eprint_param if defined $eprint_param;

  my $gi = Geo::IP->open( $gidb, GEOIP_STANDARD )
      or die "Unable to open GeoIP database $gidb\n";

  my $connect = DBI->connect( $dsn, $user_name, $password, { RaiseError => 1 } );

  # Set up query.  Request-supplied values are passed as bind parameters and
  # never interpolated into the SQL text.
  my @bind;
  my $where;
  if ( $show_only eq 'both' )
  {
      $where = "view_type IN ('download', 'abstract')";
  }
  else
  {
      $where = "view_type = ?";
      push @bind, $show_only;
  }

  if ( $eprint ne '' )
  {
      my @ids;
      foreach my $id ( split /,/, $eprint )
      {
          $id =~ s/\A\s+//;
          $id =~ s/\s+\z//;
          push @ids, $id if $id ne '';
      }
      die "Invalid eprint parameter\n" unless @ids;

      $where .= " AND archiveid IN (" . join( ', ', ('?') x @ids ) . ")";
      push @bind, @ids;
  }

  my $query = "SELECT ip, view_type, COUNT(*) AS count
  FROM view
  WHERE $where
  GROUP BY ip, view_type
  ORDER BY count DESC";

  # $num_entries is known to be an integer at this point (see above).
  $query .= " LIMIT $num_entries" if $num_entries > 0;

  my $stat = $connect->prepare($query);
  $stat->execute(@bind);

  # Totals of geolocated and non-geolocated hits per view type.
  my %types    = ( 'abstract' => 0, 'download' => 0 );
  my %unmapped = ( 'abstract' => 0, 'download' => 0 );
  my ( %cities, %IPs );

  # Rows are counted while fetching; DBI does not guarantee a meaningful
  # rows() value for SELECT statements.
  my $num_rows = 0;

  while ( my $row = $stat->fetchrow_hashref() )
  {
      $num_rows++;

      my ( $ip, $count, $vtype ) = @{$row}{qw( ip count view_type )};
      $IPs{$ip} = 1;

      my $location = $gi->record_by_addr($ip);
      if ( !defined $location )
      {
          $unmapped{$vtype} += $count;
          next;
      }

      my $lat  = $location->latitude;
      my $long = $location->longitude;
      my $name = $location->city;
      $name = 'Unknown' if !defined $name || $name eq '';

      # The coordinates are part of the key so that equally named cities at
      # different positions get separate markers.
      my $city = "$name ($lat, $long)";

      if ( !defined $cities{$city} )
      {
          $cities{$city} = {
              'lat'      => $lat,
              'long'     => $long,
              'abstract' => 0,
              'download' => 0,
          };
      }
      $cities{$city}{$vtype} += $count;
      $types{$vtype}         += $count;
  }

  # With no explicit limit, the timing log is named after the number of rows
  # that were actually processed.
  $num_entries = $num_rows if $num_rows > 0 && $num_entries < 1;

  # Need to wait until we have all the counts before writing the data out.
  # A (possibly empty) document is always emitted so that the text/xml
  # response is well-formed even when the query matched nothing.
  # NOTE(review): the declaration names no encoding, so parsers will assume
  # UTF-8 -- confirm that the GeoIP city names are delivered in that encoding.
  print '<?xml version="1.0"?>' . "\n";
  print '<markers'
      . ' abs="' . $types{'abstract'} . '"'
      . ' dl="' . $types{'download'} . '"'
      . ' ips="' . scalar( keys %IPs ) . '"'
      . ' ua="' . $unmapped{'abstract'} . '"'
      . ' ud="' . $unmapped{'download'} . '"'
      . '>' . "\n";

  # Generate markers for each city.
  foreach my $city ( keys %cities )
  {
      my $info = $cities{$city};
      print '<marker'
          . ' city="' . $xml_escape->($city) . '"'
          . ' lat="' . $info->{'lat'} . '"'
          . ' long="' . $info->{'long'} . '"'
          . ' abs="' . $info->{'abstract'} . '"'
          . ' dl="' . $info->{'download'} . '"'
          . ' />' . "\n";
  }

  print "</markers>\n";

  $stat->finish();
  $connect->disconnect();

  my ( $finish_sec, $finish_micro ) = gettimeofday();
  my $finish_time = ( $finish_sec * 1000 ) + round( $finish_micro / 1000 );

  # Append the elapsed time in milliseconds to the per-size timing log.
  my $results_file = "/tmp/google_results_$num_entries.txt";
  open my $results, '>>', $results_file
      or die "Unable to append to $results_file: $!\n";
  print {$results} $finish_time - $start_time, "\n";
  close $results
      or die "Unable to close $results_file: $!\n";
  148.  
  # Rounds a number to the nearest integer; exact halves move away from
  # zero (2.5 becomes 3, -2.5 becomes -3).
  #
  # Takes the number to round as its only argument and returns the rounded
  # integer value.
  sub round
  {
      my $value = $_[0];

      # Half a unit in the direction of the value's sign; nothing for zero.
      my $nudge = 0;
      if ( $value > 0 )
      {
          $nudge = 0.5;
      }
      elsif ( $value < 0 )
      {
          $nudge = -0.5;
      }

      # int() truncates towards zero, which completes the rounding.
      return int( $value + $nudge );
  }