Newer
Older
Digital_Repository / Repositories / statistics / scripts / eprints-usage_src.php
  1. <?php
  2.  
  // Pull in the GeoIP helper library and open the country database once;
  // the resulting handle ($gi) is reused for every lookup in the main loop.
  include 'geoip.inc';

  $gi = geoip_open('##GEOIP_DATABASE##', GEOIP_STANDARD);
  6.  
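  /*
   * The Apache access log for ePrints is expected to use the "combined" format:
   *
   *   LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
   *
   * If your log format differs, the regular expressions used to match log
   * lines further down in this script must be adjusted accordingly.
   *
   * The following fields are extracted from each matching log line:
   *   - the client IP address (or host name)
   *   - the request date (year, month, day)
   *   - the archive (eprint) ID
   */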
  19.  
  // ---- Web server access logs -------------------------------------------
  // $log_dir is prepended verbatim to each log file name, so it must end
  // with a directory separator. $log_file maps an archive name to the name
  // of that archive's access log; the archive name is also used as the name
  // of the ePrints database when eprint titles are looked up.
  $log_dir = '##APACHE_LOG_LOCATION##';
  $log_file = array();
  $log_file['otago_eprints'] = '##APACHE_LOG_NAME##';

  // ---- Statistics database (the script writes here) ---------------------
  $sqlserver   = 'localhost';
  $sqluser     = 'eprintstatspriv';
  $sqlpass     = 'AuldGrizzel';
  $sqldatabase = 'eprintstats';

  // ---- ePrints installation database (read for eprint titles) -----------
  $sqlserver2 = 'localhost';
  $sqluser2   = 'otago_eprints';
  $sqlpass2   = 'DrSyntaxRidesAgain';

  // ---- Local intranet -----------------------------------------------------
  // Requests from these address ranges are reported under $local_name
  // instead of a GeoIP country. Add one entry per range; 'lower' and 'upper'
  // are the inclusive bounds of the range.
  $local_name = 'Otago Intranet';
  $local_IPs = array();
  $local_IPs[] = array(
      'lower' => ip2long('139.80.0.0'),
      'upper' => ip2long('139.80.127.255'),
  );
  47.  
  48. ###########################################
  49. ##
  50. ## No configuration required below here.
  51. ##
  52. ###########################################
  53.  
  // Open the statistics database. The connection itself is checked as well
  // as the database selection, so a bad host or password is reported here
  // rather than surfacing later as a string of warnings.
  $connect = mysql_pconnect($sqlserver, $sqluser, $sqlpass);
  if ($connect === false) {
      die("Could not connect");
  }
  $db = mysql_select_db($sqldatabase, $connect) or die("Could not connect");

  // Fetch the date recorded by the most recent run. $datetestA is that date
  // as a Unix timestamp; it stays 0 ("process everything") when there is no
  // previous run, when the lookup fails, or when the stored value cannot be
  // parsed.
  $datetestA = 0;
  $query = "select lastproc from lastproc order by timeinsert desc limit 1";
  $result = mysql_query($query, $connect);
  $num_rows = ($result === false) ? 0 : mysql_num_rows($result);
  if ($num_rows > 0) {
      $row = mysql_fetch_assoc($result);
      $lastproc = $row["lastproc"];
      $datetestA = strtotime($lastproc);
      if ($datetestA === false) {
          $datetestA = 0;
      }
  }
  69.  
  // Second connection, used by getePrintName() (through the global
  // $connect2) to read eprint titles from the ePrints database. Without it
  // no title can be resolved and no row would be stored, so stop early
  // instead of silently discarding the whole log.
  $connect2 = mysql_connect($sqlserver2, $sqluser2, $sqlpass2);
  if ($connect2 === false) {
      die("Could not connect to the ePrints database server");
  }

  $counter = 1;

  // Title cache: $eprintname[archive name][eprint ID] => title. Keying by
  // archive name as well as ID keeps equal IDs in different archives apart.
  $eprintname = array();

  foreach ($log_file as $archivename => $archivelog) {
      $logf = $log_dir . $archivelog;
      $archive_name = $archivename;

      $handle = fopen($logf, "r");
      if ($handle === false) {
          // Looping on feof() with an invalid handle would never terminate.
          error_log("eprints-usage: cannot open log file " . $logf);
          continue;
      }

      // fgets() returns false at end of file, so the final (failed) read is
      // never treated as a log line.
      while (($buffer = fgets($handle, 4096)) !== false) {
          // A line is of interest when it is a successful (200) GET for an
          // eprint, addressed either as /archive/0000NNNN or as /NNNN, from a
          // dotted-quad address or (second pattern) a dotted host name.
          // Capture groups: 1 = client, 2 = timestamp, 3 = eprint ID.
          // NOTE(review): the ID group is \d{1,4}, so IDs above 9999 look
          // like they would be cut to four digits -- confirm before relying
          // on this for large archives.
          $is_hit =
              preg_match("/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*/i", $buffer, $matches) ||
              preg_match("/^(\S{1,}\.\S{1,}\.\S{1,}\.\S{1,}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*/i", $buffer, $matches) ||
              preg_match("/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/(\d{1,4}).*? HTTP\/1..\" 200 .*/i", $buffer, $matches);
          if (!$is_hit) {
              continue;
          }
          $counter++;

          $ip = $matches[1];
          $date = $matches[2];
          $archive = (int) $matches[3];
          $uniquebits = $buffer;

          // Reduce "10/Oct/2000:13:55:36 -0700" to "10 Oct 2000" and turn it
          // into the Y-M-D form stored in the database.
          $date = preg_replace("/:.*/", "", $date);
          $date = preg_replace("/\//", " ", $date);
          $when = getdate(strtotime($date));
          $request_date = $when["year"] . "-" . $when["mon"] . "-" . $when["mday"];

          // Skip entries older than the previous run. This is checked before
          // the GeoIP and title lookups so no work is spent on skipped lines.
          $datetestB = strtotime($request_date);
          if ($datetestB < $datetestA) {
              continue;
          }

          // Determine country code and name. An address inside ANY of the
          // configured local ranges is reported as the local intranet;
          // everything else (including host names, for which ip2long() gives
          // no usable number) goes through GeoIP.
          $ip_long = ip2long($ip);
          $is_local = false;
          if ($ip_long !== false) {
              foreach ($local_IPs as $range) {
                  if (($ip_long >= $range['lower']) && ($ip_long <= $range['upper'])) {
                      $is_local = true;
                      break;
                  }
              }
          }
          if ($is_local) {
              $country_code = 'T5';
              $country_name = $local_name;
          } else {
              $country_code = geoip_country_code_by_addr($gi, $ip);
              $country_name = geoip_country_name_by_addr($gi, $ip);
          }

          // A request below the eprint's two-digit document directory is a
          // download; anything else is a view of the abstract page.
          if (preg_match("/GET \/archive\/0{1,8}\d{1,4}\/\d\d\//i", $buffer) || preg_match("/GET \/\d{1,4}\/\d\d\//i", $buffer)) {
              $view_type = "download";
          } else {
              $view_type = "abstract";
          }

          // Resolve the eprint title, hitting the database once per eprint.
          if (!isset($eprintname[$archive_name][$archive])) {
              $eprintname[$archive_name][$archive] = getePrintName($archive_name, $archive);
          }
          $eprint_name = $eprintname[$archive_name][$archive];
          if ($eprint_name == '') {
              // No title could be found for this ID: do not record the hit.
              continue;
          }

          // Every string value is escaped: the raw log line carries
          // client-controlled text (request, referer, user agent) and
          // country names can contain apostrophes.
          $query = sprintf(
              "INSERT into view (uniquebits,archive_name,ip,request_date,archiveid,country_code,country_name,view_type,eprint_name) values('%s','%s','%s','%s',%d,'%s','%s','%s','%s')",
              mysql_real_escape_string($uniquebits, $connect),
              mysql_real_escape_string($archive_name, $connect),
              mysql_real_escape_string($ip, $connect),
              mysql_real_escape_string($request_date, $connect),
              $archive,
              mysql_real_escape_string($country_code, $connect),
              mysql_real_escape_string($country_name, $connect),
              mysql_real_escape_string($view_type, $connect),
              mysql_real_escape_string($eprint_name, $connect)
          );
          $result = mysql_query($query, $connect);
      }
      fclose($handle);
  }
  145.  
  /*
   * Keep track of where we are: the date of the last matching log line is
   * stored and read back on the next run, where older entries are skipped.
   * This limits duplicated results if the script is run more than once on
   * the same log file.
   *
   * Nothing is stored when no log line matched at all ($request_date was
   * never set), or when the last line seen is older than the previously
   * recorded date -- writing that would move the marker backwards.
   */
  if (isset($request_date) && strtotime($request_date) >= $datetestA) {
      $query = "INSERT into lastproc (lastproc) values('" . $request_date . "')";
      $result = mysql_query($query, $connect);
  }

  #print "Records counted: $counter\n";
  #print "Last count: $request_date\n";
  mysql_close($connect2);
  mysql_close($connect);
  158.  
  /**
   * Look up the title of an eprint.
   *
   * Uses the global $connect2 connection, selects the database named $db on
   * it and reads the title of the row in the "archive" table whose eprintid
   * equals $eprintid.
   *
   * @param string     $db       Name of the ePrints database to query.
   * @param int|string $eprintid Numeric eprint ID; it is cast to an integer
   *                             before being placed in the query.
   *
   * @return string The title with surrounding whitespace removed and inner
   *                whitespace runs collapsed to single spaces, or '' when the
   *                database cannot be selected, the query fails, or no
   *                matching row exists.
   */
  function getePrintName($db, $eprintid) {
      global $connect2;

      // Do not query at all if the database cannot be selected; the
      // connection could otherwise still point at a previously selected one.
      if (!mysql_select_db($db, $connect2)) {
          return '';
      }

      $query3 = "select title from archive where eprintid = " . (int) $eprintid;
      $result3 = mysql_query($query3, $connect2);
      if ($result3 === false) {
          return '';
      }

      $row = mysql_fetch_assoc($result3);
      if ($row === false || !isset($row["title"])) {
          return '';
      }

      return preg_replace("/\s+/", " ", trim($row["title"]));
  }
  170.  
  171. ?>
  172.