<?php
/*
 * Configuration for the ePrints statistics harvester.
 *
 * The rest of this script reads the Apache access logs named below and
 * records successful ePrints requests in the `eprintstats` database.
 *
 * The Apache log for ePrints is expected to use the "combined" format:
 *
 *   LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
 *
 * If the log format differs, the regular expressions used to match log
 * lines need to be adjusted. From each line the script parses:
 *   - the client IP
 *   - the request date (YYYY MM DD)
 *   - the archive (eprint) ID
 *
 * Values of the form ##NAME## are placeholders that must be replaced with
 * real values for the installation.
 */

// MaxMind GeoIP library. The script cannot work without it, so use
// require_once: a missing file then stops the script immediately instead of
// surfacing later as an undefined-function error at geoip_open().
require_once("geoip.inc");
$gi = geoip_open("##GEOIP_DATABASE##",GEOIP_STANDARD);

// Web server log files: archive name => log file name. The file name is
// appended directly to $log_dir, so $log_dir must end with a path separator.
$log_dir = '##APACHE_LOG_LOCATION##';
$log_file = array(
    'otago_eprints' => '##APACHE_LOG_NAME##',
);

// eprintstats db (where the harvested statistics are written).
// NOTE(review): credentials are hard-coded here; keep real passwords out of
// version control.
$sqlserver = 'localhost';
$sqluser = 'eprintstatspriv';
$sqlpass = 'AuldGrizzel';
$sqldatabase = 'eprintstats';

// SQL details of your ePrints installation (where eprint titles are read from).
$sqlserver2 = 'localhost';
$sqluser2 = 'otago_eprints';
$sqlpass2 = 'DrSyntaxRidesAgain';

// IP ranges for your local Intranet. Each pair represents the lower
// and upper bound of the range, respectively. Fill in dotted-quad addresses;
// ip2long('') evaluates to false, so the empty placeholders below do not
// describe a usable range until they are replaced.
$local_name = 'Otago Intranet';
$local_IPs = array(
    array(
        'lower' => ip2long(''),
        'upper' => ip2long(''),
    ),
);
###########################################
##
## No configuration required below here.
##
###########################################

// --- Database connections -------------------------------------------------

// Statistics database: receives the `view` rows and the `lastproc` marker.
$connect = mysql_pconnect($sqlserver, $sqluser, $sqlpass);
if (!$connect) {
    die("Could not connect");
}
$db = mysql_select_db($sqldatabase, $connect) or die("Could not connect");

// ePrints database: read through the global $connect2 by getePrintName().
// Without it every title lookup comes back empty and every hit is dropped
// while the `lastproc` marker still advances, so stop here instead.
$connect2 = mysql_connect($sqlserver2, $sqluser2, $sqlpass2);
if (!$connect2) {
    die("Could not connect");
}

// --- Date of the last processed request -----------------------------------

// Requests dated before this timestamp are skipped; 0 means "process all".
$datetestA = 0;
$query = "select lastproc from lastproc order by timeinsert desc limit 1";
$result = mysql_query($query, $connect);
if ($result && mysql_num_rows($result) > 0) {
    $row = mysql_fetch_assoc($result);
    $lastproc = $row["lastproc"];
    $parsed = strtotime($lastproc);
    if ($parsed !== false) {
        $datetestA = $parsed;
    }
}

// --- Log harvesting -------------------------------------------------------

// Accepted request shapes, tried in order; the first match wins. Captures:
// 1 = client IP (or host name), 2 = bracketed timestamp, 3 = eprint id.
$request_patterns = array(
    "/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*/i",
    "/^(\S{1,}\.\S{1,}\.\S{1,}\.\S{1,}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*/i",
    "/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/(\d{1,4}).*? HTTP\/1..\" 200 .*/i",
);

$counter = 0;            // number of log lines that matched a pattern
$request_date = null;    // date (Y-n-j) of the most recent matching line
$eprintname = array();   // title cache: [archive name][eprint id] => title

foreach ($log_file as $archive_name => $archivelog) {
    $logf = $log_dir . $archivelog;
    $handle = fopen($logf, "r");
    if ($handle === false) {
        // fopen() has already raised a warning. Looping on feof() with an
        // invalid handle would never terminate, so skip this log instead.
        continue;
    }

    while (($buffer = fgets($handle, 4096)) !== false) {
        $matches = array();
        $matched = false;
        foreach ($request_patterns as $pattern) {
            if (preg_match($pattern, $buffer, $matches)) {
                $matched = true;
                break;
            }
        }
        if (!$matched) {
            continue;
        }
        $counter++;

        $ip = $matches[1];
        $archive = (int) $matches[3];

        // Reduce "10/Oct/2000:13:55:36 -0700" to "10 Oct 2000" and normalise
        // it to year-month-day (no zero padding).
        $date = preg_replace("/:.*/", "", $matches[2]);
        $date = preg_replace("/\//", " ", $date);
        $timestamp = strtotime($date);
        if ($timestamp === false) {
            // Unparseable timestamp: do not let it become 1970-1-1.
            continue;
        }
        $request_date = date("Y-n-j", $timestamp);
        $datetestB = strtotime($request_date);

        // Already handled by an earlier run. Checked before the GeoIP and
        // title lookups so skipped lines cost nothing further.
        if ($datetestB < $datetestA) {
            continue;
        }

        // Determine country code and name. If the address falls into one of
        // the local intranet ranges use that, otherwise ask GeoIP. Host names
        // and unset range bounds make ip2long() return false; those must
        // never be treated as a match.
        $ip_long = ip2long($ip);
        $is_local = false;
        if ($ip_long !== false) {
            foreach ($local_IPs as $range) {
                if ($range['lower'] === false || $range['upper'] === false) {
                    continue;
                }
                if (($ip_long >= $range['lower']) && ($ip_long <= $range['upper'])) {
                    $is_local = true;
                    break;
                }
            }
        }
        if ($is_local) {
            $country_code = 'T5';
            $country_name = $local_name;
        } else {
            $country_code = geoip_country_code_by_addr($gi, $ip);
            $country_name = geoip_country_name_by_addr($gi, $ip);
        }

        // A request for /<id>/<nn>/... addresses a document file (download);
        // anything else counts as an abstract view.
        if (preg_match("/GET \/archive\/0{1,8}\d{1,4}\/\d\d\//i", $buffer)
            || preg_match("/GET \/\d{1,4}\/\d\d\//i", $buffer)) {
            $view_type = "download";
        } else {
            $view_type = "abstract";
        }

        // Title lookup, cached per archive so that equal eprint ids in
        // different archives cannot be confused with each other.
        if (!isset($eprintname[$archive_name][$archive])) {
            $eprintname[$archive_name][$archive] = getePrintName($archive_name, $archive);
        }
        $eprint_name = $eprintname[$archive_name][$archive];
        if ($eprint_name == '') {
            // No title for this id in the archive: not a countable hit.
            continue;
        }

        // The raw log line (stored as `uniquebits`) contains client-supplied
        // Referer and User-Agent text, and country names may contain
        // apostrophes, so every string value is escaped. The stored
        // `uniquebits` is now exactly the raw line.
        // NOTE(review): presumably a unique index on `uniquebits` rejects
        // lines that were already imported - confirm against the schema.
        $query = sprintf(
            "INSERT into view (uniquebits,archive_name,ip,request_date,archiveid,country_code,country_name,view_type,eprint_name)"
            . " values('%s','%s','%s','%s',%d,'%s','%s','%s','%s')",
            mysql_real_escape_string($buffer, $connect),
            mysql_real_escape_string($archive_name, $connect),
            mysql_real_escape_string($ip, $connect),
            $request_date,
            $archive,
            mysql_real_escape_string((string) $country_code, $connect),
            mysql_real_escape_string((string) $country_name, $connect),
            $view_type,
            mysql_real_escape_string($eprint_name, $connect)
        );
        $result = mysql_query($query, $connect);
    }
    fclose($handle);
}

/*
Keep track of where we are. Should avoid duplication of results
if the script is run more than once on the same log file.
Nothing is recorded when no log line matched, so an empty date is
never written.
*/
if ($request_date !== null) {
    $query = "INSERT into lastproc (lastproc) values('".$request_date."')";
    $result = mysql_query($query, $connect);
}
#print "Records counted: $counter\n";
#print "Last count: $request_date\n";

geoip_close($gi);
mysql_close($connect2);
mysql_close($connect);
/**
 * Look up the title of an eprint in an ePrints archive database.
 *
 * Runs on the global $connect2 MySQL link: selects the database $db on it
 * and reads `title` from the `archive` table for the given id.
 *
 * @param string $db       Name of the database to select on $connect2.
 * @param mixed  $eprintid Eprint id; it is cast to an integer before use.
 * @return string The title, trimmed and with every run of whitespace
 *                collapsed to a single space, or '' when the database
 *                cannot be selected, the query fails, or no row matches.
 */
function getePrintName($db,$eprintid) {
    global $connect2;

    if (!mysql_select_db($db, $connect2)) {
        // Querying whichever database happens to be selected could return
        // the title of an unrelated record.
        return '';
    }

    // The id is numeric by contract; the cast keeps anything else out of
    // the SQL text.
    $eprintid = (int) $eprintid;
    $query3 = "select title from archive where eprintid = $eprintid";
    $result3 = mysql_query($query3, $connect2);
    if (!$result3) {
        return '';
    }

    $row = mysql_fetch_assoc($result3);
    mysql_free_result($result3);
    if (!$row || !isset($row["title"])) {
        return '';
    }

    return preg_replace("/\s+/", " ", trim($row["title"]));
}