<?php
// Load the GeoIP lookup library and open the country database. The handle in
// $gi is passed to the geoip_country_*_by_addr() calls further down to map a
// visitor's IP address to a country code and country name.
include("geoip.inc");
$gi = geoip_open("/usr/local/share/GeoIP/GeoIP.dat",GEOIP_STANDARD);
/*
The Apache access log for ePrints is expected to use the "combined" format:
LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
If the log format differs, the regular expressions used to match log lines
will need to be adjusted.
The following fields are parsed from each matching log line:
ip
date YYYY MM DD
archive ID
*/
// Web server log files
// $log_dir is prepended to each file name in $log_file to build the path of
// the log that gets opened.
$log_dir = '/sw/var/apache2/logs/';
// Maps archive name => access log file name. The archive name (the key) is
// stored with every view record and is also used as the name of the database
// selected on the ePrints server when eprint titles are looked up.
$log_file = array(
'otago_eprints' => 'access_log',
);
// eprintstats db
// Connection details for the statistics database; the `view` and `lastproc`
// tables are read and written over this connection.
// NOTE(review): credentials are stored in plain text in this file -- make
// sure the file is not readable by other users or served by the web server.
$sqlserver = 'localhost';
$sqluser = 'eprintstatspriv';
$sqlpass = 'AuldGrizzel';
$sqldatabase = 'eprintstats';
// SQL details of your ePrints installation
// This second connection is only used to read eprint titles from the
// `archive` table.
$sqlserver2 = 'localhost';
$sqluser2 = 'otago_eprints';
$sqlpass2 = 'DrSyntaxRidesAgain';
// IP ranges for your local Intranet. Each pair represents the lower
// and upper bound of the range, respectively.
// Requests from an address inside one of these ranges are recorded with the
// country code 'T5' and $local_name as the country name instead of the GeoIP
// result.
$local_name = 'Otago Intranet';
// Bounds are converted with ip2long() so that addresses can be compared
// numerically; both bounds are inclusive.
$local_IPs = array(
array(
'lower' => ip2long('139.80.0.0'),
'upper' => ip2long('139.80.127.255'),
),
);
###########################################
##
## No configuration required below here.
##
###########################################
// Connect to the statistics database (persistent connection). Every later
// query depends on this link, so stop immediately if it cannot be set up.
$connect = mysql_pconnect($sqlserver, $sqluser, $sqlpass);
if (!$connect) {
    die("Could not connect");
}
$db = mysql_select_db($sqldatabase, $connect) or die("Could not connect");

// First get the date of last update
$query = "select lastproc from lastproc order by timeinsert desc limit 1";
$result = mysql_query($query, $connect);
if ($result === false) {
    // Carrying on with "no previous run" here would silently reprocess the
    // entire log, so treat a failed query as fatal.
    die("Could not read lastproc: " . mysql_error($connect));
}

// $datetestA is the timestamp of the last processed date; 0 means the script
// has never run before and every log line is eligible.
$datetestA = 0;
$num_rows = mysql_num_rows($result);
if ($num_rows > 0) {
    $row = mysql_fetch_assoc($result);
    $lastproc = $row["lastproc"];
    $datetestA = strtotime($lastproc);
    if ($datetestA === false) {
        // Unparseable marker: behave as if there was no previous run.
        $datetestA = 0;
    }
}

// Second connection: the ePrints installation, used to look up eprint titles.
// Without it every title lookup comes back empty, every log line is skipped,
// and the lastproc marker would still be advanced -- losing those views for
// good. Stop instead.
$connect2 = mysql_connect($sqlserver2, $sqluser2, $sqlpass2);
if (!$connect2) {
    die("Could not connect to the ePrints database server");
}

// Incremented once per matching log line in the loop below.
// NOTE(review): starts at 1, so it ends up one higher than the number of
// matched lines -- confirm whether that is intended before printing it.
$counter = 1;
// Patterns that identify a successful (HTTP 200) GET request for an eprint.
// Capture groups: 1 = client address, 2 = timestamp, 3 = eprint ID.
// They are tried in order and the first one that matches wins.
$line_patterns = array(
    // /archive/0000NNNN URL, client logged as a dotted-quad IP address
    "/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*/i",
    // /archive/0000NNNN URL, client logged as any non-blank name with three dots
    "/^(\S{1,}\.\S{1,}\.\S{1,}\.\S{1,}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*/i",
    // short /NNNN URL, client logged as a dotted-quad IP address
    "/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/(\d{1,4}).*? HTTP\/1..\" 200 .*/i",
);

// Cache of eprint titles already looked up, keyed by archive name and then by
// eprint ID, so the same ID in two different archives cannot share a title.
$eprintname = array();

foreach ($log_file as $archivename => $archivelog) {
    $logf = $log_dir . $archivelog;
    $archive_name = $archivename;

    $handle = fopen($logf, "r");
    if ($handle === false) {
        // feof() on a failed handle never reports end-of-file, so the read
        // loop would never terminate. Stop before the lastproc marker is
        // advanced past lines that were never read.
        die("Could not open log file: " . $logf);
    }

    // fgets() returns false at end-of-file and on a read error.
    while (($buffer = fgets($handle, 4096)) !== false) {
        $matches = array();
        $matched = false;
        foreach ($line_patterns as $pattern) {
            if (preg_match($pattern, $buffer, $matches)) {
                $matched = true;
                break;
            }
        }
        if (!$matched) {
            continue;
        }
        $counter++;

        $ip = $matches[1];
        // The patterns only capture digits here; the cast makes the value
        // safe to use in SQL and normalises leading zeros.
        $archive = (int) $matches[3];

        // Reduce "10/Oct/2000:13:55:36 -0700" to "10 Oct 2000" and turn it
        // into a YYYY-M-D date.
        $date = preg_replace("/:.*/", "", $matches[2]);
        $date = preg_replace("/\//", " ", $date);
        $timestamp = strtotime($date);
        if ($timestamp === false) {
            // Unparseable timestamp: skip rather than record a 1970 date.
            continue;
        }
        $when = getdate($timestamp);
        $request_date = $when["year"] . "-" . $when["mon"] . "-" . $when["mday"];

        // Skip requests from before the last processed date. This is done
        // before the GeoIP and title lookups so skipped lines cost nothing.
        $datetestB = strtotime($request_date);
        if ($datetestB < $datetestA) {
            continue;
        }

        // Determine country code and name.
        // If the number falls into any local intranet range, then
        // use that instead of GeoIP.
        $ip_long = ip2long($ip);
        $is_local = false;
        if ($ip_long !== false) {
            foreach ($local_IPs as $range) {
                if (($ip_long >= $range['lower']) && ($ip_long <= $range['upper'])) {
                    $is_local = true;
                    // Stop at the first hit so a later, non-matching range
                    // cannot overwrite the result.
                    break;
                }
            }
        }
        if ($is_local) {
            $country_code = 'T5';
            $country_name = $local_name;
        } else {
            $country_code = geoip_country_code_by_addr($gi, $ip);
            $country_name = geoip_country_name_by_addr($gi, $ip);
        }

        // A request below the eprint's two-digit document directory is a
        // download of the full text; anything else is an abstract view.
        if (preg_match("/GET \/archive\/0{1,8}\d{1,4}\/\d\d\//i", $buffer) || preg_match("/GET \/\d{1,4}\/\d\d\//i", $buffer)) {
            $view_type = "download";
        } else {
            $view_type = "abstract";
        }

        // Look up the eprint title once per archive/ID pair.
        if (!isset($eprintname[$archive_name][$archive])) {
            $eprintname[$archive_name][$archive] = getePrintName($archive_name, $archive);
        }
        $eprint_name = $eprintname[$archive_name][$archive];
        if ((string) $eprint_name === '') {
            // No title found for this ID: do not record the request.
            continue;
        }

        // Escape every string value against the statistics connection. The
        // raw log line (user agent, referrer) and some country names contain
        // apostrophes, which would otherwise break or subvert the statement.
        $row = array(
            'uniquebits'   => $buffer,
            'archive_name' => $archive_name,
            'ip'           => $ip,
            'request_date' => $request_date,
            'archiveid'    => $archive,
            'country_code' => $country_code,
            'country_name' => $country_name,
            'view_type'    => $view_type,
            'eprint_name'  => $eprint_name,
        );
        $sql_values = array();
        foreach ($row as $column => $value) {
            if ($column === 'archiveid') {
                // Already an integer; inserted unquoted as before.
                $sql_values[] = (int) $value;
            } else {
                $sql_values[] = "'" . mysql_real_escape_string((string) $value, $connect) . "'";
            }
        }
        $query = "INSERT into view (" . implode(',', array_keys($row)) . ")
values(" . implode(',', $sql_values) . ")";
        // As before, a failed insert is not treated as fatal.
        // NOTE(review): presumably duplicates are rejected by a unique key on
        // uniquebits -- confirm against the table definition.
        $result = mysql_query($query, $connect);
    }
    fclose($handle);
}
/*
Keep track of where we are. Should avoid duplication of results
if the script is run more than once on the same log file.

The marker is only written when at least one log line was parsed
($request_date is set) and its date is not older than the previous
marker, so a run that saw nothing new can neither store an empty
date nor move the marker backwards.
*/
if (isset($request_date) && strtotime($request_date) >= $datetestA) {
    $query = "INSERT into lastproc (lastproc) values('" . mysql_real_escape_string($request_date, $connect) . "')";
    $result = mysql_query($query, $connect);
}
#print "Records counted: $counter\n";
#print "Last count: $request_date\n";
// Only close links that were actually opened.
if ($connect2) {
    mysql_close($connect2);
}
if ($connect) {
    mysql_close($connect);
}
/**
 * Look up the title of an eprint in an ePrints archive database.
 *
 * Uses the global $connect2 link. The title is trimmed and every run of
 * whitespace inside it is collapsed to a single space.
 *
 * @param string $db       Name of the archive database to select.
 * @param int    $eprintid ID of the eprint in that archive's `archive` table.
 * @return string The normalised title, or '' when there is no usable
 *                connection, the database cannot be selected, the query
 *                fails, or no such eprint exists.
 */
function getePrintName($db,$eprintid) {
    global $connect2;

    if (!$connect2 || !mysql_select_db($db, $connect2)) {
        return '';
    }

    // Cast to int so the ID can never carry SQL into the statement.
    $query3 = "select title from archive where eprintid = " . (int) $eprintid;
    $result3 = mysql_query($query3, $connect2);
    if ($result3 === false) {
        return '';
    }

    // mysql_fetch_assoc() returns false when no row matched.
    $row = mysql_fetch_assoc($result3);
    if ($row === false || !isset($row["title"])) {
        return '';
    }

    return preg_replace("/\s+/", " ", trim($row["title"]));
}
?>