Newer
Older
Digital_Repository / Repositories / statistics / scripts / eprints-usage_src.php
<?php

// GeoIP support code; expected to provide geoip_open() and the
// geoip_country_*_by_addr() lookups that this script calls.
include("geoip.inc");

// Handle to the GeoIP database, passed to every country lookup made while
// parsing the logs. ##GEOIP_DATABASE## is a placeholder token (presumably
// replaced with the real database path at install time -- confirm against
// the installer).
$gi = geoip_open("##GEOIP_DATABASE##",GEOIP_STANDARD);

	/*

	Apache log for ePrints uses this format:
	LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined

	If the log format differs the regular expression matching would need to be adjusted.		
	
	Parse:
		ip
		date YYYY MM DD
		archive ID

	*/

// Web server log files
// $log_dir is joined to each log file name by plain string concatenation,
// so it must end with a directory separator.
$log_dir = '##APACHE_LOG_LOCATION##';
// Maps archive name => access log file name. The archive name is stored with
// every view record and is also the name of the ePrints database that eprint
// titles are read from.
$log_file = array(
	'otago_eprints' => '##APACHE_LOG_NAME##',
);


// eprintstats db
// This is the database the parsed view records and the "last processed"
// marker are written to.
// NOTE(review): the passwords in this file are stored in plain text; keep the
// installed copy readable only by the account that runs this script.
$sqlserver = 'localhost';
$sqluser = 'eprintstatspriv';
$sqlpass = 'AuldGrizzel';
$sqldatabase = 'eprintstats';

// SQL details of your ePrints installation
// Used for a second connection from which eprint titles are looked up. No
// database name is configured here: the archive name from $log_file is
// selected on this connection instead.
$sqlserver2 = 'localhost';
$sqluser2 = 'otago_eprints';
$sqlpass2 = 'DrSyntaxRidesAgain';

// IP ranges for your local Intranet. Each pair represents the lower
// and upper bound of the range, respectively.
// Both bounds are inclusive. Requests from these ranges are recorded with
// the country code 'T5' and $local_name as the country name instead of a
// GeoIP result.
$local_name = 'Otago Intranet';
$local_IPs = array(
	array(
		'lower' => ip2long('139.80.0.0'),
		'upper' => ip2long('139.80.127.255'),
	),
);

###########################################
##
## No configuration required below here.
##
###########################################

// Connect to the statistics database. Nothing below can work without it, so
// a failed connection or database selection stops the script.
$connect = mysql_pconnect ($sqlserver,$sqluser,$sqlpass);
if ($connect === false) {
	die("Could not connect");
}
$db = mysql_select_db($sqldatabase,$connect) or die("Could not connect");

// First get the date of last update.
// $datetestA is the Unix timestamp of the most recently recorded processing
// date. It stays 0 (i.e. "process everything") when no date has been
// recorded yet, when the lookup fails, or when the stored value cannot be
// parsed as a date.
$datetestA = 0;
$num_rows = 0;
$query = "select lastproc from lastproc order by timeinsert desc limit 1";
$result = mysql_query($query,$connect);
if ($result === false) {
	// Carry on with $datetestA = 0 as before, but say why instead of
	// letting mysql_num_rows() complain about an invalid result.
	trigger_error("Could not read lastproc: " . mysql_error($connect), E_USER_WARNING);
}
else {
	$num_rows = mysql_num_rows($result);
}
if ($num_rows > 0) {
	$row = mysql_fetch_assoc($result);
	$lastproc = $row["lastproc"];
	$parsed_lastproc = strtotime($lastproc);
	// strtotime() returns false for an unparsable (e.g. empty) value;
	// keep the "process everything" default in that case.
	if ($parsed_lastproc !== false) {
		$datetestA = $parsed_lastproc;
	}
}

// Second connection: used by getePrintName() to read eprint titles from the
// ePrints installation's own database.
$connect2 = mysql_connect($sqlserver2,$sqluser2,$sqlpass2);

// Counts log lines that match one of the request patterns below.
// NOTE: it starts at 1, so its final value is one more than the match count.
$counter = 1;

// Title cache: archive name => eprint ID => title. Keyed by archive as well
// as ID so that equal IDs in different archives cannot be confused.
$eprintname = array();

foreach($log_file as $archivename=>$archivelog) {
	$logf = $log_dir . $archivelog;
	$archive_name = $archivename;
	$handle = fopen($logf, "r");
	if ($handle === false) {
		// fopen() has already raised a warning. Looping on feof() with an
		// invalid handle would never terminate, so skip this log instead.
		continue;
	}
	// fgets() returns false at end of file, which ends the loop.
	while (($buffer = fgets($handle, 4096)) !== false) {
		// Three accepted shapes of a successful (200) GET request:
		//   1. numeric client IP,      /archive/<zero-padded id>...
		//   2. dotted client hostname, /archive/<zero-padded id>...
		//   3. numeric client IP,      /<id>...
		// Captures: 1 = client address, 2 = timestamp, 3 = eprint ID.
		if	((preg_match("/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*/i",$buffer,$matches)) ||
			(preg_match("/^(\S{1,}\.\S{1,}\.\S{1,}\.\S{1,}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*/i",$buffer,$matches)) ||
			(preg_match("/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/(\d{1,4}).*? HTTP\/1..\" 200 .*/i",$buffer,$matches)))
		{
			$counter++;
			$country_code = '';
			$country_name = '';
			$insertid = '';
			$eprint_name = '';
			$view_type = '';
			$ip = $matches[1];
			// The ID is pure digits; the cast normalises leading zeros and
			// guarantees that only a number is placed in the SQL below.
			$archive = (int)$matches[3];
			// The whole raw log line is stored alongside the parsed fields.
			$uniquebits = $buffer;

			// Reduce the Apache timestamp (e.g. "10/Oct/2000:13:55:36 -0700")
			// to its date part in a form strtotime() understands
			// ("10 Oct 2000"), then rebuild it as year-month-day.
			$date = preg_replace("/:.*/","",$matches[2]);
			$date = preg_replace("/\//", " ", $date);
			$when = getdate(strtotime($date));
			$request_date = $when["year"]."-".$when["mon"]."-".$when["mday"];
			$datetestB = strtotime($request_date);
			// Skip requests dated before the last processed date. This is
			// checked before the country and title lookups so that no work
			// is spent on lines that will be discarded.
			if ($datetestB < $datetestA)
				continue;

			// Determine country code and name.
			// If the number falls into the local intranet range, then
			// use that instead of GeoIP.
			// ip2long() returns false for anything that is not a valid
			// dotted quad (e.g. the hostnames matched by the second
			// pattern); such addresses are never treated as local.
			$ip_long = ip2long($ip);
			$is_local = false;
			if ($ip_long !== false)
			{
				foreach ($local_IPs as $range)
				{
					if (($ip_long >= $range['lower']) && ($ip_long <= $range['upper']))
					{
						// Stop at the first match so that a later,
						// non-matching range cannot undo it.
						$is_local = true;
						break;
					}
				}
			}
			if ($is_local)
			{
				$country_code = 'T5';
				$country_name = $local_name;
			}
			else
			{
				$country_code = geoip_country_code_by_addr($gi, $ip);
				$country_name = geoip_country_name_by_addr($gi, $ip);
			}

			// A two-digit path segment directly after the ID marks a
			// document download; anything else counts as an abstract view.
			if(preg_match("/GET \/archive\/0{1,8}\d{1,4}\/\d\d\//i",$buffer) || preg_match("/GET \/\d{1,4}\/\d\d\//i",$buffer)) {
				$view_type = "download";
			} else {
				$view_type = "abstract";
			}

			// Look each title up once per archive and eprint.
			if(isset($eprintname[$archive_name][$archive])) {
				$eprint_name = $eprintname[$archive_name][$archive];
			} else {
				$eprint_name = getePrintName($archive_name,$archive);
				$eprintname[$archive_name][$archive] = $eprint_name;
			}

			// Requests for IDs without a title are not recorded.
			if($eprint_name != '') {
				// Every string value is escaped: the raw log line carries
				// client-supplied Referer and User-Agent text, and country
				// names or titles may contain quotes.
				$query = "
				INSERT into view (uniquebits,archive_name,ip,request_date,archiveid,country_code,country_name,view_type,eprint_name)
				values('".mysql_real_escape_string($uniquebits,$connect)."','"
					.mysql_real_escape_string($archive_name,$connect)."','"
					.mysql_real_escape_string($ip,$connect)."','"
					.mysql_real_escape_string($request_date,$connect)."',"
					.$archive.",'"
					.mysql_real_escape_string($country_code,$connect)."','"
					.mysql_real_escape_string($country_name,$connect)."','"
					.mysql_real_escape_string($view_type,$connect)."','"
					.mysql_real_escape_string($eprint_name,$connect)."')";
				// The result is deliberately not treated as fatal.
				// NOTE(review): presumably re-processed lines are rejected
				// by a unique key on uniquebits -- confirm against the schema.
				$result = mysql_query($query,$connect);
				$insertid = mysql_insert_id($connect);
			}

		} else {
			// print "NO match" . "\n";
		}
	}
	fclose($handle);
}

	/*
		Keep track of where we are. Should avoid duplication of results
		if the script is run more than once on the same log file
	*/

// Record the date of the last matching request so that the next run can skip
// entries dated before it. Nothing is recorded when
//   - no log line matched at all ($request_date was never set), or
//   - the last request is older than the previously recorded date
//     ($datetestA), which would move the marker backwards and cause already
//     processed entries to be read again.
if (isset($request_date) && strtotime($request_date) >= $datetestA) {
	$query = "INSERT into lastproc (lastproc) values('".$request_date."')";
	$result = mysql_query($query,$connect);
}

#print "Records counted: $counter\n";
#print "Last count: $request_date\n";

// Only close links that were actually opened.
if ($connect2) {
	mysql_close($connect2);
}
if ($connect) {
	mysql_close($connect);
}

/**
 * Look up the title of an eprint in an ePrints database.
 *
 * Uses the global $connect2 link, selects the database named $db on it and
 * reads the "title" column of the "archive" table for the given ID.
 *
 * @param string $db       Name of the ePrints database to select.
 * @param mixed  $eprintid Numeric eprint ID; cast to an integer before use.
 * @return string The title with surrounding whitespace removed and every run
 *                of whitespace collapsed to a single space, or '' when the
 *                database cannot be selected, the query fails, or no row
 *                with that ID exists.
 */
function getePrintName($db,$eprintid) {
	global $connect2;

	// Without a usable link and database there is nothing to look up.
	if (!$connect2 || !mysql_select_db($db,$connect2)) {
		return '';
	}

	// The cast guarantees that only a number is placed in the query.
	$query3 = "select title from archive where eprintid = " . (int)$eprintid;
	$result3 = mysql_query($query3,$connect2);
	if ($result3 === false) {
		return '';
	}

	// mysql_fetch_assoc() returns false when no row matched.
	$row = mysql_fetch_assoc($result3);
	if ($row === false || !isset($row["title"])) {
		return '';
	}

	return preg_replace("/\s+/"," ",trim($row["title"]));
}

?>