<?php
/*
 * ePrints usage-statistics harvester.
 *
 * Scans the configured Apache access logs for successful (HTTP 200) GET
 * requests on ePrints records, resolves the client country and the record
 * title, and inserts one row per request into the `view` table of the
 * statistics database. The date of the newest processed request is stored
 * in the `lastproc` table so that a later run can skip older log entries.
 *
 * NOTE(review): this script uses the legacy ext/mysql API (mysql_*), which
 * was removed in PHP 7. It is kept here so the script keeps running on the
 * PHP version it was written for; migrating to mysqli/PDO with prepared
 * statements is the long-term fix.
 */

include("geoip.inc");

$gi = geoip_open("/usr/local/share/GeoIP/GeoIP.dat", GEOIP_STANDARD);

/*
 * Apache log for ePrints uses this format:
 *
 *   LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
 *
 * If the log format differs, the regular expressions in matchViewRequest()
 * need to be adjusted.
 *
 * Parsed fields: ip, date (YYYY MM DD), archive ID.
 */

// Web server log files: archive (database) name => log file name.
$log_dir = '/sw/var/apache2/logs/';
$log_file = array(
    'otago_eprints' => 'access_log',
);

// eprintstats db
// NOTE(review): credentials are hard-coded in this file; consider moving
// them to a configuration file that is not under version control.
$sqlserver = 'localhost';
$sqluser = 'eprintstatspriv';
$sqlpass = 'AuldGrizzel';
$sqldatabase = 'eprintstats';

// SQL details of your ePrints installation
$sqlserver2 = 'localhost';
$sqluser2 = 'otago_eprints';
$sqlpass2 = 'DrSyntaxRidesAgain';

// IP ranges for your local Intranet. Each pair represents the lower
// and upper bound of the range, respectively.
$local_name = 'Otago Intranet';
$local_IPs = array(
    array(
        'lower' => ip2long('139.80.0.0'),
        'upper' => ip2long('139.80.127.255'),
    ),
);

###########################################
##
## No configuration required below here.
##
###########################################

$connect = mysql_pconnect($sqlserver, $sqluser, $sqlpass);
if (!$connect) {
    die("Could not connect");
}
mysql_select_db($sqldatabase, $connect) or die("Could not connect");

// First get the date of last update. 0 means "no previous run": every log
// entry is then considered.
$datetestA = 0;
$query = "select lastproc from lastproc order by timeinsert desc limit 1";
$result = mysql_query($query, $connect);
if ($result && mysql_num_rows($result) > 0) {
    $row = mysql_fetch_assoc($result);
    $parsed = strtotime($row["lastproc"]);
    // strtotime() signals failure with false (or -1 on old PHP versions);
    // an unparsable marker is treated like a missing one.
    if ($parsed !== false && $parsed > 0) {
        $datetestA = $parsed;
    }
}

// Without the ePrints database no title can be resolved and therefore no
// view would be recorded; stop before the lastproc marker is advanced.
$connect2 = mysql_connect($sqlserver2, $sqluser2, $sqlpass2);
if (!$connect2) {
    die("Could not connect to ePrints database");
}

// Title cache, keyed by "<archive name>:<eprint id>" so that ids from
// different archives cannot collide.
$eprintname = array();

// Newest request date that was actually processed in this run.
$latest_ts = null;
$latest_request_date = null;

foreach ($log_file as $archive_name => $archivelog) {
    $logf = $log_dir . $archivelog;

    $handle = fopen($logf, "r");
    if ($handle === false) {
        // feof() on a failed handle never becomes true, so skip this log
        // rather than looping forever.
        error_log("Could not open log file: " . $logf);
        continue;
    }

    // The 4096 limit is kept from the original: the raw chunk is stored
    // verbatim in the uniquebits column.
    while (($buffer = fgets($handle, 4096)) !== false) {
        $matches = matchViewRequest($buffer);
        if ($matches === null) {
            continue;
        }

        $ip = $matches[1];
        $archive = (int) $matches[3];

        // "12/Mar/2005:10:11:12 +1300" -> "12 Mar 2005"
        $date = preg_replace("/:.*/", "", $matches[2]);
        $date = preg_replace("/\//", " ", $date);
        $timestamp = strtotime($date);
        if ($timestamp === false || $timestamp === -1) {
            continue;
        }

        // Same (non zero-padded) format the original produced via getdate().
        $request_date = date("Y-n-j", $timestamp);
        $datetestB = strtotime($request_date);

        // Skip entries from days before the last processed day. Entries
        // from that same day are processed again.
        // NOTE(review): presumably duplicates are rejected by a unique
        // index on view.uniquebits - confirm against the schema.
        if ($datetestB < $datetestA) {
            continue;
        }

        if ($latest_ts === null || $datetestB > $latest_ts) {
            $latest_ts = $datetestB;
            $latest_request_date = $request_date;
        }

        // A request below "/<id>/<two digits>/" is a document download,
        // anything else is a view of the abstract page.
        if (preg_match("/GET \/archive\/0{1,8}\d{1,4}\/\d\d\//i", $buffer)
            || preg_match("/GET \/\d{1,4}\/\d\d\//i", $buffer)) {
            $view_type = "download";
        } else {
            $view_type = "abstract";
        }

        $cache_key = $archive_name . ':' . $archive;
        if (!isset($eprintname[$cache_key])) {
            $eprintname[$cache_key] = getePrintName($archive_name, $archive);
        }
        $eprint_name = $eprintname[$cache_key];

        // Requests for ids without a title are not recorded.
        if ($eprint_name == '') {
            continue;
        }

        list($country_code, $country_name) = resolveCountry($gi, $ip, $local_IPs, $local_name);

        // Every string value is escaped: the raw log line in particular
        // contains client-controlled Referer and User-Agent headers.
        $query = "INSERT into view (uniquebits,archive_name,ip,request_date,archiveid,country_code,country_name,view_type,eprint_name) values('"
            . mysql_real_escape_string($buffer, $connect) . "','"
            . mysql_real_escape_string($archive_name, $connect) . "','"
            . mysql_real_escape_string($ip, $connect) . "','"
            . mysql_real_escape_string($request_date, $connect) . "',"
            . $archive . ",'"
            . mysql_real_escape_string((string) $country_code, $connect) . "','"
            . mysql_real_escape_string((string) $country_name, $connect) . "','"
            . mysql_real_escape_string($view_type, $connect) . "','"
            . mysql_real_escape_string($eprint_name, $connect) . "')";

        // The result is deliberately not checked, as in the original: a
        // failed insert does not stop the run.
        mysql_query($query, $connect);
    }

    fclose($handle);
}

/*
 * Keep track of where we are. Should avoid duplication of results if the
 * script is run more than once on the same log file. Nothing is written
 * when no entry was processed, so the marker is never blanked or moved
 * backwards.
 */
if ($latest_request_date !== null) {
    $query = "INSERT into lastproc (lastproc) values('"
        . mysql_real_escape_string($latest_request_date, $connect) . "')";
    mysql_query($query, $connect);
}

mysql_close($connect2);
mysql_close($connect);

/**
 * Match one access-log line against the known ePrints request formats.
 *
 * @param string $line One line of the access log.
 * @return array|null  preg_match() result (1 = client address, 2 = content
 *                     of the [...] timestamp field, 3 = eprint id digits),
 *                     or null when the line is not a successful record
 *                     request.
 */
function matchViewRequest($line)
{
    $patterns = array(
        // Dotted-quad client, /archive/0000NNNN style URL.
        "/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*/i",
        // Any dotted client name (e.g. a resolved host name), same URL style.
        "/^(\S{1,}\.\S{1,}\.\S{1,}\.\S{1,}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*/i",
        // Dotted-quad client, short /NNNN style URL.
        "/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/(\d{1,4}).*? HTTP\/1..\" 200 .*/i",
    );

    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $line, $matches)) {
            return $matches;
        }
    }

    return null;
}

/**
 * Determine country code and name for a client address.
 *
 * Addresses inside one of the configured intranet ranges are reported with
 * the pseudo country code 'T5' and the configured intranet name; all other
 * addresses are looked up in the GeoIP database.
 *
 * @param mixed  $gi         Handle returned by geoip_open().
 * @param string $ip         Client address as found in the log.
 * @param array  $local_IPs  List of array('lower' => long, 'upper' => long).
 * @param string $local_name Name reported for intranet addresses.
 * @return array             array(country code, country name).
 */
function resolveCountry($gi, $ip, $local_IPs, $local_name)
{
    $ip_long = ip2long($ip);

    if ($ip_long !== false) {
        foreach ($local_IPs as $range) {
            if (($ip_long >= $range['lower']) && ($ip_long <= $range['upper'])) {
                // Return on the first hit so that a later, non-matching
                // range cannot overwrite the result.
                return array('T5', $local_name);
            }
        }
    }

    return array(
        geoip_country_code_by_addr($gi, $ip),
        geoip_country_name_by_addr($gi, $ip),
    );
}

/**
 * Look up the title of an eprint in the given ePrints database.
 *
 * Uses the global $connect2 link. Surrounding whitespace is trimmed and
 * runs of whitespace inside the title are collapsed to a single space.
 *
 * @param string $db       Name of the ePrints database to select.
 * @param int    $eprintid Numeric eprint id.
 * @return string          The normalised title, or '' when the database
 *                         cannot be selected, the query fails, or no such
 *                         eprint exists.
 */
function getePrintName($db, $eprintid)
{
    global $connect2;

    if (!mysql_select_db($db, $connect2)) {
        return '';
    }

    // The id is forced to an integer before it is placed into the query.
    $query3 = "select title from archive where eprintid = " . (int) $eprintid;
    $result3 = mysql_query($query3, $connect2);
    if (!$result3) {
        return '';
    }

    $row = mysql_fetch_assoc($result3);
    mysql_free_result($result3);
    if (!$row || !isset($row["title"])) {
        return '';
    }

    return preg_replace("/\s+/", " ", trim($row["title"]));
}
?>