<?php

/*
 * Harvests ePrints abstract views and document downloads from Apache
 * access logs into the eprintstats database. This first section holds all
 * site-specific configuration; values of the form ##NAME## are placeholders
 * to be substituted at install time.
 */

// NJS 2005-12-09 Switched to GeoIP from GeoIP:IPfree.
// require_once (rather than include) so that a missing GeoIP library stops
// the script with a clear error instead of an undefined-function failure on
// the geoip_open() call below.
require_once("geoip.inc");

$gi = geoip_open("##GEOIP_DATABASE##", GEOIP_STANDARD);

/*

Apache log for ePrints uses this format:
LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined

If the log format differs, the regular expressions used to match log lines
will need to be adjusted.

Parsed from each line:
    ip
    date YYYY MM DD
    archive ID

*/

// Web server log files.
// Each key is the archive name (it is also used as the name of that
// archive's ePrints database); each value is the log file name, which is
// appended directly to $log_dir, so $log_dir must end with a directory
// separator.
$log_dir = '##APACHE_LOG_LOCATION##';
$log_file = array(
    'otago_eprints' => '##APACHE_LOG_NAME##',
);


// eprintstats db
// NOTE(review): credentials are stored here in plain text; restrict read
// access to this file accordingly.
$sqlserver = 'localhost';
$sqluser = 'eprintstatspriv';
$sqlpass = 'AuldGrizzel';
$sqldatabase = 'eprintstats';

// SQL details of your ePrints installation
$sqlserver2 = 'localhost';
$sqluser2 = 'otago_eprints';
$sqlpass2 = 'DrSyntaxRidesAgain';

/* NJS 2005-12-16
    IP address ranges for your local Intranet(s). You can have multiple
    ranges of IP addresses, each with a different "country name", so that
    they will appear as separate entries in the by country stats pages.
    You should use a different country code for each range (ISO 3166-1
    specifies the range XA through XZ as "user-assignable", so you can use
    codes from there as necessary), and create flag icons as appropriate.

    Each address range key is the name that will appear in the statistics
    database (the "country name"), followed by a comma, followed by the
    appropriate ISO 3166-1 country code as noted above. Each entry in the
    range is either a single IP address, or an array with the keys 'lower'
    and 'upper' giving the inclusive bounds of a contiguous IP address
    range (see example below).

    All IP addresses must be converted to long values using the ip2long()
    function before being stored.

    Note that address ranges may overlap. The script will use the first
    range that matches a given IP, so list the ranges in the correct order
    of precedence for your needs.

    Example:

        $local_IPs = array(
            'Repository Admin,XA' => array(
                ip2long('192.168.1.5'),
                ip2long('192.168.1.22'),
                array(
                    'lower' => ip2long('192.168.1.30'),
                    'upper' => ip2long('192.168.1.35'),
                ),
            ),
            'Our Intranet,XI' => array(
                array(
                    'lower' => ip2long('192.168.1.0'),
                    'upper' => ip2long('192.168.255.255'),
                ),
            ),
        );

    'Repository Admin' covers the IP addresses 192.168.1.5, 192.168.1.22 and
    the range 192.168.1.30 to 192.168.1.35, inclusive. 'Our Intranet' covers
    the range 192.168.1.0 to 192.168.255.255, inclusive. A machine will only
    match the 'Our Intranet' range if it first fails to match the
    'Repository Admin' range.
*/
$local_IPs = array(
    'Repository Admin,XA' => array(
        ip2long('139.80.75.110'),  // Nigel @ Uni
        ip2long('60.234.209.74'),  // Nigel @ home
        ip2long('139.80.92.138'),  // Monica & Jeremy
        ip2long('139.80.92.151'),  //   @ Uni
        ip2long('203.89.162.155'), // Monica @ home
        ip2long('139.80.81.50'),   // eprints.otago.ac.nz
    ),
    'Otago Intranet,XI' => array(
        array(
            'lower' => ip2long('139.80.0.0'),
            'upper' => ip2long('139.80.127.255'),
        ),
    ),
);

###########################################
##
## No configuration required below here.
##
###########################################

/*
 * Main harvesting pass.
 *
 * Reads every configured Apache log, picks out successful (HTTP 200) GET
 * requests for eprint abstracts and documents, resolves the visitor's
 * country (local intranet ranges first, GeoIP otherwise), and records one
 * row per hit in the `view` table of the statistics database. Finally the
 * most recent request date handled is stored in `lastproc`, so a later run
 * can skip log lines dated before it.
 */

// Statistics database. Abort straight away if it cannot be reached.
$connect = mysql_pconnect($sqlserver, $sqluser, $sqlpass);
if (!$connect) {
    die("Could not connect");
}
$db = mysql_select_db($sqldatabase, $connect) or die("Could not connect");

// First get the date of last update (0 means "never run before").
$datetestA = 0;
$query = "select lastproc from lastproc order by timeinsert desc limit 1";
$result = mysql_query($query, $connect);
if ($result && mysql_num_rows($result) > 0) {
    $row = mysql_fetch_assoc($result);
    $datetestA = strtotime($row["lastproc"]);
}

// ePrints database, used by getePrintName() to look up titles. Without it
// no hit could be recorded, so stop rather than mark the logs as processed.
$connect2 = mysql_connect($sqlserver2, $sqluser2, $sqlpass2);
if (!$connect2) {
    die("Could not connect to the ePrints database");
}

// Log lines that count as a hit. Capture groups: 1 = client address,
// 2 = timestamp, 3 = eprint ID.
// NJS 2005-11-25 Added regexp for EPrints short URLs.
$hit_patterns = array(
    // Long URL, numeric client address.
    "/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*/i",
    // Long URL, dotted non-numeric client (e.g. a resolved host name).
    "/^(\S{1,}\.\S{1,}\.\S{1,}\.\S{1,}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*/i",
    // Short URL, numeric client address.
    "/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/(\d{1,4}).*? HTTP\/1..\" 200 .*/i",
);

$counter = 1;
$eprintname = array();  // title cache: [archive name][eprint ID] => title
$latest_stamp = null;   // newest request date handled in this run ...
$latest_date = null;    // ... and the same date formatted for the database

foreach ($log_file as $archivename => $archivelog) {
    $logf = $log_dir . $archivelog;
    $archive_name = $archivename;

    // An unreadable log must be skipped: calling feof()/fgets() on a failed
    // handle would otherwise spin forever.
    $handle = fopen($logf, "r");
    if ($handle === false) {
        print "Could not open log file " . $logf . "\n";
        continue;
    }

    while (($buffer = fgets($handle, 4096)) !== false) {
        $matches = array();
        $is_hit = false;
        foreach ($hit_patterns as $pattern) {
            if (preg_match($pattern, $buffer, $matches)) {
                $is_hit = true;
                break;
            }
        }
        if (!$is_hit) {
            continue;
        }

        $counter++;
        $ip = $matches[1];
        $archive = $matches[3];

        /* NJS 2005-12-16
            Determine country code and name.
            Check whether the IP number falls into any of the local
            intranet ranges. If so, then use that. Otherwise, fall back
            to GeoIP. Clients logged by name yield no long value and go
            directly to GeoIP.
        */
        $local = false;
        $ip_long = ip2long($ip);
        if ($ip_long !== false) {
            $local = findLocalCountry($ip_long, $local_IPs);
        }
        if ($local !== false) {
            list($country_name, $country_code) = $local;
        } else {
            $country_code = geoip_country_code_by_addr($gi, $ip);
            $country_name = geoip_country_name_by_addr($gi, $ip);
        }
        // end NJS 2005-12-16

        // Reduce "DD/Mon/YYYY:HH:MM:SS +ZZZZ" to "DD Mon YYYY" and turn it
        // into a plain date.
        $date = preg_replace("/:.*/", "", $matches[2]);
        $date = preg_replace("/\//", " ", $date);
        $stamp = strtotime($date);
        if ($stamp === false || $stamp === -1) {
            continue;   // unparseable timestamp, cannot be dated
        }
        $request_date = date("Y-n-j", $stamp);
        $datetestB = strtotime($request_date);
        if ($datetestB < $datetestA) {
            continue;   // already covered by an earlier run
        }

        // Only ever move the high-water mark forwards.
        if ($latest_stamp === null || $datetestB > $latest_stamp) {
            $latest_stamp = $datetestB;
            $latest_date = $request_date;
        }

        // A two-digit path component after the eprint ID denotes a document
        // download; everything else is an abstract view.
        // NJS 2005-11-25 Added regexp for EPrints short URLs.
        if (preg_match("/GET \/archive\/0{1,8}\d{1,4}\/\d\d\//i", $buffer)
            || preg_match("/GET \/\d{1,4}\/\d\d\//i", $buffer)) {
            $view_type = "download";
        } else {
            $view_type = "abstract";
        }

        // Titles are cached per archive so that identical eprint IDs in
        // different archives cannot be confused with each other.
        if (!isset($eprintname[$archive_name][$archive])) {
            $eprintname[$archive_name][$archive] = (string) getePrintName($archive_name, $archive);
        }
        $eprint_name = $eprintname[$archive_name][$archive];
        if ($eprint_name === '') {
            continue;   // no such eprint, nothing to record
        }

        // Every string value is escaped: the raw log line carries
        // client-supplied referer and user-agent text, and country names
        // may contain apostrophes.
        $query = sprintf(
            "INSERT into view (uniquebits,archive_name,ip,request_date,archiveid,country_code,country_name,view_type,eprint_name)"
            . " values('%s','%s','%s','%s',%d,'%s','%s','%s','%s')",
            mysql_real_escape_string($buffer, $connect),
            mysql_real_escape_string($archive_name, $connect),
            mysql_real_escape_string($ip, $connect),
            mysql_real_escape_string($request_date, $connect),
            $archive,
            mysql_real_escape_string((string) $country_code, $connect),
            mysql_real_escape_string((string) $country_name, $connect),
            mysql_real_escape_string($view_type, $connect),
            mysql_real_escape_string($eprint_name, $connect)
        );
        // Failures are deliberately not reported. NOTE(review): presumably
        // re-runs rely on a unique key on uniquebits to reject duplicate
        // rows; confirm against the table definition.
        mysql_query($query, $connect);
    }
    fclose($handle);
}

/*
    Keep track of where we are. Should avoid duplication of results
    if the script is run more than once on the same log file.
    Nothing is written when no hit was handled in this run.
*/
if ($latest_date !== null) {
    $query = "INSERT into lastproc (lastproc) values('" . $latest_date . "')";
    mysql_query($query, $connect);
}

#print "Records counted: $counter\n";
#print "Last count: $latest_date\n";
mysql_close($connect2);
mysql_close($connect);

/**
 * Find the first local (intranet) range that contains an address.
 *
 * Every entry of every named range is tested, in configuration order. An
 * entry is either a single long value or an array holding an inclusive
 * lower and upper bound, keyed 'lower'/'upper' or given positionally as
 * elements 0 and 1. Entries of any other shape are reported and skipped.
 *
 * @param int   $ip_long   Address as returned by ip2long().
 * @param array $local_IPs Range table keyed by "Country name,CC".
 * @return array|false array(country name, country code) of the first
 *                     matching range, or FALSE if no range matches.
 */
function findLocalCountry($ip_long, $local_IPs)
{
    foreach ($local_IPs as $id => $addresses) {
        foreach ($addresses as $ip_range) {
            if (is_array($ip_range)) {
                // check against lower/upper bounds
                $lower = isset($ip_range['lower']) ? $ip_range['lower']
                    : (isset($ip_range[0]) ? $ip_range[0] : null);
                $upper = isset($ip_range['upper']) ? $ip_range['upper']
                    : (isset($ip_range[1]) ? $ip_range[1] : null);
                if (!is_int($lower) || !is_int($upper)) {
                    print "Malformed address range in \$local_IPs (expected lower and upper long values).\n";
                    continue;
                }
                $matched = ($ip_long >= $lower) && ($ip_long <= $upper);
            } elseif (is_int($ip_range)) {
                // single address
                $matched = ($ip_long == $ip_range);
            } else {
                // something is seriously broken, ignore this entry
                print "Unsupported data type " . gettype($ip_range) .
                    " (value " . $ip_range .
                    ") in \$local_IPs (expected long).\n";
                continue;
            }

            // Stop only on a real match; a miss moves on to the next entry.
            if ($matched) {
                return array_pad(explode(',', $id), 2, '');
            }
        }
    }

    return false;
}

/**
 * Fetch the title of an eprint from an archive's ePrints database.
 *
 * Uses the global $connect2 link: selects the database named $db on it and
 * reads `title` from the `archive` table for the given ID.
 *
 * @param string $db       Name of the database to select on $connect2.
 * @param mixed  $eprintid Numeric eprint ID (an integer or a digit string).
 * @return string The title with surrounding whitespace removed and inner
 *                whitespace runs collapsed to single spaces, or '' when
 *                the database, the query or the row is unavailable.
 */
function getePrintName($db, $eprintid)
{
    global $connect2;

    // Never fall through to whichever database happened to be selected
    // by an earlier call.
    if (!mysql_select_db($db, $connect2)) {
        return '';
    }

    // The cast guarantees that only a number reaches the SQL text.
    $query3 = "select title from archive where eprintid = " . (int) $eprintid;
    $result3 = mysql_query($query3, $connect2);
    if (!$result3) {
        return '';
    }

    $row = mysql_fetch_assoc($result3);
    mysql_free_result($result3);
    if (!$row || !isset($row["title"])) {
        return '';
    }

    return preg_replace("/\s+/", " ", trim($row["title"]));
}