diff --git a/Repositories/statistics/scripts/eprints-usage_src.php b/Repositories/statistics/scripts/eprints-usage_src.php index a3237cf..79e56bb 100755 --- a/Repositories/statistics/scripts/eprints-usage_src.php +++ b/Repositories/statistics/scripts/eprints-usage_src.php @@ -462,13 +462,35 @@ $eprint_name = ''; $view_type = ''; $uniquebits = ''; - $ip = $matches[1]; + + /* NJS 2007-01-29 + Moved date checking to the start of the loop, as there's + no point in doing any of the regexp checks if we've already + processed this log entry and are going to discard it anyway. + */ + $date = $matches[2]; + /* NJS 2006-04-28 + Switched to timestamp rather than date-based comparison. + First, clean up the Apache request date into something + that strtotime understands. Note that the Apache log + dates include time zone info by default. + */ + $date = preg_replace("/:/"," ",$date,1); // Change first ":" to " ". + $date = preg_replace("/\//", " ", $date); // Change all "/" to " ". + $datetestB = strtotime($date); + + if ($datetestB < $datetestA) + continue; + + // Convert to properly formatted date string. + $request_date = date('Y-m-d H:i:s O', $datetestB); /* NJS 2005-12-16 Determine country code and name. Check whether the IP number falls into any of the local intranet ranges. If so, then use that. */ + $ip = $matches[1]; $ip_long = ip2long($ip); $found_country = FALSE; foreach ($local_IPs as $id => $addresses) @@ -510,8 +532,9 @@ } // end NJS 2005-12-16 - // NJS 2007-01-26 - // Check whether this is a bot reference. + /* NJS 2007-01-26 + Check whether this is a bot reference. + */ $referer = $matches[4]; $found_country = FALSE; foreach ($bot_patterns as $id => $patterns) @@ -533,24 +556,9 @@ } // end NJS 2007-01-26 - - $date = $matches[2]; + // Now sort out the remaining bits and we're done. $eprint_id = $matches[3]; $uniquebits = $buffer; - /* NJS 2006-04-28 - Switched to timestamp rather than date-based comparison. - First, clean up the Apache request date into something - that strtotime understands. Note that the Apache log - dates include time zone info by default. - */ - $date = preg_replace("/:/"," ",$date,1); // Change first ":" to " ". - $date = preg_replace("/\//", " ", $date); // Change all "/" to " ". - $datetestB = strtotime($date); - // Convert to properly formatted date string. - $request_date = date('Y-m-d H:i:s O', $datetestB); - - if ($datetestB < $datetestA) - continue; // NJS 2005-11-25 Added regexp for EPrints short URLs. if(preg_match("/GET \/archive\/0{1,8}\d{1,4}\/\d\d\//i",$buffer) || preg_match("/GET \/\d{1,4}\/\d\d\//i",$buffer)) {