- Added missing regexp for short EPrints URLs on log lines starting with
a domain name rather than an IP address.
1 parent 5f877c6 commit 59564685d2b5997f9fc7d3d4422aa48866d186c1
nstanger authored on 29 Jan 2007
Showing 1 changed file
View
12
Repositories/statistics/scripts/eprints-usage_src.php
// Open the log file named by $logf and scan it line by line, looking for
// successful GET requests whose path carries a numeric identifier.
// NOTE(review): the fopen() result is not checked in the visible code; if the
// open fails, false is passed to feof()/fgets() — confirm it is handled elsewhere.
$handle = fopen($logf, "r");
while (!feof($handle)) {
// fgets() stops after length - 1 bytes, so longer lines are split across reads.
$buffer = fgets($handle, 4096);
// Every pattern below uses the same capture groups:
//   1 = the leading client address (dotted quad, or dot-separated non-space runs),
//   2 = the text inside the square brackets,
//   3 = a 1-4 digit number taken from the request path,
//   4 = a double-quoted field anchored at the end of the line.
// Two path shapes are recognised: "/archive/" followed by one to eight zeros and
// the number, and the short form "/" followed directly by the number.
// NOTE(review): (\d{1,4}).*? also accepts longer digit runs, capturing only the
// first four digits — confirm identifiers never exceed four digits.
// NOTE(review): the \S{1,}\.\S{1,}\.\S{1,}\.\S{1,} form needs at least three dots,
// so a host such as "example.com" will not match; it also matches dotted quads.
// NJS 2005-11-25 Added regexp for EPrints short URLs.
if ((preg_match("/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*(\"[^\"]+\")$/i",$buffer,$matches)) ||
(preg_match("/^(\S{1,}\.\S{1,}\.\S{1,}\.\S{1,}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*(\"[^\"]+\")$/i",$buffer,$matches)) ||
(preg_match("/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/(\d{1,4}).*? HTTP\/1..\" 200 .*(\"[^\"]+\")$/i",$buffer,$matches)))
// NOTE(review): the condition above and the one below differ only in the "?" after
// the final group and the added fourth pattern; they look like the before and after
// sides of a diff. As written, the brace-less "if" above takes the "if" below as its
// body, so the block runs only when both match — confirm only one belongs in the file.
// NOTE(review): the existing comment calls group 4 the referer, but the pattern
// captures the LAST quoted field on the line — TODO confirm which field that is
// in this log format.
// NOTE(review): with the final group made optional after a greedy ".*", the ".*"
// can consume the rest of the line and the group then matches empty, so
// $matches[4] is likely never populated by these patterns — verify before relying on it.
// NJS 2007-01-26 Added referer match to all regexps to enable bot detection.
// NJS 2007-01-29 Added missing regexp for EPrints short URLs with domain names rather than IP addresses.
if ((preg_match("/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*(\"[^\"]+\")?$/i",$buffer,$matches)) ||
(preg_match("/^(\S{1,}\.\S{1,}\.\S{1,}\.\S{1,}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*(\"[^\"]+\")?$/i",$buffer,$matches)) ||
(preg_match("/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/(\d{1,4}).*? HTTP\/1..\" 200 .*(\"[^\"]+\")?$/i",$buffer,$matches)) ||
(preg_match("/^(\S{1,}\.\S{1,}\.\S{1,}\.\S{1,}) - - \[(.*?)\] \"GET \/(\d{1,4}).*? HTTP\/1..\" 200 .*(\"[^\"]+\")?$/i",$buffer,$matches)))
{
// Count the matching line.
$counter++;
// Reset the country fields for this line; the code that fills them in is
// presumably further down, outside the visible part of the block.
$country_code = '';
$country_name = '';