diff --git a/Repositories/statistics/scripts/eprints-usage_src.php b/Repositories/statistics/scripts/eprints-usage_src.php
index 4ef8a31..cd46d81 100755
--- a/Repositories/statistics/scripts/eprints-usage_src.php
+++ b/Repositories/statistics/scripts/eprints-usage_src.php
@@ -197,238 +197,192 @@
 the user-agent part of the HTTP response, which gets recorded in the Apache
 log file. We can look for these and re-code log entries as appropriate.
 
-The format of this list is similar to that of the $local_IPs variable. The
-key is the "country name" (in this case the name of the search engine) plus
-an "X" ISO 3166-1 country code, separated by a comma. Each key value has an
-associated list of corresponding regular expressions that can occur in the
-user-agent part of the Apache log entry. If any one of these REs matches
-the user-agent part of the log entry, then we should re-code the country
-appropriately.
+The format of this list is similar to that of the $local_IPs variable.
+The key is the "country name" (in this case the name of the search
+engine) plus a non-standard four-character country code starting with
+"X@", separated by a comma. Each key value has an associated list of
+corresponding regular expressions that can occur in the user-agent part
+of the Apache log entry. If any one of these REs matches the user-agent
+part of the log entry, then we should re-code the country appropriately.
 
-Note that this means that several of the "X" country codes are now reserved
-and can no longer be used in $local_IPs.
+A four-character code is used because that is what the database allows, and
+it avoids having to reserve several of the "X" country codes for search
+engines.
 */
 $bot_patterns = array(
-	// Google (http://www.google.com/)
-	'Google,XG' => array(
-		'/Googlebot/',
-		'/http:\/\/www\.google\.com\/bot\.html/',
+	// Yahoo! (http://www.yahoo.com/)
+	'Yahoo!,X@YH' => array(
+		'/yahoo! slurp/i',
+		'/yahooseeker/i',
 	),
 	// Windows Live Search (http://search.msn.com/)
-	'Windows Live Search,XM' => array(
-		'/msnbot/',
-		'/http:\/\/search\.msn\.com\/msnbot\.htm/',
+	'Windows Live Search,X@MS' => array(
+		'/msnbot/i',
 	),
-	// Yahoo! (http://www.yahoo.com/)
-	'Yahoo!,XY' => array(
-		'/Yahoo! Slurp/',
-		'/YahooSeeker/',
-		'/http:\/\/help\.yahoo\.com\/help\/us\/ysearch\/slurp/',
-		'/yahooseeker-jp-mobile AT Yahoo!JAPAN/',
+	// Google (http://www.google.com/)
+	'Google,X@GG' => array(
+		'/googlebot/i',
 	),
 	// Ask.com (http://www.ask.com/)
-	'Ask.com,XJ' => array(
-		'/Ask Jeeves\/Teoma/',
-		'/http:\/\/about\.ask\.com\/en\/docs\/about\/webmasters\.shtml/',
+	'Ask.com,X@AC' => array(
+		'/ask jeeves\/teoma/i',
 	),
 	// Everything else I could find in our log files :)
-	'Other search engine,XZ' => array(
+	'Other search engine,X@OS' => array(
 		// TAMU Internet Research Lab (http://irl.cs.tamu.edu/)
-		'/http:\/\/irl\.cs\.tamu\.edu\/crawler/',
+		'/http:\/\/irl\.cs\.tamu\.edu\/crawler/i',
 		// Alexa web search (http://www.alexa.com/)
-		'/ia_archiver/',
+		'/ia_archiver/i',
 		// TrueKnowledge for Web (http://www.authoritativeweb.com/)
-		'/ConveraCrawler/',
-		'/http:\/\/www\.authoritativeweb\.com\/crawl/',
+		'/converacrawler/i',
 		// Majestic 12 distributed search engine (http://www.majestic12.co.uk/)
-		'/MJ12bot/',
-		'/http:\/\/majestic12\.co\.uk\/bot\.php/',
+		'/mj12bot/i',
 		// Picsearch (http://www.picsearch.com/)
-		'/psbot/',
-		'/http:\/\/www\.picsearch\.com\/bot\.html/',
+		'/psbot/i',
 		// Exalead (http://www.exalead.com/search)
-		'/Exabot/',
+		'/exabot/i',
 		// Cazoodle (note cazoodle.com doesn't exist)
-		'/CazoodleBot Crawler/',
-		'/http:\/\/www\.cazoodle\.com/',
-		'/mqbot@cazoodle\.com/',
+		'/cazoodlebot crawler/i',
+		'/mqbot@cazoodle\.com/i',
 		// Gigablast (http://www.gigablast.com/)
-		'/Gigabot/',
-		'/http:\/\/www\.gigablast\.com\/spider\.html/',
+		'/gigabot/i',
 		// Houxou (http://www.houxou.com/)
-		'/HouxouCrawler/',
-		'/http:\/\/www\.houxou\.com\/crawler/',
-		'/crawler at houxou dot com/',
+		'/houxoucrawler/i',
+		'/crawler at houxou dot com/i',
 		// IBM Almaden Research Center Computer Science group (http://www.almaden.ibm.com/cs/)
-		'/http:\/\/www\.almaden\.ibm\.com\/cs\/crawler/',
+		'/http:\/\/www\.almaden\.ibm\.com\/cs\/crawler/i',
 		// Goo? (http://help.goo.ne.jp/)
-		'/ichiro/',
-		'/http:\/\/help\.goo\.ne\.jp\/door\/crawler\.html/',
+		'/ichiro/i',
 		// Daum Communications Corp (Korea)
-		'/Edacious & Intelligent Web Robot/',
-		'/Daum Communications Corp/',
-		'/DAUM Web Robot/',
-		'/MSIE is not me/',
-		'/DAUMOA/',
+		'/edacious & intelligent web robot/i',
+		'/daum communications corp/i',
+		'/daum web robot/i',
+		'/msie is not me/i',
+		'/daumoa/i',
 		// Girafa (http://www.girafa.com/)
-		'/[Gg]irafabot/',
-		'/girafabot at girafa dot com/',
-		'/http:\/\/www\.girafa\.com/',
+		'/girafabot/i',
 		// The Generations Network (http://www.myfamilyinc.com/)
-		'/MyFamilyBot/',
-		'/http:\/\/www\.ancestry\.com\/learn\/bot\.aspx/',
-		'/http:\/\/www\.myfamilyinc\.com/',
+		'/myfamilybot/i',
 		// Naver? (http://www.naver.com/)
-		'/NaverBot/',
-		'/http:\/\/help\.naver\.com\/delete_main\.asp/',
+		'/naverbot/i',
 		// WiseNut (http://www.wisenutbot.com/)
-		'/ZyBorg/',
-		'/wn-[0-9]+\.zyborg@looksmart\.net/',
-		'/http:\/\/www\.WISEnutbot\.com/',
+		'/zyborg/i',
+		'/wn-[0-9]+\.zyborg@looksmart\.net/i',
 		// Accelobot (http://www.accelobot.com/)
 		// This one seems particularly busy!
-		'/heritrix/',
-		'/http:\/\/www\.accelobot\.com/',
+		'/heritrix/i',
 		// Seeqpod (http://www.seeqpod.com/)
-		'/seeqpod-vertical-crawler/',
-		'/http:\/\/www\.seeqpod\.com/',
+		'/seeqpod-vertical-crawler/i',
 		// University of Illinois at Urbana-Champaign, Computer Science (http://www.cs.uiuc.edu/)
-		'/MQBOT Crawler/',
-		'/http:\/\/falcon\.cs\.uiuc\.edu/',
-		'/mqbot@cs\.uiuc\.edu/',
+		'/mqbot crawler/i',
+		'/mqbot@cs\.uiuc\.edu/i',
 		// Microsoft Research (http://research.microsoft.com/)
-		'/MSRBOT/',
-		'/http:\/\/research\.microsoft\.com\/research\/sv\/msrbot\//',
+		'/msrbot/i',
 		// Nusearch
-		'/Nusearch Spider/',
-		'/www\.nusearch\.com/',
+		'/nusearch spider/i',
 		// SourceForge (http://www.sf.net/)
-		'/nutch-agent@lists\.sourceforge\.net/',
+		'/nutch-agent@lists\.sourceforge\.net/i',
 		// Lucene (http://lucene.apache.org/)
-		'/nutch-agent@lucene\.apache\.org/',
-		'/raphael@unterreuth.de/',
+		'/nutch-agent@lucene\.apache\.org/i',
+		'/raphael@unterreuth.de/i',
 		// Computer Science, University of Washington (http://cs.washington.edu/)
-		'/Nutch running at UW/',
-		'/http:\/\/crawlers\.cs\.washington\.edu\//',
-		'/sycrawl@cs\.washington\.edu/',
+		'/nutch running at uw/i',
+		'/sycrawl@cs\.washington\.edu/i',
 		// Chikayama & Taura Laboratory, University of Tokyo (http://www.logos.ic.i.u-tokyo.ac.jp/)
-		'/Shim-Crawler/',
-		'/http:\/\/www\.logos\.ic\.i\.u-tokyo\.ac\.jp\/crawler\//',
-		'/crawl@logos\.ic\.i\.u-tokyo\.ac\.jp/',
+		'/shim-crawler/i',
+		'/crawl@logos\.ic\.i\.u-tokyo\.ac\.jp/i',
 		// Sproose (http://www.sproose.com/)
-		'/sproose bot/',
-		'/http:\/\/www\.sproose\.com\/bot\.html/',
-		'/crawler@sproose\.com/',
+		'/sproose bot/i',
+		'/crawler@sproose\.com/i',
 		// Turnitin (http://www.turnitin.com/)
-		'/TurnitinBot/',
-		'/http:\/\/www\.turnitin\.com\/robot\/crawlerinfo\.html/',
+		'/turnitinbot/i',
 		// WISH Project (http://wish.slis.tsukuba.ac.jp/)
-		'/wish-project/',
-		'/http:\/\/wish\.slis\.tsukuba\.ac\.jp\//',
+		'/wish-project/i',
 		// WWWster
-		'/wwwster/',
-		'/gue@cis\.uni-muenchen\.de/',
+		'/wwwster/i',
+		'/gue@cis\.uni-muenchen\.de/i',
 		// Forex Trading Network Organization (http://www.netforex.org/)
-		'/Forex Trading Network Organization/',
-		'/http:\/\/www\.netforex\.org/',
-		'/info@netforex\.org/',
+		'/forex trading network organization/i',
+		'/info@netforex\.org/i',
 		// FunnelBack (http://www.funnelback.com/)
-		'/FunnelBack/',
-		'/http:\/\/www\.funnelback\.com\/robot\.html/',
+		'/funnelback/i',
 		// Baidu (http://www.baidu.com/)
-		'/Baiduspider/',
-		'/http:\/\/www\.baidu\.com\/search\/spider\.htm/',
+		'/baiduspider/i',
 		// Brandimensions (http://www.brandimensions.com/)
-		'/BDFetch/',
+		'/bdfetch/i',
 		// Blaiz Enterprises (http://www.blaiz.net/)
-		'/Blaiz-Bee/',
-		'/http:\/\/www\.blaiz\.net/',
+		'/blaiz-bee/i',
 		// Boitho/SearchDaimon (http://www.boitho.com/ or http://www.searchdaimon.com/)
-		'/boitho\.com-dc/',
-		'/http:\/\/www\.boitho\.com\/dcbot\.html/',
+		'/boitho\.com-dc/i',
 		// Celestial (OAI aggregator, see http://oai-perl.sourceforge.net/ for a little info)
-		'/Celestial/',
+		'/celestial/i',
 		// Cipinet (http://www.cipinet.com/)
-		'/CipinetBot/',
-		'/http:\/\/www\.cipinet\.com\/bot\.html/',
+		'/cipinetbot/i',
 		// iVia (http://ivia.ucr.edu/)
-		'/CrawlerTest CrawlerTest/',
-		'/http:\/\/ivia\.ucr\.edu\/useragents\.shtml/',
+		'/crawlertest crawlertest/i',
 		// Encyclopedia of Keywords (http://keywen.com/)
-		'/EasyDL/',
-		'/http:\/\/keywen\.com\/Encyclopedia\/Bot/',
+		'/easydl/i',
 		// Everest-Vulcan Inc. (http://everest.vulcan.com/)
-		'/Everest-Vulcan Inc/',
-		'/http:\/\/everest\.vulcan\.com\/crawlerhelp/',
+		'/everest-vulcan inc/i',
 		// FactBites (http://www.factbites.com/)
-		'/Factbot/',
-		'/http:\/\/www\.factbites\.com\/webmasters\.php/',
+		'/factbot/i',
 		// Scirus (http://www.scirus.com/)
-		'/Scirus scirus-crawler@fast\.no/',
-		'/http:\/\/www\.scirus\.com\/srsapp\/contactus\//',
+		'/scirus scirus-crawler@fast\.no/i',
 		// UOL (http://www.uol.com.br/)
-		'/UOLCrawler/',
-		'/soscrawler@uol\.com\.br/',
+		'/uolcrawler/i',
+		'/soscrawler@uol\.com\.br/i',
 		// Always Updated (http://www.updated.com/)
-		'/updated crawler/',
-		'/crawler@updated\.com/',
-		'/http:\/\/www\.updated\.com/',
+		'/updated crawler/i',
+		'/crawler@updated\.com/i',
 		// FAST Enterprise Search (http://www.fast.no/)
-		'/crawler@fast\.no/',
-		'/FAST MetaWeb Crawler/',
-		'/helpdesk at fastsearch dot com/',
+		'/fast metaweb crawler/i',
+		'/crawler@fast\.no/i',
+		'/helpdesk at fastsearch dot com/i',
 		// Deutsche Wortschatz Portal (http://wortschatz.uni-leipzig.de/)
-		'/findlinks/',
-		'/http:\/\/wortschatz\.uni-leipzig\.de\/findlinks\//',
+		'/findlinks/i',
 		// Gais (http://gais.cs.ccu.edu.tw/)
-		'/Gaisbot/',
-		'/robot[0-9]{2}@gais.cs.ccu.edu.tw/',
-		'/http:\/\/gais\.cs\.ccu\.edu\.tw\/robot\.php/',
+		'/gaisbot/i',
+		'/robot[0-9]{2}@gais.cs.ccu.edu.tw/i',
 		// http://ilse.net/
-		'/INGRID/',
-		'/http:\/\/webmaster\.ilse\.nl\/jsp\/webmaster\.jsp/',
+		'/ingrid/i',
 		// Krugle (http://corp.krugle.com/)
-		'/Krugle\/Krugle/',
-		'/Krugle web crawler/',
-		'/http:\/\/corp\.krugle\.com\/crawler\/info\.html/',
-		'/webcrawler@krugle\.com/',
+		'/krugle\/krugle/i',
+		'/krugle web crawler/i',
+		'/webcrawler@krugle\.com/i',
 		// WebWobot (http://www.webwobot.com/)
-		'/ScollSpider/',
-		'/http:\/\/www\.webwobot\.com/',
+		'/scollspider/i',
 		// Omni-Explorer (http://www.omni-explorer.com/)
-		'/OmniExplorer_Bot/',
-		'/http:\/\/www\.omni-explorer\.com/',
-		'/WorldIndexer/',
+		'/omniexplorer_bot/i',
+		'/worldindexer/i',
 		// PageBull (http://www.pagebull.com/)
-		'/Pagebull http:\/\/www\.pagebull\.com\//',
+		'/pagebull http:\/\/www\.pagebull\.com\//i',
 		// dir.com (http://dir.com/)
-		'/Pompos/',
-		'/http:\/\/dir\.com\/pompos\.html/',
+		'/pompos/i',
 		// Sensis (http://sensis.com.au/)
-		'/Sensis Web Crawler/',
-		'/search_comments\\\\at\\\\sensis\\\\dot\\\\com\\\\dot\\\\au/',
+		'/sensis web crawler/i',
+		'/search_comments\\\\at\\\\sensis\\\\dot\\\\com\\\\dot\\\\au/i',
 		// Shopwiki (http://www.shopwiki.com/)
-		'/ShopWiki/',
-		'/http:\/\/www\.shopwiki\.com\//',
+		'/shopwiki/i',
 		// Guruji (http://www.terrawiz.com/)
-		'/TerrawizBot/',
-		'/http:\/\/www\.terrawiz\.com\/bot\.html/',
+		'/terrawizbot/i',
 		// Language Observatory Project (http://www.language-observatory.org/)
-		'/UbiCrawler/',
-		'/http:\/\/gii\.nagaokaut\.ac\.jp\/~ubi\//',
+		'/ubicrawler/i',
+		// MSIE offline bookmarks crawler
+		'/msiecrawler/i',
 		// Unidentified
-		'/[Bb]ot/',
-		'/[Cc]rawler/',
-		'/[Ss]pider/',
-		'/larbin/', // also larbinSpider
-		'/HTTrack/',
-		'/voyager/',
-		'/AcadiaUniversityWebCensusClient/',
-		'/FeedChecker/',
-		'/KnowItAll\(knowitall@cs\.washington\.edu\)/',
-		'/Mediapartners-Google/',
-		'/psycheclone/',
-		'/topicblogs/',
+		'/bot/i',
+		'/crawler/i',
+		'/spider/i',
+		'/larbin/i', // also larbinSpider
+		'/httrack/i',
+		'/voyager/i',
+		'/acadiauniversitywebcensusclient/i',
+		'/feedchecker/i',
+		'/knowitall\(knowitall@cs\.washington\.edu\)/i',
+		'/mediapartners-google/i',
+		'/psycheclone/i',
+		'/topicblogs/i',
+		'/nutch/i',
 	),
 );
 
@@ -474,13 +428,36 @@
 	$handle = fopen($logf, "r");
 	while (!feof($handle)) {
 		$buffer = fgets($handle, 4096);
-		// NJS 2005-11-25 Added regexp for EPrints short URLs.
-		// NJS 2007-01-26 Added user-agent match to all regexps to enable bot detection.
-		// NJS 2007-01-29 Added missing regexp for EPrints short URLs with domain names rather than IP addresses.
-		if	((preg_match("/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*?(\"[^\"]+\")?$/i",$buffer,$matches)) ||
-			(preg_match("/^(\S{1,}\.\S{1,}\.\S{1,}\.\S{1,}) - - \[(.*?)\] \"GET \/archive\/0{1,8}(\d{1,4}).*? HTTP\/1..\" 200 .*?(\"[^\"]+\")?$/i",$buffer,$matches)) ||
-			(preg_match("/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(.*?)\] \"GET \/(\d{1,4}).*? HTTP\/1..\" 200 .*?(\"[^\"]+\")?$/i",$buffer,$matches)) ||
-			(preg_match("/^(\S{1,}\.\S{1,}\.\S{1,}\.\S{1,}) - - \[(.*?)\] \"GET \/(\d{1,4}).*? HTTP\/1..\" 200 .*?(\"[^\"]+\")?$/i",$buffer,$matches)))
+		/* NJS 2007-01-26
+		   Added user-agent match to all regexps to enable bot detection.
+		   
+		   NJS 2007-01-31
+		   Refactored regexps from four down to one, after realising
+		   that (a) long EPrints URLs are a superset of the short ones,
+		   and (b) a regexp that matches domain names works just as well
+		   for IP addresses (the GeoIP lookup doesn't care which it
+		   gets). Also fixed the pattern so it can handle an arbitrary
+		   number of subdomains. Note that the latter would be the main
+		   argument for keeping a separate IP address pattern, as IP
+		   addresses always comprise exactly four parts. However, it's
+		   not really up to the script to verify IP addresses; Apache
+		   should be recording them correctly in the first place!
+		   
+		   The typical kinds of strings we are matching look something
+		   like this:
+		   
+		   fetch abstract (short, long):
+		   168.192.1.1 - - [31/Jan/2007:09:15:36 +1300] "GET /1/ HTTP/1.1" 200 12345 "referer" "user-agent"
+		   168.192.1.1 - - [31/Jan/2007:09:15:36 +1300] "GET /archive/00000001/ HTTP/1.1" 200 12345 "referer" "user-agent"
+		   
+		   download item (short, long):
+		   168.192.1.1 - - [31/Jan/2007:09:15:37 +1300] "GET /1/01/foo.pdf HTTP/1.1" 200 12345 "referer" "user-agent"
+		   168.192.1.1 - - [31/Jan/2007:09:15:37 +1300] "GET /archive/00000001/01/foo.pdf HTTP/1.1" 200 12345 "referer" "user-agent"
+		   
+		   Plus any of the above with a domain name substituted for the IP
+		   address (e.g., foo.bar.com instead of 168.192.1.1).
+		*/
+		if	(preg_match("/^(\S+(?:\.\S+)+) - - \[(.*?)\] \"GET \/(?:archive\/0{1,8})?(\d{1,4}).*? HTTP\/1..\" 200 .*?(\"[^\"]+\")?$/i",$buffer,$matches))
 		{
 			$counter++;
 			$country_code = '';
@@ -572,7 +549,7 @@
 			{
 				foreach ($patterns as $pat)
 				{
-					if (preg_match($pat, $user_agent, $matches2))
+					if (preg_match($pat, $user_agent))
 					{
 						$found_country = TRUE;
 						break;
@@ -592,7 +569,8 @@
 			$uniquebits = $buffer;
 			
 			// NJS 2005-11-25 Added regexp for EPrints short URLs.
-			if(preg_match("/GET \/archive\/0{1,8}\d{1,4}\/\d\d\//i",$buffer) || preg_match("/GET \/\d{1,4}\/\d\d\//i",$buffer)) {
+			// NJS 2007-01-31 Refactored into one regexp for both styles.
+			if (preg_match("/GET \/(?:archive\/0{1,8})?\d{1,4}\/\d\d\//i",$buffer)) {
 				$view_type = "download";
 			} else {
 				$view_type = "abstract";
@@ -633,7 +611,7 @@
 
 	/*
 		Keep track of where we are. Should avoid duplication of results
-		if the script is run more than once on the same log file
+		if the script is run more than once on the same log file.
 	*/
 
 // NJS 2006-04-28 Switched value inserted to $start_time instead of $request_date.