<?php
/* NJS 2007-07-24
The database structure changed between versions 2.x and 3.x of
EPrints, so we now need to check the major version number and alter
the queries appropriately. Use only the MAJOR version number (i.e.,
2 or 3, don't include the release number).

The ##EPRINTS_VERSION## token below is a placeholder; it has to be
replaced with the real number before this script can run.
*/
$eprints_version = ##EPRINTS_VERSION##;

/* NJS 2006-04-28
In earlier versions of this script, which eprints to count was
determined by comparing the request date of the eprint against the
"lastproc" date of this script (i.e., minimum time unit one day).
This was fine if you only ran the script once per day, but if you ran
it more than that, it counted multiple times requests whose
$request_date == $lastproc. For example, if you ran this script five
times per day, all the downloads that occurred during that day would
be counted EVERY TIME this script ran, thus overinflating your stats
by a factor of up to five :(

The solution is to use the full time stamp for comparison rather than
just the date. This timestamp MUST include time zone information so
that things don't get screwed up by daylight saving time. As long as
this is done consistently, there's no need to do things like convert
to GMT, for example.

The very first thing we need to do is grab the current time stamp
with time zone, which will later be stored in the database as the
"lastproc" time. This needs to happen first so that we don't "lose"
any requests that occur while the script is running.
*/
$start_time = date('Y-m-d H:i:s O');

/* NJS 2007-01-30
A further twist! The original script ignored log lines that had a
date falling before $lastproc, i.e., if log line date < $lastproc
then it's already been dealt with. This is all fine. However, it
didn't bother checking for log lines that were written after the
script started running (i.e. log line date >= $start_time).

Why is this a problem? We're reading the live Apache log file, so
it's quite likely that new lines will be written to it after the
script has started (i.e., after $start_time). Suppose $start_time is
'2006-06-15 14:03:15 +1200', $lastproc is '2006-06-15 12:03:15 +1200'
(i.e., the script is run every two hours) and the log file contains
lines with the following dates:

    '2006-06-15 10:03:15 +1200' [1] <-- written before $lastproc
    '2006-06-15 12:03:14 +1200' [2] <-- written before $lastproc
    '2006-06-15 13:03:15 +1200' [3] <-- written before $start_time
    '2006-06-15 14:03:14 +1200' [4] <-- written before $start_time
    '2006-06-15 14:03:15 +1200' [5] <-- written at $start_time
    '2006-06-15 14:03:16 +1200' [6] <-- written after $start_time

During this run, dates [1] and [2] are both < $lastproc and thus
ignored. The remaining four dates ([3]--[6]) are >= $lastproc and
thus processed.

Two hours later, the script runs again, this time with $start_time
set to '2006-06-15 16:03:15 +1200' and $lastproc to '2006-06-15
14:03:15 +1200'. Dates [1] through [4] are all < $lastproc and
thus ignored. However, dates [5] and [6] are both >= $lastproc
and are processed a second time, resulting in a duplicate entry
in the database.

The solution is to ignore any log line entries that occur at or after
(>=) $start_time. In the example above, this would mean that in the
first run, dates [1], [2], [5] and [6] would be ignored and dates [3]
and [4] processed. In the second run, dates [1]--[4] would be ignored
and dates [5] and [6] processed.
*/
// Numeric form of $start_time, used as the exclusive upper bound when
// deciding which log lines to process.
$test_starttime = strtotime($start_time);

// NJS 2005-12-09 Switched to GeoIP from GeoIP:IPfree.
// require_once rather than include: without the library the
// geoip_open() call below cannot work, so stop right here with a clear
// error, and never declare the library's functions twice.
require_once("geoip.inc");
$gi = geoip_open("##GEOIP_DATABASE##",GEOIP_STANDARD);
/*
Apache log for ePrints uses this format:

    LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined

If the log format differs the regular expression matching would need
to be adjusted.

Parse:
    ip
    date YYYY MM DD
    archive ID
*/

// Web server log files, one per archive. The key is the archive name
// and must also appear as a key in $eprintsdbs. The file name is
// appended directly to $log_dir (no separator is inserted), so
// $log_dir has to end with a directory separator.
$log_dir = '##APACHE_LOG_LOCATION##';
$log_file = array(
    'otago_eprints' => '##APACHE_LOG_NAME_1##',
    'cardrona'      => '##APACHE_LOG_NAME_2##',
);

// eprintstats db: where the collected statistics are written.
// NOTE(review): credentials are stored in plain text in this file;
// consider moving them to a protected configuration file.
$sqlserver   = 'localhost';
$sqluser     = 'eprintstatspriv';
$sqlpass     = 'AuldGrizzel';
$sqldatabase = 'eprintstats';

/* NJS 2006-05-26
SQL details of your ePrints installation(s). This has now been
generalised to work with multiple archives. For each archive that you
have, add an entry to this array in the following format:

    'archive_name' => array(
        'sqlserver' => 'db_host',
        'dbname'    => 'database_name',
        'username'  => 'user_name',
        'password'  => 'password',
    ),

NJS 2007-07-16: Added support for different database name.
Usually, archive_name, database_name and user_name are the same, but
they don't have to be.
*/
$eprintsdbs = array(
    'otago_eprints' => array(
        'sqlserver' => 'localhost',
        'dbname'    => 'otago_eprints_v3',
        'username'  => 'otago_eprints',
        'password'  => 'DrSyntaxRidesAgain',
    ),
    'cardrona' => array(
        'sqlserver' => 'localhost',
        // NOTE(review): spelled 'cardona', not 'cardrona' -- confirm this
        // really is the database name and not a typo.
        'dbname'    => 'cardona_v3',
        'username'  => 'cardrona',
        'password'  => 'DrSyntaxRidesAgain',
    ),
);
/* NJS 2005-12-16
IP address ranges for your local Intranet(s). You can have multiple
ranges of IP addresses, each with a different "country name", so that
they will appear as separate entries in the by country stats pages.
You should use a different country code for each range (ISO 3166-1
specifies the range XA through XZ as "user-assignable", so you can use
codes from there as necessary), and create flag icons as appropriate.

Each address range key is the name that will appear in the statistics
database (the "country name"), followed by a comma, followed by the
appropriate ISO 3166-1 country code as noted above. Each entry in the
range is either a single IP address, or an array specifying a lower and
upper bound for a contiguous IP address range (see example below).
All IP addresses must be converted to long values using the ip2long()
function before being stored.

Note that address ranges may overlap. The script will use the first
range that matches a given IP, so list the ranges in the correct order
of precedence for your needs.

Example (the addresses are placeholders from the documentation range):

    $local_IPs = array(
        'Repository Admin,XA' => array(
            ip2long('192.0.2.10'),
            ip2long('192.0.2.11'),
            array(
                'lower' => ip2long('192.0.2.64'),
                'upper' => ip2long('192.0.2.95'),
            ),
        ),
        'Our Intranet,XI' => array(
            array(
                'lower' => ip2long('192.0.2.0'),
                'upper' => ip2long('192.0.2.255'),
            ),
        ),
    );

'Repository Admin' covers the two single addresses 192.0.2.10 and
192.0.2.11, and the range 192.0.2.64 to 192.0.2.95, inclusive.
'Our Intranet' covers the range 192.0.2.0 to 192.0.2.255, inclusive.
A machine will only match the 'Our Intranet' range if it first fails
to match the 'Repository Admin' range.

NOTE(review): every address in the live list below is an empty string.
ip2long('') does not yield an integer, so the matching loop will report
each entry as an unsupported data type. Fill in the real addresses
before use.
*/
$local_IPs = array(
    'Repository Admin,XA' => array(
        ip2long(''), // Nigel @ Uni
        ip2long(''), // Nigel @ home
        ip2long(''), // Monica & Jeremy
        ip2long(''), // @ Uni
        ip2long(''), // Monica @ home
        ip2long(''), // eprints.otago.ac.nz
        ip2long(''), // eprints.otago.ac.nz pre-switch
        ip2long(''), // eprints.otago.ac.nz pre-switch
    ),
    'Otago Intranet,XI' => array(
        array(
            'lower' => ip2long(''),
            'upper' => ip2long(''),
        ),
    ),
);
/* NJS 2007-01-26
Patterns to match various search engine bots. Ideally, we'd use a similar
mechanism to the $local_IPs variable above, but this isn't feasible because
we'd need to know the IP ranges for the likes of Google, for example. This
clearly isn't possible in practice.

Fortunately, most search bots insert a readily identifiable string into
the user-agent part of the HTTP request, which gets recorded in the Apache
log file. We can look for these and re-code log entries as appropriate.

The format of this list is similar to that of the $local_IPs variable.
The key is the "country name" (in this case the name of the search
engine) plus a non-standard four-character country code starting with
"X@", separated by a comma. Each key value has an associated list of
corresponding regular expressions that can occur in the user-agent part
of the Apache log entry. If any one of these REs matches the user-agent
part of the log entry, then we should re-code the country appropriately.

A four-character code is used because that's what the database allows, and
it avoids having to reserve several of the "X" country codes for search
engines.

Groups are tried in the order listed and the first group with a matching
pattern wins, so the catch-all patterns at the end of 'Other search
engine' only apply to agents that no earlier group claimed.
*/
$bot_patterns = array(
    // Yahoo! (http://www.yahoo.com/)
    'Yahoo!,X@YH' => array(
        '/yahoo! slurp/i',
        '/yahooseeker/i',
    ),
    // Windows Live Search (http://search.msn.com/)
    'Windows Live Search,X@MS' => array(
        '/msnbot/i',
    ),
    // Google (http://www.google.com/)
    'Google,X@GG' => array(
        '/googlebot/i',
    ),
    // Ask.com (http://www.ask.com/)
    'Ask.com,X@AC' => array(
        '/ask jeeves\/teoma/i',
    ),
    // Everything else I could find in our log files :)
    'Other search engine,X@OS' => array(
        // TAMU Internet Research Lab (http://irl.cs.tamu.edu/)
        '/http:\/\/irl\.cs\.tamu\.edu\/crawler/i',
        // Alexa web search (http://www.alexa.com/)
        '/ia_archiver/i',
        // TrueKnowledge for Web (http://www.authoritativeweb.com/)
        '/converacrawler/i',
        // Majestic 12 distributed search engine (http://www.majestic12.co.uk/)
        '/mj12bot/i',
        // Picsearch (http://www.picsearch.com/)
        '/psbot/i',
        // Exalead (http://www.exalead.com/search)
        '/exabot/i',
        // Cazoodle (note cazoodle.com doesn't exist)
        '/cazoodlebot crawler/i',
        '/mqbot@cazoodle\.com/i',
        // Gigablast (http://www.gigablast.com/)
        '/gigabot/i',
        // Houxou (http://www.houxou.com/)
        '/houxoucrawler/i',
        '/crawler at houxou dot com/i',
        // IBM Almaden Research Center Computer Science group (http://www.almaden.ibm.com/cs/)
        '/http:\/\/www\.almaden\.ibm\.com\/cs\/crawler/i',
        // Goo? (http://help.goo.ne.jp/)
        '/ichiro/i',
        // Daum Communications Corp (Korea)
        '/edacious & intelligent web robot/i',
        '/daum communications corp/i',
        '/daum web robot/i',
        '/msie is not me/i',
        '/daumoa/i',
        // Girafa (http://www.girafa.com/)
        '/girafabot/i',
        // The Generations Network (http://www.myfamilyinc.com/)
        '/myfamilybot/i',
        // Naver? (http://www.naver.com/)
        '/naverbot/i',
        // WiseNut (http://www.wisenutbot.com/)
        '/zyborg/i',
        '/wn-[0-9]+\.zyborg@looksmart\.net/i',
        // Accelobot (http://www.accelobot.com/)
        // This one seems particularly busy!
        '/heritrix/i',
        // Seeqpod (http://www.seeqpod.com/)
        '/seeqpod-vertical-crawler/i',
        // University of Illinois at Urbana-Champaign, Computer Science (http://www.cs.uiuc.edu/)
        '/mqbot crawler/i',
        '/mqbot@cs\.uiuc\.edu/i',
        // Microsoft Research (http://research.microsoft.com/)
        '/msrbot/i',
        // Nusearch
        '/nusearch spider/i',
        // SourceForge (http://www.sf.net/)
        '/nutch-agent@lists\.sourceforge\.net/i',
        // Lucene (http://lucene.apache.org/)
        '/nutch-agent@lucene\.apache\.org/i',
        // Dot escaped so it only matches the literal address.
        '/raphael@unterreuth\.de/i',
        // Computer Science, University of Washington (http://cs.washington.edu/)
        '/nutch running at uw/i',
        '/sycrawl@cs\.washington\.edu/i',
        // Chikayama & Taura Laboratory, University of Tokyo (http://www.logos.ic.i.u-tokyo.ac.jp/)
        '/shim-crawler/i',
        '/crawl@logos\.ic\.i\.u-tokyo\.ac\.jp/i',
        // Sproose (http://www.sproose.com/)
        '/sproose bot/i',
        '/crawler@sproose\.com/i',
        // Turnitin (http://www.turnitin.com/)
        '/turnitinbot/i',
        // WISH Project (http://wish.slis.tsukuba.ac.jp/)
        '/wish-project/i',
        // WWWster
        '/wwwster/i',
        '/gue@cis\.uni-muenchen\.de/i',
        // Forex Trading Network Organization (http://www.netforex.org/)
        '/forex trading network organization/i',
        '/info@netforex\.org/i',
        // FunnelBack (http://www.funnelback.com/)
        '/funnelback/i',
        // Baidu (http://www.baidu.com/)
        '/baiduspider/i',
        // Brandimensions (http://www.brandimensions.com/)
        '/bdfetch/i',
        // Blaiz Enterprises (http://www.blaiz.net/)
        '/blaiz-bee/i',
        // Boitho/SearchDaimon (http://www.boitho.com/ or http://www.searchdaimon.com/)
        '/boitho\.com-dc/i',
        // Celestial (OAI aggregator, see http://oai-perl.sourceforge.net/ for a little info)
        '/celestial/i',
        // Cipinet (http://www.cipinet.com/)
        '/cipinetbot/i',
        // iVia (http://ivia.ucr.edu/)
        '/crawlertest crawlertest/i',
        // Encyclopedia of Keywords (http://keywen.com/)
        '/easydl/i',
        // Everest-Vulcan Inc. (http://everest.vulcan.com/)
        '/everest-vulcan inc/i',
        // FactBites (http://www.factbites.com/)
        '/factbot/i',
        // Scirus (http://www.scirus.com/)
        '/scirus scirus-crawler@fast\.no/i',
        // UOL (http://www.uol.com.br/)
        '/uolcrawler/i',
        '/soscrawler@uol\.com\.br/i',
        // Always Updated (http://www.updated.com/)
        '/updated crawler/i',
        '/crawler@updated\.com/i',
        // FAST Enterprise Search (http://www.fast.no/)
        '/fast metaweb crawler/i',
        '/crawler@fast\.no/i',
        '/helpdesk at fastsearch dot com/i',
        // Deutsche Wortschatz Portal (http://wortschatz.uni-leipzig.de/)
        '/findlinks/i',
        // Gais (http://gais.cs.ccu.edu.tw/)
        '/gaisbot/i',
        // Dots escaped so it only matches the literal address.
        '/robot[0-9]{2}@gais\.cs\.ccu\.edu\.tw/i',
        // http://ilse.net/
        '/ingrid/i',
        // Krugle (http://corp.krugle.com/)
        '/krugle\/krugle/i',
        '/krugle web crawler/i',
        '/webcrawler@krugle\.com/i',
        // WebWobot (http://www.webwobot.com/)
        '/scollspider/i',
        // Omni-Explorer (http://www.omni-explorer.com/)
        '/omniexplorer_bot/i',
        '/worldindexer/i',
        // PageBull (http://www.pagebull.com/)
        '/pagebull http:\/\/www\.pagebull\.com\//i',
        // dir.com (http://dir.com/)
        '/pompos/i',
        // Sensis (http://sensis.com.au/)
        '/sensis web crawler/i',
        // The agent string contains literal backslashes as separators.
        '/search_comments\\\\at\\\\sensis\\\\dot\\\\com\\\\dot\\\\au/i',
        // Shopwiki (http://www.shopwiki.com/)
        '/shopwiki/i',
        // Guruji (http://www.terrawiz.com/)
        '/terrawizbot/i',
        // Language Observatory Project (http://www.language-observatory.org/)
        '/ubicrawler/i',
        // MSIE offline bookmarks crawler
        '/msiecrawler/i',
        // Unidentified
        '/bot/i',
        '/crawler/i',
        '/spider/i',
        '/larbin/i', // also larbinSpider
        '/httrack/i',
        '/voyager/i',
        '/acadiauniversitywebcensusclient/i',
        '/feedchecker/i',
        '/knowitall\(knowitall@cs\.washington\.edu\)/i',
        '/mediapartners-google/i',
        '/psycheclone/i',
        '/topicblogs/i',
        '/nutch/i',
    ),
);
###########################################
##
## No configuration required below here.
##
###########################################

// Connect to the statistics database. Nothing useful can be done
// without it, so give up straight away if either step fails.
$connect = mysql_pconnect($sqlserver, $sqluser, $sqlpass);
if (!$connect) {
    die("Could not connect");
}
$db = mysql_select_db($sqldatabase, $connect) or die("Could not connect");

// First get the date of last update
/* NJS 2006-04-28
Changed this from order by timeinsert to order by id. The ID is
always guaranteed to increase temporally, but is otherwise
time-independent and thus not affected by things like daylight
savings.
*/
$query = "SELECT lastproc FROM lastproc ORDER BY id DESC LIMIT 1";
$result = mysql_query($query, $connect);
if (!$result) {
    // Carrying on with a cutoff of 0 would re-count every request in
    // the log files, so stop instead.
    die("Could not read lastproc: " . mysql_error($connect) . "\n");
}
// A cutoff of 0 means "never run before": process everything.
$test_lastproc = 0;
if (mysql_num_rows($result) > 0) {
    $row = mysql_fetch_assoc($result);
    $lastproc = $row["lastproc"];
    // NJS 2007-01-30 Refactored $databaseA to more meaningful $test_lastproc.
    $test_lastproc = strtotime($lastproc);
}

// NJS 2006-06-14: Generalised connection list for multiple archives.
$eprints_connections = array();
foreach ($eprintsdbs as $archive_name => $details) {
    $eprints_connections[$archive_name] =
        mysql_connect($details['sqlserver'], $details['username'], $details['password']);
    if (!$eprints_connections[$archive_name]) {
        // Requests are still counted, but their titles cannot be looked up.
        print "Could not connect to the EPrints database for archive " . $archive_name . ".\n";
    }
}

$counter = 0;
// Cache of eprint titles looked up so far, keyed by "archive:eprintid".
$eprintname = array();

foreach ($log_file as $archive_name => $archive_log) {
    $logf = $log_dir . $archive_log;
    $handle = fopen($logf, "r");
    if ($handle === false) {
        // feof() on a failed handle never becomes true, so the read
        // loop must not be entered.
        // NOTE(review): lastproc is still advanced at the end of the
        // run, so requests in this file for the current window will not
        // be picked up by a later run.
        print "Could not open log file " . $logf . " for archive " . $archive_name . "; skipping.\n";
        continue;
    }

    // fgets() returns FALSE at end of file (or on a read error).
    while (($buffer = fgets($handle, 4096)) !== false) {
        /* NJS 2007-01-26
        Added user-agent match to all regexps to enable bot detection.

        NJS 2007-01-31
        Refactored regexps from four down to one, after realising
        that (a) long EPrints URLs are a superset of the short ones,
        and (b) a regexp that matches domain names works just as well
        for IP addresses (the GeoIP lookup doesn't care which it
        gets). Also fixed the pattern so it can handle an arbitrary
        number of subdomains. Note that the latter would be the main
        argument for keeping a separate IP address pattern, as IP
        addresses always comprise exactly four parts. However, it's
        not really up to the script to verify IP addresses; Apache
        should be recording them correctly in the first place!

        The typical kinds of strings we are matching look something
        like this:

        fetch abstract (short, long):
            192.0.2.1 - - [31/Jan/2007:09:15:36 +1300] "GET /1/ HTTP/1.1" 200 12345 "referer" "user-agent"
            192.0.2.1 - - [31/Jan/2007:09:15:36 +1300] "GET /archive/00000001/ HTTP/1.1" 200 12345 "referer" "user-agent"

        download item (short, long):
            192.0.2.1 - - [31/Jan/2007:09:15:37 +1300] "GET /1/01/foo.pdf HTTP/1.1" 200 12345 "referer" "user-agent"
            192.0.2.1 - - [31/Jan/2007:09:15:37 +1300] "GET /archive/00000001/01/foo.pdf HTTP/1.1" 200 12345 "referer" "user-agent"

        Plus any of the above with a domain name substituted for the IP
        address (e.g., foo.bar.com instead of 192.0.2.1).

        Captures: 1 = client host/IP, 2 = request date, 3 = eprint id,
        4 = quoted user-agent (absent if the line has no trailing
        quoted field).
        */
        if (!preg_match("/^(\S+(?:\.\S+)+) - - \[(.*?)\] \"GET \/(?:archive\/0+)?(\d+).*? HTTP\/1..\" 200 .*?(\"[^\"]+\")?$/i",$buffer,$matches)) {
            // Not a successful request for an eprint; nothing to count.
            continue;
        }
        $counter++;

        /* NJS 2007-01-29
        Moved date checking to the start of the loop, as there's
        no point in doing any of the regexp checks if we've already
        processed this log entry and will discard it anyway.
        */
        /* NJS 2006-04-28
        Switched to timestamp rather than date-based comparison.
        First, clean up the Apache request date into something
        that strtotime understands. Note that the Apache log
        dates include time zone info by default.
        */
        $date = $matches[2];
        $date = preg_replace("/:/", " ", $date, 1); // Change first ":" to " ".
        $date = preg_replace("/\//", " ", $date);   // Change all "/" to " ".
        // NJS 2007-01-30 Refactored $databaseB to more meaningful
        // $test_logdate.
        $test_logdate = strtotime($date);
        if ($test_logdate === false) {
            // Unparsable date: it cannot be placed inside the processing
            // window, so do not count it.
            continue;
        }
        // NJS 2007-01-30 Added test for log dates >= $start_time.
        if (($test_logdate < $test_lastproc) || ($test_logdate >= $test_starttime)) {
            continue;
        }
        // Convert to properly formatted date string.
        $request_date = date('Y-m-d H:i:s O', $test_logdate);

        /* NJS 2005-12-16
        Determine country code and name.
        Check whether the IP number falls into any of the local
        intranet ranges. If so, then use that.
        */
        $country_code = '';
        $country_name = '';
        $ip = $matches[1];
        $ip_long = ip2long($ip);
        $found_country = FALSE;
        // ip2long() yields FALSE when the log holds a host name instead
        // of a dotted-quad address; such entries cannot match a range.
        if ($ip_long !== false) {
            foreach ($local_IPs as $id => $addresses) {
                foreach ($addresses as $ip_range) {
                    if (is_array($ip_range)) { // check against lower/upper bounds
                        $found_country = (($ip_long >= $ip_range['lower'])
                            && ($ip_long <= $ip_range['upper']));
                    } else if (is_long($ip_range)) { // data type sanity check
                        $found_country = ($ip_long == $ip_range);
                    } else { // something is seriously broken, ignore this entry
                        print "Unsupported data type " . gettype($ip_range) .
                            " (value " . $ip_range .
                            ") in \$local_IPs (expected long).\n";
                        continue;
                    }
                    if ($found_country) break;
                }
                if ($found_country) {
                    list($country_name, $country_code) = explode(',', $id);
                    break;
                }
            }
        }
        // Otherwise, fall back to GeoIP.
        if (!$found_country) {
            $country_code = geoip_country_code_by_addr($gi, $ip);
            $country_name = geoip_country_name_by_addr($gi, $ip);
        }
        // end NJS 2005-12-16

        /* NJS 2007-01-26
        Check whether this is a bot reference. A bot match overrides
        whatever country was determined above.
        */
        $user_agent = isset($matches[4]) ? $matches[4] : '';
        $found_country = FALSE;
        foreach ($bot_patterns as $id => $patterns) {
            foreach ($patterns as $pat) {
                if (preg_match($pat, $user_agent)) {
                    $found_country = TRUE;
                    break;
                }
            }
            if ($found_country) {
                list($country_name, $country_code) = explode(',', $id);
                break;
            }
        }
        // end NJS 2007-01-26

        // Now sort out the remaining bits and we're done.
        $eprint_id = $matches[3]; // digits only, guaranteed by the regexp
        $uniquebits = $buffer;

        // NJS 2005-11-25 Added regexp for EPrints short URLs.
        // NJS 2007-01-31 Refactored into one regexp for both styles.
        if (preg_match("/GET \/(?:archive\/0+)?\d+\/\d+\//i", $buffer)) {
            $view_type = "download";
        } else {
            $view_type = "abstract";
        }

        // Look each title up once per run. The separator keeps, e.g.,
        // archive "a1" + id "23" distinct from archive "a" + id "123".
        $cache_key = $archive_name . ':' . $eprint_id;
        if (!isset($eprintname[$cache_key])) {
            $eprintname[$cache_key] = getePrintName($eprints_connections[$archive_name],$eprintsdbs[$archive_name]['dbname'],$eprint_id,$eprints_version);
        }
        $eprint_name = $eprintname[$cache_key];
        if ($eprint_name == '') {
            // No title, nothing to record.
            continue;
        }

        /* NJS 2006-04-25
        Requests containing apostrophes (') are dumped by
        MySQL unless we escape them. Looking in the GeoIP
        files I also see country names with apostrophes, so
        escape that as well.

        Every string value is now escaped: the host/IP and the raw
        log line come straight from the web server log and cannot
        be trusted.
        */
        $query = "
            INSERT INTO view (uniquebits,archive_name,ip,request_date,archiveid,country_code,country_name,view_type,eprint_name)
            VALUES('" . mysql_real_escape_string($uniquebits, $connect) .
            "','" . mysql_real_escape_string($archive_name, $connect) .
            "','" . mysql_real_escape_string($ip, $connect) .
            "','" . $request_date .
            "'," . $eprint_id .
            ",'" . mysql_real_escape_string((string) $country_code, $connect) .
            "','" . mysql_real_escape_string((string) $country_name, $connect) .
            "','" . $view_type .
            "','" . mysql_real_escape_string($eprint_name, $connect) . "')";
        $result = mysql_query($query, $connect);
        if (!$result) {
            print "Could not record request: " . mysql_error($connect) . "\n";
        }
    }
    fclose($handle);
}

/*
Keep track of where we are. Should avoid duplication of results
if the script is run more than once on the same log file.
*/
// NJS 2006-04-28 Switched value inserted to $start_time instead of $request_date.
$query = "INSERT into lastproc (lastproc) values('" . $start_time . "')";
$result = mysql_query($query, $connect);
if (!$result) {
    // Without this row the next run will re-count this run's requests.
    print "Could not record lastproc: " . mysql_error($connect) . "\n";
}
#print "Records counted: $counter\n";
#print "Last count: $request_date\n";

foreach ($eprints_connections as $connection) {
    // Skip connections that never opened, or that were already closed
    // because two archives shared the same link.
    if (is_resource($connection)) {
        mysql_close($connection);
    }
}
mysql_close($connect);
/*
Look up the title corresponding to the specified eprint id.

Parameters:
  $connection       MySQL link for the archive's EPrints database
                    (NJS 2006-06-14: now passed as an argument).
  $dbname           Name of the database to select on that link.
  $eprintid         Numeric id of the eprint.
  $eprints_version  Major EPrints version (2 or 3); the database
                    structure changed between the two
                    (NJS 2007-07-24).

Returns the title with surrounding whitespace trimmed and internal runs
of whitespace collapsed to a single space. If the item is only found
among the deleted items, ' [deleted]' is appended. If it cannot be
found at all, or the database cannot be queried, the result is
"Unknown item [<id>]".
*/
function getePrintName($connection,$dbname,$eprintid,$eprints_version) {
    $unknown = "Unknown item [$eprintid]";

    // The id is placed directly into the SQL text, so force it to be
    // an integer no matter what the caller passed in.
    $id = (int) $eprintid;

    if (!$connection || !mysql_select_db($dbname, $connection)) {
        // Querying whichever database happens to be selected could
        // return the title of an unrelated item.
        return $unknown;
    }

    // Version 3 keeps live and deleted items in one table, told apart
    // by eprint_status; version 2 uses separate tables.
    if ($eprints_version > 2) {
        $live_query = "SELECT title FROM eprint WHERE eprintid = $id AND eprint_status = 'archive'";
        $deleted_query = "SELECT title FROM eprint WHERE eprintid = $id AND eprint_status = 'deletion'";
    } else {
        $live_query = "SELECT title FROM archive WHERE eprintid = $id";
        $deleted_query = "SELECT title FROM deletion WHERE eprintid = $id";
    }

    $suffix = '';
    $result = mysql_query($live_query, $connection);
    // NJS 2006-04-25 Added check for empty result, probably a deleted item.
    // Look in the deleted items for details.
    if (!$result || mysql_num_rows($result) == 0) {
        $result = mysql_query($deleted_query, $connection);
        // If it's not in deletion, then we have no clue what it is.
        if (!$result || mysql_num_rows($result) == 0) {
            return $unknown;
        }
        $suffix = ' [deleted]';
    }

    $row = mysql_fetch_assoc($result);
    if (!is_array($row)) {
        return $unknown;
    }

    // Normalise whitespace: titles may contain line breaks and padding.
    $title = preg_replace("/\s+/", " ", trim((string) $row["title"]));

    return $title . $suffix;
}