Newer
Older
Digital_Repository / Repositories / statistics / scripts / fix-titles_src.php
<?php

if ( $argc != 1 )
{
	if ( $argc != 3 )
	{
		print("Usage: php fix-titles.php [archivename eprintid]\n");
		return;
	}
	else
	{
		$archive_name = $argv[1];
		$eprint_id = $argv[2];
	}
}

/* NJS 2007-07-24
   The database structure changed between versions 2.x and 3.x of
   EPrints, so we now need to check the major version number and alter
   the queries appropriately. Use only the MAJOR version number (i.e.,
   2 or 3, don't include the release number).
*/
$eprints_version = ##EPRINTS_VERSION##;

	/*

	Apache log for ePrints uses this format:
	LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined

	If the log format differs the regular expression matching would need to be adjusted.		
	
	Parse:
		ip
		date YYYY MM DD
		archive ID

	*/

// eprintstats db
$sqlserver = 'localhost';
$sqluser = 'eprintstatspriv';
$sqlpass = 'AuldGrizzel';
$sqldatabase = 'eprintstats';

/* NJS 2006-05-26
SQL details of your ePrints installation(s). This has now been
generalised to work with multiple archives. For each archive that you
have, add an entry to this array in the following format:

	'archive_name' => array(
		'sqlserver' => 'db_host',
		'username' => 'archive_name',
		'password' => 'password',
	),
*/
$eprintsdbs = array(
	'otago_eprints' => array(
		'sqlserver' => 'localhost',
		'username' => 'otago_eprints',
		'password' => 'DrSyntaxRidesAgain',
	),
	'cardrona' => array(
		'sqlserver' => 'localhost',
		'username' => 'cardrona',
		'password' => 'chautquau',
	),
);

###########################################
##
## No configuration required below here.
##
###########################################

$connect = mysql_pconnect($sqlserver,$sqluser,$sqlpass);
$db = mysql_select_db($sqldatabase,$connect) or die("Could not connect");

// NJS 2006-06-14: Generalised connection list for multiple archives.
$eprints_connections = array();
foreach ($eprintsdbs as $archive => $details)
{
	$eprints_connections[$archive] =
		mysql_connect($details['sqlserver'],$details['username'],$details['password']);
}

/* Get list of all items in database. We only need the distinct archive name 
   and eprint ID, because we'll be updating all duplicate instances with the
   same eprint title anyway. We could limit it to items with titles like
   "Unknown%", to make things faster, but that doesn't cover different titles
   for the same item (e.g., from corrected punctuation).
*/
if ( $argc == 1 )
{
	$query = "
		SELECT DISTINCT archive_name, archiveid
		FROM view
		ORDER BY archive_name, archiveid";
}
else
{
	$query = "
		SELECT DISTINCT archive_name, archiveid
		FROM view
		WHERE archive_name = '$archive_name'
		  AND archiveid = $eprint_id
		ORDER BY archive_name, archiveid";
}

$result = mysql_query( $query, $connect );
$total_rows = ( $result ) ? mysql_num_rows( $result ) : 0;
$count = 0;
while ( $row = mysql_fetch_assoc( $result ) )
{
	$archive_name = $row['archive_name'];
	$eprint_id = $row['archiveid'];
	printf( "Checking %s item %s (%d/%d): ", $archive_name, $eprint_id, ++$count, $total_rows );
	$eprint_name = getePrintName( $eprints_connections[$archive_name], $archive_name, $eprint_id, $eprints_version );
	$update = sprintf("
		UPDATE view
		SET eprint_name = '%s'
		WHERE archive_name = '%s'
		  AND archiveid = %d",
		mysql_real_escape_string( $eprint_name ),
		$archive_name,
		$eprint_id
	);
	if ( mysql_query($update, $connect) )
	{
		$num_rows = mysql_affected_rows( $connect );
		printf( "OK (%d row%s updated).\n", $num_rows, ( $num_rows != 1 ) ? "s" : "" );
	}
	else
	{
		print( "failed!\n" );
	}
}

foreach ($eprints_connections as $connection)
{
	mysql_close($connection);
}
mysql_close($connect);

// Look up the title corresponding to the specified eprint id.
function getePrintName($connection,$archive,$eprintid,$eprints_version) {
	// NJS 2006-06-14: DB connection now passed as an argument.
	$db = mysql_select_db($archive,$connection);
	// NJS 2007-07-24: Added check for EPrints version, as the
	// database structure changed between versions 2 and 3.
	if ( $eprints_version > 2 )
	{
		$query3 = "
			SELECT title
			FROM eprint
			WHERE eprintid = $eprintid
			  AND eprint_status = 'archive'
		";
	}
	else
	{
		$query3 = "
			SELECT title
			FROM archive
			WHERE eprintid = $eprintid
		";
	}
	$result3 = mysql_query($query3,$connection);
	$title = '';
	$suffix = '';
	// NJS 2006-04-25 Added check for empty result, probably a deleted item.
	// Look in the deleted items for details.
	if (mysql_num_rows($result3) == 0) {
		// NJS 2007-07-24: Added check for EPrints version, as the
		// database structure changed between versions 2 and 3.
		if ( $eprints_version > 2 )
		{
			$query3 = "
				SELECT title
				FROM eprint
				WHERE eprintid = $eprintid
				  AND eprint_status = 'deletion'
			";
		}
		else
		{
			$query3 = "
				SELECT title
				FROM deletion
				WHERE eprintid = $eprintid
			";
		}
		$result3 = mysql_query($query3,$connection);
		// If it's not in deletion, then we have no clue what it is.
		if (mysql_num_rows($result3) == 0) {
			$title = "Unknown item [$eprintid]";
		}
		else {
			$suffix = ' [deleted]';
		}
	}
	if ($title == '') {
		$row = mysql_fetch_assoc($result3);
		$row["title"] = trim($row["title"]);
		$row["title"] = preg_replace("/\s+/"," ",$row["title"]);
		$title = $row["title"];
	}
	return $title . $suffix;
}

?>