GitBucket
4.21.2
Toggle navigation
Snippets
Sign in
Files
Branches
1
Releases
Issues
Pull requests
Labels
Priorities
Milestones
Wiki
Forks
nigel.stanger
/
Digital_Repository
Browse code
- Added script to rebuild all eprint titles.
master
1 parent
318e8d2
commit
2f6c534b6a67206b89c37a12a058f15caa5746e3
nstanger
authored
on 23 Jan 2008
Patch
Showing
2 changed files
Repositories/statistics/Makefile
Repositories/statistics/scripts/fix-titles_src.php
Ignore Space
Show notes
View
Repositories/statistics/Makefile
SHELL=/bin/sh

# Builds the machine-specific copies of the statistics package's *_src files
# (by replacing ##NAME## markers with the values of the make variables of the
# same name) and deploys the result into the EPrints statistics installation.
#
# Load in machine-specific environment settings from environment_config.make.
# This include file MUST be created and MUST define the following variables:
#
# EPRINTSSTATS: /path/to/eprintsstats/installation
# EPRINTS_USER: the user that owns the EPrints directory
# EPRINTS_GROUP: the group that owns the EPrints directory
#
# EPRINTS_HOST: eprints host name
# EPRINTS_PORT: appropriate HTTP port
# EPRINTS_ADMIN_EMAIL: administrator email address (quote "@" with \)
#
# GEOIP_DATABASE:=/usr/local/share/GeoIP/GeoIP.dat
#
# APACHE_LOG_LOCATION:=/sw/var/apache2/logs/
# APACHE_LOG_NAME_1:=access_log
# APACHE_LOG_NAME_2:=<name of the second access log>
#   (both names are substituted into scripts/eprints-usage.php; see USAGE_SUBS)
#
# PHP_LIB:=/sw/lib/php4
#
include environment_config.make

BINDIR:=$(EPRINTSSTATS)/scripts

# Files produced from their *_src counterparts by the rules below.
GENERATED_FILES:=config/inc.vars.es.php scripts/eprints-usage.php \
    scripts/fix-countries.php scripts/fix-titles.php vhost/apache.conf

# Names of the variables whose ##NAME## markers are replaced in each kind of
# generated file.
CONFIG_SUBS:=EPRINTSSTATS EPRINTS_HOST EPRINTS_PORT EPRINTS_ADMIN_EMAIL
USAGE_SUBS:=GEOIP_DATABASE APACHE_LOG_LOCATION APACHE_LOG_NAME_1 APACHE_LOG_NAME_2
FIX_SUBS:=GEOIP_DATABASE
VHOST_SUBS:=EPRINTSSTATS PHP_LIB

# The generated files are written through shell redirection, so a failing
# perl would otherwise leave behind a truncated target that looks up to date.
.DELETE_ON_ERROR:

# None of these targets creates a file of the same name.
.PHONY: deploy stats debug

# Copy everything except build inputs (Makefile, *.make, *_src.*) into the
# installation directory, as the user that owns the EPrints directory.
deploy: $(GENERATED_FILES)
	@announce "Copying files..."
	@sudo -u $(EPRINTS_USER) rsync --verbose --cvs-exclude --exclude=Makefile \
		--exclude='*.make' --exclude='*_src.*' \
		--recursive --times . $(EPRINTSSTATS)

# Each rule below emits one perl "-e s|##NAME##|value|g;" expression per
# variable in the relevant *_SUBS list. The values are interpolated into a
# double-quoted shell string and then a perl substitution, which is why "@"
# in EPRINTS_ADMIN_EMAIL has to be quoted.
config/inc.vars.es.php: config/inc.vars.es_src.php environment_config.make
	@announce "Generating $@"
	perl -p $(foreach SUB,$(CONFIG_SUBS),-e "s|##$(SUB)##|$($(SUB))|g;") $< > $@

scripts/eprints-usage.php: scripts/eprints-usage_src.php environment_config.make
	@announce "Generating $@"
	perl -p $(foreach SUB,$(USAGE_SUBS),-e "s|##$(SUB)##|$($(SUB))|g;") $< > $@

scripts/fix-countries.php: scripts/fix-countries_src.php environment_config.make
	@announce "Generating $@"
	perl -p $(foreach SUB,$(FIX_SUBS),-e "s|##$(SUB)##|$($(SUB))|g;") $< > $@

scripts/fix-titles.php: scripts/fix-titles_src.php environment_config.make
	@announce "Generating $@"
	perl -p $(foreach SUB,$(FIX_SUBS),-e "s|##$(SUB)##|$($(SUB))|g;") $< > $@

vhost/apache.conf: vhost/apache_src.conf environment_config.make
	@announce "Generating $@"
	perl -p $(foreach SUB,$(VHOST_SUBS),-e "s|##$(SUB)##|$($(SUB))|g;") $< > $@

# Run the deployed usage script from the installation's scripts directory.
stats:
	@announce "Generating statistics..."
	@php $(BINDIR)/eprints-usage.php

# Print the key path variables, to check what environment_config.make supplied.
debug:
	@echo "EPRINTSSTATS = [$(EPRINTSSTATS)]"
	@echo "BINDIR = [$(BINDIR)]"
SHELL=/bin/sh

# Builds the machine-specific copies of the statistics package's *_src files
# (by replacing ##NAME## markers with the values of the make variables of the
# same name) and deploys the result into the EPrints statistics installation.
#
# Load in machine-specific environment settings from environment_config.make.
# This include file MUST be created and MUST define the following variables:
#
# EPRINTSSTATS: /path/to/eprintsstats/installation
# EPRINTS_USER: the user that owns the EPrints directory
# EPRINTS_GROUP: the group that owns the EPrints directory
#
# EPRINTS_HOST: eprints host name
# EPRINTS_PORT: appropriate HTTP port
# EPRINTS_ADMIN_EMAIL: administrator email address (quote "@" with \)
#
# GEOIP_DATABASE:=/usr/local/share/GeoIP/GeoIP.dat
#
# APACHE_LOG_LOCATION:=/sw/var/apache2/logs/
# APACHE_LOG_NAME_1:=access_log
# APACHE_LOG_NAME_2:=<name of the second access log>
#   (both names are substituted into scripts/eprints-usage.php; see USAGE_SUBS)
#
# PHP_LIB:=/sw/lib/php4
#
include environment_config.make

BINDIR:=$(EPRINTSSTATS)/scripts

# Files produced from their *_src counterparts by the rules below.
GENERATED_FILES:=config/inc.vars.es.php scripts/eprints-usage.php \
    scripts/fix-countries.php vhost/apache.conf

# Names of the variables whose ##NAME## markers are replaced in each kind of
# generated file.
CONFIG_SUBS:=EPRINTSSTATS EPRINTS_HOST EPRINTS_PORT EPRINTS_ADMIN_EMAIL
USAGE_SUBS:=GEOIP_DATABASE APACHE_LOG_LOCATION APACHE_LOG_NAME_1 APACHE_LOG_NAME_2
FIX_SUBS:=GEOIP_DATABASE
VHOST_SUBS:=EPRINTSSTATS PHP_LIB

# The generated files are written through shell redirection, so a failing
# perl would otherwise leave behind a truncated target that looks up to date.
.DELETE_ON_ERROR:

# None of these targets creates a file of the same name.
.PHONY: deploy stats debug

# Copy everything except build inputs (Makefile, *.make, *_src.*) into the
# installation directory, as the user that owns the EPrints directory.
deploy: $(GENERATED_FILES)
	@announce "Copying files..."
	@sudo -u $(EPRINTS_USER) rsync --verbose --cvs-exclude --exclude=Makefile \
		--exclude='*.make' --exclude='*_src.*' \
		--recursive --times . $(EPRINTSSTATS)

# Each rule below emits one perl "-e s|##NAME##|value|g;" expression per
# variable in the relevant *_SUBS list. The values are interpolated into a
# double-quoted shell string and then a perl substitution, which is why "@"
# in EPRINTS_ADMIN_EMAIL has to be quoted.
config/inc.vars.es.php: config/inc.vars.es_src.php environment_config.make
	@announce "Generating $@"
	perl -p $(foreach SUB,$(CONFIG_SUBS),-e "s|##$(SUB)##|$($(SUB))|g;") $< > $@

scripts/eprints-usage.php: scripts/eprints-usage_src.php environment_config.make
	@announce "Generating $@"
	perl -p $(foreach SUB,$(USAGE_SUBS),-e "s|##$(SUB)##|$($(SUB))|g;") $< > $@

scripts/fix-countries.php: scripts/fix-countries_src.php environment_config.make
	@announce "Generating $@"
	perl -p $(foreach SUB,$(FIX_SUBS),-e "s|##$(SUB)##|$($(SUB))|g;") $< > $@

vhost/apache.conf: vhost/apache_src.conf environment_config.make
	@announce "Generating $@"
	perl -p $(foreach SUB,$(VHOST_SUBS),-e "s|##$(SUB)##|$($(SUB))|g;") $< > $@

# Run the deployed usage script from the installation's scripts directory.
stats:
	@announce "Generating statistics..."
	@php $(BINDIR)/eprints-usage.php

# Print the key path variables, to check what environment_config.make supplied.
debug:
	@echo "EPRINTSSTATS = [$(EPRINTSSTATS)]"
	@echo "BINDIR = [$(BINDIR)]"
Ignore Space
Show notes
View
Repositories/statistics/scripts/fix-titles_src.php
0 → 100755
<?php

/*
	Rebuild the eprint_name column of the statistics "view" table.

	For every distinct (archive_name, archiveid) pair found in "view", the
	current title is looked up in the corresponding EPrints database and
	written back to all rows for that pair. Items whose title cannot be
	looked up because of a database error are skipped, so that an outage
	of an archive database does not overwrite good titles.
*/

/* NJS 2007-07-24
   The database structure changed between versions 2.x and 3.x of
   EPrints, so we now need to check the major version number and alter
   the queries appropriately. Use only the MAJOR version number (i.e.,
   2 or 3, don't include the release number).
*/
$eprints_version = 2;

/*
	Apache log for ePrints uses this format:
	LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined

	If the log format differs the regular expression matching would need to be adjusted.

	Parse:
		ip
		date YYYY MM DD
		archive ID
*/

// Web server log files
// NOTE(review): $log_dir and $log_file are not referenced anywhere else in
// this script; they are kept only so the configuration section is unchanged.
$log_dir = '##APACHE_LOG_LOCATION##';
$log_file = array(
	'otago_eprints' => '##APACHE_LOG_NAME_1##',
	'cardrona' => '##APACHE_LOG_NAME_2##',
);

// eprintstats db
// NOTE(review): database credentials are hard-coded in this file (here and in
// $eprintsdbs below); consider moving them out of version control.
$sqlserver = 'localhost';
$sqluser = 'eprintstatspriv';
$sqlpass = 'AuldGrizzel';
$sqldatabase = 'eprintstats';

/* NJS 2006-05-26
	SQL details of your ePrints installation(s). This has now been
	generalised to work with multiple archives. For each archive that you
	have, add an entry to this array in the following format:

		'archive_name' => array(
			'sqlserver' => 'db_host',
			'username' => 'archive_name',
			'password' => 'password',
		),
*/
$eprintsdbs = array(
	'otago_eprints' => array(
		'sqlserver' => 'localhost',
		'username' => 'otago_eprints',
		'password' => 'DrSyntaxRidesAgain',
	),
	'cardrona' => array(
		'sqlserver' => 'localhost',
		'username' => 'cardrona',
		'password' => 'chautquau',
	),
);

###########################################
##
## No configuration required below here.
##
###########################################

$connect = mysql_pconnect($sqlserver,$sqluser,$sqlpass)
	or die("Could not connect");
$db = mysql_select_db($sqldatabase,$connect)
	or die("Could not connect");

// NJS 2006-06-14: Generalised connection list for multiple archives.
// Only connections that actually opened are stored; items belonging to an
// archive without a connection are skipped in the main loop below.
$eprints_connections = array();
foreach ($eprintsdbs as $archive_name => $details)
{
	$link = mysql_connect($details['sqlserver'],$details['username'],$details['password']);
	if ( $link )
	{
		$eprints_connections[$archive_name] = $link;
	}
	else
	{
		printf( "Warning: could not connect to archive database %s.\n", $archive_name );
	}
}

/*	Get list of all items in database. We only need the distinct
	archive name and eprint ID, because we'll be updating all duplicate
	instances with the same eprint title anyway. We could limit it to
	items with titles like "Unknown%", to make things faster, but that
	doesn't cover different titles for the same item (e.g., from
	corrected punctuation).
*/
$query = "
	SELECT DISTINCT archive_name, archiveid
	FROM view
	ORDER BY archive_name, archiveid";
$result = mysql_query( $query, $connect )
	or die( "Could not list items: " . mysql_error( $connect ) . "\n" );
$total_rows = mysql_num_rows( $result );
$count = 0;

while ( $row = mysql_fetch_assoc( $result ) )
{
	$archive_name = $row['archive_name'];
	$eprint_id = $row['archiveid'];

	printf( "Checking %s item %s (%d/%d): ", $archive_name, $eprint_id, ++$count, $total_rows );

	// Without a usable archive connection every lookup would come back as
	// "Unknown item" and overwrite the stored title, so skip instead.
	if ( empty( $eprints_connections[$archive_name] ) )
	{
		print( "skipped (no connection to archive database).\n" );
		continue;
	}

	$eprint_name = getePrintName( $eprints_connections[$archive_name],
								  $archive_name,
								  $eprint_id,
								  $eprints_version );
	if ( $eprint_name === false )
	{
		print( "skipped (title lookup failed).\n" );
		continue;
	}

	// Both string values are escaped against the statistics connection,
	// which is the one the UPDATE is sent over.
	$update = sprintf("
		UPDATE view
		SET eprint_name = '%s'
		WHERE archive_name = '%s'
		  AND archiveid = %d",
		mysql_real_escape_string( $eprint_name, $connect ),
		mysql_real_escape_string( $archive_name, $connect ),
		$eprint_id );

	if ( mysql_query($update, $connect) )
	{
		$num_rows = mysql_affected_rows( $connect );
		printf( "OK (%d row%s updated).\n", $num_rows, ( $num_rows != 1 ) ? "s" : "" );
	}
	else
	{
		print( "failed!\n" );
	}
}

foreach ($eprints_connections as $connection)
{
	mysql_close($connection);
}
mysql_close($connect);


/*
	Look up the title corresponding to the specified eprint id.

	$connection       MySQL link to the server holding the archive database.
	$archive          Name of the archive; also used as the database name.
	$eprintid         ID of the eprint (forced to an integer before use).
	$eprints_version  Major EPrints version (2 or 3); selects the schema.

	Returns the whitespace-normalised title, with " [deleted]" appended when
	the item was only found among the deleted items, or
	"Unknown item [<id>]" when it was found nowhere. Returns false when the
	database could not be selected or a query failed, so that the caller can
	tell "not found" apart from "could not look".
*/
function getePrintName($connection,$archive,$eprintid,$eprints_version)
{
	// The ID is interpolated straight into the SQL below.
	$eprintid = (int) $eprintid;

	// NJS 2006-06-14: DB connection now passed as an argument.
	if ( !mysql_select_db($archive,$connection) )
	{
		return false;
	}

	// NJS 2007-07-24: Added check for EPrints version, as the
	// database structure changed between versions 2 and 3.
	if ( $eprints_version > 2 )
	{
		$live_query = "
			SELECT title
			FROM eprint
			WHERE eprintid = $eprintid
			  AND eprint_status = 'archive'
		";
		$deleted_query = "
			SELECT title
			FROM eprint
			WHERE eprintid = $eprintid
			  AND eprint_status = 'deletion'
		";
	}
	else
	{
		$live_query = "
			SELECT title
			FROM archive
			WHERE eprintid = $eprintid
		";
		// This fallback previously re-ran the "archive" query, so it could
		// never find a deleted item.
		// NOTE(review): assumes EPrints 2 keeps deleted items in a table
		// named "deletion" - confirm against the archive schema.
		$deleted_query = "
			SELECT title
			FROM deletion
			WHERE eprintid = $eprintid
		";
	}

	$suffix = '';
	$lookup = mysql_query($live_query,$connection);
	if ( !$lookup )
	{
		return false;
	}

	// NJS 2006-04-25 Added check for empty result, probably a deleted item.
	// Look in the deleted items for details.
	if ( mysql_num_rows($lookup) == 0 )
	{
		$lookup = mysql_query($deleted_query,$connection);
		if ( !$lookup )
		{
			return false;
		}

		// If it's not in deletion, then we have no clue what it is.
		if ( mysql_num_rows($lookup) == 0 )
		{
			return "Unknown item [$eprintid]";
		}
		$suffix = ' [deleted]';
	}

	$row = mysql_fetch_assoc($lookup);
	if ( !$row )
	{
		return false;
	}

	// Trim the title and collapse internal runs of whitespace.
	$title = preg_replace("/\s+/"," ",trim($row["title"]));

	return $title . $suffix;
}

?>
Show line notes below