Newer
Older
Digital_Repository / Repositories / otago_eprints / otago_eprints / cfg / ArchiveOAIConfig.pm
######################################################################
#
#  OAI Configuration for Archive.
#
######################################################################
#
# This file is part of GNU EPrints 2.
# 
# Copyright (c) 2000-2004 University of Southampton, UK. SO17 1BJ.
# 
# EPrints 2 is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# EPrints 2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with EPrints 2; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
######################################################################

use EPrints::OpenArchives;

sub get_oai_conf
{
	my( $perlurl ) = @_;

	# Builds the OAI configuration for this archive and returns it as a
	# hash reference. $perlurl is the URL prefix to which "/oai" and
	# "/oai2" are appended to form the base URL of each protocol version.
	my %conf = ();

	# Both sample identifiers below are built from this eprint id.
	my $sample_eprintid = "23";

	##################################################################
	# OAI 1.1
	##################################################################

	# Site specific **UNIQUE** archive identifier (v1.1 ids can't
	# contain dots). See http://www.openarchives.org/ for existing
	# identifiers.
	# NOTE(review): "GenericEPrints" looks like a stock placeholder
	# rather than a site-specific value - confirm.
	$conf{archive_id} = "GenericEPrints";

	# Exported metadata formats, each keyed by format id:
	#  metadata_namespaces - the XML namespace of the format
	#  metadata_schemas    - the schema of the format
	#  metadata_functions  - the function (defined later in this file)
	#                        which turns an eprint record into XML in
	#                        that format.
	$conf{metadata_namespaces} = { oai_dc => "http://purl.org/dc/elements/1.1/" };
	$conf{metadata_schemas} = { oai_dc => "http://www.openarchives.org/OAI/1.1/dc.xsd" };
	$conf{metadata_functions} = { oai_dc => \&make_metadata_oai_dc };

	# Base URL of OAI 1.1
	$conf{base_url} = $perlurl."/oai";

	$conf{sample_identifier} = EPrints::OpenArchives::to_oai_identifier(
		$conf{archive_id},
		$sample_eprintid );

	##################################################################
	# OAI-PMH 2.0
	#
	# 2.0 requires slightly different schemas and XML to v1.1, so it
	# gets its own copy of the settings above under the "v2" key.
	##################################################################

	# The v2.0 archive identifier may differ from the v1.1 one and,
	# unlike it, may contain dots - so the site's domain can serve as
	# (part of) the id, which is pretty darn unique.
	my %v2 = (
		archive_id => "eprints.otago.ac.nz",
		metadata_namespaces => { oai_dc => "http://www.openarchives.org/OAI/2.0/oai_dc/" },
		metadata_schemas => { oai_dc => "http://www.openarchives.org/OAI/2.0/oai_dc.xsd" },
		metadata_functions => { oai_dc => \&make_metadata_oai_dc_oai2 },
		# Base URL of OAI 2.0
		base_url => $perlurl."/oai2",
	);

	$v2{sample_identifier} = EPrints::OpenArchives::to_oai_identifier(
		$v2{archive_id},
		$sample_eprintid );

	$conf{v2} = \%v2;

	##################################################################
	# GENERAL OAI CONFIGURATION
	#
	# This applies to all versions of OAI.
	##################################################################

	# Set Configuration
	# A harvester may harvest a single set rather than the entire
	# archive. Sets are usually subjects, but can be anything you like;
	# they are defined in the same manner as "browse_views", although
	# only id, allow_null and fields are used.
	$conf{sets} = [
#		{ id=>"year", allow_null=>1, fields=>"date_effective" },
#		{ id=>"person", allow_null=>0, fields=>"creators.id/editors.id" },
		{ id=>"status", allow_null=>0, fields=>"ispublished" },
		{ id=>"subjects", allow_null=>0, fields=>"subjects" },
	];

	# Filter OAI export. To stop certain records being exported, add
	# filters here. These work the same as for a search filter.
	$conf{filters} = [
#		{ meta_fields => [ "creators" ], value=>"harnad" }
# Example: don't export any OAI records from before 2003.
#		{ meta_fields => [ "date_effective" ], value=>"2003-" }
	];

	##################################################################
	# Information for "Identify" responses.
	#
	# Each item below is a text and/or a URL linking to such a text.
	# The texts are newline-terminated lines; trailing spaces inside
	# the quotes are part of the text.
	##################################################################

	# "content" : describes the content of the repository. It would be
	# appropriate to indicate the language(s) of the metadata/data in
	# the repository.
	$conf{content} = { url => undef };
	$conf{content}->{text} = latin1( join( "\n",
		"This is a pilot eprint repository for the School of Business at the",
		"University of Otago. It comprises an open access collection of publications",
		"by School of Business academic staff and students across all departments",
		"in the School, both before and after peer-reviewed publication. Most",
		"publications are fully searchable. Note that the full text of some",
		"material may not be available for various reasons (such as copyright",
		"issues).",
		"" ) );

	# "metadataPolicy" : describes policies relating to the use of
	# metadata harvested through the OAI interface.
	# The text and/or the url MUST be defined to comply to OAI.
	$conf{metadata_policy} = { url => undef };
	$conf{metadata_policy}->{text} = latin1( join( "\n",
		"No metadata policy defined. ",
		"This server has not yet been fully configured.",
		"Please contact the admin for more information, but if in doubt assume that",
		"NO rights at all are granted to this data.",
		"" ) );

	# "dataPolicy" : describes policies relating to the data held in
	# the repository. This may also describe policies regarding
	# downloading data (full-content).
	# The text and/or the url MUST be defined to comply to OAI.
	$conf{data_policy} = { url => undef };
	$conf{data_policy}->{text} = latin1( join( "\n",
		"No data policy defined. ",
		"This server has not yet been fully configured.",
		"Please contact the admin for more information, but if in doubt assume that",
		"NO rights at all are granted to this data.",
		"" ) );

	# "submissionPolicy" : describes policies relating to the
	# submission of content to the repository (or other accession
	# mechanisms).
	$conf{submission_policy} = { url => undef };
	$conf{submission_policy}->{text} = latin1( join( "\n",
		"No submission-data policy defined. ",
		"This server has not yet been fully configured.",
		"" ) );

	# "comment" : anything else that is not covered by the fields
	# above. It would be appropriate to include additional contact
	# details (additional to the adminEmail that is part of the
	# response to the Identify request).
	# An array of comments to be returned. May be empty.
	my $software_note =
		"This system is running eprints server software (".
		EPrints::Config::get( "version" ).") developed at the ".
		"University of Southampton. For more information see ".
		"http://www.eprints.org/";
	$conf{comments} = [ latin1( $software_note ) ];

	# Document format id => MIME type. This is the table that
	# eprint_to_unqualified_dc reads to fill in dc "format" values.
	$conf{mime_types} = {
		pdf   => "application/pdf",
		ps    => "application/postscript",
		html  => "text/html",
		other => "application/octet-stream",
		ascii => "text/plain",
	};

	return \%conf;
}

######################################################################
#
# $domfragment = make_metadata_oai_dc( $eprint, $session )
#
######################################################################
# $eprint
# - the EPrints::EPrint to be converted
# $session
# - the current EPrints::Session
#
# returns: $domfragment
# - a DOM tree containing the metadata from $eprint in oai_dc -
# unqualified dublin-core.
######################################################################
# This subroutine takes an eprint object and renders the XML DOM
# to export as the oai_dc default format in OAI.
#
# If supporting other metadata formats, it's probably best to start
# by copying this method, and modifying it.
#
# It uses a separate function to actually map to the DC, so that the
# mapping can also be called by the metadata_links function in
# ArchiveRenderConfig.pm - this saves having to map it to unqualified
# DC in two places.
#
######################################################################

sub make_metadata_oai_dc
{
	my( $eprint, $session ) = @_;

	# Renders $eprint as an OAI 1.1 oai_dc record: a <dc> element with
	# one child per [ name, value ] pair of unqualified Dublin Core.
	#
	# A format function may return undef for a record it does not
	# support, but this is "oai_dc", which has to be supported, so an
	# element is always returned.

	my @pairs = eprint_to_unqualified_dc( $eprint, $session );

	# The namespace & schema are read from our own OAI configuration
	# rather than hard coded, so that they cannot drift apart.
	my $conf = $session->get_archive()->get_conf( "oai" );
	my $ns = $conf->{metadata_namespaces}->{oai_dc};
	my $xsd = $conf->{metadata_schemas}->{oai_dc};

	my @attributes = (
		"xmlns" => $ns,
		"xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
		"xsi:schemaLocation" => join( " ", $ns, $xsd ),
	);
	my $root = $session->make_element( "dc", @attributes );

	# Turn each pair into an XML block (indented by 8), i.e.
	# <key>value</key>, and add it to the DC element.
	foreach my $pair ( @pairs )
	{
		my $block = $session->render_data_element( 8, $pair->[0], $pair->[1] );
		$root->appendChild( $block );
	}

	return $root;
}

######################################################################
#
# $domfragment = make_metadata_oai_dc_oai2( $eprint, $session )
#
######################################################################
#
# Identical to make_metadata_oai_dc except with a few changes
# for the new version of the protocol.
#
######################################################################

sub make_metadata_oai_dc_oai2
{
	my( $eprint, $session ) = @_;

	# Renders $eprint as an OAI-PMH 2.0 oai_dc record: an <oai_dc:dc>
	# element with one "dc:"-prefixed child per [ name, value ] pair of
	# unqualified Dublin Core.

	my @pairs = eprint_to_unqualified_dc( $eprint, $session );

	# The namespace & schema are read from the "v2" part of our own OAI
	# configuration rather than hard coded, so that they cannot drift
	# apart.
	my $v2_conf = $session->get_archive()->get_conf( "oai", "v2" );
	my $ns = $v2_conf->{metadata_namespaces}->{oai_dc};
	my $xsd = $v2_conf->{metadata_schemas}->{oai_dc};

	my @attributes = (
		"xmlns:oai_dc" => $ns,
		"xmlns:dc" => "http://purl.org/dc/elements/1.1/",
		"xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
		"xsi:schemaLocation" => join( " ", $ns, $xsd ),
	);
	my $root = $session->make_element( "oai_dc:dc", @attributes );

	# Turn each pair into an XML block (indented by 8), i.e.
	# <dc:key>value</dc:key>, and add it to the oai_dc:dc element.
	foreach my $pair ( @pairs )
	{
		my $block = $session->render_data_element( 8, "dc:".$pair->[0], $pair->[1] );
		$root->appendChild( $block );
	}

	return $root;
}

######################################################################
#
# $dc = eprint_to_unqualified_dc( $eprint, $session )
#
######################################################################
# $eprint
# - the EPrints::EPrint to be converted
# $session
# - the current EPrints::Session
#
# returns: array of array refs. 
# - the array refs are 2 item arrays containing dc fieldname and value
# eg. [ "title", "Bacon and Toast Experiments" ]
######################################################################
# This function is called by make_metadata_oai_dc, make_metadata_oai_dc_oai2
# and metadata_links.
#
# It maps an EPrint object into unqualified dublincore. 
#
# It is not called directly from the EPrints system.
#
######################################################################

sub eprint_to_unqualified_dc
{
	my( $eprint, $session ) = @_;

	# Maps $eprint onto unqualified Dublin Core.
	#
	# Returns a list of 2-item array refs, [ dc_fieldname, value ], in
	# the order in which they should be exported, eg.
	# [ "title", "Bacon and Toast Experiments" ]. A fieldname may occur
	# more than once.
	#
	# NOTE(review): title, description and publisher are always
	# included, with an undef value when the field is unset - confirm
	# that is what the callers want.

	my @dcdata = ();
	push @dcdata, [ "title", $eprint->get_value( "title" ) ];

	# grab the creators without the ID parts so if the site admin
	# sets or unsets creators to having an ID part it will make
	# no difference to this bit.
	my $creators = $eprint->get_value( "creators", 1 );
	if( defined $creators )
	{
		foreach my $creator ( @{$creators} )
		{
			push @dcdata, [ "creator", EPrints::Utils::make_name_string( $creator ) ];
		}
	}

	# Guarded in the same way as creators and editors: dereferencing
	# an undefined value in a foreach list is a fatal error, which
	# would abort the export of the whole record.
	my $subjects = $eprint->get_value( "subjects" );
	if( defined $subjects )
	{
		foreach my $subjectid ( @{$subjects} )
		{
			my $subject = EPrints::Subject->new( $session, $subjectid );
			# avoid problems with bad subjects
			next unless( defined $subject );
			push @dcdata, [ "subject", EPrints::Utils::tree_to_utf8( $subject->render_description() ) ];
		}
	}

	push @dcdata, [ "description", $eprint->get_value( "abstract" ) ];

	push @dcdata, [ "publisher", $eprint->get_value( "publisher" ) ];

	my $editors = $eprint->get_value( "editors", 1 );
	if( defined $editors )
	{
		foreach my $editor ( @{$editors} )
		{
			push @dcdata, [ "contributor", EPrints::Utils::make_name_string( $editor ) ];
		}
	}

	## Date for discovery. Trailing all-zero parts (a month and/or day
	## we don't have) are dropped rather than guessed, so "2004-03-00"
	## is exported as "2004-03" and "2004-00-00" as "2004".
	my $date = $eprint->get_value( "date_effective" );
	if( defined $date )
	{
		$date =~ s/(-0+)+$//;
		push @dcdata, [ "date", $date ];
	}

	# Two dc "type" values are exported: the name of the eprint type,
	# then the peer review status.
	my $ds = $eprint->get_dataset();
	push @dcdata, [ "type", $ds->get_type_name( $session, $eprint->get_value( "type" ) ) ];

	my $ref = "NonPeerReviewed";
	if( $eprint->is_set( "refereed" ) && $eprint->get_value( "refereed" ) eq "TRUE" )
	{
		$ref = "PeerReviewed";
	}
	push @dcdata, [ "type", $ref ];

	# The identifier is the URL of the abstract page.
	# possibly this should be the OAI ID, or both.
	push @dcdata, [ "identifier", $eprint->get_url() ];
	push @dcdata, [ "identifier",
		EPrints::Utils::tree_to_utf8( $eprint->render_citation() ) ];

	# One format/relation pair per document. A document format with no
	# entry in the configured mime_types table is exported as
	# application/octet-stream.
	my @documents = $eprint->get_all_documents();
	my $mimetypes = $session->get_archive->get_conf( "oai", "mime_types" );
	foreach my $document ( @documents )
	{
		my $format = $mimetypes->{$document->get_value( "format" )};
		$format = "application/octet-stream" unless defined $format;
		push @dcdata, [ "format", $format ];
		push @dcdata, [ "relation", $document->get_url() ];
	}

	if( $eprint->is_set( "official_url" ) )
	{
		push @dcdata, [ "relation", $eprint->get_value( "official_url" ) ];
	}

	# dc.language not handled yet.
	# dc.source not handled yet.
	# dc.coverage not handled yet.
	# dc.rights not handled yet.

	return @dcdata;
}



1;