Newer
Older
Digital_Repository / OARiNZ / DIY / deb_package / eprints-3.0 / perl_lib / EPrints / Plugin / Export / OAI_UKETD_DC.pm
package EPrints::Plugin::Export::OAI_UKETD_DC;

######################################################################
# Copyright (C) British Library Board, St. Pancras, UK
#
# Author: Steve Carr, British Library
# Email: stephen.carr@bl.uk
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
######################################################################

use Unicode::String qw( utf8 );

use EPrints::Plugin::Export;

@ISA = ( "EPrints::Plugin::Export" );

use strict;

# The utf8() method is called to ensure that
# any broken characters are removed. There should
# not be any broken characters, but better to be
# sure.

sub new
{
	my( $class, %opts ) = @_;

	# The parent export plugin constructs the object; this class only
	# overlays its own settings on top of whatever the parent set up.
	my $self = $class->SUPER::new( %opts );

	# Ordered key/value list of the settings specific to this plugin:
	# display name, accepted input, file suffix, MIME type and the XML
	# namespace / schema location used for the uketd_dc wrapper element.
	my @settings = (
		name => "UK ETD DC OAI Schema",
		accept => [ 'dataobj/eprint' ],
		visible => "",
		suffix => ".xml",
		mimetype => "text/xml",
		xmlns => "http://naca.central.cranfield.ac.uk/ethos-oai/2.0/",
		schemaLocation => "http://naca.central.cranfield.ac.uk/ethos-oai/2.0/uketd_dc.xsd",
	);

	# Consume the list two items at a time so the keys are written in
	# the order they are listed above.
	while( my( $key, $value ) = splice( @settings, 0, 2 ) )
	{
		$self->{$key} = $value;
	}

	return $self;
}


# Serialise a single data object: build its uketd_dc DOM with
# xml_dataobj() and return the result of EPrints::XML::to_string() on it.
sub output_dataobj
{
	my( $plugin, $dataobj ) = @_;

	# scalar() keeps xml_dataobj() in scalar context even though it is
	# now called inside an argument list.
	return EPrints::XML::to_string( scalar $plugin->xml_dataobj( $dataobj ) );
}




#######################################################################
#
# Steve Carr - eprints revision (standard revision in order to offer
# something other than basic dublin core - which isn't going to be enough
# to encode the complex data that we are dealing with for e-theses)
# This subroutine takes an eprint object and renders the XML DOM
# to export as the uketd_dc default format in OAI.
#
######################################################################

# Build the uketd_dc XML DOM for one eprint.
#
# Parameters:
#   $plugin - this export plugin; its {session}, {xmlns} and
#             {schemaLocation} entries are read.
#   $eprint - the eprint to export.
#
# Returns the "uketd_dc:uketddc" element. When eprint_to_uketd_dc()
# produces no rows (it does so for anything that is not a thesis) the
# element is returned with its namespace declarations but no children.
sub xml_dataobj
{
	my( $plugin, $eprint ) = @_;

	my $session = $plugin->{session};

	# we have a variety of namespaces since we're doing qualified dublin core, so
	# each row is an array ref: [ element name, value, namespace prefix ] with an
	# optional fourth entry holding the value for an xsi:type attribute.
	my @etdData = eprint_to_uketd_dc( $eprint, $session );

	my $namespace = $plugin->{xmlns};
	my $schema = $plugin->{schemaLocation};

	my $uketd_dc = $session->make_element(
		"uketd_dc:uketddc",
		"xmlns:dc" => "http://purl.org/dc/elements/1.1/",
		"xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
		# TO DO check out that these are properly acceptable when validated
		# TO DO put in final location for our xsd and namespace - it'll probably be somewhere on ethos.ac.uk or bl.uk
		"xsi:schemaLocation" => $namespace." ".$schema,
		"xmlns:uketd_dc" => $namespace,
		"xmlns:dcterms" => "http://purl.org/dc/terms/",
		"xmlns:uketdterms" => "http://naca.central.cranfield.ac.uk/ethos-oai/terms/");

	# turn the list of rows into XML blocks (indented by 8) and add
	# them to the ETD element.
	foreach my $row ( @etdData )
	{
		my( $element, $value, $prefix, $xsi_type ) = @{$row};

		# Count the elements of the row itself. Testing the reference
		# ("scalar $row") compares its numeric address, which is never
		# below 4, so every row used to be given an xsi:type attribute
		# even when it had no type to put in it.
		my @attributes = ();
		if( scalar @{$row} >= 4 )
		{
			# there's an attribute to add
			@attributes = ( "xsi:type" => $xsi_type );
		}

		$uketd_dc->appendChild(
			$session->render_data_element( 8, $prefix.":".$element, $value, @attributes ) );
	}

	return $uketd_dc;
}

##############################################################################
#
# Steve Carr
# Subroutine to build the list of array refs that xml_dataobj (above) turns
# into XML. It is modelled on the two-item [ name, value ] arrays used for
# simple dublin core (dc). The difference is that qualified dublin core has
# additional namespaces and more elements from the eprint can be utilised, so
# each array ref here is [ name, value, namespace prefix ], optionally followed
# by a fourth item giving the value of an xsi:type attribute (e.g. "dcterms:URI").
#
#
##############################################################################

# Convert an eprint into the rows used to render uketd_dc metadata.
#
# Parameters:
#   $eprint  - the eprint to describe.
#   $session - the current session; used to look up subjects, the type
#              name and the "oai"/"mime_types" configuration.
#
# Returns a list of array refs, in output order. Each row is
# [ element name, value, namespace prefix ], optionally followed by a
# fourth item holding an xsi:type attribute value. The list is empty
# when the eprint's type is neither "thesis" nor "Thesis".
sub eprint_to_uketd_dc
{
	my( $eprint, $session ) = @_;

	my @etddata = ();

	# Skip all records that aren't theses because uketd_dc is nonsensical for
	# non-thesis items.
	my $type = $eprint->get_value( "type" );
	unless( defined $type && ( $type eq "thesis" || $type eq "Thesis" ) )
	{
		return @etddata;
	}

	# we still want much the same dc data so include under the dc namespace;
	# the namespace prefix goes last in each row.
	push @etddata, [ "title", $eprint->get_value( "title" ), "dc" ];

	# grab the creators without the ID parts so if the site admin
	# sets or unsets creators to having an ID part it will make
	# no difference to this bit.
	my $creators = $eprint->get_value( "creators_name" );
	if( defined $creators )
	{
		foreach my $creator ( @{$creators} )
		{
			push @etddata, [ "creator", EPrints::Utils::make_name_string( $creator ), "dc" ];
		}
	}

	foreach my $subjectid ( @{$eprint->get_value( "subjects" )} )
	{
		my $subject = EPrints::DataObj::Subject->new( $session, $subjectid );
		# avoid problems with bad subjects
		next unless( defined $subject );
		push @etddata, [ "subject", EPrints::Utils::tree_to_utf8( $subject->render_description() ), "dc" ];
	}

	# Steve Carr : we're using qdc, namespace dcterms, version of description - 'abstract'
	push @etddata, [ "abstract", $eprint->get_value( "abstract" ), "dcterms" ];

	# Steve Carr : theses aren't technically 'published' so we can't assume a publisher here as in original code
	if( defined $eprint->get_value( "publisher" ) )
	{
		push @etddata, [ "commercial", $eprint->get_value( "publisher" ), "uketdterms" ];
	}

	my $editors = $eprint->get_value( "editors_name" );
	if( defined $editors )
	{
		foreach my $editor ( @{$editors} )
		{
			push @etddata, [ "contributor", EPrints::Utils::make_name_string( $editor ), "dc" ];
		}
	}

#	 cjg commented out. need to make work for new date layout
##		## Date for discovery. For a month/day we don't have, assume 01.
##		my $date = $eprint->get_value( "date_issue" );
#		if( defined $date )
#		{
#	        	$date =~ s/(-0+)+$//;
#			push @etddata, [ "issued", $date, "dcterms" ];
#		}

	push @etddata, [ "type", $session->get_type_name( "eprint", $type ), "dc" ];

	# The URL of the abstract page is the dcterms isreferencedby
	push @etddata, [ "isReferencedBy", $eprint->get_url(), "dcterms" ];

	my @documents = $eprint->get_all_documents();
	my $mimetypes = $session->get_repository->get_conf( "oai", "mime_types" );
	foreach my $document ( @documents )
	{
		# Use the configured mapping for the document's format when
		# there is one, otherwise fall back to the format value itself.
		my $format_id = $document->get_value( "format" );
		my $format = $mimetypes->{$format_id};
		$format = $format_id unless defined $format;
		#$format = "application/octet-stream" unless defined $format;

		push @etddata, [ "identifier", $document->get_url(), "dc", "dcterms:URI" ];
		# NOTE(review): "dcterms" on its own is a namespace prefix rather
		# than a type name - confirm the xsi:type intended for dc:format.
		push @etddata, [ "format", $format, "dc", "dcterms" ];
		# information about extent and checksums could be added here, if they are available
		# the default eprint doesn't have a place for this but both could be generated dynamically
	}

	# Steve Carr : we're using isreferencedby for the official url splash page
	if( $eprint->exists_and_set( "official_url" ) )
	{
		push @etddata, [ "isReferencedBy", $eprint->get_value( "official_url" ), "dcterms", "dcterms:URI" ];
	}

	# Fields that are copied straight across when they exist and are set:
	# [ eprint field, output element, namespace prefix ]. The order of
	# this table is the order of the elements in the output.
	my @optional_fields = (
		[ "thesis_name",    "qualificationname",  "uketdterms" ],
		[ "thesis_type",    "qualificationlevel", "uketdterms" ],
		[ "institution",    "institution",        "uketdterms" ],
		[ "department",     "department",         "uketdterms" ],
		[ "advisor",        "advisor",            "uketdterms" ],
		[ "language",       "language",           "dc" ],
		[ "sponsors",       "sponsor",            "uketdterms" ],
		[ "alt_title",      "alternative",        "dcterms" ],
		[ "checksum",       "checksum",           "uketdterms" ],
		[ "date_embargo",   "date_embargo",       "uketdterms" ],
		[ "embargo_reason", "embargo_reason",     "uketdterms" ],
		[ "rights",         "rights",             "dc" ],
		[ "citations",      "hasVersion",         "dcterms" ],
		[ "referencetext",  "references",         "dcterms" ],
	);
	foreach my $mapping ( @optional_fields )
	{
		my( $field, $element, $prefix ) = @{$mapping};
		next unless $eprint->exists_and_set( $field );
		push @etddata, [ $element, $eprint->get_value( $field ), $prefix ];
	}

	# dc.source TO DO
	# dc.coverage TO DO

	return @etddata;
}
	
1;