# Source path: Digital_Repository / OARiNZ / DIY / deb_package / eprints-3.0 / perl_lib / EPrints / Plugin / Import / XML.pm
package EPrints::Plugin::Import::XML;

use strict;

use EPrints::Plugin::Import::DefaultXML;

our @ISA = qw/ EPrints::Plugin::Import::DefaultXML /;

# Constructor. Delegates to the parent class, then stamps this plugin's
# own name, visibility and produced types onto the resulting object.
sub new
{
	my( $class, %params ) = @_;

	my $self = $class->SUPER::new( %params );

	# Assigned after the parent constructor has run, so these values
	# replace anything already stored under the same three keys.
	@{$self}{qw( name visible produce )} = (
		"XML",
		"all",
		[ 'list/*', 'dataobj/*' ],
	);

	return $self;
}

# Returns the name of the outermost XML element for a dataset: the
# dataset's confid with an "s" appended.
sub top_level_tag
{
	my( $plugin, $dataset ) = @_;

	my $confid = $dataset->confid;

	return "${confid}s";
}

# Converts the XML element for one data object into an epdata hash ref.
#
# The names of every field in $dataset form the whitelist handed to
# get_known_nodes(); each child element it returns is converted with
# xml_field_to_epdatafield() and stored under its field name.
sub xml_to_epdata
{
	my( $plugin, $dataset, $xml ) = @_;

	my @known_names = map { $_->get_name } $dataset->get_fields;

	my %node_for = $plugin->get_known_nodes( $xml, @known_names );

	my %epdata;
	for my $fieldname ( keys %node_for )
	{
		# Both calls are assigned to scalars on purpose: the field lookup
		# and the conversion are each evaluated in scalar context.
		my $field = $dataset->get_field( $fieldname );
		$epdata{$fieldname} = $plugin->xml_field_to_epdatafield( $dataset, $field, $node_for{$fieldname} );
	}

	return \%epdata;
}

# Converts a <file> element into a hash ref of its text parts.
#
# Only the child elements filename, filesize, url and data are looked
# for; each one present is flattened with xml_to_text(). $dataset is
# accepted but not used here.
sub xml_to_file
{
	my( $plugin, $dataset, $xml ) = @_;

	my %node_for = $plugin->get_known_nodes( $xml, qw/ filename filesize url data / );

	my %file;
	for my $key ( keys %node_for )
	{
		$file{$key} = $plugin->xml_to_text( $node_for{$key} );
	}

	return \%file;
}


# Converts the XML element for one field into that field's epdata value.
#
# A field without the "multiple" property is passed straight to
# xml_field_to_data_single(). Otherwise every child element of $xml is
# examined and an array ref of converted values is returned:
#   - subobject fields: the child must be named after the field's
#     "datasetid" property and is converted with xml_to_epdata();
#   - file fields: the child must be <file>, converted with xml_to_file();
#   - virtual fields other than compound/multilang are rejected;
#   - everything else must be <item>, converted with
#     xml_field_to_data_single().
# A child that fails its check produces a warning and is skipped.
sub xml_field_to_epdatafield
{
	my( $plugin, $dataset, $field, $xml ) = @_;

	if( !$field->get_property( "multiple" ) )
	{
		return $plugin->xml_field_to_data_single( $dataset,$field,$xml );
	}

	my @values;
	for my $child ( $xml->getChildNodes )
	{
		# Text, comments and other non-element nodes are ignored silently.
		next if !EPrints::XML::is_dom( $child, "Element" );

		my $tag = $child->nodeName;

		if( $field->is_type( "subobject" ) )
		{
			my $wanted = $field->get_property( "datasetid" );
			if( $tag eq $wanted )
			{
				my $sub_dataset = $plugin->{session}->get_repository->get_dataset( $wanted );
				push @values, $plugin->xml_to_epdata( $sub_dataset,$child );
			}
			else
			{
				$plugin->warning( $plugin->phrase( "unexpected_type",
					type => $tag,
					expected => $wanted,
					fieldname => $field->get_name ) );
			}
		}
		elsif( $field->is_type( "file" ) )
		{
			if( $tag eq "file" )
			{
				push @values, $plugin->xml_to_file( $dataset,$child );
			}
			else
			{
				$plugin->warning( $plugin->phrase( "expected_file", type => $tag, fieldname => $field->get_name ) );
			}
		}
		elsif( $field->is_virtual && !$field->is_type( "compound","multilang") )
		{
			$plugin->warning( $plugin->phrase( "unknown_virtual", type => $tag, fieldname => $field->get_name ) );
		}
		elsif( $tag ne "item" )
		{
			$plugin->warning( $plugin->phrase( "expected_item", type => $tag, fieldname => $field->get_name ) );
		}
		else
		{
			push @values, $plugin->xml_field_to_data_single( $dataset,$field,$child );
		}
	}

	return \@values;
}

# Converts the XML for a single (non-list) value of a field.
#
# Currently a straight pass-through to xml_field_to_data_basic(); the
# result is returned in whatever context the caller supplied.
sub xml_field_to_data_single
{
	my( $self, $dataset, $field, $node ) = @_;

	return $self->xml_field_to_data_basic( $dataset, $field, $node );
}

# Converts the XML for one value of a field into its epdata form.
#
#   - compound / multilang fields: returns a hash ref keyed by child
#     element name. Each child name is looked up in the field's
#     alias-to-fieldname map; the matching sub-field is fetched from
#     $dataset and the child converted recursively. Children with no
#     mapping are warned about and skipped.
#   - name fields: returns a hash ref holding whichever of given,
#     family, lineage and honourific are present, each as text.
#   - any other field: returns the result of xml_to_text() on $xml.
sub xml_field_to_data_basic
{
	my( $plugin, $dataset, $field, $xml ) = @_;

	if( $field->is_type( "compound","multilang") )
	{
		my @children = $xml->getChildNodes;
		my %fieldname_for = $field->get_alias_to_fieldname;

		my %value;
		for my $child ( @children )
		{
			next if !EPrints::XML::is_dom( $child, "Element" );

			my $alias = $child->nodeName();
			my $fieldname = $fieldname_for{$alias};
			if( defined $fieldname )
			{
				my $subfield = $dataset->get_field( $fieldname );
				# Stored under the element name (the alias), not the
				# underlying field name.
				$value{$alias} = $plugin->xml_field_to_data_basic( $dataset, $subfield, $child );
			}
			else
			{
				$plugin->warning( "Unknown element found inside compound field: $alias. (skipping)" );
			}
		}
		return \%value;
	}

	return $plugin->xml_to_text( $xml ) if !$field->is_type( "name" );

	my %node_for = $plugin->get_known_nodes( $xml, qw/ given family lineage honourific / );

	my %name;
	for my $part ( keys %node_for )
	{
		$name{$part} = $plugin->xml_to_text( $node_for{$part} );
	}
	return \%name;
}

# Picks the whitelisted child elements out of $xml.
#
# Returns a flat hash (element name => element) holding, for every name
# in @whitelist that occurs among the child elements of $xml, the first
# element carrying that name. Warnings are raised for:
#   - a second or later element with an already-seen name (dup_element);
#   - every element name that is not whitelisted (unexpected_element,
#     followed each time by an "expected" hint listing the whitelist).
sub get_known_nodes
{
	my( $plugin, $xml, @whitelist ) = @_;

	# Pass 1: index the child elements by name; the first one wins.
	my %seen;
	for my $child ( $xml->getChildNodes )
	{
		next if !EPrints::XML::is_dom( $child, "Element" );

		my $tag = $child->nodeName();
		if( defined $seen{$tag} )
		{
			$plugin->warning( $plugin->phrase( "dup_element", name => $tag ) );
		}
		else
		{
			$seen{$tag} = $child;
		}
	}

	# Pass 2: move the whitelisted entries from %seen into the result.
	my %accepted;
	for my $allowed ( @whitelist )
	{
		my $node = delete $seen{$allowed};
		next if !defined $node;
		$accepted{$allowed} = $node;
	}

	# Whatever is still in %seen was not asked for.
	my $expected = "<".join("> <", @whitelist).">";
	for my $stray ( keys %seen )
	{
		$plugin->warning( $plugin->phrase( "unexpected_element", name => $stray ) );
		$plugin->warning( $plugin->phrase( "expected", elements => $expected ) );
	}

	return %accepted;
}



	


	

1;