Newer
Older
Digital_Repository / OARiNZ / DIY / deb_package / eprints-3.0 / perl_lib / EPrints / Plugin / Import / PubMedXML.pm
package EPrints::Plugin::Import::PubMedXML;

use strict;

use EPrints::Plugin::Import::DefaultXML;

our @ISA = qw/ EPrints::Plugin::Import::DefaultXML /;

# Constructor: builds the plugin via the parent class constructor, then
# sets this plugin's display name, visibility and the kinds of output
# it produces.
#
# Params:  $class  - class name to construct
#          %params - passed through unchanged to the parent constructor
# Returns: the plugin object
sub new
{
	my( $class, %params ) = @_;

	my $self = $class->SUPER::new( %params );

	# Assigned after the parent constructor has run, so these values
	# replace any same-named keys it may have set.
	@{$self}{ qw( name visible produce ) } = (
		"PubMed XML",
		"all",
		[ 'list/eprint', 'dataobj/eprint' ],
	);

	return $self;
}

# Returns the name of the top-level tag for this import format.
#
# The arguments (plugin object and dataset) are accepted but not used;
# the answer is always the same string.
sub top_level_tag
{
	my $tag_name = "PubmedArticleSet";

	return $tag_name;
}

# Convert one PubmedArticle element into a hash of eprint field values.
#
# Params:  $plugin  - this plugin object (its xml_to_text method is used
#                     to turn elements into text)
#          $dataset - not used here
#          $xml     - the PubmedArticle element
# Returns: a hash reference with whichever of these keys could be
#          filled in: title, publication, issn, volume, number, date,
#          pagerange, abstract, creators_name; plus type, which is
#          always "article". Returns nothing (bare return) when the
#          element has no MedlineCitation or no Article descendant.
sub xml_to_epdata
{
	# $xml is the PubmedArticle element
	my( $plugin, $dataset, $xml ) = @_;

	my $epdata = {};

	# First descendant element of $parent called $tag, or undef if none.
	my $first_element = sub {
		my( $parent, $tag ) = @_;

		my $node = $parent->getElementsByTagName( $tag )->item(0);
		return $node;
	};

	# Store the text of the first $tag under $parent in $epdata->{$field}.
	# The key is left untouched when the element is missing. Returns true
	# if something was stored.
	my $copy_text = sub {
		my( $field, $parent, $tag ) = @_;

		my $node = $first_element->( $parent, $tag );
		return 0 unless defined $node;

		$epdata->{$field} = $plugin->xml_to_text( $node );
		return 1;
	};

	my $citation = $first_element->( $xml, "MedlineCitation" );
	return unless defined $citation;

	my $article = $first_element->( $citation, "Article" );
	return unless defined $article;

	$copy_text->( "title", $article, "ArticleTitle" );

	my $journal = $first_element->( $article, "Journal" );
	if( defined $journal )
	{
		$copy_text->( "publication", $journal, "Title" );
		$copy_text->( "issn", $journal, "ISSN" );

		my $journalissue = $first_element->( $journal, "JournalIssue" );
		if( defined $journalissue )
		{
			$copy_text->( "volume", $journalissue, "Volume" );
			$copy_text->( "number", $journalissue, "Issue" );

			my $pubdate = $first_element->( $journalissue, "PubDate" );
			$copy_text->( "date", $pubdate, "Year" ) if defined $pubdate;
		}
	}

	my $pagination = $first_element->( $article, "Pagination" );
	if( defined $pagination )
	{
		# Prefer the ready-made MedlinePgn string; otherwise build the
		# range from StartPage and (if present) EndPage.
		if( !$copy_text->( "pagerange", $pagination, "MedlinePgn" )
			&& $copy_text->( "pagerange", $pagination, "StartPage" ) )
		{
			my $endpage = $first_element->( $pagination, "EndPage" );
			$epdata->{pagerange} .= "-" . $plugin->xml_to_text( $endpage ) if defined $endpage;
		}
	}

	my $abstract = $first_element->( $article, "Abstract" );
	if( defined $abstract )
	{
		# An Abstract may hold several AbstractText elements (one per
		# section of a structured abstract). Keep all of them, separated
		# by a blank line, rather than only the first.
		# Like the Author loop below, this relies on
		# getElementsByTagName returning a plain list in list context.
		my @sections;
		foreach my $abstracttext ( $abstract->getElementsByTagName( "AbstractText" ) )
		{
			my $text = $plugin->xml_to_text( $abstracttext );
			push @sections, $text if defined $text && length $text;
		}
		$epdata->{abstract} = join( "\n\n", @sections ) if @sections;
	}

	my $authorlist = $first_element->( $article, "AuthorList" );
	if( defined $authorlist )
	{
		foreach my $author ( $authorlist->getElementsByTagName( "Author" ) )
		{
			my $name = {};

			my $lastname = $first_element->( $author, "LastName" );
			$name->{family} = $plugin->xml_to_text( $lastname ) if defined $lastname;

			my $forename = $first_element->( $author, "ForeName" );
			$name->{given} = $plugin->xml_to_text( $forename ) if defined $forename;

			# An Author with neither LastName nor ForeName (for example
			# one that only has a CollectiveName) would otherwise add a
			# blank creator entry.
			next unless %{$name};

			push @{ $epdata->{creators_name} }, $name;
		}
	}

	unless( defined $epdata->{publication} )
	{
		# Alternative way of getting (abbrev.) journal title
		my $medlinejournalinfo = $first_element->( $citation, "MedlineJournalInfo" );
		$copy_text->( "publication", $medlinejournalinfo, "MedlineTA" ) if defined $medlinejournalinfo;
	}

	# NLMCommon DTD has "Book" entity, but PubMed seems to
	# only contain articles
	# http://www.ncbi.nlm.nih.gov/entrez/query/DTD/nlmcommon_070101.dtd
	$epdata->{type} = "article";

	return $epdata;

}

1;