Newer
Older
Digital_Repository / Memory Bank / Kete / kete_transform.xsl
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="2.0"
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
	
	<!-- Serialise the result as XML 1.0 encoded in UTF-8. -->
	<xsl:output method="xml" encoding="utf-8" version="1.0" />
	
	<!--
		Kete item type to export. Only one type can be imported at a time, so the caller passes
		the wanted type in as a stylesheet parameter.
		Possible values: audio, video, document, topic, image.

		The value is used to name the root output element (item type plus "s"), to decide which
		eprints and documents produce records, and as a directory segment in the generated file paths.
	-->
	<xsl:param name="itemtype" />
	
	<!--
		Slash-delimited list of the EPrints types that belong to the requested Kete item type,
		e.g. "/image/". It is matched later with contains() against "/" + eprint type + "/".

		"document" covers several EPrints types. "topic" has no EPrints counterpart and yields an
		empty string, so nothing can match. Every other item type maps to the EPrints type of
		the same name.
	-->
	<xsl:variable name="eprinttypes"
		select="if ( $itemtype = 'document' )
				then '/article/book_section/monograph/book/thesis/patent/artefact/other/'
				else if ( $itemtype = 'topic' )
				then ''
				else concat( '/', $itemtype, '/' )" />
	
	<!--
		Base directory for attached files. In the current version of Kete the full file path has to be
		given for each item (the Excel import works it out automatically), so the caller passes the
		base path in as a stylesheet parameter.

		Generated paths have the form basepath/itemtype/files/filename.
	-->
	<xsl:param name="basepath" />
	
	<!-- Entry point: process every child node of the document root. -->
	<xsl:template match="/">
		<xsl:apply-templates select="node()" />
	</xsl:template>
	
	<!--
		Wrap the whole export in a single root element named after the requested item type with
		an "s" appended (e.g. "images" for itemtype "image"), then process the children of "eprints".
		Each eprint is picked up by one of the "eprint[...]" templates, chosen by how many valid
		documents it has.
	-->
	<xsl:template match="eprints">
		<xsl:element name="{concat( $itemtype, 's' )}">
			<xsl:apply-templates />
		</xsl:element>
	</xsl:template>

	<!--
		Fallback for eprints that have no valid documents at all. Without this rule such an eprint
		would be handled by the built-in element rule, which recurses into its children and emits
		stray elements and raw text outside of any "record". A bare name test has a lower default
		priority than the predicated "eprint[...]" patterns, so this rule only applies when neither
		of those matches. It deliberately produces no output.
	-->
	<xsl:template match="eprint" />
	
	<!--
		Eprints with two or more valid documents. A document is "valid" when its formatdesc does
		not start with any of the three conversion labels below (previews, thumbnails, index codes)
		and none of its file copies has the sourceid 'choosing-the-current-cover'.

		For any item type other than "topic", each valid document is handed to the "document"
		template, which emits a record only when the document's format corresponds to the
		requested item type. For a "topic" export, a single record holding the eprint's metadata
		is emitted instead, so that only multi-document eprints become topics in Kete.
	-->
	<xsl:template match="eprint[count( documents/document[
								not( starts-with( formatdesc, 'Thumbnail Documents conversion' ) ) and
								not( starts-with( formatdesc, 'Generate index codes conversion' ) ) and
								not( starts-with( formatdesc, 'Image thumbnails conversion' ) ) and
								not( files/file/copies/item/sourceid = 'choosing-the-current-cover' )
							] ) &gt;= 2]">
		<xsl:choose>
			<xsl:when test="not( $itemtype = 'topic' )">
				<!-- One candidate record per valid document. -->
				<xsl:apply-templates select="documents/document[
								not( starts-with( formatdesc, 'Thumbnail Documents conversion' ) ) and
								not( starts-with( formatdesc, 'Generate index codes conversion' ) ) and
								not( starts-with( formatdesc, 'Image thumbnails conversion' ) ) and
								not( files/file/copies/item/sourceid = 'choosing-the-current-cover' )
							]" />
			</xsl:when>
			<xsl:otherwise>
				<!-- Topic export: one record carrying the eprint-level metadata only. -->
				<record>
					<xsl:apply-templates select="." mode="metadata" />
				</record>
			</xsl:otherwise>
		</xsl:choose>
	</xsl:template>
	
	<!--
		Eprints with exactly one valid document (same validity rule as for multi-document eprints:
		no conversion formatdesc prefix and no 'choosing-the-current-cover' sourceid).

		A record is emitted only when the eprint's type is listed in $eprinttypes. That list is
		empty for a "topic" export, so single-document eprints never become topics. Creating a topic
		for every item could split what should be one topic into several, e.g. three independent
		image eprints that relate to the same thing.
	-->
	<xsl:template match="eprint[count( documents/document[
								not( starts-with( formatdesc, 'Thumbnail Documents conversion' ) ) and
								not( starts-with( formatdesc, 'Generate index codes conversion' ) ) and
								not( starts-with( formatdesc, 'Image thumbnails conversion' ) ) and
								not( files/file/copies/item/sourceid = 'choosing-the-current-cover' )
							] ) = 1]">
		<!-- The eprint type wrapped in slashes, ready to be looked up in the type list. -->
		<xsl:variable name="wanted" select="concat( '/', type, '/' )" />
		<xsl:if test="contains( $eprinttypes, $wanted )">
			<record>
				<!-- Eprint-level metadata first... -->
				<xsl:apply-templates select="." mode="metadata" />
				<!-- ...then the file path, taken from the "main" child of the one valid document. -->
				<xsl:apply-templates select="documents/document[
								not( starts-with( formatdesc, 'Thumbnail Documents conversion' ) ) and
								not( starts-with( formatdesc, 'Generate index codes conversion' ) ) and
								not( starts-with( formatdesc, 'Image thumbnails conversion' ) ) and
								not( files/file/copies/item/sourceid = 'choosing-the-current-cover' )
							]/main" />
			</record>
		</xsl:if>
	</xsl:template>
	
	<!--
		Emit the eprint-level metadata fields in a fixed order. The caller is responsible for
		the enclosing "record" element.
	-->
	<xsl:template match="eprint" mode="metadata">
		<!-- The comma operator keeps the listed order rather than document order. -->
		<xsl:apply-templates select="eprintid, title, abstract, creators, date, official_url" />

		<!-- Subjects, if any, are produced from the eprint as a whole. -->
		<xsl:apply-templates select="." mode="subject" />

		<!-- Keywords become tags. -->
		<xsl:apply-templates select="keywords" />
	</xsl:template>
	
	<!--
		Emit a single "subject" element for an eprint, or nothing when the eprint has none of the
		five subject groups. The content is a comma-separated list: first a label for each group
		that is present, then the text of every "item" inside those groups in document order.
	-->
	<xsl:template match="eprint" mode="subject">
		<xsl:variable name="groups" select="themes | location | classification | time | ethnicity" />
		<xsl:if test="exists( $groups )">
			<subject>
				<!-- Group labels, each followed by a comma. -->
				<xsl:if test="exists( themes )">Heritage Theme,</xsl:if>
				<xsl:if test="exists( location )">Location,</xsl:if>
				<xsl:if test="exists( classification )">Classification,</xsl:if>
				<xsl:if test="exists( time )">Time Period,</xsl:if>
				<xsl:if test="exists( ethnicity )">Ethnic Group,</xsl:if>
				<!-- Item values, joined by commas with no trailing comma. -->
				<xsl:value-of select="$groups/item" separator="," />
			</subject>
		</xsl:if>
	</xsl:template>
	
	<!--
		Emit a link from an item to its parent topic, identified by the eprint ID. This mode is
		invoked from the "document" template, i.e. for eprints that have multiple attached documents.
	-->
	<xsl:template match="eprint" mode="parent">
		<related_topic><xsl:value-of select="eprintid" /></related_topic>
	</xsl:template>
	
	<!--
		Break a multi-file eprint out into one record per attached file. The stylesheet only
		applies this template explicitly for eprints that have multiple valid documents.

		"File" here really means the "document" element. Each document in the Memory Bank export
		has a single file attached. Several files per document are technically possible, but that
		probably only applies to things like zipped web sites.

		Irrelevant document formats such as previews and thumbnails are expected to have been
		filtered out already by the "eprint" templates that select the documents.
	-->
	<xsl:template match="document">
		<!-- The owning eprint, two levels up (eprint/documents/document). -->
		<xsl:variable name="owner" select="../.." />

		<!--
			Map the document's MIME format to a Kete item type. A PDF counts as an image when
			the owning eprint's type is "image" and as a document otherwise. Formats that match
			none of the tests leave the variable empty, so they are ignored below.
		-->
		<xsl:variable name="kind">
			<xsl:choose>
				<xsl:when test="starts-with( format, 'audio/' )">audio</xsl:when>
				<xsl:when test="starts-with( format, 'image/' )">image</xsl:when>
				<xsl:when test="starts-with( format, 'video/' )">video</xsl:when>
				<xsl:when test="starts-with( format, 'text/' )">document</xsl:when>
				<xsl:when test="ends-with( format, '/pdf' ) and $owner/type = 'image'">image</xsl:when>
				<xsl:when test="ends-with( format, '/pdf' )">document</xsl:when>
			</xsl:choose>
		</xsl:variable>

		<!-- Only emit a record when this document is of the requested item type. -->
		<xsl:if test="$kind = $itemtype">
			<record>
				<!-- Much of this repeats the parent topic's information; that is accepted. -->
				<xsl:apply-templates select="docid" />
				<xsl:apply-templates select="$owner/title" />
				<xsl:apply-templates select="formatdesc" />
				<xsl:apply-templates select="$owner/creators" />
				<xsl:apply-templates select="$owner/date" />
				<xsl:apply-templates select="$owner/official_url" />

				<!--
					Subjects are generated from the eprint in one go so that they end up in a
					single element.
				-->
				<xsl:apply-templates select="$owner" mode="subject" />

				<!-- Tags come from the eprint's keywords. -->
				<xsl:apply-templates select="$owner/keywords" />

				<!-- Link back to the parent topic. -->
				<xsl:apply-templates select="$owner" mode="parent" />

				<!-- Path to the file itself. -->
				<xsl:apply-templates select="main" />
			</record>
		</xsl:if>
	</xsl:template>
	
	<!-- The eprint's own ID becomes the record's eprints_id. -->
	<xsl:template match="eprintid">
		<eprints_id><xsl:apply-templates /></eprints_id>
	</xsl:template>
	
	<!-- ID for a sub-item: the owning eprint's ID and this document's ID joined by an underscore. -->
	<xsl:template match="docid">
		<!-- Three levels up from docid; the eprint, given eprint/documents/document/docid nesting. -->
		<xsl:variable name="owner" select="../../.." />
		<eprints_id>
			<xsl:value-of select="$owner/eprintid" />
			<xsl:text>_</xsl:text>
			<xsl:value-of select="." />
		</eprints_id>
	</xsl:template>
	
	<!-- The title is carried over under the same element name. -->
	<xsl:template match="title">
		<title>
			<xsl:apply-templates />
		</title>
	</xsl:template>
	
	<!-- Both an eprint's abstract and a document's format description map to "description". -->
	<xsl:template match="abstract | formatdesc">
		<description>
			<xsl:apply-templates />
		</description>
	</xsl:template>
	
	<!--
		Collapse all creators into one "author" element: "given family" for each name,
		with a comma (and no space) between successive names.
	-->
	<xsl:template match="creators">
		<author>
			<xsl:for-each select="item/name">
				<!-- Separator goes in front of every name except the first. -->
				<xsl:if test="position() &gt; 1">
					<xsl:text>,</xsl:text>
				</xsl:if>
				<xsl:value-of select="given" />
				<xsl:text> </xsl:text>
				<xsl:value-of select="family" />
			</xsl:for-each>
		</author>
	</xsl:template>
	
	<!-- The eprint date is copied verbatim into "publicationdate". -->
	<xsl:template match="date">
		<publicationdate><xsl:value-of select="." /></publicationdate>
	</xsl:template>
	
	<!-- The official URL is copied verbatim into "officialurl". -->
	<xsl:template match="official_url">
		<officialurl><xsl:value-of select="." /></officialurl>
	</xsl:template>
	
	<!--
		Build the file path for a document from its "main" value:
		basepath/itemtype/files/main.
	-->
	<xsl:template match="document/main">
		<path_to_file>
			<xsl:value-of select="concat( $basepath, '/', $itemtype, '/files/', . )" />
		</path_to_file>
	</xsl:template>
	
	<!-- Keywords are copied verbatim into "tags". -->
	<xsl:template match="keywords">
		<tags><xsl:value-of select="." /></tags>
	</xsl:template>
	
	<!-- Identifier elements that must never reach the output: match them and emit nothing. -->
	<xsl:template match="fileid | datasetid | objectid"></xsl:template>
	
</xsl:stylesheet>