Newer
Older
Digital_Repository / OARiNZ / DIY / deb_package / eprints-3.0 / cgi / oai2
nstanger on 7 Jun 2007 25 KB - Added debian package source.
  1. ######################################################################
  2. #
  3. # EPrints OAI 2.0 Handler
  4. #
  5. # Responds to incoming OAI requests
  6. #
  7. ######################################################################
  8. #
  9. # This file is part of GNU EPrints 2.
  10. #
  11. # Copyright (c) 2000-2004 University of Southampton, UK. SO17 1BJ.
  12. #
  13. # EPrints 2 is free software; you can redistribute it and/or modify
  14. # it under the terms of the GNU General Public License as published by
  15. # the Free Software Foundation; either version 2 of the License, or
  16. # (at your option) any later version.
  17. #
  18. # EPrints 2 is distributed in the hope that it will be useful,
  19. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. # GNU General Public License for more details.
  22. #
  23. # You should have received a copy of the GNU General Public License
  24. # along with EPrints 2; if not, write to the Free Software
  25. # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  26. #
  27. ######################################################################
  28.  
  29. #cjg need to recomment all subs
  30.  
  31. use EPrints;
  32.  
  33. # use FileHandle;
  34. # use IO::File;
  35. # use POSIX;
  36. # use XML::Writer;
  37.  
  38. use URI::Escape;
  39. # use Time::Local;
  40.  
  41. use strict;
  42.  
  43. # New session
  44. my $session = new EPrints::Session();
  45. exit( 0 ) unless( defined $session );
  46.  
  47. # Allowed exports (OAI restricts the prefixes that can be used)
  48. our %FORMATS = %{$session->get_repository->get_conf( "oai", "v2", "output_plugins" )};
  49.  
  50. # What are we begin asked?
  51. my $verb = $session->param( "verb" );
  52. #$session->get_database->set_debug(1);
  53. my $NS = "http://www.openarchives.org/OAI/2.0/";
  54. my $response = $session->make_element(
  55. "OAI-PMH",
  56. "xmlns"=>$NS,
  57. "xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance",
  58. "xsi:schemaLocation"=>$NS." http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd" );
  59.  
  60. $response->appendChild(
  61. $session->render_data_element(
  62. 2,
  63. "responseDate",
  64. EPrints::Time::get_iso_timestamp() ) );
  65.  
  66.  
  67. my @bits = ();
  68. my $request_desc = $session->make_element( "request" );
  69.  
  70. foreach my $attr (
  71. "verb",
  72. "identifier",
  73. "metadataPrefix",
  74. "from",
  75. "until",
  76. "set",
  77. "resumptionToken" )
  78. {
  79. my $value = $session->param( $attr );
  80.  
  81. # check it matches schema
  82. if( $attr eq "verb" )
  83. {
  84. next unless(
  85. $value eq "Identify" ||
  86. $value eq "GetRecord" ||
  87. $value eq "ListRecords" ||
  88. $value eq "ListIdentifiers" ||
  89. $value eq "ListMetadataFormats" ||
  90. $value eq "ListSets" );
  91. }
  92. if( $attr eq "identifier" )
  93. {
  94. next unless( $value =~ m/^[a-z]+:.*$/ );
  95. }
  96. if( $attr eq "metadataPrefix" )
  97. {
  98. next unless( $value =~ m/^[A-Za-z0-9attr!'$\(\)\+\-\.\*]+$/ );
  99. }
  100. if( $attr eq "from" || $attr eq "until")
  101. {
  102. next unless( $value =~ m/^\d\d\d\d-\d\d-\d\dT(\d\d:\d\d:\d\dZ)?$/ );
  103. }
  104. if( $attr eq "set" )
  105. {
  106. next unless( $value =~ m/^([A-Za-z0-9_!'$\(\)\+\-\.\*])+(:[A-Za-z0-9_!'$\(\)\+\-\.\*]+)*$/ );
  107. }
  108. # if( $attr eq "resumptionToken" ) { } # just any string
  109.  
  110. # may be setting it to undef - but that won't matter as that
  111. # unsets it.
  112. next unless defined $value;
  113. $request_desc->setAttribute( $attr=>$value );
  114. }
  115. my $url = $session->get_repository->get_conf( "base_url" );
  116. $url .= $session->get_uri;
  117. $request_desc->appendChild( $session->make_text( $url ) );
  118.  
  119. $response->appendChild( $session->make_indent( 2 ) );
  120. $response->appendChild( $request_desc );
  121.  
  122. $response->appendChild(
  123. render_verb( $session, $session->param( "verb" ) ) );
  124.  
  125. my $content = "text/xml";
  126. if( $session->param( "debug" ) eq "yes" )
  127. {
  128. $content = "text/plain";
  129. }
  130.  
  131. $session->send_http_header( content_type=>$content );
  132.  
  133. print <<END;
  134. <?xml version="1.0" encoding="UTF-8" ?>
  135. <?xml-stylesheet type='text/xsl' href='/oai2.xsl' ?>
  136.  
  137. END
  138. print EPrints::XML::to_string( $response );
  139. EPrints::XML::dispose( $response );
  140. $session->terminate();
  141. exit;
  142.  
  143. # OAI 2 Error conditions:
  144. #
  145. # badArgument
  146. # badResumptionToken
  147. # badVerb
  148. # caonnot DisseminateFormat
  149. # idDoesNotExist
  150. # noRecordsMatch
  151. # noSetHierachy
  152.  
  153. sub render_verb
  154. {
  155. my( $session , $verb ) = @_;
  156.  
  157. if( !defined $verb )
  158. {
  159. return render_oai_error( $session, "badVerb", "No verb was specified" );
  160. }
  161.  
  162. if( $verb eq "Identify" )
  163. {
  164. return Identify( $session );
  165. }
  166.  
  167. if( $verb eq "GetRecord" )
  168. {
  169. return GetRecord( $session );
  170. }
  171.  
  172. if( $verb eq "ListRecords" )
  173. {
  174. return ListRecords( $session );
  175. }
  176.  
  177. if( $verb eq "ListIdentifiers" )
  178. {
  179. return ListIdentifiers( $session );
  180. }
  181.  
  182. if( $verb eq "ListMetadataFormats" )
  183. {
  184. return ListMetadataFormats( $session );
  185. }
  186.  
  187. if( $verb eq "ListSets" )
  188. {
  189. return ListSets( $session );
  190. }
  191.  
  192. return render_oai_error( $session, "badVerb", "Unknown verb: '$verb'" );
  193. }
  194.  
  195. ######################################################################
  196. #
  197. # Identify( $session )
  198. #
  199. # Identify ourselves
  200. #
  201. ######################################################################
  202.  
  203. sub Identify
  204. {
  205. my( $session ) = @_;
  206.  
  207. my( $args, $errors ) = get_oai_args( $session, [], [] );
  208.  
  209. return join_errors( $session, $errors ) if( scalar @{$errors} > 0 );
  210. my $response = $session->make_element( "Identify" );
  211.  
  212. $response->appendChild( $session->render_data_element(
  213. 4,
  214. "repositoryName",
  215. $session->phrase( "archive_name" ) ) );
  216.  
  217. $response->appendChild( $session->render_data_element(
  218. 4,
  219. "baseURL",
  220. $session->get_repository->get_conf( "oai","v2","base_url" ) ) );
  221.  
  222. $response->appendChild( $session->render_data_element(
  223. 4,
  224. "protocolVersion",
  225. "2.0" ) );
  226.  
  227. $response->appendChild( $session->render_data_element(
  228. 4,
  229. "adminEmail",
  230. $session->get_repository->get_conf( "adminemail" ) ) );
  231.  
  232. # Later this may be either calcualted from the
  233. # database, or configurable.
  234. $response->appendChild( $session->render_data_element(
  235. 4,
  236. "earliestDatestamp",
  237. "0001-01-01" ) );
  238.  
  239. $response->appendChild( $session->render_data_element(
  240. 4,
  241. "deletedRecord",
  242. "persistent" ) );
  243.  
  244. $response->appendChild( $session->render_data_element(
  245. 4,
  246. "granularity",
  247. "YYYY-MM-DD" ) );
  248.  
  249. my $d1 = $session->make_element( "description" );
  250. my $NS = "http://www.openarchives.org/OAI/2.0/oai-identifier";
  251. my $XSD = "http://www.openarchives.org/OAI/2.0/oai-identifier.xsd";
  252. my $oaiid = $session->make_element(
  253. "oai-identifier",
  254. "xmlns"=>$NS,
  255. "xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance",
  256. "xsi:schemaLocation"=>"$NS $XSD" );
  257.  
  258. $d1->appendChild( $session->make_indent( 6 ) );
  259. $d1->appendChild( $oaiid );
  260. $response->appendChild( $session->make_indent( 4 ) );
  261. $response->appendChild( $d1 );
  262.  
  263. $oaiid->appendChild( $session->render_data_element(
  264. 8,
  265. "scheme",
  266. "oai" ) );
  267.  
  268. $oaiid->appendChild( $session->render_data_element(
  269. 8,
  270. "repositoryIdentifier",
  271. $session->get_repository->get_conf( "oai","v2","archive_id" ) ) );
  272.  
  273. $oaiid->appendChild( $session->render_data_element(
  274. 8,
  275. "delimiter",
  276. ":" ) );
  277.  
  278. $oaiid->appendChild( $session->render_data_element(
  279. 8,
  280. "sampleIdentifier",
  281. $session->get_repository->get_conf( "oai","v2","sample_identifier" ) ) );
  282.  
  283. my $d2 = $session->make_element( "description" );
  284. my $eprints = $session->make_element(
  285. "eprints",
  286. "xmlns"=>"http://www.openarchives.org/OAI/1.1/eprints",
  287. "xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance",
  288. "xsi:schemaLocation"=>"http://www.openarchives.org/OAI/1.1/eprints http://www.openarchives.org/OAI/1.1/eprints.xsd" );
  289. $d2->appendChild( $session->make_indent( 6 ) );
  290. $d2->appendChild( $eprints );
  291. $response->appendChild( $session->make_indent( 4 ) );
  292. $response->appendChild( $d2 );
  293.  
  294. $eprints->appendChild( render_text_url(
  295. $session,
  296. "content",
  297. $session->get_repository->get_conf( "oai","content" ) ) );
  298. $eprints->appendChild( render_text_url(
  299. $session,
  300. "metadataPolicy",
  301. $session->get_repository->get_conf( "oai","metadata_policy" ) ) );
  302.  
  303. $eprints->appendChild( render_text_url(
  304. $session,
  305. "dataPolicy",
  306. $session->get_repository->get_conf( "oai","data_policy" ) ) );
  307.  
  308. $eprints->appendChild( render_text_url(
  309. $session,
  310. "submissionPolicy",
  311. $session->get_repository->get_conf( "oai","submission_policy" ) ) );
  312.  
  313. foreach( @{$session->get_repository->get_conf( "oai","comments" )} )
  314. {
  315. $eprints->appendChild( $session->render_data_element(
  316. 8,
  317. "comment",
  318. $_ ) );
  319. }
  320. my $f = $session->make_doc_fragment();
  321. $f->appendChild( $session->make_indent( 2 ) );
  322. $f->appendChild( $response );
  323. return $f;
  324.  
  325. }
  326.  
  327. ######################################################################
  328. #
  329. # write_text_url( $writer, $name, $texturl )
  330. # hashref
  331. # Write a TextURL type block to writer, of name $name. Block will
  332. # contain a text and/or url element, defined in %texturl.
  333. # If texturl contains neither then this method returns without action.
  334. #
  335. ######################################################################
  336.  
  337. sub render_text_url
  338. {
  339. my( $session, $name, $texturl ) = @_;
  340.  
  341. my $f = $session->make_doc_fragment();
  342.  
  343. $f->appendChild( $session->make_indent( 8 ) );
  344. my $e = $session->make_element( $name );
  345. $f->appendChild( $e );
  346.  
  347. if ( defined $texturl->{"text"} )
  348. {
  349. $e->appendChild( $session->render_data_element(
  350. 10,
  351. "text",
  352. $texturl->{"text"} ) );
  353. }
  354.  
  355. if ( defined $texturl->{"url"} )
  356. {
  357. $e->appendChild( $session->render_data_element(
  358. 10,
  359. "URL",
  360. $texturl->{"url"} ) );
  361. }
  362.  
  363. return $f;
  364. }
  365.  
  366.  
  367. ######################################################################
  368. #
  369. # GetRecord( $session )
  370. #
  371. # Respond to a GetRecord verb: Retrieve a single metadata record
  372. #
  373. ######################################################################
  374.  
  375. sub GetRecord
  376. {
  377. my( $session ) = @_;
  378.  
  379. my( $args, $errors ) = get_oai_args( $session, [ "identifier", "metadataPrefix" ], [] );
  380.  
  381. return join_errors( $session, $errors ) if( scalar @{$errors} > 0 );
  382. if( !defined $session->get_repository->get_conf( "oai", "v2", "metadata_namespaces" )->{$args->{metadataPrefix}} )
  383. {
  384. return render_oai_error(
  385. $session,
  386. "cannotDisseminateFormat",
  387. "Record not available as metadata type: ".$args->{metadataPrefix} );
  388.  
  389. }
  390.  
  391. my $id = EPrints::OpenArchives::from_oai_identifier(
  392. $session,
  393. $args->{identifier} );
  394.  
  395. my $eprint = new EPrints::DataObj::EPrint(
  396. $session,
  397. $id,
  398. $session->get_repository->get_dataset( "archive" ) );
  399.  
  400. if( !defined $eprint )
  401. {
  402. # try deleted records
  403. $eprint = new EPrints::DataObj::EPrint(
  404. $session,
  405. $id,
  406. $session->get_repository->get_dataset( "deletion" ) );
  407. }
  408.  
  409. my $response = $session->make_element( "GetRecord" );
  410.  
  411. # The eprint exists, so write the record
  412. # if the metadataFormat isn't available for
  413. # this record, only the header will be output.
  414.  
  415. my $plugin_id = "Export::" . $FORMATS{ $args->{metadataPrefix} };
  416. my $plugin = $session->plugin( $plugin_id );
  417.  
  418. unless( defined $plugin )
  419. {
  420. EPrints::abort( "Could not find plugin $plugin_id" );
  421. }
  422.  
  423. $response->appendChild( $session->make_indent( 2 ) );
  424. $response->appendChild(
  425. EPrints::OpenArchives::make_record(
  426. $session,
  427. $eprint,
  428. $plugin,
  429. 1 ) );
  430.  
  431. my $f = $session->make_doc_fragment();
  432. $f->appendChild( $session->make_indent( 2 ) );
  433. $f->appendChild( $response );
  434. return $f;
  435. }
  436.  
  437.  
  438.  
  439.  
  440. ######################################################################
  441. #
  442. # ListIdentifiers( $session )
  443. # ListRecords( $session )
  444. #
  445. # Respond to ListIdentifiers & ListRecords verbs.
  446. #
  447. ######################################################################
  448.  
  449. sub ListIdentifiers
  450. {
  451. my( $session ) = @_;
  452.  
  453. return _list( $session, 1 );
  454. }
  455.  
  456. sub ListRecords
  457. {
  458. my( $session ) = @_;
  459.  
  460. return _list( $session, 2 );
  461. }
  462.  
  463. sub _list
  464. {
  465. my( $session , $mode ) = @_;
  466.  
  467. #mode 1 = ID
  468. #mode 2 = full metadata
  469. my $PAGESIZE = 100;
  470.  
  471. # different params depending if we have a resumptionToken
  472. # or not
  473.  
  474. my( $searchexp, $offset, $metadata_format );
  475.  
  476. if( defined $session->param( "resumptionToken" ) )
  477. {
  478. my( $args, $errors ) = get_oai_args( $session, [ "resumptionToken" ], [] );
  479.  
  480. return join_errors( $session, $errors ) if( scalar @{$errors} > 0 );
  481.  
  482. unless( $args->{"resumptionToken"} =~
  483. m#^(archive|deletion)/(\d+)/(\d+)(/(.*))?$# )
  484. {
  485. return render_oai_error(
  486. $session,
  487. "badResumptionToken",
  488. "Token is invalid (does not match regexp)" );
  489. }
  490. my $filters => $session->get_repository->get_conf( "oai", "filters" );
  491. $filters = [] unless defined $filters;
  492. my $cache_id;
  493. ( $offset, $cache_id, $metadata_format ) = ( $2, $3, $5 );
  494. $searchexp = EPrints::Search->new(
  495. dataset => $session->get_repository->get_dataset( "$1" ),
  496. session => $session,
  497. keep_cache => 1,
  498. filters => [
  499. @{$filters},
  500. ],
  501. cache_id => $cache_id );
  502.  
  503. unless( defined $searchexp )
  504. {
  505. return render_oai_error(
  506. $session,
  507. "badResumptionToken",
  508. "Token has expired" );
  509. }
  510. }
  511. else
  512. {
  513. my $optf = [ "until", "from", "set"];
  514. my $reqf = [ "metadataPrefix" ];
  515.  
  516. # We ignore metadataPrefix if doing ListIdentifiers
  517. # which is not quite the Right Thing, but saves much CPU.
  518.  
  519. my( $args, $errors ) = get_oai_args( $session, $reqf, $optf );
  520.  
  521. return join_errors( $session, $errors ) if( scalar @{$errors} > 0 );
  522.  
  523. $metadata_format = $args->{metadataPrefix};
  524.  
  525. if( !defined($FORMATS{ $metadata_format }) )
  526. {
  527. return render_oai_error(
  528. $session,
  529. "cannotDisseminateFormat",
  530. "Record not available as metadata type: ".$args->{metadataPrefix} );
  531. }
  532.  
  533. my $date_range;
  534. my( $date, $date, $errors, $g1, $g2, $e );
  535. $errors = [];
  536. if( defined $args->{from} )
  537. {
  538. ( $date , $g1 , $e ) = munge_date( $session, $args->{from}, 0 );
  539. push @{$errors}, @{$e};
  540. $date_range = $date."-";
  541. }
  542. return join_errors( $session, $errors ) if( scalar @{$errors} > 0 );
  543. if( defined $args->{until} )
  544. {
  545. ( $date , $g2 , $e ) = munge_date( $session, $args->{until}, 1 );
  546. push @{$errors}, @{$e};
  547. $date_range.= "-" if( !defined $date_range );
  548. $date_range.= $date;
  549. }
  550. return join_errors( $session, $errors ) if( scalar @{$errors} > 0 );
  551. if( defined $g1 && defined $g2 && $g1 ne $g2 )
  552. {
  553. return render_oai_error(
  554. $session,
  555. "badArgument",
  556. "from and until dates have different granularity ($g1 and $g2)" );
  557. }
  558.  
  559. my $ds = $session->get_repository->get_dataset( "archive" );
  560. my $filters => $session->get_repository->get_conf( "oai", "filters" );
  561. $filters = [] unless defined $filters;
  562. $searchexp = new EPrints::Search(
  563. session => $session,
  564. keep_cache => 1,
  565. allow_blank => 1,
  566. filters => [
  567. @{$filters},
  568. ],
  569. dataset => $ds );
  570.  
  571. if( defined $args->{set} )
  572. {
  573. my( $head , @tail ) = EPrints::OpenArchives::decode_setspec( $args->{set} );
  574. my( $key , $value ) = split( /=/ , $head );
  575. $value = pop @tail if( scalar @tail > 0 );
  576. my $views = $session->get_repository->get_conf( "oai","sets" ); #cjg
  577. my $info;
  578. foreach( @{$views} )
  579. {
  580. $info = $_ if( $_->{id} eq $key );
  581. }
  582. if( !defined $info )
  583. {
  584. return render_oai_error(
  585. $session,
  586. "badArgument",
  587. "Invalid set parameter; unknown key ( $key )" );
  588. }
  589. my @fields;
  590. my $match = "EX";
  591. foreach( split( "/", $info->{fields} ) )
  592. {
  593. my $field = EPrints::Utils::field_from_config_string( $ds, $_ );
  594. unless( $field->is_browsable() )
  595. {
  596. # Eeep. This is really bad. Just die now.
  597. my $type = $field->get_type();
  598. EPrints::abort( <<END );
  599. Cannot generate OAI set for field "$_"
  600. - Type "$type" cannot be browsed.
  601. END
  602. }
  603. push @fields, $field;
  604. if( $field->is_type( "subject" ) )
  605. {
  606. $match = "EQ";
  607. }
  608. }
  609. $searchexp->add_field( \@fields, $value, $match );
  610. }
  611. if( defined $date_range )
  612. {
  613. $searchexp->add_field(
  614. $ds->get_field( "datestamp" ),
  615. $date_range );
  616. }
  617.  
  618. $offset = 0;
  619.  
  620. if( !defined $searchexp )
  621. {
  622. # something went wrong
  623. return render_oai_error(
  624. $session,
  625. "badArgument",
  626. "Could not make Search (system error)." );
  627. }
  628. }
  629. $searchexp->perform_search();
  630. my $count = $searchexp->count();
  631.  
  632. if( $count == 0 )
  633. {
  634. ## End of archive items, time to do deleted ones.
  635. my $delsearchexp = $searchexp->clone;
  636. $delsearchexp->set_dataset(
  637. $session->get_repository->get_dataset( "deletion" ) );
  638. $delsearchexp->perform_search();
  639. $searchexp->dispose();
  640. $searchexp = $delsearchexp;
  641. $count = $searchexp->count();
  642. }
  643.  
  644. if( $count == 0 )
  645. {
  646. # no items at all
  647.  
  648. return render_oai_error(
  649. $session,
  650. "noRecordsMatch",
  651. "No items match. None. None at all. Not even deleted ones." );
  652. }
  653.  
  654. my $cache = $searchexp->get_cache_id();
  655. my $searchdsid = $searchexp->get_dataset()->id();
  656. my %opts = ();
  657. $opts{status}="deleted" if( $searchdsid eq "deletion" );
  658.  
  659. my $response;
  660. my $mdtoken = "";
  661.  
  662. my $response;
  663.  
  664. if( $mode == 1 )
  665. {
  666. $response = $session->make_element( "ListIdentifiers" );
  667.  
  668. my @records = $searchexp->get_records( $offset, $PAGESIZE );
  669. my $eprint;
  670. foreach $eprint ( @records )
  671. {
  672. $response->appendChild( $session->make_indent( 2 ) );
  673. $response->appendChild(
  674. EPrints::OpenArchives::make_header(
  675. $session,
  676. $eprint,
  677. 1 ) );
  678. }
  679. }
  680.  
  681. if( $mode == 2 )
  682. {
  683. $response = $session->make_element( "ListRecords" );
  684.  
  685. my $plugin_id = "Export::" . $FORMATS{ $metadata_format };
  686. my $plugin = $session->plugin( $plugin_id );
  687.  
  688. unless( defined $plugin )
  689. {
  690. EPrints::abort( "Could not find plugin $plugin_id" );
  691. }
  692.  
  693.  
  694. my @records = $searchexp->get_records( $offset, $PAGESIZE );
  695. my $eprint;
  696. foreach $eprint ( @records )
  697. {
  698. $response->appendChild( $session->make_indent( 2 ) );
  699. $response->appendChild(
  700. EPrints::OpenArchives::make_record(
  701. $session,
  702. $eprint,
  703. $plugin,
  704. 1 ) );
  705. }
  706.  
  707. $mdtoken = "/".$metadata_format;
  708. }
  709.  
  710. $searchexp->dispose();
  711.  
  712. my $tokenvalue = "";
  713. if( $count > $offset+$PAGESIZE )
  714. {
  715. $tokenvalue = $searchdsid."/".
  716. ($offset+$PAGESIZE)."/".
  717. $cache.$mdtoken;
  718. }
  719. elsif( $searchdsid eq "archive" )
  720. {
  721. ## End of archive items, time to do deleted ones.
  722. my $delsearchexp = $searchexp->clone;
  723. $delsearchexp->set_dataset(
  724. $session->get_repository->get_dataset( "deletion" ) );
  725. $delsearchexp->perform_search();
  726. my $cache = $delsearchexp->get_cache_id();
  727. my $count = $delsearchexp->count();
  728. $delsearchexp->dispose();
  729. # no point giving a token if we have no
  730. # deleted items.
  731. if( $count > 0 )
  732. {
  733. $tokenvalue = "deletion/0/$cache".$mdtoken;
  734. }
  735. }
  736.  
  737. if( EPrints::Utils::is_set( $tokenvalue ) )
  738. {
  739. $response->appendChild( $session->render_data_element(
  740. 2,
  741. "resumptionToken",
  742. $tokenvalue ) );
  743. }
  744. my $f = $session->make_doc_fragment();
  745. $f->appendChild( $session->make_indent( 2 ) );
  746. $f->appendChild( $response );
  747. return $f;
  748. }
  749.  
  750.  
  751. ######################################################################
  752. #
  753. # ListMetadataFormats( $session )
  754. #
  755. ######################################################################
  756.  
  757. sub ListMetadataFormats
  758. {
  759. my( $session ) = @_;
  760. my( $args, $errors ) = get_oai_args( $session, [], [ "identifier" ] );
  761.  
  762. return join_errors( $session, $errors ) if( scalar @{$errors} > 0 );
  763.  
  764. my $eprint;
  765. if( defined $args->{identifier} )
  766. {
  767. $eprint = new EPrints::DataObj::EPrint(
  768. $session,
  769. EPrints::OpenArchives::from_oai_identifier(
  770. $session,
  771. $args->{identifier} ),
  772. $session->get_repository->get_dataset( "archive" ) );
  773.  
  774. }
  775. my $response = $session->make_element( "ListMetadataFormats" );
  776.  
  777. foreach ( keys %FORMATS )
  778. {
  779. my $plugin_id = "Export::" . $FORMATS{ $_ };
  780. my $plugin = $session->plugin( $plugin_id ) or next;
  781. if( defined($eprint) )
  782. {
  783. my $md = $eprint->export( $FORMATS{ $_ } ) or next;
  784. EPrints::XML::dispose( $md );
  785. }
  786. my $mdf = $session->make_element( "metadataFormat" );
  787.  
  788. $mdf->appendChild( $session->render_data_element(
  789. 4,
  790. "metadataPrefix",
  791. $_ ) );
  792.  
  793. $mdf->appendChild( $session->render_data_element(
  794. 4,
  795. "schema",
  796. $plugin->{ 'schemaLocation' }
  797. ));
  798. $mdf->appendChild( $session->render_data_element(
  799. 4,
  800. "metadataNamespace",
  801. $plugin->{ 'xmlns' }
  802. ));
  803. $response->appendChild( $session->make_indent( 2 ) );
  804. $response->appendChild( $mdf );
  805. }
  806. my $f = $session->make_doc_fragment();
  807. $f->appendChild( $session->make_indent( 2 ) );
  808. $f->appendChild( $response );
  809. return $f;
  810. }
  811.  
  812.  
  813.  
  814. ######################################################################
  815. #
  816. # ListSets( $session )
  817. #
  818. # Respond to a ListSets verb.
  819. #
  820. ######################################################################
  821.  
  822. sub ListSets
  823. {
  824. my( $session ) = @_;
  825. my( $args, $errors ) = get_oai_args( $session, [], [ 'resumptionToken' ] );
  826.  
  827. return join_errors( $session, $errors ) if( scalar @{$errors} > 0 );
  828.  
  829. if( defined $args->{resumptionToken} )
  830. {
  831. return render_oai_error(
  832. $session,
  833. "badResumptionToken",
  834. "Resumption Tokens not supported for ListSets" );
  835. }
  836.  
  837. my @sets = ();
  838. my %setnames = ();
  839.  
  840. my $response = $session->make_element( "ListSets" );
  841.  
  842. my $ds = $session->get_repository->get_dataset( "archive" );
  843. my $ds_del = $session->get_repository->get_dataset( "deletion" );
  844.  
  845. my $viewconf = $session->get_repository->get_conf( "oai","sets" );
  846. my $info;
  847. foreach $info ( @{$viewconf} )
  848. {
  849. my $fieldname;
  850. my %v = ();
  851. foreach $fieldname ( split( "/" , $info->{fields} ) )
  852. {
  853. my $field = EPrints::Utils::field_from_config_string( $ds, $fieldname );
  854. if( $field->is_type( "subject" ) )
  855. {
  856. my $topsubj = EPrints::DataObj::Subject->new(
  857. $session,
  858. $field->get_property( "top" ) );
  859. my $i;
  860. foreach $i ( @{$topsubj->get_subjects( 0, 0, 1 )} )
  861. {
  862. my @kb = split( ":", $i->[0] );
  863. foreach( @kb )
  864. {
  865. $_ = EPrints::OpenArchives::encode_setspec( $_ );
  866. }
  867. my $key = join( ":", @kb );
  868. $v{$key} = $i->[1];
  869. }
  870. }
  871. else
  872. {
  873. my $v1 = $field->get_values( $session, $ds );
  874. my $delfield = $field->clone();
  875. #cjg why clone with new style datasets?
  876. #$delfield->set_dataset( $ds_del );
  877. my $v2 = $delfield->get_values( $session, $ds_del );
  878. foreach( @{$v1}, @{$v2} )
  879. {
  880. my $key = EPrints::OpenArchives::encode_setspec( $_ );
  881. if( !defined $key ) { $key=""; }
  882. $v{$key} = EPrints::Utils::tree_to_utf8( $field->get_value_label( $session, $_ ) );
  883. }
  884. }
  885. }
  886. unless( $info->{allow_null} ) { delete $v{""}; }
  887. foreach( keys %v )
  888. {
  889. my $set = $session->make_element( "set" );
  890. $response->appendChild( $session->make_indent( 2 ) );
  891. $response->appendChild( $set );
  892. my $spec = EPrints::OpenArchives::encode_setspec( $info->{id}."=" ).$_;
  893. $set->appendChild( $session->render_data_element(
  894. 4,
  895. "setSpec",
  896. $spec ) );
  897. my $name = $session->get_view_name( $ds, $info->{id} )." = ".$v{$_};
  898. $set->appendChild( $session->render_data_element(
  899. 4,
  900. "setName",
  901. $name ) );
  902. }
  903. }
  904.  
  905. my $f = $session->make_doc_fragment();
  906. $f->appendChild( $session->make_indent( 2 ) );
  907. $f->appendChild( $response );
  908. return $f;
  909. }
  910.  
  911.  
  912. ######################################################################
  913. #
  914. # send_http_error( $session, $code, $message )
  915. #
  916. # Send an HTTP error as a response
  917. #
  918. ######################################################################
  919.  
  920. sub send_http_error
  921. {
  922. my( $session, $code, $message ) = @_;
  923.  
  924. my $r = Apache->request;
  925. $r->content_type( 'text/html' );
  926. $r->status_line( "$code $message" );
  927. $r->send_http_header;
  928. my $title = "Error $code in OAI request";
  929. $r->print( <<END );
  930. <html>
  931. <head><title>$title</title></head>
  932. <body>
  933. <h1>$title</h1>
  934. <p>$message</p>
  935. </body>
  936. END
  937. }
  938.  
  939.  
  940.  
  941. sub render_oai_error
  942. {
  943. my( $session, $code, $message ) = @_;
  944.  
  945. return $session->render_data_element(
  946. 2,
  947. "error",
  948. $message,
  949. code => $code );
  950. }
  951.  
  952. sub get_oai_args
  953. {
  954. my( $session, $required, $optional ) = @_;
  955.  
  956. my %a;
  957. foreach( @{$required}, @{$optional} ) { $a{$_}=1; }
  958. $a{verb} = 1;
  959.  
  960. my %args;
  961. my @errors;
  962. foreach( $session->param() )
  963. {
  964. if( $a{$_} == 1 )
  965. {
  966. my @p = $session->param( $_ );
  967. $args{$_} = $p[0];
  968. delete $a{$_};
  969. if( scalar @p > 1 )
  970. {
  971. # Repeated Arg
  972. push @errors, render_oai_error(
  973. $session,
  974. "badArgument",
  975. "Repeated argument: $_" );
  976. }
  977. next;
  978. }
  979.  
  980. push @errors, render_oai_error(
  981. $session,
  982. "badArgument",
  983. "Illegal argument: $_" );
  984. }
  985.  
  986. foreach( @{$required} )
  987. {
  988. next unless( $a{ $_ } );
  989. push @errors, render_oai_error(
  990. $session,
  991. "badArgument",
  992. "Missing required argument: $_" );
  993. }
  994.  
  995. if( defined $args{identifier} &&
  996. $args{identifier} !~ m/^oai:[a-zA-Z][a-zA-Z0-9\-]*(\.[a-zA-Z][a-zA-Z0-9\-]+)+:[a-zA-Z0-9\-_\.!~\*'\(\);\/\?:\@\&=\+\$,\%]+$/ )
  997. {
  998. push @errors, render_oai_error(
  999. $session,
  1000. "badArgument",
  1001. "identifier does not match regexp: $args{identifier}" );
  1002. }
  1003. return( \%args, \@errors );
  1004. }
  1005.  
  1006. sub join_errors
  1007. {
  1008. my( $session, $errors ) = @_;
  1009.  
  1010. my $f = $session->make_doc_fragment;
  1011. foreach( @{$errors} )
  1012. {
  1013. $f->appendChild( $_ );
  1014. }
  1015. return $f;
  1016. }
  1017.  
  1018. sub munge_date
  1019. {
  1020. my( $session, $string, $roundup ) = @_;
  1021. if( $string !~ m/^(\d\d\d\d)-(\d\d)-(\d\d)(T(\d\d):(\d\d):(\d\d(\.\d+)?)Z)?$/ )
  1022. {
  1023. return( "", "", [render_oai_error(
  1024. $session,
  1025. "badArgument",
  1026. "not valid datetime: $string" )] );
  1027. }
  1028. my( $year, $month, $day, $hour, $min, $sec ) = ( $1 , $2 , $3 , $5, $6 , $7 );
  1029.  
  1030. if( defined $hour )
  1031. {
  1032. return( "", "", [render_oai_error(
  1033. $session,
  1034. "badArgument",
  1035. "Max granularity is YYYY-MM-DD: $string" )] );
  1036. }
  1037.  
  1038. my $granularity = "YYYY-MM-DD";
  1039.  
  1040. # $granularity = "YYYY-MM" unless( defined $day );
  1041. # $granularity = "YYYY" unless( defined $month );
  1042.  
  1043. # YYYY and YYYY-MM granularities disappeard between OAI2.0alpha3
  1044. # and OAI2.0 but I'll keep the code around just in case.
  1045.  
  1046. # if( !defined $month )
  1047. # {
  1048. # $month = ( $roundup ? 12 : 1 );
  1049. # }
  1050. # if( !defined $day )
  1051. # {
  1052. # $day = 1;
  1053. # if( $roundup )
  1054. # {
  1055. # $month+=1;
  1056. # if( $month == 13 ) { $month = 1; $year+=1; }
  1057. # my $gt = timegm(0,0,0,$day,$month-1,$year);
  1058. # # $gt is one month too far so we want one day less.
  1059. # my @bits = gmtime( $gt - 24*60*60 );
  1060. # $day = $bits[3];
  1061. # $month = $bits[4]+1;
  1062. # $year = $bits[5]+1900;
  1063. # }
  1064. # }
  1065.  
  1066. my $date = sprintf( "%04d-%02d-%02d",$year,$month,$day );
  1067.  
  1068. return( $date, $granularity, []);
  1069. }