#!/usr/bin/perl
#
# Harvest Dublin Core (oai_dc) records from an OAI-PMH repository.
#
# Usage: <script> BASE_URL [FROM_DATE]
#
#   BASE_URL   base URL of the OAI-PMH repository (required)
#   FROM_DATE  optional lower datestamp bound, sent as the "from" argument
#
# The upper bound ("until") is always today's date. For every record
# returned by ListRecords three lines are printed to STDOUT:
#
#   <identifier>\t<datestamp>
#   <stringified metadata object>
#   <comma-separated dc:title values>
#
# Any request failure terminates the script with a message on STDERR and a
# non-zero exit status.

use strict;
use warnings;

use HTTP::OAI;
use POSIX qw(strftime);

my ( $url, $last_date ) = @ARGV;
die "Usage: $0 BASE_URL [FROM_DATE]\n"
    unless defined $url && length $url;

# OAI-PMH expresses datestamps in UTC, so compute "today" in UTC rather than
# in the local time zone. "%Y-%m-%d" is the portable spelling of "%F".
my $today = strftime( "%Y-%m-%d", gmtime );

# Selective-harvesting arguments shared by both list requests. "from" is
# left out entirely when no start date was given instead of being sent as
# an undefined value.
my %date_range = ( until => $today );
$date_range{from} = $last_date
    if defined $last_date && length $last_date;

my $h = HTTP::OAI::Harvester->new( baseURL => $url );

my $response = $h->repository( $h->Identify );
if ( $response->is_error ) {
    die "Error requesting Identify:\n",
        $response->code . " " . $response->message, "\n";
}

# Note: repositoryVersion will always be 2.0, $r->version returns
# the actual version the repository is running
# print "Repository supports protocol version ", $response->version, "\n";

# Version 1.x repositories don't support metadataPrefix,
# but OAI-PERL will drop the prefix automatically
# if an Identify was requested first (as above)
#
# The identifiers returned here are not consumed; the request is kept as an
# early check that the repository accepts the harvest arguments.
$response = $h->ListIdentifiers(
    metadataPrefix => 'oai_dc',
    %date_range,
);
if ( $response->is_error ) {
    die( "Error harvesting: " . $response->message . "\n" );
}

# print "responseDate => ", $response->responseDate, "\n",
#     "requestURL => ", $response->requestURL, "\n";
# while( my $id = $response->next ) {
#     printf "%s\t%s\n", $id->identifier, $id->datestamp;
#     # print " (", $id->status, ")" if $id->status;
#     # print "\n";
#     # # Only available from OAI 2.0 repositories
#     # for( $id->setSpec ) {
#     #     print "\t", $_, "\n";
#     # }
# }

# Using a handler
$response = $h->ListRecords(
    metadataPrefix => 'oai_dc',
    handlers       => { metadata => 'HTTP::OAI::Metadata::OAI_DC' },
    %date_range,
);

while ( my $rec = $response->next ) {
    # A record may carry no metadata (presumably the case for records the
    # repository reports as deleted), and a record with metadata may have no
    # dc:title. Guard both so one such record cannot abort the whole run;
    # the affected lines are simply printed empty.
    my $metadata = $rec->metadata;
    my $titles   = defined $metadata ? $metadata->dc->{'title'} : undef;

    print $rec->identifier, "\t",
        $rec->datestamp, "\n",
        ( defined $metadata ? $metadata : '' ), "\n";
    print join( ',', @{ $titles || [] } ), "\n";
}

# next() returning false ends the loop on errors as well as on normal
# completion, so check the response afterwards to avoid a silently
# truncated harvest.
if ( $response->is_error ) {
    die( "Error harvesting: " . $response->message . "\n" );
}

# # Offline parsing
# $I = HTTP::OAI::Identify->new();
# $I->parse_string($content);
# $I->parse_file($fh);