Newer
Older
Digital_Repository / OARiNZ / DIY / deb_package / eprints-3.0 / perl_lib / EPrints / Apache / LogHandler.pm
######################################################################
#
# EPrints::Apache::LogHandler
#
######################################################################
#
#  This file is part of GNU EPrints 2.
#  
#  Copyright (c) 2000-2004 University of Southampton, UK. SO17 1BJ.
#  
#  EPrints 2 is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#  
#  EPrints 2 is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#  
#  You should have received a copy of the GNU General Public License
#  along with EPrints 2; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
######################################################################

=pod

=head1 NAME

EPrints::Apache::LogHandler - Main handler for Apache log events

=head1 CONFIGURATION

To enable the Apache::LogHandler add to your ArchiveConfig:

   $c->{loghandler}->{enable} = 1;

=head1 DATA FORMAT

=over 4

=item requester

The requester is stored using their IP in URN format: C<urn:ip:x.x.x.x>.

=item serviceType

ServiceType is in format L<info:ofi/fmt:kev:mtx:sch_svc|http://alcme.oclc.org/openurl/servlet/OAIHandler?verb=GetRecord&metadataPrefix=oai_dc&identifier=info:ofi/fmt:kev:mtx:sch_svc>.

The value is encoded as C<?name=yes> (where C<name> is one of the services defined).

=item referent, referringEntity

These are stored in URN format: C<info:oai:repositoryid:eprintid>.

=item referent_docid

The document id as a fragment of the referent: C<#docid>.

=back

=head1 METHODS

=over 4

=cut

package EPrints::Apache::LogHandler;

use strict;
use warnings;

use URI;

use EPrints;
use EPrints::Apache::AnApache;

use constant NOT_MODIFIED => 304;

sub handler
{
	my( $r ) = @_;

	# If you're confused its probably because your browser is issuing NOT
	# MODIFIED SINCE (304 NOT MODIFIED)
	unless( $r->status == 200 ) 
	{
		return DECLINED;
	}

	my $session = new EPrints::Session or return DECLINED;
	my $repository = $session->get_repository;

	my $c = $r->connection;
	my $ip = $c->remote_ip;
	my $uri = URI->new($r->uri);

	my $access = {};
	$access->{datestamp} = EPrints::Time::get_iso_timestamp( $r->request_time );
	$access->{requester_id} = 'urn:ip:' . $ip;
	$access->{referent_id} = $r->uri;
	$access->{referent_docid} = undef;
	$access->{referring_entity_id} = $r->headers_in->{ "Referer" };
	$access->{service_type_id} = '';
	$access->{requester_user_agent} = $r->headers_in->{ "User-Agent" };

	# External full-text request
	if( $r->filename and $r->filename =~ /redirect$/ )
	{
	}
	else
	{
		my $eprintid = uri_to_eprintid( $session, $uri );
		unless( defined $eprintid )
		{
			# Not interested in this URL.
			return DECLINED;
		}

		# Request for an abstract page or full-text
		$access->{referent_id} = $eprintid;

		my $docid = uri_to_docid( $session, $eprintid, $uri );

		if( defined $docid )
		{
			$access->{referent_docid} = $docid;
			$access->{service_type_id} = "?fulltext=yes";
		}
		else
		{
			$access->{service_type_id} = "?abstract=yes";
		}
	}

	if( !$access->{referring_entity_id} or $access->{referring_entity_id} !~ /^https?:/ )
	{
		$access->{referring_entity_id} = '';
	}


	# Check for an internal referrer
	my $ref_uri = URI->new($access->{referring_entity_id});
	my $eprintid = uri_to_eprintid( $session, $ref_uri );

	if( defined $eprintid ) 
	{
		$access->{referring_entity_id} = $eprintid;

		my $docid = uri_to_docid( $session, $eprintid, $ref_uri );

		# If referring entity and referent are the same, and both are fulltext,
		# then this is likely to be inline content (e.g. an image or
		# javascript). For now, we'll ignore these requests.
		if( $access->{referring_entity_id} eq $access->{referent_id} and
			defined( $docid ) )
		{
			return OK;
		}
	}

	$session->get_repository->get_dataset( "access" )->create_object( $session, $access );
	
	return OK;
}

=item $id = EPrints::Apache::LogHandler::uri_to_eprintid( $session, $uri )

Returns the eprint id that $uri corresponds to, or undef.

=cut

sub uri_to_eprintid
{
	my( $session, $uri ) = @_;

	# uri is something like /xxxxxx/?
	if( $uri->path =~ m#^(?:/archive)?/(\d+)/# )
	{
		return 'info:' . EPrints::OpenArchives::to_oai_identifier( $session->get_repository->get_conf( "oai" )->{v2}->{ "archive_id" }, $1 );
	}
	
	return undef;
}

=item $id = EPrints::Apache::LogHandler::uri_to_docid( $session, $eprintid, $uri )

Returns the docid that $uri corresponds to (given the $eprintid), or undef.

=cut

sub uri_to_docid
{
	my( $session, $eprintid, $uri ) = @_;

	if( $uri->path =~ m#^(?:/archive)?/(\d+)/(\d+)/# )
	{
		return '#' . 1 * $2;
	}

	return undef;
}


1;

__END__

=back

=head1 SEE ALSO

L<EPrints::DataObj::Access>