Newer
Older
Digital_Repository / OARiNZ / DIY / deb_package / eprints-3.0 / bin / generate_static
nstanger on 7 Jun 2007 12 KB - Added debian package source.
#!/usr/bin/perl -w -I/opt/eprints3/perl_lib 

######################################################################
#
#  This file is part of GNU EPrints 2.
#  
#  Copyright (c) 2000-2004 University of Southampton, UK. SO17 1BJ.
#  
#  EPrints 2 is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#  
#  EPrints 2 is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#  
#  You should have received a copy of the GNU General Public License
#  along with EPrints 2; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
######################################################################


=pod

=head1 NAME

B<generate_static> - Generate static pages of an EPrint repository using the template.

=head1 SYNOPSIS

B<generate_static> I<repository_id> [B<options>] 

=head1 DESCRIPTION

This script creates the static web site for EPrints (or, if you are running in multiple languages, it generates the websites).

It processes every file in B<EPRINTS/archives/ARCHIVE/cfg/static/LANGID/>. For each language it processes all the files in /LANGID/ and /generic/ into B<EPRINTS/archives/ARCHIVE/html/LANGID>. If that sounds confusing, don't worry, it's not that bad: just put your webpage outlines in static/en/ and your image files and the like in static/generic/, run this script, and see what happens.

Most files are copied into the target directory as is and directory structure is preserved. 

Files with a .xpage or .xhtml suffix are processed as they are copied.


=over 8

=item B<.xpage> 

This is an XML file with the following structure:

 <?xml version="1.0" standalone="no" ?>
 <!DOCTYPE page SYSTEM "entities.dtd" >
 <page>
   <title>This is the page title</title>
   <body>
     <p>Some XHTML body</p><p>Which is <b>neat</b></p>
   </body>
 </page>

The resulting file will be a .html file (foo.xpage becomes foo.html). It will take the template for this repository and insert the title and body from the appropriate places.  It will also cause the special EPrints entities to be converted as it is copied. See the main documentation.

=item B<.xhtml> 

This is a normal XHTML file but with the following XML header:

 <?xml version="1.0" standalone="no" ?>
 <!DOCTYPE html SYSTEM "entities.dtd" >

This will cause the HTML entities to be properly decoded. The file will also be renamed to .html; for example, foo.xhtml will become foo.html.

=back


=head1 NOTE FOR THE NON-ENGLISH MAJORITY

If you are running EPrints in a language other than English then place the static files in a directory of your ISO language ID instead of B<en>, for example French is B<fr>. The generic directory is for language neutral stuff. Which is extra handy if you want to run the site in more than one language. Also the entities file should be renamed from -en to -whatever eg. B<entities-fr.xml>.

=head1 ARGUMENTS

=over 8

=item B<repository_id> 

The ID of the eprint repository to use.

=back

=head1 OPTIONS

=over 8

=item B<--prune>

Remove stray files from the website (eg. if something was removed from the static pages).

=item B<--help>

Print a brief help message and exit.

=item B<--man>

Print the full manual page and then exit.

=item B<--quiet>

Be vewwy vewwy quiet. This option will suppress all output unless an error occurs.

=item B<--verbose>

Explain in detail what is going on.
May be repeated for greater effect.

=item B<--version>

Output version information and exit.

=back   

=head1 AUTHOR

This is part of the EPrints 3 system. EPrints 3 is developed by Christopher Gutteridge.

=head1 VERSION

EPrints Version: 3.0

=head1 CONTACT

For more information go to B<http://www.eprints.org/>, which gives information on mailing lists and the like.

Chris Gutteridge may be contacted at B<support@eprints.org>

Should you need a real world address for some reason, EPrints can be contacted in the real world at

 EPrints c/o Christopher Gutteridge
 Department of Electronics and Computer Science
 University of Southampton
 SO17 1BJ
 United Kingdom

=head1 COPYRIGHT

This file is part of GNU EPrints 2.

Copyright (c) 2000-2004 University of Southampton, UK. SO17 1BJ.

EPrints 2 is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

EPrints 2 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with EPrints 2; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


=cut

use EPrints;

use strict;
use Getopt::Long;
use Pod::Usage;

# Command-line flags. Each one is filled in by GetOptions() below; --verbose
# may be given several times and counts up.
my $version = 0;
my $verbose = 0;
my $quiet = 0;
my $prune = 0;
my $help = 0;
my $man = 0;

# --silent is accepted as a synonym for --quiet. An unknown option prints the
# usage text and exits with status 2.
GetOptions( 
	'help|?' => \$help,
	'man' => \$man,
	'version' => \$version,
	'prune' => \$prune,
	'verbose+' => \$verbose,
	'silent' => \$quiet,
	'quiet' => \$quiet
) || pod2usage( 2 );
EPrints::Utils::cmd_version( "generate_static" ) if $version;
pod2usage( 1 ) if $help;
pod2usage( -exitstatus => 0, -verbose => 2 ) if $man;
# Exactly one positional argument (the repository id) is required.
pod2usage( 2 ) if( scalar @ARGV != 1 ); 

# Output level: 0 = quiet, 1 = normal, 2 and up = increasingly verbose.
# --verbose is applied last, so it wins when combined with --quiet.
our $noise = 1;
$noise = 0 if( $quiet );
$noise = 1+$verbose if( $verbose );

# Set STDOUT to auto flush (without needing a \n)
$|=1;

# Direct method call instead of the indirect-object form "new Class( ... )",
# which perl can mis-parse when a sub called "new" is in scope.
# NOTE(review): the meaning of the leading 1 is defined by
# EPrints::Session::new and is not visible here - confirm before changing it.
my $session = EPrints::Session->new( 1 , $ARGV[0] , $noise );
exit( 1 ) unless( defined $session );

# Static source directories, lowest priority first. scan_static_dir() stores
# one source per target path, so a file found in a later directory replaces
# the same target path found in an earlier one. The even slots hold the
# language-neutral directories; the odd slots are filled in per language
# inside the loop below. The theme slots stay undefined when no theme is
# configured and are skipped when scanning.
my @static_dirs = ();

my $repository = $session->get_repository;
my $theme = $repository->get_conf( "theme" );
my $lib_path = $repository->get_conf( "lib_path" );
my $config_path = $repository->get_conf( "config_path" );

$static_dirs[0] = $lib_path."/static";
if( defined $theme )
{	
	$static_dirs[2] = $lib_path."/themes/$theme/static";
}
$static_dirs[4] = $config_path."/static";

# Set when a run without --prune finds files in the website it did not write.
my $strays = 0;
foreach my $langid ( @{$repository->get_conf( "languages" )} )
{
	$static_dirs[1] = $lib_path."/lang/$langid/static";
	if( defined $theme )
	{	
		$static_dirs[3] = $lib_path."/themes/$theme/lang/$langid/static";
	}
	$static_dirs[5] = $config_path."/lang/$langid/static";

	# Merge every existing source directory into one map of
	# target path (relative to the language root) => source file.
	my $map = {};
	foreach my $dir ( @static_dirs )
	{
		next if !defined $dir;
		next if !-d $dir;
		scan_static_dir( $dir, $map );
	}
	$session->change_lang( $langid );
	my $base_target_dir = $repository->get_conf( "htdocs_path" )."/$langid";

	# Every file written for this language, keyed by full path; used
	# below to tell generated files from strays.
	my $wrote_files = {};
	foreach my $rel_target ( keys %{$map} )
	{
		my $source = $map->{$rel_target};
		my $target = $base_target_dir.$rel_target;

		# Everything before the final path component is the directory
		# the file has to be written into.
		my( $target_dir ) = ( $target =~ m/^(.*)\/[^\/]+/ );
		if( !defined $target_dir )
		{
			EPrints::abort( "Can't work out target directory of $target" );
		}
	
		if( !-e $target_dir )
		{
			print "mkdir $target_dir\n" if( $noise >= 1);
			EPrints::Platform::mkdir( $target_dir );
		}

		# Take the suffix only from a successful match: after a failed
		# match $1 still holds the capture of an earlier regex.
		my $suffix = "";
		if( $source =~ m/\.([^.]+)$/ )
		{
			$suffix = $1;
		}

		print "$source -> $target\n" if( $noise >= 2);
		if( $suffix eq "xhtml" ) { copy_xhtml( $session, $source, $target, $wrote_files ); }
		elsif( $suffix eq "xpage" ) { copy_xpage( $session, $source, $target, $wrote_files ); }
		else { copy_plain( $session, $source, $target, $wrote_files ); }
	}	

	# do the magic auto.js and auto.css
	# auto.css @imports every stylesheet under /style/auto/; auto.js is the
	# concatenation of every script under /javascript/auto/. Targets are
	# visited in sorted order so both files come out the same on every run.
	my $js = "";
	my $css = "";
	my $base_url = $repository->get_conf( "base_url" );
	foreach my $rel_target ( sort keys %{$map} )
	{
		if( $rel_target =~ m/(\/style\/auto\/.*\.css$)/ )
		{
			$css .= "\@import url($base_url$1);\n";
		}	
		if( $rel_target =~ m/(\/javascript\/auto\/.*\.js$)/ )
		{
			my $js_source = $map->{$rel_target};
			# Three-argument open: the file name is never parsed for
			# a mode or pipe character.
			open( my $js_in, "<", $js_source ) || EPrints::abort( "Can't read $js_source: $!" );
			$js .= "\n\n\n/* From: $js_source */\n\n";
			$js .= join( "", <$js_in> );
			close( $js_in );
		}	
	}

	foreach my $auto ( [ "style/auto.css", $css ], [ "javascript/auto.js", $js ] )
	{
		my( $rel_path, $content ) = @{$auto};
		my $fn = "$base_target_dir/$rel_path";
		open( my $out, ">", $fn ) || EPrints::abort( "Can't write $fn: $!" );
		$wrote_files->{$fn} = 1;
		print {$out} $content;
		# Buffered write errors only show up when the handle is closed.
		close( $out ) || EPrints::abort( "Can't write $fn: $!" );
	}

	# Anything in the target tree that was not written above is a stray,
	# except for the view/, view_tmp and archive/ areas which are never
	# reported or removed here.
	my $existing_files = {};
	scan_dir( $base_target_dir, $existing_files );

	# \Q...\E makes the directory match as literal text; without it any
	# regex metacharacter in the path (a "." or "+", say) changes the
	# meaning of the pattern.
	my $protected = qr/^\Q$base_target_dir\E\/(?:view\/|view_tmp|archive\/)/;
	foreach my $e_file ( sort keys %{$existing_files} )
	{
		next if defined( $wrote_files->{$e_file} );
		next if( $e_file =~ $protected );
		if( $prune )
		{
			print "removing $e_file\n" if( $noise >= 1);
			unlink( $e_file ) || warn "Failed to remove $e_file: $!\n";
		}
		else
		{
			print "Unrecognised file in website: $e_file\n" if( $noise >= 1 );
			$strays = 1;
		}
	}
}

if( $strays )
{
	print "To prune unrecognised files run with the --prune option.\n" if( $noise >= 1 );
}
$session->terminate();

exit;

##################################

# Recursively record every file below $dir.
#
#   $dir - directory to walk
#   $map - hash reference; each file found is stored as { "full/path" => 1 }
#
# Entries named ".svn" or "CVS" are ignored, as are "." and "..".
# Aborts if a directory cannot be opened.
sub scan_dir
{
	my( $dir, $map ) = @_;

	my %ignored = map { $_ => 1 } ( ".svn", "CVS", ".", ".." );

	opendir( my $handle, $dir ) || EPrints::abort( "Failed to read dir: $dir" );
	my @entries = grep { !$ignored{$_} } readdir( $handle );
	closedir( $handle );

	# Files are recorded straight away; directories are kept back and
	# descended into once this level is finished.
	my @pending = ();
	foreach my $entry ( @entries )
	{
		my $path = "$dir/$entry";
		if( -d $path )
		{
			push @pending, $path;
		}
		else
		{
			$map->{$path} = 1;
		}
	}

	foreach my $path ( @pending )
	{
		scan_dir( $path, $map );
	}
}

# Add every file below $dir to $map, keyed by its path relative to $dir.
# This is the entry point for scan_static_dir_aux(); the walk starts with
# an empty target prefix.
sub scan_static_dir
{
	my $dir = shift;
	my $map = shift;

	return scan_static_dir_aux( $dir, $map, "" );
}

# Recursive worker for scan_static_dir().
#
#   $dir    - source directory being read
#   $map    - hash reference filled with { target path => source file }
#   $prefix - target path corresponding to $dir ("" at the top level)
#
# A target path is $prefix/filename, with a trailing .xhtml or .xpage
# rewritten to .html. An existing entry for the same target is replaced.
# Entries named ".svn" or "CVS" are ignored, as are "." and "..".
# Aborts if a directory cannot be opened.
sub scan_static_dir_aux
{
	my( $dir, $map, $prefix ) = @_;

	my %ignored = map { $_ => 1 } ( ".svn", "CVS", ".", ".." );

	opendir( my $handle, $dir ) || EPrints::abort( "Failed to read dir: $dir" );
	my @entries = grep { !$ignored{$_} } readdir( $handle );
	closedir( $handle );

	my @pending = ();
	foreach my $entry ( @entries )
	{
		my $source = "$dir/$entry";
		if( -d $source )
		{
			# Descend only after this level has been recorded.
			push @pending, $entry;
			next;
		}

		my $target = "$prefix/$entry";
		$target =~ s/\.x(html|page)$/.html/;
		$map->{$target} = $source;
	}

	foreach my $entry ( @pending )
	{
		scan_static_dir_aux( "$dir/$entry", $map, "$prefix/$entry" );
	}
}

# Copy a file to the website unchanged.
#
#   $session     - not used by this sub; accepted so that all copy_*
#                  subs can be called with the same argument list
#   $from        - source file
#   $to          - destination file
#   $wrote_files - hash reference; $to is recorded in it after the copy
#
# Aborts if the copy fails.
sub copy_plain
{
	my( $session, $from, $to, $wrote_files ) = @_;

	EPrints::Utils::copy( $from, $to )
		or EPrints::abort( "Can't copy $from to $to: $!" );

	$wrote_files->{$to} = 1;
}

# Turn an .xpage source file into a page of the website.
#
#   $session     - current session
#   $from        - .xpage source file
#   $to          - destination path, ending in .html
#   $wrote_files - hash reference passed on to write_static_page()
#
# The <title> and <body> children of the document element are each run
# through EPrints::XML::EPC::process() and handed to write_static_page() as
# the "title" and "page" parts. Any other child is ignored.
#
# Nothing is written, and a message is logged, when the file cannot be
# parsed or when either the title or the body element is missing.
sub copy_xpage
{
	my( $session, $from, $to, $wrote_files ) = @_;

	my $doc = $session->get_repository->parse_xml( $from );

	if( !defined $doc )
	{
		$session->get_repository->log( "Could not load file: $from" );
		return;
	}

	my $html = $doc->documentElement;
	my $parts = {};
	foreach my $node ( $html->getChildNodes )
	{
		my $part = $node->nodeName;
		# Drop a namespace prefix, so "x:body" is treated as "body".
		$part =~ s/^.*://;
		next unless( $part eq "body" || $part eq "title" );

		$parts->{$part} = $session->make_doc_fragment;
			
		foreach my $kid ( $node->getChildNodes )
		{
			$parts->{$part}->appendChild( 
				EPrints::XML::EPC::process( 
					$kid,
					in => $from,
					session => $session ) ); 
		}
	}
	foreach my $part ( qw/ title body / )
	{
		if( !$parts->{$part} )
		{
			$session->get_repository->log( "Error: no $part element in ".$from );
			EPrints::XML::dispose( $doc );
			return;
		}
	}

	# The body is passed on under the key "page".
	$parts->{page} = delete $parts->{body};
	# write_static_page() is given the path without its .html suffix.
	# The dot is escaped so that only a literal ".html" is removed; an
	# unescaped dot would also strip e.g. "xhtml" from "fooxhtml".
	$to =~ s/\.html$//;
	$session->write_static_page( $to, $parts, "static", $wrote_files );

	EPrints::XML::dispose( $doc );
}

# Turn an .xhtml source file into a page of the website.
#
#   $session     - current session
#   $from        - .xhtml source file
#   $to          - destination file
#   $wrote_files - hash reference passed on to page_to_file()
#
# The <html> element of the source is run through
# EPrints::XML::EPC::process(), set as the session's page and written to $to.
#
# Nothing is written, and a message is logged, when the file cannot be
# parsed or contains no html element.
sub copy_xhtml
{
	my( $session, $from, $to, $wrote_files ) = @_;

	my $doc = $session->get_repository->parse_xml( $from );

	if( !defined $doc )
	{
		$session->get_repository->log( "Could not load file: $from" );
		return;
	}

	my( $elements ) = EPrints::XML::find_elements( $doc, "html" );
	if( !defined $elements->{html} )
	{
		$session->get_repository->log( "Error: no html element in ".$from );
		EPrints::XML::dispose( $doc );
		return;
	}
	# why clone?
	#$session->set_page( $session->clone_for_me( $elements->{html}, 1 ) );
	$session->set_page( 
		EPrints::XML::EPC::process( 
			$elements->{html}, 
			in => $from,
			session => $session ) ); 

	$session->page_to_file( $to, $wrote_files );

	# Release the parsed source once the page has been written, as
	# copy_xpage() does; previously it was only disposed on the error path.
	# NOTE(review): assumes the page built by process() no longer needs
	# $doc after page_to_file() has run - the same assumption copy_xpage()
	# makes after write_static_page().
	EPrints::XML::dispose( $doc );
}