#!/usr/bin/perl -w -I/opt/eprints3/perl_lib

######################################################################
#
# This file is part of GNU EPrints 2.
#
# Copyright (c) 2000-2004 University of Southampton, UK. SO17 1BJ.
#
# EPrints 2 is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# EPrints 2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with EPrints 2; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
######################################################################

=pod

=head1 NAME

B<generate_views> - Generate static browse pages for an EPrint repository

=head1 SYNOPSIS

B<generate_views> I<repository_id> [B<options>]

=head1 DESCRIPTION

This script renders static "browse views" for an EPrint repository.

It generates browse pages for each field configured as browsable in B<ArchiveConfig.pm>. It creates a static web page for each value of that field, and index pages to navigate to them.

For example, if we make "year" browsable then this script will generate one page for each unique value of the year field. A user can then view the 1995 page and see links to all the 1995 eprints.

The advantage of this is that it puts less load on the database than user searches, assuming you pick two or three sensible fields to make browsable.

This script should be run every hour or so, but that should be reduced to once a day or even once a week on large repositories, as the more eprints there are, the longer it will take to run.

The rough length of time to run this is of the order of O( B<languages> * B<eprints> * B<browsable fields> ).

You can automate running this with the B<cron> system.

=head1 ARGUMENTS

=over 8

=item B<repository_id>

The ID of the eprint repository to use.

=back

=head1 OPTIONS

=over 8

=item B<--help>

Print a brief help message and exit.

=item B<--man>

Print the full manual page and then exit.

=item B<--quiet>

Be vewwy vewwy quiet. This option will suppress all output unless an error occurs.

=item B<--verbose>

Explain in detail what is going on.
May be repeated for greater effect.

=item B<--version>

Output version information and exit.

=back

=head1 AUTHOR

This is part of the EPrints 3 system. EPrints 3 is developed by Christopher Gutteridge.

=head1 VERSION

EPrints Version: 3.0

=head1 CONTACT

For more information go to B<http://www.eprints.org/>, which gives information on mailing lists and the like.

Chris Gutteridge may be contacted at B<support@eprints.org>.

Should you need a real world address for some reason, EPrints can be contacted in the real world at

 EPrints c/o Christopher Gutteridge
 Department of Electronics and Computer Science
 University of Southampton
 SO17 1BJ
 United Kingdom

=head1 COPYRIGHT

This file is part of GNU EPrints 2.

Copyright (c) 2000-2004 University of Southampton, UK. SO17 1BJ.

EPrints 2 is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

EPrints 2 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with EPrints 2; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

=cut

use strict;
use warnings;

use EPrints;

use File::Copy;
use Getopt::Long;
use Pod::Usage;

my $version = 0;
my $verbose = 0;
my $quiet = 0;
my $help = 0;
my $man = 0;

GetOptions(
	'help|?' => \$help,
	'man' => \$man,
	'version' => \$version,
	'verbose+' => \$verbose,
	'silent' => \$quiet,
	'quiet' => \$quiet
) || pod2usage( 2 );
EPrints::Utils::cmd_version( "generate_views" ) if $version;
pod2usage( 1 ) if $help;
pod2usage( -exitstatus => 0, -verbose => 2 ) if $man;
pod2usage( 2 ) if( scalar @ARGV != 1 );

# Noise level: 0 = quiet, 1 = default, 2+ = one more per --verbose.
my $noise = 1;
$noise = 0 if( $quiet );
$noise = 1+$verbose if( $verbose );

# Name of the directory (under each language's htdocs tree) holding the views.
my $PATH = "view";

# Set STDOUT to auto flush (without needing a \n)
$|=1;

my $session = EPrints::Session->new( 1, $ARGV[0], $noise );
exit( 1 ) unless( defined $session );

my $views = $session->get_repository->get_conf( "browse_views" );

# NOTE: $ds is a file-level lexical; get_filtered_values() and render_list()
# read it directly rather than receiving it as a parameter.
my $ds = $session->get_repository->get_dataset( "archive" );

# Set when a freshly built tree could not be put online for some language.
my $failed = 0;

foreach my $langid ( @{$session->get_repository->get_conf( "languages" )} )
{
	$session->change_lang( $langid );

	# Everything is built in a scratch directory and swapped into place at
	# the end, so the live pages are never seen half-written.
	my $dir = $session->get_repository->get_conf( "htdocs_path" )."/".$langid."/".$PATH;
	my $tmpdir = $dir."_tmp.$$";
	my $doomdir = $dir."_toerase.$$";

	if( -e $tmpdir )
	{
		EPrints::Utils::rmtree( $tmpdir );
	}
	EPrints::Platform::mkdir( $tmpdir );

	foreach my $view ( @{$views} )
	{
		#print "Making browse pages for: $langid/".$view->{id}."\n" if( $noise > 0 );
		my @fieldids = split( ',', $view->{fields} );
		make_menu( $session, $tmpdir.'/'.$view->{id}, $ds, $view, \@fieldids, [], $noise );
	}

	# Make Browse Page which lists various views
	print "Making main browse index for: $langid ... " if( $noise > 1 );

	my $page = $session->make_doc_fragment();
	$page->appendChild( $session->html_phrase( "bin/generate_views:browseintro" ) );

	my $ul = $session->make_element( "ul" );
	foreach my $view ( @{$views} )
	{
		next if( $view->{nolink} );
		my $li = $session->make_element( "li" );
		my $link = $session->render_link( $view->{id}."/" );
		$link->appendChild( $session->make_text( $session->get_view_name( $ds, $view->{id} ) ) );
		$li->appendChild( $link );
		$ul->appendChild( $li );
	}
	$page->appendChild( $ul );

	my $title = $session->html_phrase( "bin/generate_views:browsetitle" );
	$session->write_static_page( $tmpdir."/index", {title=>$title, page=>$page}, "browsemain" );
	print "done\n" if( $noise > 1 );

	print "Moving $langid views to be online ... " if( $noise > 1 );

	# Move the live tree aside (there is none on the very first run).
	if( -e $dir && !move( $dir, $doomdir ) )
	{
		print STDERR "Could not move $dir to $doomdir: $!\n";
		EPrints::Utils::rmtree( $tmpdir );
		$failed = 1;
		next;
	}

	# Put the new tree online; if that fails, restore the previous one.
	if( !move( $tmpdir, $dir ) )
	{
		print STDERR "Could not move $tmpdir to $dir: $!\n";
		if( -e $doomdir && !move( $doomdir, $dir ) )
		{
			print STDERR "Could not restore $doomdir to $dir: $!\n";
		}
		$failed = 1;
		next;
	}

	EPrints::Utils::rmtree( $doomdir ) if( -e $doomdir );
	print "done\n" if( $noise > 1 );
}

$session->terminate();
exit( $failed ? 1 : 0 );


######################################################################
# @fields = get_fields_from_config( $ds, $ids )
#
# Turn one level of a view's field configuration into field objects.
# $ids is a "/"-separated list of field ids, optionally prefixed with
# "-" (which callers interpret as "reverse order"; it is stripped here).
# Fields that report themselves as not browsable are reported on STDERR
# and left out. Returns an empty list when nothing usable remains.
######################################################################

sub get_fields_from_config
{
	my( $ds, $ids ) = @_;

	return unless( defined $ids );

	$ids =~ s/^-//;

	my @fields;
	foreach my $fieldid ( split( "/", $ids ) )
	{
		my $field = EPrints::Utils::field_from_config_string( $ds, $fieldid );
		unless( $field->is_browsable() )
		{
			#cjg should abort here. Or maybe just log.
			print STDERR "Cannot generate browse pages for field \"".$fieldid."\"\n";
			print STDERR "- Type \"".$field->get_type()."\" cannot be browsed.\n";
			next;
		}
		push @fields, $field;
	}

	return @fields;
}


######################################################################
# $count = get_pfilter_count( $session, $ds, $pfilters, [$mode] )
#
# Count the items in $ds matching every filter in $pfilters. Each filter
# is a [ \@fields, $value ] pair; $mode is the match type handed to the
# search (default 'EX'). Only items whose metadata_visibility is 'show'
# are counted. Returns -1 when there are no filters, i.e. no search was
# run.
######################################################################

sub get_pfilter_count
{
	my( $session, $ds, $pfilters, $mode ) = @_;

	$mode = 'EX' unless( defined $mode );

	return -1 unless( scalar @{$pfilters} );

	my $searchexp = EPrints::Search->new(
		satisfy_all=>1,
		session=>$session,
		dataset=>$ds );

	# filter nosearch and hide metadata
	$searchexp->add_field( $ds->get_field('metadata_visibility'), 'show', 'EQ' );

	foreach my $filter ( @{$pfilters} )
	{
		$searchexp->add_field( $filter->[0], $filter->[1], $mode );
	}
	$searchexp->perform_search();
	my $count = $searchexp->count();
	$searchexp->dispose;

	return $count;
}


######################################################################
# @values = get_filtered_values( $session, $ids, $pfilters, $view, $fields )
#
# Collect the distinct values of the given fields, in the order produced
# by the first field's sort_values(), reversed if $ids starts with "-".
# Empty values are dropped unless $view->{allow_null} is set.
#
# $pfilters is accepted but not used: the values are not narrowed by the
# parent filters. The dataset is the file-level $ds.
######################################################################

sub get_filtered_values
{
	my( $session, $ids, $pfilters, $view, $fields ) = @_;

	my $reverse_list = ( $ids =~ s/^-// ) ? 1 : 0;

	my @values = ();
	if( scalar @{$fields} == 1 )
	{
		@values = @{ $fields->[0]->get_values( $session, $ds ) };
	}
	elsif( $fields->[0]->is_type( "name" ) )
	{
		# Merge names from several fields, de-duplicating on given:family.
		my %v = ();
		foreach my $field ( @{$fields} )
		{
			foreach my $name ( @{ $field->get_values( $session, $ds ) } )
			{
				# An undefined name becomes an empty name record; a plain
				# string could not be dereferenced below under strict refs.
				$name = {} unless( defined $name );
				$name->{given} = '' unless defined( $name->{given} );
				$name->{family} = '' unless defined( $name->{family} );
				$v{$name->{given}.':'.$name->{family}} = $name;
			}
		}
		@values = values %v;
	}
	else
	{
		# Merge plain values from several fields, de-duplicating on the value.
		my %v = ();
		foreach my $field ( @{$fields} )
		{
			foreach my $value ( @{ $field->get_values( $session, $ds ) } )
			{
				$v{ defined $value ? $value : "" } = 1;
			}
		}
		@values = keys %v;
	}

	if( !$view->{allow_null} )
	{
		# Undefined values are dropped too, as they compare equal to "".
		@values = grep { defined $_ && $_ ne "" } @values;
	}

	my $sorted = $fields->[0]->sort_values( $session, \@values );

	return $reverse_list ? reverse( @{$sorted} ) : @{$sorted};
}


######################################################################
# $ok = write_include_file( $file, $page, $noise )
#
# Serialise $page with EPrints::XML::to_string and write it to $file.
# Failures are reported on STDERR rather than being fatal, so one
# unwritable file does not abort the whole run. Returns 1 on success,
# 0 on failure.
######################################################################

sub write_include_file
{
	my( $file, $page, $noise ) = @_;

	print "Writing: $file\n" if( $noise > 1 );

	my $fh;
	unless( open( $fh, '>', $file ) )
	{
		print STDERR "Could not open $file for writing: $!\n";
		return 0;
	}
	print {$fh} EPrints::XML::to_string( $page, undef, 1 );
	unless( close( $fh ) )
	{
		print STDERR "Could not finish writing $file: $!\n";
		return 0;
	}

	return 1;
}


######################################################################
# $count = make_menu( $session, $dir, $ds, $view, $fieldids, $pfilters, $noise )
#
# Build one level of a browse view in $dir. $fieldids lists the field
# configuration for this level followed by any deeper levels; $pfilters
# holds the [ \@fields, $value ] pairs chosen at the levels above.
#
# For each value at this level it either recurses (when deeper levels
# remain) or writes the page listing the matching items, then writes the
# index page for the level unless $view->{noindex} is set.
#
# Returns the number of items matching $pfilters (-1 at the top level,
# where there are no filters), or 0 when the level was cut because
# nothing matched or no browsable field was configured.
######################################################################

sub make_menu
{
	my( $session, $dir, $ds, $view, $fieldids, $pfilters, $noise ) = @_;

	EPrints::Platform::mkdir( $dir );

	# head & tail
	my( $levelids, @sublevelids ) = @{$fieldids};

	my $count = get_pfilter_count( $session, $ds, $pfilters );
	if( $count == 0 )
	{
		unless( EPrints::Utils::is_set( $view->{nocut} ) && $view->{nocut} )
		{
			#print STDERR "cjg: Nothing here. Cut.\n";
			return 0;
		}
	}

	my @fields = get_fields_from_config( $ds, $levelids );
	unless( scalar @fields )
	{
		# Everything below relies on $fields[0].
		print STDERR "No browsable fields for view \"".$view->{id}."\" in $dir; skipping.\n";
		return 0;
	}

	my @values = get_filtered_values( $session, $levelids, $pfilters, $view, \@fields );

	my $has_submenu = ( scalar @sublevelids > 0 );
	my $is_subject = $fields[0]->is_type( "subject" );

	# if !$has_submenu && graph
	# render: graph page & return

	#################################
	#
	# Render the pages or recurse the next level of menus
	#
	#################################

	# For subject fields the sizes shown in the tree are counted up front
	# using 'EQ' matching; otherwise the per-page sizes gathered below are used.
	my $show_sizes = undef;
	if( $is_subject )
	{
		$show_sizes = {};
		foreach my $value ( @values )
		{
			my $filters = [ @{$pfilters}, [ \@fields, $value ] ];
			$show_sizes->{$value} = get_pfilter_count( $session, $ds, $filters, 'EQ' );
		}
	}

	my %size = ();
	foreach my $value ( @values )
	{
		my $fileid = mk_file_id( $value, $fields[0]->get_type );
		my $filters = [ @{$pfilters}, [ \@fields, $value ] ];

		if( $has_submenu )
		{
			$size{$value} = make_menu( $session, $dir.'/'.$fileid, $ds, $view, [@sublevelids], $filters, $noise );
			next;
		}

		my $page = $session->make_element( "div", class=>"ep_view_page ep_view_page_view_".$view->{id} );

		my @sh_ids = ();
		if( defined $view->{subheadings} )
		{
			@sh_ids = split( ',', $view->{subheadings} );
		}
		my( $list, $n ) = render_list( $session, $view, $filters, \@sh_ids, $view->{heading_level} );
		$size{$value} = $n;

		unless( $view->{nocount} )
		{
			my $phraseid = "bin/generate_views:blurb";
			if( $is_subject )
			{
				$phraseid = "bin/generate_views:subject_blurb";
			}
			$page->appendChild( $session->html_phrase(
				$phraseid,
				n=>$session->make_text( $size{$value} ) ) );
		}

		$page->appendChild( $list );

		unless( $view->{notimestamp} )
		{
			$page->appendChild( $session->html_phrase(
				"bin/generate_views:timestamp",
				time=>$session->make_text( EPrints::Time::human_time() ) ) );
		}

		# render page
		if( $view->{include} )
		{
			write_include_file( $dir."/".$fileid.".include", $page, $noise );
		}

		if( $view->{nohtml} )
		{
			EPrints::XML::dispose( $page );
			next;
		}

		# "parent value / parent value / this value" for the page title.
		my $pagedesc = $session->make_doc_fragment;
		foreach my $pfilter ( @{$pfilters} )
		{
			$pagedesc->appendChild( $pfilter->[0]->[0]->get_value_label( $session, $pfilter->[1] ) );
			$pagedesc->appendChild( $session->make_text( " / " ) );
		}
		$pagedesc->appendChild( $fields[0]->get_value_label( $session, $value ) );

		my $title = $session->html_phrase(
			"bin/generate_views:title",
			viewname=>$session->make_text( $session->get_view_name( $ds, $view->{id} ) ),
			value=>$pagedesc );

		my $htmlpage = $session->make_doc_fragment;
		if( $is_subject )
		{
			my $subject = EPrints::DataObj::Subject->new( $session, $value );
			# NOTE(review): presumably undef when no subject has this id;
			# the tree is then left out instead of dying.
			if( defined $subject )
			{
				# Show the subject's ancestors plus its direct children.
				my @ids = @{$subject->get_value( "ancestors" )};
				foreach my $child ( $subject->get_children )
				{
					push @ids, $child->get_value( "subjectid" );
				}

				my $subj = $session->make_element( "div", class=>"ep_view_subjects" );
				foreach my $field ( @fields )
				{
					$subj->appendChild( $session->render_subjects(
						\@ids,
						$field->get_property( "top" ),
						$value,
						2,
						$show_sizes ) );
				}
				$htmlpage->appendChild( $subj );
			}
		}
		$htmlpage->appendChild( $page );

		$session->write_static_page(
			$dir."/".$fileid,
			{title=>$title, page=>$htmlpage},
			"browseview" );
	}

	#################################
	#
	# Render the menu page
	#
	#################################

	return $count if( $view->{noindex} );

	my $page = $session->make_element( "div", class=>"ep_view_menu" );
	$page->appendChild( $session->html_phrase( "bin/generate_views:intro" ) );

	$show_sizes = \%size unless defined( $show_sizes );

	if( $is_subject )
	{
		$page->appendChild( render_subj_menu( $session, $view, $show_sizes, \@values, \@fields, $has_submenu ) );
	}
	else
	{
		$page->appendChild( render_menu( $session, $view, $show_sizes, \@values, \@fields, $has_submenu ) );
	}

	my $title = get_index_title( $session, $pfilters, $ds, $view );

	if( $view->{include} )
	{
		write_include_file( $dir."/index.include", $page, $noise );
	}

	$session->write_static_page(
		$dir."/index",
		{title=>$title, page=>$page},
		"browseindex" );

	return( $count );
}


######################################################################
# $xhtml = render_subj_menu( $session, $view, $sizes, $values, $fields, $has_submenu )
#
# Render the index of a subject-typed level as one subject tree per
# field. With $view->{hideempty} set, only subjects that are ancestors
# of a subject with a size greater than zero are shown.
######################################################################

sub render_subj_menu
{
	my( $session, $view, $sizes, $values, $fields, $has_submenu ) = @_;

	my $subjects_to_show = $values;

	if( $view->{hideempty} )
	{
		my %show = ();
		foreach my $value ( @{$values} )
		{
			next unless( $sizes->{$value} > 0 );
			my $subject = EPrints::DataObj::Subject->new( $session, $value );
			# NOTE(review): presumably undef for an unknown subject id.
			next unless( defined $subject );
			foreach my $id ( @{$subject->get_value( "ancestors" )} )
			{
				$show{$id} = 1;
			}
		}
		$subjects_to_show = [ grep { $show{$_} } @{$values} ];
	}

	my $f = $session->make_doc_fragment;
	foreach my $field ( @{$fields} )
	{
		$f->appendChild( $session->render_subjects(
			$subjects_to_show,
			$field->get_property( "top" ),
			undef,
			($has_submenu?3:2),
			$sizes ) );
	}

	return $f;
}


######################################################################
# $ul = render_menu( $session, $view, $sizes, $values, $fields, $has_submenu )
#
# Render the index of a non-subject level as a <ul> with one linked
# entry per value, followed by its size in brackets. Entries link to a
# sub-directory when deeper levels exist, otherwise to the value's
# .html page. Zero-sized entries are skipped if $view->{hideempty}.
######################################################################

sub render_menu
{
	my( $session, $view, $sizes, $values, $fields, $has_submenu ) = @_;

	my $ul = $session->make_element( "ul" );

	foreach my $value ( @{$values} )
	{
		next if( $view->{hideempty} && $sizes->{$value} == 0 );

		my $fileid = mk_file_id( $value, $fields->[0]->get_type );
		my $target = $fileid.( $has_submenu ? '/' : '.html' );

		my $link = $session->render_link( $target );
		$link->appendChild( $fields->[0]->get_value_label( $session, $value ) );

		my $li = $session->make_element( "li" );
		$li->appendChild( $link );
		$li->appendChild( $session->make_text( " (".$sizes->{$value}.")" ) );
		$ul->appendChild( $li );
	}

	return $ul;
}


######################################################################
# $title = get_index_title( $session, $pfilters, $ds, $view )
#
# Title for a level's index page: the plain view title at the top
# level, otherwise the view name plus the parent values joined by " / ".
######################################################################

sub get_index_title
{
	my( $session, $pfilters, $ds, $view ) = @_;

	my $viewname = $session->make_text( $session->get_view_name( $ds, $view->{id} ) );

	unless( scalar @{$pfilters} )
	{
		# top level index
		return $session->html_phrase(
			"bin/generate_views:indextitle",
			viewname=>$viewname );
	}

	# sub level index
	my $pagedesc = $session->make_doc_fragment;
	my $first = 1;
	foreach my $pfilter ( @{$pfilters} )
	{
		if( !$first )
		{
			$pagedesc->appendChild( $session->make_text( " / " ) );
		}
		$pagedesc->appendChild( $pfilter->[0]->[0]->get_value_label( $session, $pfilter->[1] ) );
		$first = 0;
	}

	return $session->html_phrase(
		"bin/generate_views:title",
		viewname=>$viewname,
		value=>$pagedesc );
}


######################################################################
# ( $xhtml, $count ) = render_list( $session, $view, $pfilters, $subheadings, $level )
#
# Render the citations of all items matching $pfilters and return them
# with the number of matches.
#
# If $subheadings (a list of field configurations) is non-empty, the
# items are grouped under an <hN> heading per value of the first entry,
# recursing for the remaining entries; groups without items are left
# out. $level is the heading level (default 2, capped at 6).
#
# Otherwise the items are laid out according to $view->{layout}:
# "orderedlist", "unorderedlist", "paragraph" (the default when unset),
# or anything else for bare citations. The dataset is the file-level $ds.
######################################################################

sub render_list
{
	my( $session, $view, $pfilters, $subheadings, $level ) = @_;

	if( !defined $level ) { $level = 2; }

	if( defined $subheadings && scalar @{$subheadings} > 0 )
	{
		my( $heading_ids, @subheading_ids ) = @{$subheadings};

		my @fields = get_fields_from_config( $ds, $heading_ids );
		unless( scalar @fields )
		{
			# Nothing to group by at this level: carry on with the
			# remaining subheadings instead of dying on $fields[0].
			return render_list( $session, $view, $pfilters, \@subheading_ids, $level );
		}
		my @values = get_filtered_values( $session, $heading_ids, $pfilters, $view, \@fields );

		my $heading_level = $level > 6 ? 6 : $level; # can't go past h6

		my $part = $session->make_doc_fragment;
		my $count = 0;
		foreach my $value ( @values )
		{
			my $filters = [ @{$pfilters}, [ \@fields, $value ] ];
			my( $list, $sh_count ) = render_list( $session, $view, $filters, \@subheading_ids, $level+1 );
			if( $sh_count == 0 )
			{
				EPrints::XML::dispose( $list );
				next;
			}
			$count += $sh_count;

			my $heading = $session->make_element( "h".$heading_level );
			$heading->appendChild( $fields[0]->get_value_label( $session, $value ) );
			$part->appendChild( $heading );
			$part->appendChild( $list );
		}
		return( $part, $count );
	}

	# Container for the citations.
	my $layout = $view->{layout};
	my $links;
	if( defined $layout && $layout eq "orderedlist" )
	{
		$links = $session->make_element( "ol" );
	}
	elsif( defined $layout && $layout eq "unorderedlist" )
	{
		$links = $session->make_element( "ul" );
	}
	else
	{
		$links = $session->make_doc_fragment();
	}

	# Called once per matching item; its last argument is the %mapinfo
	# copy of the view built below, in which layout is always defined.
	my $add_item = sub {
		my( $session, $dataset, $item, $info ) = @_;

		my $cite = $item->render_citation_link( $info->{citation} );

		if( $info->{layout} eq "paragraph" )
		{
			my $p = $session->make_element( "p" );
			$p->appendChild( $cite );
			$cite = $p;
		}
		elsif( $info->{layout} eq "orderedlist" || $info->{layout} eq "unorderedlist" )
		{
			my $li = $session->make_element( "li" );
			$li->appendChild( $cite );
			$cite = $li;
		}
		#otherwise layout is "none"

		$links->appendChild( $session->make_indent( 4 ) );
		$links->appendChild( $cite );
	};

	my $searchexp = EPrints::Search->new(
		custom_order=>$view->{order},
		satisfy_all=>1,
		session=>$session,
		dataset=>$ds );
	$searchexp->add_field( $ds->get_field('metadata_visibility'), 'show', 'EQ' );
	my $n = 0;
	foreach my $filter ( @{$pfilters} )
	{
		$searchexp->add_field( $filter->[0], $filter->[1], "EX", undef, "filter".($n++), 1 );
	}
	$searchexp->perform_search();
	my $count = $searchexp->count();

	my %mapinfo = %{$view};
	$mapinfo{links} = $links;
	if( !defined $mapinfo{layout} ) { $mapinfo{layout} = "paragraph"; }
	$searchexp->map( $add_item, \%mapinfo );
	$searchexp->dispose();

	return( $links, $count );
}


######################################################################
# $fileid = mk_file_id( $value, $type )
#
# File name component for a field value: the value itself (rendered
# with make_name_string for "name" fields) passed through
# escape_filename.
######################################################################

sub mk_file_id
{
	my( $value, $type ) = @_;

	my $fileid = $value;
	if( $type eq "name" )
	{
		$fileid = EPrints::Utils::make_name_string( $value );
	}

	return EPrints::Utils::escape_filename( $fileid );
}