Newer
Older
Digital_Repository / OARiNZ / DIY / deb_package / eprints-3.0 / perl_lib / EPrints / Plugin / Convert / PlainText.pm
package EPrints::Plugin::Convert::PlainText;

=pod

=head1 NAME

EPrints::Plugin::Convert::PlainText - Convert documents to plain-text

=head1 DESCRIPTION

Uses the file extension to determine file type.

=cut

use strict;
use warnings;

use Carp;
use English;
use Unicode::String;

use EPrints::Plugin::Convert;
our @ISA = qw/ EPrints::Plugin::Convert /;

# xml = ?
our %APPS = qw(
pdf		pdftotext
doc		antiword
htm		elinks
html		elinks
xml		elinks
ps		ps2ascii
txt		_special
);

sub new
{
	my( $class, %opts ) = @_;

	my $self = $class->SUPER::new( %opts );

	$self->{name} = "Plain text conversion";
	$self->{visible} = "all";

	return $self;
}

sub can_convert
{
	my ($plugin, $doc) = @_;

	# Get the main file name
	my $fn = $doc->get_main();

	my @type = ('text/plain' => {
		plugin => $plugin,
		encoding => 'utf-8',
		phraseid => 'plaintext',
	});

	if( $fn =~ /\.txt$/ )
	{
		return @type;
	}

	keys(%APPS);
	while( my( $ext, $app ) = each %APPS )
	{
		if( $fn =~ /\.$ext$/ and defined($plugin->get_repository->get_conf( "executables", $app )) ) {
			return @type;
		}
	}
	
	return ();
}

sub export
{
	my ( $plugin, $dir, $doc, $type ) = @_;

	# What to call the temporary file
	my $main = $doc->get_main;
	
	my( $bin, $ext, $app );
	
	keys(%APPS);
	# Find the app to use
	while( ( $ext, $app ) = each %APPS )
	{
		if( $main =~ /\.$ext$/ and defined($plugin->get_repository->get_conf( "executables", $app )) ) {
			$bin = $plugin->get_repository->get_conf( "executables", $app );
			last if defined($bin);
		}
	}
	return () unless defined($bin);
	
	my $invo = $plugin->get_repository->get_conf( "invocation", $app );
	$invo ||= "\$($app) \$(SOURCE) \$(TARGET)";
	
	my %files = $doc->files;
	my @txt_files;
	foreach my $fn ( keys %files )
	{
		my $tgt = $fn;
		next unless $tgt =~ s/\.$ext$/\.txt/;
		my $infile = EPrints::Utils::join_path( $doc->local_path, $fn );
		my $outfile = EPrints::Utils::join_path( $dir, $tgt );
		
		if( $ext eq 'txt' )
		{
			# PerlIO
			if( $PERL_VERSION gt v5.8.0 )
			{
				open( my $fh, "<:encoding(iso-8859-1)", $infile );
				open( my $fo, ">:utf8", $outfile );
				while(<$fh>) { print $fo $_ }
				close( $fh ); close( $fo );
			}
			# Unicode::String
			else
			{
				open( my $fh, "<", $infile );
				open( my $fo, ">", $outfile );
				while(<$fh>) { print $fo Unicode::String::latin1($_)->utf8; }
				close( $fh ); close( $fo );
			}
		}
		else
		{
			my $cmd = EPrints::Utils::prepare_cmd( $invo,
				$app => $bin,
				SOURCE_DIR => $doc->local_path,
				SOURCE => $infile,
				TARGET_DIR => $dir,
				TARGET => $outfile,
			);
			system( $cmd );
		}
		EPrints::Utils::chown_for_eprints( $outfile );
		
		if( -s EPrints::Utils::join_path( $dir, $tgt ) > 0 ) {
			if( $fn eq $doc->get_main ) {
				unshift @txt_files, $tgt;
			} else {
				push @txt_files, $tgt;
			}
		} elsif( $fn eq $doc->get_main ) {
			return ();
		}
	}

	return @txt_files;
}

1;