# Digital_Repository / OARiNZ / DIY / deb_package / eprints-3.0 / perl_lib / EPrints / Plugin / Convert /
# nstanger on 7 Jun 2007 2 KB - Added debian package source.
package EPrints::Plugin::Convert::PlainText;


=head1 NAME

EPrints::Plugin::Convert::PlainText - Convert documents to plain text

=head1 DESCRIPTION

Uses the file extension to determine the file type.

=cut


use strict;
use warnings;

use Carp;
use English;
use Unicode::String;

use EPrints::Plugin::Convert;
our @ISA = qw/ EPrints::Plugin::Convert /;

# Lookup table: file extension => name of the converter used for it.
# can_convert() and export() match each key against the end of the document's
# main file name and look the value up under the repository's "executables"
# configuration.  "_special" marks plain text, which export() re-encodes
# itself instead of handing to an external program.
# xml = ?
our %APPS = (
	pdf  => 'pdftotext',
	doc  => 'antiword',
	htm  => 'elinks',
	html => 'elinks',
	xml  => 'elinks',
	ps   => 'ps2ascii',
	txt  => '_special',
);

# Constructor.  Delegates object creation to the parent class and then
# fills in this plugin's display name and visibility.
#
#   $class - class the constructor was invoked on
#   %opts  - options, handed to SUPER::new unchanged
#
# Returns the object produced by SUPER::new with "name" and "visible" set.
sub new
{
	my $class = shift;
	my %opts = @_;

	my $plugin = $class->SUPER::new( %opts );

	# Set both fields in one hash-slice assignment.
	@{$plugin}{qw( name visible )} = ( "Plain text conversion", "all" );

	return $plugin;
}

# Report whether this plugin can produce a plain-text version of a document.
#
#   $plugin - the plugin object; its repository's "executables"
#             configuration is consulted to see which converters exist
#   $doc    - the document; only its main file name (get_main) is examined
#
# Returns a ( 'text/plain' => { ... } ) pair describing the conversion when
# the main file name ends in ".txt", or in an extension listed in %APPS whose
# converter has an "executables" entry.  Returns the empty list otherwise,
# including when the document has no main file.
sub can_convert
{
	my( $plugin, $doc ) = @_;

	# Get the main file name; without one there is nothing to match against.
	my $fn = $doc->get_main();
	return () unless defined $fn;

	my @type = ('text/plain' => {
		plugin => $plugin,
		encoding => 'utf-8',
		phraseid => 'plaintext',
	});

	# Plain text needs no external program.
	if( $fn =~ /\.txt$/ )
	{
		return @type;
	}

	# Iterate with keys() rather than each(): returning from inside an each()
	# loop leaves the hash's single shared iterator part-way through, so the
	# next each() over %APPS would start mid-table and could miss an entry.
	foreach my $ext ( keys %APPS )
	{
		next unless $fn =~ /\.\Q$ext\E$/;

		my $app = $APPS{$ext};
		if( defined($plugin->get_repository->get_conf( "executables", $app )) )
		{
			return @type;
		}
	}

	return ();
}

# Convert a document's files to plain text inside a target directory.
#
#   $plugin - the plugin object (used to reach the repository configuration)
#   $dir    - directory the .txt files are written to
#   $doc    - the document; get_main, files and local_path are used
#   $type   - accepted for interface compatibility; not used here
#
# The converter is chosen from %APPS by the extension of the document's main
# file.  Every file of the document carrying that extension is converted:
# ".txt" files are re-encoded from ISO-8859-1 to UTF-8 in-process, all other
# types are passed to the configured external program.
#
# Returns the names (relative to $dir) of the non-empty files produced, with
# the main file's output first.  Returns the empty list when there is no main
# file, no usable converter, or the main file's conversion produced nothing.
sub export
{
	my( $plugin, $dir, $doc, $type ) = @_;

	my $repository = $plugin->get_repository;

	my $main = $doc->get_main;
	return () unless defined $main;

	# Find the app to use.  keys() is used instead of each() because leaving
	# an each() loop early keeps the hash's shared iterator part-way through
	# and makes a later each() over %APPS skip entries.
	my( $bin, $ext, $app );
	foreach my $candidate ( keys %APPS )
	{
		next unless $main =~ /\.\Q$candidate\E$/;

		my $handler = $APPS{$candidate};
		my $exe = $repository->get_conf( "executables", $handler );

		# "_special" (plain text) is converted in-process, so it must not be
		# rejected for lacking an executable.
		next unless $handler eq '_special' or defined $exe;

		( $ext, $app, $bin ) = ( $candidate, $handler, $exe );
		last;
	}
	return () unless defined $app;

	# Command template for external converters; falls back to
	# "<app> <source> <target>" when the repository configures none.
	my $invo;
	if( $app ne '_special' )
	{
		$invo = $repository->get_conf( "invocation", $app );
		$invo ||= "\$($app) \$(SOURCE) \$(TARGET)";
	}

	my %files = $doc->files;
	my @txt_files;
	foreach my $fn ( keys %files )
	{
		# Only files sharing the main file's extension are converted.
		my $tgt = $fn;
		next unless $tgt =~ s/\.\Q$ext\E$/.txt/;

		my $infile = EPrints::Utils::join_path( $doc->local_path, $fn );
		my $outfile = EPrints::Utils::join_path( $dir, $tgt );

		my $ok = 1;
		if( $app eq '_special' )
		{
			$ok = _latin1_file_to_utf8( $infile, $outfile );
		}
		else
		{
			my $cmd = EPrints::Utils::prepare_cmd( $invo,
				$app => $bin,
				SOURCE_DIR => $doc->local_path,
				SOURCE => $infile,
				TARGET_DIR => $dir,
				TARGET => $outfile,
			);
			# The exit status is not inspected; success is judged by whether
			# a non-empty output file exists afterwards.
			system( $cmd );
		}
		EPrints::Utils::chown_for_eprints( $outfile );

		# -s is false for both a missing and an empty file.
		if( $ok and -s $outfile )
		{
			if( $fn eq $main )
			{
				unshift @txt_files, $tgt;
			}
			else
			{
				push @txt_files, $tgt;
			}
		}
		elsif( $fn eq $main )
		{
			# Losing the main file means the whole export failed.
			return ();
		}
	}

	return @txt_files;
}

# Copy $infile to $outfile, re-encoding its contents from ISO-8859-1 to UTF-8.
# Uses PerlIO layers on perls newer than 5.8.0 and Unicode::String otherwise.
# Returns 1 on success, 0 if either file cannot be opened or the output
# cannot be closed cleanly.
sub _latin1_file_to_utf8
{
	my( $infile, $outfile ) = @_;

	my $use_perlio = $PERL_VERSION gt v5.8.0;

	open( my $fh, ( $use_perlio ? "<:encoding(iso-8859-1)" : "<" ), $infile )
		or return 0;

	my $fo;
	unless( open( $fo, ( $use_perlio ? ">:utf8" : ">" ), $outfile ) )
	{
		close( $fh );
		return 0;
	}

	while( defined( my $line = <$fh> ) )
	{
		$line = Unicode::String::latin1( $line )->utf8 unless $use_perlio;
		print {$fo} $line;
	}
	close( $fh );

	# Buffered write errors only surface when the handle is closed.
	return close( $fo ) ? 1 : 0;
}
