Newer
Older
Handbook / make-includes / include_content_files.pl
nstanger on 13 Mar 2005 2 KB - Added info about arguments.
#!/usr/bin/perl
################################################################################
#
# File: $Id$
#
# When generating the derived XML source for a document, we include the
# contents of all content files identified in an <@INC[]@> include tag.
# However, we can't just read all the include tags willy-nilly, because
# some of them may be commented out. Commented-out include tags should be
# ignored, because they may contain XML comments themselves, and nested XML
# comments are a no-no. This script solves the problem by simply stripping
# out all XML comments from the original source XML file (note that this
# does not affect comments in the included content files). This should
# have the side benefit of making the overall build process a little faster,
# because the XSLT processor doesn't have to process the comments.
#
# Arguments:
#
#	$1	The path to the paper root. This is the base path to start from
#		when looking for content files to include.
#
#	$2	The name of the file to process.
#
################################################################################

use strict;

# Remove XML comment text from a single input line.
#
# Arguments:
#
#	$line		The raw input line, including any trailing newline.
#
#	$in_comment	Reference to a flag that is true while we are inside a
#			comment that started on an earlier line. It is updated in
#			place so that the state carries over to the next line.
#
# Returns the text of the line that lies outside any comment. Any number of
# comments may start and/or end on the same line. The result is the empty
# string when the whole line is comment text.
sub strip_xml_comments
{
	my ($line, $in_comment) = @_;
	my $visible = '';
	my $pos = 0;
	my $length = length $line;

	while ($pos < $length)
	{
		if ($$in_comment)
		{
			# Discard everything up to and including the close marker. If
			# there is none, the rest of the line (newline included) is
			# still comment text.
			my $close = index($line, '-->', $pos);
			return $visible if ($close < 0);
			$pos = $close + 3;
			$$in_comment = 0;
		}
		else
		{
			# Keep everything up to the next open marker (or to the end of
			# the line when there is no further comment).
			my $open = index($line, '<!--', $pos);
			if ($open < 0)
			{
				$visible .= substr($line, $pos);
				last;
			}
			$visible .= substr($line, $pos, $open - $pos);
			$pos = $open + 4;
			$$in_comment = 1;
		}
	}

	return $visible;
}

# Return the complete contents of the content file at $path. The file is
# read directly rather than by shelling out to cat, so the path is taken
# literally: spaces and shell metacharacters in it are harmless, and no
# shell expansion is performed. An unreadable file produces a warning on
# STDERR and contributes no content; processing then carries on.
sub read_content_file
{
	my ($path) = @_;
	my $fh;

	unless (open $fh, '<', $path)
	{
		warn "Cannot read content file '$path': $!\n";
		return '';
	}

	local $/;	# slurp mode: read the whole file in one go
	my $content = <$fh>;
	close $fh;

	return defined $content ? $content : '';
}

# Expand every <@INC[path]@> include tag found in $text, where each path is
# relative to $paper_root. Each tag is replaced by a newline, the contents
# of the named file, and another newline, and the result always ends with a
# newline. Text containing no include tag is returned unchanged.
sub expand_includes
{
	my ($text, $paper_root) = @_;

	return $text unless ($text =~ /<\@INC\[[^]]+\]\@>/);

	# Drop the line's own newline first so that exactly one is emitted after
	# the trailing text, whether or not the input line had one.
	chomp $text;
	$text =~ s{<\@INC\[([^]]+)\]\@>}{"\n" . read_content_file("$paper_root/$1") . "\n"}ge;

	return "$text\n";
}

# Entry point: copy the file named by the second argument to standard
# output with all XML comments stripped and all remaining include tags
# expanded relative to the paper root given by the first argument. Dies if
# either argument is missing or the input file cannot be opened.
sub main
{
	my ($paper_root, $infilename) = @_;

	die "Usage: $0 <paper-root> <file>\n"
		unless (defined $paper_root && defined $infilename);

	open my $infile, '<', $infilename
		or die "Cannot open '$infilename': $!\n";

	# True while we are inside a comment that spans several lines.
	my $in_comment = 0;

	while (my $line = <$infile>)
	{
		my $visible = strip_xml_comments($line, \$in_comment);

		# Lines that consist entirely of comment text produce no output.
		next if ($visible eq '');

		print expand_includes($visible, $paper_root);
	}

	close $infile;
	return;
}

# Run only when executed as a script, so that the file can also be loaded
# (e.g., by a test) without processing anything.
main(@ARGV) unless caller;