#!/usr/bin/perl
################################################################################
#
# File: $Id$
#
# When generating the derived XML source for a document, we include the
# contents of all content files identified in an <@INC[]@> include tag.
# However, we can't just read all the include tags willy-nilly, because some
# of them may be commented out. Commented-out include tags should be ignored,
# because the included files may contain XML comments themselves, and nested
# XML comments are a no-no. This script solves the problem by simply
# stripping out all XML comments from the original source XML file (note that
# this does not affect comments in the included content files). This should
# have the side benefit of making the overall build process a little faster,
# because the XSLT processor doesn't have to process the comments.
#
# The result is written to standard output.
#
# Arguments:
#
#   $1  The path to the paper root. This is the base path to start from
#       when looking for content files to include.
#
#   $2  The name of the file to process.
#
################################################################################

use strict;
use warnings;

# Run as a script, but stay loadable (e.g. via "require") without side effects.
main(@ARGV) unless caller;

# Entry point: copy the input file to STDOUT with XML comments removed and
# include tags replaced by the contents of the files they name.
#
#   $paper_root  Base directory for include files.
#   $infilename  Source XML file to process.
#
# Dies if an argument is missing or the input file cannot be opened.
sub main {
    my ($paper_root, $infilename) = @_;

    die "Usage: $0 <paper-root> <file>\n"
        unless defined $paper_root && defined $infilename;

    open my $in, '<', $infilename
        or die "$0: cannot open '$infilename': $!\n";

    # True while we are inside a comment that started on an earlier line.
    my $in_comment = 0;

    while (my $line = <$in>) {
        my $kept;
        ($kept, $in_comment) = strip_comments($line, $in_comment);

        # Lines lying entirely inside a comment contribute nothing at all.
        print expand_includes($kept, $paper_root) if length $kept;
    }

    close $in;
    return;
}

# Remove XML comment text from a single line.
#
#   $line        The line to filter (normally including its newline).
#   $in_comment  True if a comment opened on a previous line is still open.
#
# Returns a two-element list: the text of $line that lies outside comments,
# and the comment state at the end of the line. Any number of comments on the
# line is handled, including a line that closes one comment and opens another.
# The line's newline is kept only when the line ends outside a comment.
sub strip_comments {
    my ($line, $in_comment) = @_;

    my $kept = '';
    my $pos  = 0;
    my $len  = length $line;

    while ($pos < $len) {
        if ($in_comment) {
            my $close = index $line, '-->', $pos;
            last if $close < 0;          # rest of the line is comment text
            $pos        = $close + 3;    # resume just after "-->"
            $in_comment = 0;
        }
        else {
            my $open = index $line, '<!--', $pos;
            if ($open < 0) {
                $kept .= substr $line, $pos;    # no more comments here
                last;
            }
            $kept .= substr $line, $pos, $open - $pos;
            $pos        = $open + 4;     # resume just after "<!--"
            $in_comment = 1;
        }
    }

    return ($kept, $in_comment);
}

# Replace every <@INC[name]@> tag in $text with a newline, the contents of
# "$paper_root/name", and another newline. Returns the expanded text.
sub expand_includes {
    my ($text, $paper_root) = @_;

    $text =~ s{<\@INC\[([^\]]+)\]\@>}
              {"\n" . read_include("$paper_root/$1") . "\n"}ge;

    return $text;
}

# Return the entire contents of the file at $path. The file is opened
# directly rather than through a shell command, so characters in an include
# name are never interpreted by a shell. An unreadable file produces a
# warning on STDERR and an empty string, so processing carries on.
sub read_include {
    my ($path) = @_;

    my $fh;
    if (!open $fh, '<', $path) {
        warn "$0: cannot read include file '$path': $!\n";
        return '';
    }

    local $/;    # slurp mode: read the whole file in one go
    my $content = <$fh>;
    close $fh;

    return defined $content ? $content : '';
}