| 1 |
#!/usr/bin/perl -w
|
| 2 |
|
| 3 |
# Script parses the XML file for the appendix on preseeding and extracts
|
| 4 |
# example snippts to form the raw preseed example file. Section titles are
|
| 5 |
# added as headers.
|
| 6 |
# The script will include all text between <informalexample> tags that have
|
| 7 |
# the attribute 'role="example"' set, except if a 'condition' attribute is
|
| 8 |
# in force that does not match the specified release or if an 'arch' attribute
|
| 9 |
# is in force that does not match the specified architecture.
|
| 10 |
|
| 11 |
# Define module to use
|
| 12 |
use HTML::Parser();
|
| 13 |
use Getopt::Std;
|
| 14 |
|
| 15 |
local %tagstatus;
|
| 16 |
local %example;
|
| 17 |
local %ignore;
|
| 18 |
local $prevtag = '';
|
| 19 |
local $titletag;
|
| 20 |
local $settitle = 0;
|
| 21 |
|
| 22 |
$example{'print'} = 0;
|
| 23 |
$example{'in_sect'} = 0;
|
| 24 |
$example{'first'} = 1;
|
| 25 |
$example{'new'} = 0;
|
| 26 |
|
| 27 |
getopts('hda:r:') || die "Unknown command line arguments! Try $0 -h\n";
|
| 28 |
use vars qw($opt_h $opt_d $opt_a $opt_r);
|
| 29 |
|
| 30 |
if ($opt_h) {
|
| 31 |
print <<END;
|
| 32 |
preseed.pl: parses preseed appendix xml file to extract preseed example file
|
| 33 |
|
| 34 |
Usage: $0 [-hdac] <xml-file>
|
| 35 |
|
| 36 |
Options:
|
| 37 |
-h display this help information
|
| 38 |
-d debug mode
|
| 39 |
-a <arch> architecture for which to generate the example
|
| 40 |
(default: i386)
|
| 41 |
-r <release> release for which to generate the example (required)
|
| 42 |
END
|
| 43 |
exit 0;
|
| 44 |
}
|
| 45 |
|
| 46 |
die "Must specify release for which to generate example.\n" if ! $opt_r;
|
| 47 |
|
| 48 |
my $xmlfile = shift;
|
| 49 |
die "Must specify XML file to parse!\n" if ! $xmlfile;
|
| 50 |
die "Specified XML file \"$xmlfile\" not found.\n" if ! -f $xmlfile;
|
| 51 |
|
| 52 |
my $arch = $opt_a ? "$opt_a" : "i386";
|
| 53 |
my $release = $opt_r;
|
| 54 |
|
| 55 |
|
| 56 |
# Create instance
|
| 57 |
$p = HTML::Parser->new(
|
| 58 |
start_h => [\&start_rtn, 'tagname, text, attr'],
|
| 59 |
text_h => [\&text_rtn, 'text'],
|
| 60 |
end_h => [\&end_rtn, 'tagname']);
|
| 61 |
|
| 62 |
# Start parsing the specified file
|
| 63 |
$p->parse_file($xmlfile);
|
| 64 |
|
| 65 |
# Replace entities in examples
|
| 66 |
# FIXME: should maybe be extracted from entity definition
|
| 67 |
sub replace_entities {
|
| 68 |
my $text = shift;
|
| 69 |
|
| 70 |
$text =~ s/&archive-mirror;/http.us.debian.org/g;
|
| 71 |
$text =~ s/&releasename;/$release/g;
|
| 72 |
$text =~ s/>/>/g;
|
| 73 |
$text =~ s/</</g;
|
| 74 |
|
| 75 |
|
| 76 |
# Any unrecognized entities?
|
| 77 |
if ( $text =~ /(&[^ ]+;)/ ) {
|
| 78 |
die "Error: unrecognized entity '$1'\n"
|
| 79 |
}
|
| 80 |
|
| 81 |
return $text;
|
| 82 |
}
|
| 83 |
|
| 84 |
# Execute when start tag is encountered
|
| 85 |
sub start_rtn {
|
| 86 |
my ($tagname, $text, $attr) = @_;
|
| 87 |
print STDERR "\nStart: $tagname\n" if $opt_d;
|
| 88 |
|
| 89 |
if ( $tagname =~ /appendix|sect1|sect2|sect3|para|informalexample|phrase/ ) {
|
| 90 |
$tagstatus{$tagname}{'count'} += 1;
|
| 91 |
print STDERR "$tagname $tagstatus{$tagname}{'count'}\n" if $opt_d;
|
| 92 |
|
| 93 |
if ( ! exists $ignore{'tag'} ) {
|
| 94 |
# FIXME: this ignores that 'contition' is used for many
|
| 95 |
# other things than the release; should be OK in practice
|
| 96 |
# for the preseed appendix though.
|
| 97 |
if ( exists $attr->{condition} ) {
|
| 98 |
print STDERR "Condition: $attr->{condition}\n" if $opt_d;
|
| 99 |
if ( $attr->{condition} ne $release ) {
|
| 100 |
$ignore{'tag'} = $tagname;
|
| 101 |
$ignore{'depth'} = $tagstatus{$tagname}{'count'};
|
| 102 |
print STDERR "Start ignore because of condition" if $opt_d;
|
| 103 |
}
|
| 104 |
}
|
| 105 |
if ( exists $attr->{arch} ) {
|
| 106 |
print STDERR "Architecture: $attr->{arch}\n" if $opt_d;
|
| 107 |
if ( $attr->{arch} ne $arch ) {
|
| 108 |
$ignore{'tag'} = $tagname;
|
| 109 |
$ignore{'depth'} = $tagstatus{$tagname}{'count'};
|
| 110 |
print STDERR "Start ignore because of architecture" if $opt_d;
|
| 111 |
}
|
| 112 |
}
|
| 113 |
}
|
| 114 |
}
|
| 115 |
|
| 116 |
# Assumes that <title> is the first tag after a section tag
|
| 117 |
if ( $prevtag =~ /sect1|sect2|sect3/ ) {
|
| 118 |
$settitle = ( $tagname eq 'title' );
|
| 119 |
$titletag = $prevtag;
|
| 120 |
$example{'in_sect'} = 0;
|
| 121 |
}
|
| 122 |
$prevtag = $tagname;
|
| 123 |
if ( $tagname eq 'informalexample' && ! exists $ignore{'tag'} ) {
|
| 124 |
if ( exists $attr->{role} && $attr->{role} eq "example" ) {
|
| 125 |
$example{'print'} = 1;
|
| 126 |
$example{'new'} = 1;
|
| 127 |
}
|
| 128 |
}
|
| 129 |
}
|
| 130 |
|
| 131 |
# Execute when text is encountered
|
| 132 |
sub text_rtn {
|
| 133 |
my ($text) = @_;
|
| 134 |
|
| 135 |
if ( $settitle ) {
|
| 136 |
# Clean leading and trailing whitespace for titles
|
| 137 |
$text =~ s/^[[:space:]]*//;
|
| 138 |
$text =~ s/[[:space:]]*$//;
|
| 139 |
|
| 140 |
$text = replace_entities($text);
|
| 141 |
$tagstatus{$titletag}{'title'} = $text;
|
| 142 |
$settitle = 0;
|
| 143 |
}
|
| 144 |
|
| 145 |
if ( $example{'print'} && ! exists $ignore{'tag'} ) {
|
| 146 |
# Print section headers
|
| 147 |
for ($s=1; $s<=3; $s++) {
|
| 148 |
my $sect="sect$s";
|
| 149 |
if ( $tagstatus{$sect}{'title'} ) {
|
| 150 |
print "\n" if ( $s == 1 && ! $example{'first'} );
|
| 151 |
for ( $i = 1; $i <= 5 - $s; $i++ ) { print "#"; };
|
| 152 |
print " $tagstatus{$sect}{'title'}\n";
|
| 153 |
delete $tagstatus{$sect}{'title'};
|
| 154 |
}
|
| 155 |
}
|
| 156 |
|
| 157 |
# Clean leading whitespace
|
| 158 |
if ( $example{'new'} ) {
|
| 159 |
$text =~ s/^[[:space:]]*//;
|
| 160 |
}
|
| 161 |
|
| 162 |
$text = replace_entities($text);
|
| 163 |
print "$text";
|
| 164 |
|
| 165 |
$example{'first'} = 0;
|
| 166 |
$example{'new'} = 0;
|
| 167 |
$example{'in_sect'} = 1;
|
| 168 |
}
|
| 169 |
}
|
| 170 |
|
| 171 |
# Execute when the end tag is encountered
|
| 172 |
sub end_rtn {
|
| 173 |
my ($tagname) = @_;
|
| 174 |
print STDERR "\nEnd: $tagname\n" if $opt_d;
|
| 175 |
|
| 176 |
# Set of tags must match what's in start_rtn
|
| 177 |
if ( $tagname =~ /appendix|sect1|sect2|sect3|para|informalexample|phrase/ ) {
|
| 178 |
my $ts = $tagstatus{$tagname}{'count'};
|
| 179 |
$tagstatus{$tagname}{'count'} -= 1;
|
| 180 |
print STDERR "$tagname $tagstatus{$tagname}{'count'}\n" if $opt_d;
|
| 181 |
die "Invalid XML file: negative count for tag <$tagname>!\n" if $tagstatus{$tagname}{'count'} < 0;
|
| 182 |
|
| 183 |
if ( exists $ignore{'tag'} ) {
|
| 184 |
if ( $ignore{'tag'} eq $tagname && $ignore{'depth'} == $ts ) {
|
| 185 |
delete $ignore{'tag'};
|
| 186 |
}
|
| 187 |
return
|
| 188 |
}
|
| 189 |
}
|
| 190 |
|
| 191 |
if ( $tagname eq 'informalexample' ) {
|
| 192 |
$example{'print'} = 0;
|
| 193 |
}
|
| 194 |
|
| 195 |
if ( $tagname =~ /appendix|sect1|sect2|sect3|para/ ) {
|
| 196 |
delete $tagstatus{$tagname}{'title'} if exists $tagstatus{$tagname}{'title'};
|
| 197 |
|
| 198 |
if ( $example{'in_sect'} ) {
|
| 199 |
print "\n";
|
| 200 |
$example{'in_sect'} = 0;
|
| 201 |
}
|
| 202 |
}
|
| 203 |
}
|