/[d-i]/trunk/manual/scripts/merge_xml.awk
ViewVC logotype

Contents of /trunk/manual/scripts/merge_xml.awk

Parent Directory Parent Directory | Revision Log Revision Log


Revision 31233 - (show annotations) (download)
Fri Oct 7 19:51:38 2005 UTC (7 years, 8 months ago) by joeyh
File size: 4550 byte(s)
move manual to top-level directory, split out of debian-installer package
1 # The script keeps track of some special situations:
2 # - 'tags' in comments are not handled well by poxml tools, so these
3 # are removed
4 # - references within comments should not be processed, so we keep
5 # a count of opening and closing of comments
6
# Initialisation: load the entity list (ENTLIST) into two arrays.
#   ent[name]      - file name the entity refers to
#   included[name] - 0 until the entity has been processed
# Each line of ENTLIST is expected to look like "<entity name>:<file name>".
BEGIN {
    main_count = 1

    # Let's first build an array with all the entities (xml files).
    # getline returns 1 for a record, 0 at end of file and -1 on error, so
    # the result must be compared explicitly: a bare "while (getline < file)"
    # never terminates when the file cannot be read.
    while ((status = (getline entry < ENTLIST)) > 0) {
        delim = index(entry, ":")
        if (delim == 0) {
            # No "name:file" separator (e.g. a blank line): nothing to record
            continue
        }
        i = substr(entry, 1, delim - 1)

        fname = substr(entry, delim + 1)
        # Trim any leading and trailing space of filenames
        gsub(/^[[:space:]]*/, "", fname)
        gsub(/[[:space:]]*$/, "", fname)

        ent[i] = fname
        included[i] = 0
    }
    if (status < 0) {
        print "** Error: cannot read entity list '" ENTLIST "'" > "/dev/stderr"
        exit 1
    }
    close(ENTLIST)
}
24
# Main rule, run for every line of the main input.
{
    # In the main loop we only want to process entities that are referred to:
    # a line holding just "&<something>.xml;", optionally followed by one or
    # more complete comments. The line is copied first because process_file
    # reads other files with getline.
    line = $0
    if (line ~ /^[[:space:]]*&.*\.xml;[[:space:]]*(<!--.*-->[[:space:]]*)*$/) {
        process_file(line, "main")
    }
}
32
# Final report: append to the log the name of every entity from the entity
# list whose 'included' marker is still 0, i.e. that was never processed.
END {
    print "" >>LOG
    print "The following defined entities (from docstruct) were NOT processed:" >>LOG
    for (entname in ent) {
        if (included[entname] != 0) {
            # This entity was processed at least once; nothing to report
            continue
        }
        print " " entname >>LOG
    }
}
42
# process_file(entline, level)
#
# Handle one entity reference line.
#   entline - the complete input line holding the entity reference
#   level   - "main" when the reference comes from the main input,
#             "sub" when it was found inside a file that is being merged
#
# Reads the globals ent, WORKDIR and LOG; updates included, main_count,
# INFILE, OUTFILE and OUTDIR. The remaining parameters are local variables.
function process_file(entline, level,    fname, tfname, entname, quoted_dir) {
    entname = get_entname(entline)

    if (!(entname in ent)) {
        print "** Entity " entname " not found and will be skipped!" >>LOG
        # OUTFILE is still unset when an unknown entity shows up before the
        # first main-level file; redirecting to an empty file name is an error.
        if (OUTFILE != "") {
            print entline >>OUTFILE
        }
        return
    }

    fname = ent[entname]
    print "Processing: " fname >>LOG
    INFILE = WORKDIR "/in/" fname

    if (level == "main") {
        main_count += 1

        # Change at highest level: change to a new output file.
        # All writes append (>>), so the previous file can be closed safely.
        if (OUTFILE != "") {
            close(OUTFILE)
        }
        OUTFILE = WORKDIR "/out/" fname
        OUTDIR = OUTFILE
        gsub(/\/[^\/]*$/, "/", OUTDIR)          # strip filename

        # Create the directory. The path is single-quoted for the shell so
        # that spaces or shell metacharacters in it cannot break the command.
        quoted_dir = OUTDIR
        gsub(/'/, "'\\''", quoted_dir)
        system("mkdir -p '" quoted_dir "'")
    } else {
        print "" >>OUTFILE
    }

    # A sub-level entity whose marker was set while an earlier main-level
    # file was being processed has already been merged somewhere else.
    if (level == "sub" && included[entname] != 0 && included[entname] < main_count) {
        print "** Warning: entity '" entname "' was also included in another file." >>LOG
    }
    if (level == "main") {
        included[entname] = 1
    } else {
        included[entname] = main_count
    }

    parse_file(INFILE, fname)
}
77
# parse_file(PARSEFILE, FNAME)
#
# Append the contents of PARSEFILE to OUTFILE, wrapped in "Start of file" /
# "End of file" marker comments that mention FNAME.
#   - A line that is an entity reference to an .xml file is handed to
#     process_file() (recursively), unless it is inside an open comment.
#   - Comments of the form "<!-- ... <...> ... <...> ... -->" are removed
#     from the line before it is written.
# Messages go to LOG. The remaining parameters are local variables.
function parse_file(PARSEFILE, FNAME,    fname, nwline, comment_count, status) {
    comment_count = 0
    fname = FNAME

    # Test whether the file can be read by fetching its first line.
    # getline's return value tells an error (-1) apart from end of file (0),
    # and reading into a variable keeps the line so that it is processed
    # below instead of being thrown away by the probe.
    status = (getline nwline < PARSEFILE)
    if (status < 0) {
        print "** Error: file '" PARSEFILE "' does not exist!" >>LOG
        return
    }

    print "<!-- Start of file " fname " -->" >>OUTFILE
    while (status > 0) {
        # Update the count of 'open' comments
        comment_count += count_comments(nwline)

        if (nwline ~ /^[[:space:]]*&.*\.xml;[[:space:]]*(<!--.*-->[[:space:]]*)*$/) {
            # If we find another entity reference, we process that file
            # recursively, but not if the reference is within a comment
            if (comment_count != 0) {
                print "** Skipping entity reference '" nwline "' found in comment!" >>LOG
            } else {
                process_file(nwline, "sub")
            }
        } else {
            # Else we just print the line
            if (nwline ~ /<!--.*<.*>.*<.*>.*-->/) {
                # Comments containing "<...> ... <...>" are not handled
                # correctly by xml2pot and split2po, so we strip them.
                # Note: this is a workaround for a bug in the tools:
                # http://bugs.kde.org/show_bug.cgi?id=90294
                print "** Comment deleted in line '" nwline "'" >>LOG
                gsub(/<!--.*<.*>.*<.*>.*-->/, "", nwline)
            }
            print nwline >>OUTFILE
        }

        # Stops on end of file (0) as well as on a read error (-1)
        status = (getline nwline < PARSEFILE)
    }
    if (comment_count != 0) {
        print "** Comment count is not zero at end of file: " comment_count >>LOG
    }
    print "<!-- End of file " fname " -->" >>OUTFILE
    close(PARSEFILE)
}
123
# get_entname(entline)
#
# Return the entity name contained in an entity reference line: the text
# that follows the leading "&" (and any whitespace before it) up to, but
# not including, the first ";". The second parameter is a local variable.
function get_entname(entline,    name) {
    name = entline
    sub(/^[[:space:]]*&/, "", name)     # drop leading blanks and the "&"
    sub(/;.*$/, "", name)               # drop the first ";" and the rest
    return name
}
131
# count_comments(inline)
#
# Return the net number of comments opened on the line: the number of
# "<!--" markers minus the number of "-->" markers. The closing markers
# are counted after the opening markers have been removed from the copy.
# The remaining parameters are local variables.
function count_comments(inline,    tmpline, count) {
    # 'abuse' gsub to count them; work on a copy as gsub edits its target
    tmpline = inline
    count = 0
    count += gsub(/<!--/, "", tmpline)
    count -= gsub(/-->/, "", tmpline)
    return count
}

  ViewVC Help
Powered by ViewVC 1.1.5