# The script keeps track of some special situations:
# - 'tags' in comments are not handled well by poxml tools, so these
#   are removed
# - references within comments should not be processed, so we keep
#   a count of opening and closing of comments

BEGIN {
    # Counter of top-level ("main") files handled so far; process_file()
    # increments it and uses it to detect entities shared between files.
    main_count = 1

    # Let's first build an array with all the entities (xml files).
    # Every ENTLIST line has the form "<entity name>:<file name>".
    #
    # getline returns -1 when the file cannot be read; -1 is "true" in awk,
    # so the result must be compared with > 0 or a missing ENTLIST would
    # make this loop spin forever.
    while ((entlist_status = (getline <ENTLIST)) > 0) {
        delim = index($0, ":")
        if (delim == 0) {
            # No separator (e.g. a blank line): nothing to register.
            continue
        }
        i = substr($0, 1, delim - 1)

        fname = substr($0, delim + 1, length($0) - delim)
        # Trim any leading and trailing space of filenames
        sub(/^[[:space:]]+/, "", fname)
        sub(/[[:space:]]+$/, "", fname)

        ent [i] = fname
        # 0 means "not referenced yet"; see process_file() and the END rule.
        included [i] = 0
    }

    if (entlist_status < 0) {
        print "** Error: cannot read entity list '" ENTLIST "'" >>LOG
    } else {
        close(ENTLIST)
    }
}
{
    # In the main loop we only want to process entities that are referred to:
    # a line that consists of an "&<name>.xml;" reference, optionally
    # followed by complete "<!-- ... -->" comments. All other lines of the
    # main input are ignored.
    #
    # Note: "!" is written unescaped; "\!" is not a defined regexp escape.
    line = $0
    if (match(line, /^[[:space:]]*&.*\.xml;[[:space:]]*(<!--.*-->[[:space:]]*|)*$/) > 0) {
        process_file(line, "main")
    }
}
END {
    # Final report: list every entity registered in ent[] whose inclusion
    # marker is still 0, i.e. that was never processed.
    print "" >>LOG
    print "The following defined entities (from docstruct) were NOT processed:" >>LOG
    for (entname in ent) {
        if (included [entname] != 0) {
            continue
        }
        print " " entname >>LOG
    }
}
# Resolve one entity reference through ent[] and merge the referenced file
# into the output.
#
# Parameters:
#   entline - the input line that holds the "&<name>;" reference
#   level   - "main" for a reference found in the main input (starts a new
#             output file), "sub" for one found while parsing another file
#             (appended to the current output file)
#
# Locals (never passed by callers): fname, tfname (unused, kept so the
# declared parameter list stays compatible), entname, quoted_dir.
#
# Globals written: INFILE, OUTFILE, OUTDIR, main_count, included[].
function process_file(entline, level,    fname, tfname, entname, quoted_dir) {
    # entname is a local on purpose: this function is re-entered through
    # parse_file(), and a shared global would be overwritten by nested calls.
    entname = get_entname(entline)

    if (!(entname in ent)) {
        print "** Entity " entname " not found and will be skipped!" >>LOG
        # Keep the unresolved reference in the output. Before the first
        # "main" entity has been handled there is no output file yet, and
        # redirecting to an empty file name is a fatal error in gawk.
        if (OUTFILE != "") {
            print entline >>OUTFILE
        }
        return
    }

    fname = ent [entname]
    print "Processing: " fname >>LOG
    INFILE = WORKDIR "/in/" fname

    if (level == "main") {
        main_count += 1

        # Change at highest level: change to a new output file
        OUTFILE = WORKDIR "/out/" fname
        OUTDIR = OUTFILE
        sub(/\/[^\/]*$/, "/", OUTDIR)       # strip filename

        # Create the directory. The path is single-quoted for the shell
        # (embedded quotes become '\'') so that spaces or metacharacters in
        # WORKDIR/fname cannot split or alter the command.
        quoted_dir = OUTDIR
        gsub(/'/, "'\\''", quoted_dir)
        system("mkdir -p '" quoted_dir "'")
    } else {
        # Separate the included file from the preceding content.
        print "" >>OUTFILE
    }

    # included[] holds 0 (never seen), 1 (seen as a main entity) or the
    # main_count value of the main file that pulled it in as a sub entity.
    # A smaller, non-zero value therefore means an earlier file used it too.
    if (level == "sub" && included [entname] != 0 && included [entname] < main_count) {
        print "** Warning: entity '" entname "' was also included in another file." >>LOG
    }
    included [entname] = (level == "main") ? 1 : main_count

    parse_file(INFILE, fname)
}
# Copy the contents of one input file to OUTFILE, wrapped in
# "Start of file" / "End of file" marker comments, and recursively merge any
# entity references found in it.
#
# Parameters:
#   PARSEFILE - path of the file to read
#   FNAME     - file name to show in the marker comments
#
# Locals (never passed by callers): fname, nwline, comment_count, status.
#
# Every line of the file, including the first one, is processed and counted.
# Reading goes into a local variable, so $0 of the main input is left alone.
function parse_file(PARSEFILE, FNAME,    fname, nwline, comment_count, status) {
    comment_count = 0
    fname = FNAME

    # Test whether the file can be read. The return value of getline is the
    # reliable indicator (-1 = error, 0 = end of file, 1 = got a line);
    # ERRNO is not reset after a successful read, so it cannot be used here.
    status = (getline nwline < PARSEFILE)
    if (status < 0) {
        print "** Error: file '" PARSEFILE "' does not exist!" >>LOG
        return
    }

    print "<!-- Start of file " fname " -->" >>OUTFILE
    # The line fetched by the readability test above is the first one handled
    # here, so nothing is dropped and comment counting starts at line one.
    while (status > 0) {
        # Update the count of 'open' comments
        comment_count += count_comments(nwline)

        if (match(nwline, /^[[:space:]]*&.*\.xml;[[:space:]]*(<!--.*-->[[:space:]]*|)*$/) > 0) {
            # If we find another entity reference, we process that file
            # recursively. But not if the reference is within a comment.
            if (comment_count != 0) {
                print "** Skipping entity reference '" nwline "' found in comment!" >>LOG
            } else {
                process_file(nwline, "sub")
            }
        } else {
            # Else we just print the line
            if (match(nwline, /<!--.*<.*>.*<.*>.*-->/) > 0) {
                # Comments containing "<...> ... <...>" are not handled
                # correctly by xml2pot and split2po, so we strip them.
                # Note: this is a workaround for a bug in the tools:
                # http://bugs.kde.org/show_bug.cgi?id=90294
                print "** Comment deleted in line '" nwline "'" >>LOG
                gsub(/<!--.*<.*>.*<.*>.*-->/, "", nwline)
            }
            print nwline >>OUTFILE
        }

        status = (getline nwline < PARSEFILE)
    }

    if (status < 0) {
        # A read error in the middle of the file ends the loop instead of
        # repeating forever on a "true" -1.
        print "** Error: failed while reading file '" PARSEFILE "'" >>LOG
    }
    if (comment_count != 0) {
        print "** Comment count is not zero at end of file: " comment_count >>LOG
    }
    print "<!-- End of file " fname " -->" >>OUTFILE
    close(PARSEFILE)
}
# Extract the entity name from a reference line: drop optional leading
# whitespace plus the "&", then everything from the first ";" onwards.
# A line without "&" or ";" is returned with that part left untouched.
#
# Locals (never passed by callers): ref, semi.
function get_entname(entline,    ref, semi) {
    ref = entline
    sub(/^[[:space:]]*&/, "", ref)

    semi = index(ref, ";")
    if (semi > 0) {
        ref = substr(ref, 1, semi - 1)
    }
    return ref
}
# Return the net number of comments a line opens: occurrences of the
# comment opener minus occurrences of "-->". gsub() is used purely for its
# return value (the number of replacements); the openers are removed from
# the scratch copy before the closers are counted.
#
# Locals (never passed by callers): scratch, net.
function count_comments(inline,    scratch, net) {
    scratch = inline
    net = gsub(/<\!--/, "", scratch)
    net -= gsub(/-->/, "", scratch)
    return net
}