/[webwml]/webwml/i18nwwwfix.pl
ViewVC logotype

Contents of /webwml/i18nwwwfix.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (show annotations) (download)
Sat Nov 22 14:49:07 2008 UTC (4 years, 5 months ago) by bas
Branch: MAIN
CVS Tags: HEAD
Changes since 1.2: +144 -78 lines
File MIME type: text/plain
Clean up i18nwwwfix.pl
1 #!/usr/bin/perl
2
3 # This script enables the creation of copies of the www site
4 # in only two languages. Default language (see $DEFAULT)
5 # and provided language (-p switch) giving preference to
6 # the later.
7
8 # (c) Javier Fernández-Sanguino Peña <jfs@debian.org>
9 # Distributed under the GNU GPL License (see http://www.gnu.org/gpl)
10
11 # It fixes all the href links in the current directory
12 # and all under it depending on the files that exist.
13 # Current options
14 # -p (obliged): tells which preferred language to use
15 # -v (optional): activates verbose output
16
17
18 # In order to retrieve Debian's website try something
19 # like:
20 # (for Spanish users)
21 # 1.- wget -o debian.log -m -k http://www.es.debian.org
22 # 2.- (go to the dir created by wget, in our case www.es.debian.org)
23 # 3.- perl intcopy.pl -p es
24 #
25 # NOTES:
26 # 1.- Customize the URL and -p option to fix for your closest mirror and
27 # language
28 # 2.- after doing this you can remove all other languages
29 # besides yours (anyone care to give an easy bash line here?)
30 # 3.- afterwards check all URL (try checkbot) and send any bugs regarding
31 # bad fixed links to me.
32
33
34
35 # TODO:
36 # 1.- It currently does not understand # in links and fixes
37 # them incorrectly
38
39
40 use Getopt::Std;
41 use IO::File;
42 use Cwd;
43 use File::Copy;
44
45 use strict;
46 use warnings;
47
48 my %opts;
49 getopts('vp:d:', \%opts);
50
51 my $POST = $opts{'p'};
52 my $DEFAULT = "en"; #Default language is english (en)
53 my $INVALID_DIRS = '^\.|\.\.|CVS|\.svn|.git$';
54
55 my $current_dir = $opts{'d'} || getcwd;
56 my $verbose = $opts{'v'};
57
58 fixDirectory($current_dir);
59
60 exit 0;
61
62
63 sub fixDirectory
64 {
65 my ($directory) = @_;
66 my $dir = new IO::File;
67 opendir ($dir, $directory) || die ("I cannot read $directory: $!\n");
68 while ( my $file = readdir ($dir) )
69 {
70 next if $file eq '.' or $file eq '..';
71
72 warn "Checking $file\n" if $verbose;
73
74 if ( -d "${directory}/${file}" and not -l "${directory}/${file}" )
75 {
76 if ( $file =~ /$INVALID_DIRS/ )
77 {
78 warn "Not a valid dir: $file \n" if $verbose;
79 }
80 else
81 {
82 fixDirectory ("${directory}/${file}");
83 }
84 }
85 else
86 {
87 fix_html_file (${directory},"${directory}/${file}") if $file =~ /.html?$/ ;
88 }
89 } # del while
90 } #de la subrutina
91
92
93 sub fix_html_file
94 {
95 # This is a html file
96 my ($directory,$file) = @_;
97
98 warn "Opening the file $file.\n" if $verbose;
99
100 open (FICHERO, "<${file}") or die ("Cannot open ${file} : $!\n");
101 open (NEWFICHERO, ">${file}.bak") or die ("I cannot create a backup of ${file} : $!\n");
102
103 while ( my $line =<FICHERO>)
104 {
105 chomp $line;
106
107 # Here we must check:
108 # 1.- the href ends in .$post.html and $POST = $post and if not
109 # cancel the href (remove the tag)
110 # 2.- if the href does not end in $post.html and $POST.html exists
111 # make it point there
112 # 3.- if the href does not end in $post.html and $POST.html does not
113 # exist then link to .en.html (english version)
114
115 my $newline = "";
116 my $endofline = "";
117
118 while ( $line =~ m/A HREF=\"(.*?)\"/gi )
119 {
120 my $old_ref = $1;
121 my $new_ref = $old_ref;
122 $newline = $newline.$`;
123 $endofline = $';
124
125 if ( islocalreference($old_ref) )
126 {
127 warn "Checking reference $old_ref\n" if $verbose;
128 if ( $old_ref =~ /\/$/ )
129 {
130 # This is a directory... check if the file exists
131 warn "Fixing directory reference $old_ref\n" if $verbose;
132
133 if ( -f "${directory}/${old_ref}/index.$POST.html" )
134 {
135 $new_ref = $old_ref."index.".$POST.".html";
136 }
137 if ( $new_ref eq $old_ref
138 and -f "${directory}/${old_ref}/index.$DEFAULT.html" )
139 {
140 $new_ref = $old_ref."index.".$DEFAULT.".html";
141 }
142 if ( $new_ref eq $old_ref and -f "${directory}/${old_ref}/index.html" )
143 {
144 $new_ref = $old_ref."index.html";
145 }
146 }
147 elsif ( $old_ref =~ /(.*?)\.(.*?)\.html$/ )
148 {
149 # This one uses does *not* use content negotiation...
150 warn "Fixing HTML reference $old_ref\n" if $verbose;
151
152 my $base = $1;
153 my $ending = $2;
154
155 if ( -f "${directory}/${base}.$POST.html" )
156 {
157 $new_ref = $base.".".$POST.".html";
158 }
159 if ( $new_ref eq $old_ref && -f "${directory}/${base}.$DEFAULT.html" )
160 {
161 $new_ref = $base.".".$DEFAULT.".html";
162 }
163 }
164 elsif ( $old_ref !~ /([\w-]+)\.([\w-]+)$/ ) {
165 warn "Fixing Content Negotiation reference $old_ref\n" if $verbose;
166
167 # This one uses *does* use content negotiation...
168 # Check as above but also move around files
169 if ( -f "${directory}/${old_ref}.$POST.html" )
170 {
171 $new_ref = $old_ref.".".$POST.".html";
172 }
173 if ( "$new_ref eq $old_ref && -f ${directory}/${old_ref}.$DEFAULT.html" )
174 {
175 $new_ref = $old_ref.".".$DEFAULT.".html";
176 }
177 if ( "$new_ref eq $old_ref && -f ${directory}/${old_ref}.html" )
178 {
179 $new_ref = $old_ref.".html"
180 }
181 if ( "$new_ref eq $old_ref && -f ${directory}/${old_ref}" )
182 {
183 $new_ref = $old_ref.".html";
184 }
185 }
186 }
187
188 # After checking if $old_ref =/= $new_ref then substitute
189 $newline .= qq{A HREF="$new_ref"};
190 if ( $verbose and $new_ref ne $old_ref)
191 {
192 warn "Fixed reference $old_ref to $new_ref\n";
193 }
194 }
195 $newline .= $endofline;
196 $newline = $line if $newline eq "";
197
198 warn "Changing $line to $newline\n" if $verbose;
199 print NEWFICHERO $newline;
200 print NEWFICHERO "\n";
201 }
202 close FICHERO;
203 close NEWFICHERO;
204
205 unlink $file;
206 move("$file.bak", $file)
207 or die("Couldn't move `$file.bak' to `$file': $!\n");
208 }
209
210 # Checks if a reference points to a local resource,
211 # i.e. it is not in (http|ftp|gopher):// form
212 sub islocalreference
213 {
214 my ($reference) = @_;
215 if ($reference !~ /:\/\// )
216 {
217 warn "Local reference: $reference\n" if $verbose;
218 return 1;
219 }
220 return;
221 }
222
223 __END__
224

  ViewVC Help
Powered by ViewVC 1.1.5