/[webwml]/webwml/i18nwwwfix.pl
ViewVC logotype

Contents of /webwml/i18nwwwfix.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations) (download)
Sat Nov 22 14:49:07 2008 UTC (4 years, 5 months ago) by bas
Branch: MAIN
CVS Tags: HEAD
Changes since 1.2: +144 -78 lines
File MIME type: text/plain
Clean up i18nwwwfix.pl
1 bas 1.3 #!/usr/bin/perl
2 spanish 1.1
3     # This script enables the creation of copies of the www site
4     # in only two languages. Default language (see $DEFAULT)
5     # and provided language (-p switch) giving preference to
6     # the later.
7    
8 bas 1.2 # (c) Javier Fernández-Sanguino Peña <jfs@debian.org>
9 spanish 1.1 # Distributed under the GNU GPL License (see http://www.gnu.org/gpl)
10    
11     # It fixes all the href links in the current directory
12     # and all under it depending on the files that exist.
13     # Current options
14     # -p (obliged): tells which preferred language to use
15     # -v (optional): activates verbose output
16    
17    
18     # In order to retrieve Debian's website try something
19     # like:
20     # (for Spanish users)
21     # 1.- wget -o debian.log -m -k http://www.es.debian.org
22     # 2.- (go to the dir created by wget, in our case www.es.debian.org)
23 bas 1.3 # 3.- perl intcopy.pl -p es
24 spanish 1.1 #
25     # NOTES:
26     # 1.- Customize the URL and -p option to fix for your closest mirror and
27     # language
28     # 2.- after doing this you can remove all other languages
29     # besides yours (anyone care to give an easy bash line here?)
30     # 3.- afterwards check all URL (try checkbot) and send any bugs regarding
31     # bad fixed links to me.
32    
33    
34    
35     # TODO:
36     # 1.- It currently does not understand # in links and fixes
37     # them incorrectly
38    
39    
40     use Getopt::Std;
41     use IO::File;
42 bas 1.3 use Cwd;
43     use File::Copy;
44    
45     use strict;
46     use warnings;
47    
48     my %opts;
49     getopts('vp:d:', \%opts);
50    
51     my $POST = $opts{'p'};
52     my $DEFAULT = "en"; #Default language is english (en)
53     my $INVALID_DIRS = '^\.|\.\.|CVS|\.svn|.git$';
54    
55     my $current_dir = $opts{'d'} || getcwd;
56     my $verbose = $opts{'v'};
57    
58     fixDirectory($current_dir);
59 spanish 1.1
60     exit 0;
61    
62 bas 1.3
63     sub fixDirectory
64 spanish 1.1 {
65     my ($directory) = @_;
66     my $dir = new IO::File;
67 bas 1.3 opendir ($dir, $directory) || die ("I cannot read $directory: $!\n");
68     while ( my $file = readdir ($dir) )
69 spanish 1.1 {
70 bas 1.3 next if $file eq '.' or $file eq '..';
71    
72     warn "Checking $file\n" if $verbose;
73    
74     if ( -d "${directory}/${file}" and not -l "${directory}/${file}" )
75     {
76     if ( $file =~ /$INVALID_DIRS/ )
77     {
78     warn "Not a valid dir: $file \n" if $verbose;
79     }
80     else
81     {
82     fixDirectory ("${directory}/${file}");
83     }
84 spanish 1.1 }
85     else
86 bas 1.3 {
87     fix_html_file (${directory},"${directory}/${file}") if $file =~ /.html?$/ ;
88     }
89 spanish 1.1 } # del while
90     } #de la subrutina
91    
92 bas 1.3
93 spanish 1.1 sub fix_html_file
94     {
95 bas 1.3 # This is a html file
96 spanish 1.1 my ($directory,$file) = @_;
97 bas 1.3
98     warn "Opening the file $file.\n" if $verbose;
99    
100     open (FICHERO, "<${file}") or die ("Cannot open ${file} : $!\n");
101     open (NEWFICHERO, ">${file}.bak") or die ("I cannot create a backup of ${file} : $!\n");
102    
103     while ( my $line =<FICHERO>)
104     {
105 spanish 1.1 chomp $line;
106    
107 bas 1.3 # Here we must check:
108     # 1.- the href ends in .$post.html and $POST = $post and if not
109     # cancel the href (remove the tag)
110     # 2.- if the href does not end in $post.html and $POST.html exists
111     # make it point there
112     # 3.- if the href does not end in $post.html and $POST.html does not
113     # exist then link to .en.html (english version)
114    
115     my $newline = "";
116     my $endofline = "";
117    
118 spanish 1.1 while ( $line =~ m/A HREF=\"(.*?)\"/gi )
119 bas 1.3 {
120 spanish 1.1 my $old_ref = $1;
121     my $new_ref = $old_ref;
122     $newline = $newline.$`;
123     $endofline = $';
124 bas 1.3
125     if ( islocalreference($old_ref) )
126     {
127     warn "Checking reference $old_ref\n" if $verbose;
128     if ( $old_ref =~ /\/$/ )
129     {
130     # This is a directory... check if the file exists
131     warn "Fixing directory reference $old_ref\n" if $verbose;
132    
133     if ( -f "${directory}/${old_ref}/index.$POST.html" )
134     {
135     $new_ref = $old_ref."index.".$POST.".html";
136     }
137     if ( $new_ref eq $old_ref
138     and -f "${directory}/${old_ref}/index.$DEFAULT.html" )
139     {
140     $new_ref = $old_ref."index.".$DEFAULT.".html";
141     }
142     if ( $new_ref eq $old_ref and -f "${directory}/${old_ref}/index.html" )
143     {
144     $new_ref = $old_ref."index.html";
145     }
146 spanish 1.1 }
147 bas 1.3 elsif ( $old_ref =~ /(.*?)\.(.*?)\.html$/ )
148     {
149     # This one uses does *not* use content negotiation...
150     warn "Fixing HTML reference $old_ref\n" if $verbose;
151    
152     my $base = $1;
153     my $ending = $2;
154    
155     if ( -f "${directory}/${base}.$POST.html" )
156     {
157     $new_ref = $base.".".$POST.".html";
158     }
159     if ( $new_ref eq $old_ref && -f "${directory}/${base}.$DEFAULT.html" )
160     {
161     $new_ref = $base.".".$DEFAULT.".html";
162     }
163 spanish 1.1 }
164     elsif ( $old_ref !~ /([\w-]+)\.([\w-]+)$/ ) {
165 bas 1.3 warn "Fixing Content Negotiation reference $old_ref\n" if $verbose;
166    
167     # This one uses *does* use content negotiation...
168     # Check as above but also move around files
169     if ( -f "${directory}/${old_ref}.$POST.html" )
170     {
171     $new_ref = $old_ref.".".$POST.".html";
172     }
173     if ( "$new_ref eq $old_ref && -f ${directory}/${old_ref}.$DEFAULT.html" )
174     {
175     $new_ref = $old_ref.".".$DEFAULT.".html";
176     }
177     if ( "$new_ref eq $old_ref && -f ${directory}/${old_ref}.html" )
178     {
179     $new_ref = $old_ref.".html"
180     }
181     if ( "$new_ref eq $old_ref && -f ${directory}/${old_ref}" )
182     {
183     $new_ref = $old_ref.".html";
184     }
185 spanish 1.1 }
186     }
187 bas 1.3
188     # After checking if $old_ref =/= $new_ref then substitute
189     $newline .= qq{A HREF="$new_ref"};
190     if ( $verbose and $new_ref ne $old_ref)
191     {
192     warn "Fixed reference $old_ref to $new_ref\n";
193     }
194 spanish 1.1 }
195 bas 1.3 $newline .= $endofline;
196 spanish 1.1 $newline = $line if $newline eq "";
197 bas 1.3
198     warn "Changing $line to $newline\n" if $verbose;
199 spanish 1.1 print NEWFICHERO $newline;
200     print NEWFICHERO "\n";
201     }
202     close FICHERO;
203     close NEWFICHERO;
204 bas 1.3
205 spanish 1.1 unlink $file;
206 bas 1.3 move("$file.bak", $file)
207     or die("Couldn't move `$file.bak' to `$file': $!\n");
208 spanish 1.1 }
209    
210 bas 1.3 # Checks if a reference points to a local resource,
211 spanish 1.1 # i.e. it is not in (http|ftp|gopher):// form
212 bas 1.3 sub islocalreference
213     {
214 spanish 1.1 my ($reference) = @_;
215 bas 1.3 if ($reference !~ /:\/\// )
216     {
217     warn "Local reference: $reference\n" if $verbose;
218 spanish 1.1 return 1;
219 bas 1.3 }
220     return;
221 spanish 1.1 }
222 bas 1.3
223     __END__
224    

  ViewVC Help
Powered by ViewVC 1.1.5