#! /usr/bin/perl
# webwml-stattrans - Debian web site translation statistics
# Copyright (c) 2001 Martin Schulze and others
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
use POSIX qw(strftime);
use Getopt::Std;
# These modules reside under webwml/Perl
use lib ($0 =~ m|(.*)/|, $1 or ".") ."/Perl";
use Local::Cvsinfo;
use Webwml::Langs;
use Webwml::TransCheck;
use Webwml::TransIgnore;
$| = 1;
$opt_h = "/org/www.debian.org/www/devel/website/stats";
$opt_w = "/org/www.debian.org/webwml";
$opt_p = "*.wml";
$opt_t = "Debian web site translation statistics";
$opt_v = 0;
$opt_d = "u";
$opt_l = undef;
getopts('h:w:p:t:vd:l:') || die;
# Replace filename globbing by Perl regexps
$opt_p =~ s/\./\\./g;
$opt_p =~ s/\?/./g;
$opt_p =~ s/\*/.*/g;
$opt_p =~ s/$/\$/g;
%config = (
'htmldir' => $opt_h,
'wmldir' => $opt_w,
'wmlpat' => $opt_p,
'title' => $opt_t,
'verbose' => $opt_v,
'difftype'=> $opt_d,
);
my $l = Webwml::Langs->new($opt_w);
my %langs = $l->name_iso();
my $transignore = Webwml::TransIgnore->new($opt_w);
my $cvs = Local::Cvsinfo->new();
$cvs->options(
recursive => 1,
matchfile => [ $config{'wmlpat'} ],
skipdir => [ "template" ],
);
$cvs->readinfo("$config{'wmldir'}/english");
foreach (@{$transignore->global()}) {
$cvs->removefile("$config{'wmldir'}/english/$_");
}
my $altcvs = Local::Cvsinfo->new();
$altcvs->options(
recursive => 1,
matchfile => [ $config{'wmlpat'} ],
skipdir => [ "template" ],
);
$max_versions = 5;
$min_versions = 1;
$border_head = "";
$date = strftime "%a %b %e %H:%M:%S %Y %z", localtime;
my %original;
my %transversion;
my %version;
my %files;
# Count wml files in given directory
#
sub getwmlfiles
{
my $lang = shift;
my $dir = "$config{'wmldir'}/$lang";
my $cutfrom = length ($config{'wmldir'})+length($lang)+2;
my $count = 0;
my $is_english = ($lang eq "english")?1:0;
my $file, $v;
my @listfiles;
print "$lang " if ($config{verbose});
die "$0: can't find $dir!\n" if (! -d "$dir");
if ($is_english) {
@listfiles = @{$cvs->files()};
} else {
$altcvs->reset();
$altcvs->readinfo($dir);
@listfiles = @{$altcvs->files()};
}
foreach my $f (@listfiles) {
$file = substr ($f, $cutfrom);
next if $transignore->is_global($file);
$file =~ s/\.wml$//;
$files{$file} = 1;
$wmlfiles{$lang} .= " " . $file;
my $transcheck = Webwml::TransCheck->new("$dir/$file.wml");
if ($transcheck->revision()) {
$transversion{"$lang/$file"} = $transcheck->revision();
$original{"$lang/$file"} ||= $transcheck->original();
}
if ($is_english) {
$version{"$lang/$file"} = $cvs->revision($f);
} else {
$version{"$lang/$file"} = $altcvs->revision($f);
if (!$transcheck->revision()) {
$original{"english/$file"} = $lang;
$transversion{"english/$file"} ||= "1.1";
}
}
if ($transcheck->maintainer()) {
$maintainer{"$lang/$file"} = $transcheck->maintainer();
}
$count++;
}
close (FIND);
$wmlfiles{$lang} .= " ";
$wml{$lang} = $count;
}
sub get_color
{
my $percent = shift;
if ($percent < 50) {
return sprintf ("#FF%02x00", (255/50) * $percent);
} else {
return sprintf ("#%02xFF00", (255/50) * (100 - $percent));
}
}
sub check_translation
{
my ($translation, $version, $file) = @_;
my @version_numbers, $major_number, $last_number;
my @translation_numbers, $major_translated_number, $last_translated_number;
if ($version ne "" && $translation ne "") {
@version_numbers = split /\./,$version;
$major_number = @version_numbers[0];
$last_number = pop @version_numbers;
die "Invalid CVS revision for $file: $version\n"
unless ($major_number =~ /\d+/ && $last_number =~ /\d+/);
@translation_numbers = split /\./,$translation;
$major_translated_number = @translation_numbers[0];
$last_translated_number = pop @translation_numbers;
die "Invalid translation revision for $file: $translation\n"
unless ($major_translated_number =~ /\d+/ && $last_translated_number =~ /\d+/);
# Here we compare the original version with the translated one and print
# a note for the user if their first or last numbers are too far apart
# From translation-check.wml
if ($version eq "") {
return "The original no longer exists";
} elsif ( $major_number != $major_translated_number ) {
return "This translation is too out of date";
} elsif ( $last_number - $last_translated_number >= $max_versions ) {
return "This translation is too out of date";
} elsif ( $last_number - $last_translated_number >= $min_versions ) {
return "The original is newer than this translation";
}
}
return "";
}
print "Collecting data in: " if ($config{'verbose'});
if ($opt_l) {
getwmlfiles ($opt_l);
getwmlfiles ('english');
} else {
foreach $lang (keys %langs) {
getwmlfiles ($lang);
}
}
print "\n" if ($config{'verbose'});
my @search_in;
if ($opt_l) {
@search_in = ( 'english', $opt_l );
} else {
@search_in = sort keys %langs;
}
# Compute stats about gettext files
print "Computing statistics in gettext files... " if ($config{'verbose'});
my %po_translated,%po_fuzzy,%po_untranslated,%po_total;
my %percent_po_t,%percent_po_u,%percent_po_f;
foreach $lang (@search_in) {
next if $lang eq 'english';
$l = $langs{$lang};
$po_translated{"total"}{$lang} = $po_fuzzy{"total"}{$lang} = $po_untranslated{"total"}{$lang} = 0;
my @status = qx,LC_ALL=C make -C $opt_w/$lang/po stats 2>&1 1>/dev/null,;
foreach $line (@status) {
chomp $line;
($domain = $line) =~ s/\..*//;
$po_translated{$domain}{$lang} = ($line =~ /(\d+) translated/ ? $1 : "0");
$po_fuzzy{$domain}{$lang} = ($line =~ /(\d+) fuzzy/ ? $1 : "0");
$po_untranslated{$domain}{$lang} = ($line =~ /(\d+) untranslated/ ? $1 : "0");
$po_total{$domain} = $po_translated{$domain}{$lang} + $po_fuzzy{$domain}{$lang} + $po_untranslated{$domain}{$lang};
$po_translated{"total"}{$lang} += $po_translated{$domain}{$lang};
$po_fuzzy{"total"}{$lang} += $po_fuzzy{$domain}{$lang};
$po_untranslated{"total"}{$lang} += $po_untranslated{$domain}{$lang};
if ($po_total{$domain} > 0) {
$percent_po_t{$domain}{$lang} = int ($po_translated{$domain}{$lang}/$po_total{$domain} * 100 + .5);
$percent_po_f{$domain}{$lang} = int ($po_fuzzy{$domain}{$lang}/$po_total{$domain} * 100 + .5);
$percent_po_u{$domain}{$lang} = int ($po_untranslated{$domain}{$lang}/$po_total{$domain} * 100 + .5);
} else {
$percent_po_t{$domain}{$lang} = 0;
$percent_po_f{$domain}{$lang} = 0;
$percent_po_u{$domain}{$lang} = 0;
}
}
$po_total{"total"} = $po_translated{"total"}{$lang} + $po_fuzzy{"total"}{$lang} + $po_untranslated{"total"}{$lang};
if ($po_total{'total'} > 0) {
$percent_po_t{'total'}{$lang} = int ($po_translated{'total'}{$lang}/$po_total{'total'} * 100 + .5);
$percent_po_f{'total'}{$lang} = int ($po_fuzzy{'total'}{$lang}/$po_total{'total'} * 100 + .5);
$percent_po_u{'total'}{$lang} = int ($po_untranslated{'total'}{$lang}/$po_total{'total'} * 100 + .5);
} else {
$percent_po_t{'total'}{$lang} = 0;
$percent_po_f{'total'}{$lang} = 0;
$percent_po_u{'total'}{$lang} = 0;
}
}
print "done.\n" if ($config{'verbose'});
# =============== Create HTML files ===============
mkdir ($config{'htmldir'}, 02775) if (! -d $config{'htmldir'});
my @filenames = sort keys %files;
my $nfiles = scalar @filenames;
print "Creating files: " if ($config{'verbose'});
foreach $lang (@search_in) {
$l = $langs{$lang};
print "$l.html " if ($config{'verbose'});
$l = "zh-cn" if ($l eq "zh"); # kludge
$t_body = $u_body = $o_body = "";
# get stats about files
foreach $file (@filenames) {
next if ($file eq "");
# Translated pages
if (index ($wmlfiles{$lang}, " $file ") >= 0) {
$translated{$lang}++;
$orig = $original{"$lang/$file"} || "english";
# Outdated translations
$msg = check_translation ($transversion{"$lang/$file"}, $version{"$orig/$file"}, "$lang/$file");
if (length ($msg)) {
$o_body .= "";
if ($file eq "devel/wnpp/wnpp") {
$o_body .= sprintf "| %s | ", $file;
} else {
$o_body .= sprintf "%s | ", $file, $l, $file;
}
$o_body .= sprintf "%s | ", $transversion{"$lang/$file"};
$o_body .= sprintf "%s | ", $version{"$orig/$file"};
$o_body .= sprintf "%s | ", $msg;
$o_body .= sprintf " %s -> %s | ", $file, $transversion{"$lang/$file"}, $version{"$orig/$file"}, $config{'difftype'}, $transversion{"$lang/$file"}, $version{"$orig/$file"};
$o_body .= sprintf "%s | ", $maintainer{"$lang/$file"} || "";
$o_body .= "
\n";
$outdated{$lang}++;
# Up-to-date translations
} else {
if ($file eq "devel/wnpp/wnpp") {
$t_body .= sprintf "%s
\n", $file;
} else {
$t_body .= sprintf "%s
\n", $file, $l, $file;
}
}
}
# Untranslated pages
else {
if ($file eq "devel/wnpp/wnpp") {
$u_body .= sprintf "%s
\n", $file;
} else {
$u_body .= sprintf "%s
\n", $file, $file;
}
$untranslated{$lang}++;
}
}
# this is where we discard the files that the translation directory contains
# but which don't exist in the English directory
# print "extra files: ".$wml{$lang}-$translated{$lang}."\n";
$wml{$lang} = $translated{$lang};
$translated{$lang} = $translated{$lang} - $outdated{$lang};
$percent_a{$lang} = int ($wml{$lang}/$nfiles * 100 + .5);
$percent_t{$lang} = int ($translated{$lang}/$wml{$lang} * 100 + .5);
$percent_o{$lang} = 100 - $percent_t{$lang};
$percent_u{$lang} = 100 - $percent_a{$lang};
if (open (HTML, ">$config{'htmldir'}/$l.html")) {
printf HTML "%s: %s\n", $config{'title'}, ucfirst $lang;
$color = get_color ($percent_a{$lang});
print HTML "\n";
printf HTML "\n", $color;
printf HTML "%s: %s |
", $config{'title'}, ucfirst $lang;
print HTML "\n";
printf HTML "| %d files (%d%%) translated | ", $wml{$lang}, $percent_a{$lang};
printf HTML "%d files (%d%%) up to date | ", $translated{$lang}, $percent_t{$lang};
printf HTML "%d files (%d%%) outdated | ", $outdated{$lang}, $percent_o{$lang};
printf HTML "%d files (%d%%) not translated | ", $untranslated{$lang}, $percent_u{$lang};
print HTML "
\n";
print HTML "
\n";
# Make the table of content
print HTML "Table of Contents
\n";
print HTML "Back to index of languages
\n";
print HTML "
Working on the website\n";
if ($o_body) {
print HTML "
Outdated translations\n";
}
if ($u_body) {
print HTML "
Pages not translated\n";
}
if ($t_body) {
print HTML "
Translations up to date\n";
}
if ($lang ne 'english') {
print HTML "
Translations of templates (gettext files)\n";
}
print HTML "
\n";
# outputs the content
if ($o_body) {
print HTML "\n";
print HTML "\n";
print HTML "| File | Translated | Origin | Comment | ";
if ($opt_d eq "u") { print HTML "Unified diff | "; }
elsif ($opt_d eq "h") { print HTML "Colored diff | "; }
else { print HTML "Diff | "; }
print HTML "Maintainer | ";
print HTML "
\n";
print HTML $o_body;
print HTML "
\n";
}
if ($u_body) {
print HTML "\n";
print HTML $u_body;
}
if ($t_body) {
print HTML "\n";
print HTML $t_body;
}
# outputs the gettext stats
if ($lang ne 'english') {
print HTML "\n";
# print HTML $border_head;
print HTML "\n";
print HTML "| File | Up to date | Fuzzy | Untranslated | Total |
\n";
$l = $langs{$lang};
$l = "zh-cn" if ($l eq "zh"); # kludge
foreach my $domain (sort keys %po_total) {
next if $domain eq 'total';
print HTML "";
$color_t = get_color ($percent_po_t{$domain}{$lang});
$color_f = get_color (100 - $percent_po_f{$domain}{$lang});
$color_u = get_color (100 - $percent_po_u{$domain}{$lang});
print HTML "| $domain.$langs{$lang}.po | ";
printf HTML "%d (%d%%) | ", $color_t, $po_translated{$domain}{$lang}, $percent_po_t{$domain}{$lang};
printf HTML "%d (%d%%) | ", $color_f, $po_fuzzy{$domain}{$lang}, $percent_po_f{$domain}{$lang};
printf HTML "%d (%d%%) | ", $color_u, $po_untranslated{$domain}{$lang}, $percent_po_u{$domain}{$lang};
printf HTML "%d | ", $po_total{$domain};
print HTML "
\n";
}
print HTML "| | | | | |
| Total: | ";
$color_t = get_color ($percent_po_t{'total'}{$lang});
$color_f = get_color (100 - $percent_po_f{'total'}{$lang});
$color_u = get_color (100 - $percent_po_u{'total'}{$lang});
printf HTML "%d (%d%%) | ", $color_t, $po_translated{'total'}{$lang}, $percent_po_t{'total'}{$lang};
printf HTML "%d (%d%%) | ", $color_f, $po_fuzzy{'total'}{$lang}, $percent_po_f{'total'}{$lang};
printf HTML "%d (%d%%) | ", $color_u, $po_untranslated{'total'}{$lang}, $percent_po_u{'total'}{$lang};
printf HTML "%d | ", $po_total{'total'};
print HTML "
\n";
}
print HTML "
\n";
# outputs footer
print HTML "
Compiled at $date\n";
print HTML "";
close (HTML);
}
}
print "\n" if ($config{'verbose'});
# =============== Creating index.html ===============
print "Creating index.html... " if ($config{'verbose'});
open (HTML, ">$config{'htmldir'}/index.html")
|| die "Can't open $config{'htmldir'}/index.html";
printf HTML "\n%s\n\n", $config{'title'};
printf HTML "%s
\n", $config{'title'};
print HTML "Translated web pages
\n";
printf HTML "There are %d pages to translate.
\n",($wml{'english'}+$untranslated{'english'});
print HTML $border_head;
print HTML "\n";
print HTML "| Language | Translations | Up to date | Outdated | Not translated |
\n";
foreach $lang (@search_in) {
$l = $langs{$lang};
$l = "zh-cn" if ($l eq "zh"); # kludge
$color_a = get_color ($percent_a{$lang});
$color_t = get_color ($percent_t{$lang});
$color_o = get_color (100 - $percent_o{$lang});
$color_u = get_color (100 - $percent_u{$lang});
print HTML "";
printf HTML "| %s (%s) | ", $l, ucfirst $lang, $l;
printf HTML "%d (%d%%) | ", $color_a, $wml{$lang}, $percent_a{$lang};
printf HTML "%d (%d%%) | ", $color_t, $translated{$lang}, $percent_t{$lang};
printf HTML "%d (%d%%) | ", $color_o, $outdated{$lang}, $percent_o{$lang};
printf HTML "%d (%d%%) | ", $color_u, $untranslated{$lang}, $percent_u{$lang};
print HTML "
\n",
}
print HTML "
\n";
print HTML $border_foot;
print HTML "Translated templates (gettext files)
\n";
printf HTML "There are %d strings to translate.
\n",$po_total{'total'};
print HTML $border_head;
print HTML "\n";
print HTML "| Language | Up to date | Fuzzy | Not translated |
\n";
foreach $lang (@search_in) {
next if $lang eq 'english';
$l = $langs{$lang};
$l = "zh-cn" if ($l eq "zh"); # kludge
print HTML "";
printf HTML "| %s (%s) | ", $l, ucfirst $lang, $l;
$color_t = get_color ($percent_po_t{'total'}{$lang});
$color_f = get_color (100 - $percent_po_f{'total'}{$lang});
$color_u = get_color (100 - $percent_po_u{'total'}{$lang});
printf HTML "%d (%d%%) | ", $color_t, $po_translated{'total'}{$lang}, $percent_po_t{'total'}{$lang};
printf HTML "%d (%d%%) | ", $color_f, $po_fuzzy{'total'}{$lang}, $percent_po_f{'total'}{$lang};
printf HTML "%d (%d%%) | ", $color_u, $po_untranslated{'total'}{$lang}, $percent_po_u{'total'}{$lang};
print HTML "
\n";
}
print HTML "
\n";
print HTML $border_foot;
print HTML "
\n";
print HTML "Created with webwml-stattrans at $date\n";
print HTML "\n";
close (HTML);
print "done.\n" if ($config{'verbose'});
# Note:
# Translated pages on ll.html may be higher than in index.html.
# This is due to the fact that some english pages were removed.
# printf "%s\n", join ("\n", keys %version);
# printf "%s - %s\n", $version{'german/devel/index'}, $version{'english/devel/index'};