#!/usr/bin/perl -w

use strict;
use utf8;

=head1 NAME

dl10n-spider -- crawl translator mailing lists (and BTS) for status updates

=head1 SYNOPSIS

dl10n-spider [options] lang+

=head1 DESCRIPTION

This script parses the debian-l10n-E<lt>languageE<gt> mailing list archives.
It looks for emails whose titles follow a specific format indicating what the
author intends to translate, or the current status of their work on this
translation.

This information is saved to a dl10n database which can then be used to build
a l10n coordination page or any other useless statistics.

=cut

use Getopt::Long;    # to parse the args
use LWP::UserAgent;
use Debian::L10n::Html;
use File::Path;
use POSIX qw(strftime);

# Name of the running script with any leading directories removed.
# The quantifier must sit inside the capture group: with "([^/])+" the group
# is overwritten on every repetition, so $1 would end up holding only the
# last character of the path instead of the whole final component.
my $progname = $0;
$progname = $1 if $progname =~ m,([^/]+)$,;

my $VERSION = "4.0";    # External Version Number
my $BANNER  = "Debian l10n infrastructure -- mailing list spider v$VERSION";    # Version Banner - text form

# Holders for command line settings; all of them start out undefined.
my $cmdline_year  = undef;
my $cmdline_month = undef;
my $cmdline_msg   = undef;
my $cmdline_file  = undef;

# Language code => language name lookup table.
my %Language = (
    ar  => 'arabic',
    ca  => 'catalan',
    de  => 'german',
    en  => 'english',
    es  => 'spanish',
    fr  => 'french',
    nl  => 'dutch',
    pt  => 'portuguese',
    tr  => 'turkish',
    all => 'all',
);

=head1 Command line option parsing

=over 4

=item General options:

=over 4

=item -h, --help

display short help text

=item -V, --version

display version and exit

=back

=item Begin point of the crawling:

=over 4

=item --year=YYYY

=item --month=MM

=item --message=msg

=back

if not specified, will crawl for new messages.

=item Database to fill: =over4 =item --sdb=STATUS_FILE use STATUS_FILE as status file (instead of $STATUS_FILE) =back =back =cut # This is put into a block to avoid main namespace pollution { sub syntax_message { my $message = shift; if (defined $message) { print "$progname: $message\n"; } else { print "$BANNER\n"; } print < \&syntax_msg, "version|V" => \&banner, # ------------------ configuration options "sdb=s" => \$cmdline_file, ); # init commandline parser Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev'); # process commandline options GetOptions(%opthash) or syntax_msg("error parsing options"); } my $lang = $ARGV[0]; my $language = $Language{$lang}; Html::html($cmdline_file, $lang); { my $head = < Coordination of debian-l10n-$language

Debian Project

Coordination of debian-l10n-$language

This page is made to aid the coordination of translating debian related text to $language. As documented here, translators and reviewers use pseudo-urls in the subject of e-mails to the debian-l10n-$language list for coordination.

A program parses these pseudo-urls and collects the relevant data, which are then displayed below.

EOF ; my $date = strftime('%a, %d %b %Y %H:%M:%S %z', gmtime); my $tail = <

Comments: Thomas Huriaux

Generated on $date

EOF ; opendir D, './include' or die "Cannot open .: $!"; my @files = readdir D; closedir D; mkpath ("html/include", 02775) or die "Cannot create include directory\n" unless (-d "html/include"); foreach (grep (/^$lang\./, @files)) { next unless /\.inc$/; s/\.inc$//; open I, "; close I; open I, ">html/include/$_.inc" or die "Cannot open $_.inc $_"; print I @inc; close I; open H, ">html/$Language{$lang}/$_.html" or die "Cannot open $_.html: $_"; print H $head; print H @inc; print H $tail; close H; } } =head1 LICENSE This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. =head1 COPYRIGHT (C) 2003,2004 Tim Dijkstra 2004 Nicolas Bertolissio 2004 Martin Quinson =cut 1;