| 1 |
#!/usr/bin/env python
|
| 2 |
# -*- coding: utf-8 -*-
|
| 3 |
|
| 4 |
"""
|
| 5 |
This script imports information about translated applications
|
| 6 |
inside Debian packages.
|
| 7 |
"""
|
| 8 |
|
| 9 |
from aux import quote
|
| 10 |
from gatherer import gatherer
|
| 11 |
import re
|
| 12 |
from debian_bundle import deb822
|
| 13 |
from os import stat
|
| 14 |
from sys import stderr, exit
|
| 15 |
from filecmp import cmp
|
| 16 |
import gzip
|
| 17 |
# import bz2
|
| 18 |
from psycopg2 import IntegrityError, InternalError
|
| 19 |
|
| 20 |
# Debug switch; it is not referenced by any other code visible in this file.
debug = 0

# Matches a three-digit decimal character reference such as "&#233;".
check_char_re = re.compile(r'&#[0-9][0-9][0-9];')

# Matches a complete translation status token such as "12t3f4u".  The three
# groups are the counts stored as translated, fuzzy and untranslated.
# Raw string literals keep the "\d" escapes out of Python's own string
# escape processing.
parse_translation_status_re = re.compile(r'^(\d+)t(\d+)f(\d+)u$')
|
| 24 |
|
| 25 |
# Matches one decimal numeric character reference, e.g. "&#233;".  The group
# captures the digits of the code point.
_char_ref_re = re.compile(r'&#([0-9]+);')

try:
    _unichr = unichr  # Python 2: code point -> unicode character
except NameError:
    _unichr = chr  # Python 3: chr() already returns text


def _decode_char_ref(match):
    """Return the character for one matched reference, or the match unchanged."""
    try:
        return _unichr(int(match.group(1)))
    except (ValueError, OverflowError):
        # The number is not a code point this interpreter can represent;
        # keep the reference text as it was.
        return match.group(0)


def replace_special_char(string):
    """Replace decimal character references ("&#NNN;") by the real character.

    Only complete references of the form "&#<digits>;" are touched.  Any
    other text, including semicolons and a stray "&#", is kept verbatim
    (splitting on "&#" and ";" raised ValueError or lost text for such
    input).  References are decoded regardless of their number of digits.

    string -- text to convert; a byte string is interpreted as UTF-8.

    Returns the argument itself when nothing has to be replaced (or when a
    byte string is not valid UTF-8), otherwise the converted text encoded
    as UTF-8.
    """
    if isinstance(string, bytes):
        if b'&#' not in string:
            return string
        try:
            text = string.decode('utf-8')
        except UnicodeDecodeError:
            # Cannot be interpreted reliably; hand the data back untouched.
            return string
    else:
        text = string
    if not _char_ref_re.search(text):
        return string
    return _char_ref_re.sub(_decode_char_ref, text).encode('utf-8')
|
| 37 |
|
| 38 |
def get_gatherer(connection, config, source):
    """Create the gatherer object provided by this module.

    The three arguments are handed over unchanged to the constructor of
    i18n_apps_gatherer; the new instance is returned.
    """
    instance = i18n_apps_gatherer(connection, config, source)
    return instance
|
| 40 |
|
| 41 |
class pkg_info():
    """Identification data of the package currently being processed.

    package and release are given to the constructor; version and
    maintainer start as empty strings and are assigned by the caller.
    """

    def __init__(self, package, release):
        self.package = package
        self.release = release
        self.version = ''
        self.maintainer = ''

    def __str__(self):
        # The format string has four placeholders, so four values must be
        # supplied; the release is the one attribute that was not passed,
        # which made every call raise TypeError.
        return "Package %s: %s, %s\n%s" % \
            (self.package, self.maintainer, self.version, self.release)
|
| 51 |
|
| 52 |
class po_info():
    """Information about one po file, parsed from a '!'-separated line.

    The fields of the line are, in this order: po file name, language,
    translation status ("<n>t<n>f<n>u"), pkg_version_lang, last translator
    and language team.  The last two are optional.

    infofields holds the number of fields found.  It is 0 when the line has
    to be ignored: no .po file, fewer than four fields, or a language
    shorter than two characters.  Objects are ordered by infofields so that
    max() picks the one carrying more information.
    """

    def __init__(self, poline):
        po = poline.strip().split('!')

        # Give every attribute a value first, so that the object can still
        # be printed when the line is ignored below.
        self.infofields = 0
        self.po_file = po[0]
        self.language = ''
        self.translated = 'NULL'
        self.fuzzy = 'NULL'
        self.untranslated = 'NULL'
        self.pkg_version_lang = ''
        self.last_translator = 'NULL'
        self.language_team = 'NULL'

        # ignore .pot and .templates files
        if not po[0].endswith('.po'):
            return
        # The first four fields are accessed unconditionally further down;
        # a shorter line would raise IndexError.
        if len(po) < 4:
            stderr.write("Incomplete po information '%s' ignored.\n" % poline.strip())
            return
        self.language = po[1]
        if len(self.language) < 2:
            stderr.write("Invalid language '%s'. Po filename is %s.\n" % (self.language, self.po_file))
            return

        # Keep track of the number of information fields given for a po file.
        # In case there is more than one po file in a package just take the
        # one containing more information
        self.infofields = len(po)

        match = parse_translation_status_re.match(po[2])
        if match:
            self.translated, self.fuzzy, self.untranslated = match.groups()
        self.pkg_version_lang = po[3]  # Meaning is unclear

        # sometimes language translation team is missing
        if self.infofields >= 6:
            self.language_team = replace_special_char(po[5])
        if self.infofields > 4:
            self.last_translator = replace_special_char(po[4])

    def __str__(self):
        return "Package %s: %s, %s\n%s" % \
            (self.infofields, self.language, self.po_file, self.last_translator)

    def __cmp__(self, other):
        # Used by Python 2 for comparisons not covered by the methods below.
        return self.infofields - other.infofields

    # Python 3 ignores __cmp__; max() needs the rich comparison methods.
    def __lt__(self, other):
        return self.infofields < other.infofields

    def __gt__(self, other):
        return self.infofields > other.infofields
|
| 97 |
|
| 98 |
class i18n_apps_gatherer(gatherer):
    """Gatherer filling the two translation tables named in the configuration.

    For every release one gzip-compressed file "<path>/<release>.gz" is
    read.  The PO field of a stanza goes to the table configured as
    'table_apps', the PODEBCONF field to the one configured as
    'table_debconf'.
    """

    # Package whose stanza is currently processed; assigned by run() and
    # read by parse_po_infoline().
    pkg = None

    def __init__(self, connection, config, source):
        """Check the configuration and prepare one insert statement per table."""
        gatherer.__init__(self, connection, config, source)
        # NOTE(review): run() also reads my_config['releases'], which is not
        # listed here, while 'files' is listed but not read in this class.
        self.assert_my_config('path', 'files', 'table_apps', 'table_debconf')
        my_config = self.my_config

        cur = self.cursor()
        # create prepared statements here!
        query = """PREPARE %s_insert
      (text, text, text, text, text, text, text, text, text, int, int, int)
    AS INSERT INTO %s
      (package, version, release, maintainer, po_file, language,
       pkg_version_lang, last_translator, language_team,
       translated, fuzzy, untranslated)
    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)"""
        cur.execute(query % (my_config['table_apps'], my_config['table_apps']))
        cur.execute(query % (my_config['table_debconf'], my_config['table_debconf']))

    def _input_path(self, rel):
        """Return the name of the input file belonging to release rel."""
        return self.my_config['path'] + '/' + rel + '.gz'

    def parse_po_infoline(self, po_type, data):
        """Insert the po information of one stanza field for self.pkg.

        po_type -- 'PO' or 'PODEBCONF'; selects the field of data to read
                   and the target table.  Other values are reported on
                   stderr and ignored.
        data    -- stanza (mapping) containing the field po_type with one
                   '!'-separated entry per line.

        An InternalError of the database terminates the program; integrity
        and encoding errors are only reported.
        """
        cur = self.cursor()

        if po_type == 'PO':
            target_table = self.my_config['table_apps']
        elif po_type == 'PODEBCONF':
            target_table = self.my_config['table_debconf']
        else:
            stderr.write("Wrong PO type %s ignored.\n" % po_type)
            return

        best_by_language = {}
        for poline in data[po_type].split("\n"):
            # ignore first empty line
            if len(poline) <= 1:
                continue
            poinfo = po_info(poline)
            if poinfo.infofields == 0:
                continue
            # Sometimes there is more than one po file in a package.  We inject the
            # file which contains better info about the translator.
            # Attention: For the current application it is completely sufficient
            #            that we keep the information *that* a package contains a
            #            translation for a certain language in UDD.  Other
            #            applications might need more complete information.
            known = best_by_language.get(poinfo.language)
            # On a tie the entry seen first is kept.
            if known is None or poinfo.infofields > known.infofields:
                best_by_language[poinfo.language] = poinfo

        for poinfo in best_by_language.values():
            query = "EXECUTE %s_insert (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" % \
                (target_table,
                 quote(self.pkg.package), quote(self.pkg.version), quote(self.pkg.release),
                 quote(self.pkg.maintainer), quote(poinfo.po_file), quote(poinfo.language),
                 quote(poinfo.pkg_version_lang),
                 quote(poinfo.last_translator), quote(poinfo.language_team),
                 poinfo.translated, poinfo.fuzzy, poinfo.untranslated)
            # Describe the failing record from data that exists at this
            # point; the handlers used to read a variable that is not
            # defined in this method and raised NameError themselves.
            context = "%s %s (%s): %s [%s]" % \
                (self.pkg.package, self.pkg.version, self.pkg.release,
                 poinfo.po_file, poinfo.language)
            try:
                cur.execute(query)
            except IntegrityError as err:
                print(str(err).strip())
                print(context)
            except InternalError as err:
                print("InternalError: %s" % err)
                print("%s %s" % (context, po_type))
                print(query)
                exit(-1)
            except UnicodeEncodeError as err:
                print(err)
                print(query)

    def run(self):
        """Replace the content of both tables by the data of all releases.

        The program is terminated before any table is emptied if an input
        file is missing or empty.  An IOError while reading a file is
        reported on stderr and the next release is processed.
        """
        my_config = self.my_config
        # start harassing the DB, preparing the final inserts and making place
        # for the new data:
        cur = self.cursor()

        releases = my_config['releases'].split(' ')

        # verify whether input files are properly downloaded
        for rel in releases:
            path = self._input_path(rel)
            try:
                size = stat(path).st_size
            except OSError:
                # stat() raises for a missing file instead of returning
                # something false.
                size = 0
            if size < 1:
                stderr.write("File %s for release %s does not exist or is empty\n" % (path, rel))
                # Really stop here: a bare "exit" is only a name lookup and
                # the tables were emptied nevertheless.
                exit(1)

        # Clean up tables
        query = "TRUNCATE %s; TRUNCATE %s;" % (my_config['table_apps'], my_config['table_debconf'])
        cur.execute(query)

        for rel in releases:
            path = self._input_path(rel)
            archive = gzip.GzipFile(path)
            try:
                for stanza in deb822.Sources.iter_paragraphs(archive, shared_storage=False):
                    # First entry is no real package but a date entry
                    if 'Version' not in stanza:
                        continue
                    # Packages without language information are irrelevant.
                    # Either field alone is sufficient; demanding both
                    # dropped packages providing only one of them.
                    if 'PO' not in stanza and 'PODEBCONF' not in stanza:
                        continue
                    self.pkg = pkg_info(stanza['Package'], rel)
                    self.pkg.version = stanza['Version']
                    self.pkg.maintainer = stanza['Maintainer']

                    if 'PO' in stanza:
                        self.parse_po_infoline('PO', stanza)
                    if 'PODEBCONF' in stanza:
                        self.parse_po_infoline('PODEBCONF', stanza)
            except IOError as err:
                stderr.write("Error reading %s (%s)\n" % (path, err))
            finally:
                # Do not keep the input file open after it was processed.
                archive.close()

        cur.execute("ANALYZE %s" % my_config['table_apps'])
        cur.execute("ANALYZE %s" % my_config['table_debconf'])
|
| 217 |
|
| 218 |
if __name__ == '__main__':
    # No main() is defined or imported in this module, so calling it could
    # only raise NameError.  Say so explicitly and fail with an exit code.
    # NOTE(review): presumably this module is loaded by a separate program
    # that calls get_gatherer() -- confirm and adapt the message if needed.
    stderr.write("This module defines no main(); it only provides get_gatherer().\n")
    exit(1)
|
| 220 |
|
| 221 |
# vim:set et tabstop=2:
|
| 222 |
|