/[collab-qa]/udd/udd/i18n_apps_gatherer.py
ViewVC logotype

Contents of /udd/udd/i18n_apps_gatherer.py

Parent Directory | Revision Log


Revision 1723 - (show annotations) (download) (as text)
Sun Mar 14 19:46:59 2010 UTC (3 years, 2 months ago) by tille
File MIME type: text/x-python
File size: 7693 byte(s)
Fix parsing problem in ftpnew, make i18n-apps usable
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 This script imports information about translated applications
6 inside Debian packages.
7 """
8
9 from aux import quote
10 from gatherer import gatherer
11 import re
12 from debian_bundle import deb822
13 from os import stat
14 from sys import stderr, exit
15 from filecmp import cmp
16 import gzip
17 # import bz2
18 from psycopg2 import IntegrityError, InternalError
19
# Debug flag; nothing else in this file reads it.
debug = 0

# Finds a three-digit decimal character reference such as "&#233;".
check_char_re = re.compile(r'&#[0-9][0-9][0-9];')

# Splits a status token like "12t3f4u" into its three counters; po_info stores
# them as translated, fuzzy and untranslated (in that order).
parse_translation_status_re = re.compile(r'^(\d+)t(\d+)f(\d+)u$')
24
def replace_special_char(string):
    """Expand decimal character references (``&#NNN;``) found in *string*.

    Each ``&#<digits>;`` sequence is replaced by the character with that code
    point.  Everything else, including stray ``;`` and ``&#`` that are not
    part of a complete reference, is kept as it is.

    Arguments:
        string: byte string (decoded as UTF-8 before processing) or text.

    Returns:
        *string* itself, unchanged, when nothing was replaced; otherwise the
        expanded text encoded as a UTF-8 byte string.

    Raises:
        UnicodeDecodeError: if a byte string containing ``&#`` is not valid
            UTF-8.
    """
    # Python 2 calls the code point constructor unichr(); use chr() where
    # that name does not exist.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    is_bytes = isinstance(string, bytes)
    # Cheap pre-check so that the common case is returned untouched.
    if (b'&#' if is_bytes else u'&#') not in string:
        return string

    # Work on text so that references and existing non-ASCII characters can
    # be combined safely.
    text = string.decode('utf-8') if is_bytes else string

    def expand(match):
        try:
            return to_char(int(match.group(1)))
        except (ValueError, OverflowError):
            # Number is not a usable code point: keep the reference verbatim.
            return match.group(0)

    replaced = re.sub(u'&#([0-9]+);', expand, text)
    if replaced == text:
        return string
    return replaced.encode('utf-8')
37
def get_gatherer(connection, config, source):
    """Create the i18n_apps_gatherer for the given connection, config and source."""
    instance = i18n_apps_gatherer(connection, config, source)
    return instance
40
class pkg_info():
    """Plain record describing the package whose translations are imported.

    Attributes set here: package and release come from the constructor;
    version and maintainer start out empty and are filled in by the caller.
    """

    def __init__(self, package, release):
        self.package = package
        self.release = release
        self.version = ''
        self.maintainer = ''

    def __str__(self):
        # The format has four placeholders; the release is the fourth value
        # (previously only three values were supplied, which raised TypeError).
        return "Package %s: %s, %s\n%s" % \
            (self.package, self.maintainer, self.version, self.release)
51
class po_info():
    """Parsed form of one '!'-separated description line of a po file.

    Field order in the line: po file name, language, status token
    (e.g. "12t3f4u"), a version string, and optionally last translator and
    language team.

    ``infofields`` is the number of fields in an accepted line and 0 for a
    line that must be ignored (not a ``.po`` file, fewer than four fields, or
    a language shorter than two characters).  Instances order by
    ``infofields`` so that the entry carrying more information wins.

    Values that are missing are stored as the string 'NULL'.
    """

    def __init__(self, poline):
        po = poline.strip().split('!')

        # Defaults first, so every attribute exists even on rejected lines.
        self.infofields = 0
        self.po_file = po[0]
        self.language = ''
        self.translated = 'NULL'
        self.fuzzy = 'NULL'
        self.untranslated = 'NULL'
        self.pkg_version_lang = ''
        self.last_translator = 'NULL'
        self.language_team = 'NULL'

        # ignore .pot and .templates files
        if not self.po_file.endswith('.po'):
            return

        # Language, status and version are read unconditionally below, so a
        # shorter line cannot be used.
        if len(po) < 4:
            stderr.write("Incomplete po information '%s' ignored.\n" % poline.strip())
            return

        self.language = po[1]
        if len(self.language) < 2:
            stderr.write("Invalid language '%s'. Po filename is %s.\n"
                         % (self.language, self.po_file))
            return

        # Keep track of the number of information fields given for a po file.
        # In case there is more than one po file in a package just take the
        # one containing more information.
        self.infofields = len(po)

        match = parse_translation_status_re.match(po[2])
        if match:
            self.translated, self.fuzzy, self.untranslated = match.groups()
        self.pkg_version_lang = po[3]  # Meaning is unclear

        # Translator and language team are optional trailing fields.
        if self.infofields >= 5:
            self.last_translator = replace_special_char(po[4])
        if self.infofields >= 6:
            self.language_team = replace_special_char(po[5])

    def __str__(self):
        return "Package %s: %s, %s\n%s" % \
            (self.infofields, self.language, self.po_file, self.last_translator)

    def __cmp__(self, other):
        return self.infofields - other.infofields

    def __lt__(self, other):
        # Rich comparison with the same ordering as __cmp__, for interpreters
        # that do not consult __cmp__.
        return self.infofields < other.infofields
97
class i18n_apps_gatherer(gatherer):
    """Import per-package translation information into two database tables.

    One table (config key 'table_apps') receives the entries of the 'PO'
    field of each stanza, the other (config key 'table_debconf') those of the
    'PODEBCONF' field.  Input is one gzip-compressed file per release, found
    at <path>/<release>.gz.
    """

    # Package currently being processed; run() sets it before calling
    # parse_po_infoline().
    pkg = None

    def __init__(self, connection, config, source):
        """Check the configuration and prepare one INSERT statement per table."""
        gatherer.__init__(self, connection, config, source)
        self.assert_my_config('path', 'files', 'table_apps', 'table_debconf')
        my_config = self.my_config

        cur = self.cursor()
        # The prepared statement is named <table>_insert and is executed by
        # parse_po_infoline().
        query = """PREPARE %s_insert
            (text, text, text, text, text, text, text, text, text, int, int, int)
            AS INSERT INTO %s
            (package, version, release, maintainer, po_file, language,
             pkg_version_lang, last_translator, language_team,
             translated, fuzzy, untranslated)
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)"""
        for table in (my_config['table_apps'], my_config['table_debconf']):
            cur.execute(query % (table, table))

    def _release_path(self, rel):
        """Return the path of the compressed input file for release *rel*."""
        return self.my_config['path'] + '/' + rel + '.gz'

    def parse_po_infoline(self, po_type, data):
        """Insert the po entries of field *po_type* of stanza *data*.

        Arguments:
            po_type: 'PO' or 'PODEBCONF'; anything else is reported on stderr
                and ignored.
            data: mapping holding the field named by *po_type*, one po
                description per line.

        Only one entry per language is inserted (the one with the most
        fields).  The process exits with status -1 on psycopg2 InternalError.
        """
        cur = self.cursor()

        if po_type == 'PO':
            target_table = self.my_config['table_apps']
        elif po_type == 'PODEBCONF':
            target_table = self.my_config['table_debconf']
        else:
            stderr.write("Wrong PO type %s ignored.\n" % po_type)
            return

        po_info_dict = {}
        for poline in data[po_type].split("\n"):
            # ignore first empty line
            if len(poline) <= 1:
                continue
            poinfo = po_info(poline)
            if poinfo.infofields == 0:
                continue
            # Sometimes there is more than one po file in a package.  We inject
            # the file which contains better info about the translator.
            # Attention: For the current application it is completely sufficient
            # that we keep the information *that* a package contains a
            # translation for a certain language in UDD.  Other applications
            # might need more complete information.
            if poinfo.language in po_info_dict:
                po_info_dict[poinfo.language] = max(po_info_dict[poinfo.language], poinfo)
            else:
                po_info_dict[poinfo.language] = poinfo

        for poinfo in po_info_dict.values():
            query = "EXECUTE %s_insert (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" % \
                (target_table,
                 quote(self.pkg.package), quote(self.pkg.version), quote(self.pkg.release),
                 quote(self.pkg.maintainer), quote(poinfo.po_file), quote(poinfo.language),
                 quote(poinfo.pkg_version_lang),
                 quote(poinfo.last_translator), quote(poinfo.language_team),
                 poinfo.translated, poinfo.fuzzy, poinfo.untranslated)
            try:
                cur.execute(query)
            except IntegrityError as err:
                # Report the entry that was actually being inserted.
                print(str(err).strip())
                print("%s %s" % (poinfo, self.pkg))
            except InternalError as err:
                print("InternalError: %s" % (err,))
                print("%s %s %s" % (poinfo, self.pkg, po_type))
                print(query)
                exit(-1)
            except UnicodeEncodeError as err:
                print(err)
                print(query)

    def run(self):
        """Reload both tables from the per-release input files.

        All input files are checked first; if one is missing or empty the
        process exits with status 1 before any table is truncated.
        """
        my_config = self.my_config
        cur = self.cursor()

        # NOTE(review): 'releases' is read here although __init__ only asserts
        # 'path', 'files', 'table_apps' and 'table_debconf' - confirm the key.
        releases = my_config['releases'].split(' ')

        # verify whether input files are properly downloaded
        for rel in releases:
            path = self._release_path(rel)
            try:
                size = stat(path).st_size
            except OSError:
                # stat() raises for a missing file; treat it like an empty one.
                size = 0
            if size < 1:
                stderr.write("File %s for release %s does not exist or is empty\n" % (path, rel))
                exit(1)

        # Clean up tables
        query = "TRUNCATE %s; TRUNCATE %s;" % (my_config['table_apps'], my_config['table_debconf'])
        cur.execute(query)

        for rel in releases:
            path = self._release_path(rel)
            g = gzip.GzipFile(path)
            try:
                for stanza in deb822.Sources.iter_paragraphs(g, shared_storage=False):
                    # First entry is no real package but a date entry
                    if 'Version' not in stanza:
                        continue
                    # Packages without any language information are irrelevant;
                    # one of the two fields is enough to be imported.
                    if 'PO' not in stanza and 'PODEBCONF' not in stanza:
                        continue
                    self.pkg = pkg_info(stanza['Package'], rel)
                    self.pkg.version = stanza['Version']
                    self.pkg.maintainer = stanza['Maintainer']

                    if 'PO' in stanza:
                        self.parse_po_infoline('PO', stanza)
                    if 'PODEBCONF' in stanza:
                        self.parse_po_infoline('PODEBCONF', stanza)
            except IOError as err:
                stderr.write("Error reading %s (%s)\n" % (path, err))
            finally:
                g.close()

        cur.execute("ANALYZE %s" % my_config['table_apps'])
        cur.execute("ANALYZE %s" % my_config['table_debconf'])
217
if __name__ == '__main__':
    # No main() is defined or imported in this file, so calling it could only
    # raise NameError.  Fail with an explicit message instead.
    raise SystemExit("i18n_apps_gatherer.py cannot be run directly; "
                     "create the gatherer through get_gatherer().")
220
221 # vim:set et tabstop=2:
222

  ViewVC Help
Powered by ViewVC 1.1.5