/[collab-qa]/udd/udd/packages_gatherer.py
ViewVC logotype

Contents of /udd/udd/packages_gatherer.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1615 - (show annotations) (download) (as text)
Tue Nov 3 06:18:26 2009 UTC (3 years, 6 months ago) by lucas
File MIME type: text/x-python
File size: 9602 byte(s)
Fix a problem when importing several packages sources in the same run where packages have the same name+version in different sources.
1 # /usr/bin/env python
2 # Last-Modified: <Sun Aug 17 12:24:40 2008>
3 # This file is a part of the Ultimate Debian Database project
4
5 import debian_bundle.deb822
6 import gzip
7 import os
8 import sys
9 import aux
10 import tempfile
11 from aux import ConfigException
12 import psycopg2
13 from gatherer import gatherer
14 import email.Utils
15 import re
16
def get_gatherer(connection, config, source):
    """Return a packages_gatherer built from connection, config and source.

    The three arguments are handed unchanged to the packages_gatherer
    constructor.
    """
    instance = packages_gatherer(connection, config, source)
    return instance
19
class packages_gatherer(gatherer):
    """This class imports the data from Packages.gz files into the database.

    run() walks every configured component/architecture pair, loads the
    matching Packages.gz through a prepared INSERT statement and then
    rebuilds the '<table>_summary' and '<table>_distrelcomparch' tables.
    """
    # For efficiency, these are dictionaries
    # mandatory: list of fields which each package has to provide
    # non_mandatory: list of fields which are possibly provided by packages
    # ignorable: fields which are not useful for the database,
    #            but for which no warning should be printed
    mandatory = {'Package': 0, 'Version': 0, 'Architecture': 0,
                 'Maintainer': 0, 'Description': 0}
    non_mandatory = {'Source': 0, 'Essential': 0, 'Depends': 0,
                     'Recommends': 0, 'Suggests': 0, 'Enhances': 0,
                     'Pre-Depends': 0, 'Breaks': 0, 'Installed-Size': 0,
                     'Homepage': 0, 'Size': 0, 'Build-Essential': 0,
                     'Origin': 0, 'SHA1': 0, 'Replaces': 0, 'Section': 0,
                     'MD5sum': 0, 'Bugs': 0, 'Priority': 0, 'Tag': 0,
                     'Task': 0, 'Python-Version': 0, 'Provides': 0,
                     'Conflicts': 0, 'SHA256': 0, 'Original-Maintainer': 0}
    ignorable = {'Filename': 0, 'Npp-Filename': 0, 'Npp-Name': 0,
                 'Npp-Mimetype': 0, 'Npp-Applications': 0,
                 'Python-Runtime': 0, 'Npp-File': 0, 'Npp-Description': 0,
                 'Url': 0, 'Gstreamer-Elements': 0, 'Gstreamer-Version': 0,
                 'Gstreamer-Decoders': 0, 'Gstreamer-Uri-Sinks': 0,
                 'Gstreamer-Encoders': 0, 'Gstreamer-Uri-Sources': 0,
                 'url': 0, 'Vdr-PatchLevel': 0, 'Vdr-Patchlevel': 0,
                 'originalmaintainer': 0, 'Originalmaintainer': 0,
                 'Build-Recommends': 0, 'Multi-Arch': 0,
                 'Maintainer-Homepage': 0, 'Tads2-Version': 0,
                 'Tads3-Version': 0}
    # Field-name prefixes (including common misspellings of "Original-")
    # that are ignored without a warning.
    ignorable_re = re.compile("^(Orig-|Original-|Origianl-|Orginal-|Orignal-|Orgiinal-|Debian-|X-Original-|Upstream-)")

    def __init__(self, connection, config, source):
        """Initialise the gatherer and check the required config keys."""
        gatherer.__init__(self, connection, config, source)
        # The ID for the distribution we want to include
        self._distr = None
        self.assert_my_config('directory', 'archs', 'release', 'components',
                              'distribution', 'packages-table',
                              'packages-schema')
        # Unknown field name -> number of times it was encountered
        self.warned_about = {}
        # A mapping from (<package-name>, <version>) to 1. If a pair is
        # included in this dictionary, this means that we've already added
        # this package with this version for architecture 'all' to the
        # database. Needed because different architectures include packages
        # for architecture 'all' with the same version, and we don't want
        # these duplicate entries.
        self.imported_all_pkgs = {}

    def build_dict(self, control):
        """Build a dictionary from the control dictionary.

        Influenced by class variables mandatory, non_mandatory and ignorable.

        Every mandatory field is copied, every non-mandatory field is copied
        or set to None when absent. Any other field that is neither listed
        in ignorable nor matched by ignorable_re is counted in
        self.warned_about.

        Raises ValueError if a mandatory field is missing from control.
        """
        cls = packages_gatherer
        d = {}
        for k in cls.mandatory:
            if k not in control:
                # The original raised a bare string, which is itself a
                # TypeError on Python >= 2.6; raise a real exception instead.
                raise ValueError("Mandatory field %s not specified" % k)
            d[k] = control[k]
        for k in cls.non_mandatory:
            if k in control:
                d[k] = control[k]
            else:
                d[k] = None
        for k in control.keys():
            if k in cls.non_mandatory or k in cls.mandatory or k in cls.ignorable:
                continue
            if cls.ignorable_re.match(k):
                continue
            self.warned_about[k] = self.warned_about.get(k, 0) + 1
        return d

    def import_packages(self, sequence, cur):
        """Import the packages from the sequence through the cursor cur.

        Sequence has to have an iterator interface that yields a line every
        time it is called. The format of the sequence is expected to be that
        of a Debian Packages file.

        The statement 'package_insert' must already have been PREPAREd on
        the cursor's connection (run() does this).

        Raises ValueError (from build_dict) when a paragraph lacks a
        mandatory field; re-raises psycopg2.ProgrammingError after printing
        the query.
        """
        # email.Utils is a deprecated alias that no longer exists in
        # Python 3; email.utils is the same module under its current name.
        from email.utils import parseaddr

        pkgs = []
        query = """EXECUTE package_insert
            (%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s, %(maintainer_name)s, %(maintainer_email)s,
            %(Description)s, %(Long_Description)s, %(Source)s, %(Source_Version)s, %(Essential)s,
            %(Depends)s, %(Recommends)s, %(Suggests)s, %(Enhances)s,
            %(Pre-Depends)s, %(Breaks)s, %(Installed-Size)s, %(Homepage)s, %(Size)s,
            %(Build-Essential)s, %(Origin)s, %(SHA1)s,
            %(Replaces)s, %(Section)s, %(MD5sum)s, %(Bugs)s, %(Priority)s,
            %(Tag)s, %(Task)s, %(Python-Version)s, %(Provides)s,
            %(Conflicts)s, %(SHA256)s, %(Original-Maintainer)s)"""
        for control in debian_bundle.deb822.Packages.iter_paragraphs(sequence):
            # Check whether packages with architecture 'all' have already
            # been imported. A tuple key is used because plain string
            # concatenation is ambiguous ("lib1" + "2.0" == "lib" + "12.0").
            if control.get('Architecture') == 'all':
                seen_key = (control.get('Package'), control.get('Version'))
                if seen_key in self.imported_all_pkgs:
                    continue
                self.imported_all_pkgs[seen_key] = 1

            d = self.build_dict(control)

            # The first line of the description is the short description,
            # everything after the first newline is the long description.
            if 'Description' in d:
                summary, _sep, details = d['Description'].partition("\n")
                d['Description'] = summary
                d['Long_Description'] = details

            # Convert numbers to numbers
            for f in ('Installed-Size', 'Size'):
                if d[f] is not None:
                    d[f] = int(d[f])

            # Source is non-mandatory, but we don't want it to be NULL.
            # An empty/whitespace-only Source is treated like a missing one
            # (it used to fail with IndexError).
            parts = []
            if d['Source'] is not None:
                parts = d['Source'].strip("'").split()
            if not parts:
                d['Source'] = d['Package']
                d['Source_Version'] = d['Version']
            elif len(parts) == 1:
                # Only a source name: the source version is the binary's.
                d['Source_Version'] = d['Version']
            else:
                # "name (version)": split into name and bare version.
                d['Source'] = parts[0]
                d['Source_Version'] = parts[1].strip("()")

            d['maintainer_name'], d['maintainer_email'] = parseaddr(d['Maintainer'])

            pkgs.append(d)
        try:
            cur.executemany(query, pkgs)
        except psycopg2.ProgrammingError:
            print(query)
            raise

    def setup(self):
        """Evaluate the schema file configured for this source.

        The file is '<schema-dir>/<packages-schema>', taken from the general
        config and this source's config respectively.

        Raises Exception if either setting is missing.
        """
        if 'schema-dir' not in self.config['general']:
            raise Exception("'schema-dir' not specified")
        if 'packages-schema' not in self.my_config:
            raise Exception("'packages-schema' not specified for source " + self.source)
        schema_dir = self.config['general']['schema-dir']
        schema = schema_dir + '/' + self.my_config['packages-schema']
        self.eval_sql_file(schema, self.my_config)

    def tables(self):
        """Return the names of the packages table and its summary table."""
        # NOTE(review): run() also fills '<table>_distrelcomparch', which is
        # not listed here - confirm whether that is intentional.
        return [
            self.my_config['packages-table'],
            self.my_config['packages-table'] + '_summary']

    def run(self):
        """Re-import all configured Packages.gz files and refresh summaries.

        For every component the existing rows of that
        distribution/release/component are deleted, then each architecture's
        Packages.gz is imported. Files that cannot be read are reported on
        stdout and skipped. Afterwards the summary tables are rebuilt, the
        three tables are ANALYZEd and the unknown-field warnings are printed.
        """
        src_cfg = self.my_config

        aux.debug = self.config['general']['debug']
        table = src_cfg['packages-table']
        release = src_cfg['release']

        # Get distribution ID
        self._distr = src_cfg['distribution']

        cur = self.cursor()
        # defer constraints checking until the end of the transaction
        cur.execute("SET CONSTRAINTS ALL DEFERRED")

        # NOTE: table, distribution, release and component come from the
        # configuration and are interpolated into the SQL text unescaped.
        # For every part and every architecture, import the packages into
        # the DB
        for comp in src_cfg['components']:
            cur.execute("DELETE FROM %s WHERE distribution = '%s' AND release = '%s' AND component = '%s'" %
                        (table, self._distr, release, comp))
            for arch in src_cfg['archs']:
                path = os.path.join(src_cfg['directory'], comp,
                                    'binary-' + arch, 'Packages.gz')
                cur.execute("""PREPARE package_insert AS INSERT INTO %s
                    (Package, Version, Architecture, Maintainer, maintainer_name, maintainer_email, Description, Long_Description, Source,
                    Source_Version, Essential, Depends, Recommends, Suggests, Enhances,
                    Pre_Depends, Breaks, Installed_Size, Homepage, Size,
                    build_essential, origin, sha1, replaces, section,
                    md5sum, bugs, priority, tag, task, python_version,
                    provides, conflicts, sha256, original_maintainer,
                    Distribution, Release, Component)
                    VALUES
                    ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
                    $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28,
                    $29, $30, $31, $32, $33, $34, $35, '%s', '%s', '%s')
                    """ % (table, self._distr, release, comp))
                try:
                    self._import_packages_file(path, cur)
                except IOError as err:
                    # Not every IOError carries (errno, strerror) - gzip
                    # raises single-argument ones - so fall back to the
                    # exception's own text.
                    reason = getattr(err, 'strerror', None) or err
                    print("Could not read packages from %s: %s" % (path, reason))
                cur.execute("DEALLOCATE package_insert")

        self._refresh_derived_tables(cur, table)

        cur.execute("ANALYZE %s" % table)
        cur.execute("ANALYZE %s" % (table + '_summary'))
        cur.execute("ANALYZE %s" % (table + '_distrelcomparch'))

        self.print_warnings()

    def _import_packages_file(self, path, cur):
        """Decompress the gzipped file at path and import its packages."""
        # Copy content from gzipped file to temporary file, so that apt_pkg
        # is used by debian_bundle
        tmp = tempfile.NamedTemporaryFile()
        try:
            compressed = gzip.open(path)
            try:
                # Copy in chunks instead of holding the whole file in memory.
                while True:
                    chunk = compressed.read(65536)
                    if not chunk:
                        break
                    tmp.write(chunk)
            finally:
                compressed.close()
            # Make sure everything is on disk before re-opening by name.
            tmp.flush()
            plain = open(tmp.name)
            try:
                self.import_packages(plain, cur)
            finally:
                plain.close()
        finally:
            tmp.close()

    def _refresh_derived_tables(self, cur, table):
        """Empty and refill '<table>_summary' and '<table>_distrelcomparch'."""
        summary = table + '_summary'
        distrelcomparch = table + '_distrelcomparch'
        # Fill the summary tables
        cur.execute("DELETE FROM %s" % summary)
        cur.execute("""INSERT INTO %s (package, version, source, source_version,
            maintainer, maintainer_name, maintainer_email, distribution, release, component)
            SELECT DISTINCT ON (package, version, distribution, release, component)
            package, version, source, source_version, maintainer, maintainer_name, maintainer_email, distribution, release, component
            FROM %s""" % (summary, table))
        cur.execute("DELETE FROM %s" % distrelcomparch)
        cur.execute("""INSERT INTO %s
            (distribution, release, component, architecture)
            SELECT DISTINCT distribution, release, component, architecture
            FROM %s""" % (distrelcomparch, table))

    def print_warnings(self):
        """Print how often each unknown control field was encountered."""
        for key in self.warned_about:
            print("[Packages] Unknown key %s appeared %d times" % (key, self.warned_about[key]))

  ViewVC Help
Powered by ViewVC 1.1.5