/[collab-qa]/udd/udd/packages_gatherer.py
ViewVC logotype

Contents of /udd/udd/packages_gatherer.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1243 - (show annotations) (download) (as text)
Mon Sep 8 09:18:54 2008 UTC (4 years, 8 months ago) by lucas
File MIME type: text/x-python
File size: 7908 byte(s)
added foreign keys, adapted import scripts to allow to use them. DB udd on udd.debian.net was updated manually.
#!/usr/bin/env python
2 # Last-Modified: <Sun Aug 17 12:24:40 2008>
3 # This file is a part of the Ultimate Debian Database project
4
5 import debian_bundle.deb822
6 import gzip
7 import os
8 import sys
9 import aux
10 import tempfile
11 from aux import ConfigException
12 import psycopg2
13 from gatherer import gatherer
14
def get_gatherer(connection, config, source):
    """Create and return a packages_gatherer.

    The three arguments are forwarded unchanged to the packages_gatherer
    constructor.
    """
    instance = packages_gatherer(connection, config, source)
    return instance
17
class packages_gatherer(gatherer):
    """Import the data from Packages.gz files into the database."""

    # For efficiency, these are dictionaries (only the keys are used, for
    # membership tests).
    # mandatory: fields which each package has to provide
    # non_mandatory: fields which are possibly provided by packages
    # ignorable: fields which are not useful for the database,
    #            but for which no warning should be printed
    mandatory = {
        'Package': 0, 'Version': 0, 'Architecture': 0, 'Maintainer': 0,
        'Description': 0,
    }
    non_mandatory = {
        'Source': 0, 'Essential': 0, 'Depends': 0, 'Recommends': 0,
        'Suggests': 0, 'Enhances': 0, 'Pre-Depends': 0, 'Installed-Size': 0,
        'Homepage': 0, 'Size': 0, 'Build-Essential': 0, 'Origin': 0,
        'SHA1': 0, 'Replaces': 0, 'Section': 0, 'MD5sum': 0, 'Bugs': 0,
        'Priority': 0, 'Tag': 0, 'Task': 0, 'Python-Version': 0,
        'Provides': 0, 'Conflicts': 0, 'SHA256': 0, 'Original-Maintainer': 0,
    }
    ignorable = {'Filename': 0}

    # Maps an unknown field name to the number of times it was seen;
    # reported by print_warnings().
    # NOTE(review): class-level, hence shared by all instances.
    warned_about = {}

    # A mapping from <package-name><version> to 1. If <package-name><version>
    # is included in this dictionary, we have already added this package with
    # this version for architecture 'all' to the database. Needed because
    # different architectures include packages for architecture 'all' with
    # the same version, and we don't want these duplicate entries.
    # NOTE(review): class-level and never reset, and the key carries no
    # distribution/release/component -- confirm that this cannot drop
    # 'all' packages when several sources are imported by one process.
    imported_all_pkgs = {}

    def __init__(self, connection, config, source):
        """Initialise the base gatherer and check the required config keys."""
        gatherer.__init__(self, connection, config, source)
        # The ID for the distribution we want to include
        self._distr = None
        self.assert_my_config('directory', 'archs', 'release', 'components',
                              'distribution', 'packages-table',
                              'packages-schema')

    def build_dict(self, control):
        """Build a plain dictionary from the control paragraph.

        Every mandatory field is copied, every non-mandatory field is copied
        or set to None when absent.  Fields that are neither mandatory,
        non-mandatory nor ignorable are counted in the class-level
        warned_about dictionary.

        Raises ValueError if a mandatory field is missing.
        """
        cls = packages_gatherer
        d = {}
        for k in cls.mandatory:
            if k not in control:
                # The original raised a bare string here, which is not a
                # valid exception on Python >= 2.6.
                raise ValueError("Mandatory field %s not specified" % k)
            d[k] = control[k]
        for k in cls.non_mandatory:
            if k in control:
                d[k] = control[k]
            else:
                d[k] = None
        for k in control.keys():
            known = (k in cls.non_mandatory or k in cls.mandatory
                     or k in cls.ignorable)
            if not known:
                cls.warned_about[k] = cls.warned_about.get(k, 0) + 1
        return d

    def import_packages(self, sequence, cur):
        """Import the packages from sequence using the database cursor cur.

        sequence is handed to debian_bundle.deb822.Packages.iter_paragraphs
        and is expected to be in the format of a Debian Packages file.  The
        prepared statement 'package_insert' must already exist on the
        connection; one EXECUTE is issued per imported paragraph.

        A psycopg2.ProgrammingError is re-raised after printing the query.
        """
        query = """EXECUTE package_insert
            (%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s,
            %(Description)s, %(Source)s, %(Source_Version)s, %(Essential)s,
            %(Depends)s, %(Recommends)s, %(Suggests)s, %(Enhances)s,
            %(Pre-Depends)s, %(Installed-Size)s, %(Homepage)s, %(Size)s,
            %(Build-Essential)s, %(Origin)s, %(SHA1)s,
            %(Replaces)s, %(Section)s, %(MD5sum)s, %(Bugs)s, %(Priority)s,
            %(Tag)s, %(Task)s, %(Python-Version)s, %(Provides)s,
            %(Conflicts)s, %(SHA256)s, %(Original-Maintainer)s)"""
        seen_all = packages_gatherer.imported_all_pkgs

        for control in debian_bundle.deb822.Packages.iter_paragraphs(sequence):
            # Check whether packages with architecture 'all' have already
            # been imported
            if control['Architecture'] == 'all':
                t = control['Package'] + control['Version']
                if t in seen_all:
                    continue
                seen_all[t] = 1

            d = self.build_dict(control)

            # We just use the first line of the description ('Description'
            # is mandatory, so build_dict guarantees it is present)
            d['Description'] = d['Description'].split("\n", 1)[0]

            # Convert numbers to numbers
            for f in ('Installed-Size', 'Size'):
                if d[f] is not None:
                    d[f] = int(d[f])

            # Source is non-mandatory, but we don't want it to be NULL
            if d['Source'] is None:
                d['Source'] = d['Package']
                d['Source_Version'] = d['Version']
            else:
                # A Source field may carry a version: "name (version)"
                parts = d['Source'].strip("'").split()
                if len(parts) == 1:
                    d['Source_Version'] = d['Version']
                else:
                    # NOTE(review): these values are passed to cur.execute()
                    # as bound parameters below; confirm that aux.quote()
                    # does not add a second layer of quoting.
                    d['Source'] = aux.quote(parts[0])
                    d['Source_Version'] = aux.quote(parts[1].strip("()"))

            try:
                cur.execute(query, d)
            except psycopg2.ProgrammingError:
                print(query)
                raise

    def setup(self):
        """Evaluate the packages schema SQL file named in the configuration.

        Raises Exception if 'schema-dir' (general section) or
        'packages-schema' (this source) is not configured.
        """
        general = self.config['general']
        if 'schema-dir' not in general:
            raise Exception("'schema-dir' not specified")
        if 'packages-schema' not in self.my_config:
            raise Exception("'packages-schema' not specified for source "
                            + self.source)
        schema = general['schema-dir'] + '/' + self.my_config['packages-schema']
        self.eval_sql_file(schema, self.my_config)

    def tables(self):
        """Return the names of the packages table and its summary table."""
        table = self.my_config['packages-table']
        return [table, table + '_summary']

    def _import_packages_file(self, path, cur):
        """Decompress the Packages.gz at path and import its content.

        Every file handle opened here is closed again, also on error.
        IOError raised while reading is propagated to the caller.
        """
        aux.print_debug("Reading file " + path)
        # Copy content from gzipped file to temporary file, so that apt_pkg
        # is used by debian_bundle
        tmp = tempfile.NamedTemporaryFile()
        try:
            gz = gzip.open(path)
            try:
                tmp.write(gz.read())
            finally:
                gz.close()
            # Make sure the data is on disk before re-opening by name
            tmp.flush()
            aux.print_debug("Importing from " + path)
            plain = open(tmp.name)
            try:
                self.import_packages(plain, cur)
            finally:
                plain.close()
        finally:
            tmp.close()

    def run(self):
        """Re-import all configured components/architectures.

        For every component the existing rows of this distribution/release/
        component are deleted, then each architecture's Packages.gz is
        imported.  Finally the summary table is rebuilt and the
        unknown-field warnings are printed.  A Packages.gz that cannot be
        read is reported and skipped.
        """
        src_cfg = self.my_config

        aux.debug = self.config['general']['debug']
        table = src_cfg['packages-table']
        summary_table = table + '_summary'
        release = src_cfg['release']

        # Get distribution ID
        self._distr = src_cfg['distribution']

        cur = self.cursor()
        # defer constraints checking until the end of the transaction
        cur.execute("SET CONSTRAINTS ALL DEFERRED")

        # For every part and every architecture, import the packages into
        # the DB.  The interpolated values all come from the configuration.
        for comp in src_cfg['components']:
            cur.execute("DELETE FROM %s WHERE distribution = '%s' AND release = '%s' AND component = '%s'" %
                        (table, self._distr, release, comp))
            for arch in src_cfg['archs']:
                path = os.path.join(src_cfg['directory'], comp,
                                    'binary-' + arch, 'Packages.gz')
                cur.execute("""PREPARE package_insert AS INSERT INTO %s
                    (Package, Version, Architecture, Maintainer, Description, Source,
                    Source_Version, Essential, Depends, Recommends, Suggests, Enhances,
                    Pre_Depends, Installed_Size, Homepage, Size,
                    build_essential, origin, sha1, replaces, section,
                    md5sum, bugs, priority, tag, task, python_version,
                    provides, conflicts, sha256, original_maintainer,
                    Distribution, Release, Component)
                    VALUES
                    ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
                    $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28,
                    $29, $30, $31, '%s', '%s', '%s')
                    """ % (table, self._distr, release, comp))
                try:
                    self._import_packages_file(path, cur)
                except IOError:
                    # sys.exc_info() keeps this valid on old and new Python
                    # versions alike.  Not every IOError carries an
                    # (errno, strerror) pair, so fall back to str().
                    err = sys.exc_info()[1]
                    message = getattr(err, 'strerror', None) or str(err)
                    print("Could not read packages from %s: %s" % (path, message))
                cur.execute("DEALLOCATE package_insert")

        # Fill the summary tables
        cur.execute("DELETE FROM %s" % summary_table)
        cur.execute("""INSERT INTO %s SELECT DISTINCT ON (package, version,
            distribution, release, component) package, version, source,
            source_version, maintainer, distribution, release, component FROM %s""" %
                    (summary_table, table))

        self.print_warnings()

    def print_warnings(self):
        """Print how often each unknown control field was encountered."""
        for key in packages_gatherer.warned_about:
            print("Unknown key: %s appeared %d times" % (key, packages_gatherer.warned_about[key]))

  ViewVC Help
Powered by ViewVC 1.1.5