/[collab-qa]/udd/udd/packages_gatherer.py
ViewVC logotype

Contents of /udd/udd/packages_gatherer.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1243 - (hide annotations) (download) (as text)
Mon Sep 8 09:18:54 2008 UTC (4 years, 8 months ago) by lucas
File MIME type: text/x-python
File size: 7908 byte(s)
added foreign keys, adapted import scripts to allow to use them. DB udd on udd.debian.net was updated manually.
1 neronus-guest 936 # /usr/bin/env python
2 neronus-guest 1106 # Last-Modified: <Sun Aug 17 12:24:40 2008>
3 neronus-guest 936 # This file is a part of the Ultimate Debian Database project
4 neronus-guest 855
5 neronus-guest 856 import debian_bundle.deb822
6     import gzip
7 neronus-guest 855 import os
8     import sys
9 neronus-guest 856 import aux
10 neronus-guest 868 import tempfile
11 neronus-guest 856 from aux import ConfigException
12 neronus-guest 891 import psycopg2
13 neronus-guest 901 from gatherer import gatherer
14 neronus-guest 855
def get_gatherer(connection, config, source):
    """Factory entry point: build the gatherer object for this module.

    All three arguments are forwarded, unchanged and in the same order, to
    the packages_gatherer constructor; the new instance is returned.
    """
    instance = packages_gatherer(connection, config, source)
    return instance
17 neronus-guest 892
class packages_gatherer(gatherer):
    """This class imports the data from Packages.gz files into the database.

    For every configured component and architecture, run() reads
    <directory>/<component>/binary-<arch>/Packages.gz, inserts one row per
    package paragraph into the configured packages table and finally
    rebuilds the companion '<packages-table>_summary' table.
    """
    # For efficiency, these are dictionaries (only the keys matter)
    # mandatory: list of fields which each package has to provide
    # non_mandatory: list of fields which are possibly provided by packages
    # ignorable: fields which are not useful for the database,
    #            but for which no warning should be printed
    mandatory = {'Package': 0, 'Version': 0, 'Architecture': 0,
                 'Maintainer': 0, 'Description': 0}
    non_mandatory = {'Source': 0, 'Essential': 0, 'Depends': 0,
                     'Recommends': 0, 'Suggests': 0, 'Enhances': 0,
                     'Pre-Depends': 0, 'Installed-Size': 0, 'Homepage': 0,
                     'Size': 0, 'Build-Essential': 0, 'Origin': 0,
                     'SHA1': 0, 'Replaces': 0, 'Section': 0, 'MD5sum': 0,
                     'Bugs': 0, 'Priority': 0, 'Tag': 0, 'Task': 0,
                     'Python-Version': 0, 'Provides': 0, 'Conflicts': 0,
                     'SHA256': 0, 'Original-Maintainer': 0}
    ignorable = {'Filename': 0}

    # Maps every unknown field name to the number of times it was seen.
    # Class-level, hence shared by all instances in the process.
    warned_about = {}
    # A mapping from <package-name><version> to 1. If <package-name><version>
    # is included in this dictionary, this means that we've already added this
    # package with this version for architecture 'all' to the database. Needed
    # because different architectures include packages for architecture 'all'
    # with the same version, and we don't want these duplicate entries.
    # NOTE(review): the key is a plain concatenation, so e.g. ('lib1', '23')
    # and ('lib12', '3') map to the same key; the dictionary is also never
    # reset between components or runs -- confirm this is intended.
    imported_all_pkgs = {}

    def __init__(self, connection, config, source):
        """Initialise the base gatherer and check the source configuration.

        The arguments are handed unchanged to gatherer.__init__;
        assert_my_config is then called with the names of all configuration
        keys this importer reads.
        """
        gatherer.__init__(self, connection, config, source)
        # The ID for the distribution we want to include
        self._distr = None
        self.assert_my_config('directory', 'archs', 'release', 'components',
                              'distribution', 'packages-table',
                              'packages-schema')

    def build_dict(self, control):
        """Build a dictionary from the control dictionary.

        Influenced by class variables mandatory, non_mandatory and ignorable.

        control -- dict-like object holding one package paragraph.

        Returns a dict with one entry per mandatory and per non-mandatory
        field; non-mandatory fields missing from control are set to None.
        Fields of control which are in none of the three class dictionaries
        are counted in packages_gatherer.warned_about.

        Raises ValueError if a mandatory field is missing.
        """
        d = {}
        for k in packages_gatherer.mandatory:
            if k not in control:
                # String exceptions are not supported by Python >= 2.6,
                # so a real exception object has to be raised here.
                raise ValueError("Mandatory field %s not specified" % k)
            d[k] = control[k]

        for k in packages_gatherer.non_mandatory:
            if k in control:
                d[k] = control[k]
            else:
                d[k] = None

        for k in control.keys():
            if (k in packages_gatherer.non_mandatory
                    or k in packages_gatherer.mandatory
                    or k in packages_gatherer.ignorable):
                continue
            # Unknown field: only count it here, print_warnings() reports it
            seen = packages_gatherer.warned_about.get(k, 0)
            packages_gatherer.warned_about[k] = seen + 1
        return d

    def import_packages(self, sequence, cur):
        """Import the packages from the sequence using the database cursor
        cur.

        Sequence has to have an iterator interface, that yields a line every
        time it is called. The format of the sequence is expected to be that
        of a debian packages file.

        The statement 'package_insert' must already have been prepared on the
        session of cur (run() does this); it is executed once per imported
        paragraph. Packages of architecture 'all' whose name and version have
        been seen before are skipped.

        Raises ValueError (from build_dict) if a paragraph lacks a mandatory
        field. A psycopg2.ProgrammingError raised by the insert is re-raised
        after the query text has been printed.
        """
        # The placeholders are looked up by name in the dictionary returned
        # by build_dict. The text is the same for every paragraph, so it is
        # built once, outside of the loop.
        query = """EXECUTE package_insert
            (%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s,
            %(Description)s, %(Source)s, %(Source_Version)s, %(Essential)s,
            %(Depends)s, %(Recommends)s, %(Suggests)s, %(Enhances)s,
            %(Pre-Depends)s, %(Installed-Size)s, %(Homepage)s, %(Size)s,
            %(Build-Essential)s, %(Origin)s, %(SHA1)s,
            %(Replaces)s, %(Section)s, %(MD5sum)s, %(Bugs)s, %(Priority)s,
            %(Tag)s, %(Task)s, %(Python-Version)s, %(Provides)s,
            %(Conflicts)s, %(SHA256)s, %(Original-Maintainer)s)"""

        for control in debian_bundle.deb822.Packages.iter_paragraphs(sequence):
            # Check whether packages with architecture 'all' have already
            # been imported
            if control['Architecture'] == 'all':
                t = control['Package'] + control['Version']
                if t in packages_gatherer.imported_all_pkgs:
                    continue
                packages_gatherer.imported_all_pkgs[t] = 1

            d = self.build_dict(control)

            # We just use the first line of the description. 'Description'
            # is mandatory, so build_dict guarantees that it is present.
            d['Description'] = d['Description'].split("\n", 1)[0]

            # Convert numbers to numbers
            for f in ('Installed-Size', 'Size'):
                if d[f] is not None:
                    d[f] = int(d[f])

            # Source is non-mandatory, but we don't want it to be NULL
            if d['Source'] is None:
                d['Source'] = d['Package']
                d['Source_Version'] = d['Version']
            else:
                # The field is either "<source>" or "<source> (<version>)"
                split = d['Source'].strip("'").split()
                if len(split) == 1:
                    d['Source_Version'] = d['Version']
                else:
                    # NOTE(review): these values are passed as bound
                    # parameters below, so if aux.quote adds SQL quoting the
                    # quotes would end up in the stored value -- confirm
                    # whether aux.quote is still wanted here.
                    d['Source'] = aux.quote(split[0])
                    d['Source_Version'] = aux.quote(split[1].strip("()"))

            try:
                cur.execute(query, d)
            except psycopg2.ProgrammingError:
                # Show the failing statement before propagating the error
                print(query)
                raise

    def setup(self):
        """Evaluate the SQL schema file configured for this source.

        The file name is '<schema-dir>/<packages-schema>', taken from the
        'general' section of the configuration and from the configuration of
        this source respectively.

        Raises Exception if one of the two settings is missing.
        """
        if 'schema-dir' not in self.config['general']:
            raise Exception("'schema-dir' not specified")
        if 'packages-schema' not in self.my_config:
            raise Exception("'packages-schema' not specified for source "
                            + self.source)
        schema_dir = self.config['general']['schema-dir']
        schema = schema_dir + '/' + self.my_config['packages-schema']
        self.eval_sql_file(schema, self.my_config)

    def tables(self):
        """Return the names of the two tables this gatherer writes to: the
        configured packages table and its '_summary' companion."""
        return [
            self.my_config['packages-table'],
            self.my_config['packages-table'] + '_summary']

    def _import_file(self, path, cur):
        """Decompress the Packages.gz at path and import its content.

        Every handle opened here is closed again, even if the import fails.
        """
        # Copy content from gzipped file to temporary file, so that apt_pkg
        # is used by debian_bundle
        tmp = tempfile.NamedTemporaryFile()
        try:
            packed = gzip.open(path)
            try:
                # Copy in chunks instead of holding the whole decompressed
                # file in memory at once
                while True:
                    chunk = packed.read(1024 * 1024)
                    if not chunk:
                        break
                    tmp.write(chunk)
            finally:
                packed.close()
            # The data has to be on disk before the file is reopened by name
            tmp.flush()

            aux.print_debug("Importing from " + path)
            plain = open(tmp.name)
            try:
                self.import_packages(plain, cur)
            finally:
                plain.close()
        finally:
            tmp.close()

    def run(self):
        """Replace the rows of the configured distribution and release.

        For every component the existing rows are deleted, then the
        Packages.gz file of every architecture is imported. A file which
        cannot be read is reported on standard output and skipped.
        Afterwards the '_summary' table is rebuilt from the packages table
        and the counters of unknown fields are printed.
        """
        src_cfg = self.my_config

        aux.debug = self.config['general']['debug']
        table = src_cfg['packages-table']

        # Get distribution ID
        self._distr = src_cfg['distribution']

        cur = self.cursor()
        # defer constraints checking until the end of the transaction
        cur.execute("SET CONSTRAINTS ALL DEFERRED")

        # For every part and every architecture, import the packages into
        # the DB. The statements are built by string formatting; all
        # interpolated values come from the configuration.
        for comp in src_cfg['components']:
            cur.execute("DELETE FROM %s WHERE distribution = '%s' AND release = '%s' AND component = '%s'" %
                        (table, self._distr, src_cfg['release'], comp))
            for arch in src_cfg['archs']:
                path = os.path.join(src_cfg['directory'], comp,
                                    'binary-' + arch, 'Packages.gz')
                try:
                    # $1..$31 are filled in by import_packages; distribution,
                    # release and component are fixed for the whole file.
                    cur.execute("""PREPARE package_insert AS INSERT INTO %s
                        (Package, Version, Architecture, Maintainer, Description, Source,
                        Source_Version, Essential, Depends, Recommends, Suggests, Enhances,
                        Pre_Depends, Installed_Size, Homepage, Size,
                        build_essential, origin, sha1, replaces, section,
                        md5sum, bugs, priority, tag, task, python_version,
                        provides, conflicts, sha256, original_maintainer,
                        Distribution, Release, Component)
                        VALUES
                        ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
                        $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28,
                        $29, $30, $31, '%s', '%s', '%s')
                        """ % (table, self._distr, src_cfg['release'], comp))
                    aux.print_debug("Reading file " + path)
                    self._import_file(path, cur)
                except IOError as e:
                    # Not every IOError carries (errno, strerror); gzip for
                    # instance raises it with a single message argument, so
                    # fall back to the exception text.
                    message = getattr(e, 'strerror', None) or e
                    print("Could not read packages from %s: %s"
                          % (path, message))
                cur.execute("DEALLOCATE package_insert")

        # Fill the summary tables
        cur.execute("DELETE FROM %s" % (table + '_summary'))
        cur.execute("""INSERT INTO %s SELECT DISTINCT ON (package, version,
            distribution, release, component) package, version, source,
            source_version, maintainer, distribution, release, component FROM %s""" %
                    (table + '_summary', table))

        self.print_warnings()

    def print_warnings(self):
        """Print, for every unknown field name encountered so far, how often
        it appeared."""
        for key in packages_gatherer.warned_about:
            print("Unknown key: %s appeared %d times"
                  % (key, packages_gatherer.warned_about[key]))

  ViewVC Help
Powered by ViewVC 1.1.5