| 1 |
neronus-guest |
936 |
# /usr/bin/env python
|
| 2 |
neronus-guest |
1106 |
# Last-Modified: <Sun Aug 17 12:24:40 2008>
|
| 3 |
neronus-guest |
936 |
# This file is a part of the Ultimate Debian Database project
|
| 4 |
neronus-guest |
855 |
|
| 5 |
neronus-guest |
856 |
import debian_bundle.deb822
|
| 6 |
|
|
import gzip
|
| 7 |
neronus-guest |
855 |
import os
|
| 8 |
|
|
import sys
|
| 9 |
neronus-guest |
856 |
import aux
|
| 10 |
neronus-guest |
868 |
import tempfile
|
| 11 |
neronus-guest |
856 |
from aux import ConfigException
|
| 12 |
neronus-guest |
891 |
import psycopg2
|
| 13 |
neronus-guest |
901 |
from gatherer import gatherer
|
| 14 |
neronus-guest |
855 |
|
| 15 |
neronus-guest |
1068 |
def get_gatherer(connection, config, source):
|
| 16 |
|
|
return packages_gatherer(connection, config, source)
|
| 17 |
neronus-guest |
892 |
|
| 18 |
neronus-guest |
901 |
class packages_gatherer(gatherer):
|
| 19 |
neronus-guest |
936 |
"This class imports the data from Packages.gz files into the database"
|
| 20 |
neronus-guest |
901 |
# For efficiency, these are dictionaries
|
| 21 |
neronus-guest |
936 |
# mandatory: list of fields which each package has to provide
|
| 22 |
|
|
# non_mandatory: list of fields which are possibly provided by packages
|
| 23 |
|
|
# ignorable: fields which are not useful for the database,
|
| 24 |
|
|
# but for which no warning should be printed
|
| 25 |
neronus-guest |
901 |
mandatory = {'Package': 0, 'Version': 0, 'Architecture': 0, 'Maintainer': 0,
|
| 26 |
|
|
'Description': 0}
|
| 27 |
|
|
non_mandatory = {'Source': 0, 'Essential': 0, 'Depends': 0, 'Recommends': 0,
|
| 28 |
|
|
'Suggests': 0, 'Enhances': 0, 'Pre-Depends': 0, 'Installed-Size': 0,
|
| 29 |
lucas |
1013 |
'Homepage': 0, 'Size': 0, 'Build-Essential':0, 'Origin':0,
|
| 30 |
|
|
'SHA1':0, 'Replaces':0, 'Section':0, 'MD5sum':0, 'Bugs':0, 'Priority':0,
|
| 31 |
|
|
'Tag':0, 'Task':0, 'Python-Version':0, 'Provides':0, 'Conflicts':0,
|
| 32 |
|
|
'SHA256':0, 'Original-Maintainer':0}
|
| 33 |
|
|
ignorable = {'Filename':0}
|
| 34 |
neronus-guest |
891 |
|
| 35 |
neronus-guest |
901 |
warned_about = {}
|
| 36 |
|
|
# A mapping from <package-name><version> to 1 If <package-name><version> is
|
| 37 |
|
|
# included in this dictionary, this means, that we've already added this
|
| 38 |
|
|
# package with this version for architecture 'all' to the database. Needed
|
| 39 |
|
|
# because different architectures include packages for architecture 'all'
|
| 40 |
|
|
# with the same version, and we don't want these duplicate entries
|
| 41 |
|
|
imported_all_pkgs = {}
|
| 42 |
neronus-guest |
891 |
|
| 43 |
neronus-guest |
1068 |
def __init__(self, connection, config, source):
|
| 44 |
|
|
gatherer.__init__(self, connection, config, source)
|
| 45 |
neronus-guest |
901 |
# The ID for the distribution we want to include
|
| 46 |
|
|
self._distr = None
|
| 47 |
neronus-guest |
1068 |
self.assert_my_config('directory', 'archs', 'release', 'components', 'distribution', 'packages-table', 'packages-schema')
|
| 48 |
neronus-guest |
891 |
|
| 49 |
neronus-guest |
901 |
def build_dict(self, control):
|
| 50 |
|
|
"""Build a dictionary from the control dictionary.
|
| 51 |
neronus-guest |
855 |
|
| 52 |
neronus-guest |
901 |
Influenced by class variables mandatory, non_mandatory and ignorable"""
|
| 53 |
|
|
d = {}
|
| 54 |
|
|
for k in packages_gatherer.mandatory:
|
| 55 |
|
|
if k not in control:
|
| 56 |
|
|
raise "Mandatory field %s not specified" % k
|
| 57 |
neronus-guest |
1032 |
d[k] = control[k]
|
| 58 |
neronus-guest |
901 |
for k in packages_gatherer.non_mandatory:
|
| 59 |
neronus-guest |
1032 |
if k not in control:
|
| 60 |
|
|
d[k] = None
|
| 61 |
|
|
else:
|
| 62 |
|
|
d[k] = control[k]
|
| 63 |
neronus-guest |
901 |
for k in control.keys():
|
| 64 |
neronus-guest |
1032 |
if k not in packages_gatherer.non_mandatory and k not in packages_gatherer.mandatory and k not in packages_gatherer.ignorable:
|
| 65 |
neronus-guest |
901 |
if k not in packages_gatherer.warned_about:
|
| 66 |
|
|
packages_gatherer.warned_about[k] = 1
|
| 67 |
|
|
else:
|
| 68 |
|
|
packages_gatherer.warned_about[k] += 1
|
| 69 |
|
|
return d
|
| 70 |
neronus-guest |
855 |
|
| 71 |
lucas |
1243 |
def import_packages(self, sequence, cur):
|
| 72 |
neronus-guest |
901 |
"""Import the packages from the sequence into the database-connection
|
| 73 |
|
|
conn.
|
| 74 |
neronus-guest |
887 |
|
| 75 |
neronus-guest |
901 |
Sequence has to have an iterator interface, that yields a line every time
|
| 76 |
|
|
it is called.The Format of the sequence is expected to be that of a
|
| 77 |
|
|
debian packages file."""
|
| 78 |
|
|
# The fields that are to be read. Other fields are ignored
|
| 79 |
|
|
for control in debian_bundle.deb822.Packages.iter_paragraphs(sequence):
|
| 80 |
|
|
# Check whether packages with architectue 'all' have already been
|
| 81 |
|
|
# imported
|
| 82 |
|
|
if control['Architecture'] == 'all':
|
| 83 |
|
|
t = control['Package'] + control['Version']
|
| 84 |
|
|
if t in packages_gatherer.imported_all_pkgs:
|
| 85 |
|
|
continue
|
| 86 |
|
|
packages_gatherer.imported_all_pkgs[t] = 1
|
| 87 |
neronus-guest |
891 |
|
| 88 |
neronus-guest |
901 |
d = self.build_dict(control)
|
| 89 |
|
|
|
| 90 |
|
|
# We just use the first line of the description
|
| 91 |
neronus-guest |
1032 |
if 'Description' in d:
|
| 92 |
neronus-guest |
901 |
d['Description'] = d['Description'].split("\n",1)[0]
|
| 93 |
neronus-guest |
1032 |
|
| 94 |
|
|
# Convert numbers to numbers
|
| 95 |
|
|
for f in ['Installed-Size', 'Size']:
|
| 96 |
|
|
if d[f] is not None:
|
| 97 |
|
|
d[f] = int(d[f])
|
| 98 |
neronus-guest |
901 |
|
| 99 |
|
|
# Source is non-mandatory, but we don't want it to be NULL
|
| 100 |
neronus-guest |
1032 |
if d['Source'] is None:
|
| 101 |
neronus-guest |
901 |
d['Source'] = d['Package']
|
| 102 |
neronus-guest |
892 |
d['Source_Version'] = d['Version']
|
| 103 |
|
|
else:
|
| 104 |
neronus-guest |
901 |
split = d['Source'].strip("'").split()
|
| 105 |
|
|
if len(split) == 1:
|
| 106 |
|
|
d['Source_Version'] = d['Version']
|
| 107 |
|
|
else:
|
| 108 |
neronus-guest |
902 |
d['Source'] = aux.quote(split[0])
|
| 109 |
|
|
d['Source_Version'] = aux.quote(split[1].strip("()"))
|
| 110 |
neronus-guest |
891 |
|
| 111 |
neronus-guest |
901 |
query = """EXECUTE package_insert
|
| 112 |
|
|
(%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s,
|
| 113 |
|
|
%(Description)s, %(Source)s, %(Source_Version)s, %(Essential)s,
|
| 114 |
|
|
%(Depends)s, %(Recommends)s, %(Suggests)s, %(Enhances)s,
|
| 115 |
|
|
%(Pre-Depends)s, %(Installed-Size)s, %(Homepage)s, %(Size)s,
|
| 116 |
lucas |
1013 |
%(Build-Essential)s, %(Origin)s, %(SHA1)s,
|
| 117 |
|
|
%(Replaces)s, %(Section)s, %(MD5sum)s, %(Bugs)s, %(Priority)s,
|
| 118 |
|
|
%(Tag)s, %(Task)s, %(Python-Version)s, %(Provides)s,
|
| 119 |
neronus-guest |
1032 |
%(Conflicts)s, %(SHA256)s, %(Original-Maintainer)s)"""
|
| 120 |
neronus-guest |
901 |
try:
|
| 121 |
neronus-guest |
1032 |
cur.execute(query, d)
|
| 122 |
neronus-guest |
901 |
except psycopg2.ProgrammingError:
|
| 123 |
|
|
print query
|
| 124 |
|
|
raise
|
| 125 |
neronus-guest |
891 |
|
| 126 |
neronus-guest |
1068 |
def setup(self):
|
| 127 |
|
|
if 'schema-dir' in self.config['general']:
|
| 128 |
|
|
schema_dir = self.config['general']['schema-dir']
|
| 129 |
|
|
if 'packages-schema' in self.my_config:
|
| 130 |
|
|
schema = schema_dir + '/' + self.my_config['packages-schema']
|
| 131 |
|
|
self.eval_sql_file(schema, self.my_config)
|
| 132 |
|
|
else:
|
| 133 |
|
|
raise Exception("'packages-schema' not specified for source " + self.source)
|
| 134 |
|
|
else:
|
| 135 |
|
|
raise Exception("'schema-dir' not specified")
|
| 136 |
neronus-guest |
855 |
|
| 137 |
neronus-guest |
1106 |
def tables(self):
|
| 138 |
|
|
return [
|
| 139 |
|
|
self.my_config['packages-table'],
|
| 140 |
|
|
self.my_config['packages-table'] + '_summary']
|
| 141 |
neronus-guest |
855 |
|
| 142 |
neronus-guest |
1068 |
def run(self):
|
| 143 |
|
|
src_cfg = self.my_config
|
| 144 |
|
|
|
| 145 |
neronus-guest |
901 |
aux.debug = self.config['general']['debug']
|
| 146 |
neronus-guest |
998 |
table = src_cfg['packages-table']
|
| 147 |
neronus-guest |
891 |
|
| 148 |
neronus-guest |
936 |
# Get distribution ID
|
| 149 |
neronus-guest |
901 |
self._distr = src_cfg['distribution']
|
| 150 |
neronus-guest |
856 |
|
| 151 |
neronus-guest |
901 |
cur = self.cursor()
|
| 152 |
lucas |
1243 |
# defer constraints checking until the end of the transaction
|
| 153 |
|
|
cur.execute("SET CONSTRAINTS ALL DEFERRED")
|
| 154 |
neronus-guest |
856 |
|
| 155 |
neronus-guest |
901 |
# For every part and every architecture, import the packages into the DB
|
| 156 |
|
|
for comp in src_cfg['components']:
|
| 157 |
neronus-guest |
998 |
cur.execute("DELETE FROM %s WHERE distribution = '%s' AND release = '%s' AND component = '%s'" %\
|
| 158 |
|
|
(table, self._distr, src_cfg['release'], comp))
|
| 159 |
neronus-guest |
901 |
for arch in src_cfg['archs']:
|
| 160 |
|
|
path = os.path.join(src_cfg['directory'], comp, 'binary-' + arch, 'Packages.gz')
|
| 161 |
|
|
try:
|
| 162 |
neronus-guest |
998 |
cur.execute("""PREPARE package_insert AS INSERT INTO %s
|
| 163 |
neronus-guest |
901 |
(Package, Version, Architecture, Maintainer, Description, Source,
|
| 164 |
|
|
Source_Version, Essential, Depends, Recommends, Suggests, Enhances,
|
| 165 |
lucas |
1013 |
Pre_Depends, Installed_Size, Homepage, Size,
|
| 166 |
|
|
build_essential, origin, sha1, replaces, section,
|
| 167 |
|
|
md5sum, bugs, priority, tag, task, python_version,
|
| 168 |
|
|
provides, conflicts, sha256, original_maintainer,
|
| 169 |
|
|
Distribution, Release, Component)
|
| 170 |
neronus-guest |
901 |
VALUES
|
| 171 |
|
|
( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
|
| 172 |
lucas |
1013 |
$16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28,
|
| 173 |
|
|
$29, $30, $31, '%s', '%s', '%s')
|
| 174 |
neronus-guest |
998 |
""" % (table, self._distr, src_cfg['release'], comp))
|
| 175 |
neronus-guest |
901 |
aux.print_debug("Reading file " + path)
|
| 176 |
|
|
# Copy content from gzipped file to temporary file, so that apt_pkg is
|
| 177 |
|
|
# used by debian_bundle
|
| 178 |
|
|
tmp = tempfile.NamedTemporaryFile()
|
| 179 |
|
|
file = gzip.open(path)
|
| 180 |
|
|
tmp.write(file.read())
|
| 181 |
|
|
file.close()
|
| 182 |
|
|
tmp.seek(0)
|
| 183 |
|
|
aux.print_debug("Importing from " + path)
|
| 184 |
lucas |
1243 |
self.import_packages(open(tmp.name), cur)
|
| 185 |
neronus-guest |
901 |
tmp.close()
|
| 186 |
|
|
except IOError, (e, message):
|
| 187 |
|
|
print "Could not read packages from %s: %s" % (path, message)
|
| 188 |
|
|
cur.execute("DEALLOCATE package_insert")
|
| 189 |
neronus-guest |
1105 |
# Fill the summary tables
|
| 190 |
lucas |
1107 |
cur.execute("DELETE FROM %s" % (table + '_summary'));
|
| 191 |
neronus-guest |
1105 |
cur.execute("""INSERT INTO %s SELECT DISTINCT ON (package, version,
|
| 192 |
|
|
distribution, release, component) package, version, source,
|
| 193 |
|
|
source_version, maintainer, distribution, release, component FROM %s""" %
|
| 194 |
|
|
(table + '_summary', table));
|
| 195 |
neronus-guest |
855 |
|
| 196 |
neronus-guest |
908 |
self.print_warnings()
|
| 197 |
|
|
|
| 198 |
neronus-guest |
901 |
def print_warnings(self):
|
| 199 |
|
|
for key in packages_gatherer.warned_about:
|
| 200 |
|
|
print("Unknown key: %s appeared %d times" % (key, packages_gatherer.warned_about[key]))
|