| 1 |
tille |
1406 |
#!/usr/bin/env python
|
| 2 |
|
|
|
| 3 |
|
|
"""
This script imports information from the ftp-master NEW queue into the database.

See http://ftp-master.debian.org/new.822 and
http://ftp-master.debian.org/new.html
"""
|
| 8 |
|
|
|
| 9 |
|
|
from debian_bundle import deb822
|
| 10 |
|
|
from os import access, mkdir, unlink, W_OK
|
| 11 |
|
|
from sys import stderr
|
| 12 |
|
|
import aux
|
| 13 |
|
|
from aux import quote
|
| 14 |
|
|
from gatherer import gatherer
|
| 15 |
|
|
import email.Utils
|
| 16 |
|
|
import re
|
| 17 |
|
|
from time import ctime
|
| 18 |
|
|
from psycopg2 import IntegrityError
|
| 19 |
|
|
|
| 20 |
|
|
def get_gatherer(connection, config, source):
    """Return the gatherer object that imports the NEW queue.

    The three arguments are handed unchanged to the ftpnew_gatherer
    constructor.
    """
    return ftpnew_gatherer(connection, config, source)
|
| 22 |
|
|
|
| 23 |
tille |
1408 |
# Set to a non-zero value to get a message on stderr for every source or
# binary package that is skipped because it is already known in UDD.
DEBUG=0
|
| 24 |
|
|
|
| 25 |
tille |
1406 |
# When parsing src html pages we have to get rid of certain html strings
|
| 26 |
|
|
def de_html(string):
    """Return *string* with the markup of the NEW queue HTML pages removed.

    <span ...>, </span>, <pre> and </pre> tags are dropped and the entities
    &quot;, &lt;, &gt; and &amp; are replaced by the characters they stand for.
    """
    # Remove the real tags first, so that text which merely *mentions* a tag
    # in escaped form (e.g. "&lt;pre&gt;") is not stripped after decoding.
    string = re.sub("</?span[^>]*>", '', string)
    string = re.sub("</?pre>", '', string)
    string = string.replace("&quot;", '"')
    string = string.replace("&lt;", '<')
    string = string.replace("&gt;", '>')
    # '&amp;' has to be decoded last: otherwise an escaped entity such as
    # '&amp;lt;' would be decoded twice and end up as '<' instead of '&lt;'.
    string = string.replace("&amp;", '&')
    return string
|
| 34 |
|
|
|
| 35 |
|
|
# These fields are not forwarded to UDD tables for the moment; they are only
# copied to the generated .822 file.  The same is done for every field whose
# name starts with 'Npp-'.
fields_to_pass = (
    'Format',
    'Date',
    'Changed-By',
    'Files',
    'Uploaders',
    'Standards-Version',
    'Priority',
    'Urgency',
    'Dm-Upload-Allowed',
    'Autobuild',
    'Build-Depends',
    'Build-Depends-Indep',
    'Build-Conflicts',
    'Python-Version',
)

# Relationship fields that are stored for a binary package.
dependencies_to_accept = (
    'Depends',
    'Recommends',
    'Suggests',
    'Enhances',
    'Pre-Depends',
    'Breaks',
    'Replaces',
    'Provides',
    'Conflicts',
)
|
| 54 |
|
|
|
| 55 |
|
|
class src_pkg():
    """Fields of one source package found in the NEW queue.

    All fields are kept in the dictionary ``self.s``, keyed by field name.
    ``has_several_versions`` is 0 on creation; the importer sets it to the
    number of additional versions listed for the package.
    """

    def __init__(self, source):
        """Create the record for the source package named *source*."""
        self.has_several_versions = 0
        self.s = {
            'Source': source,
            'Bin': (),            # comma separated list of binaries created from the source
            'Architecture': (),   # architecture(s separated by blanks)
            # Just define Vcs fields in case it is not provided in the control
            'Vcs-Type': None,
            'Vcs-Url': None,
            # preset WNPP bug
            'Closes': 0,
        }

    def check_dict(self):
        "Make sure that non-mandatory fields at least get a '' value"
        for field in ftpnew_gatherer.s_non_mandatory:
            self.s.setdefault(field, '')

    def __str__(self):
        """Return a one-line summary of the package.

        Raises KeyError if one of the fields used in the summary (Version,
        Last_modified, Queue, Distribution, maintainer_name,
        maintainer_email) has not been set yet.
        """
        summary = "Source %(Source)s: %(Version)s, (%(Architecture)s), %(Last_modified)s, %(Queue)s, %(Distribution)s" % \
            (self.s)
        summary += " %(maintainer_name)s <%(maintainer_email)s>, %(Closes)i" % (self.s)
        return summary
|
| 80 |
|
|
|
| 81 |
|
|
class bin_pkg():
    """Fields of one binary package built from a source in the NEW queue.

    All fields are kept in the dictionary ``self.b``, keyed by field name.
    """

    def __init__(self, package, source):
        """Create the record for binary *package* built from *source*."""
        self.b = {
            'Package': package,
            'Source': source,
            'Installed-Size': 0,
            'License': '',
        }

    def check_dict(self):
        "Make sure that non-mandatory fields at least get a '' value"
        for field in ftpnew_gatherer.b_non_mandatory:
            self.b.setdefault(field, '')

    def __str__(self):
        """Return a one-line summary of the package.

        Raises KeyError if Version, Architecture, Maintainer, Description or
        Long_Description has not been set yet.
        """
        return "Package %s: %s, %s, %s, %s, %s" % \
            (self.b['Package'], self.b['Version'], self.b['Architecture'], self.b['Maintainer'],
             self.b['Description'], self.b['Long_Description'])
|
| 99 |
|
|
|
| 100 |
|
|
class ftpnew_gatherer(gatherer):
    """Import the data from the NEW queue into the database.

    The queue overview is read from ``new.822`` and the details of every
    upload from ``<source>_<version>.html``; both are expected in the
    directory configured as ``path``.  While an HTML page is parsed its
    content is also written to ``<source>_<version>.822`` in that directory.
    """

    # Field name tables: only the keys matter, every value is 0.
    # s_non_mandatory and b_non_mandatory are read by src_pkg.check_dict()
    # and bin_pkg.check_dict(); s_mandatory, s_ignorable, s_ignorable_re and
    # s_vcs are not referenced inside this class.
    s_mandatory = {'Source': 0, 'Format': 0, 'Maintainer': 0, 'Package': 0, 'Version': 0, 'Files': 0,
                   'Queue': 0, 'Last_modified': 0}
    s_non_mandatory = {'Uploaders': 0, 'Bin': 0, 'Architecture': 0,
                       'Homepage': 0, 'Build-Depends': 0, 'Vcs-Arch': 0, 'Vcs-Bzr': 0,
                       'Vcs-Cvs': 0, 'Vcs-Darcs': 0, 'Vcs-Git': 0, 'Vcs-Hg': 0, 'Vcs-Svn': 0,
                       'Vcs-Mtn':0, 'Vcs-Browser': 0, 'License': 0, 'Section': 0
                      }
    # NOTE(review): the key 'Vcs-Cvs:' below carries a trailing colon - looks
    # like a typo, left unchanged because the table is data.
    s_ignorable = {'X-Vcs-Browser': 0, 'X-Vcs-Bzr': 0, 'X-Vcs-Darcs': 0, 'X-Vcs-Svn': 0, 'X-Vcs-Hg':0, 'X-Vcs-Git':0,
                   'Directory':0, 'Comment':0, 'Origin':0, 'Url':0, 'X-Collab-Maint':0, 'Autobuild':0, 'Vcs-Cvs:':0,
                   'Python-Standards-Version':0, 'url':0, 'originalmaintainer':0, 'Originalmaintainer':0,
                   'Build-Recommends':0,
                   'Build-Depends-Indep': 0, 'Build-Conflicts': 0, 'Build-Conflicts-Indep': 0,
                   'Priority': 0, 'Python-Version': 0, 'Checksums-Sha1':0,
                   'Checksums-Sha256':0, 'Original-Maintainer':0, 'Dm-Upload-Allowed':0,
                   'Standards-Version': 0,
                  }

    b_non_mandatory = {'Source': 0, 'Essential': 0, 'Depends': 0, 'Recommends': 0,
                       'Suggests': 0, 'Enhances': 0, 'Pre-Depends': 0, 'Breaks':0, 'Installed-Size': 0,
                       'Homepage': 0, 'Size': 0, 'Build-Essential':0, 'Origin':0,
                       'SHA1':0, 'Replaces':0, 'Section':0, 'MD5sum':0, 'Bugs':0, 'Priority':0,
                       'Tag':0, 'Task':0, 'Python-Version':0, 'Provides':0, 'Conflicts':0,
                       'SHA256':0, 'Original-Maintainer':0}

    s_ignorable_re = re.compile("^(Original-|Origianl-|Orginal-|Debian-|X-Original-|Upstream-)")
    s_vcs = { 'Arch':0, 'Bzr':0, 'Cvs':0, 'Darcs':0, 'Git':0, 'Hg':0, 'Svn':0, 'Mtn':0}

    # Patterns used to pick the information out of the per-upload HTML page.
    src_html_failed_re = re.compile(r"^<p>The requested URL /new/.+\.html was not found on this server\.</p>")
    src_html_has_tag_re = re.compile(r'^\s*<tr><td class="key">([-\w]+):</td><td class="val">(.+)</td></tr>$')
    src_html_has_description_start_re = re.compile(r'^\s*<tr><td class="key">Description:</td><td class="val"><pre>(.+)')
    src_html_has_description_end_re = re.compile('(.+)</pre></td></tr>')
    closes_is_itp_re = re.compile(r'^\s*(ITP|RFP|ITA)')
    vcs_type_re = re.compile('Vcs-(Svn|Git|Bzr|Darcs|Hg|Cvs|Arch|Mtn)')

    def __init__(self, connection, config, source):
        """Set up the base gatherer and hand the configuration keys this
        importer reads to assert_my_config()."""
        gatherer.__init__(self, connection, config, source)
        self.assert_my_config('path', 'table_sources', 'table_packages', 'ftpmasterURL', 'releases_ignore')

    def check_existing_binaries(self, values, queue):
        """Return 1 if any package name in *values* is already in UDD, else 0.

        Sometimes the source package name has changed, but the binary package
        name is known in UDD - we are not interested in these packages.
        *queue* is only used for the debug message.  The prepared statement
        ftpnew_check_existing_package has to exist (run() creates it).
        """
        cur = self.cursor()
        for value in values:
            # The name comes from a downloaded page, so it is passed as a
            # query parameter instead of being pasted into the SQL text.
            cur.execute("EXECUTE ftpnew_check_existing_package (%s)", (value,))
            in_udd = cur.fetchone()[0]
            if in_udd:
                if DEBUG != 0:
                    print >>stderr, "Binary package %s is %i times in UDD - no interest in just known binaries (queue = %s)" \
                        % (value, int(in_udd), queue)
                return 1
        return 0

    def run(self):
        """Empty the NEW queue tables and fill them from new.822 and the
        per-upload HTML pages.

        Stanzas of the queues 'accepted' and 'proposedupdates' are skipped,
        as are sources already present in the sources table and uploads
        containing a binary package that is already known in UDD.
        """
        my_config = self.my_config

        #start harassing the DB, preparing the final inserts and making place
        #for the new data:
        cur = self.cursor()

        # if we check whether a package just exists in UDD we ignore oldstable which is currently etch but other
        # dists might have to be ignored as well
        cur.execute("PREPARE ftpnew_check_existing_package AS SELECT COUNT(*) FROM packages WHERE package = $1 AND release NOT IN (%s)" \
                    % my_config["releases_ignore"])
        # For some reason the code tries to add binary packages twice - just verify whether the package is
        # just included to make sure we do not trigger conflicting primary keys
        cur.execute("PREPARE ftpnew_check_just_added_package AS SELECT COUNT(*) FROM new_packages WHERE package = $1 AND version = $2 AND architecture = $3")

        cur.execute("DELETE FROM %s" % my_config["table_sources"])
        cur.execute("DELETE FROM %s" % my_config["table_packages"])

        query = """PREPARE ftpnew_insert_source
                   AS INSERT INTO %s (source, version, maintainer, maintainer_name, maintainer_email, binaries,
                                      changed_by, architecture, homepage,
                                      vcs_type, vcs_url, vcs_browser,
                                      section, distribution, component, closes, license, last_modified, queue)
                   VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)""" % (my_config['table_sources'])
        cur.execute(query)
        query = """PREPARE ftpnew_insert_package
                   AS INSERT INTO %s (package, version, architecture, maintainer, description, source,
                                      depends, recommends, suggests, enhances, pre_depends, breaks, replaces, provides, conflicts,
                                      installed_size, homepage, section, long_description, distribution, component, license)
                   VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22)""" \
                % (my_config['table_packages'])
        cur.execute(query)

        ftpnew_data = open(my_config['path']+'/new.822')
        try:
            has_warned_about_missing_section_key = 0
            for stanza in deb822.Sources.iter_paragraphs(ftpnew_data, shared_storage=False):
                if stanza['queue'] == 'accepted' or stanza['queue'] == 'proposedupdates':
                    continue
                srcpkg, has_section = self._source_from_stanza(stanza)
                if not has_section and has_warned_about_missing_section_key == 0:
                    has_warned_about_missing_section_key = 1
                    print >>stderr, "Warning: Because of a bug in DAK code the Section field is currently missing."

                # Check UDD for existing source packages of this name
                cur.execute("SELECT count(*) FROM sources WHERE source = %s", (srcpkg.s['Source'],))
                in_udd = cur.fetchone()[0]
                if in_udd:
                    if DEBUG != 0:
                        print >>stderr, "%s is %i times in UDD - no interest in just known sources (queue = %s)" \
                            % (srcpkg.s['Source'], int(in_udd), srcpkg.s['Queue'])
                    continue

                src_info_base = srcpkg.s['Source'] + '_' + srcpkg.s['Version']
                src_info_html = my_config['path'] + '/' + src_info_base + '.html'
                src_info_822 = my_config['path'] + '/' + src_info_base + '.822'

                try:
                    srci = open(src_info_html, 'r')
                except IOError as err:
                    print >>stderr, "No html info for package %s in queue %s (%s)." % (srcpkg.s['Source'], stanza['queue'], err)
                    continue
                # Both files are closed even if parsing raises.
                try:
                    srco = open(src_info_822, 'w')
                    try:
                        found, binpkgs = self._parse_source_info(cur, srcpkg, src_info_base, src_info_html, srci, srco)
                    finally:
                        srco.close()
                finally:
                    srci.close()

                # Nothing to store if the page was an error document or the
                # upload only contains binaries UDD already knows.
                if not found or srcpkg.s['Queue'] == 'ignore':
                    continue
                self._insert_source_and_binaries(cur, srcpkg, binpkgs, src_info_html)
        finally:
            ftpnew_data.close()

        cur.execute("DEALLOCATE ftpnew_insert_source")
        cur.execute("DEALLOCATE ftpnew_insert_package")
        cur.execute("DEALLOCATE ftpnew_check_existing_package")
        # This statement is prepared above as well, so it is released here too.
        cur.execute("DEALLOCATE ftpnew_check_just_added_package")
        cur.execute("ANALYZE %s" % my_config["table_sources"])
        cur.execute("ANALYZE %s" % my_config["table_packages"])

    def _source_from_stanza(self, stanza):
        """Build a src_pkg from one stanza of new.822.

        Returns (srcpkg, has_section); has_section is False when the stanza
        has no 'section' key, in which case Section and Component are ''.
        """
        srcpkg = src_pkg(stanza['source'])
        versions = stanza['version'].split(' ')      # the page lists more than one version
        srcpkg.has_several_versions = len(versions)-1 # some tests below fail if more than one version in in queue
        srcpkg.s['Version'] = versions[srcpkg.has_several_versions]
        srcpkg.s['Architecture'] = stanza['architectures']
        srcpkg.s['Queue'] = stanza['queue']
        srcpkg.s['Last_modified'] = ctime(int(stanza['last-modified'])) # We want a real time object instead of an epoch
        srcpkg.s['Distribution'] = stanza['distribution']
        srcpkg.s['Changed-By'] = stanza['changed-by']
        try:
            section = stanza['section']
        except KeyError:
            srcpkg.s['Section'] = ''
            srcpkg.s['Component'] = ''
            return srcpkg, False
        srcpkg.s['Section'] = section
        if section.startswith('non-free'):
            srcpkg.s['Component'] = 'non-free'
        elif section.startswith('contrib'):
            srcpkg.s['Component'] = 'contrib'
        else:
            srcpkg.s['Component'] = 'main'
        return srcpkg, True

    def _record_closes(self, cur, srcpkg, value):
        """Store the first ITP/RFP/ITA bug listed in *value* in srcpkg.s['Closes'].

        *value* is the blank separated list of bug numbers of a Closes field.
        Problems (unknown bug, bug not against wnpp, several ITPs) are
        reported on stderr.
        """
        found_itp = 0
        for val in value.split():
            ival = int(val)
            query = "SELECT title from bugs where id = %i and package = 'wnpp' and source = 'wnpp'" % (ival)
            cur.execute(query)
            row = cur.fetchone()
            if row is None:
                query = "SELECT id, package, source, title FROM bugs WHERE id = %i" % (ival)
                cur.execute(query)
                bug_info = cur.fetchone()
                if not bug_info:
                    print >>stderr, "Bug %i which source package %s claims to close does not exist." % (ival, srcpkg.s['Source'])
                else:
                    print >>stderr, "Bug #%i of package %s and source %s is not against pseudopackage 'wnpp' and hast title '%s'" % tuple(bug_info)
                # There is no wnpp title to inspect for this bug, so go on
                # with the next one instead of testing an unset/stale title.
                continue
            wnpp_title = row[0]
            if not ftpnew_gatherer.closes_is_itp_re.match(wnpp_title):
                print >>stderr, "Closed bug %i seems to be not ITPed (queue = %s; title = %s)" % (ival, srcpkg.s['Queue'], wnpp_title)
            elif found_itp:
                print >>stderr, "Warning: Package %s seems to have more than one ITP bugs (%i, %i).  Only %i is stored in UDD" % \
                    (srcpkg.s['Source'], srcpkg.s['Closes'], ival, srcpkg.s['Closes'])
                query = "SELECT count(*) FROM bugs_merged_with WHERE id = %i OR id = %i" % (srcpkg.s['Closes'], ival)
                cur.execute(query)
                is_merged = cur.fetchone()[0]
                if is_merged != 2:
                    print >>stderr, "  --> Bugs should be merged in BTS!"
            else: # stay with the ITP found first
                srcpkg.s['Closes'] = int(ival)
                found_itp = 1
        if not found_itp:
            print >>stderr, "Most probably %s is not new." % (srcpkg.s['Source'])

    def _parse_source_info(self, cur, srcpkg, src_info_base, src_info_html, srci, srco):
        """Read the HTML page *srci* of one upload and copy it to *srco*.

        Fields of the source part are stored in *srcpkg*, every 'Package'
        field starts a new bin_pkg.  srcpkg.s['Queue'] is set to 'ignore'
        (and parsing stops) when a binary package is already known in UDD.

        Returns (found, binpkgs): found is False if the page is the web
        server's "not found" document, binpkgs is the list of bin_pkg
        objects read.
        """
        in_description = 0
        in_source = 1
        description = ''
        binpkgs = []
        binpkg = None
        for line in srci:
            if ftpnew_gatherer.src_html_failed_re.match(line):
                print >>stderr, "File %s not found." % (src_info_html)
                return False, []
            match = ftpnew_gatherer.src_html_has_tag_re.match(line)
            if match:
                field = match.group(1)
                value = de_html(match.group(2))
                if field == 'Package':
                    # Here begins a new binary package
                    if self.check_existing_binaries((value,), srcpkg.s['Queue']):
                        srcpkg.s['Queue'] = 'ignore'
                        break
                    in_source = 0
                    if binpkg is not None:
                        binpkgs.append(binpkg)
                    binpkg = bin_pkg(value, srcpkg.s['Source'])
                    print >>srco, "\nPackage: %s" % (value)
                    binpkg.b['Distribution'] = srcpkg.s['Distribution']
                elif field == 'Maintainer':
                    if in_source:
                        srcpkg.s[field] = value
                        srcpkg.s['maintainer_name'], srcpkg.s['maintainer_email'] = email.Utils.parseaddr(srcpkg.s['Maintainer'])
                    else:
                        binpkg.b[field] = value
                    print >>srco, "%s: %s" % (field, value)
                elif field in ('Description', 'Architecture'):
                    # value already went through de_html() above; decoding
                    # a second time would mangle escaped entities.
                    if in_source:
                        srcpkg.s[field] = value
                    else:
                        binpkg.b[field] = value
                    print >>srco, "%s: %s" % (field, value)
                elif field == 'Source':
                    if in_source:
                        if value != srcpkg.s['Source']:
                            print >>stderr, "Incompatible source names between new.822(%s) and %s.html (%s)" % \
                                (srcpkg.s['Source'], src_info_base, value)
                        srcpkg.s['Source'] = value
                    print >>srco, "%s: %s" % (field, value)
                elif field == 'Version':
                    if in_source:
                        if srcpkg.has_several_versions == 0 and value != srcpkg.s[field]:
                            print >>stderr, "Incompatible version numbers between new.822(%s) and %s.html (%s)" % \
                                (srcpkg.s[field], src_info_base, value)
                        srcpkg.s[field] = value
                    else:
                        binpkg.b[field] = value
                    print >>srco, "%s: %s" % (field, value)
                elif field == 'Closes':
                    self._record_closes(cur, srcpkg, value)
                    print >>srco, "%s: %s\n" % (field, value)
                elif field == 'Distribution':
                    if in_source:
                        if srcpkg.has_several_versions == 0 and value != srcpkg.s['Distribution']:
                            print >>stderr, "Incompatible distributions between new.822(%s) and %s.html (%s)" % \
                                (srcpkg.s['Distribution'], src_info_base, value)
                        srcpkg.s['Distribution'] = value
                        print >>srco, "%s: %s" % (field, value)
                    else:
                        print >>stderr, "Binary should not mention distribution field in %s.html (%s)" % \
                            (src_info_base, value)
                elif field == 'Binary':
                    if in_source:
                        # Binaries are mentioned in different syntax in *.changes and *.dsc
                        value = re.sub(", +", " ", value)
                        if self.check_existing_binaries(value.split(' '), srcpkg.s['Queue']):
                            srcpkg.s['Queue'] = 'ignore'
                            break
                        if srcpkg.s['Bin'] != () and value != srcpkg.s['Bin']:
                            print >>stderr, "Incompatible binaries between new.822(%s) and %s.html (%s)" % \
                                (srcpkg.s['Bin'], src_info_base, value)
                        srcpkg.s['Bin'] = value
                        print >>srco, "%s: %s" % (field, value)
                    else:
                        print >>stderr, "Binary should not mention Binary field in %s.html (%s)" % \
                            (src_info_base, value)
                elif field == 'Installed-Size':
                    if not in_source:
                        binpkg.b[field] = int(value)
                elif field == 'Homepage':
                    if not in_source:
                        binpkg.b[field] = value
                elif field == 'Section':
                    if not in_source:
                        if not binpkg:
                            print >>stderr, "This should not happen", srcpkg, field, value
                            # Same effect as exit(-1) without depending on
                            # the helper the site module may or may not add.
                            raise SystemExit(-1)
                        else:
                            binpkg.b[field] = value
                            binpkg.b['Component'] = srcpkg.s['Component']
                elif field == 'Vcs-Browser':
                    srcpkg.s[field] = value
                elif binpkg is not None and field in dependencies_to_accept:
                    binpkg.b[field] = value
                    print >>srco, "%s: %s" % (field, value)
                elif field in fields_to_pass or field.startswith('Npp-'):
                    print >>srco, "%s: %s" % (field, value)
                else:
                    matchvcs = ftpnew_gatherer.vcs_type_re.match(field)
                    if matchvcs:
                        srcpkg.s['Vcs-Type'] = matchvcs.group(1)
                        srcpkg.s['Vcs-Url'] = value
                        print >>srco, "%s: %s" % (field, value)
                    else:
                        print >>stderr, "Unknown field in %s: %s" % (srcpkg.s['Source'], field)
                        print >>srco, "*%s: %s" % (field, value)
                continue

            # Not a single-line field: the line is either part of a
            # multi-line <pre> description or starts one.
            if in_description:
                match = ftpnew_gatherer.src_html_has_description_end_re.match(line)
                if match:
                    last_part = match.group(1)
                    if last_part[0] != ' ':
                        description += ' '
                    description += de_html(last_part)
                    in_description = 0
                    if not in_source: # binpkg and binpkg.b:
                        # First line is the short description, the rest the long one.
                        (binpkg.b['Description'], binpkg.b['Long_Description']) = description.split("\n",1)
                        print >>srco, "Description: %s\n%s" % (binpkg.b['Description'], binpkg.b['Long_Description'])
                else:
                    if line[0] != ' ':
                        description += ' '
                    description += de_html(line)
            else:
                match = ftpnew_gatherer.src_html_has_description_start_re.match(line)
                if match:
                    in_description = 1
                    description = de_html(match.group(1)) + "\n"

        # Append last read binary package to list of binary packages; there
        # is none if the page did not contain a single Package field.
        if binpkg is not None:
            binpkgs.append(binpkg)
        return True, binpkgs

    def _insert_source_and_binaries(self, cur, srcpkg, binpkgs, src_info_html):
        """Insert *srcpkg* and all packages of *binpkgs* via the prepared
        insert statements.  A binary package that violates a constraint or
        lacks a needed field is reported on stderr and skipped.
        """
        srcpkg.check_dict()
        query = """EXECUTE ftpnew_insert_source (%(Source)s, %(Version)s,
                   %(Maintainer)s, %(maintainer_name)s, %(maintainer_email)s,
                   %(Bin)s, %(Changed-By)s, %(Architecture)s, %(Homepage)s,
                   %(Vcs-Type)s, %(Vcs-Url)s, %(Vcs-Browser)s,
                   %(Section)s, %(Distribution)s, %(Component)s, %(Closes)s, %(License)s,
                   %(Last_modified)s, %(Queue)s)"""
        cur.execute(query, srcpkg.s)
        query = """EXECUTE ftpnew_insert_package (%(Package)s, %(Version)s,
                   %(Architecture)s, %(Maintainer)s, %(Description)s, %(Source)s,
                   %(Depends)s, %(Recommends)s, %(Suggests)s, %(Enhances)s,
                   %(Pre-Depends)s, %(Breaks)s, %(Replaces)s, %(Provides)s, %(Conflicts)s,
                   %(Installed-Size)s, %(Homepage)s, %(Section)s,
                   %(Long_Description)s, %(Distribution)s, %(Component)s, %(License)s)"""
        for binpkg in binpkgs:
            binpkg.check_dict()
            try:
                cur.execute(query, binpkg.b)
            except IntegrityError as err:
                print >>stderr, err, src_info_html
                print >>stderr, binpkg
                print >>stderr, binpkg.b
                continue
            except KeyError as err:
                print >>stderr, "Missing information field for binary package %s: %s" % (binpkg.b['Package'], err)
                continue
|
| 451 |
tille |
1406 |
|
| 452 |
|
|
if __name__ == '__main__':
    # NOTE(review): no main() is defined or imported in the visible part of
    # this file, so executing the module as a script would raise NameError -
    # confirm whether an entry point is missing or this guard is a leftover.
    main()
|
| 454 |
|
|
|
| 455 |
|
|
# vim:set et tabstop=2:
|