9b4a35d62d1c198882090a053c576b4f30347669
1 #!/usr/bin/python
3 # Copyright 2011 Paul Wise
4 # Released under the MIT/Expat license, see doc/COPYING
6 # Uses the snapshot.debian.org metadata database and SHA-1 based filesystem to
7 # compute debdiffs between Debian and individual derivatives. The metadata
8 # allows knowing if a particular file was ever in Debian and the filesystem
9 # allows the creation of debdiffs.
10 #
11 # The script works approximately like this:
12 #
13 # Load the Sources files previously downloaded by get-package-lists as indicated
14 # by the sources.list of the derivative.
15 #
16 # For each source package in the derivative:
17 #
18 # Check if the dsc has ever been in Debian, if not, check if the other
19 # parts have and therefore decide if the package is unmodified or not.
20 # Unmodified source packages are skipped and include those with the exact
21 # same dsc file or those where all the non-dsc parts are identical.
22 #
23 # Try some heuristics (name, version, changelog entries) to find out if
24 # the package could be based on some package that is or was in Debian.
25 #
26 # If it was not then skip to the next one and make a note, since Debian
27 # might want to know about source packages that are missing from Debian.
28 #
29 # If it was then use debdiff to create a diff and filterdiff to create a
30 # diff of the debian/ dir.
31 #
32 # Usage:
33 # compare-source-package-list <sources.list> <apt dir> <patches list> <links list> <new package list> <log file>
35 # FIXME: write out some statistics and rrdtool graphs
36 # source package types per derivative
37 # number of source packages
38 # cache misses: md5, sha256, sha1, patch, changelog
39 # FIXME: comment the code to list assumptions and function purpose
40 # FIXME: add options to allow re-processing only specific packages
41 # FIXME: write something to clean up old files and patches
42 # FIXME: don't unpack or make a patch when we don't have all the parts
43 # FIXME: don't make a patch when we were not able to unpack the source package
44 # FIXME: cleanup files at start of run
45 # FIXME: extract new debian/patches/ patches
46 # FIXME: print out packages that are no longer in Debian
47 # FIXME: deal with really large patches:
48 # FIXME: kde-l10n-*: too few parts to be useful
49 # FIXME: divergence: too many changelog entries between versions to be useful
50 # FIXME: derivative is older than Debian
51 # FIXME: derivative renamed the source package
52 # FIXME: just a really big diff
53 # FIXME: when there are multiple dsc files in snapshots, prefer the debian/debian-archive one
54 # FIXME: when the source package is ancient and the dsc is missing, make a fake one to use
55 # FIXME: add an in-memory cache of hashes so that hashes in multiple releases hit the disk once
57 import os
58 import sys
59 import httplib
60 import urllib2
61 import hashlib
62 import shutil
63 import logging
64 import tempfile
65 import string
66 import socket
67 import signal
68 import subprocess
69 import yaml
70 from debian import deb822, changelog
71 import apt_pkg
72 import psycopg2
73 try: import cjson as json
74 except ImportError: import json
76 # Helper functions for python stuff with annoying error handling
def makedirs(dirs):
    """Create `dirs` and any missing parent directories, best effort.

    Any OSError raised by os.makedirs (for instance because the directory
    already exists) is discarded.
    """
    try:
        os.makedirs(dirs)
    except OSError:
        # Deliberately ignored
        pass
def rmtree(dir):
    """Recursively delete the directory tree `dir`, discarding any OSError."""
    try:
        shutil.rmtree(dir)
    except OSError:
        # Deliberately ignored (e.g. the tree is already gone)
        pass
def remove(file):
    """Delete the file at path `file`, discarding any OSError."""
    try:
        os.remove(file)
    except OSError:
        # Deliberately ignored (e.g. the file does not exist)
        pass
def symlink(source, link):
    """Create a symbolic link at `link` pointing to `source`, best effort.

    Any OSError (for instance because `link` already exists) is discarded,
    so an existing link is left untouched.
    """
    try:
        os.symlink(source, link)
    except OSError:
        # Deliberately ignored
        pass
94 # http://www.chiark.greenend.org.uk/ucgi/~cjwatson/blosxom/2009-07-02-python-sigpipe.html
def subprocess_setup():
    """Restore the default SIGPIPE disposition.

    Used as the preexec_fn of subprocess.Popen: Python installs its own
    SIGPIPE handler by default, which is usually not what non-Python
    subprocesses expect.
    """
    default_action = signal.SIG_DFL
    signal.signal(signal.SIGPIPE, default_action)
100 # We need to map apt_pkg.version_compare return values to cmp return values
101 # The documentation is incorrect: http://bugs.debian.org/680891
def apt_version_cmp(a, b):
    """Compare two version strings the way cmp() would.

    Returns -1, 0 or 1 according to the sign of the value returned by
    apt_pkg.version_compare(a, b).
    """
    outcome = apt_pkg.version_compare(a, b)
    # Reduce the result to its sign
    return (outcome > 0) - (outcome < 0)
# Config
# Relative paths are resolved against the current working directory at
# start-up.
# md5-farm and sha256-farm hold symlinks, named by that checksum, pointing at
# the copy of the same file that is stored under its SHA-1.
md5_cache_dir = os.path.abspath('../md5-farm')
# Downloaded files, stored under their SHA-1
sha1_cache_dir = os.path.abspath('../sha1-farm')
sha256_cache_dir = os.path.abspath('../sha256-farm')
# Generated patches, cached lsdiff output and cached changelog entries
sha1_patch_dir = os.path.abspath('../sha1-patches')
sha1_lsdiff_dir = os.path.abspath('../sha1-lsdiff')
sha1_changelog_dir = os.path.abspath('../sha1-changelog')
# Trees of named symlinks that point at the files in sha1_patch_dir
deriv_patch_dir = os.path.abspath('patches')
global_patch_dir = os.path.abspath('../patches')
# SHA-1 indexed file store that is checked before the local sha1 cache
snapshot_cache_dir = '/srv/snapshot.debian.org/farm'
# Patches larger than 100MB are replaced by a symlink to this file
patch_too_large = os.path.abspath('../../doc/patch-too-large.txt')
# Parallel tuples, matched up position by position: the key used in a parsed
# checksum entry, the hashlib constructor name and the header that holds it
checksum_types = ('sha1', 'sha256', 'md5sum')
checksum_hashlib = ('sha1', 'sha256', 'md5')
checksum_headers = ('Checksums-Sha1', 'Checksums-Sha256', 'Files')
# Sent as the User-Agent header of every download
user_agent = 'Debian Derivatives Census QA bot'
# Passed as the timeout argument of urllib2.urlopen
timeout = 60
# True when s consists only of hexadecimal digits (also true for '')
ishex = lambda s: not(set(s)-set(string.hexdigits))
# Init
apt_pkg.init()

# Setup configuration
# Every apt directory is pointed at the directory given in sys.argv[2] and the
# sources.list is taken from sys.argv[1].
apt_pkg.config.set('Dir', os.path.abspath(sys.argv[2]))
apt_pkg.config.set('Dir::Etc', os.path.abspath(sys.argv[2]))
apt_pkg.config.set('Dir::State', os.path.abspath(sys.argv[2]))
apt_pkg.config.set('Dir::Cache', os.path.abspath(sys.argv[2]))
apt_pkg.config.set('Dir::State::status', os.path.abspath(os.path.join(sys.argv[2],'status')))
apt_pkg.config.set('Dir::Etc::sourcelist', os.path.abspath(sys.argv[1]))
# NOTE(review): presumably a placeholder so that no real gpg binary is used - confirm
apt_pkg.config.set('Dir::Bin::gpg', 'fakegpgv')

# Preparation
sources_list = apt_pkg.SourceList()
sources_list.read_main_list()
# Module-wide database connection and cursor used by the query helpers
conn = psycopg2.connect("service=snapshot-guest")
cur = conn.cursor()
# The log file is recreated on every run.
# NOTE(review): the usage comment at the top of the file lists six arguments
# but the log file is read from sys.argv[7] - confirm which one is right
remove(sys.argv[7])
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG, filename=sys.argv[7])

# Voodoo
lists_dir = apt_pkg.config.find_dir('Dir::State::lists')
# One list per sources.list entry, holding only its 'Debian Source Index' index files
source_entries = [[i for i in x.index_files if i.label=='Debian Source Index'] for x in sources_list.list]
# The derivative is named after the current working directory
derivative_short_name = os.path.basename(os.getcwd())
# Counters updated through `global` statements in the functions below
modifies_dsc_files = 0
repackaged_but_identical = 0
153 # Helper functions for generating path names
def hash_path_parent(dir, hash):
    """Return the directory below `dir` that holds the entry for `hash`.

    The first two and the next two characters of `hash` form the two
    directory levels.
    """
    level_one = hash[:2]
    level_two = hash[2:4]
    return os.path.join(dir, level_one, level_two)
def hash_path(dir, hash):
    """Return the path of the entry for `hash` below `dir`.

    The layout is <dir>/<hash[0:2]>/<hash[2:4]>/<hash>.
    """
    components = (hash[:2], hash[2:4], hash)
    return os.path.join(dir, *components)
def hash_path_exists(dir, hash):
    """Return True when the entry for `hash` exists below `dir`.

    The entry is looked up at <dir>/<hash[0:2]>/<hash[2:4]>/<hash>.
    """
    candidate = os.path.join(dir, hash[:2], hash[2:4], hash)
    return os.path.exists(candidate)
def sha1_patch_path(debian_dsc_sha1, dsc_sha1, type=None):
    """Return the absolute path of the patch between two dsc files.

    The path lives below sha1_patch_dir, first fanned out by the SHA-1 of
    the Debian dsc and then by the SHA-1 of the derivative dsc. When `type`
    is given it is inserted before the '.patch' extension.
    """
    debian_part = hash_path(sha1_patch_dir, debian_dsc_sha1)
    deriv_part = hash_path('', dsc_sha1)
    extension = '.%s.patch' % type if type else '.patch'
    return os.path.abspath(os.path.join(debian_part, deriv_part) + extension)
def sha1_lsdiff_path(debian_dsc_sha1, dsc_sha1, type=None):
    """Return the absolute path of the cached lsdiff output for a patch.

    The path lives below sha1_lsdiff_dir, first fanned out by the SHA-1 of
    the Debian dsc and then by the SHA-1 of the derivative dsc. When `type`
    is given it is inserted before the '.lsdiff' extension.
    """
    debian_part = hash_path(sha1_lsdiff_dir, debian_dsc_sha1)
    deriv_part = hash_path('', dsc_sha1)
    extension = '.%s.lsdiff' % type if type else '.lsdiff'
    return os.path.abspath(os.path.join(debian_part, deriv_part) + extension)
def shortslug(name):
    """Return the directory prefix used to group packages by name.

    Names starting with 'lib' are grouped by their first four characters,
    every other name by its first character.
    """
    if name.startswith('lib'):
        return name[:4]
    return name[0]
def deriv_patch_path(name, version, debian_name, debian_version, type=None):
    """Return the absolute path of the named patch link of this derivative.

    The link lives in deriv_patch_dir/<slug>/<debian_name>/ and its file name
    is made of the Debian name and version followed by the derivative name
    and version. When `type` is given it is inserted before '.patch'.
    """
    # The empty last component leaves a trailing separator on the directory
    directory = os.path.join(deriv_patch_dir, shortslug(debian_name), debian_name, '')
    basename = '_'.join((debian_name, debian_version, name, version))
    extension = '.%s.patch' % type if type else '.patch'
    return os.path.abspath(directory + basename + extension)
def global_patch_path(name, version, debian_name, debian_version, type=None):
    """Return the absolute path of the named patch link shared by all derivatives.

    The link lives in global_patch_dir/<slug>/<debian_name>/ and its file
    name is made of 'Debian', the Debian name and version, the derivative
    short name and the derivative package name and version. When `type` is
    given it is inserted before '.patch'.
    """
    # The empty last component leaves a trailing separator on the directory
    directory = os.path.join(global_patch_dir, shortslug(debian_name), debian_name, '')
    basename = '_'.join(('Debian', debian_name, debian_version, derivative_short_name, name, version))
    extension = '.%s.patch' % type if type else '.patch'
    return os.path.abspath(directory + basename + extension)
193 # Functions for munging source packages
def convert_lzip_to_gzip(dir, name):
    """Recompress the lzip file `name` inside `dir` with gzip.

    Failures of either tool are logged as warnings and otherwise ignored.
    Returns a tuple of the old file name and the new '.gz' file name.
    """
    def run(cmdline):
        # Run a command in `dir`, returning its exit status and merged output
        child = subprocess.Popen(cmdline, cwd=dir, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=subprocess_setup)
        merged = child.communicate()[0]
        return child.returncode, merged

    code, text = run(['lzip', '-d', name])
    if code:
        logging.warning('lzip reported failure to decompress %s:', name)
        logging.warning(text)

    # Strip off .lz
    bname = name[:-3]

    # gzip -1 to reduce overhead
    code, text = run(['gzip', '-1', bname])
    if code:
        logging.warning('gzip reported failure to compress %s:', bname)
        logging.warning(text)

    return (name, bname + '.gz')
def update_dsc_file(dir, dsc_name, parts):
    """Rewrite the dsc file so that it describes recompressed parts.

    `parts` is a sequence of (old name, new name) tuples. For every tuple the
    size and checksums of the new file in `dir` are computed and the entry
    named `old` in each checksum header of the dsc is replaced by an entry
    for the new file. The dsc is then removed and written out again.
    """
    dsc_path = os.path.join(dir, dsc_name)
    # The context managers make sure no file handle is leaked when parsing
    # or hashing raises
    with open(dsc_path, 'rb') as dsc_file:
        dsc = deb822.Dsc(dsc_file)
        for (old, name) in parts:
            path = os.path.join(dir, name)
            size = os.path.getsize(path)
            # One hash object per checksum type, all fed in a single pass
            digests = {}
            for (type, func) in zip(checksum_types, checksum_hashlib):
                digests[type] = getattr(hashlib, func)()
            with open(path, 'rb') as f:
                for chunk in iter(lambda: f.read(8192), b''):
                    for type in checksum_types:
                        digests[type].update(chunk)
            hashes = {}
            for type in checksum_types:
                hashes[type] = digests[type].hexdigest()
            for (header, type) in zip(checksum_headers, checksum_types):
                if header in dsc:
                    dsc[header] = [{type: hashes[type], 'size': size, 'name': name} if p['name'] == old else p for p in dsc[header]]
    os.remove(dsc_path) # So we don't change the original that the dsc links to
    with open(dsc_path, 'wb') as dsc_file:
        dsc.dump(dsc_file)
236 # Functions for downloading files and storing them in the hash caches
def download_and_check_hash(url, dir, hash, hash_type):
    """Download `url`, verify its checksum and store it in the caches.

    `hash_type` must be 'sha256' or 'md5sum' and `dir` the cache directory
    for that checksum type. On success a symlink for `hash` is created in
    `dir`, pointing at the copy of the file that is indexed by SHA-1.

    Returns (True, sha1) when the file exists in the snapshot sha1 cache,
    (False, sha1) when it only exists in the derivatives sha1 cache and
    ('unknown', None) when the hash type is unknown, the download failed or
    the checksum did not match.
    """
    # Reject unknown hash types before touching the network
    if hash_type == 'sha256':
        hasher = hashlib.sha256
    elif hash_type == 'md5sum':
        hasher = hashlib.md5
    else:
        logging.warning('unknown hash type detected: %s %s %s', hash_type, hash, url)
        return ('unknown', None)
    try:
        parent = hash_path_parent(dir, hash)
        path = hash_path(dir, hash)
        logging.debug('downloading %s', url)
        makedirs(parent)
        headers = { 'User-Agent' : user_agent }
        req = urllib2.Request(url, None, headers)
        u = urllib2.urlopen(req, None, timeout)
        try:
            data = u.read()
        finally:
            # Always release the connection
            u.close()
        data_hash = hasher(data).hexdigest()
        if data_hash != hash:
            logging.warning('incorrect hash for downloaded file, ignoring: %s %s != %s %s', hash_type, hash, data_hash, url)
            return ('unknown', None)
        sha1 = hashlib.sha1(data).hexdigest()
        sha1_path = hash_path(sha1_cache_dir, sha1)
        sha1_parent = hash_path_parent(sha1_cache_dir, sha1)
        makedirs(sha1_parent)
        if hash_path_exists(snapshot_cache_dir, sha1):
            snapshot_path = hash_path(snapshot_cache_dir, sha1)
            symlink(snapshot_path, path)
            logging.debug('exists in snapshot sha1 cache: %s %s %s %s', hash_type, hash, sha1, url)
            return (True, sha1)
        else:
            if not os.path.exists(sha1_path):
                logging.debug('correct hash for downloaded file, saving: %s %s %s %s', hash_type, hash, sha1, url)
                # Binary mode: the payload is arbitrary file content
                with open(sha1_path, 'wb') as f:
                    f.write(data)
            else:
                logging.debug('correct hash for downloaded file, not saving: already in derivs cache: %s %s %s %s', hash_type, hash, sha1, url)
            symlink(os.path.relpath(sha1_path, os.path.dirname(path)), path)
            logging.debug('does not exist in snapshot sha1 cache: %s %s %s %s', hash_type, hash, sha1, url)
            return (False, sha1)
    except urllib2.URLError as e:
        if hasattr(e, 'reason'): reason = e.reason
        elif hasattr(e, 'code'): reason = e.code
        else: reason = e
        logging.warning('unable to download hash file, ignoring: %s %s', reason, url)
        return ('unknown', None)
    except httplib.HTTPException as e:
        logging.warning('unable to download hash file, ignoring: %s %s', repr(e), url)
        return ('unknown', None)
    except socket.error as e:
        logging.warning('unable to download hash file, ignoring: %s %s', e, url)
        return ('unknown', None)
def download_sha1(url, dir, sha1):
    """Download `url`, verify its SHA-1 and store it below `dir`.

    Returns (False, sha1) when the downloaded data has the expected SHA-1
    (the file is written unless it already exists) and ('unknown', None)
    when the download failed or the SHA-1 did not match.
    """
    try:
        parent = hash_path_parent(dir, sha1)
        path = hash_path(dir, sha1)
        logging.debug('downloading sha1: %s %s', sha1, url)
        makedirs(parent)
        headers = { 'User-Agent' : user_agent }
        req = urllib2.Request(url, None, headers)
        u = urllib2.urlopen(req, None, timeout)
        try:
            data = u.read()
        finally:
            # Always release the connection
            u.close()
        data_sha1 = hashlib.sha1(data).hexdigest()
        if data_sha1 == sha1:
            logging.debug('correct sha1 for downloaded file, saving: %s %s', sha1, url)
            if not os.path.exists(path):
                # Binary mode: the payload is arbitrary file content
                with open(path, 'wb') as f:
                    f.write(data)
            return (False, sha1)
        else:
            logging.warning('incorrect sha1 for downloaded file, ignoring: %s != %s %s', sha1, data_sha1, url)
            return ('unknown', None)
    except urllib2.URLError as e:
        if hasattr(e, 'reason'): reason = e.reason
        elif hasattr(e, 'code'): reason = e.code
        else: reason = e
        logging.warning('unable to download sha1 file, ignoring: %s %s', reason, url)
        return ('unknown', None)
    except httplib.HTTPException as e:
        logging.warning('unable to download hash file, ignoring: %s %s', repr(e), url)
        return ('unknown', None)
    except socket.error as e:
        logging.warning('unable to download hash file, ignoring: %s %s', e, url)
        return ('unknown', None)
325 # Functions for checking the hash caches
def check_hash_cache(dir, hash, hash_type, url):
    """Look up a non-SHA-1 checksum in its cache, downloading on a miss.

    The cache entry for `hash` below `dir` is a symlink whose target is
    named after the SHA-1 of the file.

    Returns (True, sha1) when the file exists in the snapshot sha1 cache,
    (False, sha1) when it exists in the derivatives sha1 cache and otherwise
    whatever download_and_check_hash returns.
    """
    logging.debug('checking hash cache: %s %s', hash_type, hash)
    link_path = hash_path(dir, hash)
    try:
        target = os.readlink(link_path)
    except OSError:
        logging.debug('does not exist in hash cache: %s %s', hash_type, hash)
        return download_and_check_hash(url, dir, hash, hash_type)
    logging.debug('exists in hash cache: %s %s', hash_type, hash)
    sha1 = os.path.basename(target)
    if hash_path_exists(snapshot_cache_dir, sha1):
        logging.debug('exists in snapshot sha1 cache: %s', sha1)
        # The local copy is redundant once the snapshot has the file
        remove(hash_path(sha1_cache_dir,sha1))
        return (True, sha1)
    if hash_path_exists(sha1_cache_dir, sha1):
        logging.debug('exists in derivatives sha1 cache: %s', sha1)
        return (False, sha1)
    # The link is dangling: the file is in neither sha1 cache. Drop the stale
    # link and fetch the file again instead of returning None, which callers
    # cannot unpack.
    logging.debug('dangling entry in hash cache, downloading again: %s %s %s', hash_type, hash, sha1)
    remove(link_path)
    return download_and_check_hash(url, dir, hash, hash_type)
def check_sha1_cache(sha1, url):
    """Look up `sha1` in the sha1 caches, downloading `url` on a miss.

    Returns (True, sha1) when the file exists in the snapshot sha1 cache (a
    copy in the derivatives cache is then removed), (False, sha1) when it
    exists in the derivatives sha1 cache and otherwise whatever
    download_sha1 returns.
    """
    logging.debug('checking sha1 caches: %s', sha1)

    if hash_path_exists(snapshot_cache_dir, sha1):
        logging.debug('exists in snapshot sha1 cache: %s', sha1)
        local_copy = hash_path(sha1_cache_dir, sha1)
        remove(local_copy)
        return (True, sha1)

    if hash_path_exists(sha1_cache_dir, sha1):
        logging.debug('exists in derivatives sha1 cache: %s', sha1)
        return (False, sha1)

    logging.debug('does not exist in any sha1 caches: %s', sha1)
    return download_sha1(url, sha1_cache_dir, sha1)
def status(type, hash, url):
    """Classify a file by its checksum.

    `type` is one of 'sha1', 'sha256' or 'md5sum'. Returns a tuple of the
    status and the SHA-1 of the file: 'unmodified' when the cache lookup
    reported True, 'modified' when it reported False, and otherwise the
    lookup result unchanged. An unknown `type` gives ('unknown', None).
    """
    logging.debug('checking status of hash: %s %s %s', type, hash, url)

    # Pick the cache lookup that matches the checksum type
    if type == 'sha1':
        found, sha1 = check_sha1_cache(hash, url)
    elif type == 'sha256':
        found, sha1 = check_hash_cache(sha256_cache_dir, hash, type, url)
    elif type == 'md5sum':
        found, sha1 = check_hash_cache(md5_cache_dir, hash, type, url)
    else:
        logging.warning('unknown hash type detected: %s %s %s', type, hash, url)
        return ('unknown', None)

    # Translate the lookup result; anything but True/False passes through
    if found == True:
        return ('unmodified', sha1)
    if found == False:
        return ('modified', sha1)
    return (found, sha1)
389 # Functions for getting information about source packages
def get_info(srcpkg):
    """Extract the dsc and the other parts from a source package entry.

    `srcpkg` maps checksum headers to lists of file entries, each a mapping
    with a 'name', a 'size' and one checksum key.

    Returns a tuple of the dsc checksum type, the dsc checksum, the dsc file
    name and the list of non-dsc file entries (first entry seen per file
    name), or None when there is not exactly one dsc file.
    """
    def checksum_of(entry):
        # The one key of a file entry that is neither its name nor its size
        return [(k, v) for k, v in entry.items() if k not in ('name', 'size')][0]

    # Use the dsc entries of the first checksum header that has any
    dsc = None
    for header in checksum_headers:
        if not dsc and header in srcpkg:
            dsc = [x for x in srcpkg[header] if x['name'].endswith('.dsc')]
    if not dsc:
        logging.warning('did not find any dsc files')
        return None
    if len(dsc) > 1:
        # The entries are mappings, so name and checksum are looked up by key
        logging.warning('found multiple dsc files: %s', ' '.join(['%s %s' % (x['name'], checksum_of(x)[1]) for x in dsc]))
        return None
    dsc = dsc[0]
    dsc_name = dsc['name']
    dsc_hash_type, dsc_hash = checksum_of(dsc)

    parts = []
    part_names = set()
    for header in checksum_headers:
        if header in srcpkg:
            for part in srcpkg[header]:
                if 'name' in part and part['name'] not in part_names and not part['name'].endswith('.dsc'):
                    parts.append(part)
                    part_names.add(part['name'])

    return (dsc_hash_type, dsc_hash, dsc_name, parts)
def get_debian_info(files):
    """Split a list of (name, sha1) tuples into the dsc and the other parts.

    Returns a tuple of the dsc SHA-1, the dsc file name and the list of
    non-dsc tuples (first tuple seen per file name), or None when there is
    no dsc file. When there are several dsc files the first one is used.
    """
    dsc_candidates = [item for item in files if item[0].endswith('.dsc')]
    if not dsc_candidates:
        logging.warning('did not find any Debian dsc files: snapshots bug or ancient source package')
        return None
    if len(dsc_candidates) > 1:
        listing = ' '.join(['%s %s' % (candidate_name, candidate_sha1) for candidate_name, candidate_sha1 in dsc_candidates])
        logging.warning('found multiple Debian dsc files, choosing first one: %s', listing)

    dsc_name, dsc_sha1 = dsc_candidates[0]

    parts = []
    seen_names = []
    for item in files:
        part_name, part_sha1 = item
        if part_name.endswith('.dsc') or part_name in seen_names:
            continue
        parts.append(item)
        seen_names.append(part_name)

    return (dsc_sha1, dsc_name, parts)
438 # Functions for extracting information from the snapshots database
def database_error(e):
    """Log a failed database query and reset the connection.

    The 'pgcode' and 'pgerror' attributes of `e` are logged when present,
    None otherwise.
    """
    code = getattr(e, 'pgcode', None)
    reason = getattr(e, 'pgerror', None)
    logging.warning('unable to execute database query: %s %s', code, reason)
    conn.reset()
def srcpkg_was_in_debian(name, version=None):
    """Tell whether the srcpkg table has a row for the source package.

    Only `name` is matched when `version` is empty. Returns True or False,
    or None when the query fails.
    """
    if version:
        query = 'SELECT version FROM srcpkg WHERE name=%s AND version=%s LIMIT 1;'
        params = (name, version)
    else:
        query = 'SELECT version FROM srcpkg WHERE name=%s LIMIT 1;'
        params = (name,)
    try:
        cur.execute(query, params)
        return bool(cur.fetchone())
    except psycopg2.Error as e:
        database_error(e)
        return None
def sha1_to_srcpkgs(sha1):
    """Return the (name, version) rows of the source packages mapped to the file hash `sha1`.

    Returns None when the query fails.
    """
    query = '''SELECT name, version
        FROM srcpkg
        JOIN file_srcpkg_mapping ON file_srcpkg_mapping.srcpkg_id=srcpkg.srcpkg_id
        WHERE hash=%s;'''
    try:
        cur.execute(query, (sha1,))
        rows = cur.fetchall()
    except psycopg2.Error as e:
        database_error(e)
        return None
    return rows
def srcpkg_to_sha1s(name, version):
    """Return the hash rows of all files mapped to the given source package.

    Returns None when the query fails.
    """
    query = '''SELECT hash
        FROM file_srcpkg_mapping
        JOIN srcpkg ON srcpkg.srcpkg_id=file_srcpkg_mapping.srcpkg_id
        WHERE name=%s AND version=%s;'''
    try:
        cur.execute(query, (name, version))
        rows = cur.fetchall()
    except psycopg2.Error as e:
        database_error(e)
        return None
    return rows
def srcpkg_to_srcpkgs(name):
    """Return the (name, version) rows of every version of the source package `name`.

    The rows are ordered by descending version. Returns None when the query
    fails.
    """
    query = '''SELECT name, version
        FROM srcpkg
        WHERE name=%s ORDER BY version DESC;'''
    try:
        cur.execute(query, (name,))
        rows = cur.fetchall()
    except psycopg2.Error as e:
        database_error(e)
        return None
    return rows
def sha1s_to_files(sha1):
    """Return the distinct (name, hash) rows of the files with hash `sha1`.

    Returns None when the query fails.
    """
    try:
        # Bind the argument as a one-element parameter tuple
        cur.execute('SELECT DISTINCT ON (name, hash) name, hash FROM file WHERE hash=%s;', (sha1,))
        return cur.fetchall()
    except psycopg2.Error as e:
        database_error(e)
        return None
def srcpkg_to_files(name, version):
    """Return the distinct (file name, file hash) rows of the given source package.

    Returns None when the query fails.
    """
    query = '''SELECT DISTINCT ON (file.name, file.hash) file.name, file.hash
        FROM file_srcpkg_mapping
        JOIN srcpkg ON srcpkg.srcpkg_id=file_srcpkg_mapping.srcpkg_id
        JOIN file ON file_srcpkg_mapping.hash=file.hash
        WHERE srcpkg.name=%s AND srcpkg.version=%s;'''
    try:
        cur.execute(query, (name, version))
        rows = cur.fetchall()
    except psycopg2.Error as e:
        database_error(e)
        return None
    return rows
def sha1_version_to_derived_from(sha1, version):
    """Find the source package containing the file `sha1` that is closest to `version`.

    The first query looks for the highest version that is not above
    `version`; when nothing matches, the lowest version containing the file
    is used instead. Returns a list with at most one (name, version) row, or
    None when a query fails.
    """
    try:
        cur.execute(
            '''SELECT name, version
            FROM srcpkg
            JOIN file_srcpkg_mapping ON file_srcpkg_mapping.srcpkg_id=srcpkg.srcpkg_id
            WHERE hash=%s and version<=%s
            ORDER BY name ASC, version DESC
            LIMIT 1;''', (sha1, version))
        res = cur.fetchall()
        if res: return res
        # The fallback query has a single placeholder, so only the hash is bound
        cur.execute(
            '''SELECT name, version
            FROM srcpkg
            JOIN file_srcpkg_mapping ON file_srcpkg_mapping.srcpkg_id=srcpkg.srcpkg_id
            WHERE hash=%s
            ORDER BY name ASC, version ASC
            LIMIT 1;''', (sha1,))
        return cur.fetchall()
    except psycopg2.Error as e:
        database_error(e)
        return None
def srcpkg_to_derived_from(name, version):
    """Find the version of the source package `name` that is closest to `version`.

    The first query looks for the highest version that is not above
    `version`; when nothing matches, the lowest known version is used
    instead. Returns a list with at most one (name, version) row, or None
    when a query fails.
    """
    not_above = '''SELECT name, version
        FROM srcpkg
        WHERE name=%s and version<=%s
        ORDER BY version DESC
        LIMIT 1;'''
    lowest = '''SELECT name, version
        FROM srcpkg
        WHERE name=%s
        ORDER BY version ASC
        LIMIT 1;'''
    try:
        cur.execute(not_above, (name, version))
        rows = cur.fetchall()
        if not rows:
            cur.execute(lowest, (name,))
            rows = cur.fetchall()
        return rows
    except psycopg2.Error as e:
        database_error(e)
        return None
560 # Functions related to creating patches
562 # Add symlinks for all needed files
def prepare(dsc_name, dsc_sha1, parts):
    """Create a temporary directory holding symlinks to a derivative source package.

    `parts` is a list of file entries, each a mapping with a 'name' and one
    of the checksum keys 'sha1', 'sha256' or 'md5sum'. Parts compressed with
    lzip are recompressed with gzip and the dsc is updated to match.

    Returns the path of the temporary directory.
    """
    logging.debug('preparing deriv directory for %s', dsc_name)
    tmp_dir = tempfile.mkdtemp(prefix='derivs-cmp-srcpkg-%s-' % derivative_short_name)
    # Prefer the snapshot farm and fall back to the derivatives cache, exactly
    # as is done for the parts below
    path = hash_path(snapshot_cache_dir, dsc_sha1)
    if not os.path.exists(path):
        path = hash_path(sha1_cache_dir, dsc_sha1)
    dsc_path = os.path.join(tmp_dir, dsc_name)
    os.symlink(path, dsc_path)
    converted_parts = []
    for part in parts:
        if 'sha1' in part:
            path = hash_path(snapshot_cache_dir, part['sha1'])
            if not os.path.exists(path):
                path = hash_path(sha1_cache_dir, part['sha1'])
        elif 'sha256' in part:
            path = hash_path(sha256_cache_dir, part['sha256'])
        elif 'md5sum' in part:
            path = hash_path(md5_cache_dir, part['md5sum'])
        else:
            # Without a checksum there is nothing to link to; never reuse the
            # path of the previous file
            logging.warning('no known checksum for part %s of %s, skipping', part['name'], dsc_name)
            continue
        part_path = os.path.join(tmp_dir, part['name'])
        os.symlink(path, part_path)
        # Some distributions allow additional compression schemes
        # Here we work around this by recompressing with gzip
        if part['name'].endswith('.lz'):
            converted_parts.append(convert_lzip_to_gzip(tmp_dir, part['name']))
    # Update the dsc file if we recompressed any files
    if converted_parts:
        update_dsc_file(tmp_dir, dsc_name, converted_parts)
    return tmp_dir
def prepare_debian(dsc_name, dsc_sha1, files):
    """Create a temporary directory holding symlinks to a Debian source package.

    `files` is a list of (name, sha1) tuples; every file and the dsc are
    linked to their entry in the snapshot sha1 cache.

    Returns the path of the temporary directory, or None (after removing the
    directory) when not every linked file is readable.
    """
    logging.debug('preparing Debian directory for %s', dsc_name)
    debian_tmp_dir = tempfile.mkdtemp(prefix='derivs-cmp-srcpkg-Debian-')

    def link_from_snapshot(file_name, file_sha1):
        # Link one file into the directory and report whether it is readable
        target = hash_path(snapshot_cache_dir, file_sha1)
        os.symlink(target, os.path.join(debian_tmp_dir, file_name))
        return 1 if os.access(target, os.R_OK) else 0

    readable_parts = link_from_snapshot(dsc_name, dsc_sha1)
    for entry in files:
        part_name, part_sha1 = entry
        readable_parts += link_from_snapshot(part_name, part_sha1)

    expected = 1 + len(files)
    if readable_parts == expected:
        return debian_tmp_dir

    logging.info('only %s parts of %s are readable', readable_parts, dsc_name)
    rmtree(debian_tmp_dir)
    return None
def get_changelog_entries(tmp_dir, dsc_name, dsc_sha1):
    """Return the (package, version) tuples of the changelog of a source package.

    The result is cached as JSON, keyed by the SHA-1 of the dsc. On a cache
    miss the source package is unpacked inside `tmp_dir`, debian/changelog is
    parsed and the unpacked tree is removed again.

    Returns None when the package cannot be unpacked, when debian/ or the
    changelog is a symbolic link, or when the changelog is missing or cannot
    be read.
    """
    logging.debug('getting changelog entries from %s', dsc_name)

    # Cache check
    changelog_path = hash_path(sha1_changelog_dir, dsc_sha1)
    if os.path.exists(changelog_path):
        logging.debug('changelog cache exists for %s %s', dsc_name, dsc_sha1)
        try:
            with open(changelog_path) as f:
                cached_entries = json.load(f)
        except ValueError:
            # Unparsable cache file: fall through and regenerate it
            pass
        else:
            return [tuple(entry) for entry in cached_entries]

    # Preparation
    extract_path = os.path.join(tmp_dir,'extracted')

    # Unpack the source tree
    logging.debug('unpacking source package %s', dsc_name)
    cmdline = ['dpkg-source', '-x', dsc_name, 'extracted']
    process = subprocess.Popen(cmdline, cwd=tmp_dir, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=subprocess_setup)
    output = process.communicate()[0]
    if process.returncode:
        logging.warning('dpkg-source reported failure to extract %s:', dsc_name)
        logging.warning(output)
        # Log a directory listing to help diagnose the failure
        cmdline = ['ls', '-lR', '--time-style=+']
        process = subprocess.Popen(cmdline, cwd=tmp_dir, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=subprocess_setup)
        output = process.communicate()[0]
        logging.warning(output)
        rmtree(extract_path)
        return None

    # Sanitise the debian dir and changelog file in case it is a symlink to outside
    debian_dir = os.path.join(extract_path, 'debian')
    changelog_filename = os.path.join(debian_dir,'changelog')
    if os.path.islink(debian_dir) or os.path.islink(changelog_filename):
        logging.warning('debian dir or changelog is a symbolic link %s', dsc_name)
        rmtree(extract_path)
        return None

    # Check if the changelog exists
    if not os.path.exists(changelog_filename):
        logging.warning('could not find changelog in %s', dsc_name)
        rmtree(extract_path)
        return None

    # Find out which source package is the most likely derivative
    logging.debug('parsing changelog for %s', dsc_name)
    try:
        # Opening, parsing and iterating are all covered, and the file is
        # closed even when one of them fails
        with open(changelog_filename) as changelog_file:
            changelog_obj = changelog.Changelog(changelog_file)
            changelog_entries = [(entry.package, str(entry._raw_version)) for entry in changelog_obj]
    except Exception:
        logging.warning('could not read changelog from %s', dsc_name)
        rmtree(extract_path)
        return None

    # Clean up again
    rmtree(extract_path)

    # Write the cache
    makedirs(hash_path_parent(sha1_changelog_dir, dsc_sha1))
    remove(changelog_path)
    with open(changelog_path, 'w') as f:
        json.dump(changelog_entries, f)

    return changelog_entries
680 # Find the source package name and version this is probably derived from
def find_derived_from(tmp_dir, name, version, dsc_name, dsc_sha1, parts_unmodified):
    """Find the Debian source package a derivative package is probably based on.

    `parts_unmodified` is a list of (sha1, name) tuples. The heuristics are
    tried in this order:

    1. the first changelog entry that is among the candidate packages
       (packages sharing a part, else all versions of the same name),
    2. the first changelog entry that was ever in Debian,
    3. the highest candidate of the same name that is not above `version`,
       else the last candidate after sorting by descending version,
    4. the closest version of the same name in the database.

    Returns a (name, version) tuple or None when nothing was found.
    """
    logging.debug('finding base source package of %s %s', name, version)

    # Get a list of changelog entries
    changelog_entries = get_changelog_entries(tmp_dir, dsc_name, dsc_sha1)
    if changelog_entries:
        logging.debug('changelog entries are: %s', ' '.join(['%s %s' % (entry_name, entry_version) for entry_name, entry_version in changelog_entries]))

    # Get a list of candidate versions from the database
    possibly_derived_from = []
    logging.debug('checking which parts were in Debian')
    for part_sha1, part_name in parts_unmodified:
        part_derived_from = sha1_to_srcpkgs(part_sha1)
        if part_derived_from:
            logging.debug('part %s %s available in %s', part_sha1, part_name, ' '.join(['%s %s' % (entry_name, entry_version) for entry_name, entry_version in part_derived_from]))
            possibly_derived_from.extend(part_derived_from)

    if not possibly_derived_from:
        logging.debug('no parts in common with Debian, obtaining old versions')
        old_packages = srcpkg_to_srcpkgs(name)
        if old_packages: possibly_derived_from = old_packages

    # Uniqify
    possibly_derived_from = list(set(possibly_derived_from))
    if possibly_derived_from:
        logging.debug('possibly derived from: %s', ' '.join(['%s %s' % (entry_name, entry_version) for entry_name, entry_version in possibly_derived_from]))
    else:
        logging.debug('nothing in possibly derived from list')

    # Match changelog versions against candidates
    if changelog_entries:
        logging.debug('matching changelog entries against versions possibly derived from')
        for entry in changelog_entries:
            entry_name, entry_version = entry
            if entry in possibly_derived_from:
                logging.debug('%s %s in possibly derived from', entry_name, entry_version)
                return entry
        logging.debug('checking if changelog entries were ever in Debian')
        for entry_name, entry_version in changelog_entries:
            if srcpkg_was_in_debian(entry_name, entry_version):
                logging.debug('%s %s was in Debian', entry_name, entry_version)
                return (entry_name, entry_version)
    if possibly_derived_from:
        logging.debug('finding closest entry in possibly derived from')
        # Highest version first
        possibly_derived_from.sort(cmp=lambda a,b: apt_version_cmp(b[1],a[1]))
        for entry_name, entry_version in possibly_derived_from:
            if name == entry_name and apt_version_cmp(version, entry_version) >= 0:
                logging.debug('%s %s is an equal or lower version', entry_name, entry_version)
                return (entry_name, entry_version)
        entry = possibly_derived_from[-1]
        entry_name, entry_version = entry
        logging.debug('no lower version numbers, returning next highest version %s %s', entry_name, entry_version)
        return entry
    logging.debug('finding closest version number in Debian')
    # The lookup returns None on a database error and may return no rows
    closest = srcpkg_to_derived_from(name, version)
    if closest:
        entry = closest[0]
        entry_name, entry_version = entry
        logging.debug('closest package was %s %s', entry_name, entry_version)
        return entry
    logging.debug('could not find Debian package %s %s is derived from', name, version)
    return None
742 # Generate a patch file
def create_patch(tmp_dir, dsc_name, dsc_sha1, debian_tmp_dir, debian_dsc_name, debian_dsc_sha1):
    """Create the patches between a Debian and a derivative source package.

    The full patch is made with debdiff unless it is already cached, and a
    second patch restricted to debian/ is made from it with filterdiff and
    diffstat. Patches larger than 100MB are replaced by a symlink to
    patch_too_large.

    Returns False when debdiff was run and exited with anything but 1 (the
    exit status 0 also bumps repackaged_but_identical), True otherwise.
    """
    global repackaged_but_identical

    def log_dir_listings():
        # Log a recursive listing of both package directories
        listing_cmd = ['ls', '-lR', '--time-style=+']
        for label, where in ((derivative_short_name, tmp_dir), ('Debian', debian_tmp_dir)):
            logging.warning('dir listing for %s:', label)
            lister = subprocess.Popen(listing_cmd, cwd=where, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=subprocess_setup)
            logging.warning(lister.communicate()[0])

    dsc_path = os.path.join(tmp_dir, dsc_name)
    debian_dsc_path = os.path.join(debian_tmp_dir, debian_dsc_name)
    path_everything = sha1_patch_path(debian_dsc_sha1, dsc_sha1)
    path_debian = sha1_patch_path(debian_dsc_sha1, dsc_sha1, 'debian')

    # Generate the main patch
    if not os.path.exists(path_everything) and os.path.exists(debian_dsc_path) and os.path.exists(dsc_path):
        makedirs(os.path.dirname(path_everything))
        patch_file = open(path_everything, 'w')
        debdiff = subprocess.Popen(['debdiff', '--quiet', '--diffstat', debian_dsc_path, dsc_path], stdout=patch_file, stderr=subprocess.PIPE, preexec_fn=subprocess_setup)
        errors = debdiff.communicate()[1]
        patch_file.close()
        exit_code = debdiff.returncode
        if exit_code == 0:
            logging.info('derivative repackaged in an identical way %s %s', debian_dsc_name, dsc_name)
            repackaged_but_identical += 1
            return False
        if exit_code == 255:
            logging.warning('debdiff reported failure %s %s:', debian_dsc_name, dsc_name)
            logging.warning(errors)
            log_dir_listings()
            return False
        if exit_code != 1:
            logging.warning('debdiff reported unknown return code %s %s %s:', exit_code, debian_dsc_name, dsc_name)
            logging.warning(errors)
            log_dir_listings()
            return False

    # Filter the main patch to include only the debian/ directory
    if os.path.exists(path_everything) and not os.path.exists(path_debian):
        makedirs(os.path.dirname(path_debian))
        filterdiff = subprocess.Popen(['filterdiff', '--include=*/debian/*', path_everything], stdout=subprocess.PIPE, preexec_fn=subprocess_setup)
        filterdiff_output = filterdiff.communicate()[0]
        diffstat = subprocess.Popen('diffstat', stdin=subprocess.PIPE, stdout=subprocess.PIPE, preexec_fn=subprocess_setup)
        diffstat_output = diffstat.communicate(filterdiff_output)[0]
        heading = 'diffstat of debian/ for %s %s\n' % (os.path.splitext(debian_dsc_name)[0], os.path.splitext(dsc_name)[0])
        out = open(path_debian, 'w')
        out.write(heading)
        out.write('\n')
        out.write(diffstat_output)
        out.write('\n')
        out.write(filterdiff_output)
        out.close()

    # Patches > 100MB are probably not that useful, replace them with a link
    size_limit = 104857600
    for path in (path_everything, path_debian):
        try:
            too_large = os.path.getsize(path) > size_limit
            if too_large:
                logging.info('patch between %s and %s is larger than 100MB', dsc_name, debian_dsc_name)
                remove(path)
                symlink(os.path.relpath(patch_too_large, os.path.dirname(path)), path)
        except OSError:
            pass

    return True
def check_patch(debian_dsc_sha1, dsc_sha1):
    """Decide whether the patch between two source packages is worth listing.

    The names of the files touched by the patch are obtained by running
    lsdiff on the patch and are cached in the file returned by
    sha1_lsdiff_path() so that later runs can skip the lsdiff call.

    Arguments:
    debian_dsc_sha1 -- SHA-1 identifying the Debian side of the patch
    dsc_sha1 -- SHA-1 identifying the derivative side of the patch

    Returns True if the listing contains any line that is not a
    debian/changelog path, False otherwise (including an empty listing).
    """
    patch_path = sha1_patch_path(debian_dsc_sha1, dsc_sha1)
    lsdiff_path = sha1_lsdiff_path(debian_dsc_sha1, dsc_sha1)
    if os.path.exists(lsdiff_path):
        logging.debug('lsdiff cache exists for %s', patch_path)
        with open(lsdiff_path) as f:
            lsdiff = f.read()
    else:
        logging.debug('lsdiff cache does not exist for %s', patch_path)
        cmdline = ['lsdiff', patch_path]
        # stderr is merged into stdout, so on failure the captured text
        # contains error messages rather than file names
        process = subprocess.Popen(cmdline, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=subprocess_setup)
        lsdiff = process.communicate()[0]
        if process.returncode:
            # Do not persist the output of a failed run, otherwise the
            # error text would be served from the cache forever
            logging.warning('lsdiff reported return code %s for %s, not caching the output', process.returncode, patch_path)
        else:
            makedirs(os.path.dirname(lsdiff_path))
            with open(lsdiff_path, 'w') as f:
                f.write(lsdiff)
    # A patch that only touches debian/changelog is not considered useful
    for line in lsdiff.splitlines():
        if line != 'debian/changelog' and not line.endswith('/debian/changelog'):
            return True
    return False
def present_patch(name, version, dsc_sha1, debian_name, debian_version, debian_dsc_sha1):
    """Expose the SHA-1 named patches under per-derivative and global names.

    For the full patch ('') and the debian/-only patch ('debian'), every
    patch file that exists gets a relative symlink at the paths returned
    by deriv_patch_path() and global_patch_path(); links already at those
    paths are removed first.

    Returns a tuple with the global link paths relative to
    global_patch_dir. The tuple is empty when check_patch() reports that
    the patch is not useful or when no patch file exists.
    """
    is_useful = check_patch(debian_dsc_sha1, dsc_sha1)
    published = []
    for patch_kind in ('', 'debian'):
        target = sha1_patch_path(debian_dsc_sha1, dsc_sha1, patch_kind)
        if os.path.exists(target):
            deriv_link = deriv_patch_path(name, version, debian_name, debian_version, patch_kind)
            global_link = global_patch_path(name, version, debian_name, debian_version, patch_kind)
            both_links = (deriv_link, global_link)
            # Same order as before: create both dirs, drop both old
            # links, then create both new links
            for link in both_links:
                makedirs(os.path.dirname(link))
            for link in both_links:
                remove(link)
            for link in both_links:
                symlink(os.path.relpath(target, os.path.dirname(link)), link)
            if is_useful:
                published.append(os.path.relpath(global_link, os.path.abspath(global_patch_dir)))
    return tuple(published)
855 # Functions that wrap other functions and decide what to do
def check_source_package(source_entry, srcpkg):
    """Classify one derivative source package and create a patch if possible.

    Arguments:
    source_entry -- object whose archive_uri() maps an archive relative
                    path to a URL
    srcpkg -- Sources paragraph; the 'Package', 'Version' and 'Directory'
              fields are read from it

    Returns None when the package is skipped: a field is missing, a name,
    version, directory or hash looks malicious, get_info() yields nothing
    or the dsc status is 'unmodified'. Otherwise returns a tuple
    (files, patch, link, new):
    files -- list of (sha1, hash type, hash) for the dsc and for each
             part whose status is 'modified'
    patch -- None, or (debian_name, debian_version, debian_dsc_sha1, name,
             version, dsc_sha1, sha1s of the modified parts, patch names)
    link -- None, or (debian_name, debian_version, name, version, dsc_url)
    new -- None, or (name, version, dsc_url) when find_derived_from()
           found nothing to base the package on
    """
    global modifies_dsc_files

    # Pre-set so the KeyError handler can log whatever was read so far
    name = None
    version = None
    directory = None
    try:
        name = srcpkg['Package']
        version = srcpkg['Version']
        directory = srcpkg['Directory']
        if '/' in name or name == '..':
            logging.warning('could not process source package %s %s: possibly malicious name', name, version)
            return None
        if '/' in version or version == '..':
            logging.warning('could not process source package %s %s: possibly malicious version', name, version)
            return None
        if '..' in directory.split('/'):
            logging.warning('could not process source package %s %s: possibly malicious dir: %s', name, version, directory)
            return None
    except KeyError:
        logging.warning('could not process source package %s %s', name, version)
        return None

    logging.debug('started processing source package %s %s', name, version)
    srcpkg_info = get_info(srcpkg)
    if not srcpkg_info:
        logging.warning('finished processing source package %s %s: could not get any info', name, version)
        return None
    dsc_hash_type, dsc_hash, dsc_name, parts = srcpkg_info
    if '/' in dsc_name or dsc_name == '..':
        logging.warning('could not process source package %s %s: possibly malicious dsc name %s', name, version, dsc_name)
        return None
    if not ishex(dsc_hash):
        logging.warning('could not process source package %s %s: possibly malicious dsc hash %s', name, version, dsc_hash)
        return None

    dsc_url = source_entry.archive_uri('%s/%s' % (directory, dsc_name))
    logging.debug('found dsc file: %s %s %s', dsc_hash_type, dsc_hash, dsc_url)
    dsc_status, dsc_sha1 = status(dsc_hash_type, dsc_hash, dsc_url)
    logging.debug('checked dsc status: %s %s %s', dsc_status, dsc_sha1, dsc_url)
    if dsc_status == 'unmodified':
        # Ignore the srcpkg since we know it was in Debian
        # at one point and is hopefully therefore unmodified
        logging.debug('finished processing source package %s %s: dsc unmodified', name, version)
        return None

    # Check every non-dsc part and sort it by status
    files = [(dsc_sha1, dsc_hash_type, dsc_hash)]
    parts_unmodified = []
    parts_modified = []
    parts_unknown = []
    for part in parts:
        part_name = part['name']
        if '/' in part_name or part_name == '..':
            logging.warning('could not process source package %s %s: possibly malicious part name %s', name, version, part_name)
            return None
        part_url = source_entry.archive_uri('%s/%s' % (directory, part_name))
        # The first entry that is neither the name nor the size is the hash
        part_hashes = [(k, v) for k, v in part.iteritems() if k not in ('name', 'size')]
        part_hash_type, part_hash = part_hashes[0]
        if not ishex(part_hash):
            logging.warning('could not process source package %s %s: possibly malicious part hash %s', name, version, part_hash)
            return None
        logging.debug('found part file: %s %s %s', part_hash_type, part_hash, part_url)
        part_status, part_sha1 = status(part_hash_type, part_hash, part_url)
        logging.debug('checked part status: %s %s %s', part_status, part_sha1, part_url)
        if 'sha1' not in part and part_sha1:
            part['sha1'] = part_sha1
        part_entry = (part_sha1, part_name)
        if part_status == 'unmodified':
            parts_unmodified.append(part_entry)
        elif part_status == 'modified':
            parts_modified.append(part_entry)
            files.append((part_sha1, part_hash_type, part_hash))
        else:
            parts_unknown.append(part_entry)

    # Compare the counts before the unmodified parts are de-duplicated
    all_parts_unmodified = (len(parts_unmodified) == len(parts))
    parts_unmodified = list(set(parts_unmodified))
    logging.debug('source package status %s %s: dsc %s, %s parts unmodified, %s parts modified, %s parts unknown', name, version, dsc_status, len(parts_unmodified), len(parts_modified), len(parts_unknown))

    if all_parts_unmodified:
        # Ignore the srcpkg since we know all the parts were
        # in Debian at one point and ergo, it is unmodified
        logging.debug('finished processing source package %s %s: all non-dsc parts unmodified', name, version)
        if dsc_status == 'modified':
            logging.info('source package %s %s: unmodified, but dsc different', name, version)
            modifies_dsc_files += 1
        return (files, None, None, None)

    logging.debug('some parts modified, looking for derived version %s %s', name, version)
    if not dsc_sha1:
        logging.warning('finished processing source package %s %s: sha1 missing for dsc file', name, version)
        return (files, None, None, None)
    if parts_unknown:
        logging.warning('finished processing source package %s %s: sha1 missing for some parts', name, version)
        return (files, None, None, None)

    new = None
    link = None
    patch = None
    tmp_dir = prepare(dsc_name, dsc_sha1, parts)
    derived_from = find_derived_from(tmp_dir, name, version, dsc_name, dsc_sha1, parts_unmodified)
    if not derived_from:
        new = (name, version, dsc_url)
    else:
        debian_name, debian_version = derived_from
        link = (debian_name, debian_version, name, version, dsc_url)
        logging.debug('source package %s %s derived from %s %s', name, version, debian_name, debian_version)
        debian_files = srcpkg_to_files(debian_name, debian_version)
        if not debian_files:
            if srcpkg_was_in_debian(debian_name, debian_version):
                logging.warning('source package %s %s: snapshot database issue, no Debian files found', debian_name, debian_version)
            else:
                logging.warning('source package %s %s: derived from %s %s possibly bogus', name, version, debian_name, debian_version)
        else:
            debian_info = get_debian_info(debian_files)
            if not debian_info:
                logging.warning('source package %s %s: could not get Debian info for %s %s: %s', name, version, debian_name, debian_version, debian_info)
            else:
                debian_dsc_sha1, debian_dsc_name, debian_parts = debian_info
                logging.debug('Debian source package %s %s dsc found %s %s', debian_name, debian_version, debian_dsc_name, debian_dsc_sha1)
                debian_tmp_dir = prepare_debian(debian_dsc_name, debian_dsc_sha1, debian_parts)
                if not debian_tmp_dir:
                    # This could be an issue with snapshots or a file that is not distributable
                    logging.info('source package %s %s: could not create temporary dir for Debian: %s %s', name, version, debian_name, debian_version)
                else:
                    if create_patch(tmp_dir, dsc_name, dsc_sha1, debian_tmp_dir, debian_dsc_name, debian_dsc_sha1):
                        patch_names = present_patch(name, version, dsc_sha1, debian_name, debian_version, debian_dsc_sha1)
                        if patch_names:
                            patch = (debian_name, debian_version, debian_dsc_sha1, name, version, dsc_sha1, [modified[0] for modified in parts_modified], patch_names)
                    rmtree(debian_tmp_dir)
    rmtree(tmp_dir)
    logging.debug('finished processing source package %s %s: all done', name, version)
    return (files, patch, link, new)
def _format_action(value):
    """Render a possibly nested action value as one space separated string.

    Lists and tuples are flattened recursively; anything else, including
    None, is converted with %s so that logging an action never raises.
    """
    if isinstance(value, (list, tuple)):
        return ' '.join([_format_action(item) for item in value])
    return '%s' % (value,)


def process_sources(source_entries, lists_dir):
    """Run check_source_package() on every paragraph of every Sources file.

    Arguments:
    source_entries -- iterable of iterables of source entries; the file
                      name of each Sources file is the text inside the
                      trailing parentheses of the entry's describe
                      attribute
    lists_dir -- directory containing the Sources files

    Sources files that cannot be opened are skipped silently.

    Returns a tuple (files, patches, links, new) of lists holding the
    non-empty results returned by check_source_package().
    """
    files = []
    patches = []
    links = []
    new = []
    for source in source_entries:
        for source_entry in source:
            fn = os.path.join(lists_dir, source_entry.describe.rstrip(')').rpartition('(')[2])
            try:
                f = open(fn)
            except IOError:
                continue
            # Close the Sources file even if processing a package raises
            try:
                for srcpkg in deb822.Sources.iter_paragraphs(f):
                    actions = check_source_package(source_entry, srcpkg)
                    if actions:
                        action_files, action_patch, action_link, action_new = actions
                        if action_files:
                            files.append(action_files)
                            logging.debug('action: return files %s', _format_action(action_files))
                        if action_patch:
                            patches.append(action_patch)
                            logging.debug('action: return patches %s', _format_action(action_patch))
                        if action_link:
                            links.append(action_link)
                            logging.debug('action: return links to modified source packages %s', _format_action(action_link))
                        if action_new:
                            new.append(action_new)
                            logging.debug('action: return links to new source packages %s', _format_action(action_new))
                    logging.debug('done')
                    logging.debug('')
            finally:
                f.close()
    return (files, patches, links, new)
# Process every source package of the derivative
logging.debug('processing distribution %s', derivative_short_name)

files, patches, links, new = process_sources(source_entries, lists_dir)

# Done with the database, close the connection
cur.close()
conn.close()

# Write out the results
# First the modified files: a mapping from SHA-1 to the other known
# hashes of the same file, nothing is written when there are no files
filename = sys.argv[3]
data = files
if data:
    output_data = {}
    for package in data:
        for sha1, hash_type, hash_value in package:
            known_hashes = output_data.setdefault(sha1, {})
            if hash_type == 'sha1':
                # The SHA-1 is already the key, nothing more to record
                continue
            if hash_type not in known_hashes:
                known_hashes[hash_type] = hash_value
            elif hash_value != known_hashes[hash_type]:
                logging.warning('hashes mismatched: %s: %s %s != %s', sha1, hash_type, hash_value, known_hashes[hash_type])
    output = file(os.path.abspath(filename), 'wb')
    yaml.safe_dump(output_data, output)
    output.close()
# Write out the list of patches, or remove the list when there are none
filename = sys.argv[4]
data = patches
if data:
    # Make sure both patch directories have their HEADER.html and
    # .htaccess symlinks. Each link is checked in the directory it is
    # created in; the derivative .htaccess used to be guarded by a check
    # of the global directory and was therefore never created.
    for link_dir, top_dir in ((global_patch_dir, '../..'), (deriv_patch_dir, '../../..')):
        for link_name, link_target in (('HEADER.html', 'doc/HEADER.patches.html'), ('.htaccess', 'etc/htaccess.patches')):
            link_path = os.path.join(link_dir, link_name)
            if not os.path.exists(link_path):
                symlink('%s/%s' % (top_dir, link_target), link_path)
    output_data = []
    # patch_names avoids rebinding the patches list being iterated
    for debian_name, debian_version, debian_sha1, name, version, sha1, parts_sha1, patch_names in data:
        output_data.append({
            'debian_name': debian_name,
            'debian_version': debian_version,
            'debian_sha1': debian_sha1,
            'name': name,
            'version': version,
            'sha1': sha1,
            'patches': patch_names,
            'parts': parts_sha1,
        })
    # The with statement closes the file even if dumping fails
    with open(os.path.abspath(filename), 'wb') as output:
        yaml.safe_dump(output_data, output)
else:
    remove(filename)
# Write out the links between Debian source packages and the derivative
# source packages based on them, or remove the list when there are none
filename = sys.argv[5]
data = links
if data:
    # De-duplicate, then order by Debian name, Debian version,
    # derivative name and derivative version
    data = sorted(set(data), cmp=lambda a,b: cmp(a[0],b[0]) or apt_version_cmp(a[1],b[1]) or cmp(a[2],b[2]) or apt_version_cmp(a[3],b[3]))
    output_data = {}
    output = file(os.path.abspath(filename), 'wb')
    for debian_name, debian_version, name, version, dsc_url in data:
        derived = output_data.setdefault(debian_name, {}).setdefault(debian_version, [])
        derived.append({'name': name, 'version': version, 'dsc': dsc_url})
    yaml.safe_dump(output_data, output)
    output.close()
else:
    remove(filename)
# Write out the source packages for which no Debian package to compare
# against was found, or remove the list when there are none
filename = sys.argv[6]
data = new
if data:
    # De-duplicate, then order by name, version and dsc URL
    data = sorted(set(data), cmp=lambda a,b: cmp(a[0],b[0]) or apt_version_cmp(a[1],b[1]) or cmp(a[2],b[2]))
    output_data = {}
    output = file(os.path.abspath(filename), 'wb')
    for name, version, dsc_url in data:
        dsc_urls = output_data.setdefault(name, {}).setdefault(version, [])
        dsc_urls.append(str(dsc_url))
    yaml.safe_dump(output_data, output)
    output.close()
else:
    remove(filename)

# Flush and close all the logging handlers
logging.shutdown()
