9b4a35d62d1c198882090a053c576b4f30347669
[dex/census.git] / bin / compare-source-package-list
1 #!/usr/bin/python
3 # Copyright 2011 Paul Wise
4 # Released under the MIT/Expat license, see doc/COPYING
6 # Uses the snapshot.debian.org metadata database and SHA-1 based filesystem to
7 # compute debdiffs between Debian and individual derivatives. The metadata
8 # allows knowing if a particular file was ever in Debian and the filesystem
9 # allows the creation of debdiffs.
10 #
11 # The script works approximately like this:
12 #
13 # Load the Sources files previously downloaded by get-package-lists as indicated
14 # by the sources.list of the derivative.
15 #
16 # For each source package in the derivative:
17 #
18 # Check if the dsc has ever been in Debian, if not, check if the other
19 # parts have and therefore decide if the package is unmodified or not.
20 # Unmodified source packages are skipped and include those with the exact
21 # same dsc file or those where all the non-dsc parts are identical.
22 #
23 # Try some heuristics (name, version, changelog entries) to find out if
24 # the package could be based on some package that is or was in Debian.
25 #
26 # If it was not then skip to the next one and make a note, since Debian
27 # might want to know about source packages that are missing from Debian.
28 #
29 # If it was then use debdiff to create a diff and filterdiff to create a
30 # diff of the debian/ dir.
31 #
32 # Usage:
33 # compare-source-package-list <sources.list> <apt dir> <patches list> <links list> <new package list> <log file>
35 # FIXME: write out some statistics and rrdtool graphs
36 #               source package types per derivative
37 #               number of source packages
38 #               cache misses: md5, sha256, sha1, patch, changelog
39 # FIXME: comment the code to list assumptions and function purpose
40 # FIXME: add options to allow re-processing only specific packages
41 # FIXME: write something to clean up old files and patches
42 # FIXME: don't unpack or make a patch when we don't have all the parts
43 # FIXME: don't make a patch when we were not able to unpack the source package
44 # FIXME: cleanup files at start of run
45 # FIXME: extract new debian/patches/ patches
46 # FIXME: print out packages that are no longer in Debian
47 # FIXME: deal with really large patches:
48 # FIXME:   kde-l10n-*: too few parts to be useful
49 # FIXME:   divergence: too many changelog entries between versions to be useful
50 # FIXME:   derivative is older than Debian
51 # FIXME:   derivative renamed the source package
52 # FIXME:   just a really big diff
53 # FIXME: when there are multiple dsc files in snapshots, prefer the debian/debian-archive one
54 # FIXME: when the source package is ancient and the dsc is missing, make a fake one to use
55 # FIXME: add an in-memory cache of hashes so that hashes in multiple releases hit the disk once
57 import os
58 import sys
59 import httplib
60 import urllib2
61 import hashlib
62 import shutil
63 import logging
64 import tempfile
65 import string
66 import socket
67 import signal
68 import subprocess
69 import yaml
70 from debian import deb822, changelog
71 import apt_pkg
72 import psycopg2
73 try: import cjson as json
74 except ImportError: import json
76 # Helper functions for python stuff with annoying error handling
def makedirs(dirs):
        """Create *dirs* including missing parents, ignoring any OSError.

        This is a best-effort helper: an already existing directory (or any
        other OSError raised by os.makedirs) is silently ignored.
        """
        try:
                os.makedirs(dirs)
        except OSError:
                # Deliberately best-effort, typically the directory exists
                pass
def rmtree(dir):
        """Recursively delete the directory *dir*, ignoring any OSError.

        A missing directory is therefore not an error.
        """
        try:
                shutil.rmtree(dir)
        except OSError:
                # Deliberately best-effort
                pass
def remove(file):
        """Delete the file at path *file*, ignoring any OSError.

        A missing file is therefore not an error.
        """
        try:
                os.remove(file)
        except OSError:
                # Deliberately best-effort
                pass
def symlink(source, link):
        """Create a symbolic link at *link* pointing to *source*.

        Any OSError is ignored, so an existing *link* is left untouched.
        """
        try:
                os.symlink(source, link)
        except OSError:
                # Deliberately best-effort, typically the link exists already
                pass
94 # http://www.chiark.greenend.org.uk/ucgi/~cjwatson/blosxom/2009-07-02-python-sigpipe.html
def subprocess_setup():
        """Restore the default SIGPIPE disposition.

        Used as a preexec_fn for subprocess.Popen: Python installs its own
        SIGPIPE handling by default, which is usually not what non-Python
        child processes expect.
        """
        default_disposition = signal.SIG_DFL
        signal.signal(signal.SIGPIPE, default_disposition)
100 # We need to map apt_pkg.version_compare return values to cmp return values
101 # The documentation is incorrect: http://bugs.debian.org/680891
def apt_version_cmp(a, b):
        """Compare two version strings, returning -1, 0 or 1 like cmp().

        apt_pkg.version_compare only promises a negative, zero or positive
        number, so its result is clamped to the cmp() range here.
        """
        ret = apt_pkg.version_compare(a, b)
        # Sign of ret: (True/False arithmetic yields a plain int)
        return (ret > 0) - (ret < 0)
# Config
# Hash-addressed cache directories; the relative ones are resolved against
# the current working directory at start-up
md5_cache_dir = os.path.abspath('../md5-farm')
sha1_cache_dir = os.path.abspath('../sha1-farm')
sha256_cache_dir = os.path.abspath('../sha256-farm')
sha1_patch_dir = os.path.abspath('../sha1-patches')
sha1_lsdiff_dir = os.path.abspath('../sha1-lsdiff')
sha1_changelog_dir = os.path.abspath('../sha1-changelog')
# Human-readable patch trees (per-derivative and global)
deriv_patch_dir = os.path.abspath('patches')
global_patch_dir = os.path.abspath('../patches')
# SHA-1 addressed file farm of snapshot.debian.org
snapshot_cache_dir = '/srv/snapshot.debian.org/farm'
patch_too_large = os.path.abspath('../../doc/patch-too-large.txt')
# The three tuples below are parallel: the key used for a checksum in a
# parsed file entry, the matching hashlib constructor name and the matching
# Sources/dsc header
checksum_types = ('sha1', 'sha256', 'md5sum')
checksum_hashlib = ('sha1', 'sha256', 'md5')
checksum_headers = ('Checksums-Sha1', 'Checksums-Sha256', 'Files')
# Settings for HTTP downloads (timeout is passed to urllib2.urlopen)
user_agent = 'Debian Derivatives Census QA bot'
timeout = 60
# True when every character of s is a hexadecimal digit (also for '')
ishex = lambda s: not(set(s)-set(string.hexdigits))
# Init
apt_pkg.init()
# Setup configuration
# Point every apt directory at the <apt dir> argument and the sources.list
# at the <sources.list> argument
apt_pkg.config.set('Dir', os.path.abspath(sys.argv[2]))
apt_pkg.config.set('Dir::Etc', os.path.abspath(sys.argv[2]))
apt_pkg.config.set('Dir::State', os.path.abspath(sys.argv[2]))
apt_pkg.config.set('Dir::Cache', os.path.abspath(sys.argv[2]))
apt_pkg.config.set('Dir::State::status', os.path.abspath(os.path.join(sys.argv[2],'status')))
apt_pkg.config.set('Dir::Etc::sourcelist', os.path.abspath(sys.argv[1]))
# presumably a stand-in for gpgv so no real signature check runs -- TODO confirm
apt_pkg.config.set('Dir::Bin::gpg', 'fakegpgv')
# Preparation
sources_list = apt_pkg.SourceList()
sources_list.read_main_list()
# Module-wide connection and cursor used by all the database helpers
conn = psycopg2.connect("service=snapshot-guest")
cur = conn.cursor()
# Start each run with a fresh log file
# NOTE(review): the usage text in the header lists six arguments but the log
# file is read from sys.argv[7] -- confirm which of the two is out of date
remove(sys.argv[7])
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG, filename=sys.argv[7])
# Voodoo
lists_dir = apt_pkg.config.find_dir('Dir::State::lists')
# One list per sources.list entry, holding only its source package indexes
source_entries = [[i for i in x.index_files if i.label=='Debian Source Index'] for x in sources_list.list]
# The derivative is identified by the name of the current working directory
derivative_short_name = os.path.basename(os.getcwd())
# Counters, presumably updated by the main loop further down -- TODO confirm
modifies_dsc_files = 0
repackaged_but_identical = 0
153 # Helper functions for generating path names
def hash_path_parent(dir, hash):
        """Return the fan-out directory for *hash* below *dir*.

        The layout is <dir>/<hash chars 0-1>/<hash chars 2-3>.
        """
        first_level = hash[:2]
        second_level = hash[2:4]
        return os.path.join(dir, first_level, second_level)
def hash_path(dir, hash):
        """Return the path of the file named *hash* in the farm below *dir*.

        The layout is <dir>/<hash chars 0-1>/<hash chars 2-3>/<hash>.
        """
        first_level = hash[:2]
        second_level = hash[2:4]
        return os.path.join(dir, first_level, second_level, hash)
def hash_path_exists(dir, hash):
        """Return True when the farm below *dir* has an entry named *hash*.

        The entry is looked up at <dir>/<hash chars 0-1>/<hash chars 2-3>/<hash>;
        os.path.exists is used, so a dangling symlink counts as missing.
        """
        candidate = os.path.join(dir, hash[:2], hash[2:4], hash)
        return os.path.exists(candidate)
def sha1_patch_path(debian_dsc_sha1, dsc_sha1, type=None):
        """Return the absolute cache path of a patch between two dsc files.

        The path nests the fan-out path of dsc_sha1 inside the fan-out path
        of debian_dsc_sha1 below sha1_patch_dir. It ends in '.patch', or in
        '.<type>.patch' when *type* is given.
        """
        outer = hash_path(sha1_patch_dir, debian_dsc_sha1)
        inner = hash_path('', dsc_sha1)
        suffix = '.patch'
        if type:
                suffix = '.%s' % type + suffix
        return os.path.abspath(os.path.join(outer, inner) + suffix)
def sha1_lsdiff_path(debian_dsc_sha1, dsc_sha1, type=None):
        """Return the absolute cache path of the lsdiff output for two dsc files.

        The path nests the fan-out path of dsc_sha1 inside the fan-out path
        of debian_dsc_sha1 below sha1_lsdiff_dir. It ends in '.lsdiff', or in
        '.<type>.lsdiff' when *type* is given.
        """
        outer = hash_path(sha1_lsdiff_dir, debian_dsc_sha1)
        inner = hash_path('', dsc_sha1)
        suffix = '.lsdiff'
        if type:
                suffix = '.%s' % type + suffix
        return os.path.abspath(os.path.join(outer, inner) + suffix)
def shortslug(name):
        """Return the short directory prefix used to group a package name.

        Names starting with 'lib' map to their first four characters, all
        other names to their first character.
        """
        if name.startswith('lib'):
                return name[:4]
        return name[0]
def deriv_patch_path(name, version, debian_name, debian_version, type=None):
        """Return the absolute path of a patch in the derivative's patch tree.

        The file lives in deriv_patch_dir/<shortslug>/<debian_name>/ and is
        named <debian_name>_<debian_version>_<name>_<version>, followed by
        '.<type>' when *type* is given, followed by '.patch'.
        """
        # The trailing '' makes the directory end with a path separator
        directory = os.path.join(deriv_patch_dir, shortslug(debian_name), debian_name, '')
        filename = '_'.join((debian_name, debian_version, name, version))
        if type:
                filename += '.%s' % type
        return os.path.abspath(directory + filename + '.patch')
def global_patch_path(name, version, debian_name, debian_version, type=None):
        """Return the absolute path of a patch in the global patch tree.

        The file lives in global_patch_dir/<shortslug>/<debian_name>/ and is
        named Debian_<debian_name>_<debian_version>_<derivative>_<name>_<version>,
        followed by '.<type>' when *type* is given, followed by '.patch'.
        """
        # The trailing '' makes the directory end with a path separator
        directory = os.path.join(global_patch_dir, shortslug(debian_name), debian_name, '')
        components = ('Debian', debian_name, debian_version, derivative_short_name, name, version)
        filename = '_'.join(components)
        if type:
                filename += '.%s' % type
        return os.path.abspath(directory + filename + '.patch')
193 # Functions for munging source packages
def convert_lzip_to_gzip(dir, name):
        """Recompress the lzip file *name* inside *dir* with gzip.

        Runs 'lzip -d' followed by 'gzip -1' with *dir* as working directory.
        Failures of either tool are only logged. Returns the pair
        (original name, new .gz name).
        """
        def run(cmdline):
                # Run one tool in dir, capturing stdout and stderr together
                proc = subprocess.Popen(cmdline, cwd=dir, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=subprocess_setup)
                captured = proc.communicate()[0]
                return (proc.returncode, captured)

        (returncode, output) = run(['lzip', '-d', name])
        if returncode:
                logging.warning('lzip reported failure to decompress %s:', name)
                logging.warning(output)

        # Strip off .lz
        bname = name[:-3]

        # gzip -1 to reduce overhead
        (returncode, output) = run(['gzip', '-1', bname])
        if returncode:
                logging.warning('gzip reported failure to compress %s:', bname)
                logging.warning(output)

        return (name, bname + '.gz')
def update_dsc_file(dir, dsc_name, parts):
        """Rewrite the dsc file in *dir* to reference recompressed parts.

        dir is the directory holding the dsc and its parts, dsc_name the
        file name of the dsc and parts an iterable of (old name, new name)
        pairs as returned by convert_lzip_to_gzip. For every pair the size
        and all checksums of the new file are computed and the matching
        entry in each checksum header present in the dsc is replaced.

        The dsc path is removed and written afresh rather than modified in
        place, since it may be a symlink into one of the caches.
        """
        dsc_path = os.path.join(dir, dsc_name)
        # The Deb822 constructor parses its input eagerly, so the file can
        # be closed right after the object has been built
        with open(dsc_path, 'rb') as dsc_file:
                dsc = deb822.Dsc(dsc_file)
        for (old, name) in parts:
                path = os.path.join(dir, name)
                size = os.path.getsize(path)
                # One running digest per checksum type, fed in a single pass
                digests = {}
                for (kind, func) in zip(checksum_types, checksum_hashlib):
                        digests[kind] = getattr(hashlib, func)()
                with open(path, 'rb') as f:
                        for chunk in iter(lambda: f.read(128 * 64), b''):
                                for kind in checksum_types:
                                        digests[kind].update(chunk)
                hashes = {}
                for kind in checksum_types:
                        hashes[kind] = digests[kind].hexdigest()
                for (header, kind) in zip(checksum_headers, checksum_types):
                        if header in dsc:
                                dsc[header] = [{kind: hashes[kind], 'size': size, 'name': name} if p['name'] == old else p for p in dsc[header]]
        os.remove(dsc_path) # So we don't change the original that the dsc links to
        with open(dsc_path, 'wb') as dsc_file:
                dsc.dump(dsc_file)
236 # Functions for downloading files and storing them in the hash caches
def download_and_check_hash(url, dir, hash, hash_type):
        """Download *url*, verify its checksum and file it in the hash caches.

        dir is the link farm for hash_type ('sha256' or 'md5sum') and hash
        is the expected hex digest. After a successful download a symlink
        named after *hash* is created in *dir*. It points at the snapshot
        farm when the file's sha1 exists there, otherwise at a copy saved in
        the derivatives sha1 cache.

        Returns (True, sha1) when the file exists in the snapshot farm,
        (False, sha1) when it only exists in the derivatives sha1 cache and
        ('unknown', None) for an unsupported hash type, a checksum mismatch
        or a failed download.
        """
        hashers = {'sha256': hashlib.sha256, 'md5sum': hashlib.md5}
        # Reject unsupported hash types before touching the network
        if hash_type not in hashers:
                logging.warning('unknown hash type detected: %s %s %s', hash_type, hash, url)
                return ('unknown', None)
        try:
                parent = hash_path_parent(dir,hash)
                path = hash_path(dir,hash)
                logging.debug('downloading %s', url)
                makedirs(parent)
                headers = { 'User-Agent' : user_agent }
                req = urllib2.Request(url, None, headers)
                u = urllib2.urlopen(req, None, timeout)
                # Always release the connection, also when reading fails
                try:
                        data = u.read()
                finally:
                        u.close()
                data_hash = hashers[hash_type](data).hexdigest()
                if data_hash != hash:
                        logging.warning('incorrect hash for downloaded file, ignoring: %s %s != %s %s', hash_type, hash, data_hash, url)
                        return ('unknown', None)
                sha1 = hashlib.sha1(data).hexdigest()
                sha1_path = hash_path(sha1_cache_dir, sha1)
                sha1_parent = hash_path_parent(sha1_cache_dir, sha1)
                makedirs(sha1_parent)
                if hash_path_exists(snapshot_cache_dir, sha1):
                        snapshot_path = hash_path(snapshot_cache_dir, sha1)
                        symlink(snapshot_path, path)
                        logging.debug('exists in snapshot sha1 cache: %s %s %s %s', hash_type, hash, sha1, url)
                        return (True, sha1)
                if not os.path.exists(sha1_path):
                        logging.debug('correct hash for downloaded file, saving: %s %s %s %s', hash_type, hash, sha1, url)
                        # The data is arbitrary binary content
                        with open(sha1_path, 'wb') as f:
                                f.write(data)
                else:
                        logging.debug('correct hash for downloaded file, not saving: already in derivs cache: %s %s %s %s', hash_type, hash, sha1, url)
                # Relative link so the farms can be moved around together
                symlink(os.path.relpath(sha1_path, os.path.dirname(path)), path)
                logging.debug('does not exist in snapshot sha1 cache: %s %s %s %s', hash_type, hash, sha1, url)
                return (False, sha1)
        except urllib2.URLError as e:
                if hasattr(e, 'reason'): reason = e.reason
                elif hasattr(e, 'code'): reason = e.code
                else: reason = e
                logging.warning('unable to download hash file, ignoring: %s %s', reason, url)
                return ('unknown', None)
        except httplib.HTTPException as e:
                logging.warning('unable to download hash file, ignoring: %s %s', repr(e), url)
                return ('unknown', None)
        except socket.error as e:
                logging.warning('unable to download hash file, ignoring: %s %s', e, url)
                return ('unknown', None)
def download_sha1(url, dir, sha1):
        """Download *url* and store it in the sha1 farm *dir* if it matches.

        sha1 is the expected hex digest of the downloaded data. The file is
        written to its fan-out path below *dir* unless it is already there.

        Returns (False, sha1) when the digest matches and ('unknown', None)
        on a digest mismatch or a failed download.
        """
        try:
                parent = hash_path_parent(dir,sha1)
                path = hash_path(dir,sha1)
                logging.debug('downloading sha1: %s %s', sha1, url)
                makedirs(parent)
                headers = { 'User-Agent' : user_agent }
                req = urllib2.Request(url, None, headers)
                u = urllib2.urlopen(req, None, timeout)
                # Always release the connection, also when reading fails
                try:
                        data = u.read()
                finally:
                        u.close()
                data_sha1 = hashlib.sha1(data).hexdigest()
                if data_sha1 != sha1:
                        logging.warning('incorrect sha1 for downloaded file, ignoring: %s != %s %s', sha1, data_sha1, url)
                        return ('unknown', None)
                logging.debug('correct sha1 for downloaded file, saving: %s %s', sha1, url)
                if not os.path.exists(path):
                        # The data is arbitrary binary content
                        with open(path, 'wb') as f:
                                f.write(data)
                return (False, sha1)
        except urllib2.URLError as e:
                if hasattr(e, 'reason'): reason = e.reason
                elif hasattr(e, 'code'): reason = e.code
                else: reason = e
                logging.warning('unable to download sha1 file, ignoring: %s %s', reason, url)
                return ('unknown', None)
        except httplib.HTTPException as e:
                logging.warning('unable to download hash file, ignoring: %s %s', repr(e), url)
                return ('unknown', None)
        except socket.error as e:
                logging.warning('unable to download hash file, ignoring: %s %s', e, url)
                return ('unknown', None)
325 # Functions for checking the hash caches
def check_hash_cache(dir, hash, hash_type, url):
        """Resolve a sha256/md5 digest to a sha1 through the link farm *dir*.

        Entries in *dir* are symlinks named after the digest whose target's
        base name is the sha1 of the same file. When no link exists, or the
        link target is gone from both sha1 caches, the file is downloaded
        from *url* and verified by download_and_check_hash.

        Returns (True, sha1) when the file is in the snapshot farm,
        (False, sha1) when it is only in the derivatives sha1 cache, or
        whatever download_and_check_hash returns.
        """
        logging.debug('checking hash cache: %s %s', hash_type, hash)
        link_path = hash_path(dir, hash)
        try:
                target = os.readlink(link_path)
        except OSError:
                logging.debug('does not exist in hash cache: %s %s', hash_type, hash)
                return download_and_check_hash(url, dir, hash, hash_type)
        logging.debug('exists in hash cache: %s %s', hash_type, hash)
        sha1 = os.path.basename(target)
        if hash_path_exists(snapshot_cache_dir, sha1):
                logging.debug('exists in snapshot sha1 cache: %s', sha1)
                # The snapshot farm has it, the derivatives copy is redundant
                remove(hash_path(sha1_cache_dir,sha1))
                return (True, sha1)
        elif hash_path_exists(sha1_cache_dir, sha1):
                logging.debug('exists in derivatives sha1 cache: %s', sha1)
                return (False, sha1)
        # The link is dangling: its target vanished from both sha1 caches.
        # Drop the stale link and fetch the file again instead of falling
        # off the end and handing None to callers that unpack a tuple.
        logging.debug('missing from all sha1 caches, downloading again: %s', sha1)
        remove(link_path)
        return download_and_check_hash(url, dir, hash, hash_type)
def check_sha1_cache(sha1, url):
        """Look up *sha1* in the sha1 caches, downloading *url* on a miss.

        Returns (True, sha1) when the file is in the snapshot farm (any
        copy in the derivatives cache is then removed), (False, sha1) when
        it is only in the derivatives cache, or the result of download_sha1
        otherwise.
        """
        logging.debug('checking sha1 caches: %s', sha1)

        if hash_path_exists(snapshot_cache_dir, sha1):
                logging.debug('exists in snapshot sha1 cache: %s', sha1)
                remove(hash_path(sha1_cache_dir,sha1))
                return (True, sha1)

        if hash_path_exists(sha1_cache_dir, sha1):
                logging.debug('exists in derivatives sha1 cache: %s', sha1)
                return (False, sha1)

        logging.debug('does not exist in any sha1 caches: %s', sha1)
        return download_sha1(url, sha1_cache_dir, sha1)
def status(type, hash, url):
        """Classify a file by checksum as unmodified, modified or unknown.

        type is one of 'sha1', 'sha256' or 'md5sum', hash the hex digest and
        url the place to download the file from on a cache miss.

        Returns ('unmodified', sha1) when the cache lookup reports True,
        ('modified', sha1) when it reports False, the lookup result itself
        otherwise, and ('unknown', None) for an unknown checksum type.
        """
        logging.debug('checking status of hash: %s %s %s', type, hash, url)

        # Pick the cache lookup matching the checksum type
        if type == 'sha1':
                (ret, sha1) = check_sha1_cache(hash, url)
        elif type == 'sha256':
                (ret, sha1) = check_hash_cache(sha256_cache_dir, hash, type, url)
        elif type == 'md5sum':
                (ret, sha1) = check_hash_cache(md5_cache_dir, hash, type, url)
        else:
                logging.warning('unknown hash type detected: %s %s %s', type, hash, url)
                return ('unknown', None)

        # Translate the lookup result; anything else is passed through
        if ret == True:
                return ('unmodified', sha1)
        if ret == False:
                return ('modified', sha1)
        return (ret, sha1)
389 # Functions for getting information about source packages
def get_info(srcpkg):
        """Extract the dsc and the other parts from a Sources stanza.

        srcpkg is a mapping holding one or more of the checksum headers
        ('Checksums-Sha1', 'Checksums-Sha256', 'Files'), each a list of
        entries with a 'name', a 'size' and one checksum key.

        The dsc is taken from the first header (in checksum_headers order)
        that lists one. The parts are all non-dsc entries, each taken from
        the first header that lists that name.

        Returns (dsc_hash_type, dsc_hash, dsc_name, parts), or None when
        there is no dsc, more than one dsc or no checksum for the dsc.
        """
        dsc = None
        for header in checksum_headers:
                if not dsc and header in srcpkg:
                        dsc = [x for x in srcpkg[header] if x['name'].endswith('.dsc')]
        if not dsc:
                logging.warning('did not find any dsc files')
                return None
        if len(dsc) > 1:
                # The entries are mappings, not (name, sha1) pairs
                logging.warning('found multiple dsc files: %s', ' '.join([entry['name'] for entry in dsc]))
                return None
        dsc = dsc[0]
        dsc_name = dsc['name']
        # Whatever key is left besides name and size is the checksum
        dsc_hashes = [(k, v) for k, v in dsc.items() if k not in ('name', 'size')]
        if not dsc_hashes:
                logging.warning('did not find any checksum for dsc file: %s', dsc_name)
                return None
        dsc_hash_type, dsc_hash = dsc_hashes[0]

        parts = []
        part_names = set()
        for header in checksum_headers:
                if header not in srcpkg:
                        continue
                for part in srcpkg[header]:
                        if 'name' not in part:
                                continue
                        part_name = part['name']
                        if part_name in part_names or part_name.endswith('.dsc'):
                                continue
                        parts.append(part)
                        part_names.add(part_name)

        return (dsc_hash_type, dsc_hash, dsc_name, parts)
def get_debian_info(files):
        """Split a list of (name, sha1) pairs into the dsc and the other parts.

        Returns (dsc_sha1, dsc_name, parts) where parts holds the non-dsc
        pairs with duplicate names dropped (first one wins). When several
        dsc files are listed the first one is used and a warning is logged.
        Returns None when no dsc is listed at all.
        """
        dsc_entries = [entry for entry in files if entry[0].endswith('.dsc')]
        if not dsc_entries:
                logging.warning('did not find any Debian dsc files: snapshots bug or ancient source package')
                return None
        if len(dsc_entries) > 1:
                described = ' '.join(['%s %s' % (entry_name, entry_sha1) for entry_name, entry_sha1 in dsc_entries])
                logging.warning('found multiple Debian dsc files, choosing first one: %s' % described)

        dsc_name, dsc_sha1 = dsc_entries[0]

        seen_names = []
        parts = []
        for entry in files:
                part_name, part_sha1 = entry
                if part_name in seen_names or part_name.endswith('.dsc'):
                        continue
                seen_names.append(part_name)
                parts.append(entry)

        return (dsc_sha1, dsc_name, parts)
438 # Functions for extracting information from the snapshots database
def database_error(e):
        """Log a failed database query and reset the shared connection.

        The pgcode and pgerror attributes of *e* are logged when present,
        None is logged in their place otherwise.
        """
        reason = getattr(e, 'pgerror', None)
        code = getattr(e, 'pgcode', None)
        logging.warning('unable to execute database query: %s %s', code, reason)
        # Reset the connection so that later queries can be executed again
        conn.reset()
def srcpkg_was_in_debian(name, version=None):
        """Tell whether the snapshot database knows a source package.

        With *version* only that exact version counts, without it any
        version of *name* does. Returns True or False, or None when the
        query failed.
        """
        if version:
                query = 'SELECT version FROM srcpkg WHERE name=%s AND version=%s LIMIT 1;'
                params = (name, version)
        else:
                query = 'SELECT version FROM srcpkg WHERE name=%s LIMIT 1;'
                params = (name,)
        try:
                cur.execute(query, params)
                return bool(cur.fetchone())
        except psycopg2.Error as err:
                database_error(err)
                return None
def sha1_to_srcpkgs(sha1):
        """Return the (name, version) rows of source packages containing a file.

        The file is identified by its *sha1*. Returns None when the query
        failed.
        """
        query = '''SELECT name, version
                        FROM srcpkg
                        JOIN file_srcpkg_mapping ON file_srcpkg_mapping.srcpkg_id=srcpkg.srcpkg_id
                        WHERE hash=%s;'''
        try:
                cur.execute(query, (sha1,))
                rows = cur.fetchall()
        except psycopg2.Error as err:
                database_error(err)
                return None
        return rows
def srcpkg_to_sha1s(name, version):
        """Return the (hash,) rows of all files mapped to a source package.

        The package is identified by *name* and *version*. Returns None
        when the query failed.
        """
        query = '''SELECT hash
                        FROM file_srcpkg_mapping
                        JOIN srcpkg ON srcpkg.srcpkg_id=file_srcpkg_mapping.srcpkg_id
                        WHERE name=%s AND version=%s;'''
        try:
                cur.execute(query, (name, version))
                rows = cur.fetchall()
        except psycopg2.Error as err:
                database_error(err)
                return None
        return rows
def srcpkg_to_srcpkgs(name):
        """Return the (name, version) rows known for the source package *name*.

        The rows are ordered by version, descending. Returns None when the
        query failed.
        """
        query = '''SELECT name, version
                        FROM srcpkg
                        WHERE name=%s ORDER BY version DESC;'''
        try:
                cur.execute(query, (name,))
                rows = cur.fetchall()
        except psycopg2.Error as err:
                database_error(err)
                return None
        return rows
def sha1s_to_files(sha1):
        """Return the distinct (name, hash) rows of files with the given sha1.

        Returns None when the query failed.
        """
        try:
                # The parameters must be a sequence holding the sha1 argument;
                # the builtin hash() function used to be passed here by mistake
                cur.execute('SELECT DISTINCT ON (name, hash) name, hash FROM file WHERE hash=%s;', (sha1,))
                return cur.fetchall()
        except psycopg2.Error as e:
                database_error(e)
                return None
def srcpkg_to_files(name, version):
        """Return the distinct (file name, hash) rows of a source package.

        The package is identified by *name* and *version*. Returns None
        when the query failed.
        """
        query = '''SELECT DISTINCT ON (file.name, file.hash) file.name, file.hash
                        FROM file_srcpkg_mapping
                        JOIN srcpkg ON srcpkg.srcpkg_id=file_srcpkg_mapping.srcpkg_id
                        JOIN file ON file_srcpkg_mapping.hash=file.hash
                        WHERE srcpkg.name=%s AND srcpkg.version=%s;'''
        try:
                cur.execute(query, (name, version))
                rows = cur.fetchall()
        except psycopg2.Error as err:
                database_error(err)
                return None
        return rows
def sha1_version_to_derived_from(sha1, version):
        """Find the source package a file with *sha1* was most likely taken from.

        First looks for packages containing the file whose version is not
        newer than *version* and returns the first row ordered by name
        ascending, version descending. When there is none, falls back to
        the first row of any package containing the file, ordered by name
        and version ascending.

        Returns a list with at most one (name, version) row, or None when
        a query failed.
        """
        try:
                cur.execute(
                        '''SELECT name, version
                        FROM srcpkg
                        JOIN file_srcpkg_mapping ON file_srcpkg_mapping.srcpkg_id=srcpkg.srcpkg_id
                        WHERE hash=%s and version<=%s
                        ORDER BY name ASC, version DESC
                        LIMIT 1;''', (sha1, version))
                res = cur.fetchall()
                if res: return res
                # This query has a single placeholder, so it must be given
                # exactly one parameter
                cur.execute(
                        '''SELECT name, version
                        FROM srcpkg
                        JOIN file_srcpkg_mapping ON file_srcpkg_mapping.srcpkg_id=srcpkg.srcpkg_id
                        WHERE hash=%s
                        ORDER BY name ASC, version ASC
                        LIMIT 1;''', (sha1,))
                return cur.fetchall()
        except psycopg2.Error as e:
                database_error(e)
                return None
def srcpkg_to_derived_from(name, version):
        """Find the version of *name* that *version* was most likely based on.

        Prefers the highest known version that is not newer than *version*
        and falls back to the lowest known version of the package.

        Returns a list with at most one (name, version) row, or None when
        a query failed.
        """
        not_newer_query = '''SELECT name, version
                        FROM srcpkg
                        WHERE name=%s and version<=%s
                        ORDER BY version DESC
                        LIMIT 1;'''
        oldest_query = '''SELECT name, version
                        FROM srcpkg
                        WHERE name=%s
                        ORDER BY version ASC
                        LIMIT 1;'''
        try:
                cur.execute(not_newer_query, (name, version))
                rows = cur.fetchall()
                if not rows:
                        cur.execute(oldest_query, (name,))
                        rows = cur.fetchall()
                return rows
        except psycopg2.Error as err:
                database_error(err)
                return None
560 # Functions related to creating patches
562 # Add symlinks for all needed files
def prepare(dsc_name, dsc_sha1, parts):
        """Build a temporary directory holding a derivative source package.

        dsc_name and dsc_sha1 identify the dsc, parts is a list of mappings
        with a 'name' and one of the 'sha1', 'sha256' or 'md5sum' keys. Each
        file is symlinked under its name from the cache that holds it. Parts
        compressed with lzip are recompressed with gzip and the dsc is
        rewritten to match.

        Returns the path of the temporary directory; the caller has to
        remove it.
        """
        logging.debug('preparing deriv directory for %s', dsc_name)
        tmp_dir = tempfile.mkdtemp(prefix='derivs-cmp-srcpkg-%s-' % derivative_short_name)
        # Prefer the snapshot farm and fall back to the derivatives cache.
        # (The path used to be overwritten with the derivatives cache path
        # unconditionally, which discarded this lookup.)
        path = hash_path(snapshot_cache_dir, dsc_sha1)
        if not os.path.exists(path): path = hash_path(sha1_cache_dir, dsc_sha1)
        dsc_path = os.path.join(tmp_dir, dsc_name)
        os.symlink(path, dsc_path)
        converted_parts = []
        for part in parts:
                if 'sha1' in part:
                        path = hash_path(snapshot_cache_dir, part['sha1'])
                        if not os.path.exists(path): path = hash_path(sha1_cache_dir, part['sha1'])
                elif 'sha256' in part: path = hash_path(sha256_cache_dir, part['sha256'])
                elif 'md5sum' in part: path = hash_path(md5_cache_dir, part['md5sum'])
                else:
                        # Without a checksum there is no cache entry to link
                        # to; do not reuse the path of the previous file
                        logging.warning('no known checksum for part %s of %s, skipping', part['name'], dsc_name)
                        continue
                part_path = os.path.join(tmp_dir, part['name'])
                os.symlink(path, part_path)
                # Some distributions allow additional compression schemes
                # Here we work around this by recompressing with gzip
                if part['name'].endswith('.lz'):
                        converted_parts.append(convert_lzip_to_gzip(tmp_dir, part['name']))
        # Update the dsc file if we recompressed any files
        if converted_parts:
                update_dsc_file(tmp_dir, dsc_name, converted_parts)
        return tmp_dir
def prepare_debian(dsc_name, dsc_sha1, files):
        """Build a temporary directory holding a Debian source package.

        The dsc and every (name, sha1) pair in *files* are symlinked from
        the snapshot farm. Returns the path of the directory, or None (after
        removing the directory again) unless every linked file is readable.
        """
        logging.debug('preparing Debian directory for %s', dsc_name)
        debian_tmp_dir = tempfile.mkdtemp(prefix='derivs-cmp-srcpkg-Debian-')

        def link_from_snapshot(link_name, sha1):
                # Link one file into the directory, report if it is readable
                target = hash_path(snapshot_cache_dir, sha1)
                os.symlink(target, os.path.join(debian_tmp_dir, link_name))
                return os.access(target, os.R_OK)

        readable_parts = 0
        if link_from_snapshot(dsc_name, dsc_sha1):
                readable_parts += 1
        for part_name, part_sha1 in files:
                if link_from_snapshot(part_name, part_sha1):
                        readable_parts += 1

        # The dsc plus all the other files have to be readable
        if readable_parts == (1 + len(files)):
                return debian_tmp_dir
        logging.info('only %s parts of %s are readable', readable_parts, dsc_name)
        rmtree(debian_tmp_dir)
        return None
def get_changelog_entries(tmp_dir, dsc_name, dsc_sha1):
        """Return the (package, version) pairs from a source package changelog.

        tmp_dir is a directory prepared with the dsc and its parts, dsc_name
        the file name of the dsc inside it and dsc_sha1 the sha1 of the dsc,
        which is the key of the on-disk JSON cache in sha1_changelog_dir.

        On a cache miss the package is unpacked with dpkg-source into
        tmp_dir/extracted, debian/changelog is parsed, the extracted tree is
        removed again and the result is cached.

        Returns a list of (package, version) tuples, or None when the
        package could not be unpacked or its changelog could not be used.
        """
        logging.debug('getting changelog entries from %s', dsc_name)

        # Cache check
        changelog_path = hash_path(sha1_changelog_dir, dsc_sha1)
        if os.path.exists(changelog_path):
                logging.debug('changelog cache exists for %s %s', dsc_name, dsc_sha1)
                with open(changelog_path) as f:
                        try:
                                changelog_entries = json.load(f)
                        except ValueError:
                                # Unusable cache entry, regenerate it below
                                pass
                        else:
                                return [tuple(entry) for entry in changelog_entries]

        # Preparation
        extract_path = os.path.join(tmp_dir,'extracted')

        # Unpack the source tree
        logging.debug('unpacking source package %s', dsc_name)
        cmdline = ['dpkg-source', '-x', dsc_name, 'extracted']
        process = subprocess.Popen(cmdline, cwd=tmp_dir, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=subprocess_setup)
        output = process.communicate()[0]
        if process.returncode:
                logging.warning('dpkg-source reported failure to extract %s:', dsc_name)
                logging.warning(output)
                # Log a directory listing to help diagnose the failure
                cmdline = ['ls', '-lR', '--time-style=+']
                process = subprocess.Popen(cmdline, cwd=tmp_dir, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=subprocess_setup)
                output = process.communicate()[0]
                logging.warning(output)
                rmtree(extract_path)
                return None

        # Sanitise the debian dir and changelog file in case it is a symlink to outside
        debian_dir = os.path.join(extract_path, 'debian')
        changelog_filename = os.path.join(debian_dir,'changelog')
        if os.path.islink(debian_dir) or os.path.islink(changelog_filename):
                logging.warning('debian dir or changelog is a symbolic link %s', dsc_name)
                rmtree(extract_path)
                return None

        # Check if the changelog exists
        if not os.path.exists(changelog_filename):
                logging.warning('could not find changelog in %s', dsc_name)
                rmtree(extract_path)
                return None

        # Find out which source package is the most likely derivative
        logging.debug('parsing changelog for %s', dsc_name)
        try:
                # Opening, parsing and iterating can all fail on bad input,
                # so all three are guarded and the file is always closed
                with open(changelog_filename) as changelog_file:
                        changelog_obj = changelog.Changelog(changelog_file)
                        changelog_entries = [(entry.package, str(entry._raw_version)) for entry in changelog_obj]
        except Exception:
                logging.warning('could not read changelog from %s', dsc_name)
                rmtree(extract_path)
                return None

        # Clean up again
        rmtree(extract_path)

        # Write the cache
        makedirs(hash_path_parent(sha1_changelog_dir, dsc_sha1))
        remove(changelog_path)
        with open(changelog_path, 'w') as f:
                json.dump(changelog_entries, f)

        return changelog_entries
680 # Find the source package name and version this is probably derived from
def find_derived_from(tmp_dir, name, version, dsc_name, dsc_sha1, parts_unmodified):
        """Guess which Debian source package (name, version) this one is based on.

        Heuristics, tried in order:
          1. a changelog entry that is also in the candidate list,
          2. a changelog entry that srcpkg_was_in_debian() accepts,
          3. the highest candidate with the same name and a version that is
             not newer than ours, else the last candidate after sorting,
          4. the first result of srcpkg_to_derived_from().

        The candidate list is built from the packages that contain the
        unmodified parts, falling back to srcpkg_to_srcpkgs(name).

        Returns a (name, version) pair, or None when nothing was found.
        """
        def describe(pairs):
                # Render (name, version) pairs as "name version name version ..."
                return ' '.join(['%s %s' % (pkg, ver) for pkg, ver in pairs])

        logging.debug('finding base source package of %s %s', name, version)

        # Get a list of changelog entries
        changelog_entries = get_changelog_entries(tmp_dir, dsc_name, dsc_sha1)
        if changelog_entries:
                logging.debug('changelog entries are: %s', describe(changelog_entries))

        # Get a list of candidate versions from the database
        candidates = []
        logging.debug('checking which parts were in Debian')
        for part_sha1, part_name in parts_unmodified:
                seen_in = sha1_to_srcpkgs(part_sha1)
                if not seen_in:
                        continue
                logging.debug('part %s %s available in %s', part_sha1, part_name, describe(seen_in))
                candidates.extend(seen_in)

        if not candidates:
                logging.debug('no parts in common with Debian, obtaining old versions')
                candidates = srcpkg_to_srcpkgs(name) or candidates

        # Uniqify
        candidates = list(set(candidates))
        if not candidates:
                logging.debug('nothing in possibly derived from list')
        else:
                logging.debug('possibly derived from: %s', describe(candidates))

        # Match changelog versions against candidates
        if changelog_entries:
                logging.debug('matching changelog entries against versions possibly derived from')
                for entry in changelog_entries:
                        entry_name, entry_version = entry
                        if entry not in candidates:
                                continue
                        logging.debug('%s %s in possibly derived from', entry_name, entry_version)
                        return entry
                logging.debug('checking if changelog entries were ever in Debian')
                for entry_name, entry_version in changelog_entries:
                        if not srcpkg_was_in_debian(entry_name, entry_version):
                                continue
                        logging.debug('%s %s was in Debian', entry_name, entry_version)
                        return (entry_name, entry_version)

        if candidates:
                logging.debug('finding closest entry in possibly derived from')
                # Newest version first, so the first acceptable hit is the closest one
                candidates.sort(cmp=lambda left, right: apt_version_cmp(right[1], left[1]))
                for entry_name, entry_version in candidates:
                        if name != entry_name or apt_version_cmp(version, entry_version) < 0:
                                continue
                        logging.debug('%s %s is an equal or lower version', entry_name, entry_version)
                        return (entry_name, entry_version)
                # Nothing at or below our version: take the last (lowest sorted) candidate
                fallback = candidates[-1]
                fallback_name, fallback_version = fallback
                logging.debug('no lower version numbers, returning next highest version %s %s', fallback_name, fallback_version)
                return fallback

        logging.debug('finding closest version number in Debian')
        for closest in srcpkg_to_derived_from(name, version):
                # Only the first result is ever used
                closest_name, closest_version = closest
                logging.debug('closest package was %s %s', closest_name, closest_version)
                return closest

        logging.debug('could not find Debian package %s %s is derived from', name, version)
        return None
742 # Generate a patch file
def _log_debdiff_failure(output, tmp_dir, debian_tmp_dir):
        """Log debdiff's stderr followed by a recursive listing of both unpack dirs."""
        logging.warning(output)
        cmdline = ['ls', '-lR', '--time-style=+']
        for label, listing_dir in (derivative_short_name, tmp_dir), ('Debian', debian_tmp_dir):
                logging.warning('dir listing for %s:', label)
                process = subprocess.Popen(cmdline, cwd=listing_dir, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=subprocess_setup)
                logging.warning(process.communicate()[0])

def create_patch(tmp_dir, dsc_name, dsc_sha1, debian_tmp_dir, debian_dsc_name, debian_dsc_sha1):
        """Generate the debdiff between a Debian dsc and a derivative dsc.

        Two files are produced under the paths given by sha1_patch_path():
        the full debdiff output and a variant filtered down to */debian/*
        with its own diffstat header. Either file is skipped when it already
        exists. Files larger than 100MB are replaced by a symlink to
        patch_too_large.

        Returns False when debdiff fails (exit code 255 or any code other
        than 0/1) or reports no differences (exit code 0, which also bumps
        the repackaged_but_identical counter); returns True otherwise.
        """
        global repackaged_but_identical

        dsc_path = os.path.join(tmp_dir, dsc_name)
        debian_dsc_path = os.path.join(debian_tmp_dir, debian_dsc_name)
        path_everything = sha1_patch_path(debian_dsc_sha1, dsc_sha1)
        path_debian = sha1_patch_path(debian_dsc_sha1, dsc_sha1, 'debian')

        # Generate the main patch
        if not os.path.exists(path_everything) and os.path.exists(debian_dsc_path) and os.path.exists(dsc_path):
                makedirs(os.path.dirname(path_everything))
                cmdline = ['debdiff', '--quiet', '--diffstat', debian_dsc_path, dsc_path]
                # The context manager closes the output file even if Popen raises
                with open(path_everything, 'w') as patch_file:
                        process = subprocess.Popen(cmdline, stdout=patch_file, stderr=subprocess.PIPE, preexec_fn=subprocess_setup)
                        output = process.communicate()[1]
                returncode = process.returncode
                # NOTE(review): on the failure paths below the (possibly partial)
                # output file is left in place, so a later run will skip debdiff
                # for this pair - confirm that this caching is intended.
                if returncode == 255:
                        logging.warning('debdiff reported failure %s %s:', debian_dsc_name, dsc_name)
                        _log_debdiff_failure(output, tmp_dir, debian_tmp_dir)
                        return False
                elif returncode == 0:
                        logging.info('derivative repackaged in an identical way %s %s', debian_dsc_name, dsc_name)
                        repackaged_but_identical += 1
                        return False
                elif returncode != 1:
                        logging.warning('debdiff reported unknown return code %s %s %s:', returncode, debian_dsc_name, dsc_name)
                        _log_debdiff_failure(output, tmp_dir, debian_tmp_dir)
                        return False

        # Filter the main patch to include only the debian/ directory
        if os.path.exists(path_everything) and not os.path.exists(path_debian):
                makedirs(os.path.dirname(path_debian))
                cmdline = ['filterdiff', '--include=*/debian/*', path_everything]
                filterdiff = subprocess.Popen(cmdline, stdout=subprocess.PIPE, preexec_fn=subprocess_setup)
                filterdiff_output = filterdiff.communicate()[0]
                diffstat = subprocess.Popen('diffstat', stdin=subprocess.PIPE, stdout=subprocess.PIPE, preexec_fn=subprocess_setup)
                diffstat_output = diffstat.communicate(filterdiff_output)[0]
                with open(path_debian, 'w') as f:
                        f.write('diffstat of debian/ for %s %s\n' % (os.path.splitext(debian_dsc_name)[0], os.path.splitext(dsc_name)[0]))
                        f.write('\n')
                        f.write(diffstat_output)
                        f.write('\n')
                        f.write(filterdiff_output)

        # Patches > 100MB are probably not that useful, replace them with a link
        for path in path_everything, path_debian:
                try:
                        if os.path.getsize(path) > 104857600:
                                logging.info('patch between %s and %s is larger than 100MB', dsc_name, debian_dsc_name)
                                remove(path)
                                symlink(os.path.relpath(patch_too_large, os.path.dirname(path)), path)
                except OSError:
                        # A missing patch file is fine here, there is nothing to replace
                        pass

        return True
def check_patch(debian_dsc_sha1, dsc_sha1):
        """Tell whether the patch changes anything other than debian/changelog.

        The list of files touched by the patch comes from `lsdiff` and is
        cached at sha1_lsdiff_path(); the cache is reused when present.

        Returns True if any listed line is something other than
        debian/changelog (with or without a leading directory), else False.
        """
        patch_path = sha1_patch_path(debian_dsc_sha1, dsc_sha1)
        lsdiff_path = sha1_lsdiff_path(debian_dsc_sha1, dsc_sha1)
        if os.path.exists(lsdiff_path):
                logging.debug('lsdiff cache exists for %s', patch_path)
                with open(lsdiff_path) as f:
                        lsdiff = f.read()
        else:
                logging.debug('lsdiff cache does not exist for %s', patch_path)
                cmdline = ['lsdiff', patch_path]
                # NOTE(review): stderr is merged into stdout and the exit status
                # is not checked, so lsdiff error messages end up in the cache
                # and count as changed files - confirm this is acceptable.
                process = subprocess.Popen(cmdline, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=subprocess_setup)
                lsdiff = process.communicate()[0]
                makedirs(os.path.dirname(lsdiff_path))
                with open(lsdiff_path, 'w') as f:
                        f.write(lsdiff)
        for line in lsdiff.splitlines():
                if line != 'debian/changelog' and not line.endswith('/debian/changelog'):
                        return True
        return False
def present_patch(name, version, dsc_sha1, debian_name, debian_version, debian_dsc_sha1):
        """Publish the cached patches under their per-derivative and global names.

        For each patch variant (full and 'debian') that exists in the sha1
        based store, symlinks are (re)created at deriv_patch_path() and
        global_patch_path(). The global link, relative to global_patch_dir, is
        reported only when check_patch() considers the patch useful.

        Returns a tuple of the reported relative paths (possibly empty).
        """
        worth_listing = check_patch(debian_dsc_sha1, dsc_sha1)
        published = []
        for variant in ('', 'debian'):
                target = sha1_patch_path(debian_dsc_sha1, dsc_sha1, variant)
                if os.path.exists(target):
                        deriv_link = deriv_patch_path(name, version, debian_name, debian_version, variant)
                        global_link = global_patch_path(name, version, debian_name, debian_version, variant)
                        both_links = (deriv_link, global_link)
                        # Same step order as before: create dirs, drop old links, relink
                        for link in both_links:
                                makedirs(os.path.dirname(link))
                        for link in both_links:
                                remove(link)
                        for link in both_links:
                                symlink(os.path.relpath(target, os.path.dirname(link)), link)
                        if worth_listing:
                                published.append(os.path.relpath(global_link, os.path.abspath(global_patch_dir)))
        return tuple(published)
855 # Functions that wrap other functions and decide what to do
def _patch_against_debian(tmp_dir, name, version, dsc_name, dsc_sha1, parts_modified, debian_name, debian_version):
        """Create and publish the patch against one Debian version; return the patch tuple or None."""
        debian_files = srcpkg_to_files(debian_name, debian_version)
        if not debian_files:
                if srcpkg_was_in_debian(debian_name, debian_version):
                        logging.warning('source package %s %s: snapshot database issue, no Debian files found', debian_name, debian_version)
                else:
                        logging.warning('source package %s %s: derived from %s %s possibly bogus', name, version, debian_name, debian_version)
                return None
        debian_info = get_debian_info(debian_files)
        if not debian_info:
                logging.warning('source package %s %s: could not get Debian info for %s %s: %s', name, version, debian_name, debian_version, debian_info)
                return None
        debian_dsc_sha1, debian_dsc_name, debian_parts = debian_info
        logging.debug('Debian source package %s %s dsc found %s %s', debian_name, debian_version, debian_dsc_name, debian_dsc_sha1)
        debian_tmp_dir = prepare_debian(debian_dsc_name, debian_dsc_sha1, debian_parts)
        if not debian_tmp_dir:
                # This could be an issue with snapshots or a file that is not distributable
                logging.info('source package %s %s: could not create temporary dir for Debian: %s %s', name, version, debian_name, debian_version)
                return None
        try:
                if not create_patch(tmp_dir, dsc_name, dsc_sha1, debian_tmp_dir, debian_dsc_name, debian_dsc_sha1):
                        return None
                patch_names = present_patch(name, version, dsc_sha1, debian_name, debian_version, debian_dsc_sha1)
                if not patch_names:
                        return None
                return (debian_name, debian_version, debian_dsc_sha1, name, version, dsc_sha1, [part[0] for part in parts_modified], patch_names)
        finally:
                # Always drop the Debian unpack dir, even if patch creation raised
                rmtree(debian_tmp_dir)

def check_source_package(source_entry, srcpkg):
        """Decide what to do with one source package paragraph of a derivative.

        source_entry is used only for archive_uri(); srcpkg must provide the
        'Package', 'Version' and 'Directory' keys.

        Returns None when the package is skipped (missing keys, suspicious
        names or hashes, no info, or a dsc whose status is 'unmodified').
        Otherwise returns a 4-tuple (files, patch, link, new):
          files - list of (sha1, hash_type, hash) for the dsc and modified parts
          patch - patch description tuple, or None
          link  - (debian_name, debian_version, name, version, dsc_url), or None
          new   - (name, version, dsc_url) when no base package was found, or None
        """
        global modifies_dsc_files

        name = None
        version = None
        try:
                name = srcpkg['Package']
                version = srcpkg['Version']
                directory = srcpkg['Directory']
        except KeyError:
                logging.warning('could not process source package %s %s', name, version)
                return None
        # These values end up in paths and URLs, so reject anything that
        # could be used to escape the intended directories
        if '/' in name or name == '..':
                logging.warning('could not process source package %s %s: possibly malicious name', name, version)
                return None
        if '/' in version or version == '..':
                logging.warning('could not process source package %s %s: possibly malicious version', name, version)
                return None
        if '..' in directory.split('/'):
                logging.warning('could not process source package %s %s: possibly malicious dir: %s', name, version, directory)
                return None

        logging.debug('started processing source package %s %s', name, version)
        info = get_info(srcpkg)
        if not info:
                logging.warning('finished processing source package %s %s: could not get any info', name, version)
                return None
        dsc_hash_type, dsc_hash, dsc_name, parts = info
        if '/' in dsc_name or dsc_name == '..':
                logging.warning('could not process source package %s %s: possibly malicious dsc name %s', name, version, dsc_name)
                return None
        if not ishex(dsc_hash):
                logging.warning('could not process source package %s %s: possibly malicious dsc hash %s', name, version, dsc_hash)
                return None
        dsc_url = source_entry.archive_uri('%s/%s' % (directory, dsc_name))
        logging.debug('found dsc file: %s %s %s', dsc_hash_type, dsc_hash, dsc_url)
        dsc_status, dsc_sha1 = status(dsc_hash_type, dsc_hash, dsc_url)
        logging.debug('checked dsc status: %s %s %s', dsc_status, dsc_sha1, dsc_url)
        if dsc_status == 'unmodified':
                # Ignore the srcpkg since we know it was in Debian
                # at one point and is hopefully therefore unmodified
                logging.debug('finished processing source package %s %s: dsc unmodified', name, version)
                return None

        # Classify every non-dsc part as unmodified, modified or unknown
        files = [(dsc_sha1, dsc_hash_type, dsc_hash)]
        parts_unmodified = []
        parts_modified = []
        parts_unknown = []
        for part in parts:
                part_name = part['name']
                if '/' in part_name or part_name == '..':
                        logging.warning('could not process source package %s %s: possibly malicious part name %s', name, version, part_name)
                        return None
                part_url = source_entry.archive_uri('%s/%s' % (directory, part_name))
                # The first key that is neither the name nor the size is taken as the hash
                part_hash_type, part_hash = [(k, v) for k, v in part.items() if k not in ('name', 'size')][0]
                if not ishex(part_hash):
                        logging.warning('could not process source package %s %s: possibly malicious part hash %s', name, version, part_hash)
                        return None
                logging.debug('found part file: %s %s %s', part_hash_type, part_hash, part_url)
                part_status, part_sha1 = status(part_hash_type, part_hash, part_url)
                logging.debug('checked part status: %s %s %s', part_status, part_sha1, part_url)
                if 'sha1' not in part and part_sha1:
                        part['sha1'] = part_sha1
                if part_status == 'unmodified':
                        parts_unmodified.append((part_sha1, part_name))
                elif part_status == 'modified':
                        parts_modified.append((part_sha1, part_name))
                        files.append((part_sha1, part_hash_type, part_hash))
                else:
                        parts_unknown.append((part_sha1, part_name))

        # Compare the counts before duplicates are removed from the list
        all_parts_unmodified = (len(parts_unmodified) == len(parts))
        parts_unmodified = list(set(parts_unmodified))
        logging.debug('source package status %s %s: dsc %s, %s parts unmodified, %s parts modified, %s parts unknown', name, version, dsc_status, len(parts_unmodified), len(parts_modified), len(parts_unknown))

        if all_parts_unmodified:
                # Ignore the srcpkg since we know all the parts were
                # in Debian at one point and ergo, it is unmodified
                logging.debug('finished processing source package %s %s: all non-dsc parts unmodified', name, version)
                if dsc_status == 'modified':
                        logging.info('source package %s %s: unmodified, but dsc different', name, version)
                        modifies_dsc_files += 1
                return (files, None, None, None)

        logging.debug('some parts modified, looking for derived version %s %s', name, version)
        if not dsc_sha1:
                logging.warning('finished processing source package %s %s: sha1 missing for dsc file', name, version)
                return (files, None, None, None)
        if parts_unknown:
                logging.warning('finished processing source package %s %s: sha1 missing for some parts', name, version)
                return (files, None, None, None)

        new = None
        link = None
        patch = None
        tmp_dir = prepare(dsc_name, dsc_sha1, parts)
        try:
                derived_from = find_derived_from(tmp_dir, name, version, dsc_name, dsc_sha1, parts_unmodified)
                if derived_from:
                        debian_name, debian_version = derived_from
                        link = (debian_name, debian_version, name, version, dsc_url)
                        logging.debug('source package %s %s derived from %s %s', name, version, debian_name, debian_version)
                        patch = _patch_against_debian(tmp_dir, name, version, dsc_name, dsc_sha1, parts_modified, debian_name, debian_version)
                else:
                        new = (name, version, dsc_url)
        finally:
                # Always drop the derivative unpack dir, even if a step above raised
                rmtree(tmp_dir)
        logging.debug('finished processing source package %s %s: all done', name, version)
        return (files, patch, link, new)
def _describe_action(fields):
        """Render one action tuple for debug logging, tolerating None and nested sequences."""
        rendered = []
        for field in fields:
                if isinstance(field, (list, tuple)):
                        rendered.append(' '.join(['%s' % (item,) for item in field]))
                else:
                        rendered.append('%s' % (field,))
        return ' '.join(rendered)

def process_sources(source_entries, lists_dir):
        """Run check_source_package() over every package of every Sources file.

        source_entries is an iterable of iterables of source entries; the
        Sources file name for an entry is the text inside the trailing
        parentheses of its `describe` attribute, looked up in lists_dir.
        Entries whose file cannot be opened are skipped.

        Returns (files, patches, links, new), each a list of the
        corresponding non-empty results from check_source_package().
        """
        files = []
        patches = []
        links = []
        new = []
        for source in source_entries:
                for source_entry in source:
                        fn = os.path.join(lists_dir, source_entry.describe.rstrip(')').rpartition('(')[2])
                        try:
                                sources_file = open(fn)
                        except IOError:
                                continue
                        # Close the Sources file even if processing a package raises
                        with sources_file:
                                for srcpkg in deb822.Sources.iter_paragraphs(sources_file):
                                        actions = check_source_package(source_entry, srcpkg)
                                        if actions:
                                                action_files, action_patch, action_link, action_new = actions
                                                # The sha1 in a files tuple may be None and a patch
                                                # tuple holds nested sequences, so a plain
                                                # ' '.join() is not safe for these log messages
                                                if action_files:
                                                        files.append(action_files)
                                                        logging.debug('action: return files %s', ' '.join([_describe_action(action) for action in action_files]))
                                                if action_patch:
                                                        patches.append(action_patch)
                                                        logging.debug('action: return patches %s', _describe_action(action_patch))
                                                if action_link:
                                                        links.append(action_link)
                                                        logging.debug('action: return links to modified source packages %s', _describe_action(action_link))
                                                if action_new:
                                                        new.append(action_new)
                                                        logging.debug('action: return links to new source packages %s', _describe_action(action_new))
                                        logging.debug('done')
                                        logging.debug('')
        return (files, patches, links, new)
logging.debug('processing distribution %s', derivative_short_name)

files, patches, links, new = process_sources(source_entries, lists_dir)

# Done with the database, close the connection
cur.close()
conn.close()

# Write out the results

# Modified files: map each sha1 to the other hashes seen for the same file
# NOTE(review): unlike the outputs below, an existing file is not removed
# when there is nothing to write - confirm whether that is intended.
filename = sys.argv[3]
data = files
if data:
        output_data = {}
        for package in data:
                for modified_file in package:
                        sha1, hash_type, hash_value = modified_file
                        if sha1 not in output_data:
                                output_data[sha1] = {}
                        if hash_type != 'sha1' and hash_type not in output_data[sha1]:
                                output_data[sha1][hash_type] = hash_value
                        elif hash_type != 'sha1' and hash_value != output_data[sha1][hash_type]:
                                logging.warning('hashes mismatched: %s: %s %s != %s', sha1, hash_type, hash_value, output_data[sha1][hash_type])
        with open(os.path.abspath(filename), 'wb') as output:
                yaml.safe_dump(output_data, output)

# Patches: one mapping per patched source package
filename = sys.argv[4]
data = patches
if data:
        # Make sure both patch dirs have their HEADER.html and .htaccess links.
        # Each link is checked in the directory it is created in; previously
        # the derivative .htaccess was guarded by the global one.
        for patch_dir, prefix in (global_patch_dir, '../..'), (deriv_patch_dir, '../../..'):
                for link_name, target in ('HEADER.html', 'doc/HEADER.patches.html'), ('.htaccess', 'etc/htaccess.patches'):
                        link_path = os.path.join(patch_dir, link_name)
                        if not os.path.exists(link_path):
                                symlink('%s/%s' % (prefix, target), link_path)
        output_data = []
        for item in data:
                # patch_names avoids rebinding the module-level `patches` list
                debian_name, debian_version, debian_sha1, name, version, sha1, parts_sha1, patch_names = item
                item = {}
                item['debian_name'] = debian_name
                item['debian_version'] = debian_version
                item['debian_sha1'] = debian_sha1
                item['name'] = name
                item['version'] = version
                item['sha1'] = sha1
                item['patches'] = patch_names
                item['parts'] = parts_sha1
                output_data.append(item)
        with open(os.path.abspath(filename), 'wb') as output:
                yaml.safe_dump(output_data, output)
else:
        remove(filename)

# Links: Debian name -> Debian version -> list of derived source packages
filename = sys.argv[5]
data = links
if data:
        data = list(set(data))
        data.sort(cmp=lambda a,b: cmp(a[0],b[0]) or apt_version_cmp(a[1],b[1]) or cmp(a[2],b[2]) or apt_version_cmp(a[3],b[3]))
        output_data = {}
        for debian_name, debian_version, name, version, dsc_url in data:
                if debian_name not in output_data:
                        output_data[debian_name] = {}
                if debian_version not in output_data[debian_name]:
                        output_data[debian_name][debian_version] = []
                item = {}
                item['name'] = name
                item['version'] = version
                item['dsc'] = dsc_url
                output_data[debian_name][debian_version].append(item)
        with open(os.path.abspath(filename), 'wb') as output:
                yaml.safe_dump(output_data, output)
else:
        remove(filename)

# New packages: name -> version -> list of dsc URLs
filename = sys.argv[6]
data = new
if data:
        data = list(set(data))
        data.sort(cmp=lambda a,b: cmp(a[0],b[0]) or apt_version_cmp(a[1],b[1]) or cmp(a[2],b[2]))
        output_data = {}
        for name, version, dsc_url in data:
                if name not in output_data:
                        output_data[name] = {}
                if version not in output_data[name]:
                        output_data[name][version] = []
                output_data[name][version].append(str(dsc_url))
        with open(os.path.abspath(filename), 'wb') as output:
                yaml.safe_dump(output_data, output)
else:
        remove(filename)

logging.shutdown()