#!/usr/bin/python
# -*- coding: utf-8 -*-
# vim: expandtab

# Copyright 2002-2011 Raphaël Hertzog
# This file is distributed under the terms of the General Public License
# version 2 or (at your option) any later version.

"""Convert Debian Sources files into per-package XML files.

For every distribution/component pair the script reads
``<dir>/Sources-<distro>_<component>``, writes one XML document per source
package to ``<odir>/<hash>/<package>/<dist>.xml`` and stores two pickles in
``odir``: ``sources_done`` (versions already converted) and
``sources_mapping`` (source <-> binary package mappings).

Requires Python 2.6 or later in the 2.x series (``with`` statement and
``except ... as`` syntax).
"""

import cPickle
import email
import email.Utils
import os.path
import re
import rfc822
import string
import sys
import xml.dom

from config import dir, odir
from common import vcs_table, hash_name, DpkgVersion

# Placeholder substituted for commas while the email parsers run.
_COMMA_PLACEHOLDER = "WEWANTNOCOMMAS"
# A comma that does not directly follow the '>' closing an address.
_UNQUOTED_COMMA_RE = re.compile("([^>]),")
# Version suffix that marks an upload as an NMU.
_NMU_VERSION_RE = re.compile(r"-\d+\.\d+(\.\d+)?$")
# Separator between the entries of the Binary and Build-* fields.
_ITEM_SEPARATOR_RE = re.compile(",[ \n]*")
# Columns of one line of the Files field, in order.
_FILE_FIELDS = ("md5sum", "size", "filename")


def _to_unicode(value):
    """Return *value* as unicode, decoding byte strings as UTF-8.

    Invalid byte sequences are replaced instead of raising, so arbitrary
    header content can always be stored in the DOM and serialised.
    """
    if isinstance(value, unicode):
        return value
    return unicode(value, 'UTF-8', 'replace')


def address_from_string(content):
    """Parse one RFC822 address into a (real name, email) tuple.

    The difference with email.Utils.parseaddr and rfc822.parseaddr is that
    this routine allows unquoted commas to appear in the real name (in
    violation of RFC822).  This is a hack to allow a Maintainer field such
    as 'John H. Robinson, IV' followed by an address.  [PvR]
    """
    hacked_content = content.replace(",", _COMMA_PLACEHOLDER)
    (name, mail) = email.Utils.parseaddr(hacked_content)
    return (name.replace(_COMMA_PLACEHOLDER, ","),
            mail.replace(_COMMA_PLACEHOLDER, ","))


def addresses_from_string(content):
    """Parse a string of RFC822 addresses into a list of [name, email] pairs.

    Just as address_from_string, it tries to be forgiving about unquoted
    commas in addresses: only a comma that directly follows the closing '>'
    of an address is treated as a separator.  [PvR]
    """
    hacked_content = _UNQUOTED_COMMA_RE.sub("\\1" + _COMMA_PLACEHOLDER,
                                            content)
    msg = email.message_from_string("Header: " + hacked_content)
    hacked_list = email.Utils.getaddresses(msg.get_all("Header", []))
    # Each pair stays a two-element list, as callers have always received.
    return [[part.replace(_COMMA_PLACEHOLDER, ",") for part in pair]
            for pair in hacked_list]


def add_maintainer_info(child, name, mail, doc):
    """Append <name> and <email> text elements to the DOM node *child*.

    Both values are decoded as UTF-8 first; an undecoded non-ASCII byte
    string in the DOM would make the later serialisation fail.
    """
    for tag, value in (("name", name), ("email", mail)):
        item_elt = doc.createElement(tag)
        item_elt.appendChild(doc.createTextNode(_to_unicode(value)))
        child.appendChild(item_elt)


def _append_items(doc, parent, values):
    """Append one <item> text element to *parent* for each of *values*."""
    for value in values:
        item_elt = doc.createElement("item")
        # Take care of non-ascii, prevents troubles...
        item_elt.appendChild(doc.createTextNode(_to_unicode(value)))
        parent.appendChild(item_elt)


def _append_files(doc, parent, content):
    """Append one <item> per line of a Files field to *parent*.

    Each item gets <md5sum>, <size> and <filename> children.  Lines with
    fewer than three whitespace-separated values (including empty lines)
    are skipped, because they cannot be represented; extra values are
    ignored.
    """
    for line in content.split("\n"):
        values = line.split()
        if len(values) < len(_FILE_FIELDS):
            continue
        item_elt = doc.createElement("item")
        parent.appendChild(item_elt)
        for field, value in zip(_FILE_FIELDS, values):
            new_elt = doc.createElement(field)
            new_elt.appendChild(doc.createTextNode(_to_unicode(value)))
            item_elt.appendChild(new_elt)


def _append_vcs_info(doc, root, tag, url):
    """Record a (x-)vcs-* field as a <vcs> child of the <repository> element.

    Does nothing for other tags.  The <repository> element is created under
    *root* on first use and reused afterwards.
    """
    if tag[:6] == 'x-vcs-':
        prefix_len = 6
    elif tag[:4] == 'vcs-':
        prefix_len = 4
    else:
        return
    # e.g. tag[prefix_len:] would be 'svn' for 'x-vcs-svn'/'vcs-svn'
    #      would be 'browser' for 'vcs-browser' (to be handled specially)
    repositories = root.getElementsByTagName('repository')
    if repositories:
        # reuse existing repository element
        repos_elt = repositories[0]  # invariant: at most 1 repository elt
    else:
        # create a new repository element
        repos_elt = doc.createElement('repository')
        root.appendChild(repos_elt)
    kind = tag[prefix_len:]
    if kind in vcs_table:
        kind = vcs_table[kind][0]
    vcs_elt = doc.createElement('vcs')
    vcs_elt.setAttribute('kind', kind)
    vcs_elt.setAttribute('url', _to_unicode(url))
    repos_elt.appendChild(vcs_elt)


def update_sources_info(m, dist):
    """Update the XML information with the given Message (Package entry).

    *m* is one stanza of a Sources file and *dist* the distribution name.
    The stanza is written to ``<odir>/<hash>/<package>/<dist>.xml`` unless
    the XML file already exists and either a newer version was converted by
    a previous run, an equal or newer version was already seen in this run,
    or the previous run converted this very version.  The version is
    recorded in the module-level ``new_done`` dictionary.

    Failures while serialising or writing the document are reported on
    stderr and otherwise ignored.
    """
    package = m["Package"]
    version = DpkgVersion(m["Version"])
    hashed = hash_name(package)
    package_dir = odir + "/" + hashed + "/" + package
    xml_path = "%s/%s/%s/%s.xml" % (odir, hashed, package, dist)
    xml_ok = os.path.isfile(xml_path)

    # Check if the work has already been done
    key = "%s_%s" % (m["Package"], dist)
    # Skip (duplicate) old entries
    if key in old_done and version < old_done[key] and xml_ok:
        return
    if key in new_done and version <= new_done[key] and xml_ok:
        return
    new_done[key] = version
    # Don't redo the work already done
    if key in old_done and old_done[key] == version and xml_ok:
        return

    # Make sure the directory exists
    if not os.path.isdir(package_dir):
        if not os.path.isdir(odir + "/" + hashed):
            os.mkdir(odir + "/" + hashed)
        os.mkdir(package_dir)

    # Create the XML DOM object
    implementation = xml.dom.getDOMImplementation('minidom')
    doc = implementation.createDocument(None, None, None)
    root = doc.createElement("source")
    doc.appendChild(root)
    root.setAttribute("release", dist)
    if _NMU_VERSION_RE.search(m["version"]):
        root.setAttribute("nmu", "yes")

    for tag in m.keys():
        child = doc.createElement(tag)
        root.appendChild(child)
        if tag == "binary" or tag[0:5] == "build":
            _append_items(doc, child, _ITEM_SEPARATOR_RE.split(m[tag]))
        elif tag == "maintainer":
            (name, mail) = address_from_string(m[tag])
            add_maintainer_info(child, name, mail, doc)
        elif tag == "uploaders":
            for (name, mail) in addresses_from_string(m[tag]):
                item_elt = doc.createElement("item")
                add_maintainer_info(item_elt, name, mail, doc)
                child.appendChild(item_elt)
        elif tag == "files":
            _append_files(doc, child, m[tag])
        else:
            child.appendChild(doc.createTextNode(_to_unicode(m[tag])))

        # now compute derived information and store it in the xml
        # add 'parsed' version of VCS info
        _append_vcs_info(doc, root, tag, m[tag])

    # Print the DOM object to a file
    try:
        # Serialise before opening the file: opening truncates it, and an
        # empty file left behind by a failed serialisation would be taken
        # for an up-to-date one on the next run.
        data = doc.toxml(encoding="UTF-8")
        with open(xml_path, "w") as f:
            f.write(data)
    except Exception as msg:
        sys.stderr.write("Output problem for %s (%s)\n" % (m["package"], msg))


def update_source_binary_mapping(m, dist):
    """Record the binaries of the source stanza *m* in the global mappings.

    ``source_to_binaries[src]`` and ``binary_to_sources[pkg]`` each hold a
    'merged' list (all distributions), one list per distribution name and a
    'current' entry that is only updated for "unstable": a list of binaries
    on the source side, the single source name on the binary side.  No list
    receives the same name twice.
    """
    src = m["Package"]
    for pkg in m["Binary"].split(","):
        pkg = pkg.strip()
        src_entry = source_to_binaries.setdefault(
            src, {'current': [], 'merged': []})
        bin_entry = binary_to_sources.setdefault(
            pkg, {'current': None, 'merged': []})
        if dist == "unstable":
            # Same duplicate guard as for the lists below; a source listed
            # several times would otherwise repeat its binaries here.
            if pkg not in src_entry['current']:
                src_entry['current'].append(pkg)
            bin_entry['current'] = src
        for d in ('merged', dist):
            binaries = src_entry.setdefault(d, [])
            sources = bin_entry.setdefault(d, [])
            if pkg not in binaries:
                binaries.append(pkg)
            if src not in sources:
                sources.append(src)


def treat_sources_file(fname, dist, update_mapping):
    """Scan the given Sources file and treat each Package entry.

    Every stanza is passed to update_sources_info and, when *update_mapping*
    is true, to update_source_binary_mapping.  A file that cannot be opened
    is reported on stderr and skipped.
    """
    try:
        f = open(fname, "r")
    except IOError:
        sys.stderr.write("Skipping parsing of non-existing/non-readable %s\n"
                         % fname)
        return
    try:
        while True:
            try:
                m = rfc822.Message(f)
                if len(m) == 0:  # eof
                    break
                update_sources_info(m, dist)
                if update_mapping:
                    update_source_binary_mapping(m, dist)
            except EOFError:
                break
    finally:
        # Close the file even when a stanza makes the processing fail.
        f.close()


# Load the list of sources generated the last time
old_done = {}
new_done = {}
if os.path.exists(odir + "/sources_done"):
    # NOTE(review): unpickling executes arbitrary code if the file has been
    # tampered with; odir must only be writable by trusted users.
    with open(odir + "/sources_done", "r") as f:
        old_done = cPickle.load(f)

# Order matters in main_distros
main_distros = ['oldstable', 'stable', 'testing', 'unstable', 'experimental']
other_distros = [
    'security-oldstable',
    'oldstable-backports',
    'stable-proposed-updates',
    'stable-updates',
    'security-stable',
    'stable-backports',
    'testing-proposed-updates',
    'security-testing',
    'mentors',
]

source_to_binaries = {}
binary_to_sources = {}

for distro in main_distros:
    for comp in ['main', 'contrib', 'non-free']:
        treat_sources_file(
            os.path.join(dir, 'Sources-%s_%s' % (distro, comp)),
            distro, True)

for distro in other_distros:
    for comp in ['main', 'contrib', 'non-free']:
        treat_sources_file(
            os.path.join(dir, 'Sources-%s_%s' % (distro, comp)),
            distro, False)

# Store the list of sources generated
with open(odir + "/sources_done", "w") as f:
    cPickle.dump(new_done, f, 0)

# Store the src/pkg mappings
with open(odir + "/sources_mapping", "w") as f:
    cPickle.dump(source_to_binaries, f, 0)
    cPickle.dump(binary_to_sources, f, 0)

# Scan the old package/distribution that existed and check if they
# still exist ... if they don't, remove the associated xml file.
for key in old_done.keys():
    if key in new_done:
        continue
    (p, d) = key.split("_", 1)
    hashed = hash_name(p)
    filename = "%s/%s/%s/%s.xml" % (odir, hashed, p, d)
    filenamerebuild = "%s/%s/%s/force-rebuild" % (odir, hashed, p)
    if os.path.exists(filename):
        os.unlink(filename)
        # Touch the marker file next to the removed XML file.
        open(filenamerebuild, "w").close()

# We're done