#!/usr/bin/python

# Create a database of identities in Debian using various sources
# Copyright (C) 2005-2006  Jeroen van Wolffelaar
# $Id$

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import os
import sys
import sets

import apt_pkg
import ldap

# Carnivore works by having a dictionary from identifiers to identities. An
# identity is a tuple of (primary id, list-of-ids, list-of-extra-info).
# Instead, it also can be a string referring to another identity that is
# equivalent.
# Maps every known identifier to one of:
#   * a tuple (primary id, list-of-ids, list-of-extra-info): an identity;
#   * a string: the key of another entry this identifier is equivalent to;
#   * a list of identifiers: a "weak reference" (see weakRef), used for the
#     "uid:" keys that end up in the uids file.
carnivore = {}


def join(sep, items):
    """Return the strings in items joined by sep ("" for no items)."""
    return sep.join(items)


def parseUid(uid):
    """Split a user id such as 'Name (comment) <email>' into (name, email).

    The first parenthesised comment is dropped and the first <...> part is
    taken as the e-mail address.  If there is no <...> part but the remaining
    text contains an '@', the whole text is treated as the address and the
    name is returned as None.  email is None when no address was found.
    """
    uid = uid.strip()

    # First, strip comment.  The delimiters must appear in order, otherwise
    # the slicing below would duplicate part of the text.
    start = uid.find('(')
    end = uid.find(')')
    if 0 <= start < end:
        uid = uid[:start] + uid[end + 1:]

    email = None
    start = uid.find('<')
    end = uid.find('>')
    if 0 <= start < end:
        email = uid[start + 1:end]
        uid = uid[:start] + uid[end + 1:]
    uid = uid.strip()

    if not email and '@' in uid:
        # Bare address without a name: swap the two roles.
        email, uid = uid, email
    return (uid, email)


def getCanonical(key):
    """Follow string redirections from key and return the entry they end at.

    Returns the identity tuple (or weak-reference list) that key resolves
    to, or None if key is not in carnivore at all.
    """
    if key not in carnivore:
        return None
    value = carnivore[key]
    while not isinstance(value, (tuple, list)):
        value = carnivore[value]
    return value


def merge(a, b):
    """Declare identifiers a and b to be the same identity and return it.

    The identity of a survives: b's primary id is redirected to a's primary
    id and b's ids and extra info are appended to a's.  Identifiers that are
    not known yet are created on the fly.
    """
    ca = getCanonical(a)
    if ca is None:
        ca = (a, [a], [])
        carnivore[a] = ca

    # Looked up after 'a' has been registered so that merge(x, x) on an
    # unknown x is recognised as "already merged" instead of making x
    # redirect to itself (which would make getCanonical loop forever).
    cb = getCanonical(b)
    if cb is ca:
        return ca
    if cb is None:
        cb = (b, [b], [])

    carnivore[cb[0]] = ca[0]
    ca[1].extend(cb[1])
    ca[2].extend(cb[2])
    ca[2].sort()
    return ca


def addName(name, ref):
    """Attach real name 'name' to the identity of ref.

    Normally the name is merged in as an identifier of its own, so that
    everything carrying the same real name ends up in one identity.  Names
    on the hard-coded exception list are only recorded as extra info.
    """
    # merge with realname, TODO: support for blacklist for nonreal names
    if name in ['Brian Nelson', 'Thawte Freemail Member']:
        extraInfo(ref, "realname:" + name)
    else:
        merge(ref, "realname:" + name)


def extraInfo(key, info):
    """Append info to the extra-info list of key's identity (creating it)."""
    can = getCanonical(key)
    if not can:
        carnivore[key] = (key, [key], [info])
    else:
        can[2].append(info)


def weakRef(key, target):
    """Record that key refers to target without merging the two.

    The entry for key is a plain list of targets; cleanUp() later replaces
    the targets by the primary ids of their identities.
    """
    can = getCanonical(key)
    if not can:
        carnivore[key] = [target]
    else:
        can.append(target)


def getKeyrings():
    """Import fingerprints and user ids from the Debian keyrings via gpg."""
    keyrings = {'keyring': 'debian-keyring',
                'emeritus': 'emeritus-keyring',
                'removed': 'removed-keys'}
    for ringName, baseName in keyrings.items():
        for ext in ['gpg', 'pgp']:
            contents = os.popen(
                "gpg --no-default-keyring --no-expensive-trust-checks"
                " --keyring /org/keyring.debian.org/keyrings/%s.%s"
                " --list-keys --with-colons --fingerprint" % (baseName, ext))
            try:
                fpr = None
                lastpub = None
                for line in contents.readlines():
                    items = line.split(':')
                    if items[0] == 'pub':
                        # New key: remember the user id carried by the pub
                        # record; it is processed once the fpr record that
                        # follows has told us the fingerprint.
                        fpr = None
                        lastpub = items[9].strip()
                        continue
                    elif items[0] == 'fpr':
                        fpr = "gpg:" + items[9].strip()
                        extraInfo(fpr, "x:gpg:keyring:" + fpr[4:] + ":" +
                                  ringName)
                        uid = lastpub
                    elif items[0] == 'uid':
                        uid = items[9].strip()
                    else:
                        continue

                    # Nothing to record for an empty user id (the pub record
                    # does not necessarily carry one) or for a uid record
                    # seen before any fingerprint.
                    if not uid or fpr is None:
                        continue

                    # Do stuff with 'uid'
                    weakRef("uid:" + uid, fpr)
                    uid, email = parseUid(uid)
                    if email:
                        email = "email:" + email
                        merge(fpr, email)
                        extraInfo(fpr, "x:" + fpr + ":" + email)
                    if uid:
                        addName(uid, fpr)
                        extraInfo(fpr, "realname:" + uid)
                        extraInfo(fpr, "x:" + fpr + ":realname:" + uid)
            finally:
                contents.close()


def getMia():
    """Flag every login that has an entry in the MIA database directory."""
    for res in os.listdir("/org/qa.debian.org/mia/db"):
        if res.endswith(".summary"):
            continue
        # Only file names without '=' are treated as logins.
        if '=' not in res:
            extraInfo("ldap:" + res, "mia")


def getLdap():
    """Import logins, real names and key fingerprints from db.debian.org."""
    l = ldap.initialize("ldap://db.debian.org/")
    result = l.search_s("dc=debian,dc=org", ldap.SCOPE_SUBTREE,
                        "objectClass=debiandeveloper",
                        ['uid', 'gecos', 'keyFingerPrint'])
    for res in result:
        attrs = res[1]
        login = attrs['uid'][0]
        # Only the first comma-separated gecos field (the name) is used.
        name = attrs['gecos'][0].split(',')[0].strip()
        uid = "ldap:" + login

        merge(uid, "email:" + login + "@debian.org")
        extraInfo(uid, "realname:" + name)
        addName(name, uid)
        weakRef("uid:" + name + " <" + login + "@debian.org>", uid)
        extraInfo(uid, "x:ldap:realname:%s:%s" % (login, name))
        if 'keyFingerPrint' in attrs:
            for fpr in attrs['keyFingerPrint']:
                merge(uid, "gpg:" + fpr)
                extraInfo(uid, "x:ldap:gpg:" + login + ':' + fpr)


def getPackages():
    """Import maintainers and uploaders of all source packages in unstable."""
    packages = os.popen("/org/qa.debian.org/data/ftp/get-packages"
                        " -s unstable -a source")
    try:
        parser = apt_pkg.ParseTagFile(packages)
        while parser.Step():
            package = parser.Section.get("Package")
            maintainers = [parser.Section.get("Maintainer")]
            uploaders = parser.Section.get("Uploaders")
            if uploaders:
                maintainers += uploaders.split(",")
            for maintainer in maintainers:
                maintainer = maintainer.strip()
                maint, email = parseUid(maintainer)
                if not email:
                    sys.stderr.write(
                        "Malformed maintainer field for %s: %s; skipping\n"
                        % (package, maintainer))
                    continue
                email = "email:" + email
                extraInfo(email, "maint:" + package)
                weakRef("uid:" + maintainer, email)
                # aspectj has a bogus Maintainer entry
                if maint and package != "aspectj":
                    extraInfo(email, "realname:" + maint)
                    addName(maint, email)
                    extraInfo(email, "x:" + "maint:" + package + ":" + email +
                              ":realname:" + maint)
    finally:
        packages.close()


def cleanUp():
    """Canonicalise weak references and sort/deduplicate all lists in place."""
    for k, v in carnivore.items():
        if "\n" in k:
            sys.stderr.write("Aiee, newline in key %s (for value %s)!\n"
                             % (k, v))
        if isinstance(v, list):
            # weak ref: replace every target by the primary id of its
            # identity; the set removes targets that collapsed into one.
            v[:] = sorted(set([getCanonical(ref)[0] for ref in v]))
        elif isinstance(v, tuple):
            # Identity: the tuple itself is immutable, so its two lists are
            # rewritten in place.
            for members in (v[1], v[2]):
                members[:] = sorted(set(members))


def writeUids(fd):
    """Write 'uid: primary-id' lines for all unambiguous uids to fd.

    Uids are lower-cased and written in sorted order, with a blank line
    wherever the identity changes between two consecutive lines.  Uids that
    refer to more than one identity are reported on stderr and skipped.
    Expects cleanUp() to have been run.
    """
    uids = []
    for k, v in carnivore.items():
        if isinstance(v, list) and k[:4] == "uid:":
            if len(v) != 1:
                sys.stderr.write("Oops, a uid with multiple identities: " +
                                 k + "\n")
                continue
            uids.append((k[4:].lower().strip(), v[0]))
    if not uids:
        # Nothing to write; uids[0] below would raise IndexError.
        return
    uids.sort()

    lastV = uids[0][1]
    for uid, identity in uids:
        if identity != lastV:
            fd.write("\n")
            lastV = identity
        fd.write(uid + ": " + identity + "\n")


def _personReport(identity):
    """Return the report paragraph (without trailing blank line) for one
    identity tuple."""
    logins, realname, email, package = [], [], [], []
    warnings, mia = [], []
    gecos = ""
    keyring = {'keyring': [], 'emeritus': [], 'removed': [], 'ldap': []}

    # Sorted so that the report does not depend on set iteration order.
    for item in sorted(set(identity[1] + identity[2])):
        if item[:5] == "ldap:":
            logins.append(item[5:])
        elif item[:6] == "email:":
            email.append(item[6:])
        elif item[:9] == "realname:":
            realname.append(item[9:])
        elif item[:6] == "maint:":
            package.append(item[6:])
        elif item[:2] == "x:":
            # The value after the first ':' may itself contain colons (real
            # names do), hence the maxsplit of 1.
            if item[:11] == "x:ldap:gpg:":
                dummy, gpg = item[11:].split(':', 1)
                keyring['ldap'].append(gpg)
            if item[:14] == "x:gpg:keyring:":
                gpg, ring = item[14:].split(':', 1)
                keyring[ring].append(gpg)
            if item[:16] == "x:ldap:realname:":
                dummy, gecos = item[16:].split(':', 1)
        elif item == "mia":
            mia.append('in-db')
    for ring in keyring.values():
        ring.sort()

    if len(logins) > 1:
        warnings.append("Multiple LDAP entries")
    if keyring['emeritus'] and keyring['keyring']:
        warnings.append("Both emeritus and active")
    if keyring['ldap'] != keyring['keyring']:
        warnings.append("Ldap doesn't match keys in keyring")

    if keyring['ldap'] and not package:
        mia.append('needs-wat')
    if keyring['emeritus'] and package:
        mia.append('emeritus-with-package')
    if not keyring['emeritus'] and not keyring['keyring'] and \
            keyring['removed'] and package:
        mia.append('removed-with-package')

    text = ""
    if logins:
        text += "DD: " + gecos + " <" + logins[0] + "@debian.org>\n"
    if realname:
        text += "Known as: " + join(', ', realname) + "\n"
    if email:
        text += "Using emails: " + join(', ', email) + "\n"
    for ringName, keys in keyring.items():
        for key in keys:
            text += "Key in " + ringName + ": " + key + "\n"

    p = "0"
    if package and len(package) <= 5:
        p = "%s (%s)" % (len(package), join(", ", package))
    elif package:
        # Long lists are cut down to the first four packages.
        p = "%s (%s)" % (len(package), join(", ", package[:4] + ["..."]))
    text += "Packages: %s\n" % p

    if mia:
        text += "X-MIA: " + join(', ', mia) + "\n"
    for warn in warnings:
        text += "X-Warning: " + warn + "\n"
    return text


def writeReport(fd):
    """Write one blank-line-terminated paragraph per identity to fd."""
    for v in carnivore.values():
        if not isinstance(v, tuple):
            continue
        # So, we have a person
        fd.write(_personReport(v) + "\n")


def main():
    """Collect all sources, then write 'uids', 'report' and a raw dump."""
    #os.chdir('/org/qa.debian.org/data/carnivore')
    #try:
    #    os.mkdir('results')
    #except OSError:
    #    pass
    #os.chdir('results')

    getLdap()
    getKeyrings()
    getPackages()
    getMia()

    # Manually maintained equivalences that the sources do not reveal.
    merge("ldap:cerebus", "email:cerebus@email.com")
    merge("ldap:torin", "email:torin@daft.com")
    merge("ldap:myxie", "email:gpp10@cam.ac.uk")
    merge("ldap:jcs", "email:jorg.schuler@gmx.de")
    merge("ldap:vizzie", "email:vizzie@airmail.net")
    merge("email:jnelson@boa.org", "email:john@debian.org")

    cleanUp()

    # Written under a temporary name and renamed afterwards.
    out = open('uids.new', 'w')
    try:
        writeUids(out)
    finally:
        out.close()
    os.rename("uids.new", "uids")

    out = open('report', 'w')
    try:
        writeReport(out)
    finally:
        out.close()

    for k, v in carnivore.items():
        print(k)
        print(v)


if __name__ == "__main__":
    main()