#!/usr/bin/python # Create a database of identities in Debian using various sources # Copyright (C) 2005-2006 Jeroen van Wolffelaar # $Id$ # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import os, sys, ldap import os.path import apt_pkg # activity-* fields from LDAP are private # moved out of the data subdir and made world readable again -- Myon os.umask(02) # Carnivore works by having a dictionary from identifyers to identities. An # identify is a tuple of (primary id, list-of-ids, list-of-extra-info). # Instead, it also can be a string referring to another identity that is # equivalent. 
carnivore = {}


def join(sep, items):
    """Return the strings in items concatenated with sep between them.

    An empty sequence yields "" (the earlier reduce()-based version raised
    TypeError for it).
    """
    return sep.join(items)


def parseUid(uid):
    """Split a "Name (comment) <email>" string into a (name, email) tuple.

    The first parenthesised comment is dropped and the first <...> span is
    taken as the e-mail address.  When there are no angle brackets but the
    remaining text contains an '@', that whole text is returned as the
    address and the name is None.  email is None when no address is found.
    """
    uid = uid.strip()
    # First, strip comment.  The closing bracket is searched for *after* the
    # opening one so that a stray ')' or '>' earlier in the string cannot
    # produce a reversed (garbled) slice.
    start = uid.find('(')
    end = uid.find(')', start + 1) if start >= 0 else -1
    if start >= 0 and end >= 0:
        uid = uid[:start] + uid[end+1:]
    start = uid.find('<')
    end = uid.find('>', start + 1) if start >= 0 else -1
    email = None
    if start >= 0 and end >= 0:
        email = uid[start+1:end]
        uid = uid[:start] + uid[end+1:]
    uid = uid.strip()
    if not email and '@' in uid:
        # Bare address without a name part
        email, uid = uid, email
    return (uid, email)


def getCanonical(key):
    """Return the tuple or list that key resolves to, or None if unknown.

    String values in carnivore are aliases; they are followed until a tuple
    (an identity) or a list (a weak reference) is reached.
    """
    if key not in carnivore:
        return None
    while True:
        key = carnivore[key]
        if isinstance(key, (tuple, list)):
            return key


def merge(a, b):
    """Declare identifiers a and b to belong to the same identity.

    The identity of a survives: b's primary id becomes an alias for a's
    primary id and b's ids and extra info are appended to a's.  Identifiers
    not seen before are created on the fly.  Returns the surviving identity
    tuple.
    """
    ca = getCanonical(a)
    cb = getCanonical(b)
    if ca and ca == cb:
        return ca
    if not ca:
        ca = (a, [a], [])
        carnivore[a] = ca
    if a == b:
        # Merging a new id with itself: without this guard the alias
        # assignment below would store carnivore[a] = a and getCanonical()
        # would loop forever on it.
        return ca
    if not cb:
        cb = (b, [b], [])
    carnivore[cb[0]] = ca[0]
    ca[1].extend(cb[1])
    ca[2].extend(cb[2])
    ca[2].sort()
    return ca


def addName(name, ref):
    """Associate the real name with identifier ref.

    Normally the name is merged with ref.  The names listed below are only
    attached as extra info instead of being merged.
    """
    # merge with realname, TODO: support for blacklist for nonreal names
    unmergeable = ('Brian Nelson', 'Luca Bruno', 'Andrew Ross',
                   'Thawte Freemail Member')
    if name in unmergeable:
        extraInfo(ref, "realname:"+name)
    else:
        merge(ref, "realname:"+name)


def extraInfo(key, info):
    """Append info to the extra-info list of key's identity.

    A new single-id identity is created when key is not known yet.
    """
    can = getCanonical(key)
    if not can:
        carnivore[key] = (key, [key], [info])
    else:
        can[2].append(info)


def weakRef(key, target):
    """Record that key refers to target without merging the two.

    Weak references are stored as a list of targets under key.
    """
    can = getCanonical(key)
    if not can:
        carnivore[key] = [target]
    else:
        can.append(target)


def getKeyrings():
    """Extract keys from the various keyrings (DDs, DMs, emeritus and removed).

    The debian, emeritus and removed keyrings are each read in their .gpg
    and .pgp form; the debian-maintainers keyring is read as .gpg only.
    """
    base = "/org/qa.debian.org/data/keyrings/keyrings/"
    # A tuple of pairs rather than a dict, so the keyrings are always
    # processed in the same order.
    sources = (
        ('keyring', 'debian-keyring'),
        ('emeritus', 'emeritus-keyring'),
        ('removed', 'removed-keys'),
    )
    for keyring, basename in sources:
        for ext in ('gpg', 'pgp'):
            parseKeyring("%s%s.%s" % (base, basename, ext), keyring)
    # Add DM keyring
    parseKeyring(base + "debian-maintainers.gpg", "dm")


def parseKeyring(keyring_file, keyring):
    """Parse keyring_file with gpg and file its keys under the name keyring.

    keyring_file is the path handed to "gpg --keyring"; keyring is the label
    ('keyring', 'emeritus', 'removed' or 'dm') stored in the
    "x:gpg:keyring:<fingerprint>:<label>" extra info that writeReport reads.
    Every user id found is registered as a weak reference to the key and its
    e-mail address and real name are attached to the key's identity.
    """
    contents = os.popen(("gpg --no-default-keyring"
                         " --no-expensive-trust-checks"
                         " --keyring %s"
                         " --list-keys"
                         " --with-colons --fingerprint") % keyring_file)
    fpr = None
    lastpub = None
    try:
        for line in contents:
            items = line.split(':')
            if items[0] == 'pub':
                fpr = None
                lastpub = items[9].strip()
                continue
            elif items[0] == 'fpr':
                fpr = "gpg:" + items[9].strip()
                extraInfo(fpr, "x:gpg:keyring:"+fpr[4:] + ":" + keyring)
                # The user id from the preceding pub record, if it had one
                uid = lastpub
            elif items[0] == 'uid':
                uid = items[9].strip()
            else:
                continue

            if not uid or fpr is None:
                # Nothing to register: the pub record carried no user id
                # (the ids then follow in uid records), or no fingerprint
                # has been seen yet for this key.
                continue

            # Do stuff with 'uid'
            weakRef("uid:"+uid, fpr)
            uid, email = parseUid(uid)
            if email:
                email = "email:" + email
                merge(fpr, email)
                extraInfo(fpr, "x:"+fpr+":"+email)
            if uid:
                addName(uid, fpr)
                extraInfo(fpr, "realname:"+uid)
                extraInfo(fpr, "x:"+fpr+":realname:"+uid)
    finally:
        # Release the pipe even when a record cannot be processed
        contents.close()


def getLdap():
    """Import all debiandeveloper entries from the Debian LDAP directory.

    For every entry the login is merged with its @debian.org address, the
    real name (first comma-separated field of gecos), the activity-*
    attributes and the key fingerprints are recorded, and a uid: weak
    reference is created.
    """
    l = ldap.initialize("ldap://db.debian.org/")
    try:
        result = l.search_s("dc=debian,dc=org", ldap.SCOPE_SUBTREE,
                            "objectClass=debiandeveloper",
                            ['uid', 'gecos', 'keyFingerPrint',
                             'activity-pgp', 'activity-from'])
    finally:
        # search_s returns the complete result, so the connection can be
        # released before the entries are processed.
        l.unbind_s()
    for res in result:
        attrs = res[1]
        login = attrs['uid'][0]
        realname = attrs['gecos'][0].split(',')[0].strip()
        uid = "ldap:" + login
        merge(uid, "email:" + login + "@debian.org")
        extraInfo(uid, "realname:" + realname)
        if 'activity-from' in attrs:
            extraInfo(uid, "activity-from:"+attrs['activity-from'][0])
        if 'activity-pgp' in attrs:
            extraInfo(uid, "activity-pgp:" +attrs['activity-pgp'][0])
        addName(realname, uid)
        weakRef("uid:"+realname+" <"+login+"@debian.org>", uid)
        extraInfo(uid, "x:ldap:realname:%s:%s" % (login, realname))
        if 'keyFingerPrint' in attrs:
            for fpr in attrs['keyFingerPrint']:
                merge(uid, "gpg:" + fpr)
                extraInfo(uid, "x:ldap:gpg:"+login + ':' + fpr)


def getPackages():
    """Import maintainers and uploaders of all source packages in unstable.

    Each maintainer e-mail address gets the package as extra info, and
    maint:<package> and uid:<maintainer> weak references point back to the
    address.
    """
    packages = os.popen("/org/qa.debian.org/data/ftp/get-packages"
                        " -s unstable -a source")
    try:
        parser = apt_pkg.TagFile(packages)
        while parser.step():
            package = parser.section.get("Package")
            maintainers = [parser.section.get("Maintainer")]
            uploaders = parser.section.get("Uploaders")
            if uploaders:
                maintainers += uploaders.split(",")
            # Uploaders is split on commas, which also cuts names that
            # contain a comma.  A piece without an address is therefore kept
            # here and glued in front of the next piece.
            oldmaintainer = ''
            for maintainer in maintainers:
                if maintainer is None:
                    sys.stderr.write("No maintainer field for %s: %s; skipping\n"
                                     % (package, maintainer))
                    continue
                if oldmaintainer:
                    maintainer = oldmaintainer + ', ' + maintainer
                    oldmaintainer = ''
                maintainer = maintainer.strip()
                maint, email = parseUid(maintainer)
                if not email:
                    oldmaintainer = maintainer
                    #sys.stderr.write("Malformed maintainer field for %s: %s; skipping\n" \
                    #        % (package, maintainer))
                    continue
                email = "email:" + email
                if package is None:
                    sys.stderr.write("No package field for %s: %s; skipping\n"
                                     % (maintainer, package))
                    continue
                extraInfo(email, "maint:"+package)
                weakRef("maint:"+package, email)
                weakRef("uid:"+maintainer, email)
                if maint:
                    extraInfo(email, "realname:"+maint)
                    addName(maint, email)
                    extraInfo(email, "x:"+"maint:"+package+":"+email+":realname:"+maint)
    finally:
        # Release the pipe even when a stanza cannot be processed
        packages.close()


def cleanUp():
    """Canonicalise, de-duplicate and sort every entry in carnivore.

    Weak references are replaced by the primary ids of the identities they
    point to; the id and extra-info lists of identities are made unique and
    sorted.  Lists are updated in place so that aliases keep pointing at
    the same objects.  Keys containing a newline are reported on stderr.
    """
    for k, v in carnivore.items():
        if "\n" in k:
            sys.stderr.write("Aiee, newline in key %s (for value %s)!\n" % (k,v))
        if isinstance(v, list):
            # weak ref: several targets may share one primary id, so the
            # set is built after canonicalising
            v[:] = sorted(set(getCanonical(ref)[0] for ref in v))
        elif isinstance(v, tuple):
            for i in (1, 2):
                v[i][:] = sorted(set(v[i]))


def writeUids(fd):
    """Write "<lower-cased uid>: <identity>" lines to fd, sorted by uid.

    Only uid: weak references that resolve to exactly one identity are
    written; the others are reported on stderr.  An empty line is written
    whenever the identity differs from the one on the previous line.
    Nothing is written when there are no uids.
    """
    uids = []
    for k, v in carnivore.items():
        if isinstance(v, list) and k.startswith("uid:"):
            if len(v) != 1:
                sys.stderr.write("Oops, a uid with multiple identities: "+k+"\n")
                continue
            uids.append((k[4:].lower().strip(), v[0]))
    uids.sort()
    previous = None
    for name, identity in uids:
        if previous is not None and identity != previous:
            fd.write("\n")
        previous = identity
        fd.write(name + ": " + identity + "\n")


def writeMiaEmails(fd):
    """Write one line to fd per known e-mail address and LDAP login.

    E-mail addresses are written with '@' replaced by '=', LDAP logins are
    written as they are.
    """
    for k in carnivore:
        if k.startswith("email:"):
            fd.write(k[6:].replace('@', '=')+"\n")
        elif k.startswith("ldap:"):
            fd.write(k[5:]+"\n")


def writeReport(fd):
    """Write a stanza for every identity in carnivore to fd.

    A stanza lists the LDAP account, known names, e-mail addresses, keys per
    keyring and packages, followed by X-MIA and X-Warning lines, and ends
    with an empty line.  The items of an identity are processed in sorted
    order and keyrings are listed in a fixed order, so the content of a
    stanza does not depend on set or dict ordering.
    """
    ring_names = ('keyring', 'emeritus', 'removed', 'ldap', 'dm')
    for person in carnivore.values():
        if not isinstance(person, tuple):
            continue
        # So, we have a person
        ldap_ids, realname, email, package = [], [], [], []
        extra, expl, warnings, mia = [], [], [], []
        gecos = ""
        keyring = dict((name, []) for name in ring_names)
        for item in sorted(set(person[1] + person[2])):
            if item.startswith("ldap:"):
                ldap_ids.append(item[5:])
            elif item.startswith("email:"):
                email.append(item[6:])
            elif item.startswith("realname:"):
                realname.append(item[9:])
            elif item.startswith("maint:"):
                package.append(item[6:])
            elif item.startswith("x:"):
                expl.append(item[2:])
                if item.startswith("x:ldap:gpg:"):
                    # "<login>:<fingerprint>"
                    keyring['ldap'].append(item[11:].split(':', 1)[1])
                if item.startswith("x:gpg:keyring:"):
                    # "<fingerprint>:<keyring label>"
                    gpg, ring = item[14:].rsplit(':', 1)
                    keyring[ring].append(gpg)
                if item.startswith("x:ldap:realname:"):
                    # "<login>:<gecos>"; split once only, the gecos part
                    # may itself contain a colon
                    gecos = item[16:].split(':', 1)[1]
            elif item == "mia":
                mia.append('in-db')
            else:
                extra.append(item)
        for ring in keyring.values():
            ring.sort()

        if len(ldap_ids) > 1:
            warnings.append("Multiple LDAP entries")
        if keyring['emeritus'] and keyring['keyring']:
            warnings.append("Both emeritus and active")
        if keyring['ldap'] != keyring['keyring']:
            warnings.append("Ldap doesn't match keys in keyring")

        if keyring['ldap'] and not package:
            mia.append('needs-wat')
        if keyring['emeritus'] and package:
            mia.append('emeritus-with-package')
        if not keyring['emeritus'] and not keyring['keyring'] and \
                keyring['removed'] and package:
            mia.append('removed-with-package')

        lines = []
        if ldap_ids:
            lines.append("DD: "+gecos+" <"+ldap_ids[0]+"@debian.org>")
        if realname:
            lines.append("Known as: "+join(', ', realname))
        if email:
            lines.append("Using emails: "+join(', ', email))
        for name in ring_names:
            for key in keyring[name]:
                lines.append("Key in "+name+": "+key)
        p = "0"
        if package and len(package) <= 5:
            p = "%s (%s)" % (len(package), join(", ", package))
        elif package:
            # Long lists are cut down to the first four packages
            p = "%s (%s)" % (len(package), join(", ", package[:4]+["..."]))
        lines.append("Packages: %s" % p)
        #if expl: text += "Extra: "+join(', ', expl)+"\n"
        if mia:
            lines.append("X-MIA: "+join(', ', mia))
        for warn in warnings:
            lines.append("X-Warning: "+warn)

        # One newline ends the last line, the second one separates stanzas
        fd.write("\n".join(lines)+"\n\n")

#os.chdir('/org/qa.debian.org/data/carnivore')
#try:
#    os.mkdir('results')
#except OSError:
#    pass
#os.chdir('results')

# Gather identities from all sources
getLdap()
getKeyrings()
getPackages()

# merge extra IDs from file: every line holding exactly two
# whitespace-separated identifiers merges those two; other lines are ignored
if os.path.exists("associations"):
    with open("associations") as associations:
        for line in associations:
            toks = line.strip().split()
            if len(toks) != 2:
                continue
            merge(toks[0], toks[1])

cleanUp()

# Each output is written to a .new file and renamed into place afterwards,
# so readers never see a partially written file and a failure while writing
# leaves the previous version untouched.
with open('uids.new', 'w') as out:
    writeUids(out)
os.rename("uids.new", "uids")

with open('mia-addresses.new', 'w') as out:
    out.write("""# mia-* suffices that are valid
#
# This file is automatically generated by
# /org/qa.debian.org/data/carnivore/extract_data, and used by
# /etc/exim4/exim4.conf to decide what mia-*@qa.debian.org addresses are
# actually valid, in order to reject invalid mail
#
# Do not remove or change without coordinating with DSA
""")
    writeMiaEmails(out)
os.rename("mia-addresses.new", "mia-addresses")

with open('report.new', 'w') as out:
    writeReport(out)
os.rename("report.new", "report")

# Dump the whole database to stdout.  print(x) with a single argument
# produces the same output as the print statement.
for k, v in carnivore.items():
    print(k)
    print(v)