| 1 |
#!/usr/bin/python
|
| 2 |
|
| 3 |
# Create a database of identities in Debian using various sources
|
| 4 |
# Copyright (C) 2005-2006 Jeroen van Wolffelaar <jeroen@wolffelaar.nl>
|
| 5 |
# $Id$
|
| 6 |
|
| 7 |
# This program is free software; you can redistribute it and/or modify
|
| 8 |
# it under the terms of the GNU General Public License as published by
|
| 9 |
# the Free Software Foundation; either version 2 of the License, or
|
| 10 |
# (at your option) any later version.
|
| 11 |
|
| 12 |
# This program is distributed in the hope that it will be useful,
|
| 13 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
# GNU General Public License for more details.
|
| 16 |
|
| 17 |
# You should have received a copy of the GNU General Public License
|
| 18 |
# along with this program; if not, write to the Free Software
|
| 19 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
| 20 |
|
| 21 |
import os, sys, sets, ldap
|
| 22 |
import os.path
|
| 23 |
import apt_pkg
|
| 24 |
|
| 25 |
# activity-* fields from LDAP are private
# moved out of the data subdir and made world readable again -- Myon
# The mask only clears the "other write" bit on files created below.
# 0o002 is the same value as the old literal 02, spelled so that both
# Python 2.6+ and Python 3 accept it.
os.umask(0o002)

# Carnivore works by having a dictionary from identifiers to identities. An
# identity is a tuple of (primary id, list-of-ids, list-of-extra-info).
# Instead of such a tuple, a value can also be a string referring to another
# identity that is equivalent, or (see weakRef) a list of the targets a
# weakly associated key points to.
carnivore = {}
|
| 34 |
|
| 35 |
def join(sep, items):
    """Return the strings in items concatenated with sep between them.

    An empty items gives "" (the earlier reduce()-based implementation
    raised TypeError for it), and a single item is returned unchanged.
    """
    return sep.join(items)
|
| 37 |
|
| 38 |
def parseUid(uid):
    """Split a "Name (comment) <email>" string into (name, email).

    The first "(...)" comment is dropped, the first "<...>" part becomes the
    e-mail address and whatever is left, stripped, is the name.  When there
    is no "<...>" part but the remaining text contains an "@", that text is
    taken to be the address and the name becomes the (empty) address value.

    Returns a (name, email) tuple; email is None when no address was found.
    """
    uid = uid.strip()

    # First, strip comment.  The closing paren is searched *after* the
    # opening one, so a stray ")" earlier in the string cannot produce an
    # inverted slice that duplicates part of the text.
    start = uid.find('(')
    if start >= 0:
        end = uid.find(')', start + 1)
        if end >= 0:
            uid = uid[:start] + uid[end + 1:]

    # Same rule for the address: ">" only counts when it follows "<".
    email = None
    start = uid.find('<')
    if start >= 0:
        end = uid.find('>', start + 1)
        if end >= 0:
            email = uid[start + 1:end]
            uid = uid[:start] + uid[end + 1:]

    uid = uid.strip()
    if not email and '@' in uid:
        # Bare address without a name: swap the two roles.
        email, uid = uid, email
    return (uid, email)
|
| 55 |
|
| 56 |
def getCanonical(key):
    """Resolve key to the record it ultimately stands for.

    Returns None when key is not in carnivore.  Otherwise string values are
    followed as aliases until a tuple (an identity) or a list (a weak
    reference) is reached, and that object itself is returned.
    """
    if key not in carnivore:
        return None
    entry = carnivore[key]
    # A string is an alias for another key; keep hopping until we land on
    # an actual record.
    while not isinstance(entry, (tuple, list)):
        entry = carnivore[entry]
    return entry
|
| 63 |
|
| 64 |
def merge(a, b):
    """Declare keys a and b to be the same identity and return its record.

    A missing a is created as a fresh identity.  b's record (or a fresh one
    when b is unknown) is folded into a's: b's primary key becomes an alias
    for a's primary key, and b's ids and extra info are appended to a's.
    When both already resolve to the same record, that record is returned
    untouched.
    """
    ca = getCanonical(a)
    if not ca:
        ca = (a, [a], [])
        carnivore[a] = ca

    # b is looked up only after a has been registered: for merge(x, x) on an
    # unknown x this finds the record just created, instead of later writing
    # carnivore[x] = x, a self-alias that getCanonical() would follow forever.
    cb = getCanonical(b)
    if ca == cb:
        return ca
    if not cb:
        cb = (b, [b], [])

    # Redirect b's primary key to a's, then absorb b's data.
    carnivore[cb[0]] = ca[0]
    ca[1].extend(cb[1])
    ca[2].extend(cb[2])
    ca[2].sort()
    return ca
|
| 80 |
|
| 81 |
def addName(name, ref):
    """Associate the real name `name` with the identity behind `ref`.

    Normally "realname:<name>" is merged into ref's identity, so that equal
    names end up as one identity.  The two names listed below are only
    attached as extra info and never merged (presumably because they do not
    identify a single person -- see the TODO).
    """
    # merge with realname, TODO: support for blacklist for nonreal names
    label = "realname:" + name
    if name not in ('Brian Nelson', 'Thawte Freemail Member'):
        merge(ref, label)
        return
    extraInfo(ref, label)
|
| 87 |
|
| 88 |
def extraInfo(key, info):
    """Append info to the extra-info list of the identity behind key.

    An unknown key is first created as a new identity whose only extra info
    is `info`.
    """
    record = getCanonical(key)
    if record:
        record[2].append(info)
        return
    carnivore[key] = (key, [key], [info])
|
| 94 |
|
| 95 |
def weakRef(key, target):
    """Record that key points at target without merging the two.

    Weak references are stored as a plain list of targets under key; the
    list is created on first use and appended to afterwards.
    """
    targets = getCanonical(key)
    if targets:
        targets.append(target)
    else:
        carnivore[key] = [target]
|
| 101 |
|
| 102 |
def getKeyrings():
    """Extract keys from the various keyrings (DDs, DMs, emeritus and removed).

    The debian, emeritus and removed keyrings are each read in both their
    .gpg and .pgp form; the debian-maintainers keyring only exists as .gpg.
    """
    rings = {'keyring': 'debian-keyring',
             'emeritus': 'emeritus-keyring',
             'removed': 'removed-keys'}
    for label, stem in rings.iteritems():
        for suffix in ['gpg', 'pgp']:
            parseKeyring("/org/qa.debian.org/data/keyrings/keyrings/%s.%s" % (stem, suffix), label)
    # Add DM keyring
    parseKeyring("/org/qa.debian.org/data/keyrings/keyrings/debian-maintainers.gpg", "dm")
|
| 111 |
|
| 112 |
def parseKeyring(keyring_file, keyring):
    """Import every key of one GnuPG keyring file into carnivore.

    keyring_file is the path passed to ``gpg --keyring``; keyring is the
    label ('keyring', 'emeritus', 'removed', 'dm') recorded for every
    fingerprint as "x:gpg:keyring:<fingerprint>:<label>".

    For each user id of a key this adds a weak "uid:<user id>" reference to
    the key, merges the key with the e-mail address found in the user id and
    registers the real name through addName().  Field 10 of the "pub" record
    is treated as the key's first user id, and is processed once the "fpr"
    record has supplied the fingerprint.
    """
    # NOTE(review): keyring_file is interpolated into a shell command line;
    # every caller in this file passes a fixed path.
    contents = os.popen("gpg --no-default-keyring "
                        "--no-expensive-trust-checks "
                        "--keyring %s --list-keys "
                        "--with-colons --fingerprint" % keyring_file)
    fpr = None
    lastpub = None
    try:
        for line in contents:
            items = line.split(':')
            kind = items[0]
            if kind not in ('pub', 'fpr', 'uid'):
                continue
            # Short or truncated records simply have no field 10.
            if len(items) > 9:
                value = items[9].strip()
            else:
                value = ''

            if kind == 'pub':
                # A new key starts: forget the previous fingerprint.
                fpr = None
                lastpub = value
                continue
            if kind == 'fpr':
                if not value:
                    continue
                fpr = "gpg:" + value
                extraInfo(fpr, "x:gpg:keyring:" + fpr[4:] + ":" + keyring)
                uid = lastpub
            else:
                uid = value

            # Nothing to record for an empty user id (e.g. a "pub" record
            # that carries none) or for a uid seen before any fingerprint;
            # going on would create a junk "uid:" key or None entries.
            if not uid or fpr is None:
                continue

            # Do stuff with 'uid'
            weakRef("uid:" + uid, fpr)
            name, email = parseUid(uid)
            if email:
                email = "email:" + email
                merge(fpr, email)
                extraInfo(fpr, "x:" + fpr + ":" + email)
            if name:
                addName(name, fpr)
                extraInfo(fpr, "realname:" + name)
                extraInfo(fpr, "x:" + fpr + ":realname:" + name)
    finally:
        contents.close()
|
| 148 |
|
| 149 |
def getLdap():
    """Load all debiandeveloper entries from db.debian.org into carnivore.

    Each entry becomes the identity "ldap:<uid>", merged with its
    <uid>@debian.org address, its real name (the gecos field up to the first
    comma) and every key fingerprint listed in LDAP.  The activity-from and
    activity-pgp attributes, when present, are stored as extra info.
    """
    conn = ldap.initialize("ldap://db.debian.org/")
    try:
        result = conn.search_s("dc=debian,dc=org", ldap.SCOPE_SUBTREE,
                               "objectClass=debiandeveloper",
                               ['uid', 'gecos', 'keyFingerPrint',
                                'activity-pgp', 'activity-from'])
    finally:
        # search_s() has returned the complete result list, so the
        # connection can be released before processing it.
        conn.unbind_s()

    for res in result:
        attrs = res[1]
        login = attrs['uid'][0]
        realname = attrs['gecos'][0].split(',')[0].strip()
        uid = "ldap:" + login
        gecos = "realname:" + realname

        merge(uid, "email:" + login + "@debian.org")
        extraInfo(uid, gecos)
        if 'activity-from' in attrs:
            extraInfo(uid, "activity-from:" + attrs['activity-from'][0])
        if 'activity-pgp' in attrs:
            extraInfo(uid, "activity-pgp:" + attrs['activity-pgp'][0])
        addName(realname, uid)
        weakRef("uid:" + realname + " <" + login + "@debian.org>", uid)
        extraInfo(uid, "x:ldap:realname:%s:%s" % (login, realname))

        if 'keyFingerPrint' in attrs:
            for fpr in attrs['keyFingerPrint']:
                merge(uid, "gpg:" + fpr)
                extraInfo(uid, "x:ldap:gpg:" + login + ':' + fpr)
|
| 170 |
|
| 171 |
|
| 172 |
def getPackages():
    """Record who maintains or co-maintains each source package in unstable.

    Reads the Sources stanzas printed by get-packages and, for the
    Maintainer and every Uploaders entry that carries an e-mail address,
    attaches "maint:<package>" to that address, adds weak references from
    the package and from the full maintainer string to it, and registers
    the real name.
    """
    packages = os.popen("/org/qa.debian.org/data/ftp/get-packages "
                        "-s unstable -a source")
    try:
        parser = apt_pkg.ParseTagFile(packages)
        while parser.Step():
            package = parser.Section.get("Package")
            maintainers = [parser.Section.get("Maintainer")]
            uploaders = parser.Section.get("Uploaders")
            if uploaders:
                maintainers += uploaders.split(",")

            # Uploaders is split on every comma, including commas inside a
            # name ("Foo, Jr. <x@y>").  A piece without an address is held
            # back here and glued in front of the next piece.
            pending = ''
            for maintainer in maintainers:
                if maintainer is None:
                    sys.stderr.write("No maintainer field for %s: %s; skipping\n" % (package, maintainer))
                    continue
                # Strip before re-joining so the restored ", " is not
                # followed by the blank that came after the original comma.
                maintainer = maintainer.strip()
                if pending:
                    maintainer = (pending + ', ' + maintainer).strip()
                    pending = ''
                maint, email = parseUid(maintainer)
                if not email:
                    pending = maintainer
                    continue
                email = "email:" + email
                if package is None:
                    sys.stderr.write("No package field for %s: %s; skipping\n" % (maintainer, package))
                    continue
                extraInfo(email, "maint:" + package)
                weakRef("maint:" + package, email)
                weakRef("uid:" + maintainer, email)
                if maint:
                    extraInfo(email, "realname:" + maint)
                    addName(maint, email)
                    extraInfo(email, "x:" + "maint:" + package + ":" + email + ":realname:" + maint)
    finally:
        packages.close()
|
| 209 |
|
| 210 |
def cleanUp():
    """Normalise every record in carnivore in place.

    Weak-reference lists are rewritten to the sorted, duplicate-free primary
    ids of their targets; the id list and the extra-info list of each
    identity tuple are de-duplicated and sorted.  Keys containing a newline
    are reported on stderr.
    """
    for key, value in carnivore.iteritems():
        if "\n" in key:
            sys.stderr.write("Aiee, newline in key %s (for value %s)!\n" % (key, value))
        if isinstance(value, list):
            # weak ref: map each distinct target to its primary id, then
            # de-duplicate again because several targets may share one.
            primaries = [getCanonical(ref)[0] for ref in set(value)]
            value[:] = sorted(set(primaries))
        elif isinstance(value, tuple):
            for member in (value[1], value[2]):
                member[:] = sorted(set(member))
|
| 231 |
|
| 232 |
def writeUids(fd):
    """Write "<uid>: <identity>" lines for all unambiguous uid weak refs.

    Only "uid:" keys whose weak-reference list holds exactly one identity
    are written; the others are reported on stderr and skipped.  Uids are
    lower-cased and sorted, and a blank line is emitted whenever the
    identity differs from that of the previous line.  Nothing is written
    when no uid qualifies.
    """
    uids = []
    for key, targets in carnivore.items():
        if not (isinstance(targets, list) and key[:4] == "uid:"):
            continue
        if len(targets) != 1:
            sys.stderr.write("Oops, a uid with multiple identities: " + key + "\n")
            continue
        uids.append((key[4:].lower().strip(), targets[0]))
    uids.sort()

    # Tracking the previous identity from None (instead of peeking at
    # uids[0]) keeps an empty list from raising IndexError.
    previous = None
    for name, identity in uids:
        if previous is not None and identity != previous:
            fd.write("\n")
        previous = identity
        fd.write(name + ": " + identity + "\n")
|
| 247 |
|
| 248 |
|
| 249 |
def writeMiaEmails(fd):
    """Write one mia-* address suffix per line to fd.

    Every "email:" key is written with "@" replaced by "=", and every
    "ldap:" key is written as the bare login name.
    """
    for key in carnivore:
        if key[:6] == "email:":
            fd.write(key[6:].replace('@', '=') + "\n")
        elif key[:5] == "ldap:":
            fd.write(key[5:] + "\n")
|
| 256 |
|
| 257 |
def writeReport(fd):
    """Write a human-readable paragraph for every identity in carnivore.

    Each paragraph lists the LDAP account, known names, e-mail addresses,
    keys per keyring and maintained packages, followed by X-MIA and
    X-Warning lines for suspicious combinations.  Paragraphs are separated
    by a blank line.  Items within one identity are processed in sorted
    order, so the name, address and package lists come out sorted.
    """
    for ident in carnivore.values():
        if not isinstance(ident, tuple):
            # Aliases (strings) and weak references (lists) are not people.
            continue
        # So, we have a person
        logins, realname, email, package = [], [], [], []
        warnings, mia = [], []
        gecos = ""
        keyring = {'keyring': [], 'emeritus': [], 'removed': [], 'ldap': [], 'dm': []}

        for item in sorted(set(ident[1] + ident[2])):
            if item[:5] == "ldap:":
                logins.append(item[5:])
            elif item[:6] == "email:":
                email.append(item[6:])
            elif item[:9] == "realname:":
                realname.append(item[9:])
            elif item[:6] == "maint:":
                package.append(item[6:])
            elif item[:2] == "x:":
                if item[:11] == "x:ldap:gpg:":
                    # "<login>:<fingerprint>"
                    keyring['ldap'].append(item[11:].split(':', 1)[1])
                if item[:14] == "x:gpg:keyring:":
                    gpg, ring = item[14:].split(':')
                    keyring[ring].append(gpg)
                if item[:16] == "x:ldap:realname:":
                    # "<login>:<real name>"; split once only, because the
                    # real name itself may contain a colon.
                    gecos = item[16:].split(':', 1)[1]
            elif item == "mia":
                mia.append('in-db')
            # Anything else (e.g. activity-* info) is not reported.

        for keys in keyring.values():
            keys.sort()

        if len(logins) > 1:
            warnings.append("Multiple LDAP entries")
        if keyring['emeritus'] and keyring['keyring']:
            warnings.append("Both emeritus and active")
        if keyring['ldap'] != keyring['keyring']:
            warnings.append("Ldap doesn't match keys in keyring")
        if keyring['ldap'] and not package:
            mia.append('needs-wat')
        if keyring['emeritus'] and package:
            mia.append('emeritus-with-package')
        if not keyring['emeritus'] and not keyring['keyring'] and \
                keyring['removed'] and package:
            mia.append('removed-with-package')

        lines = []
        if logins:
            lines.append("DD: " + gecos + " <" + logins[0] + "@debian.org>")
        if realname:
            lines.append("Known as: " + join(', ', realname))
        if email:
            lines.append("Using emails: " + join(', ', email))
        for ring, keys in keyring.items():
            for key in keys:
                lines.append("Key in " + ring + ": " + key)

        # Up to five packages are listed in full, longer lists are cut to
        # the first four plus an ellipsis.
        summary = "0"
        if package and len(package) <= 5:
            summary = "%s (%s)" % (len(package), join(", ", package))
        elif package:
            summary = "%s (%s)" % (len(package), join(", ", package[:4] + ["..."]))
        lines.append("Packages: %s" % summary)

        if mia:
            lines.append("X-MIA: " + join(', ', mia))
        for warn in warnings:
            lines.append("X-Warning: " + warn)
        fd.write("\n".join(lines) + "\n\n")
|
| 322 |
|
| 323 |
#os.chdir('/org/qa.debian.org/data/carnivore')
|
| 324 |
#try:
|
| 325 |
# os.mkdir('results')
|
| 326 |
#except OSError:
|
| 327 |
# pass
|
| 328 |
#os.chdir('results')
|
| 329 |
|
| 330 |
def _writeAtomically(filename, writer, header=""):
    """Write header plus writer(fd) to filename + ".new", then rename it.

    The rename only happens when writing succeeded, so readers never see a
    half-written file; the handle is closed in either case.
    """
    tmpname = filename + ".new"
    out = open(tmpname, 'w')
    try:
        out.write(header)
        writer(out)
    finally:
        out.close()
    os.rename(tmpname, filename)


getLdap()
getKeyrings()
getPackages()
# merge extra IDs from file: each line holds two keys naming one identity
if os.path.exists("associations"):
    associations = open("associations")
    try:
        for line in associations:
            toks = line.strip().split()
            if len(toks) != 2:
                continue
            merge(toks[0], toks[1])
    finally:
        associations.close()
cleanUp()

_writeAtomically("uids", writeUids)

_writeAtomically("mia-addresses", writeMiaEmails, """# mia-* suffices that are valid
#
# This file is automatically generated by
# /org/qa.debian.org/data/carnivore/extract_data, and used by
# /etc/exim4/exim4.conf to decide what mia-*@qa.debian.org addresses are
# actually valid, in order to reject invalid mail
#
# Do not remove or change without coordinating with DSA

""")

# The report is now published the same way as the two files above.
_writeAtomically("report", writeReport)

# Dump the whole database to stdout: each key on one line, its value on the
# next.
for k, v in carnivore.items():
    sys.stdout.write("%s\n%s\n" % (k, v))
|