| 1 |
jeroen |
1202 |
#!/usr/bin/python |
| 2 |
|
|
|
| 3 |
|
|
# Create a database of identities in Debian using various sources |
| 4 |
jeroen |
1238 |
# Copyright (C) 2005-2006 Jeroen van Wolffelaar <jeroen@wolffelaar.nl> |
| 5 |
jeroen |
1202 |
# $Id$ |
| 6 |
|
|
|
| 7 |
|
|
# This program is free software; you can redistribute it and/or modify |
| 8 |
|
|
# it under the terms of the GNU General Public License as published by |
| 9 |
|
|
# the Free Software Foundation; either version 2 of the License, or |
| 10 |
|
|
# (at your option) any later version. |
| 11 |
|
|
|
| 12 |
|
|
# This program is distributed in the hope that it will be useful, |
| 13 |
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 |
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 |
|
|
# GNU General Public License for more details. |
| 16 |
|
|
|
| 17 |
|
|
# You should have received a copy of the GNU General Public License |
| 18 |
|
|
# along with this program; if not, write to the Free Software |
| 19 |
|
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 20 |
|
|
|
| 21 |
jhr |
2429 |
import os, sys, ldap |
| 22 |
myon |
2267 |
import os.path |
| 23 |
jeroen |
1202 |
import apt_pkg |
| 24 |
|
|
|
| 25 |
jeroen |
1476 |
# activity-* fields from LDAP are private
# moved out of the data subdir and made world readable again -- Myon
# (a umask of 002 only withholds the write bit for "others")
os.umask(0o002)
| 28 |
jeroen |
1476 |
|
| 29 |
jeroen |
1202 |
# Carnivore works by keeping a dictionary that maps identifiers to identities.
# An identity is a tuple of (primary id, list-of-ids, list-of-extra-info).
# Instead of such a tuple, a value can also be a string naming another
# identifier whose identity is equivalent, or a list of targets (a "weak
# reference", see weakRef()).
carnivore = {}
| 34 |
|
|
|
| 35 |
|
|
def join(sep, items):
    """Return the strings in *items* concatenated with *sep* between them.

    An empty *items* yields the empty string (the former reduce()-based
    implementation raised TypeError in that case).
    """
    return sep.join(items)
| 37 |
|
|
|
| 38 |
|
|
def parseUid(uid):
    """Split a "Real Name (comment) <email>" string into (name, email).

    The first parenthesised comment is dropped, then the first <...> part is
    taken as the email address and whatever remains (stripped) is the name.
    When no address was found that way but the remaining text contains an
    '@', the whole text is treated as the address and the name is left
    empty.  Either element of the returned tuple may be None or "".

    The closing ')' and '>' are only looked for *after* their opening
    counterpart, so a stray closer earlier in the string (as in "a > b <x@y>")
    no longer garbles the result.
    """
    uid = uid.strip()

    # First, strip comment
    start = uid.find('(')
    end = uid.find(')', start + 1) if start >= 0 else -1
    if end >= 0:
        uid = uid[:start] + uid[end + 1:]

    # Then cut out the address between angle brackets
    email = None
    start = uid.find('<')
    end = uid.find('>', start + 1) if start >= 0 else -1
    if end >= 0:
        email = uid[start + 1:end]
        uid = uid[:start] + uid[end + 1:]

    uid = uid.strip()
    if not email and '@' in uid:
        # A bare address without a name: what is left *is* the email.
        email, uid = uid, email
    return (uid, email)
| 55 |
|
|
|
| 56 |
|
|
def getCanonical(key):
    """Follow *key* through the carnivore dictionary to its final entry.

    String values are references to another key and are followed until a
    tuple (an identity) or a list (a weak reference) is reached, which is
    returned.  Returns None when *key* itself is not in the dictionary.
    """
    if key not in carnivore:
        return None
    entry = carnivore[key]
    while not isinstance(entry, (tuple, list)):
        entry = carnivore[entry]
    return entry
| 63 |
|
|
|
| 64 |
|
|
def merge(a, b):
    """Declare identifiers *a* and *b* to belong to the same identity.

    The identity of *b* is folded into the identity of *a*: b's primary id
    becomes a string reference to a's primary id, and b's id list and
    extra-info list are appended to a's.  Unknown identifiers get a fresh
    identity first.  Returns the surviving identity tuple.

    *b* is resolved only after *a* has been registered, so merging an
    unknown identifier with itself is a no-op instead of storing a
    self-reference (which made later getCanonical() calls loop forever).
    """
    ca = getCanonical(a)
    if not ca:
        ca = (a, [a], [])
        carnivore[a] = ca

    cb = getCanonical(b)
    if cb and ca == cb:
        # Already one and the same identity.
        return ca
    if not cb:
        cb = (b, [b], [])

    # Redirect b's primary id to a's, then absorb b's data into a.
    carnivore[cb[0]] = ca[0]
    ca[1].extend(cb[1])
    ca[2].extend(cb[2])
    ca[2].sort()
    return ca
| 80 |
|
|
|
| 81 |
|
|
def addName(name, ref):
    """Associate the real name *name* with the identifier *ref*.

    Normally the "realname:<name>" identifier is merged with *ref*.  Names on
    the hard-coded list below are only recorded as extra info on *ref* and
    never merged (presumably because they do not identify a single person).
    """
    # merge with realname, TODO: support for blacklist for nonreal names
    unmergeable = ('Brian Nelson', 'Luca Bruno', 'Andrew Ross',
                   'Matthew Vernon', 'Thomas Mueller',
                   'Thawte Freemail Member')
    realname = "realname:" + name
    if name in unmergeable:
        extraInfo(ref, realname)
        return
    merge(ref, realname)
| 87 |
|
|
|
| 88 |
|
|
def extraInfo(key, info):
    """Append *info* to the extra-info list of the identity behind *key*.

    When *key* is not known yet, a new identity (key, [key], [info]) is
    created for it.
    """
    identity = getCanonical(key)
    if identity:
        identity[2].append(info)
        return
    carnivore[key] = (key, [key], [info])
| 94 |
|
|
|
| 95 |
|
|
def weakRef(key, target):
    """Record that *key* points at *target* without merging the two.

    The targets are collected in a list stored under *key*; the list is
    created on first use and appended to afterwards.
    """
    targets = getCanonical(key)
    if targets:
        targets.append(target)
    else:
        carnivore[key] = [target]
| 101 |
|
|
|
| 102 |
|
|
def getKeyrings():
    """Extract keys from the various keyrings (DDs, DMs, emeritus and removed)."""
    directory = "/srv/qa.debian.org/data/keyrings/keyrings/"
    rings = {'keyring': 'debian-keyring',
             'emeritus': 'emeritus-keyring',
             'removed': 'removed-keys'}
    # Each of these keyrings is read in both its .gpg and its .pgp form.
    for label, basename in rings.items():
        for extension in ('gpg', 'pgp'):
            parseKeyring("%s%s.%s" % (directory, basename, extension), label)
    # Add DM keyring
    parseKeyring(directory + "debian-maintainers.gpg", "dm")
| 111 |
jeroen |
1202 |
|
| 112 |
geissert |
2390 |
def parseKeyring(keyring_file, keyring):
    """Parse *keyring_file* with gpg and record its keys under label *keyring*.

    gpg is run with --with-colons; only 'pub', 'fpr' and 'uid' records are
    looked at.  Every fingerprint becomes a "gpg:<fingerprint>" identifier
    that is merged with the email addresses and real names found in the
    key's user ids.  Raises IOError when the gpg pipe reports a failure.
    """
    command = ("gpg --no-default-keyring "
               "--no-expensive-trust-checks "
               "--keyring %s --list-keys "
               "--with-colons --fingerprint" % keyring_file)
    contents = os.popen(command)

    fpr = None
    lastpub = None
    for line in contents.readlines():
        fields = line.split(':')
        record = fields[0]
        if record == 'pub':
            # New key: remember the user id carried on the pub record; it is
            # processed once the fingerprint record has been seen.
            fpr = None
            lastpub = fields[9].strip()
            continue
        if record == 'fpr':
            fpr = "gpg:" + fields[9].strip()
            extraInfo(fpr, "x:gpg:keyring:" + fpr[4:] + ":" + keyring)
            uid = lastpub
        elif record == 'uid':
            uid = fields[9].strip()
        else:
            continue

        # Do stuff with 'uid'
        weakRef("uid:" + uid, fpr)
        name, email = parseUid(uid)
        if email:
            email = "email:" + email
            merge(fpr, email)
            extraInfo(fpr, "x:" + fpr + ":" + email)
        if name:
            addName(name, fpr)
            extraInfo(fpr, "realname:" + name)
            extraInfo(fpr, "x:" + fpr + ":realname:" + name)

    # close() on a popen object returns None only when the command succeeded.
    if contents.close() is not None:
        raise IOError('Exporting %s keyring failed' % keyring_file)
| 149 |
geissert |
2390 |
|
| 150 |
jeroen |
1202 |
def getLdap():
    """Load all debiandeveloper entries from db.debian.org's LDAP.

    For every entry an "ldap:<uid>" identifier is merged with the
    <uid>@debian.org address, the real name taken from the first gecos
    field, and every key fingerprint listed; the activity-* attributes are
    stored as extra info when present.
    """
    attributes = ['uid', 'gecos', 'keyFingerPrint',
                  'activity-pgp', 'activity-from']
    connection = ldap.initialize("ldap://db.debian.org/")
    result = connection.search_s("dc=debian,dc=org", ldap.SCOPE_SUBTREE,
                                 "objectClass=debiandeveloper", attributes)
    for res in result:
        attrs = res[1]
        login = attrs['uid'][0]
        # Only the part of gecos before the first comma is the real name.
        realname = attrs['gecos'][0].split(',')[0].strip()
        uid = "ldap:" + login
        address = login + "@debian.org"

        merge(uid, "email:" + address)
        extraInfo(uid, "realname:" + realname)
        for activity in ('activity-from', 'activity-pgp'):
            if activity in attrs:
                extraInfo(uid, activity + ":" + attrs[activity][0])
        addName(realname, uid)
        weakRef("uid:" + realname + " <" + address + ">", uid)
        extraInfo(uid, "x:ldap:realname:%s:%s" % (login, realname))

        if 'keyFingerPrint' in attrs:
            for fpr in attrs['keyFingerPrint']:
                merge(uid, "gpg:" + fpr)
                extraInfo(uid, "x:ldap:gpg:" + login + ':' + fpr)
| 171 |
|
|
|
| 172 |
|
|
|
| 173 |
|
|
def getPackages():
    """Record maintainers and uploaders of all source packages in unstable.

    The output of the get-packages helper is parsed with apt_pkg.TagFile.
    Every Maintainer/Uploaders entry with an email address gets the package
    attached as "maint:<package>" extra info, plus weak references from the
    package and from the full uid string to the address.  Raises IOError
    when the helper pipe reports a failure.
    """
    packages = os.popen("/srv/qa.debian.org/data/ftp/get-packages "
                        "-s unstable -a source")
    parser = apt_pkg.TagFile(packages)
    while parser.step():
        package = parser.section.get("Package")
        maintainers = [parser.section.get("Maintainer")]
        uploaders = parser.section.get("Uploaders")
        if uploaders:
            maintainers.extend(uploaders.split(","))

        # Text without an address is kept here and glued in front of the
        # next entry: it is the first half of a name that contained a comma.
        pending = ''
        for maintainer in maintainers:
            if pending:
                maintainer = pending + ', ' + maintainer
                pending = ''
            if maintainer is None:
                sys.stderr.write("No maintainer field for %s: %s; skipping\n" % (package, maintainer))
                continue
            maintainer = maintainer.strip()
            maint, email = parseUid(maintainer)
            if not email:
                pending = maintainer
                continue
            email = "email:" + email
            if package is None:
                sys.stderr.write("No package field for %s: %s; skipping\n" % (maintainer, package))
                continue

            extraInfo(email, "maint:" + package)
            weakRef("maint:" + package, email)
            weakRef("uid:" + maintainer, email)
            if maint:
                extraInfo(email, "realname:" + maint)
                addName(maint, email)
                extraInfo(email, "x:maint:" + package + ":" + email + ":realname:" + maint)

    # close() on a popen object returns None only when the command succeeded.
    if packages.close() is not None:
        raise IOError('Extracting package data failed')
| 211 |
jeroen |
1202 |
|
| 212 |
|
|
def cleanUp():
    """Canonicalise, de-duplicate and sort every entry of the dictionary.

    Weak references (lists) have each target replaced by the primary id of
    its identity; identities (tuples) get their id list and extra-info list
    de-duplicated.  All lists are rewritten in place, so other holders of
    the same list object see the result.  Keys containing a newline are
    reported on stderr.
    """
    for key, value in carnivore.items():
        if "\n" in key:
            sys.stderr.write("Aiee, newline in key %s (for value %s)!\n" % (key, value))
        if isinstance(value, list):
            # weak ref
            targets = set(value)
            del value[:]
            for ref in targets:
                value.append(getCanonical(ref)[0])
            # De-duplicate a second time, because now everything is
            # canonicalized and distinct targets may have collapsed.
            value[:] = sorted(set(value))
        elif isinstance(value, tuple):
            for part in (value[1], value[2]):
                part[:] = sorted(set(part))
| 233 |
|
|
|
| 234 |
|
|
def writeUids(fd):
    """Write a "<lower-cased uid>: <identity>" line per uid weak reference.

    Only "uid:" keys whose value is a list with exactly one target are
    written; uids with several targets are reported on stderr and skipped.
    Lines are sorted by uid, and an empty line is written whenever the
    identity differs from that of the previous line.  Nothing is written
    when there are no uids at all (this used to raise IndexError).
    """
    uids = []
    for key, value in carnivore.items():
        if not (isinstance(value, list) and key[:4] == "uid:"):
            continue
        if len(value) != 1:
            sys.stderr.write("Oops, a uid with multiple identities: " + key + "\n")
            continue
        uids.append((key[4:].lower().strip(), value[0]))
    uids.sort()

    previous = None
    for name, identity in uids:
        # Separate runs of lines that belong to different identities.
        if previous is not None and identity != previous:
            fd.write("\n")
        previous = identity
        fd.write(name + ": " + identity + "\n")
| 249 |
|
|
|
| 250 |
jeroen |
1280 |
|
| 251 |
|
|
def writeMiaEmails(fd):
    """Write one line to *fd* for every email and ldap key.

    "email:" keys are written with '@' replaced by '=', "ldap:" keys are
    written as the bare login name.
    """
    for key in carnivore:
        if key.startswith("email:"):
            fd.write(key[6:].replace('@', '=') + "\n")
        elif key.startswith("ldap:"):
            fd.write(key[5:] + "\n")
| 258 |
|
|
|
| 259 |
jeroen |
1202 |
def writeReport(fd):
    """Write a text paragraph to *fd* for every identity in the dictionary.

    For each identity tuple the ids and extra-info items are sorted into
    LDAP logins, real names, emails, packages and keys per keyring; a few
    consistency warnings and X-MIA hints are derived from those, and the
    paragraph is written followed by an empty line.

    The LDAP real name is split off with a single split, so a name that
    contains a colon no longer raises ValueError.
    """
    for identity in carnivore.values():
        if not isinstance(identity, tuple):
            continue
        # So, we have a person
        logins, realname, email, package = [], [], [], []
        warnings, mia = [], []
        gecos = ""
        keyring = {'keyring': [], 'emeritus': [], 'removed': [], 'ldap': [], 'dm': []}
        for item in set(identity[1] + identity[2]):
            if item[:5] == "ldap:":
                logins.append(item[5:])
            elif item[:6] == "email:":
                email.append(item[6:])
            elif item[:9] == "realname:":
                realname.append(item[9:])
            elif item[:6] == "maint:":
                package.append(item[6:])
            elif item[:2] == "x:":
                # Explanatory records; only these three kinds are used.
                if item[:11] == "x:ldap:gpg:":
                    dummy, gpg = item[11:].split(':')
                    keyring['ldap'].append(gpg)
                if item[:14] == "x:gpg:keyring:":
                    gpg, ring = item[14:].split(':')
                    keyring[ring].append(gpg)
                if item[:16] == "x:ldap:realname:":
                    # "<login>:<real name>"; the name may contain colons.
                    dummy, gecos = item[16:].split(':', 1)
            elif item == "mia":
                mia.append('in-db')
            # Any other item is not part of the report.
        for ring in keyring.values():
            ring.sort()

        if len(logins) > 1:
            warnings.append("Multiple LDAP entries")
        if keyring['emeritus'] and keyring['keyring']:
            warnings.append("Both emeritus and active")
        if keyring['ldap'] != keyring['keyring']:
            warnings.append("Ldap doesn't match keys in keyring")
        if keyring['ldap'] and not package:
            mia.append('needs-wat')
        if keyring['emeritus'] and package:
            mia.append('emeritus-with-package')
        if not keyring['emeritus'] and not keyring['keyring'] and \
                keyring['removed'] and package:
            mia.append('removed-with-package')

        lines = []
        if logins:
            lines.append("DD: " + gecos + " <" + logins[0] + "@debian.org>\n")
        if realname:
            lines.append("Known as: " + ", ".join(realname) + "\n")
        if email:
            lines.append("Using emails: " + ", ".join(email) + "\n")
        for ringname, keys in keyring.items():
            for key in keys:
                lines.append("Key in " + ringname + ": " + key + "\n")

        # At most five package names are listed; longer lists are cut to four
        # names plus "...".
        if not package:
            p = "0"
        elif len(package) <= 5:
            p = "%s (%s)" % (len(package), ", ".join(package))
        else:
            p = "%s (%s)" % (len(package), ", ".join(package[:4] + ["..."]))
        lines.append("Packages: %s\n" % p)
        if mia:
            lines.append("X-MIA: " + ", ".join(mia) + "\n")
        for warn in warnings:
            lines.append("X-Warning: " + warn + "\n")
        fd.write("".join(lines) + "\n")
| 324 |
|
|
|
| 325 |
geissert |
2738 |
#os.chdir('/srv/qa.debian.org/data/carnivore') |
| 326 |
jeroen |
1202 |
#try: |
| 327 |
|
|
# os.mkdir('results') |
| 328 |
|
|
#except OSError: |
| 329 |
|
|
# pass |
| 330 |
|
|
#os.chdir('results') |
| 331 |
|
|
|
| 332 |
|
|
# Collect identities from all sources.
getLdap()
getKeyrings()
getPackages()

# merge extra IDs from file: every line holding exactly two tokens names two
# identifiers that belong together; other lines are ignored.
if os.path.exists("associations"):
    associations = open("associations")
    try:
        for line in associations:
            toks = line.split()
            if len(toks) != 2:
                continue
            merge(toks[0], toks[1])
    finally:
        associations.close()
cleanUp()

# Each of these two files is written under a temporary name and renamed only
# after it was written completely; the handle is closed even when writing
# fails.
out = open('uids.new', 'w')
try:
    writeUids(out)
finally:
    out.close()
os.rename("uids.new", "uids")

out = open('mia-addresses.new', 'w')
try:
    out.write("""# mia-* suffices that are valid
#
# This file is automatically generated by
# /srv/qa.debian.org/data/carnivore/extract_data, and used by
# /etc/exim4/exim4.conf to decide what mia-*@qa.debian.org addresses are
# actually valid, in order to reject invalid mail
#
# Do not remove or change without coordinating with DSA

""")
    writeMiaEmails(out)
finally:
    out.close()
os.rename("mia-addresses.new", "mia-addresses")

out = open('report', 'w')
try:
    writeReport(out)
finally:
    out.close()

# Dump the complete dictionary on stdout.
for k, v in carnivore.items():
    print(k)
    print(v)