/[qa]/trunk/carnivore/extract_data
ViewVC logotype

Contents of /trunk/carnivore/extract_data

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2805 - (show annotations) (download)
Fri Aug 17 17:09:55 2012 UTC (2 years, 4 months ago) by bartm
File size: 11097 byte(s)
No longer merge by e-mail address leader@debian.org.
Reported today by Andreas Tille <andreas@an3as.eu> on debian-qa@lists.debian.org.
1 #!/usr/bin/python
2
3 # Create a database of identities in Debian using various sources
4 # Copyright (C) 2005-2006 Jeroen van Wolffelaar <jeroen@wolffelaar.nl>
5 # $Id$
6
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
21 import os, sys, ldap
22 import os.path
23 import apt_pkg
24
# activity-* fields from LDAP are private
# moved out of the data subdir and made world readable again -- Myon
# 0o2 (same value as the old literal 02) only masks the "other-write" bit.
os.umask(0o2)

# Carnivore works by having a dictionary from identifiers to identities. An
# identity is a tuple of (primary id, list-of-ids, list-of-extra-info).
# Instead, it also can be a string referring to another identity that is
# equivalent.
# A value may also be a list of targets: a weak reference stored by weakRef().
carnivore = {}
34
def join(sep, items):
    """Return the strings in items concatenated, separated by sep.

    An empty items yields the empty string.
    """
    return sep.join(items)
37
def parseUid(uid):
    """Split a 'Name (comment) <email>' style string into (name, email).

    The first parenthesised comment is removed, then the first <...> span is
    taken as the e-mail address and removed from the name.  If no <...> span
    yields an address but the remaining text contains '@', the whole text is
    returned as the address and the name is None.  email is None when no
    address was found at all.
    """
    uid = uid.strip()
    # First, strip comment.  The closing delimiter is searched for *after*
    # the opening one, so a stray ')' earlier in the string cannot make the
    # slices below overlap and duplicate text.
    start = uid.find('(')
    end = uid.find(')', start) if start >= 0 else -1
    if start >= 0 and end >= 0:
        uid = uid[:start] + uid[end+1:]
    start = uid.find('<')
    end = uid.find('>', start) if start >= 0 else -1
    email = None
    if start >= 0 and end >= 0:
        email = uid[start+1:end]
        uid = uid[:start] + uid[end+1:]
    uid = uid.strip()
    if not email and uid.find('@') >= 0:
        # Bare address without angle brackets: it is the e-mail, not a name.
        email, uid = uid, email
    return (uid, email)
55
def getCanonical(key):
    """Resolve key through the alias chain in carnivore.

    Returns None when key is not in carnivore.  Otherwise follows values
    that name another key until a tuple (identity) or a list (weak
    reference) is reached, and returns that object itself, not a copy.
    """
    if key not in carnivore:
        return None
    current = carnivore[key]
    while not isinstance(current, (tuple, list)):
        # Still an alias: hop to the entry it names.
        current = carnivore[current]
    return current
63
def merge(a, b):
    """Record that identifiers a and b belong to the same identity.

    An identifier that is not yet known is treated as a fresh identity of
    its own.  The identity of b is folded into the identity of a: b's
    primary id becomes an alias for a's primary id, and b's id list and
    extra-info list are appended to a's (the extra-info list is re-sorted).

    Returns the identity tuple that a resolves to.
    """
    ca = getCanonical(a)
    if not ca:
        ca = (a, [a], [])
        carnivore[a] = ca
    # Look b up only after a has been registered.  Otherwise merging an
    # unknown key with itself would store carnivore[a] = a, a self-alias
    # on which getCanonical() never terminates.
    cb = getCanonical(b)
    if cb and ca == cb:
        return ca
    if not cb:
        cb = (b, [b], [])

    carnivore[cb[0]] = ca[0]
    ca[1].extend(cb[1])
    ca[2].extend(cb[2])
    ca[2].sort()
    return ca
80
def addName(name, ref):
    """Associate the real name `name` with the identity of `ref`.

    Normally the key "realname:<name>" is merged into ref's identity.  For
    the names listed below it is only attached as extra info, so they never
    cause a merge (presumably because they are not unique to one person).
    """
    # merge with realname, TODO: support for blacklist for nonreal names
    unmerged_names = ('Brian Nelson', 'Luca Bruno', 'Andrew Ross',
                      'Matthew Vernon', 'Thomas Mueller',
                      'Thawte Freemail Member')
    tagged = "realname:" + name
    if name not in unmerged_names:
        merge(ref, tagged)
    else:
        extraInfo(ref, tagged)
87
def extraInfo(key, info):
    """Append info to the extra-info list of key's identity.

    When key is unknown, a new identity (key, [key], [info]) is created.
    """
    identity = getCanonical(key)
    if identity:
        identity[2].append(info)
        return
    carnivore[key] = (key, [key], [info])
94
def weakRef(key, target):
    """Add target to the weak-reference list stored under key.

    A weak reference is a plain list of targets; it is created on first use.
    """
    refs = getCanonical(key)
    if refs:
        refs.append(target)
        return
    carnivore[key] = [target]
101
def getKeyrings():
    """Extracts keys from various keyrings (DDs, DMs, emeritus and removed)"""
    # Maps the label recorded for each key to the keyring's file base name.
    ring_files = {'keyring': 'debian-keyring',
                  'emeritus': 'emeritus-keyring',
                  'removed': 'removed-keys'}
    for label, basename in ring_files.items():
        # Each of these rings is read in both its .gpg and .pgp form.
        for ext in ('gpg', 'pgp'):
            path = "/srv/qa.debian.org/data/keyrings/keyrings/%s.%s" % (basename, ext)
            parseKeyring(path, label)
    # Add DM keyring
    parseKeyring("/srv/qa.debian.org/data/keyrings/keyrings/debian-maintainers.gpg", "dm")
111
def parseKeyring(keyring_file, keyring):
    """Parses the given keyring_file, recording its keys under label keyring.

    Runs gpg on keyring_file with --with-colons and walks the listing.  For
    every fingerprint a "gpg:<fpr>" identity is recorded together with the
    keyring label; every user id seen for that key is linked to it by a weak
    reference, and its e-mail address and name are attached/merged.

    Raises IOError when closing the gpg pipe reports a non-zero exit status.
    """
    contents = os.popen("gpg --no-default-keyring \
            --no-expensive-trust-checks \
            --keyring %s --list-keys \
            --with-colons --fingerprint" % keyring_file)
    fpr = None
    lastpub = None
    for line in contents.readlines():
        fields = line.split(':')
        record = fields[0]
        if record == 'pub':
            # A new key starts: forget the previous fingerprint and keep
            # field 10 of the pub line, which is used as a user id once the
            # fingerprint line has been seen.
            fpr = None
            lastpub = fields[9].strip()
            continue
        if record == 'fpr':
            fpr = "gpg:" + fields[9].strip()
            extraInfo(fpr, "x:gpg:keyring:" + fpr[4:] + ":" + keyring)
            uid = lastpub
        elif record == 'uid':
            uid = fields[9].strip()
        else:
            continue
        # Do stuff with 'uid'
        weakRef("uid:" + uid, fpr)
        name, email = parseUid(uid)
        if email:
            if email == 'leader@debian.org':
                # This address is never used for merging; the rest of this
                # user id (including its name) is skipped as well.
                continue
            email = "email:" + email
            merge(fpr, email)
            extraInfo(fpr, "x:" + fpr + ":" + email)
        if name:
            addName(name, fpr)
            extraInfo(fpr, "realname:" + name)
            extraInfo(fpr, "x:" + fpr + ":realname:" + name)
    if contents.close() is not None:
        raise IOError('Exporting %s keyring failed' % keyring_file)
151
def getLdap():
    """Load developer entries from the LDAP server at db.debian.org.

    For each entry matching objectClass=debiandeveloper an "ldap:<uid>"
    identity is merged with "<uid>@debian.org", the real name (the gecos
    field up to the first comma) is attached, the activity-* attributes are
    stored as extra info when present, and every keyFingerPrint is merged in
    as "gpg:<fpr>".
    """
    conn = ldap.initialize("ldap://db.debian.org/")
    result = conn.search_s("dc=debian,dc=org", ldap.SCOPE_SUBTREE,
            "objectClass=debiandeveloper", ['uid', 'gecos', 'keyFingerPrint', 'activity-pgp', 'activity-from'])
    for res in result:
        attrs = res[1]
        login = attrs['uid'][0]
        realname = attrs['gecos'][0].split(',')[0].strip()
        uid = "ldap:" + login
        merge(uid, "email:" + login + "@debian.org")
        extraInfo(uid, "realname:" + realname)
        if 'activity-from' in attrs:
            extraInfo(uid, "activity-from:" + attrs['activity-from'][0])
        if 'activity-pgp' in attrs:
            extraInfo(uid, "activity-pgp:" + attrs['activity-pgp'][0])
        addName(realname, uid)
        weakRef("uid:" + realname + " <" + login + "@debian.org>", uid)
        extraInfo(uid, "x:ldap:realname:%s:%s" % (login, realname))

        if 'keyFingerPrint' in attrs:
            for fpr in attrs['keyFingerPrint']:
                merge(uid, "gpg:" + fpr)
                extraInfo(uid, "x:ldap:gpg:" + login + ':' + fpr)
173
174
def getPackages():
    """Record who maintains which source package in unstable.

    Reads the output of the get-packages helper with apt_pkg.TagFile.  For
    each stanza, the Maintainer and every comma-separated Uploaders entry is
    parsed; the e-mail address gets a "maint:<package>" extra info, weak
    references from the package and from the full maintainer string, and the
    real name is attached/merged when one is present.

    Raises IOError when closing the helper's pipe reports a non-zero exit
    status.
    """
    packages = os.popen("/srv/qa.debian.org/data/ftp/get-packages \
            -s unstable -a source")
    parser = apt_pkg.TagFile(packages)
    while parser.step():
        package = parser.section.get("Package")
        maintainers = [parser.section.get("Maintainer")]
        uploaders = parser.section.get("Uploaders")
        if uploaders:
            maintainers.extend(uploaders.split(","))
        # An entry without an address is held back and glued in front of the
        # next entry (presumably to cope with names that contain a comma).
        pending = ''
        for maintainer in maintainers:
            if pending:
                maintainer = pending + ', ' + maintainer
                pending = ''
            if maintainer is None:
                sys.stderr.write("No maintainer field for %s: %s; skipping\n" % (package, maintainer))
                continue
            maintainer = maintainer.strip()
            maint, email = parseUid(maintainer)
            if not email:
                pending = maintainer
                #sys.stderr.write("Malformed maintainer field for %s: %s; skipping\n" \
                #        % (package, maintainer))
                continue
            email = "email:" + email
            if package is None:
                sys.stderr.write("No package field for %s: %s; skipping\n" % (maintainer, package))
                continue
            extraInfo(email, "maint:" + package)
            weakRef("maint:" + package, email)
            weakRef("uid:" + maintainer, email)
            if maint:
                extraInfo(email, "realname:" + maint)
                addName(maint, email)
                extraInfo(email, "x:" + "maint:" + package + ":" + email + ":realname:" + maint)
    if packages.close() is not None:
        raise IOError('Extracting package data failed')
213
def cleanUp():
    """Normalise every entry of carnivore in place.

    Weak references (lists) have each target replaced by the primary id of
    its identity, then are de-duplicated and sorted.  Identities (tuples)
    get their id list and extra-info list de-duplicated and sorted.  Keys
    containing a newline are reported on stderr.
    """
    for k, v in carnivore.items():
        if k.find("\n") >= 0:
            sys.stderr.write("Aiee, newline in key %s (for value %s)!\n" % (k,v))
        if isinstance(v, list):
            # weak ref
            primaries = set(getCanonical(ref)[0] for ref in set(v))
            # Distinct targets may resolve to the same identity, hence the
            # de-duplication after canonicalising.
            v[:] = sorted(primaries)
        elif isinstance(v, tuple):
            for members in (v[1], v[2]):
                members[:] = sorted(set(members))
235
def writeUids(fd):
    """Write the "uid: identity" table to fd.

    Every "uid:" weak reference that points at exactly one identity yields a
    line "<lower-cased uid>: <identity>"; uids pointing at several
    identities are reported on stderr and skipped.  Lines are sorted, and a
    blank line is written whenever the identity differs from the one on the
    preceding line.  Nothing is written when there are no uids.
    """
    uids = []
    for k, v in carnivore.items():
        if isinstance(v, list) and k[:4] == "uid:":
            if len(v) != 1:
                sys.stderr.write("Oops, a uid with multiple identities: "+k+"\n")
                continue
            uids.append((k[4:].lower().strip(), v[0]))
    if not uids:
        # Nothing collected: avoid indexing uids[0] below.
        return
    uids.sort()
    lastV = uids[0][1]
    for name, identity in uids:
        if identity != lastV:
            fd.write("\n")
            lastV = identity
        fd.write(name + ": " + identity + "\n")
251
252
def writeMiaEmails(fd):
    """Write one address suffix per line to fd.

    For every "email:" key the address is written with '@' replaced by '=';
    for every "ldap:" key the bare login is written.
    """
    for key in carnivore:
        if key[:6] == "email:":
            address = key[6:]
            fd.write(address.replace('@', '=') + "\n")
        elif key[:5] == "ldap:":
            fd.write(key[5:] + "\n")
260
def writeReport(fd):
    """Write a text stanza for every identity in carnivore to fd.

    Each stanza lists the LDAP account, known names, e-mail addresses, keys
    per keyring and packages of one identity, followed by X-MIA and
    X-Warning lines derived from them.  Stanzas are separated by a blank
    line.  The ids and extra info of an identity are processed in sorted
    order so the lists in the output are stable between runs.
    """
    for v in carnivore.values():
        if not isinstance(v, tuple):
            continue
        # So, we have a person
        logins, realname, email, package = [], [], [], []
        extra, expl, warnings, mia = [], [], [], []
        gecos = ""
        keyring = {'keyring': [], 'emeritus': [], 'removed': [], 'ldap': [], 'dm': []}
        for item in sorted(set(v[1]+v[2])):
            if item[:5] == "ldap:":
                logins.append(item[5:])
            elif item[:6] == "email:":
                email.append(item[6:])
            elif item[:9] == "realname:":
                realname.append(item[9:])
            elif item[:6] == "maint:":
                package.append(item[6:])
            elif item[:2] == "x:":
                expl.append(item[2:])
                if item[:11] == "x:ldap:gpg:":
                    dummy, gpg = item[11:].split(':')
                    keyring['ldap'].append(gpg)
                if item[:14] == "x:gpg:keyring:":
                    gpg, ring = item[14:].split(':')
                    keyring[ring].append(gpg)
                if item[:16] == "x:ldap:realname:":
                    # Split once only: the real name itself may contain ':'.
                    dummy, gecos = item[16:].split(':', 1)
            elif item == "mia":
                mia.append('in-db')
            else:
                extra.append(item)
        for ring in keyring.values():
            ring.sort()
        if len(logins) > 1:
            warnings.append("Multiple LDAP entries")
        if keyring['emeritus'] and keyring['keyring']:
            warnings.append("Both emeritus and active")
        if keyring['ldap'] != keyring['keyring']:
            warnings.append("Ldap doesn't match keys in keyring")
        if keyring['ldap'] and not package:
            mia.append('needs-wat')
        if keyring['emeritus'] and package:
            mia.append('emeritus-with-package')
        if not keyring['emeritus'] and not keyring['keyring'] and \
                keyring['removed'] and package:
            mia.append('removed-with-package')
        text = ""
        if logins: text += "DD: "+gecos+" <"+logins[0]+"@debian.org>\n"
        if realname: text += "Known as: "+join(', ', realname)+"\n"
        if email: text += "Using emails: "+join(', ', email)+"\n"
        for ring_name, ring_keys in keyring.items():
            for key in ring_keys:
                text += "Key in "+ring_name+": "+key+"\n"
        p = "0"
        if package and len(package) <= 5:
            p = "%s (%s)" % (len(package), join(", ", package))
        elif package:
            # More than five packages: show the first four and an ellipsis.
            p = "%s (%s)" % (len(package), join(", ", package[:4]+["..."]))
        text += "Packages: %s\n" % p
        #if expl: text += "Extra: "+join(', ', expl)+"\n"
        if mia: text += "X-MIA: "+join(', ', mia)+"\n"
        for warn in warnings:
            text += "X-Warning: "+warn+"\n"
        fd.write(text+"\n")
326
#os.chdir('/srv/qa.debian.org/data/carnivore')
#try:
#    os.mkdir('results')
#except OSError:
#    pass
#os.chdir('results')

# Collect identities from all sources; all files below are read from and
# written to the current working directory.
getLdap()
getKeyrings()
getPackages()
# merge extra IDs from file
if os.path.exists("associations"):
    with open("associations") as assoc:
        for line in assoc:
            toks = line.strip().split()
            # Only lines with exactly two identifiers are associations.
            if len(toks) != 2: continue
            merge(toks[0], toks[1])
cleanUp()

# Write to a .new file and rename, so readers never see a partial file.
with open('uids.new', 'w') as out:
    writeUids(out)
os.rename("uids.new", "uids")

with open('mia-addresses.new', 'w') as out:
    out.write("""# mia-* suffices that are valid
#
# This file is automatically generated by
# /srv/qa.debian.org/data/carnivore/extract_data, and used by
# /etc/exim4/exim4.conf to decide what mia-*@qa.debian.org addresses are
# actually valid, in order to reject invalid mail
#
# Do not remove or change without coordinating with DSA

""")
    writeMiaEmails(out)
os.rename("mia-addresses.new", "mia-addresses")

with open('report', 'w') as out:
    writeReport(out)

# Dump the whole database (key, then value) to stdout.
for k, v in carnivore.items():
    print(k)
    print(v)

Properties

Name Value
svn:eol-style native
svn:executable *
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.5