/[qa]/trunk/carnivore/extract_data
ViewVC logotype

Contents of /trunk/carnivore/extract_data

Parent Directory | Revision Log


Revision 2390 - (show annotations) (download)
Wed Jul 28 20:16:21 2010 UTC (2 years, 9 months ago) by geissert
File size: 10868 byte(s)
Know about DM's gpg keys in carnivore (Closes: #501182)

Based on patch by Sandro Tosi
1 #!/usr/bin/python
2
3 # Create a database of identities in Debian using various sources
4 # Copyright (C) 2005-2006 Jeroen van Wolffelaar <jeroen@wolffelaar.nl>
5 # $Id$
6
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
21 import os, sys, sets, ldap
22 import os.path
23 import apt_pkg
24
# activity-* fields from LDAP are private.
# Moved out of the data subdir and made world readable again -- Myon
# A mask of 2 (octal 002) only removes the "other" write bit from files
# created by this script.
os.umask(2)

# Carnivore works by having a dictionary from identifiers to identities.  An
# identity is a tuple of (primary id, list-of-ids, list-of-extra-info).
# Instead, a value can also be a string referring to another key whose
# identity is equivalent.  Weak references (see weakRef) are stored as plain
# lists of targets.
carnivore = {}
34
def join(sep, items):
    """Return the strings in items glued together with sep between them.

    An empty items sequence gives the empty string (the former reduce()
    based version raised TypeError in that case).
    """
    return sep.join(items)
37
def parseUid(uid):
    """Split a 'Real Name (comment) <address>' string into (name, email).

    The first parenthesised comment is dropped, the first <...> part becomes
    the email, and whatever is left (stripped) is the name.  When there is
    no <...> part but the remaining text contains an '@', that text is taken
    as the email and the name is returned as None.  email is None when no
    address could be found.
    """
    uid = uid.strip()
    # First, strip the comment.  The closing delimiter is only looked for
    # after the opening one, so a stray ')' earlier in the string can no
    # longer scramble the result.
    start = uid.find('(')
    if start >= 0:
        end = uid.find(')', start + 1)
        if end >= 0:
            uid = uid[:start] + uid[end + 1:]
    # Then cut out the <address> part, same rule for '<' and '>'
    email = None
    start = uid.find('<')
    if start >= 0:
        end = uid.find('>', start + 1)
        if end >= 0:
            email = uid[start + 1:end]
            uid = uid[:start] + uid[end + 1:]
    uid = uid.strip()
    if not email and '@' in uid:
        # A bare address: what looked like the name is really the email
        email, uid = uid, email
    return (uid, email)
55
def getCanonical(key):
    """Resolve key to the object it finally stands for in carnivore.

    Returns None when key is not in carnivore.  Otherwise string values are
    followed as aliases to other keys until a tuple (an identity) or a list
    (a weak reference) is found, and that object is returned.
    """
    if key not in carnivore:
        return None
    entry = carnivore[key]
    while not isinstance(entry, (tuple, list)):
        # still an alias: hop to the key it names
        entry = carnivore[entry]
    return entry
63
def merge(a, b):
    """Record that identifiers a and b belong to the same identity.

    The identity of a survives: if a is unknown, a fresh identity
    (a, [a], []) is registered for it.  The primary id of b's identity is
    turned into an alias for a's primary id, and b's ids and extra info are
    appended to a's lists (the extra info is kept sorted).

    Returns the surviving identity tuple.
    """
    ca = getCanonical(a)
    cb = getCanonical(b)
    if ca and ca == cb:
        # already the same identity
        return ca
    if not ca:
        ca = (a, [a], [])
        carnivore[a] = ca
        if a == b:
            # Merging an unknown id with itself: registering it is all there
            # is to do.  Falling through would store carnivore[a] = a, an
            # alias to itself that getCanonical() would follow forever.
            return ca
    if not cb:
        cb = (b, [b], [])

    # Redirect b's primary id to a's, then fold b's data into a
    carnivore[cb[0]] = ca[0]
    ca[1].extend(cb[1])
    ca[2].extend(cb[2])
    ca[2].sort()
    return ca
80
def addName(name, ref):
    """Attach the real name 'name' to the identity of ref.

    Normally the "realname:<name>" id is merged with ref.  The names listed
    below are only stored as extra info on ref and never merged.
    """
    # TODO: support for blacklist for nonreal names
    realname = "realname:" + name
    if name not in ('Brian Nelson', 'Thawte Freemail Member'):
        merge(ref, realname)
        return
    # presumably names shared by unrelated people -- keep them unmerged
    extraInfo(ref, realname)
87
def extraInfo(key, info):
    """Append info to the extra-info list of key's identity.

    If key is not known yet, a new identity (key, [key], [info]) is
    registered for it.
    """
    identity = getCanonical(key)
    if identity:
        identity[2].append(info)
        return
    carnivore[key] = (key, [key], [info])
94
def weakRef(key, target):
    """Note that key points at target without merging the two.

    Weak references live in carnivore as a list of targets under key; the
    list is created on first use and appended to afterwards.
    """
    targets = getCanonical(key)
    if targets:
        targets.append(target)
        return
    carnivore[key] = [target]
101
def getKeyrings():
    """Extract keys from the various keyrings (DDs, DMs, emeritus and removed)."""
    location = "/org/qa.debian.org/data/keyrings/keyrings/%s.%s"
    rings = {'keyring': 'debian-keyring',
             'emeritus': 'emeritus-keyring',
             'removed': 'removed-keys'}
    # Each of these rings is read in both its gpg and its pgp flavour
    for label, basename in rings.items():
        for flavour in ('gpg', 'pgp'):
            parseKeyring(location % (basename, flavour), label)
    # Add DM keyring
    parseKeyring("/org/qa.debian.org/data/keyrings/keyrings/debian-maintainers.gpg", "dm")
111
def parseKeyring(keyring_file, keyring):
    """Feed the keys listed in keyring_file into carnivore.

    gpg is run on keyring_file with --with-colons --fingerprint and its
    output is read record by record:

    * 'pub' starts a new key; its field 10 is remembered as a user id.
    * 'fpr' supplies the fingerprint ("gpg:<fpr>").  It is tagged with
      "x:gpg:keyring:<fpr>:<keyring>", and the user id remembered from the
      'pub' record is then processed.
    * 'uid' supplies a further user id of the current key.

    Every user id becomes a weak reference "uid:<uid>" to the fingerprint;
    its email is merged with the fingerprint and its name is added through
    addName(), both also recorded as extra info.

    keyring is the label of the ring ('keyring', 'emeritus', 'removed',
    'dm') stored in the extra info.
    """
    contents = os.popen("gpg --no-default-keyring "
                        "--no-expensive-trust-checks "
                        "--keyring %s --list-keys "
                        "--with-colons --fingerprint" % keyring_file)
    try:
        fpr = None
        lastpub = None
        for line in contents:
            items = line.split(':')
            record = items[0]
            if record == 'pub':
                fpr = None
                lastpub = items[9].strip()
                continue
            if record == 'fpr':
                fpr = "gpg:" + items[9].strip()
                extraInfo(fpr, "x:gpg:keyring:" + fpr[4:] + ":" + keyring)
                uid = lastpub
            elif record == 'uid':
                uid = items[9].strip()
            else:
                continue
            if not uid:
                # No user id text on this record: nothing to index, and an
                # empty "uid:" weak reference would only collect junk.
                continue
            # Do stuff with 'uid'
            weakRef("uid:" + uid, fpr)
            name, email = parseUid(uid)
            if email:
                email = "email:" + email
                merge(fpr, email)
                extraInfo(fpr, "x:" + fpr + ":" + email)
            if name:
                addName(name, fpr)
                extraInfo(fpr, "realname:" + name)
                extraInfo(fpr, "x:" + fpr + ":realname:" + name)
    finally:
        # always reap the gpg pipe, even when a record trips an exception
        contents.close()
148
def getLdap():
    """Load the debiandeveloper entries from db.debian.org into carnivore.

    For every entry the "ldap:<uid>" id is merged with the matching
    "email:<uid>@debian.org" address, the first comma separated part of the
    gecos field is used as real name, the activity-from / activity-pgp
    values (when present) are kept as extra info, and each keyFingerPrint is
    merged in as "gpg:<fingerprint>".
    """
    conn = ldap.initialize("ldap://db.debian.org/")
    wanted = ['uid', 'gecos', 'keyFingerPrint', 'activity-pgp', 'activity-from']
    entries = conn.search_s("dc=debian,dc=org", ldap.SCOPE_SUBTREE,
            "objectClass=debiandeveloper", wanted)
    for entry in entries:
        # second element of a result holds the attribute dictionary
        attrs = entry[1]
        login = attrs['uid'][0]
        name = attrs['gecos'][0].split(',')[0].strip()
        uid = "ldap:" + login
        merge(uid, "email:" + login + "@debian.org")
        extraInfo(uid, "realname:" + name)
        for field in ('activity-from', 'activity-pgp'):
            if field in attrs:
                extraInfo(uid, field + ":" + attrs[field][0])
        addName(name, uid)
        weakRef("uid:" + name + " <" + login + "@debian.org>", uid)
        extraInfo(uid, "x:ldap:realname:%s:%s" % (login, name))

        if 'keyFingerPrint' in attrs:
            for fpr in attrs['keyFingerPrint']:
                merge(uid, "gpg:" + fpr)
                extraInfo(uid, "x:ldap:gpg:" + login + ':' + fpr)
170
171
def getPackages():
    """Record who maintains which source package in unstable.

    The output of the get-packages helper is read as a tag file.  For every
    stanza the Maintainer and each comma separated Uploaders entry is split
    into name and email with parseUid(); the email gets "maint:<package>" as
    extra info, weak references are added from "maint:<package>" and from
    "uid:<field>" to the email, and a name, when present, is attached
    through addName().
    """
    listing = os.popen("/org/qa.debian.org/data/ftp/get-packages "
                       "-s unstable -a source")
    tagfile = apt_pkg.ParseTagFile(listing)
    while tagfile.Step():
        package = tagfile.Section.get("Package")
        people = [tagfile.Section.get("Maintainer")]
        uploaders = tagfile.Section.get("Uploaders")
        if uploaders:
            people += uploaders.split(",")
        # Text carried over from an entry in which no address was found;
        # presumably a name containing a comma that split(",") cut in two.
        pending = ''
        for maintainer in people:
            if pending:
                maintainer = pending + ', ' + maintainer
            pending = ''
            if maintainer is None:
                sys.stderr.write("No maintainer field for %s: %s; skipping\n" % (package, maintainer))
                continue
            maintainer = maintainer.strip()
            maint, email = parseUid(maintainer)
            if not email:
                # no address yet: glue this piece to the next entry
                pending = maintainer
                continue
            email = "email:" + email
            if package is None:
                sys.stderr.write("No package field for %s: %s; skipping\n" % (maintainer, package))
                continue
            extraInfo(email, "maint:" + package)
            weakRef("maint:" + package, email)
            weakRef("uid:" + maintainer, email)
            if not maint:
                continue
            extraInfo(email, "realname:" + maint)
            addName(maint, email)
            extraInfo(email, "x:" + "maint:" + package + ":" + email + ":realname:" + maint)
    listing.close()
209
def cleanUp():
    """Normalise every carnivore value in place.

    Weak reference lists are rewritten to the sorted, duplicate free primary
    ids of the identities they point at.  The id list and the extra-info
    list of every identity tuple are deduplicated and sorted.  A key that
    contains a newline is reported on stderr.
    """
    for key, value in carnivore.items():
        if "\n" in key:
            sys.stderr.write("Aiee, newline in key %s (for value %s)!\n" % (key, value))
        if isinstance(value, list):
            # weak ref: resolve each distinct target to its primary id;
            # different targets may resolve to the same identity, hence the
            # second round of deduplication
            targets = set(value)
            del value[:]
            value.extend(set([getCanonical(ref)[0] for ref in targets]))
            value.sort()
        elif isinstance(value, tuple):
            for part in (value[1], value[2]):
                part[:] = sorted(set(part))
231
def writeUids(fd):
    """Write the 'uid -> identity' table to the file object fd.

    Every "uid:" weak reference that resolves to exactly one identity gives
    a line "<lowercased uid>: <identity>"; the lines are sorted, and an
    empty line is written whenever the identity differs from the one on the
    line before.  Uids pointing at several identities are reported on
    stderr and left out.  Nothing is written when there are no uids.
    """
    uids = []
    for key, value in carnivore.items():
        if not (isinstance(value, list) and key.startswith("uid:")):
            continue
        if len(value) != 1:
            sys.stderr.write("Oops, a uid with multiple identities: "+key+"\n")
            continue
        uids.append((key[4:].lower().strip(), value[0]))
    uids.sort()
    previous = None
    for position, (name, identity) in enumerate(uids):
        # position 0 has no predecessor; this also makes an empty table
        # safe, which used to fail on uids[0]
        if position and identity != previous:
            fd.write("\n")
        previous = identity
        fd.write(name + ": " + identity + "\n")
247
248
def writeMiaEmails(fd):
    """Write one line per known address or LDAP login to the file object fd.

    "email:" keys are written with '@' replaced by '=', "ldap:" keys are
    written as the bare login.  All other keys are ignored.
    """
    for key in carnivore:
        if key.startswith("email:"):
            fd.write(key[6:].replace('@', '=') + "\n")
        elif key.startswith("ldap:"):
            fd.write(key[5:] + "\n")
256
def writeReport(fd):
    """Write a text stanza for every identity in carnivore to fd.

    A stanza lists, as far as known: the LDAP account ("DD:"), the real
    names, the emails, the key fingerprints per keyring, the number of
    maintained packages with up to five of their names, the X-MIA flags and
    the X-Warning lines.  Stanzas are separated by an empty line.
    """
    for identity in carnivore.values():
        if not isinstance(identity, tuple):
            # aliases (strings) and weak references (lists) are not people
            continue
        # So, we have a person
        logins, realname, email, package = [], [], [], []
        warnings, mia = [], []
        gecos = ""
        keyring = {'keyring': [], 'emeritus': [], 'removed': [], 'ldap': [], 'dm': []}
        for item in set(identity[1] + identity[2]):
            if item.startswith("ldap:"):
                logins.append(item[5:])
            elif item.startswith("email:"):
                email.append(item[6:])
            elif item.startswith("realname:"):
                realname.append(item[9:])
            elif item.startswith("maint:"):
                package.append(item[6:])
            elif item.startswith("x:"):
                if item.startswith("x:ldap:gpg:"):
                    dummy, gpg = item[11:].split(':')
                    keyring['ldap'].append(gpg)
                if item.startswith("x:gpg:keyring:"):
                    gpg, ring = item[14:].split(':')
                    keyring[ring].append(gpg)
                if item.startswith("x:ldap:realname:"):
                    # "<login>:<gecos>"; split once only, so that a colon
                    # inside the real name does not raise ValueError
                    gecos = item[16:].split(':', 1)[1]
            elif item == "mia":
                mia.append('in-db')
            # any other item is not used by the report
        for keys in keyring.values():
            keys.sort()

        if len(logins) > 1:
            warnings.append("Multiple LDAP entries")
        if keyring['emeritus'] and keyring['keyring']:
            warnings.append("Both emeritus and active")
        if keyring['ldap'] != keyring['keyring']:
            warnings.append("Ldap doesn't match keys in keyring")
        if keyring['ldap'] and not package:
            mia.append('needs-wat')
        if keyring['emeritus'] and package:
            mia.append('emeritus-with-package')
        if not keyring['emeritus'] and not keyring['keyring'] and \
                keyring['removed'] and package:
            mia.append('removed-with-package')

        lines = []
        if logins:
            lines.append("DD: " + gecos + " <" + logins[0] + "@debian.org>\n")
        if realname:
            lines.append("Known as: " + ', '.join(realname) + "\n")
        if email:
            lines.append("Using emails: " + ', '.join(email) + "\n")
        for ring, keys in keyring.items():
            for key in keys:
                lines.append("Key in " + ring + ": " + key + "\n")
        if not package:
            summary = "0"
        elif len(package) <= 5:
            summary = "%s (%s)" % (len(package), ", ".join(package))
        else:
            # long lists are cut to four names plus an ellipsis
            summary = "%s (%s)" % (len(package), ", ".join(package[:4] + ["..."]))
        lines.append("Packages: %s\n" % summary)
        if mia:
            lines.append("X-MIA: " + ', '.join(mia) + "\n")
        for warn in warnings:
            lines.append("X-Warning: " + warn + "\n")
        fd.write("".join(lines) + "\n")
322
323 #os.chdir('/org/qa.debian.org/data/carnivore')
324 #try:
325 # os.mkdir('results')
326 #except OSError:
327 # pass
328 #os.chdir('results')
329
# Gather the identities from all sources
getLdap()
getKeyrings()
getPackages()
# merge extra IDs from file: every line holding exactly two tokens names two
# ids that belong together, other lines are ignored
if os.path.exists("associations"):
    associations = open("associations")
    try:
        for line in associations:
            toks = line.strip().split()
            if len(toks) != 2:
                continue
            merge(toks[0], toks[1])
    finally:
        associations.close()
cleanUp()

# Results are written to a .new file first and renamed over the old file
# only after they have been written completely.
out = open('uids.new', 'w')
try:
    writeUids(out)
finally:
    out.close()
os.rename("uids.new", "uids")

out = open('mia-addresses.new', 'w')
try:
    out.write("""# mia-* suffices that are valid
#
# This file is automatically generated by
# /org/qa.debian.org/data/carnivore/extract_data, and used by
# /etc/exim4/exim4.conf to decide what mia-*@qa.debian.org addresses are
# actually valid, in order to reject invalid mail
#
# Do not remove or change without coordinating with DSA

""")
    writeMiaEmails(out)
finally:
    out.close()
os.rename("mia-addresses.new", "mia-addresses")

out = open('report', 'w')
try:
    writeReport(out)
finally:
    out.close()

# Finally dump the whole database, key and value on alternating lines
for k, v in carnivore.items():
    sys.stdout.write("%s\n%s\n" % (k, v))

Properties

Name Value
svn:eol-style native
svn:executable *
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.5