/[qa]/trunk/carnivore/extract_data
ViewVC logotype

Contents of /trunk/carnivore/extract_data

Parent Directory | Revision Log


Revision 2267 - (show annotations) (download)
Thu Sep 3 14:54:31 2009 UTC (3 years, 9 months ago) by myon
File size: 10576 byte(s)
move list of IDs to merge to separate file. By Enrico Zini.
1 #!/usr/bin/python
2
3 # Create a database of identities in Debian using various sources
4 # Copyright (C) 2005-2006 Jeroen van Wolffelaar <jeroen@wolffelaar.nl>
5 # $Id$
6
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
21 import os, sys, sets, ldap
22 import os.path
23 import apt_pkg
24
# activity-* fields from LDAP are private
# moved out of the data subdir and made world readable again -- Myon
# A umask of 2 (octal 02) masks only the write bit for "other" on files
# created from here on.
os.umask(2)

# Carnivore keeps a single dictionary that maps identifiers to identities.
# An identity is a tuple of (primary id, list-of-ids, list-of-extra-info).
# A value can instead be a string naming another, equivalent identifier
# whose entry has to be followed, or (see weakRef) a plain list of targets.
carnivore = dict()
34
def join(sep, items):
    """Return the strings in `items` glued together with `sep` in between.

    An empty `items` yields the empty string (the previous reduce()-based
    version raised TypeError for it), and a single item is returned as is.
    """
    return sep.join(items)
37
def _cutDelimited(text, opener, closer):
    """Remove the first `opener`...`closer` span from `text`.

    Returns (inner, remainder).  `inner` is None and `remainder` is `text`
    unchanged when there is no opener, or no closer after that opener.
    """
    start = text.find(opener)
    if start < 0:
        return None, text
    # Look for the closer only behind the opener: a stray closer earlier in
    # the string must not be paired with it.
    end = text.find(closer, start + 1)
    if end < 0:
        return None, text
    return text[start + 1:end], text[:start] + text[end + 1:]


def parseUid(uid):
    """Split a "Name (comment) <email>" style string into (name, email).

    The first parenthesised comment is dropped and the first <...> span is
    taken as the email address.  When there is no such address but the
    remaining text contains an '@', that text is treated as the address and
    the name takes the previous value of the address (None, or "" for an
    empty "<>").  The email is None when no address could be found.
    """
    uid = uid.strip()
    # First, strip comment
    dummy, uid = _cutDelimited(uid, '(', ')')
    email, uid = _cutDelimited(uid, '<', '>')
    uid = uid.strip()
    if not email and uid.find('@') >= 0:
        # Bare address without a name: swap the two roles.
        email, uid = uid, email
    return (uid, email)
55
def getCanonical(key):
    """Follow `key` through the carnivore dictionary to its final entry.

    Returns None when `key` is not in the dictionary.  Otherwise string
    values are followed as references to further keys until a tuple or a
    list is reached, and that object is returned.
    """
    if key not in carnivore:
        return None
    entry = carnivore[key]
    while not isinstance(entry, (tuple, list)):
        # A string value points at another key; keep following it.
        entry = carnivore[entry]
    return entry
63
def merge(a, b):
    """Declare the identifiers `a` and `b` to be the same identity.

    If both already resolve to equal entries, that entry is returned
    untouched.  Otherwise a fresh identity is created (and stored) for `a`
    if it has none, the primary key of b's entry is redirected to the
    primary id of a's entry, and b's id list and extra-info list are
    appended to a's.  Returns the entry for `a`.
    """
    keep = getCanonical(a)
    absorb = getCanonical(b)
    if keep and keep == absorb:
        return keep
    if not keep:
        keep = (a, [a], [])
        carnivore[a] = keep
    if not absorb:
        # Not stored on its own: the redirect below registers b's key.
        absorb = (b, [b], [])

    carnivore[absorb[0]] = keep[0]
    keep[1].extend(absorb[1])
    keep[2].extend(absorb[2])
    keep[2].sort()
    return keep
80
def addName(name, ref):
    """Attach the real name `name` to the identity behind `ref`.

    Normally the "realname:<name>" identifier is merged with `ref`.  The
    names listed below are only recorded as extra info instead of merged.
    """
    # merge with realname, TODO: support for blacklist for nonreal names
    tagged = "realname:" + name
    if name not in ('Brian Nelson', 'Thawte Freemail Member'):
        merge(ref, tagged)
        return
    extraInfo(ref, tagged)
87
def extraInfo(key, info):
    """Append `info` to the extra-info list of the identity behind `key`.

    When `key` does not resolve to an entry yet, a new identity with `key`
    as primary id and `info` as its only extra info is stored.
    """
    identity = getCanonical(key)
    if identity:
        identity[2].append(info)
    else:
        carnivore[key] = (key, [key], [info])
94
def weakRef(key, target):
    """Record that `key` points at `target` without merging the two.

    Weak references are stored as a plain list of targets under `key`; an
    existing entry for `key` gets `target` appended.
    """
    targets = getCanonical(key)
    if targets:
        targets.append(target)
    else:
        carnivore[key] = [target]
101
def getKeyrings():
    """Feed the keys of the Debian keyrings into the carnivore dictionary.

    For each keyring file (debian-keyring, emeritus-keyring, removed-keys,
    each as .gpg and .pgp) the colon-separated key listing of gpg is read.
    Every fingerprint is tagged with the keyring it was found in, and every
    user id is weakly linked to the fingerprint and split into email and
    real name, which are attached to the fingerprint's identity.
    """
    keyrings = {'keyring': 'debian-keyring',
                'emeritus': 'emeritus-keyring',
                'removed': 'removed-keys'}
    # Same arguments as before; only the whitespace between them differs,
    # which the shell started by os.popen() ignores.
    command = ("gpg --no-default-keyring"
               " --no-expensive-trust-checks"
               " --keyring /org/qa.debian.org/data/keyrings/keyrings/%s.%s"
               " --list-keys --with-colons --fingerprint")
    for keyring, basename in keyrings.items():
        for suffix in ['gpg', 'pgp']:
            contents = os.popen(command % (basename, suffix))
            try:
                fpr = None
                lastpub = None
                for line in contents:
                    items = line.split(':')
                    if items[0] == 'pub':
                        # A new key starts.  Its tenth field is remembered
                        # as the key's first user id (assumes gpg prints
                        # the primary uid there -- TODO confirm for the gpg
                        # version in use).
                        fpr = None
                        lastpub = items[9].strip()
                        continue
                    elif items[0] == 'fpr':
                        fpr = "gpg:" + items[9].strip()
                        extraInfo(fpr, "x:gpg:keyring:" + fpr[4:] + ":" + keyring)
                        uid = lastpub
                    elif items[0] == 'uid':
                        uid = items[9].strip()
                    else:
                        continue
                    if not uid:
                        # No user id text on this record: there is nothing
                        # to link, and an empty "uid:" key would collect
                        # every such fingerprint.
                        continue
                    # Do stuff with 'uid'
                    weakRef("uid:" + uid, fpr)
                    uid, email = parseUid(uid)
                    if email:
                        email = "email:" + email
                        merge(fpr, email)
                        extraInfo(fpr, "x:" + fpr + ":" + email)
                    if uid:
                        addName(uid, fpr)
                        extraInfo(fpr, "realname:" + uid)
                        extraInfo(fpr, "x:" + fpr + ":realname:" + uid)
            finally:
                # Close the pipe even when processing a line failed.
                contents.close()
141
def getLdap():
    """Feed the developer entries of the Debian LDAP into carnivore.

    Every entry matching objectClass=debiandeveloper gets an "ldap:<uid>"
    identity merged with its <uid>@debian.org address.  The first
    comma-separated part of gecos is recorded as real name, the activity-*
    attributes (when present) as extra info, and each keyFingerPrint value
    is merged in as "gpg:<fingerprint>".
    """
    conn = ldap.initialize("ldap://db.debian.org/")
    wanted = ['uid', 'gecos', 'keyFingerPrint', 'activity-pgp', 'activity-from']
    entries = conn.search_s("dc=debian,dc=org", ldap.SCOPE_SUBTREE,
                            "objectClass=debiandeveloper", wanted)
    for entry in entries:
        attrs = entry[1]
        login = attrs['uid'][0]
        uid = "ldap:" + login
        name = attrs['gecos'][0].split(',')[0].strip()

        merge(uid, "email:" + login + "@debian.org")
        extraInfo(uid, "realname:" + name)
        if 'activity-from' in attrs:
            extraInfo(uid, "activity-from:" + attrs['activity-from'][0])
        if 'activity-pgp' in attrs:
            extraInfo(uid, "activity-pgp:" + attrs['activity-pgp'][0])
        addName(name, uid)
        weakRef("uid:" + name + " <" + login + "@debian.org>", uid)
        extraInfo(uid, "x:ldap:realname:%s:%s" % (login, name))

        for fpr in attrs.get('keyFingerPrint', []):
            merge(uid, "gpg:" + fpr)
            extraInfo(uid, "x:ldap:gpg:" + login + ':' + fpr)
163
164
def getPackages():
    """Feed maintainers and uploaders of unstable's sources into carnivore.

    The output of get-packages is parsed as tag file.  For each address in
    Maintainer and Uploaders the package is recorded as "maint:<package>"
    extra info on the "email:<address>" identity, weak references are
    added from the package and from the full field text to that address,
    and a real name, if any, is attached.
    """
    # Same arguments as before; only the whitespace between them differs,
    # which the shell started by os.popen() ignores.
    packages = os.popen("/org/qa.debian.org/data/ftp/get-packages"
                        " -s unstable -a source")
    parser = apt_pkg.ParseTagFile(packages)
    while parser.Step():
        section = parser.Section
        package = section.get("Package")
        people = [section.get("Maintainer")]
        uploaders = section.get("Uploaders")
        if uploaders:
            people += uploaders.split(",")
        pending = ''
        for person in people:
            if pending:
                # Re-attach the piece kept back in the previous round.
                person = pending + ', ' + person
                pending = ''
            if person is None:
                sys.stderr.write("No maintainer field for %s: %s; skipping\n" % (package, person))
                continue
            person = person.strip()
            maint, email = parseUid(person)
            if not email:
                # No address in this piece: keep it and glue it in front of
                # the next one, in case the split on ',' cut an entry apart.
                pending = person
                continue
            email = "email:" + email
            if package is None:
                sys.stderr.write("No package field for %s: %s; skipping\n" % (person, package))
                continue
            extraInfo(email, "maint:" + package)
            weakRef("maint:" + package, email)
            weakRef("uid:" + person, email)
            if maint:
                extraInfo(email, "realname:" + maint)
                addName(maint, email)
                extraInfo(email, "x:maint:" + package + ":" + email + ":realname:" + maint)
    packages.close()
202
def cleanUp():
    """Normalise all entries of the carnivore dictionary in place.

    Weak-reference lists have each target replaced by the primary id of
    its canonical identity and are then deduplicated and sorted.  For
    identity tuples the id list and the extra-info list are deduplicated
    and sorted.  Keys containing a newline are reported on stderr.
    """
    for key, value in carnivore.items():
        if key.find("\n") >= 0:
            sys.stderr.write("Aiee, newline in key %s (for value %s)!\n" % (key, value))
        if isinstance(value, list):
            # weak ref: canonicalise the distinct targets first ...
            primaries = [getCanonical(ref)[0] for ref in set(value)]
            # ... then dedupe again, because different targets may have
            # collapsed onto the same primary id.
            value[:] = sorted(set(primaries))
        elif isinstance(value, tuple):
            for part in (value[1], value[2]):
                part[:] = sorted(set(part))
224
def writeUids(fd):
    """Write the "uid: identity" table to the file object `fd`.

    Only weak references whose key starts with "uid:" and which point at
    exactly one identity are written; the others are reported on stderr.
    Lines are sorted by the lower-cased uid, and an empty line is written
    whenever the identity differs from the one on the previous line.
    Nothing is written when there are no such entries (this used to raise
    IndexError).
    """
    uids = []
    for key, targets in carnivore.items():
        if isinstance(targets, list) and key[:4] == "uid:":
            if len(targets) != 1:
                sys.stderr.write("Oops, a uid with multiple identities: " + key + "\n")
                continue
            uids.append((key[4:].lower().strip(), targets[0]))
    uids.sort()
    previous = None
    for name, identity in uids:
        # No separator in front of the very first line.
        if previous is not None and identity != previous:
            fd.write("\n")
        previous = identity
        fd.write(name + ": " + identity + "\n")
240
241
def writeMiaEmails(fd):
    """Write one line per known email address and LDAP login to `fd`.

    For "email:" keys the address is written with '@' replaced by '=';
    for "ldap:" keys the login is written unchanged.
    """
    for key in carnivore.keys():
        if key.startswith("email:"):
            address = key[len("email:"):]
            fd.write(address.replace('@', '=') + "\n")
        elif key.startswith("ldap:"):
            fd.write(key[len("ldap:"):] + "\n")
249
def _formatPerson(identity):
    """Return the report paragraph (newline-terminated lines) for one
    identity tuple of (primary id, list-of-ids, list-of-extra-info)."""
    # 'logins' holds the "ldap:" ids; it is not called 'ldap' so that the
    # imported ldap module is not shadowed.
    logins, realname, email, package = [], [], [], []
    warnings, mia = [], []
    gecos = ""
    keyring = {'keyring': [], 'emeritus': [], 'removed': [], 'ldap': []}
    # Sorted, so that the report does not depend on set iteration order.
    for item in sorted(set(identity[1] + identity[2])):
        if item[:5] == "ldap:":
            logins.append(item[5:])
        elif item[:6] == "email:":
            email.append(item[6:])
        elif item[:9] == "realname:":
            realname.append(item[9:])
        elif item[:6] == "maint:":
            package.append(item[6:])
        elif item[:2] == "x:":
            if item[:11] == "x:ldap:gpg:":
                dummy, gpg = item[11:].split(':')
                keyring['ldap'].append(gpg)
            if item[:14] == "x:gpg:keyring:":
                gpg, ring = item[14:].split(':')
                keyring[ring].append(gpg)
            if item[:16] == "x:ldap:realname:":
                # Split once only: the name part may itself contain ':'.
                dummy, gecos = item[16:].split(':', 1)
        elif item == "mia":
            mia.append('in-db')
    for ring in keyring.values():
        ring.sort()

    if len(logins) > 1:
        warnings.append("Multiple LDAP entries")
    if keyring['emeritus'] and keyring['keyring']:
        warnings.append("Both emeritus and active")
    if keyring['ldap'] != keyring['keyring']:
        warnings.append("Ldap doesn't match keys in keyring")
    if keyring['ldap'] and not package:
        mia.append('needs-wat')
    if keyring['emeritus'] and package:
        mia.append('emeritus-with-package')
    if not keyring['emeritus'] and not keyring['keyring'] and \
            keyring['removed'] and package:
        mia.append('removed-with-package')

    lines = []
    if logins:
        lines.append("DD: " + gecos + " <" + logins[0] + "@debian.org>")
    if realname:
        lines.append("Known as: " + ", ".join(realname))
    if email:
        lines.append("Using emails: " + ", ".join(email))
    for ringname, keys in keyring.items():
        for key in keys:
            lines.append("Key in " + ringname + ": " + key)
    if not package:
        summary = "0"
    elif len(package) <= 5:
        summary = "%s (%s)" % (len(package), ", ".join(package))
    else:
        # Long lists are cut down to the first four names.
        summary = "%s (%s)" % (len(package), ", ".join(package[:4] + ["..."]))
    lines.append("Packages: %s" % summary)
    if mia:
        lines.append("X-MIA: " + ", ".join(mia))
    for warn in warnings:
        lines.append("X-Warning: " + warn)
    return "".join([line + "\n" for line in lines])


def writeReport(fd):
    """Write one paragraph per identity in carnivore to the file object `fd`.

    A paragraph lists the Debian login with its name, the known real names,
    email addresses, keys per keyring, a package count with up to five
    package names (four followed by "..." when there are more), and the
    X-MIA / X-Warning lines derived from them.  Paragraphs are separated
    by an empty line.  Entries that are not identity tuples are skipped.
    """
    for person in carnivore.values():
        if not isinstance(person, tuple):
            continue
        # So, we have a person
        fd.write(_formatPerson(person) + "\n")
315
316 #os.chdir('/org/qa.debian.org/data/carnivore')
317 #try:
318 # os.mkdir('results')
319 #except OSError:
320 # pass
321 #os.chdir('results')
322
MIA_HEADER = """# mia-* suffices that are valid
#
# This file is automatically generated by
# /org/qa.debian.org/data/carnivore/extract_data, and used by
# /etc/exim4/exim4.conf to decide what mia-*@qa.debian.org addresses are
# actually valid, in order to reject invalid mail
#
# Do not remove or change without coordinating with DSA

"""


def _publish(filename, writer, header=""):
    """Write `header` plus the output of writer(fd) to `filename`.

    The data goes to "<filename>.new" first and is renamed over `filename`
    only after it was written and closed without error.
    """
    tmpname = filename + ".new"
    out = open(tmpname, 'w')
    try:
        out.write(header)
        writer(out)
    finally:
        out.close()
    os.rename(tmpname, filename)


# Collect the identities from all sources.
getLdap()
getKeyrings()
getPackages()
# merge extra IDs from file
if os.path.exists("associations"):
    associations = open("associations")
    try:
        for line in associations:
            toks = line.split()
            # Only lines of exactly two whitespace-separated ids count.
            if len(toks) != 2:
                continue
            merge(toks[0], toks[1])
    finally:
        associations.close()
cleanUp()

# All three result files are written to a .new file and renamed into place
# (the report used to be written in place).
_publish('uids', writeUids)
_publish('mia-addresses', writeMiaEmails, MIA_HEADER)
_publish('report', writeReport)

# Dump the whole dictionary on stdout: key and value on alternating lines.
for k, v in carnivore.items():
    print(k)
    print(v)

Properties

Name Value
svn:eol-style native
svn:executable *
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.5