/[qa]/trunk/carnivore/extract_data
ViewVC logotype

Contents of /trunk/carnivore/extract_data

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2762 - (hide annotations) (download)
Mon May 28 08:41:12 2012 UTC (11 months, 3 weeks ago) by bartm
File size: 11046 byte(s)
Added Thomas Mueller to the list of names not to be merged.  Closes: #658457.
1 jeroen 1202 #!/usr/bin/python
2    
3     # Create a database of identities in Debian using various sources
4 jeroen 1238 # Copyright (C) 2005-2006 Jeroen van Wolffelaar <jeroen@wolffelaar.nl>
5 jeroen 1202 # $Id$
6    
7     # This program is free software; you can redistribute it and/or modify
8     # it under the terms of the GNU General Public License as published by
9     # the Free Software Foundation; either version 2 of the License, or
10     # (at your option) any later version.
11    
12     # This program is distributed in the hope that it will be useful,
13     # but WITHOUT ANY WARRANTY; without even the implied warranty of
14     # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15     # GNU General Public License for more details.
16    
17     # You should have received a copy of the GNU General Public License
18     # along with this program; if not, write to the Free Software
19     # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20    
21 jhr 2429 import os, sys, ldap
22 myon 2267 import os.path
23 jeroen 1202 import apt_pkg
24    
# activity-* fields from LDAP are private
# moved out of the data subdir and made world readable again -- Myon
# The mask is written with the 0o prefix so the literal is accepted by both
# Python 2.6+ and Python 3 (a bare "02" is a syntax error on Python 3).
os.umask(0o002)

# Carnivore works by having a dictionary from identifiers to identities. An
# identity is a tuple of (primary id, list-of-ids, list-of-extra-info).
# Instead, it also can be a string referring to another identity that is
# equivalent.
# Weak references (see weakRef) are stored in the same dictionary as plain
# lists of targets.
carnivore = {}
34    
def join(sep, items):
    """Return the strings in *items* concatenated with *sep* between them.

    An empty sequence yields the empty string instead of raising, and a
    single item is returned unchanged.
    """
    return sep.join(items)
37    
def parseUid(uid):
    """Split a "Name (comment) <email>" style string into (name, email).

    The first parenthesised comment is dropped, then the first <...> part
    is taken as the email address and removed from the name.  When no
    address was found in angle brackets but the remaining text contains
    an '@', the whole text is taken as the email and the name slot receives
    the previous (empty) email value, normally None.

    Returns a (name, email) tuple; email is None when none was found.
    """
    uid = uid.strip()
    # First, strip comment.  The closing parenthesis is searched for *after*
    # the opening one, so a stray ')' earlier in the string cannot produce
    # overlapping slices that duplicate part of the text.
    s = uid.find('(')
    e = -1
    if s >= 0:
        e = uid.find(')', s + 1)
    if e >= 0:
        uid = uid[:s] + uid[e+1:]
    # Same rule for the email delimiters: '>' must follow the '<'.
    email = None
    s = uid.find('<')
    e = -1
    if s >= 0:
        e = uid.find('>', s + 1)
    if e >= 0:
        email = uid[s+1:e]
        uid = uid[:s] + uid[e+1:]
    uid = uid.strip()
    if not email and '@' in uid:
        # Bare address without a name: swap the two slots.
        email, uid = uid, email
    return (uid, email)
55    
def getCanonical(key):
    """Resolve *key* to the entry it finally refers to in carnivore.

    String values in carnivore are redirects to another key; they are
    followed until a tuple (an identity) or a list (a weak reference) is
    reached, and that object is returned.

    Returns None when *key* itself is not in carnivore.
    """
    if key not in carnivore:
        return None
    while True:
        key = carnivore[key]
        if isinstance(key, (tuple, list)):
            return key
63    
def merge(a, b):
    """Declare the identifiers *a* and *b* to belong to the same identity.

    The identity of *a* survives: the primary id of *b*'s identity is turned
    into a redirect to *a*'s primary id, and *b*'s ids and extra info are
    appended to *a*'s lists.  Identifiers not seen before get a fresh
    identity first.

    Returns the surviving identity tuple.
    """
    ca = getCanonical(a)
    if not ca:
        ca = (a, [a], [])
        carnivore[a] = ca
    # Look b up only after a has been registered: for merge(x, x) with an
    # unknown x this finds the identity just created, instead of building a
    # second one and storing the self-referential redirect carnivore[x] = x,
    # which would make getCanonical loop forever.
    cb = getCanonical(b)
    if ca == cb:
        return ca
    if not cb:
        cb = (b, [b], [])

    carnivore[cb[0]] = ca[0]
    ca[1].extend(cb[1])
    ca[2].extend(cb[2])
    ca[2].sort()
    return ca
80    
def addName(name, ref):
    """Attach the real name *name* to the identity behind *ref*.

    Normally the "realname:" identifier is merged with *ref*.  Names on the
    exclusion list below are only recorded as extra info and never merged.
    """
    # merge with realname, TODO: support for blacklist for nonreal names
    not_merged = ('Brian Nelson', 'Luca Bruno', 'Andrew Ross',
                  'Matthew Vernon', 'Thomas Mueller',
                  'Thawte Freemail Member')
    realname = "realname:" + name
    if name not in not_merged:
        merge(ref, realname)
        return
    extraInfo(ref, realname)
87    
def extraInfo(key, info):
    """Record *info* in the extra-info list of the identity behind *key*.

    When *key* is not known yet, a new identity with *key* as primary id
    is created to hold the info.
    """
    identity = getCanonical(key)
    if identity:
        identity[2].append(info)
        return
    carnivore[key] = (key, [key], [info])
94    
def weakRef(key, target):
    """Add *target* to the weak reference stored under *key*.

    A weak reference is a list of targets kept in carnivore; the list is
    created on first use.
    """
    targets = getCanonical(key)
    if targets:
        targets.append(target)
        return
    carnivore[key] = [target]
101    
def getKeyrings():
    """Extract keys from the various keyrings (DDs, DMs, emeritus and removed)."""
    # Maps the label used in the report to the keyring file's base name.
    sources = {'keyring': 'debian-keyring',
               'emeritus': 'emeritus-keyring',
               'removed': 'removed-keys'}
    for label, basename in sources.items():
        # Each of these keyrings is read in both its gpg and pgp variant.
        for extension in ('gpg', 'pgp'):
            path = "/srv/qa.debian.org/data/keyrings/keyrings/%s.%s" % (basename, extension)
            parseKeyring(path, label)
    # Add DM keyring
    parseKeyring("/srv/qa.debian.org/data/keyrings/keyrings/debian-maintainers.gpg", "dm")
111 jeroen 1202
def parseKeyring(keyring_file, keyring):
    """Feed the keys listed in *keyring_file* into carnivore.

    gpg is run in colon-listing mode on the file.  Every fingerprint is
    tagged with the label *keyring*, and each user id found for it is
    registered as a weak reference and split into email and real name,
    which are then associated with the fingerprint.

    Raises IOError when closing the gpg pipe reports a non-None status.
    """
    listing = os.popen("gpg --no-default-keyring \
            --no-expensive-trust-checks \
            --keyring %s --list-keys \
            --with-colons --fingerprint" % keyring_file)
    fpr = None
    lastpub = None
    for record in listing.readlines():
        fields = record.split(':')
        kind = fields[0]
        if kind == 'pub':
            # A new key starts: forget the previous fingerprint and remember
            # the user id carried in field 10 of the pub record.
            fpr = None
            lastpub = fields[9].strip()
            continue
        if kind == 'fpr':
            fpr = "gpg:" + fields[9].strip()
            extraInfo(fpr, "x:gpg:keyring:"+fpr[4:] + ":" + keyring)
            # The user id of the preceding pub record is handled here.
            uid = lastpub
        elif kind == 'uid':
            uid = fields[9].strip()
        else:
            continue
        # Do stuff with 'uid'
        weakRef("uid:"+uid, fpr)
        name, address = parseUid(uid)
        if address:
            address = "email:" + address
            merge(fpr, address)
            extraInfo(fpr, "x:"+fpr+":"+address)
        if name:
            addName(name, fpr)
            extraInfo(fpr, "realname:"+name)
            extraInfo(fpr, "x:"+fpr+":realname:"+name)
    if listing.close() is not None:
        raise IOError('Exporting %s keyring failed' % keyring_file)
149 geissert 2390
def getLdap():
    """Load all debiandeveloper entries from db.debian.org into carnivore.

    For every entry the "ldap:<uid>" identifier is merged with the
    <uid>@debian.org email address and with each listed key fingerprint;
    the real name (first comma-separated field of gecos) and the optional
    activity-from / activity-pgp values are recorded as well.
    """
    conn = ldap.initialize("ldap://db.debian.org/")
    try:
        result = conn.search_s("dc=debian,dc=org", ldap.SCOPE_SUBTREE,
            "objectClass=debiandeveloper",
            ['uid', 'gecos', 'keyFingerPrint', 'activity-pgp', 'activity-from'])
    finally:
        # Release the connection even when the search fails.
        conn.unbind_s()
    for res in result:
        attrs = res[1]
        uid = "ldap:" + attrs['uid'][0]
        gecos = "realname:" + attrs['gecos'][0].split(',')[0].strip()
        # uid[5:] and gecos[9:] strip the "ldap:" / "realname:" prefixes.
        merge(uid, "email:" + uid[5:]+"@debian.org")
        extraInfo(uid, gecos)
        if 'activity-from' in attrs:
            extraInfo(uid, "activity-from:"+attrs['activity-from'][0])
        if 'activity-pgp' in attrs:
            extraInfo(uid, "activity-pgp:" +attrs['activity-pgp'][0])
        addName(gecos[9:], uid)
        weakRef("uid:"+gecos[9:]+" <"+uid[5:]+"@debian.org>", uid)
        extraInfo(uid, "x:ldap:realname:%s:%s" % (uid[5:], gecos[9:]))

        if 'keyFingerPrint' in attrs:
            for fpr in attrs['keyFingerPrint']:
                merge(uid, "gpg:" + fpr)
                extraInfo(uid, "x:ldap:gpg:"+uid[5:] + ':' + fpr)
171    
172    
def getPackages():
    """Record maintainers and uploaders of all source packages in unstable.

    The output of the get-packages helper is parsed with apt_pkg.  Each
    maintainer/uploader email gets the package recorded as "maint:" info,
    weak references are added from the package and from the full
    maintainer string to the email, and the real name is attached.

    Raises IOError when closing the helper's pipe reports a non-None status.
    """
    packages = os.popen("/srv/qa.debian.org/data/ftp/get-packages \
            -s unstable -a source")
    parser = apt_pkg.TagFile(packages)
    while parser.step():
        package = parser.section.get("Package")
        people = [parser.section.get("Maintainer")]
        uploaders = parser.section.get("Uploaders")
        if uploaders:
            people.extend(uploaders.split(","))
        # Fragment without an email address, carried over to the next entry
        # (a name containing a comma is split apart by the split above).
        pending = ''
        for maintainer in people:
            if pending:
                maintainer = pending + ', ' + maintainer
                pending = ''
            if maintainer is None:
                sys.stderr.write("No maintainer field for %s: %s; skipping\n" % (package, maintainer))
                continue
            maintainer = maintainer.strip()
            maint, email = parseUid(maintainer)
            if not email:
                pending = maintainer
                #sys.stderr.write("Malformed maintainer field for %s: %s; skipping\n" \
                #        % (package, maintainer))
                continue
            email = "email:" + email
            if package is None:
                sys.stderr.write("No package field for %s: %s; skipping\n" % (maintainer, package))
                continue
            extraInfo(email, "maint:"+package)
            weakRef("maint:"+package, email)
            weakRef("uid:"+maintainer, email)
            if maint:
                extraInfo(email, "realname:"+maint)
                addName(maint, email)
                extraInfo(email, "x:"+"maint:"+package+":"+email+":realname:"+maint)
    if packages.close() is not None:
        raise IOError('Extracting package data failed')
211 jeroen 1202
def cleanUp():
    """Normalise every entry of carnivore in place.

    Weak references are rewritten to the sorted, duplicate-free primary ids
    of their targets; the id list and extra-info list of every identity
    are de-duplicated and sorted.  Keys containing a newline are reported
    on stderr.
    """
    for k, v in carnivore.items():
        if "\n" in k:
            sys.stderr.write("Aiee, newline in key %s (for value %s)!\n" % (k,v))
        if isinstance(v, list):
            # weak ref: canonicalise the targets first, then drop the
            # duplicates that canonicalisation may have produced.
            primaries = set()
            for ref in set(v):
                primaries.add(getCanonical(ref)[0])
            v[:] = sorted(primaries)
        elif isinstance(v, tuple):
            # Slice assignment keeps the list objects themselves intact.
            for i in (1, 2):
                v[i][:] = sorted(set(v[i]))
233    
def writeUids(fd):
    """Write the "uid: identity" table to the file-like object *fd*.

    Only weak references whose key starts with "uid:" and that point to
    exactly one identity are written; the others are reported on stderr.
    Lines are sorted by the lower-cased uid, and a blank line is written
    whenever the identity differs from that of the previous line.
    Nothing is written when there is no such uid.
    """
    uids = []
    for k, v in carnivore.items():
        if isinstance(v, list) and k.startswith("uid:"):
            if len(v) != 1:
                sys.stderr.write("Oops, a uid with multiple identities: "+k+"\n")
                continue
            uids.append((k[4:].lower().strip(), v[0]))
    uids.sort()
    # None marks "no line written yet", so an empty table no longer fails
    # on uids[0].
    lastV = None
    for name, identity in uids:
        if lastV is not None and identity != lastV:
            fd.write("\n")
        lastV = identity
        fd.write(name + ": "+identity+"\n")
249    
250 jeroen 1280
def writeMiaEmails(fd):
    """Write one address suffix per line to the file-like object *fd*.

    For every "email:" key the address is written with '@' replaced by
    '='; for every "ldap:" key the login name is written as is.
    """
    for k in carnivore:
        if k.startswith("email:"):
            fd.write(k[6:].replace('@', '=')+"\n")
        elif k.startswith("ldap:"):
            fd.write(k[5:]+"\n")
258    
def writeReport(fd):
    """Write one paragraph per identity in carnivore to *fd*.

    Each paragraph lists the LDAP account, known names, email addresses,
    keys per keyring and maintained packages, followed by X-MIA and
    X-Warning lines derived from them, and ends with an empty line.
    """
    for identity in carnivore.values():
        if not isinstance(identity, tuple):
            continue
        # So, we have a person
        logins, realname, email, package = [], [], [], []
        # extra and expl are collected but currently not printed (see the
        # commented-out "Extra:" line below).
        extra, expl, warnings, mia = [], [], [], []
        gecos = ""
        keyring = {'keyring': [], 'emeritus': [], 'removed': [], 'ldap': [], 'dm':[]}
        for item in set(identity[1]+identity[2]):
            if item.startswith("ldap:"):
                logins.append(item[5:])
            elif item.startswith("email:"):
                email.append(item[6:])
            elif item.startswith("realname:"):
                realname.append(item[9:])
            elif item.startswith("maint:"):
                package.append(item[6:])
            elif item.startswith("x:"):
                expl.append(item[2:])
                if item.startswith("x:ldap:gpg:"):
                    dummy, gpg = item[11:].split(':', 1)
                    keyring['ldap'].append(gpg)
                if item.startswith("x:gpg:keyring:"):
                    gpg, ring = item[14:].split(':')
                    keyring[ring].append(gpg)
                if item.startswith("x:ldap:realname:"):
                    # Split only once: the name part may itself contain ':'.
                    dummy, gecos = item[16:].split(':', 1)
            elif item == "mia":
                mia.append('in-db')
            else:
                extra.append(item)
        for ring in keyring.values(): ring.sort()
        if len(logins) > 1:
            warnings.append("Multiple LDAP entries")
        if keyring['emeritus'] and keyring['keyring']:
            warnings.append("Both emeritus and active")
        if keyring['ldap'] != keyring['keyring']:
            warnings.append("Ldap doesn't match keys in keyring")
        if keyring['ldap'] and not package:
            mia.append('needs-wat')
        if keyring['emeritus'] and package:
            mia.append('emeritus-with-package')
        if not keyring['emeritus'] and not keyring['keyring'] and \
                keyring['removed'] and package:
            mia.append('removed-with-package')
        lines = []
        if logins: lines.append("DD: "+gecos+" <"+logins[0]+"@debian.org>\n")
        if realname: lines.append("Known as: "+', '.join(realname)+"\n")
        if email: lines.append("Using emails: "+', '.join(email)+"\n")
        for ring, keys in keyring.items():
            for key in keys:
                lines.append("Key in "+ring+": "+key+"\n")
        # At most five package names are listed in full; longer lists are
        # cut to the first four plus "...".
        p = "0"
        if package and len(package) <= 5:
            p = "%s (%s)" % (len(package), ", ".join(package))
        elif package:
            p = "%s (%s)" % (len(package), ", ".join(package[:4]+["..."]))
        lines.append("Packages: %s\n" % p)
        #if expl: lines.append("Extra: "+', '.join(expl)+"\n")
        if mia: lines.append("X-MIA: "+', '.join(mia)+"\n")
        for warn in warnings:
            lines.append("X-Warning: "+warn+"\n")
        fd.write("".join(lines)+"\n")
324    
#os.chdir('/srv/qa.debian.org/data/carnivore')
#try:
#    os.mkdir('results')
#except OSError:
#    pass
#os.chdir('results')

# Collect the identities from all sources.
getLdap()
getKeyrings()
getPackages()
# merge extra IDs from file
if os.path.exists("associations"):
    with open("associations") as associations:
        for line in associations:
            toks = line.strip().split()
            # Only lines with exactly two identifiers are used.
            if len(toks) != 2: continue
            merge(toks[0], toks[1])
cleanUp()

# uids and mia-addresses are written to a ".new" file that is renamed over
# the old one only after it has been written and closed.
with open('uids.new', 'w') as out:
    writeUids(out)
os.rename("uids.new", "uids")

with open('mia-addresses.new', 'w') as out:
    out.write("""# mia-* suffices that are valid
#
# This file is automatically generated by
# /srv/qa.debian.org/data/carnivore/extract_data, and used by
# /etc/exim4/exim4.conf to decide what mia-*@qa.debian.org addresses are
# actually valid, in order to reject invalid mail
#
# Do not remove or change without coordinating with DSA

""")
    writeMiaEmails(out)
os.rename("mia-addresses.new", "mia-addresses")

with open('report', 'w') as out:
    writeReport(out)

# Dump the whole database to stdout, key and value on alternating lines.
for k, v in carnivore.items():
    print(k)
    print(v)

Properties

Name Value
svn:eol-style native
svn:executable *
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.5