/[collab-qa]/udd/udd/carnivore_gatherer.py
ViewVC logotype

Contents of /udd/udd/carnivore_gatherer.py

Parent Directory | Revision Log


Revision 1106 - (show annotations) (download) (as text)
Sun Aug 17 12:24:24 2008 UTC (4 years, 10 months ago) by neronus-guest
Original Path: udd/src/udd/carnivore_gatherer.py
File MIME type: text/x-python
File size: 4484 byte(s)
Added command 'tables' which returns the tables used by the given source
1 #!/usr/bin/env python
2
3 """
4 This script imports the carnivore data into the database
5 See merkel.debian.org:/org/qa.debian.org/carnivore/
6 """
7
8 from aux import quote
9 import sys
10 import gzip
11 from gatherer import gatherer
12 import re
13
def get_gatherer(connection, config, source):
    """Build the gatherer object that imports the carnivore data.

    The three arguments are handed, unchanged and in the same order, to the
    carnivore_gatherer constructor; the new instance is returned.
    """
    instance = carnivore_gatherer(connection, config, source)
    return instance
16
class carnivore_gatherer(gatherer):
    """Gatherer that loads the carnivore dump into the database.

    The input file (config key 'path') is a sequence of records separated by
    blank lines; every other line has the form "Field: content".  A record
    that carries at least one e-mail address and one name is written to the
    tables named by the 'emails-table', 'names-table', 'keys-table' and
    'login-table' config keys.  Rows are identified by a running record
    counter that starts at 1.
    """

    # Fields that may show up in the input but are deliberately not imported.
    field_ignores = ["Packages", "X-MIA", "X-Warning"]

    # Maps an input field name to the record key its value is stored under
    # ("name") and to the way its content is parsed ("content-type").
    field_to_DB_map = {
        "Using emails": {"name": "emails", "content-type": "comma-separated"},
        "Known as": {"name": "names", "content-type": "comma-separated"},
        "DD": {"name": "login", "content-type": "unique-login"},
        "Key in keyring": {"name": "keyring_key", "content-type": "multiple entries"},
        "Key in ldap": {"name": "ldap_key", "content-type": "multiple entries"},
        "Key in emeritus": {"name": "emeritus_key", "content-type": "multiple entries"},
        "Key in removed": {"name": "removed_key", "content-type": "multiple entries"},
    }

    # "<kind>-table" is the config key that names each target table.
    _table_kinds = ['emails', 'names', 'keys', 'login']

    # "<type>_key" is the record entry holding the keys of each type; the
    # type itself is stored in the key_type column.
    _key_types = ['keyring', 'ldap', 'emeritus', 'removed']

    # Extracts the login from a "<login>@debian.org" address.  Compiled once
    # instead of once per input line; raw string with the dot escaped so that
    # only a literal "debian.org" matches.
    _login_re = re.compile(r'(\w+)@debian\.org')

    def __init__(self, connection, config, source):
        """Set up the base gatherer and check the required config keys."""
        gatherer.__init__(self, connection, config, source)
        self.assert_my_config('path', 'emails-table', 'names-table', 'keys-table', 'login-table')

    def tables(self):
        """Return the configured names of the tables this gatherer fills."""
        return [self.my_config["%s-table" % kind] for kind in self._table_kinds]

    def run(self):
        """Empty the target tables and refill them from the carnivore file."""
        my_config = self.my_config

        # Start harassing the DB: make place for the new data and prepare
        # the final inserts.
        cur = self.cursor()

        for kind in self._table_kinds:
            cur.execute("DELETE FROM %s" % my_config["%s-table" % kind])

        cur.execute("""PREPARE carnivore_email_insert
            AS INSERT INTO %s (id, email)
            VALUES ($1, $2)""" % (my_config['emails-table']))
        cur.execute("""PREPARE carnivore_name_insert
            AS INSERT INTO %s (id, name)
            VALUES ($1, $2)""" % (my_config['names-table']))
        cur.execute("""PREPARE carnivore_keys_insert
            AS INSERT INTO %s (id, key, key_type)
            VALUES ($1, $2, $3)""" % (my_config['keys-table']))
        cur.execute("""PREPARE carnivore_login_insert
            AS INSERT INTO %s (id, login)
            VALUES ($1, $2)""" % (my_config['login-table']))

        carnivore_data = open(my_config['path'])
        # try/finally rather than "with" so the module keeps working on the
        # old Python 2 interpreters it was written for.
        try:
            line_number = 0
            record_number = 1
            record = {}
            for line in carnivore_data:
                line_number += 1
                if not line.strip():
                    # A blank line terminates the current record.
                    self._insert_record(cur, record_number, record)
                    record_number += 1
                    record = {}
                else:
                    self._parse_line(line, line_number, record)
            # The file may end without a trailing blank line; do not lose the
            # record that was still being collected.  This is a no-op when
            # the record is empty or incomplete.
            self._insert_record(cur, record_number, record)
        finally:
            carnivore_data.close()

    def _insert_record(self, cur, record_number, record):
        """Insert one parsed record; records lacking e-mails or names are skipped."""
        # We require a minimum of data in each record:
        if 'emails' not in record or 'names' not in record:
            return

        # Collect all queries first, so nothing is executed if building one
        # of them fails.
        qs = []
        for email in record["emails"]:
            qs.append("EXECUTE carnivore_email_insert (%d, %s)" % (record_number, quote(email)))
        for name in record["names"]:
            qs.append("EXECUTE carnivore_name_insert (%d, %s)" % (record_number, quote(name)))
        if "login" in record:
            qs.append("EXECUTE carnivore_login_insert (%d, %s)" % (record_number, quote(record["login"])))
        for key_type in self._key_types:
            for key in record.get("%s_key" % key_type, ()):
                qs.append("EXECUTE carnivore_keys_insert (%d, %s, '%s')" % (record_number, quote(key), key_type))
        for query in qs:
            cur.execute(query)

    def _parse_line(self, line, line_number, record):
        """Parse one "Field: content" line and store its value in record."""
        # partition() never raises, so a line without ': ' is reported here
        # instead of aborting the whole import with a ValueError.
        field, separator, content = line.partition(': ')
        if not (separator and field and content):
            # print(<single string>) behaves identically as a Python 2 print
            # statement and as a Python 3 function call.
            print("Couldn't parse line %d: %s" % (line_number, line))
            return

        if field in self.field_ignores:
            return
        # .get() instead of indexing: an unknown field must be reported, not
        # raise KeyError.
        info = self.field_to_DB_map.get(field)
        if info is None:
            print("Unknown field in line %d: %s" % (line_number, field))
            return

        name = info["name"]
        content_type = info["content-type"]
        if content_type == "unique-login":
            match = self._login_re.search(content)
            if match is None:
                # No "<login>@debian.org" address on the line.
                print("Couldn't parse line %d: %s" % (line_number, line))
            else:
                record[name] = match.group(1)
        elif content_type == "comma-separated":
            record[name] = set(content.rstrip().split(", "))
        elif content_type == "multiple entries":
            # The field may occur several times per record; accumulate.
            record.setdefault(name, set()).add(content.rstrip())
110
if __name__ == '__main__':
    # NOTE(review): no `main` is defined or imported in the visible part of
    # this module, so running the file directly would presumably fail with a
    # NameError -- confirm, then either define main() or drop this guard.
    main()

  ViewVC Help
Powered by ViewVC 1.1.5