#!/usr/bin/env python

"""
This script imports the carnivore data into the database.
See merkel.debian.org:/org/qa.debian.org/carnivore/
"""
|
| 7 |
|
| 8 |
from aux import quote
|
| 9 |
import sys
|
| 10 |
import gzip
|
| 11 |
from gatherer import gatherer
|
| 12 |
import re
|
| 13 |
|
| 14 |
def get_gatherer(connection, config, source):
    """Build the gatherer object for the carnivore importer.

    The three arguments are handed unchanged to the carnivore_gatherer
    constructor, and the resulting instance is returned.
    """
    importer = carnivore_gatherer(connection, config, source)
    return importer
|
| 16 |
|
| 17 |
class carnivore_gatherer(gatherer):
    """Import the carnivore report into the configured database tables.

    The report at my_config['path'] is read as blocks of "Field: content"
    lines separated by blank lines. Each block that provides at least the
    e-mail and name fields is written to the emails, names, login and keys
    tables under a common numeric record id.
    """

    # Report fields that are deliberately not imported.
    field_ignores = ["Packages", "X-MIA", "X-Warning"]

    # Maps a report field to the key used in the in-memory record ("name")
    # and to the way its content is parsed ("content-type").
    field_to_DB_map = {
        "Using emails": {"name": "emails", "content-type": "comma-separated"},
        "Known as": {"name": "names", "content-type": "comma-separated"},
        "DD": {"name": "login", "content-type": "unique-login"},
        "Key in keyring": {"name": "keyring_key", "content-type": "multiple entries"},
        "Key in ldap": {"name": "ldap_key", "content-type": "multiple entries"},
        "Key in emeritus": {"name": "emeritus_key", "content-type": "multiple entries"},
        "Key in removed": {"name": "removed_key", "content-type": "multiple entries"},
    }

    # Prefixes of the "<kind>-table" configuration keys, in processing order.
    _table_kinds = ('emails', 'names', 'keys', 'login')

    # Key types stored in the keys table; each one is collected in the
    # record under "<type>_key".
    _key_types = ('keyring', 'ldap', 'emeritus', 'removed')

    # Extracts the login from an address such as "jdoe@debian.org".
    # Compiled once, as a raw string, with the dot matched literally.
    _login_re = re.compile(r'(\w+)@debian\.org')

    def __init__(self, connection, config, source):
        """Initialise the base gatherer and check this importer's config.

        The arguments are passed unchanged to gatherer.__init__;
        assert_my_config is then called with every configuration key that
        this class reads.
        """
        gatherer.__init__(self, connection, config, source)
        self.assert_my_config('path', 'emails-table', 'names-table',
                              'keys-table', 'login-table')

    def tables(self):
        """Return the configured names of the four tables this class fills."""
        return [self.my_config["%s-table" % kind] for kind in self._table_kinds]

    def run(self):
        """Empty the target tables and refill them from the carnivore file.

        Records are separated by blank lines. A record that is not followed
        by a blank line at the end of the file is stored as well.
        """
        my_config = self.my_config

        # Start harassing the DB: make room for the new data and prepare the
        # insert statements that are executed once per stored value.
        cur = self.cursor()

        for kind in self._table_kinds:
            cur.execute("DELETE FROM %s" % my_config["%s-table" % kind])

        cur.execute("""PREPARE carnivore_email_insert
            AS INSERT INTO %s (id, email)
            VALUES ($1, $2)""" % (my_config['emails-table']))
        cur.execute("""PREPARE carnivore_name_insert
            AS INSERT INTO %s (id, name)
            VALUES ($1, $2)""" % (my_config['names-table']))
        cur.execute("""PREPARE carnivore_keys_insert
            AS INSERT INTO %s (id, key, key_type)
            VALUES ($1, $2, $3)""" % (my_config['keys-table']))
        cur.execute("""PREPARE carnivore_login_insert
            AS INSERT INTO %s (id, login)
            VALUES ($1, $2)""" % (my_config['login-table']))

        record_number = 1
        record = {}
        # The context manager closes the file even if an insert fails.
        with open(my_config['path']) as carnivore_data:
            for line_number, line in enumerate(carnivore_data, 1):
                if line.strip():
                    self._parse_line(line, line_number, record)
                    continue
                # A blank line terminates the current record.
                self._store_record(cur, record, record_number)
                # NOTE(review): the indentation of the original code was
                # lost; confirm that the id should advance on every
                # separator line rather than only after a stored record.
                record_number += 1
                record = {}

        # Flush a trailing record that has no terminating blank line.
        self._store_record(cur, record, record_number)

    def _store_record(self, cur, record, record_number):
        """Insert one parsed record through the prepared statements.

        Records lacking either the 'emails' or the 'names' entry are
        skipped, since that is the minimum of data required per record.
        """
        if 'emails' not in record or 'names' not in record:
            return

        # Collect all queries first so that a quoting failure is raised
        # before anything from this record has been written.
        queries = []
        for email in record["emails"]:
            queries.append("EXECUTE carnivore_email_insert (%d, %s)"
                           % (record_number, quote(email)))
        for name in record["names"]:
            queries.append("EXECUTE carnivore_name_insert (%d, %s)"
                           % (record_number, quote(name)))
        if "login" in record:
            queries.append("EXECUTE carnivore_login_insert (%d, %s)"
                           % (record_number, quote(record["login"])))
        for key_type in self._key_types:
            for key in record.get("%s_key" % key_type, ()):
                queries.append("EXECUTE carnivore_keys_insert (%d, %s, '%s')"
                               % (record_number, quote(key), key_type))

        for query in queries:
            cur.execute(query)

    def _parse_line(self, line, line_number, record):
        """Parse one "Field: content" line and merge it into record.

        Malformed lines, unknown fields and login lines without a
        debian.org address are reported on stdout and skipped; fields
        listed in field_ignores are skipped silently.
        """
        # partition never raises, so a line without ': ' reaches the
        # diagnostic below instead of failing on tuple unpacking.
        field, separator, content = line.partition(': ')
        if not (separator and field and content):
            print("Couldn't parse line %d: %s" % (line_number, line))
            return

        if field in self.field_ignores:
            return

        info = self.field_to_DB_map.get(field)
        if info is None:
            print("Unknown field in line %d: %s" % (line_number, field))
            return

        name = info["name"]
        content_type = info["content-type"]
        if content_type == "unique-login":
            match = self._login_re.search(content)
            if match is None:
                print("Couldn't find a debian.org login in line %d: %s"
                      % (line_number, line))
                return
            record[name] = match.group(1)
        elif content_type == "comma-separated":
            record[name] = set(content.rstrip().split(", "))
        elif content_type == "multiple entries":
            # The same field may occur several times within one record.
            record.setdefault(name, set()).add(content.rstrip())
|
| 110 |
|
| 111 |
# Call the entry point when this file is executed directly rather than
# imported.
# NOTE(review): no `main` is defined or imported in the visible part of this
# file, so this call presumably raises NameError when run as a script - confirm.
if __name__ == '__main__':
    main()
|