#!/usr/bin/env python

"""
This script imports the carnivore data into the database
See merkel.debian.org:/org/qa.debian.org/carnivore/
"""

import aux
import sys
import gzip
from gatherer import gatherer
import re

def get_gatherer(connection, config):
  return carnivore_gatherer(connection, config)

class carnivore_gatherer(gatherer):
  carnivore_field_ignores = ["Packages", "X-MIA", "X-Warning"]
  carnivore_field_to_DB_map = {
    "Using emails":    {"name": "emails", "content-type": "comma-separated"},
    "Known as":        {"name": "names", "content-type": "comma-separated"},
    "DD":              {"name": "login", "content-type": "unique-login"},
    "Key in keyring":  {"name": "keyring_key", "content-type": "multiple entries"},
    "Key in ldap":     {"name": "ldap_key", "content-type": "multiple entries"},
    "Key in emeritus": {"name": "emeritus_key", "content-type": "multiple entries"},
    "Key in removed":  {"name": "removed_key", "content-type": "multiple entries"},
  }

  def __init__(self, connection, config):
    gatherer.__init__(self, connection, config)

  def run(self, source):
    try:
      my_config = self.config[source]
    except:
      raise

    #check that the config contains everything we need:
    for key in ['path', 'emails-table', 'names-table', 'keys-table', 'login-table']:
      if not key in my_config:
        raise aux.ConfigException, "%s not configured for source %s" % (key, source)

    #start harassing the DB, preparing the final inserts and making place
    #for the new data:
    cur = self.cursor()

    for table in ['emails', 'names', 'keys', 'login']:
      cur.execute("DELETE FROM %s" % my_config["%s-table" % table])

    cur.execute("""PREPARE carnivore_email_insert 
      AS INSERT INTO %s (id, email) 
      VALUES ($1, $2)""" % (my_config['emails-table']))
    cur.execute("""PREPARE carnivore_name_insert
      AS INSERT INTO %s (id, name)
      VALUES ($1, $2)""" % (my_config['name-table']))
    cur.execute("""PREPARE carnivore_keys_insert
      AS INSERT INTO %s (id, key, key_type)
      VALUES ($1, $2, $3)""" % (my_config['keys-table']))
    cur.execute("""PREPARE carnivore_login_insert
      AS INSERT INTO %s (id, login)
      VALUES ($1, $2)""" % (my_config['login-table']))

    carnivore_data = file.open(my_config['path'])
    (line_number, record_number) = (0, 1);
    record = {}
    for line in carnivore_data:
      line_number += 1
      if len(line) == 0 or line.isspace():
        #We require a minimum of data in each record:
        if 'emails' in record and 'names' in record:
          #collect all queries:
          qs = []
          for email in record[emails]:
            qs.append("EXECUTE carnivore_email_insert (%d, '%s')" % (record_number, email))
          for name in record[names]:
            qs.append("EXECUTE carnivore_name_insert (%d, '%s')" % (record_number, name))
          if record[login]:
            qs.append("EXECUTE carnivore_login_insert (%d, '%s')" % (record_number, record[login]))
          for key_type in ['keyring', 'ldap', 'emeritus', 'removed']
            if record["%s_key" % key_type]:
              for key in record["%s_key" % key_type]:
                qs.append("EXECUTE carnivore_keys_insert (%d, '%s', '%s')" % (record_number, key, key_type))
          for query in qs:
            cur.execute(query)
        record_number += 1
        record = {}
      else:
        (field, content) = line.split(': ', 1)
        if not (len(field) and len(content)):
          print "Couldn't parse line %d: %s" % (line_number, line)
        else:
          field_info = {}
          if field in carnivore_field_ignores:
            continue
          elif carnivore_field_to_DB_map[field]:
            info = carnivore_field_to_DB_map[field]
          else:
            print "Unknown field in line %d: %s" % (line_number, field)
            continue
      
          if   info["content-type"] == "unique-login":
            match = re.compile('(\w+)@debian.org').search(content)
            record[info["name"]] = match.group(1) 
          elif info["content-type"] == "comma-separated":
            record[info["name"]] = content.rstrip().split(", ")
          elif info["content-type"] == "multiple entries":
            if info["name"] not in record:
              record[info["name"]] = []
            record[info["name"]].append(content.rstrip())

if __name__ == '__main__':
  main()
