#!/usr/bin/python
# -*- coding: utf-8 -*-

# vim: expandtab

# Copyright 2002 Raphaël Hertzog
# This file is distributed under the terms of the General Public License
# version 2 or (at your option) any later version.

import os.path, rfc822, email, email.Utils, sys, string, re, cPickle
from xml.dom import implementation, ext

from config import dir, odir
from common import vcs_table, hash_name

# address_from_string takes an address in RFC822 format
# and turns it into a tuple of the form (real name, email).
# The difference from email.Utils.parseaddr and rfc822.parseaddr
# is that this routine allows unquoted commas to appear in the real name
# (in violation of RFC822). This is a hack to allow a Maintainer field to
# be like 'Maintainer: John H. Robinson, IV <jaqque@debian.org>'. [PvR]
def address_from_string(content):
    """Parse a single RFC822-style address into a (real name, email) tuple.

    content -- the address string, e.g. 'Real Name <user@example.org>'

    Unlike a plain parseaddr() call, unquoted commas in the real name are
    tolerated: every comma is swapped for a placeholder before parsing and
    swapped back in both parts of the result afterwards.
    """
    # email.utils is the current module name; email.Utils is the legacy
    # alias that only old interpreters need.
    try:
        from email.utils import parseaddr
    except ImportError:
        from email.Utils import parseaddr
    placeholder = "WEWANTNOCOMMAS"
    name, mail = parseaddr(content.replace(",", placeholder))
    return (name.replace(placeholder, ","), mail.replace(placeholder, ","))

# addresses_from_string takes a string with addresses in RFC822 format
# and changes it into a list of [real name, email] pairs (two-item lists).
# Just as address_from_string, it tries to be forgiving about unquoted
# commas in addresses. [PvR]
def addresses_from_string(content):
    """Parse a comma-separated RFC822 address list.

    content -- the raw field value, e.g. 'A B <a@x.org>, C D <c@x.org>'

    Returns a list with one two-item list [real name, email] per address.

    Only a comma directly following '>' is kept as an address separator;
    every other comma is assumed to belong to a real name and is protected
    by a placeholder while the list is parsed.
    """
    # email.utils is the current module name; email.Utils is the legacy
    # alias that only old interpreters need.
    try:
        from email.utils import getaddresses
    except ImportError:
        from email.Utils import getaddresses
    placeholder = "WEWANTNOCOMMAS"
    hacked_content = re.sub("([^>]),", "\\1" + placeholder, content)
    # Wrap the value in a dummy header so the email parser deals with it
    msg = email.message_from_string("Header: " + hacked_content)
    hacked_pairs = getaddresses(msg.get_all("Header", []))
    return [[part.replace(placeholder, ",") for part in pair]
            for pair in hacked_pairs]

def add_maintainer_info(child, name, mail, doc):
    """Append <name> and <email> text elements for one person to child.

    child -- DOM element receiving the two new sub-elements
    name  -- real name of the person
    mail  -- e-mail address of the person
    doc   -- DOM document used to create the new nodes

    Byte strings are decoded as UTF-8 with undecodable bytes replaced;
    values that are already unicode text are used unchanged.
    """
    text_type = type(u"")
    for tag_name, value in (("name", name), ("email", mail)):
        if not isinstance(value, text_type):
            # Take care of non-ascii
            value = value.decode('UTF8', 'replace')
        item_elt = doc.createElement(tag_name)
        item_elt.appendChild(doc.createTextNode(value))
        child.appendChild(item_elt)

def _append_text_element(doc, parent, tag_name, text):
    """Append <tag_name>text</tag_name> to parent."""
    elt = doc.createElement(tag_name)
    elt.appendChild(doc.createTextNode(text))
    parent.appendChild(elt)


def _append_file_items(doc, parent, value):
    """Append one <item> (md5sum, size, filename) per line of a Files value."""
    field_names = ("md5sum", "size", "filename")
    for line in value.split("\n"):
        values = line.split()
        if len(values) < len(field_names):
            # Blank or malformed line: skip it instead of dying with an
            # IndexError and leaving an empty <item> behind.
            continue
        item_elt = doc.createElement("item")
        parent.appendChild(item_elt)
        for field_name, field_value in zip(field_names, values):
            _append_text_element(doc, item_elt, field_name, field_value)


def _append_vcs_info(doc, root, tag, url):
    """Add a 'parsed' <vcs kind=... url=...> element for (x-)vcs-* fields."""
    for prefix in ('x-vcs-', 'vcs-'):
        if tag.startswith(prefix):
            # e.g. kind is 'svn' for 'x-vcs-svn'/'vcs-svn' and 'browser'
            # for 'vcs-browser'
            kind = tag[len(prefix):]
            break
    else:
        return  # not a VCS field
    repos_elts = list(root.getElementsByTagName('repository'))
    if repos_elts:   # reuse existing repository element
        repos_elt = repos_elts[0]  # invariant: at most 1 repository elt
    else:   # create a new repository element
        repos_elt = doc.createElement('repository')
        root.appendChild(repos_elt)
    if kind in vcs_table:
        kind = vcs_table[kind][0]
    vcs_elt = doc.createElement('vcs')
    vcs_elt.setAttribute('kind', kind)
    vcs_elt.setAttribute('url', url)
    repos_elt.appendChild(vcs_elt)


def update_sources_info(m, dist):
    """Update the XML information with the given Message (Package entry).

    m    -- the parsed Package entry (an rfc822.Message in this script)
    dist -- name of the distribution the entry belongs to

    Records the entry in new_done and new_dist_map, then writes
    <odir>/<hash>/<package>/<dist>.xml unless the same package/version/dist
    was already handled in the previous run (old_done) and the file still
    exists.

    If writing the file fails, a message is printed on stderr and the entry
    is removed from new_done again so that the next run retries it instead
    of keeping a possibly truncated file forever.
    """
    package = m["Package"]
    hash_dir = hash_name(package)
    package_dir = "%s/%s/%s" % (odir, hash_dir, package)
    xml_path = "%s/%s.xml" % (package_dir, dist)
    # Check if the work has already been done
    key = "%s_%s_%s" % (package, m["Version"], dist)
    new_done[key] = 1
    new_dist_map["%s_%s" % (package, dist)] = 1
    if key in old_done and os.path.isfile(xml_path):
        return
    # Make sure the directory exists
    if not os.path.isdir(package_dir):
        hash_path = odir + "/" + hash_dir
        if not os.path.isdir(hash_path):
            os.mkdir(hash_path)
        os.mkdir(package_dir)
    # Create the XML DOM object
    doc = implementation.createDocument(None, None, None)
    root = doc.createElement("source")
    doc.appendChild(root)
    root.setAttribute("release", dist)
    # A version ending in -N.M or -N.M.K is flagged as an NMU
    if re.search(r"-\d+\.\d+(\.\d+)?$", m["version"]):
        root.setAttribute("nmu", "yes")
    for tag in m.keys():
        value = m[tag]
        child = doc.createElement(tag)
        root.appendChild(child)
        if tag == "binary" or tag[0:5] == "build":
            for item in re.split(", ?", value):
                # Take care of non-ascii, prevents troubles...
                _append_text_element(doc, child, "item",
                                     unicode(item, 'ISO-8859-1', 'replace'))
        elif tag == "maintainer":
            (name, mail) = address_from_string(value)
            add_maintainer_info(child, name, mail, doc)
        elif tag == "uploaders":
            for (name, mail) in addresses_from_string(value):
                item_elt = doc.createElement("item")
                add_maintainer_info(item_elt, name, mail, doc)
                child.appendChild(item_elt)
        elif tag == "files":
            _append_file_items(doc, child, value)
        else:
            child.appendChild(doc.createTextNode(value))
        # now compute derived information and store it in the xml:
        # add 'parsed' version of VCS info
        _append_vcs_info(doc, root, tag, value)
    # Print the DOM object to a file
    try:
        f = open(xml_path, "w")
        try:
            ext.PrettyPrint(doc, f, "UTF-8")
        finally:
            # Close the file even when the serialisation fails
            f.close()
    except Exception:
        # Best effort: report the problem and carry on with other packages
        new_done.pop(key, None)
        sys.stderr.write("Output problem for " + m["package"] + "\n")

def treat_sources_file(fname, dist):
    """Scan the given Sources file and treat each Package entry.

    fname -- path of the Sources file to read
    dist  -- distribution name handed to update_sources_info for each entry

    Entries are read one rfc822.Message at a time from the same file
    object; a message without any header (or an EOFError) ends the scan.
    The file is closed even if processing an entry raises.
    """
    f = open(fname, "r")
    try:
        while True:
            try:
                m = rfc822.Message(f)
                if len(m) == 0:  # eof
                    break
                update_sources_info(m, dist)
            except EOFError:
                break
    finally:
        f.close()

# Load the list of sources generated the last time
old_done = {}
new_done = {}
new_dist_map = {}
if os.path.exists(odir + "/sources_done"):
    # NOTE(review): unpickling is only safe as long as sources_done is
    # written by this script alone (see the dump below) -- confirm that
    # nothing else can write to odir.
    f = open(odir + "/sources_done", "r")
    try:
        old_done = cPickle.load(f)
    finally:
        f.close()

distros = [
    'oldstable', 'stable', 'testing', 'unstable', 'experimental',
    'stable-proposed-updates', 'testing-proposed-updates',
    'security-oldstable', 'security-stable',
    'volatile', 'mentors',
]

# Every distribution has one Sources file per component
for distro in distros:
    for comp in ['main', 'contrib', 'non-free']:
        treat_sources_file(os.path.join(dir, 'Sources-%s_%s' % (distro, comp)),
            distro)

# Store the list of sources generated
f = open(odir + "/sources_done", "w")
try:
    cPickle.dump(new_done, f, 0)
finally:
    f.close()

# Scan the old package/distribution that existed and check if they
# still exist ... if they don't, remove the associated xml file.
for key in old_done.keys():
    # Keys have the form <package>_<version>_<dist>
    (p, v, d) = key.split("_", 2)
    if "%s_%s" % (p, d) not in new_dist_map:
        hash_dir = hash_name(p)
        filename = "%s/%s/%s/%s.xml" % (odir, hash_dir, p, d)
        filenamerebuild = "%s/%s/%s/force-rebuild" % (odir, hash_dir, p)
        if os.path.exists(filename):
            os.unlink(filename)
            # Leave an empty force-rebuild marker file next to the removed xml
            f = open(filenamerebuild, "w")
            f.close()

# We're done
