/[qa]/trunk/pts/www/bin/sources_to_xml.py
ViewVC logotype

Diff of /trunk/pts/www/bin/sources_to_xml.py

Parent Directory | Revision Log | View Patch

revision 344 by hertzog, Mon Aug 12 17:20:36 2002 UTC revision 2565 by hertzog, Thu Aug 25 09:11:42 2011 UTC
# Line 1  Line 1 
1  #!/usr/bin/python2.2  #!/usr/bin/python
2    # -*- coding: utf-8 -*-
3    
 # Make sure tabs expand to 8 spaces in vim  
4  # vim: expandtab  # vim: expandtab
5    
6  import os.path, rfc822, sys, string, re  # Copyright 2002-2011 Raphaël Hertzog
7  from xml.dom import implementation, ext  # This file is distributed under the terms of the General Public License
8    # version 2 or (at your option) any later version.
9    
10    import os.path, rfc822, email, email.Utils, sys, string, re, cPickle
11    import xml.dom
12    
13  from config import dir, odir  from config import dir, odir
14    from common import vcs_table, hash_name, DpkgVersion
15    
16    # address_from_string takes an address in RFC822 format
17    # and turns it into a tuple of the form (real name, email).
18    # The difference with email.Utils.parseaddr  and rfc822.parseaddr
19    # is that this routine allows unquoted commas to appear in the real name
20    # (in violation of RFC822). This is a hack to allow a Maintainer field to
21    # be like 'Maintainer: John H. Robinson, IV <jaqque@debian.org>'. [PvR]
22    def address_from_string(content):
23        hacked_content = string.replace(content, ",", "WEWANTNOCOMMAS")
24        (name, mail) = email.Utils.parseaddr(hacked_content)
25        return (string.replace(name,"WEWANTNOCOMMAS",","),string.replace(mail,"WEWANTNOCOMMAS",","))
26    
27    # addresses_from_string takes a string with addresses in RFC822 format
28    # and changes it into a list of tuples of the form (real name, email).
29    # Just as address_from_string, it tries to be forgiving about unquoted
30    # commas in addresses. [PvR]
31    def addresses_from_string(content):
32        pattern = re.compile("([^>]),")
33        hacked_content = pattern.sub("\\1WEWANTNOCOMMAS", content)
34        msg = email.message_from_string("Header: " + hacked_content)
35        hacked_list = email.Utils.getaddresses(msg.get_all("Header", []))
36        list = map(lambda p:
37                   map(lambda s:string.replace(s,"WEWANTNOCOMMAS",","), p),
38                   hacked_list)
39        return list
40    
41  """if os.path.isdir("incoming"):  def add_maintainer_info(child, name, mail, doc):
42      dir = "incoming"      text = doc.createTextNode(unicode(name, 'UTF-8', 'replace')) # Take care of non-ascii
 elif os.path.isdir("../incoming"):  
     dir = "../incoming"  
 else:  
     dir = "/home/rhertzog/shared/paquets/debian/pts/incoming"  
   
 odir = dir + "/../base" """  
   
 def add_maintainer_info(child, content, doc):  
     (name, email) = rfc822.parseaddr(content)  
     text = doc.createTextNode(unicode(name,'iso-8859-1')) # Take care of non-ascii  
43      item_elt = doc.createElement("name")      item_elt = doc.createElement("name")
44      item_elt.appendChild(text)      item_elt.appendChild(text)
45      child.appendChild(item_elt)      child.appendChild(item_elt)
46      text = doc.createTextNode(email)      text = doc.createTextNode(mail)
47      item_elt = doc.createElement("email")      item_elt = doc.createElement("email")
48      item_elt.appendChild(text)      item_elt.appendChild(text)
49      child.appendChild(item_elt)      child.appendChild(item_elt)
50    
51  def update_sources_info(m, dist):  def update_sources_info(m, dist):
52      """Update the XML information with the given Message (Package entry)"""      """Update the XML information with the given Message (Package entry)"""
53      global odir      global odir, old_done, new_done
     # Make sure the directory exists  
54      package = m["Package"]      package = m["Package"]
55      hash = package[0]      version = DpkgVersion(m["Version"])
56      if package[0:3] == "lib":      hash = hash_name(package)
57          hash = package[0:4]      xml_ok = os.path.isfile("%s/%s/%s/%s.xml" % (odir, hash, package, dist))
58        # Check if the work has already been done
59        key = "%s_%s" % (m["Package"], dist)
60        # Skip (duplicate) old entries
61        if old_done.has_key(key) and version < old_done[key] and xml_ok:
62            return
63        if new_done.has_key(key) and version <= new_done[key] and xml_ok:
64            return
65        new_done[key] = version
66        # Don't redo the work already done
67        if old_done.has_key(key) and old_done[key] == version and xml_ok:
68            return
69        # Make sure the directory exists
70      if not os.path.isdir(odir + "/" + hash + "/" + package):      if not os.path.isdir(odir + "/" + hash + "/" + package):
71          if not os.path.isdir(odir + "/" + hash):          if not os.path.isdir(odir + "/" + hash):
72              os.mkdir(odir + "/" + hash)              os.mkdir(odir + "/" + hash)
73          os.mkdir(odir + "/" + hash + "/" + package)          os.mkdir(odir + "/" + hash + "/" + package)
74      # Create the XML DOM object      # Create the XML DOM object
75      doc = implementation.createDocument(None, None, None)      doc = xml.dom.getDOMImplementation('minidom').createDocument(None, None, None)
76      root = doc.createElement("source")      root = doc.createElement("source")
77      doc.appendChild(root)      doc.appendChild(root)
78      root.setAttribute("release", dist);      root.setAttribute("release", dist)
79      if re.search("-\d+\.\d+(\.\d+)?$", m["version"]):      if re.search("-\d+\.\d+(\.\d+)?$", m["version"]):
80          root.setAttribute("nmu", "yes")          root.setAttribute("nmu", "yes")
81      for tag in m.keys():      for tag in m.keys():
82          child = doc.createElement(tag)          child = doc.createElement(tag)
83          root.appendChild(child)          root.appendChild(child)
84          if tag == "binary" or tag[0:5] == "build":          if tag == "binary" or tag[0:5] == "build":
85              for item in re.split(", ?", m[tag]):              for item in re.split(",[ \n]*", m[tag]):
86                  text = doc.createTextNode(item)                  # Take care of non-ascii, prevents troubles...
87                    text = doc.createTextNode(unicode(item, 'UTF-8', 'replace'))
88                  item_elt = doc.createElement("item")                  item_elt = doc.createElement("item")
89                  item_elt.appendChild(text)                  item_elt.appendChild(text)
90                  child.appendChild(item_elt)                  child.appendChild(item_elt)
91          elif tag == "maintainer":          elif tag == "maintainer":
92              add_maintainer_info(child, m[tag], doc)              (name, mail) = address_from_string(m[tag])
93                add_maintainer_info(child, name, mail, doc)
94          elif tag == "uploaders":          elif tag == "uploaders":
95              for item in re.split(", ?", m[tag]):              uploaders = addresses_from_string(m[tag])
96                for item in uploaders:
97                  item_elt = doc.createElement("item")                  item_elt = doc.createElement("item")
98                  add_maintainer_info(item_elt, item, doc)                  (name,mail) = item
99                    add_maintainer_info(item_elt, name, mail, doc)
100                  child.appendChild(item_elt)                  child.appendChild(item_elt)
101          elif tag == "files":          elif tag == "files":
102              for line in string.split(m[tag], "\n"):              for line in string.split(m[tag], "\n"):
# Line 76  def update_sources_info(m, dist): Line 111  def update_sources_info(m, dist):
111                      new_elt.appendChild(text)                      new_elt.appendChild(text)
112                      item_elt.appendChild(new_elt)                      item_elt.appendChild(new_elt)
113          else:          else:
114              text = doc.createTextNode(m[tag])              text = doc.createTextNode(unicode(m[tag], 'UTF-8', 'replace'))
115              child.appendChild(text)              child.appendChild(text)
116            # now compute derived information and store it in the xml
117                # add 'parsed' version of VCS info
118            if tag[:6] == 'x-vcs-' or tag[:4] == 'vcs-':
119                if tag[0] == 'x':
120                    prefix_len = 6
121                else:
122                    prefix_len = 4
123                # e.g. tag[prefix_len:] would be 'svn' for 'x-vcs-svn'/'vcs-svn'
124                # would be 'browser' for 'vcs-browser' (to be handled specially)
125                repos_elt = list(root.getElementsByTagName('repository'))
126                if repos_elt:   # reuse existing repository element
127                    repos_elt = repos_elt[0] # invariant: at most 1 repository elt
128                else:   # create a new repository element
129                    repos_elt = doc.createElement('repository')
130                    root.appendChild(repos_elt)
131                vcs_elt = doc.createElement('vcs')
132                kind = tag[prefix_len:]
133                if vcs_table.has_key(kind):
134                    kind = vcs_table[kind][0]
135                vcs_elt.setAttribute('kind', kind)
136                vcs_elt.setAttribute('url', m[tag])
137                repos_elt.appendChild(vcs_elt)
138      # Print the DOM object to a file      # Print the DOM object to a file
139      try:      try:
140          f = open("%s/%s/%s/%s.xml" % (odir, hash, package, dist), "w")          f = open("%s/%s/%s/%s.xml" % (odir, hash, package, dist), "w")
141          ext.PrettyPrint(doc, f, "iso-8859-1")          f.write(doc.toxml(encoding="UTF-8"))
142          f.close()          f.close()
143      except:      except Exception, msg:
144          sys.stderr.write("Output problem for" + m["package"] + "\n");          sys.stderr.write("Output problem for %s (%s)\n" % (m["package"], msg));
145    
146  def treat_sources_file(f, dist):  def update_source_binary_mapping(m, dist):
147        global binary_to_sources, source_to_binaries
148        src = m["Package"]
149        for pkg in m["Binary"].split(","):
150            pkg = pkg.strip()
151            if not source_to_binaries.has_key(src):
152                source_to_binaries[src] = { 'current': [], 'merged': [] }
153            if not binary_to_sources.has_key(pkg):
154                binary_to_sources[pkg] = { 'current': None, 'merged': [] }
155            if dist == "unstable":
156                source_to_binaries[src]['current'].append(pkg)
157                binary_to_sources[pkg]['current'] = src
158            for d in 'merged', dist:
159                if not source_to_binaries[src].has_key(d):
160                    source_to_binaries[src][d] = []
161                if not binary_to_sources[pkg].has_key(d):
162                    binary_to_sources[pkg][d] = []
163                if pkg not in source_to_binaries[src][d]:
164                    source_to_binaries[src][d].append(pkg)
165                if src not in binary_to_sources[pkg][d]:
166                    binary_to_sources[pkg][d].append(src)
167    
168    def treat_sources_file(fname, dist, update_mapping):
169      """Scan the given Sources file and treat each Package entry"""      """Scan the given Sources file and treat each Package entry"""
170        try:
171            f = open(fname, "r")
172        except IOError:
173            sys.stderr.write("Skipping parsing of non-existing/non-readable %s\n" % fname)
174            return
175      while 1:      while 1:
176          try:          try:
177              m = rfc822.Message(f)              m = rfc822.Message(f)
178              if len(m) == 0: #eof              if len(m) == 0: #eof
179                  break                  break
180              update_sources_info(m, dist)              update_sources_info(m, dist)
181                if update_mapping:
182                    update_source_binary_mapping(m, dist)
183          except EOFError:          except EOFError:
184              break              break
185        f.close()
186    
187    # Load the list of sources generated the last time
188  for comp in ["main", "contrib", "non-free"]:  old_done = {}
189      for dist in ["stable", "testing", "unstable"]:  new_done = {}
190          f = open(dir + "/Sources_%s_%s" % (dist, comp), "r")  if os.path.exists(odir + "/sources_done"):
191          treat_sources_file(f, dist)      f = open(odir + "/sources_done", "r")
192          f.close()      old_done = cPickle.load(f)
         f = open(dir + "/Sources-nonus_%s_%s" % (dist, comp), "r")  
         treat_sources_file(f, dist)  
         f.close()  
     # Experimental  
     f = open(dir + "/Sources-experimental_" + comp, "r")  
     treat_sources_file(f,  "experimental")  
193      f.close()      f.close()
194    
195    # Order matters in main_distros
196    main_distros = ['oldstable', 'stable', 'testing', 'unstable', 'experimental']
197    other_distros = [
198        'security-oldstable', 'oldstable-backports',
199        'stable-proposed-updates', 'stable-updates', 'security-stable',
200        'stable-backports',
201        'testing-proposed-updates', 'security-testing',
202        'mentors',
203    ]
204    
205    source_to_binaries = {}
206    binary_to_sources = {}
207    
208    for distro in main_distros:
209        for comp in ['main', 'contrib', 'non-free']:
210            treat_sources_file(os.path.join(dir, 'Sources-%s_%s' % (distro, comp)),
211                distro, True)
212    for distro in other_distros:
213        for comp in ['main', 'contrib', 'non-free']:
214            treat_sources_file(os.path.join(dir, 'Sources-%s_%s' % (distro, comp)),
215                distro, False)
216    
217    # Store the list of sources generated
218    f = open(odir + "/sources_done", "w")
219    cPickle.dump(new_done, f, 0)
220    f.close()
221    
222    # Store the src/pkg mappings
223    f = open(odir + "/sources_mapping", "w")
224    cPickle.dump(source_to_binaries, f, 0)
225    cPickle.dump(binary_to_sources, f, 0)
226    f.close()
227    
228    # Also store a copy in the old sources.map format, to be used by the
229    # Perl mail interface
230    with open(os.path.join(odir, "sources.map"), 'w') as f:
231        for binary, source in binary_to_sources.iteritems():
232            # If the package is in current (i.e., unstable), we use it
233            if source['current'] is not None:
234                f.write("%s %s\n" % (binary, source['current']))
235            # Otherwise we pick an arbitrary one among all the available
236            # distributions
237            else:
238                f.write("%s %s\n" % (binary, source['merged'][0]))
239    
240    # Scan the old package/distribution that existed and check if they
241    # still exist ... if they don't, remove the associated xml file.
242    for key in old_done.keys():
243        (p, d) = key.split("_", 1)
244        if not new_done.has_key(key):
245            hash = hash_name(p)
246            filename = "%s/%s/%s/%s.xml" % (odir, hash, p, d)
247            filenamerebuild = "%s/%s/%s/force-rebuild" % (odir, hash, p)
248            if os.path.exists(filename):
249                os.unlink(filename)
250                f = open(filenamerebuild, "w")
251                f.close()
252    
253    # We're done

Legend:
Removed from v.344  
changed lines
  Added in v.2565

  ViewVC Help
Powered by ViewVC 1.1.5