/[collab-qa]/udd/udd/removals_gatherer.py
ViewVC logotype

Contents of /udd/udd/removals_gatherer.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1628 - (hide annotations) (download) (as text)
Sat Nov 14 08:18:33 2009 UTC (3 years, 6 months ago) by lucas
File MIME type: text/x-python
File size: 10044 byte(s)
add removals gatherer
1 lucas 1628 #!/usr/bin/env python
2    
3     # This file is a part of the Ultimate Debian Database
4     # <http://wiki.debian.org/UltimateDebianDatabase>
5     #
6     # Copyright (C) 2009 Serafeim Zanikolas <serzan@hellug.gr>
7     #
8     # This file is distributed under the terms of the General Public
9     # License version 3 or (at your option) any later version.
10    
11     """ import data about the removal of packages (from the debian archive) in UDD
12    
13     Raw data source: http://ftp-master.debian.org/removals-full.txt
14    
15     Sample removal batch from the above file:
16    
17     =========================================================================
18     [Date: Tue, 9 Jan 2001 20:52:51 -0500] [ftpmaster: James Troup]
19     Removed the following packages from unstable:
20    
21     dsniff | 2.3-1 | source, i386
22     Closed bugs: 81709
23    
24     ------------------- Reason -------------------
25     ROM; moved to non-US (now depends on libssl)
26     ----------------------------------------------
27     =========================================================================
28    
29     Note that a removal batch may have many packages removed (unlike the one
30     above, where only dsniff is removed).
31    
32     This script when run as a standalone script will not connect to the database
33     but will instead run a basic sanity test (to make sure that the input file
34     hasn't changed in a way that would break the script).
35     """
36    
37     import sys
38     import re
39    
40     from gatherer import gatherer
41     from aux import quote
42    
def fail(msg):
    """Report a fatal error on stderr and terminate with exit status 1.

    msg -- text of the error; a newline is appended when it is written.

    Never returns: raises SystemExit(1).
    """
    sys.stderr.write("%s\n" % msg)
    # sys.exit() is always available, whereas the bare exit() builtin is only
    # injected by the site module (missing under e.g. `python -S`).
    sys.exit(1)
46    
def parse_removals(stream):
    """feed every line of stream to a Parser and return the parsed batches

    stream -- iterable of text lines (e.g. an open removals file)

    Returns the parser's list of removal batches.
    """
    # The parser is a small state machine: parser.curr_func always points at
    # the handler for what we expect to show up next in the file, in this
    # order:
    #
    #   date; ftp-master name
    #   distrib
    #   skip_line*
    #   pkg name | version | arch[, arch]   <-- >=1 lines like these
    #   skip_line*
    #   ------------------- Reason -------------------
    #   requestor; reasons
    #
    # Handlers advance curr_func themselves, so their return value is not
    # needed here.
    state_machine = Parser()
    for text_line in stream:
        if not state_machine.skip_line(text_line):
            state_machine.curr_func(text_line)
    return state_machine.removal_batches
67    
def get_gatherer(connection, config, source):
    """build the removals_gatherer for the given connection, config and source"""
    removals_importer = removals_gatherer(connection, config, source)
    return removals_importer
70    
class removals_gatherer(gatherer):
    """import removals into the database

    Reads the removals file named by the 'path' config key and reloads two
    tables: "<table>_batch" (one row per removal batch) and "<table>" (one
    row per removed package, tied to its batch through batch_id), where
    <table> is the value of the 'table' config key.
    """

    def __init__(self, connection, config, source):
        gatherer.__init__(self, connection, config, source)
        # both keys are read by run()
        self.assert_my_config('path', 'table')

    def _quote_or_null(self, value):
        """return the SQL literal for value: NULL for None, quote(value) otherwise"""
        if value is None:
            return 'NULL'
        return quote(value)

    def run(self):
        """parse the removals file and replace the content of both tables

        Calls fail() (which exits the process) when the file cannot be
        opened.
        """
        conf = self.my_config

        try:
            input_fd = open(conf['path'])
        except IOError:
            fail('failed to open %s' % conf['path'])

        # Parsing consumes the whole file, so release the descriptor before
        # the database work starts, even if parsing raises.
        try:
            batch_removals = parse_removals(input_fd)
        finally:
            input_fd.close()

        pkg_removal_table = conf['table']
        pkg_removal_batch_table = "%s_batch" % conf['table']

        # full reload: drop the previous content of both tables first
        cur = self.cursor()
        cur.execute('DELETE FROM %s' % pkg_removal_table)
        cur.execute('DELETE FROM %s' % pkg_removal_batch_table)

        # insert data for batches of removals; the position of a batch in the
        # parsed list serves as its id
        cur.execute('PREPARE batch_removals_insert ' \
                    'AS INSERT INTO %s (id, time, ftpmaster, ' \
                                       'distribution, requestor, ' \
                                       'reasons)' \
                    'VALUES ($1, $2, $3, $4, $5, $6)' \
                    % pkg_removal_batch_table)
        for i, batch_removal in enumerate(batch_removals):
            # requestor is the one field a parsed batch may leave as None
            cur.execute('EXECUTE batch_removals_insert ' \
                        '(%s, %s, %s, %s, %s, %s)' \
                        % (i, quote(batch_removal.timestamp),
                           quote(batch_removal.ftpmaster),
                           quote(batch_removal.distribution),
                           self._quote_or_null(batch_removal.requestor),
                           quote(batch_removal.reasons)))
        cur.execute('DEALLOCATE batch_removals_insert')
        cur.execute("ANALYZE %s" % pkg_removal_batch_table)

        # insert data for removals of individual packages; batch_id is the
        # same list position used as id above
        cur.execute('PREPARE pkg_removal_insert ' \
                    'AS INSERT INTO %s (batch_id, name, version, ' \
                                       'arch_array)' \
                    'VALUES ($1, $2, $3, $4)' % pkg_removal_table)
        for i, batch_removal in enumerate(batch_removals):
            for pkg in batch_removal.packages:
                # arches are passed as an array literal: {arch1,arch2,...}
                cur.execute('EXECUTE pkg_removal_insert (%s, %s, %s, %s)' \
                            % (i, quote(pkg.name), quote(pkg.version),
                               quote("{%s}" % ",".join(pkg.arches))))
        cur.execute('DEALLOCATE pkg_removal_insert')
        cur.execute("ANALYZE %s" % pkg_removal_table)
125    
def test(filename, removal_batches):
    """compare the number of parsed packages against those counted with grep

    filename -- path of the removals file that was parsed
    removal_batches -- the batches parsed from that file; each must offer
                       the packages, ftpmaster and distribution attributes

    Prints a one-line summary on success. Calls fail() (which exits the
    process) when grep cannot be run or when the two counts differ.
    """

    import subprocess

    # Count the "name | version | arches" lines. grep is invoked with an
    # argument list (no shell), so unusual characters in filename are safe,
    # and only its stdout is captured, so warnings cannot pollute the count.
    try:
        grep = subprocess.Popen(
            ["grep", "-E", "-c", "-e", r"[^ ]+ *\| *[^ ]+ *\| *[^ ]+",
             "--", filename],
            stdout=subprocess.PIPE)
        grep_output = grep.communicate()[0]
    except OSError:
        fail("failed to extract removed packages with grep")
    # grep exits with 1 when nothing matched (it still prints "0"); only a
    # status above 1 signals a real error such as an unreadable file.
    if grep.returncode > 1:
        fail("failed to extract removed packages with grep")
    npackage_removals_via_grep = int(grep_output.strip())

    npackage_removals_via_python = 0
    ftpmasters = set()
    distribs = set()
    for pkg_rm_batch in removal_batches:
        npackage_removals_via_python += len(pkg_rm_batch.packages)
        ftpmasters.add(pkg_rm_batch.ftpmaster)
        distribs.add(pkg_rm_batch.distribution)

    if npackage_removals_via_grep != npackage_removals_via_python:
        fail("%d removed packages have been parsed but %d were expected" % \
             (npackage_removals_via_python, npackage_removals_via_grep))

    # a parenthesised single argument prints the same under Python 2 and 3
    print('%d packages were removed from %d distributions, in %d\n' \
          'batches of removals done by %d ftpmaster members' % \
          (npackage_removals_via_python, len(distribs),
           len(removal_batches), len(ftpmasters)))
157    
158    
class Package(object):
    """one removed package: its name, its version and the affected arches"""

    def __init__(self, name, version, arches):
        # arches arrives as one comma-separated string, e.g. "source, i386";
        # it is stored as a list of whitespace-trimmed entries
        arch_names = []
        for raw_arch in arches.split(","):
            arch_names.append(raw_arch.strip())
        self.name, self.version, self.arches = name, version, arch_names

    def __str__(self):
        label = '%s-%s' % (self.name, self.version)
        return label
168    
class PackageRemovalBatch(object):
    """one removal batch: who removed which packages, when and from where"""

    def __init__(self, timestamp, ftpmaster):
        self.timestamp, self.ftpmaster = timestamp, ftpmaster
        self.packages = []
        # filled in later, as the corresponding lines get parsed
        self.distribution = self.requestor = self.reasons = None

    def add_pkg(self, pkg):
        """record one more package as removed by this batch"""
        self.packages.append(pkg)

    def __str__(self):
        # one package per line
        package_lines = "\n".join(map(str, self.packages))
        details = (package_lines, self.timestamp, self.ftpmaster,
                   self.distribution)
        return "removal of %s at %s by %s from %s" % details
186    
class Parser(object):
    """line-driven state machine that collects removal batches

    Feed it one line at a time through curr_func, which always refers to the
    handler for the kind of line expected next. Handlers return True when
    they consumed the line and None otherwise. Completed batches accumulate
    in removal_batches.
    """

    # "[Date: <timestamp>] [ftpmaster: <name>]"
    date_master_pat = re.compile(r"\[Date: ([^\]]+)] \[ftpmaster: ([^\]]+)\]")
    # "Removed the following packages from <distribution>:"
    distrib_pat = re.compile(r"Removed the following packages from ([a-z-]+)[:,]*")
    # "<package> | <version> | <arch>[, <arch>...]"
    pkg_version_arches_pat = re.compile(r"\s*(\S*) *\|\s*(\S+)\s*\|\s*(.*)$")
    # "------------------- Reason -------------------"
    reason_pat = re.compile(r"-+\s*Reason\s*-+")
    # "[rene...] <reasons>"
    rene_pat = re.compile(r"(\[rene[^\]]*\])\s*(.*)")

    def __init__(self):
        self.removal_batch = None      # batch currently being assembled
        self.removal_batches = []      # batches completed so far
        self.curr_func = self.parse_removal

    def skip_line(self, line):
        """return True for empty or whitespace-only lines"""
        if line.isspace() or line == "":
            return True

    def parse_removal(self, line):
        """start a new batch when line is the date/ftpmaster header"""
        match = Parser.date_master_pat.search(line)
        if match:
            timestamp, ftpmaster = match.groups()
            self.removal_batch = PackageRemovalBatch(timestamp, ftpmaster)
            self.curr_func = self.parse_distrib
            return True

    def parse_distrib(self, line):
        """record the distribution the packages were removed from"""
        match = Parser.distrib_pat.search(line)
        if match:
            self.removal_batch.distribution = match.group(1)
            self.curr_func = self.parse_pkg_version_arch_or_reason_header
            return True

    def parse_pkg_version_arch_or_reason_header(self, line):
        """add a package line to the batch, or move on at the Reason header

        Lines that are neither (e.g. "Closed bugs: ...") are left unconsumed
        and the state does not change.
        """
        match = Parser.pkg_version_arches_pat.search(line)
        if match:
            pkg, version, arches = match.groups()
            pkg_obj = Package(pkg, version, arches)
            if self.removal_batch:
                self.removal_batch.add_pkg(pkg_obj)
                return True
        elif self.removal_batch:
            match = Parser.reason_pat.search(line)
            if match:
                self.curr_func = self.parse_requestor_reasons
                return True

    def parse_requestor_reasons(self, line):
        """split the line after the Reason header into requestor and reasons

        "[rene...] text" gives the bracketed tag as requestor and the text as
        reasons. Otherwise the part before the first ';' is the requestor
        and the remainder is the reasons. A line without any ';' has no
        requestor (None) and is stored whole as the reasons.
        """
        match = Parser.rene_pat.search(line)
        if match:
            self.removal_batch.requestor = match.group(1)
            self.removal_batch.reasons = match.group(2)
        else:
            fields = line.split(';')
            # A single field means there was no ';' at all. The length has
            # to be tested: comparing the list itself with 1 is never true.
            if len(fields) == 1: # assume no requestor
                # NOTE(review): consumers of requestor must cope with None
                self.removal_batch.requestor = None
                self.removal_batch.reasons = line
            else:
                self.removal_batch.requestor = fields[0]
                self.removal_batch.reasons = ";".join(fields[1:])
        self.curr_func = self.conclude_batch
        return True # assume that we always get fed the correct line

    def conclude_batch(self, line):
        """store the current batch once the closing dashed line shows up"""
        if line.startswith("---------") and self.removal_batch is not None:
            self.removal_batches.append(self.removal_batch)
            self.removal_batch = None
            self.curr_func = self.parse_removal
            return True
254    
if __name__ == '__main__':
    # Standalone mode: no database access, just parse the file given on the
    # command line and run the sanity test on the result.
    import os

    try:
        filename = sys.argv[1]
    except IndexError:
        fail("syntax: %s <removals-file>\n" \
             "(when run from the command line will only prints stats)" \
             % os.path.basename(sys.argv[0]))

    try:
        input_fd = open(filename)
    except IOError:
        fail("failed to open %s" % filename)

    parsed_batches = parse_removals(input_fd)
    test(filename, parsed_batches)

  ViewVC Help
Powered by ViewVC 1.1.5