/[collab-qa]/udd/udd/debtags_gatherer.py
ViewVC logotype

Contents of /udd/udd/debtags_gatherer.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1531 - (hide annotations) (download) (as text)
Thu Jul 23 14:05:39 2009 UTC (3 years, 9 months ago) by lucas
File MIME type: text/x-python
File size: 2671 byte(s)
add ANALYZE at the end of all importers to teach pgsql some stats about the data we just imported
1 zack 1293 #!/usr/bin/env python
2    
3     # This file is a part of the Ultimate Debian Database
4     # <http://wiki.debian.org/UltimateDebianDatabase>
5     #
6     # Copyright (C) 2008 Stefano Zacchiroli <zack@debian.org>
7     #
8     # This file is distributed under the terms of the General Public
9     # License version 3 or (at your option) any later version.
10    
11     """ import debtags data into the database
12    
13     Tag information is downloaded from SVN (via http/websvn, to avoid an
14     extra dependency on svn); see the "update-command" configuration of
15     the debtags gatherer.
16     """
17    
18     import re
19     import sys
20    
21     from gatherer import gatherer
22     from aux import quote
23    
24    
# A "live" instance of the tag database, whose lines should match the regexp
# below, is at: http://svn.debian.org/viewsvn/*checkout*/debtags/tagdb/tags
#
# Each line has the shape "<pkg>: <tag>, <tag>, ...".  In the package-name
# character class the '-' is deliberately placed last so that it is a literal
# hyphen; placed between '+' and '.' it would form a range that also admits
# ',' as a package-name character.
tag_line_RE = re.compile(
    r'^(?P<pkg>[a-z0-9+.-]+):\s+(?P<tags>[\w:+-]+(,\s+[\w:+-]+)*)$')
# Separator between two consecutive tags inside the <tags> part of a line.
tag_sep_RE = re.compile(r',\s+')
30    
def parse_tags(fname):
    """Yield (package, tag) pairs read from the debtags database file *fname*.

    Every line is stripped and matched against the module-level
    ``tag_line_RE``.  A matching line produces one pair per tag, the tag
    list being split with ``tag_sep_RE``.  A line that does not match is
    reported on stderr together with its 1-based line number and skipped.

    The file is opened in a ``with`` block so that it is closed even when
    the consumer stops iterating early or an error interrupts the generator.
    """
    with open(fname) as tags_db:
        for line_no, line in enumerate(tags_db, 1):
            line = line.strip()
            parsed_line = tag_line_RE.match(line)
            if not parsed_line:
                sys.stderr.write(
                    "debtags: can not parse line %d: %s\n" % (line_no, line))
                continue
            pkg = parsed_line.group('pkg')
            for tag in tag_sep_RE.split(parsed_line.group('tags')):
                yield (pkg, tag)
49    
50    
def get_gatherer(connection, config, source):
    """Build the debtags_gatherer for *connection*, *config* and *source*.

    The three arguments are forwarded unchanged to the debtags_gatherer
    constructor and the new instance is returned.
    """
    instance = debtags_gatherer(connection, config, source)
    return instance
53    
54    
class debtags_gatherer(gatherer):
    """Gatherer that loads debtags (package, tag) pairs into a table."""

    def __init__(self, connection, config, source):
        """Set up the base gatherer, then hand the 'path' and 'table' keys
        to assert_my_config (presumably a presence check -- both keys are
        read by run()).
        """
        gatherer.__init__(self, connection, config, source)
        self.assert_my_config('path', 'table')

    def run(self):
        """Replace the content of the configured table with fresh tag data.

        Steps, all issued through one cursor:
          1. delete every row of the table named by the 'table' setting;
          2. prepare an INSERT statement for (package, tag);
          3. execute it once per pair yielded by parse_tags() for the file
             named by the 'path' setting, each value going through quote();
          4. deallocate the prepared statement;
          5. ANALYZE the table so the database has statistics about the
             rows that were just imported.
        """
        settings = self.my_config
        db = self.cursor()
        table = settings['table']

        db.execute('DELETE FROM %s' % table)

        prepare_sql = ('PREPARE debtags_insert '
                       'AS INSERT INTO %s (package, tag) VALUES ($1, $2)')
        db.execute(prepare_sql % table)

        for row in parse_tags(settings['path']):
            pkg, tag = row
            values = (quote(pkg), quote(tag))
            db.execute('EXECUTE debtags_insert (%s, %s)' % values)

        db.execute('DEALLOCATE debtags_insert')
        db.execute("ANALYZE %s" % table)
74 zack 1293
75    
def test():
    """Debugging helper: dump the tuples that would be inserted in the db.

    Parses the file named by the first command-line argument and writes
    one line per <pkg, tag> pair to standard output, the two fields being
    separated by a tab.
    """
    emit = sys.stdout.write
    for pair in parse_tags(sys.argv[1]):
        emit("%s\t%s" % pair + "\n")
82    
# Executed as a script (not imported): run the debugging helper, which reads
# the tags file named on the command line.
if __name__ == '__main__':
    test()

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.5