/[collab-qa]/udd/udd/debtags_gatherer.py
ViewVC logotype

Contents of /udd/udd/debtags_gatherer.py

Parent Directory | Revision Log


Revision 1531 - (show annotations) (download) (as text)
Thu Jul 23 14:05:39 2009 UTC (3 years, 10 months ago) by lucas
File MIME type: text/x-python
File size: 2671 byte(s)
add ANALYZE at the end of all importers to teach pgsql some stats about the data we just imported
1 #!/usr/bin/env python
2
3 # This file is a part of the Ultimate Debian Database
4 # <http://wiki.debian.org/UltimateDebianDatabase>
5 #
6 # Copyright (C) 2008 Stefano Zacchiroli <zack@debian.org>
7 #
8 # This file is distributed under the terms of the General Public
9 # License version 3 or (at your option) any later version.
10
11 """ import debtags data into the database
12
13 tag information is downloaded from SVN (though via http/websvn to
14 avoid an extra dependency on svn), see the "update-command"
15 configuration of the debtags gatherer
16 """
17
18 import re
19 import sys
20
21 from gatherer import gatherer
22 from aux import quote
23
24
# A "live" instance of the tag database, whose lines should match the regexp
# below, is at: http://svn.debian.org/viewsvn/*checkout*/debtags/tagdb/tags
#
# Expected line format: "<pkg>: <tag>, <tag>, ...".
# In the package character class the '-' is placed last so that it is a
# literal hyphen.  Written as "+-\." it formed the range '+'..'.', which also
# let ',' through as part of a package name.
tag_line_RE = re.compile(r'^(?P<pkg>[a-z0-9+.-]+):\s+(?P<tags>[\w:+-]+(,\s+[\w:+-]+)*)$')
# Separator between two tags inside the "tags" group of tag_line_RE.
tag_sep_RE = re.compile(r',\s+')
# field_sep_RE = re.compile(r':\s+')
30
def parse_tags(fname):
    """Yield (package, tag) pairs read from the tag database file `fname`.

    Each line is stripped and matched against tag_line_RE.  A matching line
    yields one (pkg, tag) tuple per tag listed on it, split with tag_sep_RE.
    A line that does not match is reported on stderr together with its
    1-based line number and is otherwise skipped.

    This is a generator: the file is opened when iteration starts and is
    closed when the generator is exhausted, explicitly closed, or aborted by
    an exception.
    """
    tags_db = open(fname)
    try:
        line_no = 0
        for line in tags_db:
            line_no += 1
            line = line.strip()
            parsed_line = tag_line_RE.match(line)
            if not parsed_line:
                # Same text the former "print >> sys.stderr" produced, but
                # spelled so that it also works where print is a function.
                sys.stderr.write(
                    "debtags: can not parse line %d: %s\n" % (line_no, line))
                continue
            parts = parsed_line.groupdict()
            pkg = parts['pkg']
            for tag in tag_sep_RE.split(parts['tags']):
                yield (pkg, tag)
    finally:
        # Runs even when the consumer stops iterating early, so the file
        # handle is never left open.
        tags_db.close()
49
50
def get_gatherer(connection, config, source):
    """Build and return the debtags gatherer for the given arguments.

    All three arguments are handed unchanged to the debtags_gatherer
    constructor.
    """
    importer = debtags_gatherer(connection, config, source)
    return importer
53
54
class debtags_gatherer(gatherer):
    """Gatherer that loads debtags (package, tag) pairs into a table.

    Configuration keys read by this class: 'path' (the tag file given to
    parse_tags) and 'table' (the name of the table that is filled).
    """

    def __init__(self, connection, config, source):
        """Initialise the base gatherer, then check the needed config keys."""
        gatherer.__init__(self, connection, config, source)
        # presumably verifies that both keys exist in this source's
        # configuration -- confirm against gatherer.assert_my_config
        self.assert_my_config('path', 'table')

    def run(self):
        """Empty the target table and refill it from the tag file.

        Rows are inserted through a prepared statement that is deallocated
        once the import is done; the table is ANALYZEd at the end.
        """
        settings = self.my_config
        db = self.cursor()
        table = settings['table']

        # Start from an empty table, then prepare the insert statement once
        # so that each row only needs an EXECUTE.
        db.execute('DELETE FROM %s' % table)
        prepare_stmt = ('PREPARE debtags_insert '
                        'AS INSERT INTO %s (package, tag) VALUES ($1, $2)')
        db.execute(prepare_stmt % table)

        for package, tag in parse_tags(settings['path']):
            row = (quote(package), quote(tag))
            db.execute('EXECUTE debtags_insert (%s, %s)' % row)

        db.execute('DEALLOCATE debtags_insert')
        db.execute("ANALYZE %s" % table)
74
75
def test():
    """Debugging helper: dump the tuples that an import would insert.

    The tag file name is taken from the first command-line argument; every
    <pkg, tag> pair parsed from it is printed on its own line, with the two
    fields separated by a tab.
    """
    tags_file = sys.argv[1]
    for package, tag in parse_tags(tags_file):
        print("%s\t%s" % (package, tag))
82
# Run the debugging helper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test()

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.5