| 1 |
zack |
1293 |
#!/usr/bin/env python
|
| 2 |
|
|
|
| 3 |
|
|
# This file is a part of the Ultimate Debian Database
|
| 4 |
|
|
# <http://wiki.debian.org/UltimateDebianDatabase>
|
| 5 |
|
|
#
|
| 6 |
|
|
# Copyright (C) 2008 Stefano Zacchiroli <zack@debian.org>
|
| 7 |
|
|
#
|
| 8 |
|
|
# This file is distributed under the terms of the General Public
|
| 9 |
|
|
# License version 3 or (at your option) any later version.
|
| 10 |
|
|
|
| 11 |
|
|
""" import debtags data into the database
|
| 12 |
|
|
|
| 13 |
|
|
tag information is downloaded from SVN (though via http/websvn to
|
| 14 |
|
|
avoid an extra dependency on svn), see the "update-command"
|
| 15 |
|
|
configuration of the debtags gatherer
|
| 16 |
|
|
"""
|
| 17 |
|
|
|
| 18 |
|
|
import re
|
| 19 |
|
|
import sys
|
| 20 |
|
|
|
| 21 |
|
|
from gatherer import gatherer
|
| 22 |
|
|
from aux import quote
|
| 23 |
|
|
|
| 24 |
|
|
|
| 25 |
|
|
# a "live" instance of the tag database, whose lines should match the regexp
|
| 26 |
|
|
# below, is at: http://svn.debian.org/viewsvn/*checkout*/debtags/tagdb/tags
|
| 27 |
|
|
# One line of the tag database: "<pkg>: <tag>, <tag>, ...".
# The pkg group accepts lowercase letters, digits, '+', '.' and '-'.  The
# hyphen is written last in the character class so that it is a literal;
# spelled as '+-\.' it formed a range from '+' to '.', which also let ','
# through as part of a package name.
tag_line_RE = re.compile(
    r'^(?P<pkg>[a-z0-9+.-]+):\s+(?P<tags>[\w:+-]+(,\s+[\w:+-]+)*)$')
# Separator between the tags of one line: a comma followed by whitespace.
tag_sep_RE = re.compile(r',\s+')
|
| 29 |
|
|
# field_sep_RE = re.compile(r':\s+')
|
| 30 |
|
|
|
| 31 |
|
|
def parse_tags(fname):
    """Yield a (package, tag) pair for every tag found in a debtags file.

    fname is the path of a text file whose lines are expected to match
    tag_line_RE, i.e. a package name, a colon, and a comma-separated list
    of tags.  Each line is stripped of surrounding whitespace before being
    matched.  A line that does not match is reported on stderr together
    with its 1-based line number and is otherwise skipped.

    This is a generator: the file is opened when iteration starts and is
    closed when the generator is exhausted, is closed early, or fails.
    """
    # open() rather than the Python-2-only file() builtin.
    tags_db = open(fname)
    try:
        line_no = 0
        for line in tags_db:
            line_no += 1
            line = line.strip()
            parsed_line = tag_line_RE.match(line)
            if not parsed_line:
                # sys.stderr.write keeps this runnable under both Python 2
                # and Python 3 (the "print >>" statement is Python 2 only).
                sys.stderr.write(
                    "debtags: can not parse line %d: %s\n" % (line_no, line))
                continue
            pkg = parsed_line.group('pkg')
            for tag in tag_sep_RE.split(parsed_line.group('tags')):
                yield (pkg, tag)
    finally:
        # Release the file handle even when the consumer stops iterating
        # early or an exception propagates out of the loop.
        tags_db.close()
|
| 49 |
|
|
|
| 50 |
|
|
|
| 51 |
|
|
def get_gatherer(connection, config, source):
    """Create the gatherer object for the debtags source.

    The three arguments are handed unchanged to the debtags_gatherer
    constructor, and the newly built instance is returned.
    """
    instance = debtags_gatherer(connection, config, source)
    return instance
|
| 53 |
|
|
|
| 54 |
|
|
|
| 55 |
|
|
class debtags_gatherer(gatherer):
    """Gatherer that loads (package, tag) pairs into a database table.

    Two entries of this gatherer's configuration are used: 'path', the
    debtags file handed to parse_tags, and 'table', the name of the table
    that receives the rows.
    """

    def __init__(self, connection, config, source):
        """Initialise the base gatherer and check the configuration.

        assert_my_config is called with 'path' and 'table' -- presumably it
        verifies that both options are present (defined in the base class;
        confirm there).
        """
        gatherer.__init__(self, connection, config, source)
        self.assert_my_config('path', 'table')

    def run(self):
        """Replace the contents of the configured table with fresh tags.

        In order: delete every row of the table, prepare an insert
        statement, execute it once per (package, tag) pair produced by
        parse_tags, deallocate the prepared statement, and finally run
        ANALYZE on the table.
        """
        settings = self.my_config
        table = settings['table']
        cursor = self.cursor()

        # Start from an empty table.
        cursor.execute('DELETE FROM %s' % table)

        # Prepare the insert once and reuse it for every row.
        prepare_sql = ('PREPARE debtags_insert '
                       'AS INSERT INTO %s (package, tag) VALUES ($1, $2)')
        cursor.execute(prepare_sql % table)

        for package, tag in parse_tags(settings['path']):
            values = (quote(package), quote(tag))
            cursor.execute('EXECUTE debtags_insert (%s, %s)' % values)

        cursor.execute('DEALLOCATE debtags_insert')
        cursor.execute("ANALYZE %s" % table)
|
| 74 |
zack |
1293 |
|
| 75 |
|
|
|
| 76 |
|
|
def test():
    """Debugging aid: dump the parsed contents of a debtags file.

    The file named by the first command-line argument is run through
    parse_tags, and every resulting <pkg, tag> tuple -- i.e. what would be
    inserted in the db -- is written to stdout as one tab-separated line.
    """
    for pair in parse_tags(sys.argv[1]):
        pkg, tag = pair
        sys.stdout.write("%s\t%s" % (pkg, tag) + "\n")
|
| 82 |
|
|
|
| 83 |
|
|
# When this file is executed directly (rather than imported), run the
# debugging helper, which reads the file name from the command line.
if __name__ == '__main__':
    test()
|