/[collab-qa]/udd/udd/sources_gatherer.py
ViewVC logotype

Contents of /udd/udd/sources_gatherer.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1531 - (show annotations) (download) (as text)
Thu Jul 23 14:05:39 2009 UTC (3 years, 9 months ago) by lucas
File MIME type: text/x-python
File size: 8401 byte(s)
add ANALYZE at the end of all importers to teach pgsql some stats about the data we just imported
1 #!/usr/bin/env python
2 # Last-Modified: <Sun Aug 17 12:07:25 2008>
3 # This file is a part of the Ultimate Debian Database project
4
5 import debian_bundle.deb822
6 import gzip
7 import os
8 import sys
9 import aux
10 import tempfile
11 from aux import ConfigException
12 from aux import null_or_quote, quote
13 from gatherer import gatherer
14 import email.Utils
15 import re
16
def get_gatherer(connection, config, source):
    """Create and return a sources_gatherer for the given arguments.

    connection, config and source are handed unchanged to the
    sources_gatherer constructor.
    """
    instance = sources_gatherer(connection, config, source)
    return instance
19
class sources_gatherer(gatherer):
    """Import the data from Sources.gz files into the database.

    The class-level dictionaries below are only used for membership tests
    (their values are never read):

    * ``mandatory``     - fields build_dict() requires in every paragraph.
    * ``non_mandatory`` - fields copied when present, stored as None otherwise.
    * ``ignorable``     - fields that are silently skipped by the
                          unknown-key accounting in build_dict().
    * ``vcs``           - VCS names probed as ``Vcs-<name>`` / ``X-Vcs-<name>``.
    """
    mandatory = {'Format': 0, 'Maintainer': 0, 'Package': 0, 'Version': 0,
                 'Files': 0}
    non_mandatory = {'Uploaders': 0, 'Binary': 0, 'Architecture': 0,
                     'Standards-Version': 0, 'Homepage': 0, 'Build-Depends': 0,
                     'Build-Depends-Indep': 0, 'Build-Conflicts': 0,
                     'Build-Conflicts-Indep': 0, 'Priority': 0, 'Section': 0,
                     'Python-Version': 0, 'Checksums-Sha1': 0,
                     'Checksums-Sha256': 0, 'Original-Maintainer': 0,
                     'Dm-Upload-Allowed': 0}
    ignorable = {'Vcs-Arch': 0, 'Vcs-Bzr': 0,
                 'Vcs-Cvs': 0, 'Vcs-Darcs': 0, 'Vcs-Git': 0, 'Vcs-Hg': 0,
                 'Vcs-Svn': 0, 'Vcs-Mtn': 0,
                 'X-Vcs-Browser': 0, 'Vcs-Browser': 0, 'X-Vcs-Bzr': 0,
                 'X-Vcs-Darcs': 0, 'X-Vcs-Svn': 0, 'X-Vcs-Hg': 0,
                 'X-Vcs-Git': 0, 'Vcs-Browse': 0,
                 'Directory': 0, 'Comment': 0, 'Origin': 0, 'Url': 0,
                 'X-Collab-Maint': 0, 'Autobuild': 0, 'Vcs-Cvs:': 0,
                 'Python-Standards-Version': 0, 'url': 0,
                 'originalmaintainer': 0, 'Originalmaintainer': 0,
                 'Build-Recommends': 0}
    # Vcs-Cvs: is caused by a bug in python-debian, apparently.
    # Field-name prefixes that are ignored as well (includes common typos).
    ignorable_re = re.compile("^(Orig-|Original-|Origianl-|Orginal-|Debian-|X-Original-|Upstream-)")
    vcs = {'Arch': 0, 'Bzr': 0, 'Cvs': 0, 'Darcs': 0, 'Git': 0, 'Hg': 0,
           'Svn': 0, 'Mtn': 0}

    # Unknown field name -> number of times it was seen.  This is a class
    # attribute, so the counts are shared by every instance.
    warned_about = {}

    def __init__(self, connection, config, source):
        """Initialise the base gatherer and check the source configuration.

        NOTE(review): run() also reads 'uploaders-table', which is not in the
        list of keys checked here - confirm whether it should be.
        """
        gatherer.__init__(self, connection, config, source)
        self._distr = None
        self.assert_my_config('directory', 'components', 'distribution',
                              'release', 'sources-table', 'sources-schema')

    def build_dict(self, control):
        """Build a plain dictionary from one control paragraph.

        The result contains every key of ``mandatory`` and ``non_mandatory``
        (the latter set to None when absent from ``control``) plus the derived
        keys 'Vcs-Type', 'Vcs-Url' and 'Vcs-Browser'.  Field names that are
        neither known nor ignorable are counted in ``warned_about``.

        Raises ValueError when a mandatory field is missing.  (The original
        code raised a bare string, which is itself a TypeError on
        Python >= 2.6.)
        """
        cls = sources_gatherer
        d = {}
        for k in cls.mandatory:
            if k not in control:
                raise ValueError("Mandatory field %s not specified" % k)
            d[k] = control[k]
        for k in cls.non_mandatory:
            if k in control:
                d[k] = control[k]
            else:
                d[k] = None

        # The first VCS found wins; for each VCS the official "Vcs-" field is
        # preferred over the "X-Vcs-" variant.  When a paragraph names several
        # VCSes the winner depends on the iteration order of the vcs dict.
        d['Vcs-Type'] = None
        d['Vcs-Url'] = None
        for vcs in cls.vcs:
            if "Vcs-" + vcs in control:
                d['Vcs-Type'] = vcs
                d['Vcs-Url'] = control["Vcs-" + vcs]
                break
            elif "X-Vcs-" + vcs in control:
                d['Vcs-Type'] = vcs
                d['Vcs-Url'] = control["X-Vcs-" + vcs]
                break

        # Browser URL: official field, then the X- variant, then a common typo.
        d['Vcs-Browser'] = None
        for field in ("Vcs-Browser", "X-Vcs-Browser", "Vcs-Browse"):
            if field in control:
                d['Vcs-Browser'] = control[field]
                break

        # Count every field we neither import nor explicitly ignore, so that
        # print_warnings() can report it later.
        for k in control.keys():
            if k in cls.mandatory or k in cls.non_mandatory or k in cls.ignorable:
                continue
            if cls.ignorable_re.match(k):
                continue
            cls.warned_about[k] = cls.warned_about.get(k, 0) + 1
        return d

    def import_sources(self, file):
        """Import all paragraphs read from ``file`` into the database.

        ``file`` is handed to debian_bundle.deb822.Packages.iter_paragraphs.
        Each paragraph becomes one execution of the prepared statement
        ``source_insert``, and every address in its Uploaders field becomes
        one execution of ``uploader_insert``.  Both statements must already
        have been prepared on the connection (run() does this).
        """
        cur = self.cursor()
        query = """EXECUTE source_insert
            (%(Package)s, %(Version)s, %(Maintainer)s,
            %(maintainer_name)s, %(maintainer_email)s, %(Format)s, %(Files)s,
            %(Uploaders)s, %(Binary)s, %(Architecture)s, %(Standards-Version)s,
            %(Homepage)s, %(Build-Depends)s, %(Build-Depends-Indep)s,
            %(Build-Conflicts)s, %(Build-Conflicts-Indep)s, %(Priority)s,
            %(Section)s, %(Vcs-Type)s, %(Vcs-Url)s, %(Vcs-Browser)s,
            %(Python-Version)s, %(Checksums-Sha1)s, %(Checksums-Sha256)s,
            %(Original-Maintainer)s, %(Dm-Upload-Allowed)s)"""
        query_uploaders = """EXECUTE uploader_insert (%(Package)s, %(Version)s,
            %(Uploader)s, %(Name)s, %(Email)s)"""
        # Lists rather than tuples: appending is O(1), whereas the former
        # ``tuple += (item,)`` copied the whole tuple for every paragraph.
        pkgs = []
        uploaders = []
        for control in debian_bundle.deb822.Packages.iter_paragraphs(file):
            d = self.build_dict(control)
            d['maintainer_name'], d['maintainer_email'] = \
                email.Utils.parseaddr(d['Maintainer'])
            pkgs.append(d)

            if d['Uploaders']:
                for uploader in email.Utils.getaddresses([d['Uploaders']]):
                    uploaders.append({
                        'Package': d['Package'],
                        'Version': d['Version'],
                        'Uploader': email.Utils.formataddr(uploader),
                        'Name': uploader[0],
                        'Email': uploader[1],
                    })
        cur.executemany(query, pkgs)
        cur.executemany(query_uploaders, uploaders)

    def tables(self):
        """Return a list holding the configured 'sources-table' name."""
        return [self.my_config['sources-table']]

    def _import_sources_gz(self, path):
        """Decompress the gzip file ``path`` and feed it to import_sources().

        The content is copied to a temporary file first so that
        import_sources() receives a real file (the original comment states
        this lets debian_bundle use apt_pkg).  Every handle is closed even
        when an error occurs.
        """
        import shutil  # local import: only needed here

        tmp = tempfile.NamedTemporaryFile()
        try:
            compressed = gzip.open(path)
            try:
                # Stream in chunks instead of holding the whole file in memory.
                shutil.copyfileobj(compressed, tmp)
            finally:
                compressed.close()
            # Make sure everything is on disk before re-opening by name.
            tmp.flush()
            plain = open(tmp.name)
            try:
                self.import_sources(plain)
            finally:
                plain.close()
        finally:
            tmp.close()

    def run(self):
        """Re-import the Sources.gz file of every configured component.

        For each component the existing rows for this distribution, release
        and component are deleted from the sources and uploaders tables, the
        two insert statements are prepared, the file is imported and the
        statements are deallocated again.  A file that cannot be read is
        reported on stdout and skipped.  Finally both tables are ANALYZEd and
        the unknown-field warnings are printed.
        """
        src_cfg = self.my_config
        table = src_cfg['sources-table']
        utable = src_cfg['uploaders-table']
        distribution = src_cfg['distribution']
        release = src_cfg['release']

        aux.debug = self.config['general']['debug']

        cur = self.cursor()

        for comp in src_cfg['components']:
            path = os.path.join(src_cfg['directory'], comp, 'source', 'Sources.gz')

            # NOTE(review): values are interpolated into the SQL text; they
            # come from the configuration, not from the imported data.
            for tbl in (table, utable):
                cur.execute("DELETE from %s WHERE Distribution = '%s' AND "
                            "release = '%s' AND component = '%s'"
                            % (tbl, distribution, release, comp))

            query = """PREPARE source_insert as INSERT INTO %s
                (Source, Version, Maintainer, Maintainer_name, Maintainer_email, Format, Files, Uploaders, Bin,
                Architecture, Standards_Version, Homepage, Build_Depends,
                Build_Depends_Indep, Build_Conflicts, Build_Conflicts_Indep, Priority,
                Section, Vcs_Type, Vcs_Url, Vcs_Browser, python_version, checksums_sha1,
                checksums_sha256, original_maintainer, dm_upload_allowed,
                Distribution, Release, Component)
                VALUES
                ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16,
                $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, '%s', '%s', '%s')"""\
                % (table, distribution, release, comp)
            cur.execute(query)
            query = """PREPARE uploader_insert as INSERT INTO %s
                (Source, Version, Distribution, Release, Component, Uploader, Name, Email) VALUES
                ($1, $2, '%s', '%s', '%s', $3, $4, $5) """ % \
                (utable, distribution, release, comp)
            cur.execute(query)

            try:
                self._import_sources_gz(path)
            except IOError as e:
                # Not every IOError carries (errno, strerror) - e.g. a file
                # that is not gzip data - so fall back to the exception text.
                message = e.strerror or e
                print("Could not read packages from %s: %s" % (path, message))

            cur.execute("DEALLOCATE source_insert")
            cur.execute("DEALLOCATE uploader_insert")

        cur.execute('ANALYZE %s' % table)
        cur.execute('ANALYZE %s' % utable)

        self.print_warnings()

    def setup(self):
        """Evaluate the SQL schema file configured for this source.

        The file is <general 'schema-dir'>/<'sources-schema'> and is passed to
        eval_sql_file() together with this source's configuration.

        Raises Exception when either configuration key is missing.
        """
        if 'schema-dir' not in self.config['general']:
            raise Exception("'schema-dir' not specified")
        if 'sources-schema' not in self.my_config:
            # The message used to name 'packages-schema', which is not the
            # key that is actually checked.
            raise Exception("'sources-schema' not specified for source " + self.source)

        schema_dir = self.config['general']['schema-dir']
        schema = schema_dir + '/' + self.my_config['sources-schema']
        self.eval_sql_file(schema, self.my_config)

    def print_warnings(self):
        """Print one line per unknown field name counted by build_dict()."""
        for key in sources_gatherer.warned_about:
            print("[Sources] Unknown key %s appeared %d times"
                  % (key, sources_gatherer.warned_about[key]))

  ViewVC Help
Powered by ViewVC 1.1.5