| 20 |
def get_gatherer(connection, config, source):
    """Build and return the gatherer object provided by this module.

    The three arguments are handed unchanged, in the same order, to
    ``ftpnew_gatherer``; whatever that call produces is returned.
    """
    gatherer = ftpnew_gatherer(connection, config, source)
    return gatherer
| 22 |
|
|
| 23 |
|
DEBUG=0 |
| 24 |
|
|
| 25 |
# When parsing src html pages we have to get rid of certain html strings |
# When parsing src html pages we have to get rid of certain html strings |
| 26 |
def de_html(string):
    """Remove leftover HTML markup from a value and decode its entities.

    The following is done, in this order:

    * opening and closing ``span`` tags (with any attributes) are removed,
    * ``&quot;``, ``&lt;`` and ``&gt;`` are replaced by the characters they
      stand for,
    * ``<pre>`` / ``</pre>`` tags are removed,
    * ``&amp;`` is replaced by ``&``.

    ``&amp;`` is deliberately decoded last.  Decoding it before ``&lt;`` and
    ``&gt;`` would unescape already-escaped text twice, e.g. ``&amp;lt;``
    would first become ``&lt;`` and then ``<``.

    Returns the cleaned string.
    """
    string = re.sub("</?span[^>]*>", '', string)

    # Plain literal substitutions; no regular expression is needed for these.
    for entity, char in (('&quot;', '"'), ('&lt;', '<'), ('&gt;', '>')):
        string = string.replace(entity, char)

    # Tags are stripped after &lt;/&gt; have been decoded, as before, so
    # "pre" tags that arrive in escaped form are removed as well.
    string = re.sub("</?pre>", '', string)

    # Must stay the final step -- see the docstring.
    string = string.replace('&amp;', '&')
    return string
| 34 |
|
|
| 35 |
# These fields are not forwarded to UDD tables for the moment |
# These fields are not forwarded to UDD tables for the moment |
| 149 |
cur.execute(query) |
cur.execute(query) |
| 150 |
in_udd = cur.fetchone()[0] |
in_udd = cur.fetchone()[0] |
| 151 |
if in_udd: |
if in_udd: |
| 152 |
print >>stderr, "Binary package %s is %i times in UDD - no interest in just known binaries (queue = %s)" \ |
if DEBUG != 0: |
| 153 |
% (value, int(in_udd), queue) |
print >>stderr, "Binary package %s is %i times in UDD - no interest in just known binaries (queue = %s)" \ |
| 154 |
|
% (value, int(in_udd), queue) |
| 155 |
return 1 |
return 1 |
| 156 |
return 0 |
return 0 |
| 157 |
|
|
| 206 |
cur.execute(query) |
cur.execute(query) |
| 207 |
in_udd = cur.fetchone()[0] |
in_udd = cur.fetchone()[0] |
| 208 |
if in_udd: |
if in_udd: |
| 209 |
print >>stderr, "%s is %i times in UDD - no interest in just known sources (queue = %s)" \ |
if DEBUG != 0: |
| 210 |
% (srcpkg.s['Source'], int(in_udd), srcpkg.s['Source']) |
print >>stderr, "%s is %i times in UDD - no interest in just known sources (queue = %s)" \ |
| 211 |
|
% (srcpkg.s['Source'], int(in_udd), srcpkg.s['Source']) |
| 212 |
continue |
continue |
| 213 |
|
|
| 214 |
src_info_base = srcpkg.s['Source'] + '_' + srcpkg.s['Version'] |
src_info_base = srcpkg.s['Source'] + '_' + srcpkg.s['Version'] |
| 255 |
print >>srco, "%s: %s" % (field, value) |
print >>srco, "%s: %s" % (field, value) |
| 256 |
elif field == 'Description': |
elif field == 'Description': |
| 257 |
if in_source: |
if in_source: |
| 258 |
srcpkg.s[field] = value |
srcpkg.s[field] = de_html(value) |
| 259 |
else: |
else: |
| 260 |
binpkg.b[field] = value |
binpkg.b[field] = de_html(value) |
| 261 |
print >>srco, "%s: %s" % (field, value) |
print >>srco, "%s: %s" % (field, value) |
| 262 |
elif field == 'Architecture': |
elif field == 'Architecture': |
| 263 |
if in_source: |
if in_source: |
| 374 |
if match: |
if match: |
| 375 |
if match.groups()[0][0] != ' ': |
if match.groups()[0][0] != ' ': |
| 376 |
description += ' ' |
description += ' ' |
| 377 |
description += match.groups()[0] |
description += de_html(match.groups()[0]) |
| 378 |
in_description = 0 |
in_description = 0 |
| 379 |
if not in_source: # binpkg and binpkg.b: |
if not in_source: # binpkg and binpkg.b: |
| 380 |
binpkg.b['Description'] = description |
(binpkg.b['Description'], binpkg.b['Long_Description']) = description.split("\n",1) |
| 381 |
binpkg.b['Long_Description'] = description.split("\n",1)[1] |
print >>srco, "Description: %s\n%s" % (binpkg.b['Description'], binpkg.b['Long_Description']) |
|
print >>srco, "Description: %s\n" % (description) |
|
| 382 |
else: |
else: |
| 383 |
if line[0] != ' ': |
if line[0] != ' ': |
| 384 |
description += ' ' |
description += ' ' |
| 385 |
description += line |
description += de_html(line) |
| 386 |
else: |
else: |
| 387 |
match = ftpnew_gatherer.src_html_has_description_start_re.match(line) |
match = ftpnew_gatherer.src_html_has_description_start_re.match(line) |
| 388 |
if match: |
if match: |
| 389 |
in_description = 1 |
in_description = 1 |
| 390 |
description = match.groups()[0] + "\n" |
description = de_html(match.groups()[0]) + "\n" |
| 391 |
srci.close() |
srci.close() |
| 392 |
srco.close() |
srco.close() |
| 393 |
# cur.execute("EXECUTE ftpnew_insert (%s, %s, %s, %s)"\ |
# cur.execute("EXECUTE ftpnew_insert (%s, %s, %s, %s)"\ |