/[collab-qa]/udd/udd/ftpnew_gatherer.py
ViewVC logotype

Diff of /udd/udd/ftpnew_gatherer.py

Parent Directory | Revision Log | View Patch

revision 1407 by tille, Mon Mar 16 06:35:13 2009 UTC revision 1408 by tille, Mon Mar 16 10:21:12 2009 UTC
# Line 20  from psycopg2 import IntegrityError Line 20  from psycopg2 import IntegrityError
20  def get_gatherer(connection, config, source):  def get_gatherer(connection, config, source):
21    return ftpnew_gatherer(connection, config, source)    return ftpnew_gatherer(connection, config, source)
22    
23    DEBUG=0
24    
25  # When parsing src html pages we have to get rid of certain html strings  # When parsing src html pages we have to get rid of certain html strings
26  def de_html(string):  def de_html(string):
27    string= re.sub("</?span[^>]*>", "", string)    string= re.sub("</?span[^>]*>", '',  string)
28    string= re.sub("&lt;", "<", string)    string= re.sub("&quot;",        '"', string)
29    string= re.sub("&gt;", ">", string)    string= re.sub("&amp;",         '&', string)
30    string= re.sub("</?pre>", "", string)    string= re.sub("&lt;",          '<', string)
31      string= re.sub("&gt;",          '>', string)
32      string= re.sub("</?pre>",       '',  string)
33    return string    return string
34    
35  # These fields are not forwarded to UDD tables for the moment  # These fields are not forwarded to UDD tables for the moment
# Line 145  class ftpnew_gatherer(gatherer): Line 149  class ftpnew_gatherer(gatherer):
149        cur.execute(query)        cur.execute(query)
150        in_udd = cur.fetchone()[0]        in_udd = cur.fetchone()[0]
151        if in_udd:        if in_udd:
152          print >>stderr, "Binary package %s is %i times in UDD - no interest in just known binaries (queue = %s)" \          if DEBUG != 0:
153                       % (value, int(in_udd), queue)            print >>stderr, "Binary package %s is %i times in UDD - no interest in just known binaries (queue = %s)" \
154                         % (value, int(in_udd), queue)
155          return 1          return 1
156      return 0      return 0
157    
# Line 201  class ftpnew_gatherer(gatherer): Line 206  class ftpnew_gatherer(gatherer):
206        cur.execute(query)        cur.execute(query)
207        in_udd = cur.fetchone()[0]        in_udd = cur.fetchone()[0]
208        if in_udd:        if in_udd:
209          print >>stderr, "%s is %i times in UDD - no interest in just known sources (queue = %s)" \          if DEBUG != 0:
210              % (srcpkg.s['Source'], int(in_udd), srcpkg.s['Source'])            print >>stderr, "%s is %i times in UDD - no interest in just known sources (queue = %s)" \
211                              % (srcpkg.s['Source'], int(in_udd), srcpkg.s['Source'])
212          continue          continue
213    
214        src_info_base = srcpkg.s['Source'] + '_' + srcpkg.s['Version']        src_info_base = srcpkg.s['Source'] + '_' + srcpkg.s['Version']
# Line 249  class ftpnew_gatherer(gatherer): Line 255  class ftpnew_gatherer(gatherer):
255              print >>srco, "%s: %s" % (field, value)              print >>srco, "%s: %s" % (field, value)
256            elif field == 'Description':            elif field == 'Description':
257              if in_source:              if in_source:
258                srcpkg.s[field]  = value                srcpkg.s[field]  = de_html(value)
259              else:              else:
260                binpkg.b[field]  = value                binpkg.b[field]  = de_html(value)
261              print >>srco, "%s: %s" % (field, value)              print >>srco, "%s: %s" % (field, value)
262            elif field == 'Architecture':            elif field == 'Architecture':
263              if in_source:              if in_source:
# Line 368  class ftpnew_gatherer(gatherer): Line 374  class ftpnew_gatherer(gatherer):
374            if match:            if match:
375              if match.groups()[0][0] != ' ':              if match.groups()[0][0] != ' ':
376                description += ' '                description += ' '
377              description += match.groups()[0]              description += de_html(match.groups()[0])
378              in_description = 0              in_description = 0
379              if not in_source: # binpkg and binpkg.b:              if not in_source: # binpkg and binpkg.b:
380                binpkg.b['Description']      = description                (binpkg.b['Description'], binpkg.b['Long_Description']) = description.split("\n",1)
381                binpkg.b['Long_Description'] = description.split("\n",1)[1]                print >>srco, "Description: %s\n%s" % (binpkg.b['Description'], binpkg.b['Long_Description'])
             print >>srco, "Description: %s\n" % (description)  
382            else:            else:
383              if line[0] != ' ':              if line[0] != ' ':
384                description += ' '                description += ' '
385              description += line              description += de_html(line)
386          else:          else:
387            match = ftpnew_gatherer.src_html_has_description_start_re.match(line)            match = ftpnew_gatherer.src_html_has_description_start_re.match(line)
388            if match:            if match:
389              in_description = 1              in_description = 1
390              description = match.groups()[0] + "\n"              description = de_html(match.groups()[0]) + "\n"
391        srci.close()        srci.close()
392        srco.close()        srco.close()
393  #        cur.execute("EXECUTE ftpnew_insert (%s, %s, %s, %s)"\  #        cur.execute("EXECUTE ftpnew_insert (%s, %s, %s, %s)"\

Legend:
Removed from v.1407  
changed lines
  Added in v.1408

  ViewVC Help
Powered by ViewVC 1.1.5