560d5077e3a4a956134ac31f66952ba09d6c7bc1
1 #!/usr/bin/python
3 # TODO:
4 # - Filter packages by tags (for example, keep only role::program packages)
5 # - Go from packages to tags
6 # - Build a tag cloud or a word cloud
7 # - Extract significant phrases from significant package descriptions
9 # wget http://popcon.debian.org/by_inst.gz
11 import gzip
12 import math
13 import os.path
14 import apt
15 import sys
16 from debian import debtags
17 from optparse import OptionParser
18 from collections import Counter
# The debtags tag database is mandatory for every mode of this script, so
# bail out early (exit status 2) when it is missing.
if not os.path.exists('/var/lib/debtags/package-tags'):
    # Terminate the message with a newline so the shell prompt does not end
    # up glued to the end of the error text.
    sys.stderr.write("Sorry, /var/lib/debtags/package-tags doesn't exist and is required\n")
    sys.exit(2)
class Parser(OptionParser):
    """OptionParser variant that prints the full help text on usage errors."""

    def __init__(self, *args, **kwargs):
        """Forward every argument unchanged to OptionParser."""
        OptionParser.__init__(self, *args, **kwargs)

    def error(self, msg):
        """
        Report ``msg`` on stderr, followed by a blank line and the complete
        help text, then terminate the process with exit status 2.
        """
        prog_name = self.get_prog_name()
        err = sys.stderr
        err.write("%s: error: %s\n\n" % (prog_name, msg))
        self.print_help(err)
        raise SystemExit(2)
# Command line: an optional single positional argument naming a debtags tag.
# The script compares the first positional argument against each package's
# tags, so the usage text advertises a tag rather than a file name.
parser = Parser(usage="usage: %prog [options] [tag]",
                version="%prog 0.1",
                description="Count of installed packages grouped by "
                            "implementation language, or, when a tag is "
                            "given, list of the installed packages carrying "
                            "that tag")

(opts, args) = parser.parse_args()
# Maps an "implemented-in::<language>" tag to a URL that is printed next to
# that language's package count in the final report.
horror_urls = dict([
    ("implemented-in::c-sharp",
     "http://www.fsf.org/news/dont-depend-on-mono"),
    ("implemented-in::ecmascript",
     "http://wiki.theory.org/YourLanguageSucks#JavaScript_sucks_because:"),
    ("implemented-in::java",
     "http://wiki.theory.org/YourLanguageSucks#Java_sucks_because:"),
    ("implemented-in::objc",
     "http://wiki.theory.org/YourLanguageSucks#Objective-C_sucks_because:"),
    ("implemented-in::php",
     "http://me.veekun.com/blog/2012/04/09/php-a-fractal-of-bad-design/"),
    ("implemented-in::ruby",
     "http://wiki.theory.org/YourLanguageSucks#Ruby_sucks_because:"),
    ("implemented-in::tcl",
     "http://wiki.tcl.tk/640"),
])
def parse_vote(file):
    """
    Parse a popcon vote file, generating the names of the valid packages in
    the vote together with a weight.

    ``file`` is any iterable of text lines. Each generated item is a
    ``(package_name, weight)`` tuple, where the package name is the third
    space-separated field of the line and the weight is:

    - 0.1 when the fourth field is ``<NOFILES>``
    - 1.0 when the line has exactly four fields
    - 0.3 when the fifth field is ``<OLD>``
    - 0.8 when the fifth field is ``<RECENT-CTIME>``

    Lines starting with ``POPULARITY`` or ``END-POPULARITY``, lines with
    fewer than four fields, and lines whose fifth field is anything else are
    skipped.
    """
    for line in file:
        # Header and footer lines carry no package information.
        if line.startswith(("POPULARITY", "END-POPULARITY")):
            continue

        # Strip only the line terminator. Blindly dropping the last character
        # would corrupt the final field of a line that has no trailing
        # newline (e.g. turn "<NOFILES>" into "<NOFILES").
        data = line.rstrip("\r\n").split(" ")
        if len(data) < 4:
            continue

        name = data[2]
        if data[3] == '<NOFILES>':
            # Empty/virtual packages
            yield name, 0.1
        elif len(data) == 4:
            # Used packages
            yield name, 1.
        elif data[4] == '<OLD>':
            # Unused packages
            yield name, 0.3
        elif data[4] == '<RECENT-CTIME>':
            # Recently installed packages
            yield name, 0.8
# APT package cache, used to keep only packages that are currently installed.
apt_cache = apt.Cache()
# Number of installed packages per "implemented-in::*" tag.
by_lang = Counter()

with open("/var/log/popularity-contest") as infd:
    db = debtags.DB()
    # Load the tag database through a context manager so the file handle is
    # closed as soon as it has been read instead of being leaked.
    with open('/var/lib/debtags/package-tags') as tagfd:
        db.read(tagfd)

    # Seed every known implementation language with a zero count so that
    # languages without any installed package still show up in the report.
    for t in db.iter_tags():
        if t.startswith("implemented-in::"):
            by_lang[t] = 0

    # The weight generated by parse_vote is not used here.
    for pkg, _weight in parse_vote(infd):
        if pkg not in apt_cache:
            continue
        if not apt_cache[pkg].installed:
            continue
        tags = db.tags_of_package(pkg)

        if args:
            # Tag mode: print each installed package carrying the given tag.
            if args[0] in tags:
                print(pkg)
        else:
            # Count mode: tally the package under each of its languages.
            for t in tags:
                if t.startswith("implemented-in::"):
                    by_lang[t] += 1

if not args:
    # Report sorted by tag name; languages with no installed package "win",
    # the others get their URL from horror_urls when one is known.
    for k, v in sorted(by_lang.items()):
        if v == 0:
            comment = "WIN!"
        else:
            comment = horror_urls.get(k, "")
        print("%4d %-30s %s" % (v, k, comment))
