560d5077e3a4a956134ac31f66952ba09d6c7bc1
[collab-maint/debian-horoscope.git] / horrorscope
1 #!/usr/bin/python
3 # TODO:
4 #  - Filter packages by tags (for example, keep only role::program packages)
5 #  - Go from packages to tags
6 #  - Build a tag cloud or a word cloud
7 #  - Extract significant phrases from significant package descriptions
9 # wget http://popcon.debian.org/by_inst.gz
11 import gzip
12 import math
13 import os.path
14 import apt
15 import sys
16 from debian import debtags
17 from optparse import OptionParser
18 from collections import Counter
# Fail early with a readable message: the debtags tag database is read
# unconditionally by the script, so nothing useful can happen without it.
if not os.path.exists('/var/lib/debtags/package-tags'):
    # The trailing newline keeps the shell prompt from being glued to the message.
    sys.stderr.write("Sorry, /var/lib/debtags/package-tags doesn't exist and is required\n")
    sys.exit(2)
class Parser(OptionParser):
    """OptionParser variant that prints the full help text after an error.

    Construction is inherited unchanged from OptionParser.
    """

    def error(self, msg):
        """Write "<prog>: error: <msg>" and the help text to stderr, then exit with status 2."""
        prog_name = self.get_prog_name()
        banner = "%s: error: %s\n\n" % (prog_name, msg)
        sys.stderr.write(banner)
        # Help goes to stderr as well so stdout stays clean on failure.
        self.print_help(sys.stderr)
        raise SystemExit(2)
# The optional positional argument is a debtags tag (it is compared against
# each package's tags when the vote is processed), not a file name, so the
# usage and description say so.
parser = Parser(usage="usage: %prog [options] [tag]",
        version="%prog 0.1",
        description="Count of installed packages grouped by implementation "
                    "language; if a tag is given, list the installed packages "
                    "carrying that tag instead")

# opts is currently unused (no options beyond --help/--version are defined);
# args holds the optional tag.
(opts, args) = parser.parse_args()
# Maps an "implemented-in::*" debtags tag to the URL printed next to that
# language in the per-language report.
horror_urls = dict([
    ("implemented-in::c-sharp", "http://www.fsf.org/news/dont-depend-on-mono"),
    ("implemented-in::ecmascript", "http://wiki.theory.org/YourLanguageSucks#JavaScript_sucks_because:"),
    ("implemented-in::java", "http://wiki.theory.org/YourLanguageSucks#Java_sucks_because:"),
    ("implemented-in::objc", "http://wiki.theory.org/YourLanguageSucks#Objective-C_sucks_because:"),
    ("implemented-in::php", "http://me.veekun.com/blog/2012/04/09/php-a-fractal-of-bad-design/"),
    ("implemented-in::ruby", "http://wiki.theory.org/YourLanguageSucks#Ruby_sucks_because:"),
    ("implemented-in::tcl", "http://wiki.tcl.tk/640"),
])
def parse_vote(file):
    """
    Parse a popcon vote file, generating the names of the valid packages in
    the vote

    ``file`` is any iterable of text lines (an open file, a list of
    strings, ...); lines may or may not carry a line terminator.

    Yields ``(package_name, weight)`` tuples, where the package name is the
    third space-separated field and the weight depends on the remaining
    fields:

      * 0.1 -- fourth field is ``<NOFILES>`` (empty/virtual packages)
      * 1.0 -- exactly four fields (used packages)
      * 0.3 -- fifth field is ``<OLD>`` (unused packages)
      * 0.8 -- fifth field is ``<RECENT-CTIME>`` (recently installed packages)

    Header/footer lines, lines with fewer than four fields, and lines whose
    fifth field is anything else are skipped.
    """
    # Weight for each recognised fifth-field marker.
    tag_weights = {
        '<OLD>': 0.3,           # Unused packages
        '<RECENT-CTIME>': 0.8,  # Recently installed packages
    }

    for line in file:
        # "END-POPULARITY..." does not start with "POPULARITY", so both
        # prefixes have to be listed.
        if line.startswith(("POPULARITY", "END-POPULARITY")):
            continue

        # Strip only the line terminator. Blindly dropping the last
        # character would eat real data on a line with no trailing newline
        # (turning e.g. "<NOFILES>" into "<NOFILES").
        data = line.rstrip("\r\n").split(" ")
        if len(data) < 4:
            continue

        name = data[2]
        if data[3] == '<NOFILES>':
            # Empty/virtual packages
            yield name, 0.1
        elif len(data) == 4:
            # Used packages
            yield name, 1.
        elif data[4] in tag_weights:
            yield name, tag_weights[data[4]]
apt_cache = apt.Cache()
# Number of installed packages seen in the vote, keyed by "implemented-in::*" tag.
by_lang = Counter()

with open("/var/log/popularity-contest") as infd:
    db = debtags.DB()
    # Read the tag database inside its own "with" so the file handle is
    # closed as soon as it has been loaded.
    with open('/var/lib/debtags/package-tags') as tagfd:
        db.read(tagfd)

    # Seed every known implementation language with zero so that languages
    # with no matching package still appear in the report.
    for t in db.iter_tags():
        if t.startswith("implemented-in::"):
            by_lang[t] = 0

    # NOTE(review): the weight (tf) produced by parse_vote is not used here;
    # every package counts once regardless of its usage class.
    for pkg, tf in parse_vote(infd):
        if pkg not in apt_cache: continue
        aptpkg = apt_cache[pkg]
        if not aptpkg.installed: continue
        tags = db.tags_of_package(pkg)

        if args:
            # Tag mode: list each package carrying the requested tag.
            if args[0] in tags:
                print(pkg)
        else:
            # Report mode: tally the package under each of its languages.
            for t in tags:
                if t.startswith("implemented-in::"):
                    by_lang[t] += 1

if not args:
    # items() and the parenthesised single-argument print behave the same
    # on Python 2 and Python 3.
    for k, v in sorted(by_lang.items()):
        if v == 0:
            comment = "WIN!"
        else:
            comment = horror_urls.get(k, "")
        print("%4d %-30s %s" % (v, k, comment))