1 #!/usr/bin/python
2 #emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
3 #ex: set sts=4 ts=4 sw=4 noet:
4 #------------------------- =+- Python script -+= -------------------------
5 """
6 Yaroslav Halchenko Dartmouth
7 web: http://www.onerussian.com College
8 e-mail: yoh@onerussian.com ICQ#: 60653192
10 DESCRIPTION (NOTES):
12 Extracts References: field from debian/copyright files...
14 COPYRIGHT:
15 2010, Yaroslav Halchenko
16 2010, Michael Hanke
18 LICENSE: MIT
20 Permission is hereby granted, free of charge, to any person obtaining a copy
21 of this software and associated documentation files (the "Software"), to deal
22 in the Software without restriction, including without limitation the rights
23 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
24 copies of the Software, and to permit persons to whom the Software is
25 furnished to do so, subject to the following conditions:
27 The above copyright notice and this permission notice shall be included in
28 all copies or substantial portions of the Software.
30 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
31 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
32 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
33 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
34 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
35 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
36 THE SOFTWARE.
37 """
38 #-----------------\____________________________________/------------------
# Version of this script.
__version__ = '0.0.1'
# Template for the location of a package's copyright file; the '%s'
# placeholder is filled in with the package name.
DPKG_DOC_PATH = '/usr/share/doc/%s/copyright'
def get_references(filename):
    """Extract the References: field from a machine-readable copyright file.

    Leading blank lines and lines starting with '#' are skipped.  The first
    remaining line must start with 'Format-Specification' (case-insensitive),
    otherwise the file is not treated as machine-readable.

    Parameters
    ----------
    filename : str
      Path to the copyright file.

    Returns
    -------
    str or None
      Text of the first References: field -- whatever follows the colon on
      the field line plus all continuation lines (those starting with a
      space or a tab), with leading whitespace stripped.  None if the file
      is empty, is not in the machine-readable format, or has no such field.

    Raises
    ------
    IOError
      If the file cannot be opened (propagated from open()).
    """
    # 'with' guarantees the file gets closed on every return path
    with open(filename, 'r') as f:
        # Find the first line which is neither empty nor a '#' comment
        header = None
        for line in f:
            if line.strip() and not line.startswith('#'):
                header = line
                break
        if header is None:
            # Empty file, or nothing but blank/comment lines
            return None

        # Check if it is in machine readable format
        if not header.lstrip().lower().startswith('format-specification'):
            return None

        for line in f:
            if ':' not in line:
                continue
            if not line.lower().startswith('references'):
                continue
            # We got our hit -- keep the leftover of the field line itself
            # and append the continuation lines which follow it
            ref = line[line.index(':') + 1:].strip(' ')
            for cont in f:
                if not cont.startswith((' ', '\t')):
                    break
                ref += cont
            return ref.lstrip()

    return None                         # we saw nothing
85 # Collect all references
def get_pkgs_references(pkgs):
    """Collect references for a set of packages, grouping identical ones.

    Parameters
    ----------
    pkgs : iterable of str
      Package names or paths to copyright files.  An entry starting with
      '/' is used as a path as is; any other entry is taken to be a package
      name and substituted into DPKG_DOC_PATH.

    Returns
    -------
    dict
      Maps a tuple of entries from `pkgs` sharing the same references to
      the text of those references.  Entries for which get_references()
      returned nothing are left out.
    """
    # We start with reverse references so we could accumulate for
    # multiple packages having the same references.  We can't rely on
    # Name since it is optional
    rev_refs = {}
    for p in pkgs:
        # XXX ugly, just for now
        # if not a path, only then assume package name
        if p.startswith('/'):
            filename = p
        else:
            filename = DPKG_DOC_PATH % p
        ref = get_references(filename)
        if not ref:                     # skip missing ones
            continue
        rev_refs[ref] = rev_refs.get(ref, ()) + (p,)

    # return regular dictionary which goes from tuple of pkgs to references
    # (.items() is available in both Python 2 and 3, unlike .iteritems())
    return dict((names, ref) for (ref, names) in rev_refs.items())
def output_as_bibtex(refs):
    """Print collected references, one group of packages at a time.

    Parameters
    ----------
    refs : dict
      Maps a tuple of package names to the text of their shared references.

    For each key, in sorted order, a "Packages: ..." line listing the unique
    names is printed to stdout, followed by the references text.
    """
    for pkgs in sorted(refs):
        # Sort unique names so the output is stable between runs (plain
        # set() has arbitrary iteration order)
        names = ', '.join(sorted(set(pkgs)))
        # Single parenthesized argument works as both Python 2 print
        # statement and Python 3 print function
        print("Packages: %s\n%s" % (names, refs[pkgs]))
if __name__ == '__main__':
    # Run as a script: report references for every copyright file found
    # under /usr/share/doc.
    #
    # Querying by package names is possible as well, e.g.
    #   output_as_bibtex(get_pkgs_references(['lipsia', 'lipsia-doc', 'fsl']))
    # and for timing in IPython:
    #   %timeit get_pkgs_references(copyright_files)
    import glob

    copyright_files = glob.glob('/usr/share/doc/*/copyright')
    collected = get_pkgs_references(copyright_files)
    output_as_bibtex(collected)
