2 # Written by Alec Warner for the Gentoo Foundation 2008
3 # This code is hereby placed into the public domain.
5 """Crawls an ebuild repository for local use flags and generates documentation.
7 This module attempts to read metadata.xml files in an ebuild repository and
8 uses the <flag> xml tags to generate a set of documentation for local USE
11 It is a non-goal of this script to validate XML contents.
14 TEXT, <pkg>, <pkg>, TEXT. is difficult to parse into text and requires icky
15 rules; see _GetTextFromNode for the nasty details.
18 __author__ = "Alec Warner <antarus@gentoo.org>"
27 from xml.dom import minidom
28 from xml.parsers import expat
30 METADATA_XML = 'metadata.xml'
class RepositoryError(Exception):
    """Signals that the ebuild repository could not be read as expected."""
def FindMetadataFiles(repo_path, category_path, output=sys.stdout):
    """Locate metadata files in repo_path and write out their local USE flags.

    Every category of the repository is listed for package directories.  For
    each package the flag descriptions found in its metadata.xml are written
    to output, one line per flag, as 'category/package:flag - description'.

    Args:
      repo_path: path to repository.
      category_path: path to a category file (None is ok).
      output: file-like object to write output to.
    Returns:
      Nothing.
    Raises:
      RepositoryError: if the categories file cannot be opened.
      IOError: if a metadata.xml exists but cannot be opened.
    """
    profile_path = os.path.join(repo_path, 'profiles')
    logging.info('path to profile is: %s', profile_path)
    categories = GetCategories(profile_path, category_path)

    packages = []
    for cat in categories:
        cat_path = os.path.join(repo_path, cat)
        logging.debug('path to category %s is %s', cat, cat_path)
        try:
            tmp_pkgs = GetPackages(cat_path)
        except OSError as e:
            if e.errno == errno.ENOENT:
                logging.error('skipping %s because it was not in %s',
                              cat, repo_path)
            else:
                logging.error('skipping %s: %s', cat, e)
            # Skip this category: falling through would reuse the package
            # list of the previous category (or hit an unbound name).
            continue
        packages.extend(os.path.join(cat_path, pkg) for pkg in tmp_pkgs)

    total = len(packages)
    for index, pkg_path in enumerate(packages):
        metadata_path = os.path.join(pkg_path, METADATA_XML)
        # Report progress 1-based so the last package reads (total/total).
        logging.info('processing %s (%s/%s)', metadata_path, index + 1, total)
        try:
            f = open(metadata_path, 'rb')
        except IOError as e:
            if e.errno == errno.ENOENT:
                logging.error(
                    'Time to shoot the maintainer: %s does not contain a metadata.xml!',
                    pkg_path)
                continue
            # remember to re-raise if it's not a missing file
            raise
        try:
            metadata = GetLocalFlagInfoFromMetadataXml(f)
        finally:
            # Close each handle as soon as it is parsed; a full tree has
            # thousands of packages.
            f.close()

        category = os.path.basename(os.path.dirname(pkg_path))
        package = os.path.basename(pkg_path)
        # Sorted so the generated documentation is reproducible between runs.
        for flag, desc in sorted(metadata.items()):
            try:
                output.write('%s/%s:%s - %s\n' % (category, package, flag, desc))
            except UnicodeEncodeError:
                logging.error('Unicode found in %s, not generating to output',
                              pkg_path)
88 def _GetTextFromNode(node):
89 """Given an XML node, try to turn all it's children into text.
92 node: a Node instance.
96 This function has a few tweaks 'children' and 'base_children' which attempt
97 to aid the parser in determining where to insert spaces. Nodes that have
98 no children are 'raw text' nodes that do not need spaces. Nodes that have
99 children are 'complex' nodes (often <pkg> nodes) that usually require a
100 trailing space to ensure sane output.
102 NOTE: The above comment is now bullocks as the regex handles spacing; I may
103 remove the 'children' crap in a future release but it works for now.
105 Strip out \n and \t as they are not valid in use.local.desc.
110 data = node.nodeValue
112 whitespace = re.compile('\s+')
113 data = whitespace.sub(' ', data)
114 return (data, children)
118 for child in node.childNodes:
119 child_desc, children = _GetTextFromNode(child)
121 return (desc, base_children)
def GetLocalFlagInfoFromMetadataXml(metadata_file):
    """Determine use.local.desc information from metadata files.

    Args:
      metadata_file: a file-like object holding metadata.xml
    Returns:
      A dict mapping each <flag> element's 'name' attribute to its description
      text, with whitespace collapsed and leading/trailing blanks removed.
      The dict is empty when the XML cannot be parsed; the parse error is
      logged rather than raised.
    """
    flags = {}

    try:
        dom_tree = minidom.parseString(metadata_file.read())
    except expat.ExpatError as e:
        # Prefer the file name in the log; fall back to the object itself for
        # file-likes that have no name (e.g. in-memory buffers).
        logging.error('%s (in file: %s)', e,
                      getattr(metadata_file, 'name', metadata_file))
        return flags

    for flag in dom_tree.getElementsByTagName('flag'):
        use_flag = flag.getAttribute('name')
        desc, unused_children = _GetTextFromNode(flag)
        # Indented XML leaves a collapsed blank at both ends of the text.
        flags[use_flag] = desc.strip()

    return flags
def GetPackages(cat_path):
    """Return a list of packages for a given category.

    Args:
      cat_path: path to a category directory.
    Returns:
      A list with the name of every entry in cat_path except the category's
      own metadata.xml and version-control directories (CVS, .svn).
    Raises:
      OSError: if cat_path cannot be listed.
    """
    ignored = (METADATA_XML, 'CVS', '.svn')
    # A real list, not filter(): on Python 3 filter() is a lazy iterator.
    return [entry for entry in os.listdir(cat_path) if entry not in ignored]
def GetCategories(profile_path, categories_path):
    """Return a list of categories for a given repository.

    Args:
      profile_path: path to profiles/ dir of a repository.
      categories_path: path to a categories file; when empty or None the
        'categories' file inside profile_path is used.
    Returns:
      a list of categories, one per non-blank line of the file, stripped of
      surrounding whitespace.
    Raises:
      RepositoryError: if the categories file cannot be opened.
    """
    if not categories_path:
        categories_path = os.path.join(profile_path, 'categories')

    try:
        # Text mode: the names are joined with str paths by the caller.
        f = open(categories_path, 'r')
    except (IOError, OSError) as e:
        raise RepositoryError('Problem while opening %s: %s' % (
            categories_path, e))

    try:
        stripped = [line.strip() for line in f]
    finally:
        f.close()

    # Drop blank lines: an empty category name would resolve to the
    # repository root itself.
    return [cat for cat in stripped if cat]
def GetOpts():
    """Simple Option Parsing.

    Reads the command line from sys.argv.

    Returns:
      An (opts, args) tuple as produced by optparse: opts carries repo_path
      and category_file, args holds any leftover positional arguments.
      Exits through parser.error() when --repo_path is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option('-r', '--repo_path', help=('path to repository from '
                      'which the documentation will be generated.'))
    # The two literals must be adjacent (no comma) so they concatenate into
    # one string; a tuple here makes optparse crash when rendering --help.
    parser.add_option('-c', '--category_file', help=('path to a category '
                      'file if repo_path lacks a profile/category file'))

    opts, unused_args = parser.parse_args()

    if not opts.repo_path:
        parser.error('--repo_path is a required option')

    logging.debug('REPO_PATH is %s', opts.repo_path)

    return (opts, unused_args)
200 opts, unused_args = GetOpts()
201 FindMetadataFiles(opts.repo_path, opts.category_file)
204 if __name__ == '__main__':