2 # Written by Alec Warner for the Gentoo Foundation 2008
3 # This code is hereby placed into the public domain.
5 """Crawls an ebuild repository for local use flags and generates documentation.
7 This module attempts to read metadata.xml files in an ebuild repository and
8 uses the <flag> xml tags to generate a set of documentation for local USE
11 It is a non-goal of this script to validate XML contents.
14 TEXT, <pkg>, <pkg>, TEXT. is difficult to parse into text and requires icky
15 rules; see _GetTextFromNode for the nasty details.
18 __author__ = "Alec Warner <antarus@gentoo.org>"
27 from xml.dom import minidom
28 from xml.parsers import expat
30 METADATA_XML = 'metadata.xml'
class RepositoryError(Exception):
    """Signals a problem encountered while reading an ebuild repository."""
def FindMetadataFiles(repo_path, category_path, output=sys.stdout):
    """Locate metadata files in repo_path and write their local USE flags.

    Each category directory of the repository is listed, the metadata.xml
    of every package found is parsed, and one line per flag is written to
    output in the form 'category/package:flag - description'.

    Args:
      repo_path: path to repository.
      category_path: path to a category file (None is ok).
      output: file-like object to write output to.

    Raises:
      RepositoryError: propagated from GetCategories when the categories
        file cannot be opened.
      IOError/OSError: if a package's metadata.xml cannot be opened.
    """
    profile_path = os.path.join(repo_path, 'profiles')
    logging.info('path to profile is: %s', profile_path)
    categories = GetCategories(profile_path, category_path)

    packages = []
    for cat in categories:
        cat_path = os.path.join(repo_path, cat)
        logging.debug('path to category %s is %s', cat, cat_path)
        try:
            tmp_pkgs = GetPackages(cat_path)
        except OSError as e:
            if e.errno == errno.ENOENT:
                logging.error('skipping %s because it was not in %s',
                              cat, repo_path)
            else:
                logging.error('skipping %s: %s', cat, e)
            # Without this the loop would go on to use a tmp_pkgs that is
            # either unbound or left over from the previous category.
            continue
        packages.extend(os.path.join(cat_path, pkg) for pkg in tmp_pkgs)

    total = len(packages)
    # Count from 1 so the progress message ends at total/total.
    for num, pkg_path in enumerate(packages, 1):
        metadata_path = os.path.join(pkg_path, METADATA_XML)
        logging.info('processing %s (%s/%s)', metadata_path, num, total)
        # The context manager closes the handle as soon as parsing is done.
        with open(metadata_path, 'rb') as f:
            metadata = GetLocalFlagInfoFromMetadataXml(f)
        # The last two path components are the category and the package.
        pkg_split = pkg_path.split('/')
        for k, v in metadata.items():
            output.write('%s/%s:%s - %s\n' % (pkg_split[-2], pkg_split[-1],
                                              k, v))
def _GetTextFromNode(node):
    """Given an XML node, try to turn all its children into text.

    A node that carries a value of its own (a text node) is returned with
    every run of whitespace collapsed to a single space, because \\n and \\t
    are not valid in use.local.desc. Any other node (often a <pkg> element)
    is rendered by concatenating the text of its children, recursively.

    Args:
      node: a Node instance.

    Returns:
      A (text, children) tuple. text is the flattened string; children is 0
      when the text came from the node's own value and 1 when it was
      assembled from child nodes. The regex already takes care of spacing,
      so the flag is informational only.
    """
    if node.nodeValue:
        return (re.sub(r'\s+', ' ', node.nodeValue), 0)

    pieces = [_GetTextFromNode(child)[0] for child in node.childNodes]
    return (''.join(pieces), 1)
def GetLocalFlagInfoFromMetadataXml(metadata_file):
    """Determine use.local.desc information from metadata files.

    Args:
      metadata_file: a file-like object holding metadata.xml

    Returns:
      A dict mapping the 'name' attribute of every <flag> element to its
      description text with surrounding whitespace removed. The dict is
      empty when the document has no <flag> elements or cannot be parsed.
    """
    flags = {}
    try:
        dom_tree = minidom.parseString(metadata_file.read())
    except expat.ExpatError as e:
        # Validating XML is a non-goal: report the broken file and move on.
        logging.error('%s (in file: %s)', e, metadata_file)
        return flags

    for flag in dom_tree.getElementsByTagName('flag'):
        use_flag = flag.getAttribute('name')
        desc, unused_children = _GetTextFromNode(flag)
        # Whitespace collapsing can leave one blank at either end of the text.
        flags[use_flag] = desc.strip()
    return flags
def GetPackages(cat_path):
    """Return a list of packages for a given category.

    Args:
      cat_path: path to a category directory.

    Returns:
      A list with the name of every entry in cat_path, leaving out the
      category's own metadata.xml and the 'CVS' and '.svn' entries.

    Raises:
      OSError: if cat_path cannot be listed.
    """
    skipped = (METADATA_XML, 'CVS', '.svn')
    # A comprehension yields a real list on Python 2 and 3 alike.
    return [name for name in os.listdir(cat_path) if name not in skipped]
def GetCategories(profile_path, categories_path):
    """Return the categories for a given repository.

    Args:
      profile_path: path to profiles/ dir of a repository.
      categories_path: path to a categories file; when empty or None the
        file 'categories' inside profile_path is read instead.

    Returns:
      a list of categories, one per non-blank line of the file, with
      surrounding whitespace removed.

    Raises:
      RepositoryError: if the categories file cannot be opened.
    """
    if not categories_path:
        categories_path = os.path.join(profile_path, 'categories')

    try:
        # Text mode keeps the names as str, so they can be joined to paths.
        f = open(categories_path, 'r')
    except (IOError, OSError) as e:
        raise RepositoryError('Problem while opening %s: %s' % (
            categories_path, e))

    with f:
        names = (line.strip() for line in f)
        # A blank line would name an empty category, which resolves to the
        # repository root itself once joined to the repository path.
        return [cat for cat in names if cat]
def GetOpts():
    """Simple Option Parsing.

    Parses the process command line. --repo_path is mandatory; when it is
    missing the parser reports the error and exits.

    Returns:
      An (opts, unused_args) tuple as produced by optparse: the parsed
      option values and the leftover positional arguments.
    """
    parser = optparse.OptionParser()
    parser.add_option('-r', '--repo_path', help=('path to repository from '
                      'which the documentation will be generated.'))
    # help must be one string: a stray comma between the two fragments
    # would turn it into a tuple and make --help fail.
    parser.add_option('-c', '--category_file', help=('path to a category '
                      'file if repo_path lacks a profile/category file'))

    opts, unused_args = parser.parse_args()

    if not opts.repo_path:
        parser.error('--repo_path is a required option')

    logging.debug('REPO_PATH is %s', opts.repo_path)

    return (opts, unused_args)
189 opts, unused_args = GetOpts()
190 FindMetadataFiles(opts.repo_path, opts.category_file)
193 if __name__ == '__main__':