#!/usr/bin/env python
#
# macroscope -- generate reports on WML macro and resource usage
#
# By Eric S. Raymond, April 2007.
# (Yes, this *is* named after an ancient Piers Anthony novel.)
#
# This tool cross-references macro definitions with macro calls, and
# resource (sound or image) files with uses of the resources in WML,
# and generates various useful reports from such cross-references.
#
# It takes a list of directories as arguments; if none is given, it
# behaves as though the current directory had been specified as a
# single argument.  Each directory is treated as a separate domain for
# macro and resource visibility purposes, except that macros and resources
# under the first directory are made visible in all later ones.  (Typically
# the first directory should point at a copy of mainline and all later
# ones at UMC.)
#
# The checking done by this tool has several known flaws:
#
# (1) It doesn't actually evaluate file inclusions.  Instead, any
# macro definition satisfies any macro call made under the same
# directory.  Exception: when an #undef is detected, the macro is
# tagged local and not visible outside the span of lines where it's
# defined.
#
# (2) It doesn't read [binary_path] tags, as this would require
# implementing a WML parser.  Instead, it assumes that a resource-file
# reference can be satisfied by any matching image file from anywhere
# in the same directory it came from.  The resources under the *first*
# directory argument (only) are visible everywhere.
#
# (3) A reference with embedded {}s in a macro will have the macro's
# formal args substituted in at WML evaluation time.  Instead, this
# tool treats each {} as a .* wildcard and considers the reference to
# match *every* resource filename that matches that pattern.  Under
# appropriate circumstances this might report a resource filename
# statically matching the pattern as having been referenced even
# though none of the actual macro calls would actually generate it.
# # (4) There are some implicit references. Notably, if an attack name # is specified but no icon is given, the attack icon will default to # a name generated from the attack name, # # Problems (1) and (2) imply that this tool might conceivably report # that a reference has been satisfied when under actual # WML-interpreter rules it has not. Problem (4) means the reverse # can also occur. # # The reporting format is compatible with GNU Emacs compile mode. import sys, os, time, re, getopt, sre_constants, md5 resource_extensions = ("png", "jpg", "ogg", "wav") def htmlize(line): "HTML-escape a text line" return line.replace("<", "<").replace(">", ">").replace("&", "&") def interpret(lines, css): "Interpret the ! convention for .cfg comments." inlisting = False outstr = '
' % css for line in lines: line = line.rstrip() if not inlisting and not line: outstr += "
" continue if not inlisting and line[0] == '!': outstr += "
\n" inlisting = True bracketdepth = curlydepth = 0 line = htmlize(line) if inlisting: outstr += line[1:] + "\n" else: outstr += line + "\n" if inlisting: if line and line[0] != '!': outstr += "\n
" inlisting = False if not inlisting: outstr += "
\n" else: outstr += "\n" outstr = outstr.replace("", "") outstr = outstr.replace("\n\n", "\n") return outstr class Forest: "Return an iterable directory forest object." def __init__(self, dirpath, exclude=None): "Get the names of all files under dirpath, ignoring .svn directories." self.forest = [] self.dirpath = dirpath for dir in dirpath: subtree = [] if os.path.isdir(dir): # So we skip .cfgs in a UMC mirror os.path.walk(dir, lambda arg, dir, names: subtree.extend(map(lambda x: os.path.normpath(os.path.join(dir, x)), names)), None) self.forest.append(subtree) for i in range(len(self.forest)): self.forest[i] = filter(lambda x: ".svn" not in x, self.forest[i]) self.forest[i] = filter(lambda x: not os.path.isdir(x), self.forest[i]) if exclude: self.forest[i] = filter(lambda x: not re.search(exclude, x), self.forest[i]) self.forest[i] = filter(lambda x: not x.endswith("-bak"), self.forest[i]) # Compute cliques (will be used later for visibility checks) self.clique = {} counter = 0 for tree in self.forest: for filename in tree: self.clique[filename] = counter counter += 1 def parent(self, filename): "Return the directory root that caused this path to be included." return self.dirpath[self.clique[filename]] def neighbors(self, fn1, fn2): "Are two files from the same tree?" return self.clique[fn1] == self.clique[fn2] def flatten(self): allfiles = [] for tree in self.forest: allfiles += tree return allfiles def generator(self): "Return a generator that walks through all files." for tree in self.forest: for filename in tree: yield filename def iswml(filename): "Is the specified filename WML?" return filename.endswith(".cfg") def isresource(filename): "Is the specifired name a resource?" (root, ext) = os.path.splitext(filename) return ext and ext[1:] in resource_extensions class reference: "Describes a location by file and line." 
def __init__(self, filename, lineno=None, docstring=None): self.filename = filename self.lineno = lineno self.references = {} self.docstring = docstring self.undef = None def append(self, fn, n): if fn not in self.references: self.references[fn] = [] self.references[fn].append(n+1) def dump_references(self): for (file, linenumbers) in self.references.items(): print " %s: %s" % (file, `linenumbers`[1:-1]) def __cmp__(self, other): "Compare two documentation objects for place in the sort order." # Major sort by file, minor by line number. This presumes that the # files correspond to coherent topics and gives us control of the # sequence. byfile = cmp(self.filename, other.filename) if byfile: return byfile else: return cmp(self.lineno, other.lineno) def __str__(self): if self.lineno: return '"%s", line %d' % (self.filename, self.lineno) else: return self.filename class CrossRef: macro_reference = re.compile(r"\{([A-Z_][A-Z0-9_:]*[A-Za-z0-9_])\b") file_reference = re.compile(r"[A-Za-z0-9{}.][A-Za-z0-9_/+{}.-]*\.(" + "|".join(resource_extensions) + ")") def mark_matching_resources(self, pattern, fn, n): "Mark all definitions matching a specified pattern with a reference." pattern = pattern.replace("+", r"\+") try: pattern = re.compile(os.sep + pattern + "$") except sre_constants.error: print >>sys.stderr, "macroscope: confused by %s" % pattern return None key = None for trial in self.fileref: if pattern.search(trial) and self.visible_from(trial, fn, n): key = trial self.fileref[key].append(fn, n) return key def visible_from(self, defn, fn, n): "Is specified definition visible from the specified file and line?" if type(defn) == type(""): defn = self.fileref[defn] if defn.undef != None: # Local macros are only visible in the file where they were defined # FIXME: we should check line spans here. return defn.filename == fn elif defn.filename in self.filelist.forest[0]: # Macros in the first subtree are visible everywhere. 
return True elif not self.filelist.neighbors(defn.filename, fn): # Otherwise, must be in the same subtree. return False else: # If the two files are in the same subtree, assume visibility. # This doesn't match the actual preprocessor semantics. # It means any macro without an undef is visible anywhere in the # same argument directory. # # We can't do better than this without a lot of hairy graph- # coloring logic to simulate include path interpretation. # If that logic ever gets built, it will go here. return True def __init__(self, dirpath, exclude="", warnlevel=0): "Build cross-reference object from the specified filelist." self.dirpath = dirpath self.filelist = Forest(dirpath, exclude) self.xref = {} self.fileref = {} self.noxref = False for filename in self.filelist.generator(): if warnlevel > 1: print filename + ":" if isresource(filename): self.fileref[filename] = reference(filename) elif iswml(filename): # It's a WML file, scan for macro defitions dfp = open(filename) state = "outside" for (n, line) in enumerate(dfp): if warnlevel > 1: print `line`[1:-1] if line.strip().startswith("#define"): tokens = line.split() name = tokens[1] here = reference(filename, n+1, line) here.hash = md5.new() here.docstring = line.lstrip()[8:] # Strip off #define_ state = "macro_header" continue elif state != 'outside' and line.strip().endswith("#enddef"): here.hash.update(line) here.hash = here.hash.digest() if name in self.xref: for defn in self.xref[name]: if not self.visible_from(defn, filename, n): continue elif defn.hash != here.hash: print >>sys.stderr, \ "%s: overrides different %s definition at %s" \ % (here, name, defn) elif warnlevel > 0: print >>sys.stderr, \ "%s: duplicates %s definition at %s" \ % (here, name, defn) if name not in self.xref: self.xref[name] = [] self.xref[name].append(here) state = "outside" elif state == "macro_header" and line and line[0] != "#": state = "macro_body" if state == "macro_header": here.docstring += line[1:] if state in 
("macro_header", "macro_body"): here.hash.update(line) elif line.strip().startswith("#undef"): tokens = line.split() name = tokens[1] if name in self.xref and self.xref[name]: self.xref[name][-1].undef = n else: print "%s: unbalanced #undef on %s" \ % (reference(filename, n), name) dfp.close() elif filename.endswith(".def"): # It's a list of names to be considered defined self.noxref = True dfp = open(filename) for line in dfp: self.xref[line.strip()] = True dfp.close() # Next, decorate definitions with all references from the filelist. self.unresolved = [] self.missing = [] formals = [] for fn in self.filelist.generator(): if iswml(fn): rfp = open(fn) for (n, line) in enumerate(rfp): if line.startswith("#define"): formals = line.split()[2:] elif line.startswith("#enddef"): formals = [] if '#' in line: line = line.split('#')[0] if not line: continue # Find references to macros for match in re.finditer(CrossRef.macro_reference, line): name = match.group(1) candidates = 0 if name in formals: continue elif name in self.xref: for defn in self.xref[name]: if self.visible_from(defn, fn, n+1): candidates += 1 defn.append(fn, n+1) if candidates > 1: print "%s: more than one definition of %s is visible here." % (reference(fn, n), name) if candidates == 0: self.unresolved.append((name, reference(fn,n+1))) # Find references to resource files for match in re.finditer(CrossRef.file_reference, line): name = match.group(0) key = None # If name is already in our resource list, it's easy. if name in self.fileref and self.visible_from(name, fn, n): self.fileref[trial].append(fn, n+1) continue # If the name contains subtitutable parts, count # it as a reference to everything the substitutions # could potentially match. 
elif '{' in name: pattern = re.sub(r"\{[^}]*\}", '.*', name) key = self.mark_matching_resources(pattern, fn,n+1) if key: self.fileref[key].append(fn, n+1) else: candidates = [] for trial in self.fileref: if trial.endswith(os.sep + name) and self.visible_from(trial, fn, n): key = trial self.fileref[trial].append(fn, n+1) candidates.append(trial) if len(candidates) > 1: print "%s: more than one definition of %s is visible here (%s)." % (reference(fn, n), name, ", ".join(candidates)) if not key: self.missing.append((name, reference(fn,n+1))) rfp.close() def xrefdump(self, pred=None): "Report resolved macro references." for name in self.xref: for defn in self.xref[name]: if pred and not pred(name, defn): continue defn.dump_references() if defn.undef: type = "local" else: type = "global" nrefs = len(defn.references) if nrefs == 0: print "%s: %s macro %s is unused" % (defn, type, name) else: print "%s: %s macro %s is used in %d files:" % (defn, type, name, nrefs) for (name, defloc) in self.fileref.items(): if pred and not pred(name, defloc): continue nrefs = len(defloc.references) if nrefs == 0: print "Resource %s is unused" % defloc else: print "Resource %s is used in %d files:" % (defloc, nrefs) defloc.dump_references() def unresdump(self): "Report unresolved references." if len(self.unresolved) == 0 and len(self.missing) == 0: print "# No unresolved references" else: #print self.fileref.keys() print "# Unresolved references:" for (name, reference) in self.unresolved + self.missing: print "%s -> %s" % (reference, name) def deflist(self, pred=None): "List all resource definitions." for name in self.xref: for defn in self.xref[name]: if not pred or pred(name, defn): print name for (name, defloc) in self.fileref.items(): if not pred or pred(name, defloc): print name def extracthelp(self, pref, fp): "Deliver all macro help comments in HTML form." # Bug: finds only the first definition of each macro in scope. 
doclist = self.xref.keys() doclist = filter(lambda x: self.xref[x][0].docstring.count("\n") > 1, doclist) doclist.sort(lambda x, y: cmp(self.xref[x][0], self.xref[y])) outstr = "" filename = None counted = 0 for name in doclist: entry = self.xref[name][0] if entry.filename != filename: if counted: outstr += "\n" counted += 1 filename = entry.filename if filename.startswith(pref): displayname = filename[len(pref):] else: displayname = filename outstr += "