#!/usr/bin/env python
#
# macroscope -- generate reports on WML macro and resource usage
#
# By Eric S. Raymond April 2007.
# (Yes, this *is* named after an ancient Piers Anthony novel.)
#
# This tool cross-references macro definitions with macro calls, and
# resource (sound or image) files with uses of the resources in WML,
# and generates various useful reports from such cross-references.
#
# The checking done by this tool has a few flaws:
#
# (1) It doesn't actually evaluate file inclusions.  Instead, any
# macro definition from anywhere in the set of input trees can be used
# to satisfy a macro call anywhere else.
#
# (2) It doesn't read [binary_path] tags, as this would require
# implementing a WML parser.  Instead, it assumes that a resource-file
# reference can be satisfied by any matching image file from anywhere
# in the set of input trees.
#
# (3) A reference with embedded {}s in a macro will have the macro's
# formal args substituted in at WML evaluation time.  Instead, this
# tool treats each {} as a .* wildcard and considers the reference to
# match *every* resource filename that matches that pattern.  Under
# appropriate circumstances this might report a resource filename
# statically matching the pattern as having been referenced even
# though none of the actual macro calls would actually generate it.
#
# (4) There are some implicit references.  Notably, if an attack name
# is specified but no icon is given, the attack icon will default to
# a name generated from the attack name.
#
# Problems (1) and (2) imply that this tool might conceivably report
# that a reference has been satisfied when under actual
# WML-interpreter rules it has not.  Problem (4) means the reverse
# can also occur.
#
# The reporting format is compatible with GNU Emacs compile mode.
import sys, os, time, re, getopt, sre_constants

resource_extensions = ("png", "jpg", "ogg", "wav")


def htmlize(line):
    "HTML-escape a text line."
    # The ampersand has to be escaped first; otherwise the ampersands
    # introduced by the &lt; / &gt; entities would be escaped again.
    return line.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def interpret(lines, css):
    """Interpret the ! convention for .cfg comments.

    lines is a sequence of comment lines with the leading '#' already
    removed; css is the class attribute given to the opening paragraph.
    Lines starting with '!' are emitted as a preformatted listing (minus
    the '!'); a blank line outside a listing starts a new paragraph.
    Returns the generated HTML as one string.
    """
    # NOTE(review): the tag names below were reconstructed from context
    # (paragraph / preformatted listing) -- confirm against the stylesheet
    # used with the generated documentation.
    inlisting = False
    out = ['<p class="%s">' % css]
    for line in lines:
        line = line.rstrip()
        # Leave listing mode *before* emitting the first non-'!' line, so
        # that line is rendered as ordinary text and keeps its first
        # character.  Blank lines do not end a listing.
        if inlisting and line and line[0] != '!':
            out.append("</pre>\n<p>")
            inlisting = False
        if not inlisting:
            if not line:
                out.append("</p><p>")
                continue
            if line[0] == '!':
                out.append("</p>\n<pre>")
                inlisting = True
        if inlisting:
            # Drop the '!' marker, then escape what is left.
            out.append(htmlize(line[1:]) + "\n")
        else:
            out.append(htmlize(line) + "\n")
    if inlisting:
        out.append("</pre>\n")
    else:
        out.append("</p>\n")
    outstr = "".join(out)
    # Tidy up empty paragraphs and doubled newlines.
    outstr = outstr.replace("<p></p>", "")
    outstr = outstr.replace("\n\n", "\n")
    return outstr


def allfiles(dirpath, exclude):
    """Get the names of all files under dirpath, ignoring .svn directories.

    dirpath is a list of directories to walk.  exclude is a regexp (or an
    empty string for no exclusion); paths it matches anywhere are dropped.
    Paths containing ".svn" and names ending in "-bak" are always dropped.
    Returns a list of normalized paths.
    """
    datafiles = []
    for top in dirpath:
        # os.walk is used rather than os.path.walk because it exists in
        # both Python 2 and Python 3.
        for (root, subdirs, files) in os.walk(top):
            for name in files:
                datafiles.append(os.path.normpath(os.path.join(root, name)))
    datafiles = [x for x in datafiles if ".svn" not in x]
    if exclude:
        excluded = re.compile(exclude)
        datafiles = [x for x in datafiles if not excluded.search(x)]
    return [x for x in datafiles if not x.endswith("-bak")]


def iswml(filename):
    "Is the specified filename WML?"
    return filename.endswith(".cfg")


def _resource_name(filename):
    """Return the name under which a resource file is registered.

    A sound file is named by the part of its path after "sounds/" or
    "music/"; an image file by the part after "images/".  A file that
    lives under none of these is registered under its full path.
    """
    (root, ext) = os.path.splitext(filename)
    if ext in (".ogg", ".wav"):
        superdirs = ("music", "sounds")
    else:
        superdirs = ("images",)
    name = filename
    for superdir in superdirs:
        foundit = filename.find(superdir)
        if foundit > -1:
            # Skip the directory name and the separator following it.
            name = filename[foundit + len(superdir) + 1:]
    return name


class reference:
    "Describes a location by file and line."

    def __init__(self, filename, line=None, docstring=None):
        self.filename = filename
        self.line = line
        # Maps referencing file name -> list of 1-based line numbers.
        self.references = {}
        self.docstring = docstring

    def append(self, fn, n):
        "Record a reference from file fn at zero-based line index n."
        self.references.setdefault(fn, []).append(n + 1)

    def dump_references(self):
        "Print each referencing file with its line numbers."
        for (fn, linenumbers) in self.references.items():
            print(" %s: %s" % (fn, ", ".join([str(n) for n in linenumbers])))

    def __cmp__(self, other):
        "Compare two documentation objects for place in the sort order."
        # Major sort by file, minor by line number.  This presumes that the
        # files correspond to coherent topics and gives us control of the
        # sequence.
        mine = (self.filename, self.line)
        theirs = (other.filename, other.line)
        return (mine > theirs) - (mine < theirs)

    def __str__(self):
        if self.line:
            return '"%s", line %d' % (self.filename, self.line)
        else:
            return self.filename


class CrossRef:
    "Cross-reference of macro definitions, resource files and their uses."
    macro_reference = re.compile(r"\{([A-Z_][A-Z0-9_:]*[A-Za-z0-9_])\b")
    file_reference = re.compile(r"[A-Za-z0-9{}][A-Za-z0-9_/+{}-]*\.(" + "|".join(resource_extensions) + ")")

    def imagesearch(self, name):
        "Return the registered resource matching name under a standard image subdirectory, or None."
        # Here is where we implement the funky rules for image
        # resolution.  If we can't identify a reference to the image
        # name under an image directory, look for it under particular
        # subdirectories.
        for superdir in ("units", "terrain", "portraits", "items"):
            trial = os.path.join(superdir, name)
            for path in self.fileref:
                if path == trial or path.endswith(os.sep + trial):
                    return path
        return None

    def mark_matching_resources(self, pattern, fn, n):
        """Mark all definitions matching a specified pattern with a reference.

        pattern is a regexp source string; fn and n (a zero-based line
        index) are forwarded to reference.append() for every resource
        name the pattern matches.  Returns the last matching name, or
        None if nothing matched or the pattern could not be compiled.
        """
        pattern = pattern.replace("+", r"\+")
        try:
            compiled = re.compile("^" + pattern + "$")
        except re.error:
            sys.stderr.write("macroscope: confused by %s\n" % pattern)
            return None
        key = None
        for trial in self.fileref:
            if compiled.match(trial):
                key = trial
                self.fileref[key].append(fn, n)
        return key

    def __init__(self, filelist):
        "Build cross-reference object from the specified filelist."
        self.xref = {}
        self.fileref = {}
        self.noxref = False
        suffixes = tuple(["." + ext for ext in resource_extensions])
        for filename in filelist:
            if filename.endswith(suffixes):
                # It's a resource file of some sort.
                self.fileref[_resource_name(filename)] = reference(filename)
            elif iswml(filename):
                # It's a WML file, scan for macro definitions
                self._scan_definitions(filename)
            elif filename.endswith(".def"):
                # It's a list of names to be considered defined
                self.noxref = True
                self._scan_deflist(filename)
        # Next, decorate definitions with all references from the filelist.
        self.unresolved = []
        self.missing = []
        for fn in filelist:
            if iswml(fn):
                self._scan_references(fn)

    def _scan_definitions(self, filename):
        "Record every #define in a WML file together with its comment block."
        here = None
        with open(filename) as dfp:
            for (n, line) in enumerate(dfp):
                stripped = line.strip()
                if stripped.startswith("#define"):
                    tokens = stripped.split()
                    if len(tokens) < 2:
                        # A #define with no macro name; nothing to record.
                        here = None
                        continue
                    name = tokens[1]
                    here = reference(filename, n+1, line)
                    if name in self.xref:
                        sys.stderr.write("*** Warning: duplicate definition of %s from %s, at %s\n"
                                         % (name, self.xref[name], here))
                    self.xref[name] = here
                    # Strip off "#define ", tolerating leading indentation.
                    here.docstring = line.lstrip()[8:]
                elif here:
                    # Comment lines directly after the #define extend its
                    # docstring; anything else ends the comment block.
                    if line.startswith("#"):
                        here.docstring += line[1:]
                    else:
                        here = None

    def _scan_deflist(self, filename):
        "Record every name listed in a .def file as defined."
        with open(filename) as dfp:
            for (n, line) in enumerate(dfp):
                name = line.strip()
                if name:
                    # A reference object (not a bare flag) so that later
                    # passes can attach references to it like any macro.
                    self.xref[name] = reference(filename, n+1)

    def _scan_references(self, fn):
        "Attach the macro and resource references found in WML file fn."
        formals = []
        with open(fn) as rfp:
            for (n, line) in enumerate(rfp):
                if line.startswith("#define"):
                    formals = line.split()[2:]
                elif line.startswith("#enddef"):
                    formals = []
                if '#' in line:
                    line = line.split('#')[0]
                    if not line:
                        continue
                # Find references to macros.  reference.append() takes the
                # zero-based index and stores the 1-based line number.
                for match in CrossRef.macro_reference.finditer(line):
                    name = match.group(1)
                    if name in formals:
                        continue
                    elif name in self.xref:
                        self.xref[name].append(fn, n)
                    else:
                        self.unresolved.append((name, reference(fn, n+1)))
                # Find references to resource files
                for match in CrossRef.file_reference.finditer(line):
                    name = match.group(0)
                    key = None
                    # If name is already in our resource list, it's easy.
                    if name in self.fileref:
                        key = name
                        self.fileref[name].append(fn, n)
                    # If the name contains substitutable parts, count
                    # it as a reference to everything the substitutions
                    # could potentially match.
                    elif '{' in name:
                        pattern = re.sub(r"\{[^}]*\}", '.*', name)
                        # This already marks every match, so no further
                        # append is needed here.
                        key = self.mark_matching_resources(pattern, fn, n)
                    # Might be time to do an image path search
                    elif name.endswith((".png", ".jpg")):
                        key = self.imagesearch(name)
                        if key:
                            self.fileref[key].append(fn, n)
                    if not key:
                        self.missing.append((name, reference(fn, n+1)))

    def xrefdump(self, pred=None):
        "Report resolved macro references."
        for (name, defloc) in self.xref.items():
            if pred and not pred(name, defloc):
                continue
            nrefs = len(defloc.references)
            if nrefs == 0:
                print("%s: macro %s is unused" % (defloc, name))
            else:
                print("%s: macro %s is used in %d files:" % (defloc, name, nrefs))
                defloc.dump_references()
        for (name, defloc) in self.fileref.items():
            if pred and not pred(name, defloc):
                continue
            nrefs = len(defloc.references)
            if nrefs == 0:
                print("Resource %s is unused" % defloc)
            else:
                print("Resource %s is used in %d files:" % (defloc, nrefs))
                defloc.dump_references()

    def unresdump(self):
        "Report unresolved references."
        if not self.unresolved and not self.missing:
            print("# No unresolved references")
        else:
            print("# Unresolved references:")
            for (name, where) in self.unresolved + self.missing:
                print("%s -> %s" % (where, name))

    def deflist(self, pred=None):
        "List all resource definitions."
        for (name, defloc) in list(self.xref.items()) + list(self.fileref.items()):
            if pred and not pred(name, defloc):
                continue
            if defloc.references:
                print(name)

    def extracthelp(self, pref, fp):
        """Deliver all macro help comments in HTML form.

        pref is a path prefix removed from file names in the headings;
        fp is a writable file object that receives the HTML.
        """
        # NOTE(review): tag names in this method were reconstructed from
        # context (heading / definition list) -- confirm against the
        # stylesheet used with the generated documentation.
        doclist = [name for name in self.xref
                   if (self.xref[name].docstring or "").count("\n") > 1]
        # Major sort by file, minor by line number.
        doclist.sort(key=lambda x: (self.xref[x].filename, self.xref[x].line or 0))
        out = []
        filename = None
        counted = 0
        for name in doclist:
            entry = self.xref[name]
            if entry.filename != filename:
                if counted:
                    out.append("</dl>\n")
                counted += 1
                filename = entry.filename
                if filename.startswith(pref):
                    displayname = filename[len(pref):]
                else:
                    displayname = filename
                out.append("<h1>From file: " + htmlize(displayname) + "</h1>\n")
                # The leading comment block of the file is its explanation.
                hdr = []
                with open(filename) as dfp:
                    for line in dfp:
                        if line.startswith('#'):
                            hdr.append(line[1:])
                        else:
                            break
                if hdr:
                    out.append(interpret(hdr, "file_explanation"))
                out.append("<dl>\n")
            if entry.docstring:
                lines = entry.docstring.split("\n")
                header = lines.pop(0).split()
                if lines and not lines[-1]:
                    # Ignore trailing blank lines
                    lines.pop()
                if not lines:
                    # Ignore definitions without a docstring
                    continue
                out.append("\n<dt>\n")
                out.append("<strong>" + htmlize(header[0]) + "</strong>")
                if header[1:]:
                    out.append(" <em>" + htmlize(" ".join(header[1:])) + "</em>")
                out.append("\n</dt>\n")
                out.append("<dd>\n")
                out.append(interpret(lines, "macro_explanation"))
                out.append("</dd>\n")
        if counted:
            out.append("</dl>\n")
        fp.write("".join(out))


def main():
    "Command-line entry point: parse options, scan the trees, emit reports."
    def usage():
        sys.stderr.write("""\
Usage: macroscope [options] dirpath
    Options may be any of these:
    -h, --help                 Emit this help message and quit
    -c, --crossreference       Report resolved macro references
    -d, --definitions          Make definition list
    -e reg, --exclude reg      Ignore files matching regexp
    -f dir, --from dir         Report only on macros defined under dir
    -l, --listfiles            List files that will be processed
    -r ddd, --refcount=ddd     Report only on macros w/references in ddd files
    -u, --unresolved           Report unresolved macro references
    --force-used reg           Ignore refcount 0 on names matching regexp
    --extracthelp              Extract help from macro definition comments.
   The dirpath argument may be a colon-separated directory list;
   it defaults to the current directory.
""")

    # Process options.  'deflist' and 'forced-used' are accepted because
    # earlier help text advertised those spellings.
    try:
        (options, arguments) = getopt.getopt(sys.argv[1:], "cdhe:f:lr:u", [
            'crossreference',
            'definitions',
            'deflist',
            'exclude=',
            'extracthelp',
            'force-used=',
            'forced-used=',
            'from=',
            'help',
            'listfiles',
            'refcount=',
            'unresolved',
            ])
    except getopt.GetoptError as err:
        sys.stderr.write("macroscope: %s\n" % err)
        usage()
        sys.exit(2)
    crossreference = definitions = listfiles = unresolved = extracthelp = False
    from_restrict = None
    refcount_restrict = None
    forceused = None
    exclude = []
    for (switch, val) in options:
        if switch in ('-h', '--help'):
            usage()
            sys.exit(0)
        if switch in ('-f', '--from'):
            from_restrict = val
        elif switch in ('-c', '--crossreference'):
            crossreference = True
        elif switch in ('-d', '--definitions', '--deflist'):
            definitions = True
        elif switch in ('-e', '--exclude'):
            exclude.append(val)
        elif switch == '--extracthelp':
            extracthelp = True
        elif switch in ('--force-used', '--forced-used'):
            forceused = val
        elif switch in ('-l', '--listfiles'):
            listfiles = True
        elif switch in ('-r', '--refcount'):
            try:
                refcount_restrict = int(val)
            except ValueError:
                sys.stderr.write("macroscope: refcount must be an integer, not %s\n" % val)
                sys.exit(2)
        elif switch in ('-u', '--unresolved'):
            unresolved = True

    if arguments:
        dirpath = arguments[0].split(":")
    else:
        dirpath = ['.']
    filelist = allfiles(dirpath, "|".join(exclude))

    def predicate(name, defloc):
        "Select the definitions the -f / -r / --force-used options ask for."
        if from_restrict and not defloc.filename.startswith(from_restrict):
            return False
        if refcount_restrict is not None \
               and len(defloc.references) != refcount_restrict:
            return False
        # When listing unused names, skip those declared as used anyway.
        if refcount_restrict == 0 and forceused and re.search(forceused, name):
            return False
        return True

    if extracthelp:
        CrossRef(filelist).extracthelp(dirpath[0], sys.stdout)
    elif listfiles:
        for filename in filelist:
            print(filename)
    elif crossreference or definitions or unresolved:
        xref = CrossRef(filelist)
        print("# Macroscope reporting on %s" % time.ctime())
        print("# Invocation: %s" % " ".join(sys.argv))
        print("# Working directory: %s" % os.getcwd())
        if crossreference:
            if xref.noxref:
                sys.stderr.write("macroscope: can't make cross-reference, input included a definitions file.\n")
            else:
                xref.xrefdump(predicate)
        if definitions:
            xref.deflist(predicate)
        if unresolved:
            xref.unresdump()


if __name__ == "__main__":
    main()