#!/usr/bin/env python
#
# macroscope -- generate reports on WML macro and resource usage
#
# By Eric S. Raymond April 2007.
# (Yes, this *is* named after an ancient Piers Anthony novel.)
#
# This tool cross-references macro definitions with macro calls, and
# resource (sound or image) files with uses of the resources in WML,
# and generates various useful reports from such cross-references.
# The checking done by this tool has several flaws:
#
# (1) It doesn't actually evaluate file inclusions.  Instead, any
# macro definition from anywhere in the set of input trees can be used
# to satisfy a macro call anywhere else.
#
# (2) It doesn't read [binary_path] tags, as this would require
# implementing a WML parser.  Instead, it assumes that a resource-file
# reference can be satisfied by any matching image file from anywhere
# in the set of input trees.
#
# (3) A reference with embedded {}s in a macro will have the macro's
# formal args substituted in at WML evaluation time.  Instead, this
# tool treats each {} as a .* wildcard and considers the reference to
# match *every* resource filename that matches that pattern.  Under
# appropriate circumstances this might report a resource filename
# statically matching the pattern as having been referenced even
# though none of the actual macro calls would actually generate it.
#
# (4) There are some implicit references.  Notably, if an attack name
# is specified but no icon is given, the attack icon will default to
# a name generated from the attack name.
#
# Problems (1) and (2) imply that this tool might conceivably report
# that a reference has been satisfied when under actual
# WML-interpreter rules it has not.  Problem (4) means the reverse
# can also occur.
#
# The reporting format is compatible with GNU Emacs compile mode.
import sys
import os
import time
import re
import getopt
import sre_constants

resource_extensions = ("png", "jpg", "ogg", "wav")


def htmlize(line):
    "HTML-escape a text line"
    # "&" has to be escaped first; otherwise the ampersands introduced by
    # the "<" and ">" replacements would themselves be escaped again.
    return line.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def interpret(lines, css):
    """Interpret the ! convention for .cfg comments.

    lines is a sequence of documentation lines; css is interpolated into
    the opening paragraph tag.  Lines starting with '!' are collected into
    a listing (with the '!' removed), other lines become paragraph text,
    and a blank line outside a listing starts a new paragraph.  Every line
    is HTML-escaped.  Returns the generated markup as one string.
    """
    # NOTE(review): the HTML tags inside these literals were lost in the
    # copy of the source under review; the <p>/<pre> markup below is
    # reconstructed from the control flow -- confirm against upstream.
    inlisting = False
    parts = ['<p class="%s">' % css]
    for line in lines:
        line = line.rstrip()
        if not inlisting and not line:
            parts.append("</p><p>")
            continue
        if not inlisting and line[0] == '!':
            parts.append("</p>\n<pre class='listing'>")
            inlisting = True
        elif inlisting and line and line[0] != '!':
            # Close the listing *before* emitting the line that ends it, so
            # that line is not put inside the listing minus its first char.
            parts.append("</pre>\n<p>")
            inlisting = False
        line = htmlize(line)
        if inlisting:
            parts.append(line[1:] + "\n")
        else:
            parts.append(line + "\n")
    if not inlisting:
        parts.append("</p>\n")
    else:
        parts.append("</pre>\n")
    outstr = "".join(parts)
    # Drop empty paragraphs and a blank line right before a listing's end.
    outstr = outstr.replace("<p></p>", "")
    outstr = outstr.replace("\n\n</pre>", "\n</pre>")
    return outstr


def allfiles(dirpath, exclude):
    """Get the names of all files under dirpath, ignoring .svn directories.

    dirpath is a sequence of directory names to walk.  exclude, if true,
    is a regular expression; any path it matches anywhere is dropped.
    Paths containing ".svn" and paths ending in "-bak" are dropped too.
    Returns a list of normalized paths.
    """
    datafiles = []
    for top in dirpath:
        for (dirname, _subdirs, names) in os.walk(top):
            for name in names:
                datafiles.append(os.path.normpath(os.path.join(dirname, name)))
    excluded = None
    if exclude:
        excluded = re.compile(exclude)
    selected = []
    for path in datafiles:
        if ".svn" in path or path.endswith("-bak"):
            continue
        if excluded is not None and excluded.search(path):
            continue
        selected.append(path)
    return selected


def iswml(filename):
    "Is the specified filename WML?"
    return filename.endswith(".cfg")


class reference:
    "Describes a location by file and line."
    def __init__(self, filename, line=None, docstring=None):
        self.filename = filename
        self.line = line
        # Maps a referencing file name to the list of recorded line numbers.
        self.references = {}
        self.docstring = docstring

    def append(self, fn, n):
        "Record a reference from file fn; n is a zero-origin line index."
        self.references.setdefault(fn, []).append(n+1)

    def dump_references(self):
        "Print one line per referencing file, listing its line numbers."
        for (fn, linenumbers) in self.references.items():
            print(" %s: %s" % (fn, repr(linenumbers)[1:-1]))

    def __cmp__(self, other):
        "Compare two documentation objects for place in the sort order."
        # Major sort by file, minor by line number.  This presumes that the
        # files correspond to coherent topics and gives us control of the
        # sequence.  A missing line number sorts before any real one; the
        # boolean in the key keeps None from ever being compared to an int.
        mine = (self.filename, self.line is not None, self.line)
        theirs = (other.filename, other.line is not None, other.line)
        return (mine > theirs) - (mine < theirs)

    def __str__(self):
        if self.line:
            return '"%s", line %d' % (self.filename, self.line)
        else:
            return self.filename


class CrossRef:
    """Cross-reference of macro definitions/calls and resource files/uses.

    Attributes set by the constructor:
    xref       -- macro name -> reference for its definition (or True for
                  a name that came from a .def list)
    fileref    -- resource name -> reference for the resource file
    noxref     -- True if any .def file was seen
    unresolved -- list of (macro name, reference) for unmatched calls
    missing    -- list of (resource name, reference) for unmatched uses
    """
    macro_reference = re.compile(r"\{([A-Z_][A-Z0-9_:]*[A-Za-z0-9_])\b")
    file_reference = re.compile(r"[A-Za-z0-9{}][A-Za-z0-9_/+{}-]*\.(" + "|".join(resource_extensions) + ")")

    def imagesearch(self, name):
        "Return the known resource name that name resolves to, or None."
        # Here is where we implement the funky rules for image
        # resolution.  If we can't identify a reference to the image
        # name under an image directory, look for it under particular
        # subdirectories.
        for subdir in ("units", "terrain", "portraits", "items"):
            trial = os.path.join(subdir, name)
            for path in self.fileref:
                if path == trial or path.endswith(os.sep + trial):
                    return path
        return None

    def mark_matching_resources(self, pattern, fn, n):
        """Mark all definitions matching a specified pattern with a reference.

        pattern is a regular expression matched against whole resource
        names; fn and n are handed to reference.append() for each match.
        Returns the last matching resource name, or None if nothing
        matched or the pattern could not be compiled.
        """
        pattern = pattern.replace("+", r"\+")
        try:
            regex = re.compile("^" + pattern + "$")
        except re.error:
            sys.stderr.write("macroscope: confused by %s\n" % pattern)
            return None
        key = None
        for trial in self.fileref:
            if regex.match(trial):
                key = trial
                self.fileref[key].append(fn, n)
        return key

    def _resource_name(self, filename):
        "Compute the name under which a resource file can be referenced."
        # The rule we're applying here is:
        # 1) If it's a sound file, its name is the part of
        #    the path after "sounds/" or "music/".
        # 2) If it's an image file, its name is the part of
        #    the path after "images/".
        # A file under none of those directories is registered under its
        # own path, so it still shows up in the reports.
        ext = os.path.splitext(filename)[1]
        if ext in (".ogg", ".wav"):
            superdirs = ("music", "sounds")
        else:
            superdirs = ("images",)
        name = filename
        for superdir in superdirs:
            foundit = filename.find(superdir + os.sep)
            if foundit > -1:
                name = filename[foundit + len(superdir) + 1:]
        return name

    def _scan_definitions(self, filename):
        "Record every macro definition (and its comment block) in a WML file."
        dfp = open(filename)
        try:
            here = None
            for (n, line) in enumerate(dfp):
                tokens = line.split()
                if tokens and tokens[0].startswith("#define"):
                    if len(tokens) < 2:
                        # A #define without a name defines nothing.
                        here = None
                        continue
                    name = tokens[1]
                    # Strip off "#define " to start the docstring.
                    here = reference(filename, n+1, line.lstrip()[8:])
                    if name in self.xref:
                        sys.stderr.write("*** Warning: duplicate definition of %s from %s, at %s\n"
                                         % (name, self.xref[name], here))
                    self.xref[name] = here
                elif here:
                    # Comment lines directly after the #define extend its
                    # docstring; the first other line ends it.
                    if line.startswith("#"):
                        here.docstring += line[1:]
                    else:
                        here = None
        finally:
            dfp.close()

    def _scan_name_list(self, filename):
        "Read a .def file: a list of names to be considered defined."
        dfp = open(filename)
        try:
            for line in dfp:
                self.xref[line.strip()] = True
        finally:
            dfp.close()

    def _scan_references(self, fn):
        "Decorate definitions with the macro and resource references in fn."
        formals = []
        rfp = open(fn)
        try:
            for (n, line) in enumerate(rfp):
                stripped = line.strip()
                if stripped.startswith("#define"):
                    formals = stripped.split()[2:]
                elif stripped.startswith("#enddef"):
                    formals = []
                if '#' in line:
                    line = line.split('#')[0]
                if not line:
                    continue
                # Find references to macros
                for match in CrossRef.macro_reference.finditer(line):
                    name = match.group(1)
                    if name in formals:
                        continue
                    elif name in self.xref:
                        defloc = self.xref[name]
                        # Names from a .def list are bare True placeholders
                        # with nothing to record a reference on.
                        if isinstance(defloc, reference):
                            # append() converts the zero-origin index itself.
                            defloc.append(fn, n)
                    else:
                        self.unresolved.append((name, reference(fn, n+1)))
                # Find references to resource files
                for match in CrossRef.file_reference.finditer(line):
                    name = match.group(0)
                    key = None
                    # If name is already in our resource list, it's easy.
                    if name in self.fileref:
                        self.fileref[name].append(fn, n)
                        continue
                    # If the name contains subtitutable parts, count
                    # it as a reference to everything the substitutions
                    # could potentially match.  The marking itself is done
                    # by mark_matching_resources().
                    elif '{' in name:
                        pattern = re.sub(r"\{[^}]*\}", '.*', name)
                        key = self.mark_matching_resources(pattern, fn, n)
                    # Might be time to do an image path search
                    elif name.endswith((".png", ".jpg")):
                        key = self.imagesearch(name)
                        if key:
                            self.fileref[key].append(fn, n)
                    if not key:
                        self.missing.append((name, reference(fn, n+1)))
        finally:
            rfp.close()

    def __init__(self, filelist):
        "Build cross-reference object from the specified filelist."
        self.xref = {}
        self.fileref = {}
        self.noxref = False
        resource_suffixes = tuple(["." + ext for ext in resource_extensions])
        for filename in filelist:
            if filename.endswith(resource_suffixes):
                # It's a resource file of some sort.
                self.fileref[self._resource_name(filename)] = reference(filename)
            elif iswml(filename):
                # It's a WML file, scan for macro definitions
                self._scan_definitions(filename)
            elif filename.endswith(".def"):
                # It's a list of names to be considered defined
                self.noxref = True
                self._scan_name_list(filename)
        # Next, decorate definitions with all references from the filelist.
        self.unresolved = []
        self.missing = []
        for fn in filelist:
            if iswml(fn):
                self._scan_references(fn)

    def xrefdump(self, pred=None):
        """Report resolved macro references.

        pred, if given, is called as pred(name, defloc) and entries for
        which it returns false are skipped.
        """
        for (name, defloc) in self.xref.items():
            if not isinstance(defloc, reference):
                # Placeholder from a .def list; it has no location to report.
                continue
            if pred and not pred(name, defloc):
                continue
            nrefs = len(defloc.references)
            if nrefs == 0:
                print("%s: macro %s is unused" % (defloc, name))
            else:
                print("%s: macro %s is used in %d files:" % (defloc, name, nrefs))
                defloc.dump_references()
        for (name, defloc) in self.fileref.items():
            if pred and not pred(name, defloc):
                continue
            nrefs = len(defloc.references)
            if nrefs == 0:
                print("Resource %s is unused" % defloc)
            else:
                print("Resource %s is used in %d files:" % (defloc, nrefs))
                defloc.dump_references()

    def unresdump(self):
        "Report unresolved references."
        if not self.unresolved and not self.missing:
            print("# No unresolved references")
        else:
            print("# Unresolved references:")
            for (name, where) in self.unresolved + self.missing:
                print("%s -> %s" % (where, name))

    def deflist(self, pred=None):
        """List all resource definitions.

        Prints the name of every macro and resource that has at least one
        reference; pred filters entries as in xrefdump().
        """
        for (name, defloc) in list(self.xref.items()) + list(self.fileref.items()):
            if not isinstance(defloc, reference):
                # Placeholder from a .def list; it carries no references.
                continue
            if pred and not pred(name, defloc):
                continue
            if defloc.references:
                print(name)

    def extracthelp(self, pref, fp):
        "Deliver all macro help comments in HTML form."
doclist = self.xref.keys() doclist = filter(lambda x: self.xref[x].docstring.count("\n") > 1, doclist) doclist.sort(lambda x, y: cmp(self.xref[x], self.xref[y])) outstr = "" filename = None counted = 0 for name in doclist: entry = self.xref[name] if entry.filename != filename: if counted: outstr += "\n" counted += 1 filename = entry.filename if filename.startswith(pref): displayname = filename[len(pref):] else: displayname = filename outstr += "