wesnoth/data/tools/macroscope

#!/usr/bin/env python
#
# macroscope -- generate reports on WML macro and resource usage
#
# By Eric S. Raymond April 2007.
# (Yes, this *is* named after an ancient Piers Anthony novel.)
#
# This tool cross-references macro definitions with macro calls, and
# resource (sound or image) files with uses of the resources in WML,
# and generates various useful reports from such cross-references.
# The checking done by this tool has several known flaws:
#
# (1) It doesn't actually evaluate file inclusions.  Instead, any
# macro definition from anywhere in the set of input trees can be used
# to satisfy a macro call anywhere else.
#
# (2) It doesn't read [binary_path] tags, as this would require
# implementing a WML parser.  Instead, it assumes that a resource-file
# reference can be satisfied by any matching image file from anywhere
# in the set of input trees.
#
# (3) A reference with embedded {}s in a macro will have the macro's
# formal args substituted in at WML evaluation time.  Instead, this
# tool treats each {} as a .* wildcard and considers the reference to
# match *every* resource filename that matches that pattern.  Under
# appropriate circumstances this might report a resource filename
# statically matching the pattern as having been referenced even
# though none of the actual macro calls would actually generate it.
#
# (4) There are some implicit references.  Notably, if an attack name
# is specified but no icon is given, the attack icon will default to
# a name generated from the attack name.
#
# Problems (1) and (2) imply that this tool might conceivably report
# that a reference has been satisfied when under actual
# WML-interpreter rules it has not.  Problem (4) means the reverse
# can also occur.
#
# The reporting format is compatible with GNU Emacs compile mode.

import sys, os, time, re, getopt, sre_constants

# Filename extensions (without the leading dot) that mark a file as a
# resource.  Elsewhere in this file png/jpg are handled as images and
# ogg/wav as sounds.
resource_extensions = ("png", "jpg", "ogg", "wav")

def htmlize(line):
    """HTML-escape a text line.

    Replaces the three characters that are special in HTML text content
    (&, < and >) with their entity references and returns the new string.
    """
    # The ampersand has to be handled first.  Doing it last would re-escape
    # the '&' of the entities just produced, turning '<' into '&amp;lt;'.
    return line.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

def interpret(lines, css):
    """Interpret the ! convention for .cfg comments.

    lines is a sequence of comment-text lines and css is the class name
    put on the opening paragraph.  Ordinary lines are gathered into <p>
    paragraphs, a blank line starting a new one.  A run of lines that
    begin with '!' becomes a <pre class='listing'> block with the '!'
    markers removed; blank lines inside such a run are kept as part of
    the listing.  All text is HTML-escaped.  Returns the HTML as a string.
    """
    inlisting = False
    parts = ['<p class="%s">' % css]
    for line in lines:
        line = line.rstrip()
        if inlisting:
            # The first nonblank line without a '!' marker ends the listing.
            # Close the <pre> *before* emitting that line, so it lands in
            # the following paragraph intact instead of inside the listing
            # with its first character chopped off.
            if line and line[0] != '!':
                parts.append("</pre>\n<p>")
                inlisting = False
        else:
            if not line:
                parts.append("</p><p>")
                continue
            if line[0] == '!':
                parts.append("</p>\n<pre class='listing'>")
                inlisting = True
        line = htmlize(line)
        if inlisting:
            # Drop the leading '!' marker (or nothing, for a blank line).
            parts.append(line[1:] + "\n")
        else:
            parts.append(line + "\n")
    if inlisting:
        parts.append("</pre>\n")
    else:
        parts.append("</p>\n")
    outstr = "".join(parts)
    # Tidy up: remove empty paragraphs and a blank line trailing a listing.
    outstr = outstr.replace("<p></p>", "")
    outstr = outstr.replace("\n\n</pre>", "\n</pre>")
    return outstr

def allfiles(dirpath, exclude):
    """Get the names of all files under dirpath, ignoring .svn directories.

    dirpath is a sequence of directory names to walk.  exclude is the
    source of a regular expression; any path it matches (re.search) is
    left out, and an empty or None value disables that filter.  Paths
    containing ".svn" and paths ending in "-bak" are always left out.
    Returns a list of normalized file paths (directories are not listed).
    """
    excluded = None
    if exclude:
        # Compile once rather than once per candidate path.
        excluded = re.compile(exclude)
    datafiles = []
    for topdir in dirpath:
        # os.walk replaces the deprecated os.path.walk, which no longer
        # exists in Python 3.
        for (root, dirnames, filenames) in os.walk(topdir):
            # Everything below such a directory would be rejected by the
            # ".svn" test further down, so don't bother descending.
            dirnames[:] = [d for d in dirnames if ".svn" not in d]
            for name in filenames:
                path = os.path.normpath(os.path.join(root, name))
                if ".svn" in path:
                    continue
                if excluded is not None and excluded.search(path):
                    continue
                if path.endswith("-bak"):
                    continue
                datafiles.append(path)
    return datafiles

def iswml(filename):
    "Report whether the filename carries the .cfg suffix used for WML."
    suffix = ".cfg"
    return filename[-len(suffix):] == suffix

class reference:
    """Describes a location by file and line.

    Besides the location itself (filename, optional line number and
    optional docstring), an instance collects the places where the thing
    defined at this location is used.
    """
    def __init__(self, filename, line=None, docstring=None):
        self.filename = filename
        self.line = line
        # Maps the name of a referencing file to the list of line numbers
        # at which it refers to this location.
        self.references = {}
        self.docstring = docstring
    def append(self, fn, n):
        "Record a use of this location at line n of file fn."
        # The callers in this file pass line numbers that are already
        # 1-origin (enumerate index + 1), so n is stored unchanged; adding
        # one again here made every reported reference one line too high.
        self.references.setdefault(fn, []).append(n)
    def dump_references(self):
        "Print one indented line per referencing file with its line numbers."
        for (fn, linenumbers) in self.references.items():
            # repr of a list minus its brackets: "3, 17, 42"
            print("    %s: %s" % (fn, repr(linenumbers)[1:-1]))
    def __cmp__(self, other):
        "Compare two documentation objects for place in the sort order."
        # Major sort by file, minor by line number.  This presumes that the
        # files correspond to coherent topics and gives us control of the
        # sequence.  Returns -1, 0 or 1 like the Python 2 cmp() builtin.
        mine = (self.filename, self.line)
        theirs = (other.filename, other.line)
        return (mine > theirs) - (mine < theirs)
    def __str__(self):
        "Format as '\"file\", line N', or as just the filename if no line is set."
        if self.line:
            return '"%s", line %d' % (self.filename, self.line)
        else:
            return self.filename

class CrossRef:
    """Cross-reference of macros and resource files over a list of files.

    After construction:
      xref       maps macro name -> reference object for its definition
      fileref    maps resource name -> reference object for the file
      unresolved list of (macro name, reference) for calls with no definition
      missing    list of (resource name, reference) for uses with no file
      noxref     True if a .def file of predefined names was read
    Each definition's reference object collects the places that use it.
    """
    macro_reference = re.compile(r"\{([A-Z_][A-Z0-9_:]*[A-Za-z0-9_])\b")
    file_reference = re.compile(r"[A-Za-z0-9{}][A-Za-z0-9_/+{}-]*\.(" + "|".join(resource_extensions) + ")")
    def imagesearch(self, name):
        "Return the known resource name matching name under a standard image subdirectory, or None."
        # Here is where we implement the funky rules for image
        # resolution.  If we can't identify a reference to the image
        # name under an image directory, look for it under particular
        # subdirectories.
        for subdir in ("units", "terrain", "portraits", "items"):
            trial = os.path.join(subdir, name)
            suffix = os.sep + trial
            for path in self.fileref:
                if path == trial or path.endswith(suffix):
                    return path
        return None
    def mark_matching_resources(self, pattern, fn, n):
        """Mark all definitions matching a specified pattern with a reference.

        pattern is regexp source that must match a whole resource name;
        fn and n give the file and line of the reference.  Returns the
        last resource name that matched, or None if nothing matched or
        the pattern could not be compiled (a complaint goes to stderr).
        """
        pattern = pattern.replace("+", r"\+")
        try:
            regexp = re.compile("^" + pattern + "$")
        except re.error:
            sys.stderr.write("macroscope: confused by %s\n" % pattern)
            return None
        key = None
        for trial in self.fileref:
            if regexp.match(trial):
                key = trial
                self.fileref[key].append(fn, n)
        return key
    def __init__(self, filelist):
        "Build cross-reference object from the specified filelist."
        self.xref = {}
        self.fileref = {}
        self.noxref = False
        # First, collect everything that could satisfy a reference.
        for filename in filelist:
            if self._isresource(filename):
                self._add_resource(filename)
            elif iswml(filename):
                self._scan_definitions(filename)
            elif filename.endswith(".def"):
                self._load_deflist(filename)
        # Next, decorate definitions with all references from the filelist.
        self.unresolved = []
        self.missing = []
        for fn in filelist:
            if iswml(fn):
                self._scan_references(fn)
    def _isresource(self, filename):
        "Does the filename end in one of the resource extensions?"
        for ext in resource_extensions:
            if filename.endswith("." + ext):
                return True
        return False
    def _add_resource(self, filename):
        "Enter a sound or image file into fileref under its resource name."
        # The rule we're applying here is:
        # 1) If it's a sound file, its name is the part of
        #    the path after "sounds/" or "music/".
        # 2) If it's an image file, its name is the part of
        #    the path after "images/".
        (root, ext) = os.path.splitext(filename)
        if ext in (".ogg", ".wav"):
            superdirs = ("music", "sounds")
        else:
            superdirs = ("images",)
        # A file outside any of those directories is entered under its own
        # path, so it is still reported rather than silently reusing the
        # name computed for some earlier file.
        name = filename
        for superdir in superdirs:
            foundit = filename.find(superdir)
            if foundit > -1:
                # Skip the directory name and the separator after it.
                name = filename[foundit + len(superdir) + 1:]
        self.fileref[name] = reference(filename)
    def _scan_definitions(self, filename):
        "Enter each #define of a WML file into xref, with its comment as docstring."
        dfp = open(filename)
        try:
            here = None
            for (n, line) in enumerate(dfp):
                stripped = line.strip()
                if stripped.startswith("#define"):
                    tokens = stripped.split()
                    if len(tokens) < 2:
                        # A #define without a macro name defines nothing.
                        here = None
                        continue
                    name = tokens[1]
                    # The docstring starts with the macro name and formals:
                    # strip the "#define " prefix, allowing for indentation.
                    here = reference(filename, n+1, line.lstrip()[8:])
                    if name in self.xref:
                        sys.stderr.write("*** Warning: duplicate definition of %s from %s, at %s\n"
                                         % (name, self.xref[name], here))
                    self.xref[name] = here
                elif here is not None:
                    # Comment lines directly after a #define document it.
                    if line[0] == "#":
                        here.docstring += line[1:]
                    else:
                        here = None
        finally:
            dfp.close()
    def _load_deflist(self, filename):
        "Enter each name listed in a .def file into xref as already defined."
        self.noxref = True
        dfp = open(filename)
        try:
            for (n, line) in enumerate(dfp):
                name = line.strip()
                # Enter a real reference object (with an empty docstring)
                # so that calls to the name can be recorded and the report
                # methods can treat every xref entry alike.  A definition
                # already seen in WML is kept in preference.
                if name and name not in self.xref:
                    self.xref[name] = reference(filename, n+1, "")
        finally:
            dfp.close()
    def _scan_references(self, fn):
        "Record every macro call and resource-file mention found in a WML file."
        # Formal arguments of the macro being defined; these are not calls.
        formals = []
        rfp = open(fn)
        try:
            for (n, line) in enumerate(rfp):
                stripped = line.strip()
                if stripped.startswith("#define"):
                    formals = stripped.split()[2:]
                elif stripped.startswith("#enddef"):
                    formals = []
                # Discard comments before looking for references.
                if '#' in line:
                    line = line.split('#')[0]
                if not line:
                    continue
                # Find references to macros
                for match in CrossRef.macro_reference.finditer(line):
                    name = match.group(1)
                    if name in formals:
                        continue
                    elif name in self.xref:
                        self.xref[name].append(fn, n+1)
                    else:
                        self.unresolved.append((name, reference(fn, n+1)))
                # Find references to resource files
                for match in CrossRef.file_reference.finditer(line):
                    name = match.group(0)
                    # Reset per mention, so that a result left over from an
                    # earlier mention can't hide a missing resource.
                    key = None
                    # If name is already in our resource list, it's easy.
                    if name in self.fileref:
                        self.fileref[name].append(fn, n+1)
                        continue
                    # If the name contains substitutable parts, count
                    # it as a reference to everything the substitutions
                    # could potentially match.
                    elif '{' in name:
                        pattern = re.sub(r"\{[^}]*\}", '.*', name)
                        # This records the reference on every match itself.
                        key = self.mark_matching_resources(pattern, fn, n+1)
                    # Might be time to do an image path search
                    elif name.endswith(".png") or name.endswith(".jpg"):
                        key = self.imagesearch(name)
                        if key:
                            self.fileref[key].append(fn, n+1)
                    if not key:
                        self.missing.append((name, reference(fn, n+1)))
        finally:
            rfp.close()
    def xrefdump(self, pred=None):
        "Report resolved macro references."
        # pred, if given, is called as pred(name, defloc); entries for
        # which it returns false are skipped.
        for (name, defloc) in self.xref.items():
            if pred and not pred(name, defloc):
                continue
            nrefs = len(defloc.references)
            if nrefs == 0:
                print("%s: macro %s is unused" % (defloc, name))
            else:
                print("%s: macro %s is used in %d files:" % (defloc, name, nrefs))
            defloc.dump_references()
        for (name, defloc) in self.fileref.items():
            if pred and not pred(name, defloc):
                continue
            nrefs = len(defloc.references)
            if nrefs == 0:
                print("Resource %s is unused" % defloc)
            else:
                print("Resource %s is used in %d files:" % (defloc, nrefs))
            defloc.dump_references()
    def unresdump(self):
        "Report unresolved references."
        if not self.unresolved and not self.missing:
            print("# No unresolved references")
        else:
            print("# Unresolved references:")
            for (name, where) in self.unresolved + self.missing:
                print("%s -> %s" % (where, name))
    def deflist(self, pred=None):
        "List all resource definitions."
        # Only names that are referenced at least once (and that pass
        # pred, if one is given) are printed.
        for (name, defloc) in list(self.xref.items()) + list(self.fileref.items()):
            if pred and not pred(name, defloc):
                continue
            if defloc.references:
                print(name)
    def _fileheader(self, filename):
        "Return the leading run of comment lines of a file, minus the '#'."
        hdr = []
        dfp = open(filename)
        try:
            for line in dfp:
                if line[0] == '#':
                    hdr.append(line[1:])
                else:
                    break
        finally:
            dfp.close()
        return hdr
    def extracthelp(self, pref, fp):
        """Deliver all macro help comments in HTML form.

        pref is a leading path to strip from filenames for display; fp is
        a file-like object the HTML is written to.  Only macros whose
        docstring goes beyond the #define line itself are included,
        grouped by defining file and ordered by line within a file.
        """
        doclist = [name for name in self.xref
                   if (self.xref[name].docstring or "").count("\n") > 1]
        doclist.sort(key=lambda name: (self.xref[name].filename,
                                       self.xref[name].line))
        chunks = []
        filename = None
        for name in doclist:
            entry = self.xref[name]
            if entry.filename != filename:
                # Starting on a new file: close the previous file's list.
                if filename is not None:
                    chunks.append("</dl>\n")
                filename = entry.filename
                if filename.startswith(pref):
                    displayname = filename[len(pref):]
                else:
                    displayname = filename
                chunks.append("<h1 class='file_header'>From file: " + displayname + "</h1>\n")
                hdr = self._fileheader(filename)
                if hdr:
                    chunks.append(interpret(hdr, "file_explanation"))
                chunks.append("<dl>\n")
            lines = entry.docstring.split("\n")
            header = lines.pop(0).split()
            if lines and not lines[-1]:	# Ignore trailing blank lines
                lines.pop()
            if not lines or not header:	# Ignore definitions without a docstring
                continue
            chunks.append("\n<dt>\n")
            chunks.append("<em class='macro_name'>" + header[0] + "</em>")
            if header[1:]:
                chunks.append(" <em class='macro_formals'>" + " ".join(header[1:]) + "</em>")
            chunks.append("\n</dt>\n")
            chunks.append("<dd>\n")
            chunks.append(interpret(lines, "macro_explanation"))
            chunks.append("</dd>\n")
        # Close the last list, but only if one was ever opened.
        if filename is not None:
            chunks.append("</dl>\n")
        fp.write("".join(chunks))

if __name__ == "__main__":
    def help():
        "Write the usage message to standard error."
        sys.stderr.write("""\
Usage: macroscope [options] dirpath
    Options may be any of these:
    -h, --help                 Emit this help message and quit
    -c, --crossreference       Report resolved macro references
    -d, --definitions          Make definition list
    -e reg, --exclude reg      Ignore files matching regexp
    -f dir, --from dir         Report only on macros defined under dir
    -l, --listfiles            List files that will be processed
    -r ddd, --refcount=ddd     Report only on macros w/references in ddd files
    -u, --unresolved           Report unresolved macro references
    --force-used reg           Ignore refcount 0 on names matching regexp
    --extracthelp              Extract help from macro definition comments.
   The required dirpath argument may be a colon-separated directory list.
""")

    # Process options
    try:
        (options, arguments) = getopt.getopt(sys.argv[1:], "cdhe:f:lr:u",
                                             [
                                              'crossreference',
                                              'definitions',
                                              'exclude=',
                                              'extracthelp',
                                              'force-used=',
                                              'from=',
                                              'help',
                                              'listfiles',
                                              'refcount=',
                                              'unresolved',
                                              ])
    except getopt.GetoptError:
        # Unknown option or missing argument: explain instead of dumping
        # a traceback on the user.
        sys.stderr.write("macroscope: %s\n" % sys.exc_info()[1])
        help()
        sys.exit(2)
    crossreference = definitions = listfiles = unresolved = extracthelp = False
    from_restrict = None
    refcount_restrict = None
    forceused = None
    exclude = []
    for (switch, val) in options:
        if switch in ('-h', '--help'):
            help()
            sys.exit(0)
        if switch in ('-f', '--from'):
            from_restrict = val
        elif switch in ('-c', '--crossreference'):
            crossreference = True
        elif switch in ('-d', '--definitions'):
            definitions = True
        elif switch in ('-e', '--exclude'):
            exclude.append(val)
        elif switch == '--extracthelp':
            extracthelp = True
        elif switch == '--force-used':
            forceused = val
        elif switch in ('-l', '--listfiles'):
            listfiles = True
        elif switch in ('-r', '--refcount'):
            refcount_restrict = int(val)
        elif switch in ('-u', '--unresolved'):
            unresolved = True

    # Directories to scan; the current directory if none were given.
    if len(arguments):
        dirpath = arguments[0].split(":")
    else:
        dirpath = ['.']
    # Several -e options are combined into one alternation regexp.
    filelist = allfiles(dirpath, "|".join(exclude))
    xref = CrossRef(filelist)
    if extracthelp:
        xref.extracthelp(dirpath[0], sys.stdout)
    elif listfiles:
        for filename in filelist:
            print(filename)
    elif crossreference or definitions or unresolved:
        print("# Macroscope reporting on %s" % time.ctime())
        print("# Invocation: %s" % " ".join(sys.argv))
        print("# Working directory: %s" % os.getcwd())
        def predicate(name, defloc):
            "Should the definition of name at defloc be included in reports?"
            # Restrict to definitions under the --from directory.
            if from_restrict and not defloc.filename.startswith(from_restrict):
                return False
            # Restrict to definitions referenced from exactly N files.
            if refcount_restrict is not None \
                   and len(defloc.references) != refcount_restrict:
                return False
            # When hunting for unused definitions, leave out the names
            # the user has declared to be used regardless.
            if refcount_restrict == 0 and forceused and re.search(forceused, name):
                return False
            return True
        if crossreference:
            if xref.noxref:
                sys.stderr.write("macroscope: can't make cross-reference, input included a definitions file.\n")
            else:
                xref.xrefdump(predicate)
        if definitions:
            xref.deflist(predicate)
        if unresolved:
            xref.unresdump()