#!/usr/bin/env python
"""\
wmlindent - re-indent WML in a uniform way.

By Eric S. Raymond, June 2007.

Call with no arguments to filter WML on stdin to reindented WML on
stdout.  If arguments are specified, they are taken to be files to be
re-indented in place; a directory name causes reindenting on all WML
beneath it.

The indent unit is four spaces.  Absence of an option to change this is
deliberate; the purpose of this tool is to *prevent* style wars, not
encourage them.

On non-empty lines, this code never modifies anything but leading and
trailing whitespace.  Leading whitespace will be regularized to the
current indent; trailing whitespace will be stripped.  After processing
all lines will end with a Unix-style \\n end-of-line marker.

Runs of entirely blank lines will be reduced to one blank line, except
in two cases where they will be discarded: (a) before WML closing tags,
and (b) after WML opening tags.

Interrupting will be safe, as each reindenting will be done to a copy
that is atomically renamed when it's done.

Note: This does not include a parser.  It will produce bad results on WML
that is syntactically unbalanced.  Unbalanced double quotes that aren't part
of a multiline literal will also confuse it.  You will receive warnings
if there's an indent open at end of file or if a closer occurs with
indent already zero; these two conditions strongly suggest unbalanced WML.
"""

import getopt
import os
import sys

import wmltools


def is_directive(str):
    """Identify things that shouldn't be indented.

    Returns True when the line begins with one of the preprocessor
    directives that are always emitted at column zero.
    """
    return str.startswith(("#ifdef", "#else", "#endif", "#define", "#enddef"))


def closer(str):
    """Are we looking at a closing tag?  (The line starts with "[/".)"""
    return str.startswith("[/")


def opener(str):
    """Are we looking at an opening tag?  (Starts with "[" but not "[/".)"""
    return str.startswith("[") and not closer(str)


def reindent(name, infp, outfp):
    """Reindent WML.

    name  -- label for the input, used only in warning messages.
    infp  -- iterable yielding the input lines.
    outfp -- object with a write() method that receives the output.

    Warnings about apparently unbalanced WML are written to sys.stderr.
    """
    # The indent unit is four spaces, as promised by the module docstring.
    baseindent = "    "
    dostrip = True        # False while inside a multiline string literal
    seen_wml = False      # have we seen a real tag outside a macro body?
    inmacro = False
    indent = ""
    # Indent to restore at #enddef.  Initialized so that a stray #enddef
    # with no preceding #define cannot raise an unbound-variable error.
    saved_indent = ""
    lasttag = ""
    countblanks = 0
    for line in infp:
        # Strip each line, unless we're in something like a multiline string.
        if dostrip:
            transformed = line.strip() + "\n"
        else:
            transformed = line
        # Track whether we've seen real WML rather than just macro definitions.
        if transformed.startswith("#define"):
            saved_indent = indent
            indent = baseindent
            inmacro = True
        elif transformed.startswith("#enddef"):
            indent = saved_indent
            inmacro = False
        elif not inmacro and transformed[:1] in ('[', ']'):
            # Slicing rather than indexing tolerates an empty line.
            seen_wml = True
        is_open = opener(transformed)
        is_close = closer(transformed)
        # In the close case, we must compute new indent *before* emitting
        # the new line so the close tag will be at the same level as the
        # one that started the block.
        if is_close:
            if indent == "":
                sys.stderr.write("wmlindent: from %s, close tag with indent already zero.\n" % name)
            else:
                indent = indent[:-len(baseindent)]
        # Cope with blank lines outside of multiline literals.
        if dostrip:
            if transformed == "\n":
                # Defer the decision; lasttag is deliberately left alone so
                # it still describes the last non-blank line.
                countblanks += 1
                continue
            elif countblanks > 0:
                countblanks = 0
                # All sequences of blank lines get mapped to one blank
                # line, except (a) before closing tags and (b) after
                # opening tags.  In these cases they are ignored.
                if not is_close and not opener(lasttag):
                    outfp.write("\n")
        # Here's where we apply the current indent.  Directives stay at
        # column zero and multiline-literal content is passed through as is.
        if dostrip and not is_directive(transformed):
            output = indent + transformed
        else:
            output = transformed
        # Nuke trailing space and canonicalize to Unix-style end-of-line.
        if dostrip:
            output = output.rstrip() + "\n"
        # And ship the line.
        outfp.write(output)
        # May need to indent based on the line we just saw.
        if is_open:
            indent += baseindent
        # Compute the dostrip state likewise.  This is the only tricky part.
        # We look for an unbalanced string quote in the part of the line
        # before any "#".  If that part also contains "=", the line opens a
        # multiline literal and stripping stops; otherwise it closes one
        # and stripping resumes.
        syntax = transformed.split("#")[0]
        if syntax.count('"') == 1:
            dostrip = "=" not in syntax
        # Are we going to be immediately following a tag?
        if is_open or is_close:
            lasttag = transformed
        else:
            lasttag = ""
    # Pure macro files look like they have unbalanced indents.  That's OK.
    if indent != "" and seen_wml:
        sys.stderr.write("wmlindent: from %s, end of file with indent nonzero.\n" % name)


def allwmlfiles(dir):
    """Get names of all WML files under dir, or dir itself if not a directory.

    Only names ending in ".cfg" are returned.  Directories named
    wmltools.vcdir are not descended into.
    """
    if not os.path.isdir(dir):
        if dir.endswith(".cfg"):
            return [dir]
        return []
    datafiles = []
    for root, dirs, files in os.walk(dir):
        # Pruning dirs in place stops os.walk from descending into it.
        if wmltools.vcdir in dirs:
            dirs.remove(wmltools.vcdir)
        for name in files:
            path = os.path.join(root, name)
            if path.endswith(".cfg"):
                datafiles.append(path)
    return datafiles


def convertor(linefilter, arglist):
    """Apply a filter to command-line arguments.

    linefilter -- callable taking (name, infp, outfp).
    arglist    -- file or directory names; when empty, standard input is
                  filtered to standard output.

    Each file is filtered into "<file>.out", which replaces the original
    only on success.  If the filter raises (including on a keyboard
    interrupt) the partial output is removed and the exception is
    re-raised, so processing stops instead of moving on to the next file.
    """
    if not arglist:
        linefilter("standard input", sys.stdin, sys.stdout)
        return
    for arg in arglist:
        for filename in allwmlfiles(arg):
            tmpname = filename + ".out"
            try:
                with open(filename, "r") as infp:
                    with open(tmpname, "w") as outfp:
                        linefilter(filename, infp, outfp)
            except BaseException:
                # The interrupt may arrive before the copy exists.
                if os.path.exists(tmpname):
                    os.remove(tmpname)
                raise
            else:
                os.remove(filename)     # For Windows portability
                # There's a tiny window open if you keyboard-
                # interrupt here.  It's unavoidable, because
                # there's no known way to do an atomic rename
                # under Windows when the target exists -- see
                # Python manual 14.1.4::rename()
                os.rename(tmpname, filename)


def main():
    """Parse the command line and run the reindenter."""
    try:
        (options, arguments) = getopt.getopt(sys.argv[1:], "h?")
    except getopt.GetoptError as err:
        sys.stderr.write("wmlindent: %s\n" % err)
        sys.exit(2)
    for (opt, _val) in options:
        if opt in ("-h", "-?"):
            sys.stdout.write(__doc__)
            sys.exit(0)
    try:
        convertor(reindent, arguments)
    except KeyboardInterrupt:
        # convertor has already removed any partial output file.
        sys.exit(1)


if __name__ == '__main__':
    main()