wesnoth/data/tools/wmlindent

229 lines
9.1 KiB
Plaintext
Raw Normal View History

2007-06-14 19:40:49 +00:00
#!/usr/bin/env python
"""\
wmlindent - re-indent WML in a uniform way.
By Eric S. Raymond, June 2007.
Call with no arguments to filter WML on stdin to reindented WML on
stdout. If arguments are specified, they are taken to be files to be
re-indented in place; a directory name causes reindenting on all WML
beneath it.
The indent unit is four spaces. Absence of an option to change this is
deliberate; the purpose of this tool is to *prevent* style wars, not encourage
them.
2007-06-14 19:40:49 +00:00
2007-06-29 03:21:25 +00:00
On non-empty lines, this code never modifies anything but leading and
trailing whitespace. Leading whitespace will be regularized to the
current indent; trailing whitespace will be stripped. After processing
all lines will end with a Unix-style \n end-of-line marker.
Runs of entirely blank lines will be reduced to one blank line, except
in two cases where they will be discarded: (a) before WML closing
tags, and (b) after WML opening tags.
It is possible to wrap a section of lines in special comments so that
wmlindent will ignore them. You may need to do this for unbalanced
macros (it's better, though, to get rid of those where possible).
Use 'wmlindent: {start,stop} ignoring' anywhere in a comment.
2007-06-29 03:21:25 +00:00
Interrupting will be safe, as each reindenting will be done to a copy
that is atomically renamed when it's done. If the output file is identical
to the input, the output file will simply be deleted, so the timestamp
on the input file won't be touched.
2007-06-29 03:21:25 +00:00
2007-06-29 11:01:37 +00:00
The --dryrun option detects and reports files that would be changed
without changing them. The --verbose or -v option enables reporting
of files that are (or would be, under --dryrun) changed. With -v -v,
unchanged files are also reported.
2007-06-14 19:40:49 +00:00
Note: This does not include a parser. It will produce bad results on WML
that is syntactically unbalanced. Unbalanced double quotes that aren't part
of a multiline literal will also confuse it. You will receive warnings
if there's an indent open at end of file or if a closer occurs with
indent already zero; these two conditions strongly suggest unbalanced WML.
2007-06-14 19:40:49 +00:00
"""
import sys, os, getopt, filecmp, wmltools, re
2007-06-14 19:40:49 +00:00
def is_directive(str):
    "Tell whether a line opens with a preprocessor directive, which is never indented."
    # str.startswith accepts a tuple and succeeds if any member is a prefix.
    return str.startswith(("#ifdef", "#else", "#endif", "#define", "#enddef"))
2007-06-29 03:21:25 +00:00
def closer(str):
    "Tell whether the line begins with a WML closing tag, i.e. with '[/'."
    return str[:2] == "[/"
def opener(str):
    "Tell whether the line begins with a WML opening tag: a '[' not followed by '/'."
    # Slices rather than indexing, so empty and one-character strings are safe.
    return str[:1] == "[" and str[1:2] != "/"
class bailout(Exception):
    """Raised when a file contains a line that cannot be reindented.

    Deriving from Exception makes this a proper exception class; a bare
    class can only be raised as a Python 2 old-style class.

    Attributes:
        filename -- name of the file (or stream label) being processed
        lineno   -- 1-based number of the offending line
        msg      -- human-readable description of the problem
    """
    def __init__(self, filename, lineno, msg):
        # Hand all three values to Exception so that e.args is populated.
        Exception.__init__(self, filename, lineno, msg)
        self.filename = filename
        self.lineno = lineno
        self.msg = msg

    def __str__(self):
        return '"%s", line %d: %s' % (self.filename, self.lineno, self.msg)
def reindent(name, infp, outfp):
    """Reindent WML read from infp, writing the result to outfp.

    Arguments:
        name  -- label for the input, used only in diagnostics
        infp  -- iterable yielding the input lines
        outfp -- object whose write() method receives the output

    Raises bailout if a line matches the multiple-tags pattern.  Warnings
    about a close tag at zero indent, or a nonzero indent at end of input,
    are written to sys.stderr.
    """
    # Four spaces: the indent unit promised by the module docstring.
    baseindent = "    "
    dostrip = True          # False while inside a multiline string literal
    seen_wml = False        # True once a tag is seen outside a macro body
    inmacro = False
    ignoring = False        # True between the start/stop ignoring comments
    indent = ""
    # Pre-set so that a stray #enddef with no #define cannot hit an
    # unbound local.
    saved_indent = ""
    lasttag = ""
    countlines = 0
    countblanks = 0
    # NOTE(review): as written this only matches a literal "[a-z]]" followed
    # later by "[" and a lowercase letter; it looks like a typo for a
    # "two tags on one line" pattern.  Left unchanged -- confirm the intent.
    multitag = re.compile(r"\[a-z]].*\[[a-z]")	# Avoid triggering on arrays
    for line in infp:
        countlines += 1
        # Implement passthrough mode
        if "wmlindent: start ignoring" in line:
            ignoring = True
            outfp.write(line)
            continue
        elif ignoring:
            outfp.write(line)
            if "wmlindent: stop ignoring" in line:
                ignoring = False
            continue
        # Detect things we can't handle
        if multitag.search(line):
            raise bailout(name, countlines, "multiple tags on the line")
        # Strip each line, unless we're in something like a multiline string.
        if dostrip:
            transformed = line.strip() + "\n"
        else:
            transformed = line
        # Track whether we've seen real WML rather than just macro definitions
        if transformed.startswith("#define"):
            saved_indent = indent
            indent = baseindent
            inmacro = True
        elif transformed.startswith("#enddef"):
            indent = saved_indent
            inmacro = False
        elif not inmacro and transformed[0] in ('[', ']'):
            seen_wml = True
        # In the close case, we must compute new indent *before* emitting
        # the new line so the close tag will be at the same level as the
        # one that started the block.
        if closer(transformed):
            if indent == "":
                sys.stderr.write('wmlindent: "%s", line %d: close tag with indent already zero.\n' % (name, countlines))
            else:
                indent = indent[:-len(baseindent)]
        # Cope with blank lines outside of multiline literals
        if dostrip:
            if transformed == "\n":
                # Held back; whether to emit is decided at the next
                # non-blank line.
                countblanks += 1
                continue
            elif countblanks > 0:
                countblanks = 0
                # All sequences of blank lines get mapped to one blank
                # line, except (a) before closing tags and (b) after
                # opening tags.  In these cases they are ignored.
                if not closer(transformed) and not opener(lasttag):
                    outfp.write("\n")
        # Here's where we apply the current indent
        if dostrip and transformed and not is_directive(transformed):
            output = indent + transformed
        else:
            output = transformed
        # Nuke trailing space and canonicalize to Unix-style end-of-line
        if dostrip:
            output = output.rstrip() + "\n"
        # And ship the line
        outfp.write(output)
        # May need to indent based on the line we just saw.
        if transformed.startswith("[") and not transformed.startswith("[/"):
            indent += baseindent
        # Compute the dostrip state likewise.
        # We look for unbalanced string quotes.
        if dostrip:
            # Outside a string, text from the first '#' on is not counted.
            eligible = transformed.split("#")[0]
        else:
            eligible = transformed
        if eligible.count('"') % 2:
            dostrip = not dostrip
        # Are we going to be immediately following a tag?
        if opener(transformed) or closer(transformed):
            lasttag = transformed
        else:
            lasttag = ""
    # Pure macro files look like they have unbalanced indents.  That's OK
    if indent != "" and seen_wml:
        sys.stderr.write('wmlindent: "%s", line %d: end of file with indent nonzero.\n' % (name, countlines))
2007-06-14 19:40:49 +00:00
def allwmlfiles(dir):
    "List the .cfg files beneath dir; a non-directory dir is itself the sole candidate."
    found = []
    if os.path.isdir(dir):
        for root, dirs, files in os.walk(dir):
            # Removing an entry from dirs in place stops os.walk descending
            # into it.  wmltools.vcdir is presumably the version-control
            # metadata directory name -- confirm against wmltools.
            if wmltools.vcdir in dirs:
                dirs.remove(wmltools.vcdir)
            paths = [os.path.join(root, name) for name in files]
            found.extend([path for path in paths if path.endswith(".cfg")])
    elif dir.endswith(".cfg"):
        found.append(dir)
    return found
def convertor(linefilter, arglist):
    """Apply a filter to command-line arguments.

    Arguments:
        linefilter -- callable taking (name, infp, outfp)
        arglist    -- file or directory names; when empty, standard input
                      is filtered to standard output instead

    Each file found by allwmlfiles() is filtered into "<file>.out".  The
    copy replaces the original only if it differs and the module-level
    'dryrun' flag is false; reporting is governed by the module-level
    'verbose' count.  Both globals are set by the __main__ block.
    """
    if not arglist:
        linefilter("standard input", sys.stdin, sys.stdout)
        return
    for arg in arglist:
        for filename in allwmlfiles(arg):
            scratch = filename + ".out"
            try:
                # Nested try/finally closes both files on every exit path,
                # so the scratch file is never open when it is removed
                # below (removing an open file fails on Windows).
                infp = open(filename, "r")
                try:
                    outfp = open(scratch, "w")
                    try:
                        linefilter(filename, infp, outfp)
                    finally:
                        outfp.close()
                finally:
                    infp.close()
            except bailout:
                # sys.exc_info() instead of "except X, e" / "except X as e"
                # so this parses under both old and new Python syntax.
                e = sys.exc_info()[1]
                sys.stderr.write('wmlindent: "%s", %d: %s\n' % (e.filename, e.lineno, e.msg))
                os.remove(scratch)
            except KeyboardInterrupt:
                # NOTE(review): after an interrupt the loop carries on with
                # the next file rather than stopping -- confirm intended.
                os.remove(scratch)
                sys.stderr.write("wmlindent: %s interrupted\n" % filename)
            else:
                # shallow=False forces a content comparison; the default
                # may call the files equal when only their stat data match.
                if filecmp.cmp(filename, scratch, shallow=False):
                    if verbose >= 2:
                        sys.stderr.write("wmlindent: %s unchanged\n" % filename)
                    os.remove(scratch)
                else:
                    if verbose >= 1:
                        sys.stderr.write("wmlindent: %s changed\n" % filename)
                    if dryrun:
                        os.remove(scratch)
                    else:
                        os.remove(filename)	# For Windows portability
                        # There's a tiny window open if you keyboard-
                        # interrupt here.  It's unavoidable, because
                        # there's no known way to do an atomic rename
                        # under Windows when the target exists -- see
                        # Python manual 14.1.4::rename()
                        os.rename(scratch, filename)
2007-06-14 19:40:49 +00:00
if __name__ == '__main__':
    # Option letters must be declared here to be accepted.  The previous
    # string "h:v" made -h swallow an argument and rejected both -d and
    # -?, even though the loop below tests for them.
    try:
        (options, arguments) = getopt.getopt(sys.argv[1:], "h?dv",
                                             ['help', 'dryrun', 'verbose'])
    except getopt.GetoptError:
        # Report a bad option in the tool's usual message style instead
        # of dying with a traceback.
        sys.stderr.write("wmlindent: %s\n" % sys.exc_info()[1])
        sys.exit(2)
    # convertor() reads these two names as module globals.
    verbose = 0
    dryrun = False
    for (opt, val) in options:
        if opt in ('-h', '-?', '--help'):
            print(__doc__)
            # Asking for help should not go on to filter stdin or files.
            sys.exit(0)
        elif opt in ('-d', '--dryrun'):
            dryrun = True
            # A dry run with no reporting would be pointless.
            verbose = max(1, verbose)
        elif opt in ('-v', '--verbose'):
            verbose += 1
    convertor(reindent, arguments)