wesnoth/data/tools/wmlparser.py

596 lines
21 KiB
Python
Executable File

#!/usr/bin/env python
# encoding: utf8
import wmldata, os, glob, sys
import re
"""Module implementing a WML parser."""
class Error(Exception):
    """
    Parse error carrying the location where it happened.

    The message starts with "filename:line: text" for the text the parser
    was reading, followed by one "from filename:line" line for every entry
    in parser.texts, innermost first, each indented one space more than the
    previous one.
    """

    def __init__(self, parser, text):
        """
        parser must provide filename, line and texts (a sequence of objects
        which themselves have filename and line); text is the description.
        """
        self.text = "%s:%d: %s" % (parser.filename, parser.line, text)
        # parser.texts is a stack, so walk it from the top down.
        for depth, parent in enumerate(reversed(parser.texts)):
            self.text += "\n " + " " * depth + "from %s:%d" % (
                parent.filename, parent.line)
        # Hand the message to the base class so args and repr() carry it.
        Exception.__init__(self, self.text)

    def __str__(self):
        return self.text
class Parser:
    """
    Character level WML reader with a small preprocessor.

    The text being read is kept in self.text / self.textpos. Whenever another
    text has to be read first (an included file or the replacement text of a
    macro) the current position is saved on the self.texts stack by
    push_text and restored by pop_text once the new text is exhausted.
    """

    class Macro:
        """Class to hold one single macro."""
        def __init__(self, name, params, text):
            self.name, self.params, self.text = name, params, text

    class TextState:
        """Saved reading position of one text on the Parser.texts stack."""
        def __init__(self, filename, text, textpos, line, current_path,
                     textdomain):
            self.filename, self.text, self.textpos, self.line =\
                filename, text, textpos, line
            self.current_path = current_path
            self.textdomain = textdomain

    def __init__(self, data_dir, user_dir = None):
        """
        Initialize a new WMLParser instance.
        data_dir is used for resolving {filepath} and {@filepath}
        user_dir is used for resolving {~filepath} and {@filepath}
        See http://www.wesnoth.org/wiki/PreprocessorRef
        """
        self.data_dir = data_dir
        self.user_dir = user_dir

        self.textpos = 0
        self.line = 1
        self.macros = {}
        self.texts = []
        self.text = ""
        self.filename = ""
        self.current_path = "."
        self.textdomain = ""
        # Overwritten by every push_text call; defined here so the attribute
        # exists before the first text is pushed.
        self.params = None
        self.macro_callback = None

        # If set, included files are only parsed when under the given directory.
        self.only_expand_pathes = []

        # Whether to print current file, comments, and macro replacements.
        self.verbose = False

    def read_encoded(self, filename):
        """
        Helper for gracefully handling non-utf8 files and fixing up non-unix
        line endings.

        The file is decoded as utf8, falling back to latin1. "\\r\\n" becomes
        "\\n" and every tab becomes a space. If the file cannot be opened a
        message is written to stderr and "" is returned.
        """
        try:
            # Binary mode, so the bytes can be decoded explicitly; the with
            # block closes the handle again.
            with open(filename, "rb") as handle:
                raw = handle.read()
        except IOError:
            sys.stderr.write("Cannot open file %s!\n" % filename)
            return ""
        try:
            text = raw.decode("utf8")
        except UnicodeDecodeError:
            text = raw.decode("latin1")
        return text.replace("\r\n", "\n").replace("\t", " ")

    def set_macro_callback(self, callback):
        """
        You can set a last-resort function which is called when a macro could
        not be resolved by the Parser. The calling format is:
        callback(wmlparser, name, params)
        A true return value is handed back by parse_macro unchanged.
        """
        self.macro_callback = callback

    def parse_file(self, filename):
        """
        Set the parser to parse the given file. The directory of the file
        becomes the current path for {./...} includes.
        """
        text = self.read_encoded(filename)
        self.push_text(filename, text, cd = os.path.dirname(filename))

    def parse_stream(self, stream):
        """
        Set the parser to parse from a file object.
        """
        text = stream.read()
        text = text.replace("\r\n", "\n").replace("\t", " ")
        self.push_text("inline", text)

    def parse_text(self, text):
        """
        Set the parser to directly parse from the given string.
        """
        text = text.replace("\r\n", "\n").replace("\t", " ")
        self.push_text("inline", text)

    def push_text(self, filename, text, params = None, cd = None):
        """
        Recursively parse a sub-document, e.g. when a file is included or a
        macro is executed.

        The current reading position is saved on self.texts and reading
        continues at the start of text. cd, if given and not empty, becomes
        the new current path.
        """
        if self.verbose:
            sys.stderr.write("%s:%d: Now parsing %s.\n" % (self.filename,
                self.line, filename))
        text = text.replace("\r\n", "\n").replace("\t", " ")
        # An empty text is replaced by a single newline, so there is always
        # at least one character to read before the text is popped again.
        if not text: text = "\n"
        self.texts.append(self.TextState(self.filename, self.text, self.textpos,
            self.line, self.current_path, self.textdomain))
        self.filename, self.text, self.params = filename, text, params
        self.textpos = 0
        self.line = 1
        self.textdomain = ""
        if cd: self.current_path = cd

    def pop_text(self):
        """
        Finish the current text and return to parsing the caller.
        """
        textstate = self.texts.pop()
        self.filename, self.text, self.textpos, self.line =\
            textstate.filename, textstate.text, textstate.textpos, textstate.line
        self.current_path = textstate.current_path
        self.textdomain = textstate.textdomain
        if self.verbose:
            sys.stderr.write("%s:%d: Back.\n" % (self.filename, self.line))

    def get_macros(self):
        """
        Return the dictionary (name -> Macro) of all macros currently known
        to the parser. This is the parser's own dictionary, not a copy.
        """
        return self.macros

    def add_macros(self, macros):
        """
        Add additional macros to the parser. macros maps names to Macro
        objects; existing entries with the same name are replaced.
        """
        self.macros.update((name, macros[name]) for name in macros)

    def read_next(self):
        """
        Read and consume the next character, keeping the line counter up to
        date. When this was the last character of the current text, the
        parser returns to the text below it on the stack.
        """
        c = self.text[self.textpos]
        self.textpos += 1
        if c == "\n":
            self.line += 1
        if self.textpos == len(self.text):
            if len(self.texts): self.pop_text()
        return c

    def at_end(self):
        """
        Return True if the parser is at the very end of the input, that is the
        last character of the topmost input text has been read.
        """
        return len(self.texts) == 0 and self.textpos == len(self.text)

    def peek_next(self):
        """
        Like read_next, but does not consume. Returns "" when there is
        nothing left to read.
        """
        if self.textpos >= len(self.text):
            if len(self.texts):
                ts = self.texts[-1]
                if ts.textpos >= len(ts.text): return ""
                return ts.text[ts.textpos]
            # Nothing on the stack either: the input is exhausted.
            return ""
        return self.text[self.textpos]

    def read_until(self, sep):
        """
        Read until a character inside the (non-empty) string sep is found.

        The returned string includes the separator. If the current text ends
        first, reading continues in the text below it on the stack; if the
        whole input ends first, everything read so far is returned without a
        separator at the end.
        """
        # Escape sep, so characters like ^ ] \ - are taken literally inside
        # the character class.
        mob = re.compile(".*?[" + re.escape(sep) + "]", re.S).match(
            self.text, self.textpos)
        if mob:
            found = mob.group(0)
            self.line += found.count("\n")
            self.textpos = mob.end(0)
            if self.textpos == len(self.text):
                if len(self.texts): self.pop_text()
            return found
        else:
            found = self.text[self.textpos:]
            self.line += found.count("\n")
            self.textpos = len(self.text)
            if len(self.texts):
                self.pop_text()
                found += self.read_until(sep)
            return found

    def read_while(self, sep):
        """Read while characters are inside the string sep."""
        text = ""
        while not self.at_end():
            c = self.peek_next()
            if not c in sep:
                return text
            c = self.read_next()
            text += c
        return text

    def skip_whitespace_and_newlines(self):
        """Skip spaces, tabs, carriage returns and newlines."""
        self.read_while(" \t\r\n")

    def skip_whitespace_inside_statement(self):
        """
        Skip whitespace, newlines and comment lines. A line starting with
        one of the preprocessor directives below is not a comment and is
        left unread.
        """
        directives = ("#define", "#undef", "#textdomain", "#ifdef", "#else",
            "#end")
        while 1:
            self.read_while(" \t\r\n")
            if self.at_end(): return
            if self.peek_next() != "#": return
            for directive in directives:
                if self.check_for(directive): return
            # A plain comment: drop the rest of the line and look again.
            self.read_until("\n")

    def skip_whitespace(self):
        """Skip spaces only."""
        self.read_while(" ")

    def check_for(self, str):
        """Compare the following text with str."""
        return self.text[self.textpos:self.textpos + len(str)] == str

    def read_upto_string(self, str):
        """
        Read input up to and including the given string.

        Returns the text in front of str. Only the current text is searched;
        if str does not occur in it, None is returned and nothing is
        consumed.
        """
        pos = self.text.find(str, self.textpos)
        if pos == -1:
            return None
        found = self.text[self.textpos:pos]
        self.textpos = pos + len(str)
        self.line += found.count("\n")
        if self.textpos == len(self.text):
            # Same guard as in read_next and read_until: only pop when there
            # is something to return to.
            if len(self.texts): self.pop_text()
        return found

    def parse_macro(self):
        """
        Handle a {...} construct whose opening brace has just been read.

        The contents are first tried as a file or directory to include (see
        the prefixes below). Otherwise they are taken as macro name followed
        by parameters, and the replacement text of the macro is pushed.

        No recursive macro processing is done here. If a macro is passed as
        parameter to a macro, then whoever parses the macro replacement will
        do the macro recursion.

        Returns None if an include or a known macro was handled (or the path
        is outside only_expand_pathes). For an unknown macro, the true
        result of the macro callback is returned if there is one, else a
        message is written to stderr and the macro name is returned.
        Raises Error for unclosed or empty macros and missing parameters.
        """
        params = []
        macro = self.read_until("}")
        if not macro or macro[-1] != "}":
            raise Error(self, "Unclosed macro")

        preserve = macro
        macro = macro[:-1] # Get rid of final }
        if not macro:
            raise Error(self, "Empty macro")

        # If the macro starts with ~, assume a file in userdata.
        if macro[0] == "~":
            if self.user_dir:
                dirpath = self.user_dir + "/" + macro[1:]
            else:
                dirpath = ""
        # If the macro starts with @, look first in data then in userdata.
        elif macro[0] == "@":
            dirpath = self.data_dir + "/" + macro[1:]
            if not os.path.exists(dirpath) and self.user_dir:
                dirpath = self.user_dir + "/" + macro[1:]
        # If the macro starts with ., look relative to the currently parsed
        # file.
        elif macro[0] == ".":
            dirpath = self.current_path + macro[1:]
        # Otherwise, try to interpret the macro as a filename in the data dir.
        else:
            dirpath = self.data_dir + "/" + macro

        if os.path.exists(dirpath):
            dirpath = os.path.normpath(dirpath)
            if self.only_expand_pathes:
                # Note: this is a character-wise prefix comparison.
                if not any(os.path.commonprefix([dirpath, x]) == x
                        for x in self.only_expand_pathes):
                    return None
            # If it is a directory, parse all cfg files within.
            if os.path.isdir(dirpath):
                files = glob.glob(dirpath + "/*.cfg")
            else:
                files = [dirpath]
            for path in files:
                self.push_text(path, self.read_encoded(path),
                    cd = os.path.dirname(path))
            return None

        # No file was found, try to do macro expansion. The contents are
        # pushed back, so they can be read again piece by piece.
        self.push_text("macro", preserve)

        # Find all parameters.
        while 1:
            read = self.read_until('"}{ (\n')
            if not read:
                sys.stderr.write("? %s\n" % macro)
                sys.stderr.write(" (%s)\n" % params)
                raise Error(self, "Unexpected end of file")
            sep = read[-1]
            read = read[:-1]
            if sep == "}":
                if read: params += [read]
                break
            elif sep == "{":
                # A nested macro is passed on unexpanded, braces included.
                balance = 1
                param = sep + read
                while balance:
                    if self.at_end():
                        raise Error(self, "Unclosed macro")
                    c = self.read_next()
                    if c == "{":
                        balance += 1
                    elif c == "}":
                        balance -= 1
                    param += c
                params += [param]
            elif sep == '"':
                # Cannot parse strings here.. it must be passed to the macro as
                # is, so the real string parser can handle it. Else there will
                # be subtle bugs, e.g. when a MACRO evaluates to a + at the end
                # of line.
                read += '"' + self.read_until('"')
                params += [read]
            elif sep == "(":
                # A parenthesized parameter is passed without the outermost
                # parentheses.
                balance = 1
                param = read
                while balance:
                    if self.at_end():
                        raise Error(self, "Unclosed macro")
                    c = self.read_next()
                    if c == "(":
                        balance += 1
                    elif c == ")":
                        balance -= 1
                    param += c
                params += [param[:-1]]
            else:
                # Space or newline ends a plain parameter.
                if read:
                    params += [read]
                self.read_while(" \n")

        if not params:
            raise Error(self, "Empty macro")

        name = params[0]
        if name in self.macros:
            macro = self.macros[name]
            text = macro.text
            for i in range(len(macro.params)):
                if 1 + i >= len(params):
                    raise Error(self, "Not enough parameters for macro %s. " % name +
                        "%d given but %d needed %s." % (len(params) - 1,
                        len(macro.params), macro.params))
                if self.verbose:
                    print("Replacing {%s} with %s" % (macro.params[i],
                        params[1 + i]))
                text = text.replace("{%s}" % macro.params[i],
                    params[1 + i])
            # An empty macro has nothing to parse.
            if text:
                self.push_text(name, text)
        else:
            if self.macro_callback:
                keep_macro = self.macro_callback(self, name, params)
                if keep_macro: return keep_macro
            sys.stderr.write("No macro %s.\n" % name)
            sys.stderr.write(" (%s:%d)\n" % (self.filename, self.line))
            return name
        return None

    def parse_string(self):
        """
        Read a quoted string whose opening quote has just been read and
        return it without the closing quote. Macros inside the string are
        handled by parse_macro; the name of an unresolved macro is inserted
        as text. Raises Error if the input ends before the closing quote.
        """
        text = ""
        while not self.at_end():
            text += self.read_until('"{')
            last = text[-1:]
            if last == '"':
                return text[:-1]
            elif last == '{':
                text = text[:-1]
                not_found = self.parse_macro()
                if not isinstance(not_found, wmldata.Data):
                    if not_found:
                        text += not_found
            else:
                break
        raise Error(self, "Unclosed string")

    def parse_inside(self, data, c):
        """
        Parse one "key=value" statement, c being its first character.

        Several comma separated keys take one comma separated value each. A
        + joins the value with what follows, also across newlines and
        comments. A quoted string preceded by _ marks the statement as
        translatable. The data argument is not used.

        Returns a list with one wmldata.DataText per key. Raises Error if
        there is no = or there are fewer values than keys.
        """
        variables = []
        values = []
        variable = ""
        value = ""
        got_assign = False
        spaces = ""
        while 1:
            if c == "{":
                self.parse_macro()
            elif c == "\n":
                break
            elif c == "+":
                value = value.rstrip() # remove whitespace before +
                self.skip_whitespace_inside_statement() # read over newline
            elif not got_assign:
                if c == "=":
                    variables += [variable.rstrip()]
                    got_assign = True
                    translatable = False
                    self.skip_whitespace()
                else:
                    if c == ",":
                        variables += [variable]
                        variable = ""
                    else:
                        variable += c
            else:
                if c == '"':
                    # remove possible _
                    i = len(value)
                    while i > 0:
                        i -= 1
                        if value[i] != " ": break
                    if value and value[i] == "_":
                        translatable = True
                        # This is not the assignment =, but from e.g. MENU_IMG_TXT
                        if i == 0 or value[i - 1] in [" ", "="]:
                            # remove whitespace before _
                            while i > 1:
                                if value[i - 1] != " ": break
                                i -= 1
                            value = value[:i]
                    string = self.parse_string()
                    value += string
                    spaces = ""
                else:
                    if c == "," and len(values) + 1 < len(variables):
                        values += [value]
                        value = ""
                        spaces = ""
                    elif c == " ":
                        # Spaces are only kept if something follows them.
                        spaces += c
                    else:
                        if spaces:
                            value += spaces
                            spaces = ""
                        value += c
            if self.at_end(): break
            c = self.read_next()

        if not got_assign:
            raise Error(self, "= expected for \"%s\"" % variable)

        values += [value]
        if len(values) < len(variables):
            raise Error(self, "%d values expected for \"%s\" but only %d found" % (
                len(variables), ",".join(variables), len(values)))

        result = []
        for key, text in zip(variables, values):
            result += [wmldata.DataText(key, text,
                translatable = translatable, textdomain = self.textdomain)]
        return result

    def parse_top(self, data, state = None):
        """
        Parse tags, statements and preprocessor lines and insert the results
        into data, until the input ends or the current level is closed.

        state tells what closes the current level: the name of the open tag,
        "#ifdef" or "#else"; None at the outermost level.

        Handled are #define/#enddef, #undef, #ifdef/#else/#endif,
        #textdomain, comments, [tag]...[/tag], {macros} and key=value
        statements. Raises Error for a mismatched closing tag, for #else or
        #endif without #ifdef and for an incomplete #define.
        """
        while 1:
            self.skip_whitespace_and_newlines()
            if self.at_end():
                break
            c = self.read_next()
            if c == "#": # comment or preprocessor
                if self.check_for("define "):
                    self.read_until(" ")
                    params = []
                    while 1:
                        name = self.read_until(" \n")
                        sep = name[-1:]
                        # No separator means the input ended in the header.
                        if sep != " " and sep != "\n":
                            raise Error(self, "#define without #enddef")
                        name = name[:-1]
                        if name: params += [name]
                        if sep == "\n": break
                        self.read_while(" ")
                    text = self.read_upto_string("#enddef")
                    if text is None:
                        raise Error(self, "#define without #enddef")
                    if not params:
                        raise Error(self, "#define without a name")
                    self.macros[params[0]] = self.Macro(params[0], params[1:], text)
                elif self.check_for("undef "):
                    self.read_until(" ")
                    # read_until includes the separator, which is not part
                    # of the name.
                    name = self.read_until(" \n").strip()
                    self.macros.pop(name, None)
                elif self.check_for("ifdef "):
                    self.read_until(" ")
                    name = self.read_until(" \n")
                    if name[-1:] == " ": self.read_while(" \n")
                    name = name[:-1]
                    subdata = wmldata.DataIfDef(name, [], "then")
                    self.parse_top(subdata, "#ifdef")
                    data.insert(subdata)
                elif self.check_for("else"):
                    self.read_until("\n")
                    if state != "#ifdef":
                        raise Error(self, "#else without #ifdef")
                    # The else part is parsed up to #endif and stored inside
                    # the then part, which ends here as well.
                    subdata = wmldata.DataIfDef("else", [], "else")
                    self.parse_top(subdata, "#else")
                    data.insert(subdata)
                    return
                elif self.check_for("endif"):
                    self.read_until("\n")
                    if state != "#ifdef" and state != "#else":
                        self.read_until("\n")
                        raise Error(self, "#endif without #ifdef or #else")
                    return
                elif self.check_for("textdomain"):
                    self.read_until(" ")
                    name = self.read_until("\n").strip()
                    # Only the "wesnoth" textdomain is kept.
                    if name == "wesnoth":
                        self.textdomain = "wesnoth"
                    else:
                        self.textdomain = ""
                else: # comment
                    line = self.read_until("\n")
                    comment = c + line
                    if self.verbose:
                        # The newline of the comment line is dropped, print
                        # adds one itself.
                        print("Comment removed: " + comment.rstrip("\n"))
            elif c == '[':
                name = self.read_until("]")[:-1]
                if name[:1] == '/':
                    if state == name[1:] or state == "+" + name[1:]:
                        return
                    raise Error(self, "Mismatched closing tag [%s], expected [/%s]" % (name, state))
                subdata = wmldata.DataSub(name)
                self.parse_top(subdata, name)
                data.insert(subdata)
            elif c == '{':
                keep_macro = self.parse_macro()
                if isinstance(keep_macro, wmldata.Data):
                    data.insert(keep_macro)
            else:
                for subdata in self.parse_inside(data, c):
                    data.insert(subdata)
# Command line use: parse WML given with -e, as a file name or on stdin, and
# print the resulting tree with wmldata's debug output.
if __name__ == "__main__":
    import optparse, subprocess

    # psyco is optional: use it when it can be imported, else go without.
    try: import psyco
    except ImportError: pass
    else: psyco.full()

    optionparser = optparse.OptionParser()
    optionparser.set_usage("usage: %prog [options] [filename]")
    optionparser.add_option("-p", "--path", help = "specify wesnoth data path")
    # NOTE(review): -C has no action, so optparse expects a value after it;
    # any non-empty value enables colors. Left as is to keep the command
    # line compatible.
    optionparser.add_option("-C", "--color", help = "use colored output")
    optionparser.add_option("-u", "--userpath", help = "specify userdata path")
    optionparser.add_option("-e", "--execute", help = "execute given WML")
    optionparser.add_option("-v", "--verbose", action = "store_true",
        help = "make the parser very verbose")
    optionparser.add_option("-c", "--contents", action = "store_true",
        help = "display contents of every tag")
    options, args = optionparser.parse_args()

    if options.path:
        path = options.path
    else:
        # Ask the wesnoth executable where its files are.
        try:
            p = subprocess.Popen(["wesnoth", "--path"],
                stdout = subprocess.PIPE, universal_newlines = True)
            # communicate() reads all output and waits for the process.
            path = p.communicate()[0].strip()
            path = os.path.join(path, "data")
        except OSError:
            sys.stderr.write("Could not determine Wesnoth path.\n")
            # Without a data path nothing below can work.
            sys.exit(1)

    wmlparser = Parser(path, options.userpath)

    if options.verbose:
        wmlparser.verbose = True

    # Input in order of preference: -e text, file argument, stdin.
    if options.execute:
        wmlparser.parse_text(options.execute)
    elif args:
        wmlparser.parse_file(args[0])
    else:
        wmlparser.parse_stream(sys.stdin)

    data = wmldata.DataSub("WML")
    wmlparser.parse_top(data)

    data.debug(show_contents = options.contents, use_color = options.color)