wesnoth/data/tools/wmllint

#!/usr/bin/env python
#
# wmllint -- check WML for conformance to the most recent dialect
#
# By Eric S. Raymond April 2007.
#
# All conversion logic for lifting WML and maps from older versions of the
# markup to newer ones should live here.  This includes resource path changes
# and renames, also map format conversions.
#
# Note: Lift logic for pre-1.4 versions has been removed; if you need
# it, check out a copy of wmllint from the 1.4 stable branch and use
# that to lift before running this one.  I did this for a policy
# reason; I wanted to kill off the --oldversion switch.  It will *not*
# be restored; in future, changes to WML syntax *must* be forward
# compatible in such a way that tags from old versions can be
# unambiguously recognized (this will save everybody heartburn).  As a
# virtuous side effect, this featurectomy cuts wmllint's code
# complexity by over 50%, improves performance by about 33%, and
# banishes some annoying behaviors related to the 1.2 map-conversion
# code.
#
# While the script is at it, it checks for various incorrect and dodgy WML
# constructs, including:
#   * unbalanced tags
#   * strings that need a translation mark and do not have one
#   * strings that have a translation mark and should not
#   * translatable strings containing macro references
#   * filter references by id= not matched by an actual unit
#   * abilities or traits without matching special notes, or vice-versa
#   * consistency between recruit= and recruitment_pattern= instances
#   * unknown unit types in recruitment lists
#   * double space after punctuation in translatable strings.
#   * unknown races or movement types in units
#   * unknown base units
#
# Takes any number of directories as arguments.  Each directory is converted.
# If no directories are specified, acts on the current directory.
#
# The recommended procedure is this:
# 1. Run it with --dryrun first to see what it will do.
# 2. If the messages look good, run without --dryrun; the old content
#    will be left in backup files with a -bak extension.
# 3. Eyeball the changes with the --diff option.
# 4. Use wmlscope, with a directory list including the Wesnoth mainline WML
#    as first argument, to check that you have no unresolved references.
# 5. Test the conversion.
# 6. Use either --clean to remove the -bak files or --revert to
#    undo the conversion.
#
# Standalone terrain mask files *must* have a .mask extension on their name
# or they'll have an incorrect usage=map generated into them.
#
# Note: You can shut wmllint up about custom terrains by having a comment
# on the same line that includes the string "wmllint: ignore" or
# "wmllint: noconvert". The same magic comments will also disable checking
# of translation marks.
#
# You can also prevent description insertions with "wmllint: no-icon".
#
# You can force otherwise undeclared characters to be recognized with
# a magic comment containing the string "wmllint: recognize".
# The rest of the line is stripped and treated as the name of a character
# who should be recognized in descriptions.  This will be useful,
# for example, if your scenario follows a continue so there are
# characters present who were not explicitly recalled.  It may
# also be useful if you have wrapped unit-creation or recall markup in macros
# and wmllint cannot recognize it.
#
# Similarly, it is possible to explicitly declare a unit's usage class
# with a magic comment that looks like this:
#    wmllint: usage of <unit> is <class>
# Note that <unit> must be a string wrapped in ASCII doublequotes.  This
# declaration will be useful if you are declaring units with macros that
# include a substitutable formal in the unit name; there are examples in UtBS.
#
# You can disable stack-based malformation checks with a comment
# containing "wmllint: validate-off" and re-enable with "wmllint: validate-on".
#
# You can prevent file conversions with a comment containing
# "wmllint: noconvert" on the same line as the filename.
#
# You can skip checks on unbalanced WML (e.g. in a macro definition) by
# bracketing it with "wmllint: unbalanced-on" and "wmllint: unbalanced-off".
# Note that this will also disable stack-based validation on the span
# of lines they enclose.
#
# You can suppress warnings about newlines in messages (and attempts to
# repair them) with "wmllint: display on", and re-enable them with
# "wmllint: display off".  The repair attempts (only) may also be
# suppressed with the --stringfreeze option.

import sys, os, re, getopt, string, copy, difflib, time
from wesnoth.wmltools import *
from wesnoth.wmliterator import *

# Global changes meant to be done on all lines.  Suppressed by noconvert.
linechanges = (
        ("canrecruit=1", "canrecruit=yes"),
        ("canrecruit=0", "canrecruit=no"),
        ("generate_description", "generate_name"),
        # These changed just after 1.5.0
        ("[special_filter]", "[filter_attack]"),
        ("[wml_filter]", "[filter_wml]"),
        ("[unit_filter]", "[filter]"),
        ("[secondary_unit_filter]", "[filter_second]"),
        ("[attack_filter]", "[filter_attack]"),
        ("[secondary_attack_filter]", "[filter_second_attack]"),
        ("[special_filter_second]", "[filter_second_attack]"),
        ("[/special_filter]", "[/filter_attack]"),
        ("[/wml_filter]", "[/filter_wml]"),
        ("[/unit_filter]", "[/filter]"),
        ("[/secondary_unit_filter]", "[/filter_second]"),
        ("[/attack_filter]", "[/filter_attack]"),
        ("[/secondary_attack_filter]", "[/filter_second_attack]"),
        ("[/special_filter_second]", "[/filter_second_attack]"),
        ("grassland=", "flat="),
        ("tundra=", "frozen="),
        ("cavewall=", "impassable="),
        ("canyon=", "unwalkable="),
        # This changed after 1.5.2
        ("advanceto=", "advances_to="),
        )

def validate_stack(stack, filename, lineno):
    "Check the stack for deprecated WML syntax."
    if verbose >= 3:
        print '"%s", line %d: %s' % (filename, lineno+1, stack)
    if stack:
        (tag, attributes) = tagstack[-1]
        ancestors = map(lambda x: x[0], tagstack)
        #if tag == "sound" and "attack" in ancestors:
        #    print '"%s", line %d: deprecated [sound] within [attack] tag' % (filename, lineno+1)

def validate_on_pop(tagstack, closer, filename, lineno):
    "Validate the stack at the time a new close tag is seen."
    (tag, attributes) = tagstack[-1]
    ancestors = map(lambda x: x[0], tagstack)
    if verbose >= 3:
        print '"%s", line %d: closing %s I see %s with %s' % (filename, lineno, closer, tag, attributes)
    # Detect a malformation that will cause the game to barf while attempting
    # to deserialize an empty unit.
    if closer == "side" and "type" not in attributes and ("no_leader" not in attributes or attributes["no_leader"] != "yes") and "multiplayer" not in ancestors:
        print '"%s", line %d: [side] without type attribute' % (filename, lineno)
    # This assumes that conversion will always happen in units/ files.
    if "units" not in filename and closer == "unit" and "race" in attributes:
        print '"%s", line %d: [unit] needs hand fixup to [unit_type]' % \
              (filename, lineno)
    if closer == "campaign" and "id" not in attributes:
        print '"%s", line %d: campaign has no ID' % \
              (filename, lineno)

def within(tag):
    "Did the specified tag lead one of our enclosing contexts?"
    if type(tag) == type(()):	# Can take a list.
        for t in tag:
            if within(t):
                return True
        else:
            return False
    else:
        return tag in map(lambda x: x[0], tagstack)

def under(tag):
    "Did the specified tag lead the latest context?"
    if type(tag) == type(()):	# Can take a list.
        for t in tag:
            if within(t):
                return True
        else:
            return False
    elif tagstack:
        return tag == tagstack[-1][0]
    else:
        return False

def standard_unit_filter():
    "Are we within the syntactic context of a standard unit filter?"
    # It's under("message") rather than within("message") because
    # [message] can contain [option] markup with menu item description=
    # attributes that should not be altered.
    return within(("filter", "filter_second",
                   "filter_adjacent", "filter_opponent",
                   "unit_filter", "secondary_unit_filter",
                   "special_filter", "special_filter_second",
                   "neighbor_unit_filter",
                   "recall", "teleport", "kill", "unstone", "store_unit",
                   "have_unit", "scroll_to_unit", "role",
                   "hide_unit", "unhide_unit",
                   "protect_unit", "target", "avoid")) \
                   or under("message")

# Sanity checking

# Associations for the ability sanity checks.
# Each entry pairs a piece of text that marks a trait (an ability, weapon
# special, movement type or damage type) with the special-notes macro that
# should accompany it.  sanity_check() looks for both members as substrings
# of the lines inside [unit_type], so an entry that deliberately omits the
# closing brace also matches macro calls that continue past that point.
# The order of entries determines the order in which missing notes and
# traits are listed in diagnostics.
# Note: Depends on ABILITY_EXTRA_HEAL not occurring outside ABILITY_CURES.
notepairs = (
    ("movement_type=undeadspirit", "{SPECIAL_NOTES_SPIRIT}"),
    ("type=arcane", "{SPECIAL_NOTES_ARCANE}"),
    ("{ABILITY_HEALS}", "{SPECIAL_NOTES_HEALS}"),
    #("{ABILITY_EXTRA_HEAL}", "{SPECIAL_NOTES_EXTRA_HEAL}"),
    ("{ABILITY_UNPOISON}", "{SPECIAL_NOTES_UNPOISON}"),
    ("{ABILITY_CURES}", "{SPECIAL_NOTES_CURES}"),
    ("{ABILITY_REGENERATES}", "{SPECIAL_NOTES_REGENERATES}"),
    ("{ABILITY_STEADFAST}", "{SPECIAL_NOTES_STEADFAST}"),
    ("{ABILITY_LEADERSHIP_LEVEL_", "{SPECIAL_NOTES_LEADERSHIP}"), # No } deliberately
    ("{ABILITY_SKIRMISHER}", "{SPECIAL_NOTES_SKIRMISHER}"),
    ("{ABILITY_ILLUMINATES}", "{SPECIAL_NOTES_ILLUMINATES}"),
    ("{ABILITY_TELEPORT}", "{SPECIAL_NOTES_TELEPORT}"),
    ("{ABILITY_AMBUSH}", "{SPECIAL_NOTES_AMBUSH}"),
    ("{ABILITY_NIGHTSTALK}", "{SPECIAL_NOTES_NIGHTSTALK}"),
    ("{ABILITY_CONCEALMENT}", "{SPECIAL_NOTES_CONCEALMENT}"),
    ("{ABILITY_SUBMERGE}", "{SPECIAL_NOTES_SUBMERGE}"),
    ("{ABILITY_FEEDING}", "{SPECIAL_NOTES_FEEDING}"),
    ("{WEAPON_SPECIAL_BERSERK}", "{SPECIAL_NOTES_BERSERK}"),
    ("{WEAPON_SPECIAL_BACKSTAB}", "{SPECIAL_NOTES_BACKSTAB}"),
    ("{WEAPON_SPECIAL_PLAGUE", "{SPECIAL_NOTES_PLAGUE}"),	# No } deliberately
    ("{WEAPON_SPECIAL_SLOW}", "{SPECIAL_NOTES_SLOW}"),
    ("{WEAPON_SPECIAL_STONE}", "{SPECIAL_NOTES_STONE}"),
    ("{WEAPON_SPECIAL_MARKSMAN}", "{SPECIAL_NOTES_MARKSMAN}"),
    ("{WEAPON_SPECIAL_MAGICAL}", "{SPECIAL_NOTES_MAGICAL}"),
    ("{WEAPON_SPECIAL_SWARM}", "{SPECIAL_NOTES_SWARM}"),
    ("{WEAPON_SPECIAL_CHARGE}", "{SPECIAL_NOTES_CHARGE}"),
    ("{WEAPON_SPECIAL_DRAIN}", "{SPECIAL_NOTES_DRAIN}"),
    ("{WEAPON_SPECIAL_FIRSTSTRIKE}", "{SPECIAL_NOTES_FIRSTSTRIKE}"),
    ("{WEAPON_SPECIAL_POISON}", "{SPECIAL_NOTES_POISON}"),
    )

# Lookup tables in both directions: trait text -> notes macro, and
# notes macro -> trait text.
trait_note = dict(notepairs)
note_trait = dict(map(lambda p: (p[1], p[0]), notepairs))

# The usage classes accepted in recruitment_pattern= lists.
# This needs to match the list of usage types in ai_python.cpp
usage_types = ("scout", "fighter", "mixed fighter", "archer", "healer")

# These are accumulated by sanity_check() and examined by consistency_check()
# They persist across calls, so they collect data from every file checked.
unit_types = []         # ids of the unit types seen (or declared by magic comment)
derived_units = []      # (filename, line, unit id, base unit id) per [base_unit] derivation
usage = {}              # unit id -> value of its usage= attribute
sides = []              # (filename, recruit, recruitment_pattern) per [side] having either;
                        # each of the two is [] or a (line, list of names) pair
advances = []           # (unit id, filename, line, advances_to value), "null" excluded
movetypes = []          # name= values found inside [movetype]
unit_movetypes = []     # (unit id, filename, line, movement_type value)
races = []              # id= values found inside [race]
unit_races = []         # (unit id, filename, line, race value)

def sanity_check(filename, lines):
    "Perform sanity and consistency checks on input lines."
    for i in range(len(lines)):
        # Check for things marked translated that aren't strings
        if "_" in lines[i] and not "wmllint: ignore" in lines[i]:
            m = re.search(r'[=(]\s*_\s+("?)', lines[i])
            if m and not m.group(1):
                msg = '"%s", line %d: translatability mark before non-string' % \
                      (filename, i+1)
                print msg
    # Sanity-check abilities and traits against notes macros.
    # Note: This check is disabled on units derived via [base_unit].
    # Also, build dictionaries of unit movement types and races
    in_unit_type = False
    in_theme = False
    in_filter_attack = False
    in_base_unit = False
    unit_race = None
    for i in range(len(lines)):
        if "[filter_attack]" in lines[i]:
            in_filter_attack = True
            continue
        elif "[/filter_attack]" in lines[i]:
            in_filter_attack = False
            continue
        if "[base_unit]" in lines[i]:
            in_base_unit = True
            continue
        elif "[/base_unit]" in lines[i]:
            in_base_unit = False
            continue
        elif "[theme]" in lines[i]:
            in_theme = True
            continue
        elif "[/theme]" in lines[i]:
            in_theme = False
            continue
        elif "[unit_type]" in lines[i]:
            unit_id = ""
            base_unit = ""
            traits = []
            notes = []
            has_special_notes = False
            in_unit_type = i+1
            continue
        elif "[/unit_type]" in lines[i]:
            #print '"%s", %d: unit has traits %s and notes %s' \
            #      % (filename, in_unit_type, traits, notes)
            if unit_id and base_unit:
                derived_units.append((filename, i+1, unit_id, base_unit))
            if unit_id and not base_unit:
                missing_notes = []
                for trait in traits:
                    tn = trait_note[trait]
                    if tn not in notes and tn not in missing_notes:
                        missing_notes.append(tn)
                missing_traits = []
                for note in notes:
                    nt = note_trait[note]
                    if nt not in traits and nt not in missing_traits:
                        missing_traits.append(nt)
                if (notes or traits) and not has_special_notes:
                    missing_notes = ["{SPECIAL_NOTES}"] + missing_notes
                if missing_notes:
                    print '"%s", line %d: unit %s is missing notes +%s' \
                          % (filename, in_unit_type, unit_id, "+".join(missing_notes))
                if missing_traits:
                    print '"%s", line %d: unit %s is missing traits %s' \
                          % (filename, in_unit_type, unit_id, "+".join(missing_traits))
                if not (notes or traits) and has_special_notes:
                    print '"%s", line %d: unit %s has superfluous {SPECIAL_NOTES}' \
                         % (filename, in_unit_type, unit_id)
                if not in_theme and not base_unit and not unit_race:
                    print '"%s", line %d: unit %s has no race' \
                         % (filename, in_unit_type, unit_id)
            in_unit_type = None
            traits = []
            notes = []
            unit_id = ""
            base_unit = ""
            has_special_notes = False
            unit_race = None
        if in_unit_type and not in_filter_attack:
            try:
                (key, prefix, value, comment) = parse_attribute(lines[i])
                if key == "id":
                    if value[0] == "_":
                        value = value[1:].strip()
                    if not unit_id and not in_base_unit:
                        unit_id = value
                        unit_types.append(unit_id)
                    if not base_unit and in_base_unit:
                        base_unit = value
                elif key == "usage":
                    assert(unit_id)
                    usage[unit_id] = value
                elif key == "movement_type":
                    if '{' not in value:
                        assert(unit_id)
                        unit_movetypes.append((unit_id, filename, i+1, value))
                elif key == "race":
                    if '{' not in value:
                        assert(unit_id)
                        unit_race = value
                        unit_races.append((unit_id, filename, i+1, unit_race))
                elif key == "advances_to":
                    assert(unit_id)
                    advancements = value
                    if advancements.strip() != "null":
                        advances.append((unit_id, filename, i+1, advancements))
            except TypeError:
                pass
            if "{SPECIAL_NOTES}" in lines[i]:
                has_special_notes = True
            for (p, q) in notepairs:
                if p in lines[i]:
                    traits.append(p)
                if q in lines[i]:
                    notes.append(q)
    # Collect information on defined movement types
    in_movetype = False
    for i in range(len(lines)):
        if "[movetype]" in lines[i]:
            in_movetype = True
            continue
        elif "[/movetype]" in lines[i]:
            in_movetype = False
            continue
        if in_movetype:
            try:
                (key, prefix, value, comment) = parse_attribute(lines[i])
                if key == 'name':
                    movetypes.append(value)
            except TypeError:
                pass
    # Collect information on defined races
    in_race = False
    for i in range(len(lines)):
        if "[race]" in lines[i]:
            in_race = True
            continue
        elif "[/race]" in lines[i]:
            in_race = False
            continue
        if in_race:
            try:
                (key, prefix, value, comment) = parse_attribute(lines[i])
                if key == 'id':
                    races.append(value)
            except TypeError:
                pass
    # Sanity-check recruit and recruitment_pattern.
    # This code has a limitation; if there are multiple instances of
    # recruit and recruitment_pattern (as can happen if these lists
    # vary by EASY/NORMAL/HARD level) this code will only record the
    # last of each for later consistency checking.
    in_side = False
    in_ai = in_subunit = False
    recruit = []
    in_generator = False
    sidecount = 0
    recruitment_pattern = []
    for i in range(len(lines)):
        if "[generator]" in lines[i]:
            in_generator = True
            continue
        elif "[/generator]" in lines[i]:
            in_generator = False
            continue
        elif "[side]" in lines[i]:
            in_side = True
            sidecount += 1
            continue
        elif "[/side]" in lines[i]:
            if recruit or recruitment_pattern:
                sides.append((filename, recruit, recruitment_pattern))
            in_side = False
            recruit = []
            recruitment_pattern = []
            continue
        elif in_side and "[ai]" in lines[i]:
            in_ai = True
            continue
        elif in_side and "[unit]" in lines[i]:
            in_subunit = True
            continue
        elif in_side and "[/ai]" in lines[i]:
            in_ai = False
            continue
        elif in_side and "[/unit]" in lines[i]:
            in_subunit = False
            continue
        if not in_side or in_subunit or '=' not in lines[i]:
            continue
        try:
            (key, prefix, value, comment) = parse_attribute(lines[i])
            if key == "recruit" and value:
                recruit = (i+1, map(lambda x: x.strip(), value.split(",")))
            elif key == "recruitment_pattern" and value:
                if not in_ai:
                    print '"%s", line %d: recruitment_pattern outside [ai]' \
                              % (filename, i+1)
                else:
                    recruitment_pattern = (i+1, map(lambda x: x.strip(), value.split(",")))
                    for utype in recruitment_pattern[1]:
                        if not utype in usage_types:
                            print '"%s", line %d: unknown usage class %s' \
                                  % (filename, i+1, utype)
            elif key == "side" and not in_ai:
                try:
                    if not in_generator and sidecount != int(value):
                        print '"%s", line %d: side number %s is out of sequence' \
                              % (filename, i+1, value)
                except ValueError:
                    pass	# Ignore ill-formed integer literals
        except TypeError:
            pass
    # Interpret magic comments for setting the usage pattern of units.
    # This copes with some wacky UtBS units that are defined with
    # variant-spawning macros.  The prototype comment looks like this:
    #wmllint: usage of "Desert Fighter" is fighter
    for i in range(len(lines)):
        m = re.match('# *wmllint: usage of "([^"]*)" is +(.*)', lines[i])
        if m:
            usage[m.group(1)] = m.group(2).strip()
            unit_types.append(m.group(1))
    # Consistency-check the id= attributes in [side], [unit], [recall],
    # and [message] scopes, also correctness-check translation marks and look
    # for double spaces at end of sentence.
    present = []
    in_scenario = False
    in_person = False
    in_trait = False
    ignore_id = False
    in_object = False
    ignoreable = False
    preamble_seen = False
    sentence_end = re.compile("(?<=[.!?;:])  +")
    for i in range(len(lines)):
        if '[' in lines[i]:
            preamble_seen = True
        if "[scenario]" in lines[i]:
            in_scenario = True
            preamble_seen = False
        elif "[/scenario]" in lines[i]:
            in_scenario = False
        elif "[trait]" in lines[i]:
            in_trait = True
        elif "[/trait]" in lines[i]:
            in_trait = False
        elif "[object]" in lines[i]:
            in_object = True
        elif "[/object]" in lines[i]:
            in_object = False
        elif "[label]" in lines[i] or "[chamber]" in lines[i] or "[time]" in lines[i]:
            ignore_id = True
        elif "[/label]" in lines[i] or "[/chamber]" in lines[i] or "[/time]" in lines[i]:
            ignore_id = False
        elif "[kill]" in lines[i] or "[effect]" in lines[i] or "[move_unit_fake]" in lines[i] or "[scroll_to_unit]" in lines[i]:
            ignoreable = True
        elif "[/kill]" in lines[i] or "[/effect]" in lines[i] or "[/move_unit_fake]" in lines[i] or "[/scroll_to_unit]" in lines[i]:
            ignoreable = False
        elif "[side]" in lines[i] or "[unit]" in lines[i] or "[recall]" in lines[i]:
            in_person = True
            continue
        elif "[/side]" in lines[i] or "[/unit]" in lines[i] or "[/recall]" in lines[i]:
            in_person = False
        if not in_scenario:
            continue
        m = re.search("# *wmllint: recognize +(.*)", lines[i])
        if m:
            present.append(string_strip(m.group(1)).strip())
        if '=' not in lines[i] or ignoreable:
            continue
        try:
            (key, prefix, value, comment) = parse_attribute(lines[i])
            if "wmllint: ignore" in comment:
                continue
            has_tr_mark = value.lstrip().startswith("_")
            if key == 'role':
                present.append(value)
            if has_tr_mark:
                if '{' in value:
                    print '"%s", line %d: macro reference in translatable string'\
                          % (filename, i+1)
                if future and re.search("[.,!?]  ", lines[i]):
                    print '"%s", line %d: extraneous space in translatable string'\
                          % (filename, i+1)
            # Check correctness of translation marks and descriptions
            if key.startswith("#"):	# FIXME: parse_attribute is confused.
                pass
            elif key == 'letter':	# May be led with _s for void
                pass
            elif key in ('name', 'male_name', 'female_name'):		# FIXME: check this someday
                pass
            elif key in ("message", "name", "description", "story", "note", "text", "summary", "caption", "label", "cannot_use_message", "set_description", "user_team_name") and not value.startswith("$"):
                if not has_tr_mark:
                    print '"%s", line %d: %s needs translation mark' \
                          % (filename, i+1, key)
                    lines[i] = lines[i].replace('=', "=_ ")
                nv = sentence_end.sub(" ", value)
                if nv != value:
                    print '"%s", line %d: double space after sentence end' \
                          % (filename, i+1)
                    if not stringfreeze:
                        lines[i] = sentence_end.sub(" ", lines[i])
            else:
                if key == "id":
                    if in_person:
                        present.append(value)
                    elif value in ('narrator', 'unit', 'second_unit') or (value and value[0] in ("$", "{")):
                        continue
                    elif preamble_seen and not ignore_id and not in_object and not value in present:
                        print '"%s", line %d: unknown \'%s\' referred to by id' \
                              % (filename, i+1, value)
                if has_tr_mark and not ("wmllint: ignore" in comment or "wmllint: noconvert" in comment):
                    print '"%s", line %d: %s should not have a translation mark' \
                              % (filename, i+1, key)
                    lines[i] = lines[i].replace("_", "", 1)
        except TypeError:
            pass
    # Check for textdomain strings; should be exactly one, on line 1
    textdomains = []
    for i in range(len(lines)):
        if "#textdomain" in lines[i]:
            textdomains.append(i+1)
    if not textdomains:
        print '"%s", line 1: no textdomain string' % filename
    elif textdomains[0] == 1:	# Multiples are OK if first is on line 1
        pass
    elif len(textdomains) > 1:
        print '"%s", line %d: multiple textdomain strings on lines %s' % \
              (filename, textdomains[0], ", ".join(map(str, textdomains)))
    else:
        w = textdomains[0]
        print '"%s", line %d: single textdomain declaration not on line 1.' % \
              (filename, w)
        lines = [lines[w-1].lstrip()] + lines[:w-1] + lines[w:]
    return lines

def consistency_check():
    "Consistency-check state information picked up by sanity_check"
    utypes = []
    derivedlist = map(lambda x: x[2], derived_units)
    baselist = map(lambda x: x[3], derived_units)
    for (filename, recruitlist, patternlist) in sides:
        #print "%s: %d=%s, %d=%s" %  (filename, rl, recruit, pl, recruitment_pattern)
        if recruitlist:
            (rl, recruit) = recruitlist
            for rtype in recruit:
                if rtype not in unit_types:
                    print '"%s", line %d: %s is not a known unit type' % (filename, rl, rtype)
                    continue
                elif rtype not in usage:
                    if not rtype in derivedlist:
                        print '"%s", line %d: %s has no usage type' % (filename, rl, rtype)
                    continue
                utype = usage[rtype]
                if patternlist:
                    (pl, recruitment_pattern) = patternlist
                    if utype not in recruitment_pattern:
                        print '"%s", line %d: %s (%s) doesn\'t match the recruitment pattern (%s) for its side' % (filename, rl, rtype, utype, ", ".join(recruitment_pattern))
                    utypes.append(utype)
            if patternlist:
                (pl, recruitment_pattern) = patternlist
                for utype in recruitment_pattern:
                    if utype not in utypes:
                        print '"%s", line %d: %s doesn\'t match a recruitable type for its side' % (filename, pl, utype)
    if movetypes:
        for (unit_id, filename, line, movetype) in unit_movetypes:
            if movetype not in movetypes:
                print '"%s", line %d: %s has unknown movement type' \
                      % (filename, line, unit_id)
    if races:
        for (unit_id, filename, line, race) in unit_races:
            if race not in races:
                print '"%s", line %d: %s has unknown race' \
                      % (filename, line, unit_id)
    # Should we be checking the transitive closure of derivation?
    # It's not clear whether [base_unit] works when the base is itself derived.
    for (filename, line, unit_type, base_unit) in derived_units:
        if base_unit not in unit_types:
            print '"%s", line %d: derivation of %s from %s does not resolve' \
                  % (filename, line, unit_type, base_unit)
    # Check that all advancements are known units
    for (unit_id, filename, lineno, advancements) in advances:
        advancements = map(string.strip, advancements.split(","))
        bad_advancements = filter(lambda x: x not in (unit_types+derivedlist), advancements)
        if bad_advancements:
            print '"%s", line %d: %s has unknown advancements %s' \
                  % (filename, lineno, unit_id, bad_advancements)

# Syntax transformations

leading_ws = re.compile(r"^\s*")

def leader(s):
    "Return a copy of the leading whitespace in the argument."
    return leading_ws.match(s).group(0)

def hack_syntax(filename, lines):
    # Syntax transformations go here.  This gets called once per WML file;
    # the name of the file is passed as filename, text of the file as the
    # array of strings in lines.  Modify lines in place as needed, and
    # set modcount to nonzero when you actually change any.
    # Ensure that every attack has a translatable description.
    for i in range(len(lines)):
        if "no-syntax-rewrite" in lines[i]:

            break
        elif "[attack]" in lines[i]:
            j = i;
            have_description = False
            while '[/attack]' not in lines[j]:
                if lines[j].strip().startswith("description"):
                    have_description = True
                j += 1
            if not have_description:
                j = i
                while '[/attack]' not in lines[j]:
                    fields = lines[j].strip().split('#')
                    syntactic = fields[0]
                    comment = ""
                    if len(fields) > 1:
                        comment = fields[1]
                    if syntactic.strip().startswith("name"):
                        description = syntactic.split("=")[1].strip()
                        if not description.startswith('"'):
                            description = '"' + description + '"\n'
                        # Skip the insertion if this is a dummy declaration
                        # or one modifying an attack inherited from a base unit.
                        if "no-icon" not in comment:
                            new_line = leader(syntactic) + "description=_"+description
                            if verbose:
                                print '"%s", line %d: inserting %s' % (filename, i+1, `new_line`)
                            lines.insert(j+1, new_line)
                            j += 1
                    j += 1
    # Ensure that every speaker=narrator block without an image uses
    # wesnoth-icon.png as an image.
    need_image = in_message = False
    for i in range(len(lines)):
        if "no-syntax-rewrite" in lines[i]:
            break
        precomment = lines[i].split("#")[0]
        if '[message]' in precomment:
            in_message = True
        if "speaker=narrator" in precomment:
            need_image = True
        elif precomment.strip().startswith("image"):
            need_image = False
        elif '[/message]' in precomment:
            if need_image:
                # This line presumes the code has been through wmlindent
                if verbose:
                    print '"%s", line %d: inserting "image=wesnoth-icon.png"'%(filename, i+1)
                lines.insert(i, leader(precomment) + baseindent + "image=wesnoth-icon.png\n")
            need_image = in_message = False
    # Boucmanize death animations
    if future:
        in_death = None
        frame_commented = in_death_commented = False
        frame_start = frame_end = None
        image = None
        for i in range(len(lines)):
            if "no-syntax-rewrite" in lines[i]:
                break
            elif "[death]" in lines[i]:
                in_death = i
                in_death_commented = lines[i].strip().startswith("#")
            elif "[/death]" in lines[i]:
                if frame_start is None:
                    print '"%s", %d: [death] with no frames' % (filename, i)
                    continue
                # Find the image tag
                for inside in range(frame_start, frame_end):
                    if "image=" in lines[inside]:
                        image = lines[inside].strip().split("=")[1]
                        break
                else:
                    print'"%s", line %d: no image in last frame'\
                          % (filename, i)
                    continue
                # Modify the death wrapper
                lines[i] = lines[i].replace("death", "animation")
                inner = leader(lines[in_death])+baseindent
                if in_death_commented:
                    inner = "#" + inner
                lines[in_death] = lines[in_death].replace("death", "animation") \
                                  + inner + "apply_to=death" + "\n"
                # Add a new last frame to the death animation
                outer = leader(lines[frame_start])
                if frame_commented:
                    outer = "#" + outer
                inner = outer + baseindent
                if frame_commented:
                    inner = "#" + inner
                insertion = outer + "[frame]\n" + \
                            inner + "duration=600\n" + \
                            inner + "alpha=1~0\n" + \
                            inner + "image=" + image + "\n" + \
                            outer + "[/frame]\n"
                lines[i] = insertion + lines[i]
                in_death = frame_start = frame_end = None
                frame_commented = in_death_commented = False
            elif in_death and "[frame]" in lines[i]:
                frame_start = i
                frame_commented = lines[i].strip().startswith("#")
            elif in_death and "[/frame]" in lines[i]:
                frame_end = i
    # More syntax transformations would go here.
    return lines

# Generic machinery starts here

def is_map(filename):
    "Report whether the file name carries the .map extension."
    # Slicing never raises on short names; it just yields a shorter string.
    return filename[-4:] == ".map"

if 0:   # Disabled: there are currently no defined map transforms
    class maptransform_error:
        "Error object raised by a map-transformation hook."
        def __init__(self, infile, inline, type):
            # File name, line number within that file, and message text.
            self.infile, self.inline, self.type = infile, inline, type
        def __repr__(self):
            location = (self.infile, self.inline, self.type)
            return '"%s", line %d: %s' % location

    def maptransform_sample(filename, baseline, inmap, y):
        "Transform a map line."
        # Template showing the calling convention for map-transformation
        # hooks; a list of such hooks is the second argument of translator().
        #   baseline -- starting line number of the map within the file
        #   inmap    -- 2D string array holding the entire map
        #   y        -- vertical coordinate of the map line to transform
        complaint = "unrecognized map element at line %d" % (y,)
        raise maptransform_error(filename, baseline+y+1, complaint)

# Stack of the WML tags that are currently open, innermost last.
# translator() pushes a (tagname, attribute-dict) pair for every opening
# tag, records attribute values into the top entry, pops it on the matching
# closer, and empties the stack again at the end of each file.
tagstack = []	# For tracking tag nesting

def outermap(func, inmap):
    """Apply a transformation to every cell of the outermost ring, in place.

    func is called with a cell value and returns its replacement; inmap is
    a list of rows, each row a list of cells.  Nothing is returned.

    Each border cell is transformed exactly once, including in degenerate
    maps with a single row or a single column (where the top/bottom rows or
    the left/right columns coincide).  An empty map or empty rows are left
    alone, and each row's own length decides where its right edge is.
    """
    if not inmap:
        return
    last = len(inmap) - 1
    for y, row in enumerate(inmap):
        if not row:
            continue
        if y == 0 or y == last:
            # Top and bottom rows belong to the ring in their entirety.
            row[:] = [func(cell) for cell in row]
        else:
            # Interior rows contribute only their two end cells.
            row[0] = func(row[0])
            if len(row) > 1:
                row[-1] = func(row[-1])

def translator(filename, mapxforms, textxform):
    """Apply mapxforms to map lines and textxform to non-map lines.

    filename  -- path of the file to read; it is never written here.
    mapxforms -- list of callables, each invoked for every map row as
                 transform(filename, baseline, outmap, y).
    textxform -- callable invoked as textxform(filename, lineno, line) for
                 each non-map line; its return value replaces the line.

    Returns the transformed file text, or None when it is identical to
    what was read.  Diagnostics are printed to standard output.  The
    module-level tag stack is updated while scanning and reset at the end
    of the file.  A mask reference without a .mask extension terminates
    the program via sys.exit(1).  Reads the globals stripcr and verbose.
    """
    global tagstack
    # Read everything up front; make sure the handle is released.
    ifp = open(filename)
    try:
        unmodified = ifp.readlines()
    finally:
        ifp.close()
    # Pull file into an array of lines, CR-stripping as needed
    mfile = []
    map_only = not filename.endswith(".cfg")
    terminator = "\n"
    for line in unmodified:
        if line.endswith("\n"):
            line = line[:-1]
        if line.endswith("\r"):
            line = line[:-1]
            if not stripcr:
                terminator = '\r\n'
        mfile.append(line)
        if "map_data" in line:
            map_only = False
    # Process line-by-line
    lineno = baseline = 0
    cont = False
    validate = True
    unbalanced = False
    newdata = []
    refname = None
    while mfile:
        if not map_only:
            line = mfile.pop(0)
            if verbose >= 3:
                sys.stdout.write(line + terminator)
            lineno += 1
        # Check for one certain error condition
        if line.count("{") and line.count("}"):
            refname = line[line.find("{"):line.rfind("}")]
            # Ignore all-caps macro arguments.
            if refname == refname.upper():
                pass
            elif 'mask=' in line and not (refname.endswith("}") or refname.endswith(".mask")):
                print \
                      '"%s", line %d: fatal error, mask file without .mask extension (%s)' \
                      % (filename, lineno+1, refname)
                sys.exit(1)
        # Exclude map_data= lines that are just 1 line without
        # continuation, or which contain {}.  The former are
        # pathological and the parse won't handle them, the latter
        # refer to map files which will be checked separately.
        if map_only or (("map_data=" in line or "mask=" in line)
                        and line.count('"') in (1, 2)
                        and line.count("{") == 0
                        and  line.count("}") == 0
                        and not within('time')):
            outmap = []
            add_border = True
            add_usage = True
            have_header = have_delimiter = False
            maskwarn = False
            maptype = None
            if map_only:
                if filename.endswith(".mask"):
                    maptype = "mask"
                else:
                    maptype = "map"
            else:
                leadws = leader(line)
                if "map_data" in line:
                    maptype = "map"
                elif "mask" in line:
                    maptype = "mask"
            baseline = lineno
            cont = True
            if not map_only:
                # Push whatever follows the opening quote back onto the
                # input so the gathering loop below sees it as map data.
                fields = line.split('"')
                if fields[1].strip():
                    mfile.insert(0, fields[1])
                if len(fields) == 3:
                    mfile.insert(1, '"')
            if verbose >= 3:
                print "*** Entering %s mode on:" % maptype
                print mfile
            # Gather the map header (if any) and data lines
            savedheaders = []
            while cont and mfile:
                line = mfile.pop(0)
                if verbose >= 3:
                    sys.stdout.write(line + terminator)
                lineno += 1
                # This code supports ignoring comments and header lines
                if len(line) == 0 or line[0] == '#' or '=' in line:
                    if '=' in line:
                        have_header = True
                    if 'border_size' in line:
                        add_border = False
                    if "usage" in line:
                        add_usage = False
                        usage = line.split("=")[1].strip()
                        if usage == 'mask':
                            add_border = False
                            if filename.endswith(".map"):
                                print "warning: usage=mask in file with .map extension"
                        elif usage == 'map':
                            if filename.endswith(".mask"):
                                print "warning: usage=map in file with .mask extension"
                    if len(line) == 0:
                        have_delimiter = True
                    savedheaders.append(line + terminator)
                    continue
                if '"' in line:
                    # A quote closes the map; keep only what precedes it.
                    cont = False
                    if verbose >= 3:
                        print "*** Exiting map mode."
                    line = line.split('"')[0]
                if line:
                    if ',' in line:
                        fields = line.split(",")
                    else:
                        fields = list(line)
                    outmap.append(fields)
                    if not maskwarn and maptype == 'map' and "_s" in line:
                        print \
                              '"%s", line %d: warning, fog in map file' \
                              % (filename, lineno+1)
                        maskwarn = True
            # Checking the outmap length here is a bit of a crock;
            # the one-line map we don't want to mess with is in the
            # NO_MAP macro.
            if len(outmap) == 1:
                add_border = add_usage = False
            # Deduce the map type
            if not map_only:
                if maptype == "map":
                    newdata.append(leadws + "map_data=\"")
                elif maptype == "mask":
                    newdata.append(leadws + "mask=\"")
            for transform in mapxforms:
                for y in range(len(outmap)):
                    transform(filename, baseline, outmap, y)
            if maptype == "mask":
                add_border = False
            if add_border:
                print '%s, "line %d": adding map border...' % \
                      (filename, baseline)
                newdata.append("border_size=1" + terminator)
                have_header = True
                # Start by duplicating the current outermost ring
                outmap = [outmap[0]] + outmap + [outmap[-1]]
                for i in range(len(outmap)):
                    outmap[i] = [outmap[i][0]] + outmap[i] + [outmap[i][-1]]
                # Strip villages out of the edges
                outermap(lambda n: re.sub(r"\^V[a-z]+", "", n), outmap)
                # Strip keeps out of the edges
                outermap(lambda n: re.sub(r"K([a-z]+)", r"C\1", n), outmap)
                # Strip the starting positions out of the edges
                outermap(lambda n: re.sub(r"[1-9] ", r"", n), outmap)
                # Turn big trees on the edges to ordinary forest hexes
                outermap(lambda n: n.replace(r"Gg^Fet", r"Gs^Fp"), outmap)
            if add_usage:
                print '%s, "line %d": adding %s usage header...' % \
                      (filename, baseline, maptype)
                newdata.append("usage=" + maptype + terminator)
                have_header = True
            newdata += savedheaders
            if have_header and not have_delimiter:
                newdata.append(terminator)
            for y in range(len(outmap)):
                newdata.append(",".join(outmap[y]) + terminator)
            # All lines of the map are processed, add the appropriate trailer
            if not map_only:
                newdata.append("\"" + terminator)
        elif "map_data=" in line and (line.count("{") or line.count("}")):
            newline = line
            refre = re.compile(r"\{@?([^A-Z].*)\}").search(line)
            if refre:
                mapfile = refre.group(1)
                if not mapfile.endswith(".map") and is_map(mapfile):
                    newline = newline.replace(mapfile, mapfile + ".map")
            newdata.append(newline + terminator)
            if newline != line:
                if verbose > 0:
                    print 'wmllint: "%s", line %d: %s -> %s.' % (filename, lineno, line, newline)
        elif "map_data=" in line and line.count('"') > 1:
            print 'wmllint: "%s", line %d: one-line map.' % (filename, lineno)
            newdata.append(line + terminator)
        else:
            # Handle text (non-map) lines.  It can use within().
            newline = textxform(filename, lineno, line)
            newdata.append(newline + terminator)
            # Split off the comment for *every* text line, even while
            # balance checking is suspended.  Otherwise the directives
            # tested at the bottom of this branch would keep seeing the
            # comment that switched checking off, and "unbalanced-off"
            # could never take effect.
            fields = newline.split("#")
            trimmed = fields[0]
            comment = ""
            if len(fields) > 1:
                comment = fields[1]
            # Now do warnings based on the state of the tag stack.
            if not unbalanced:
                destringed = re.sub('"[^"]*"', '', trimmed)    # Ignore string literals
                for instance in re.finditer(r"\[\/?\+?([a-z][a-z_]*[a-z])\]", destringed):
                    tag = instance.group(1)
                    closer = instance.group(0)[1] == '/'
                    if not closer:
                        tagstack.append((tag, {}))
                    else:
                        if len(tagstack) == 0:
                            print '"%s", line %d: closer [/%s] with tag stack empty.' % (filename, lineno+1, tag)
                        elif tagstack[-1][0] != tag:
                            print '"%s", line %d: unbalanced [%s] closed with [/%s].' % (filename, lineno+1, tagstack[-1][0], tag)
                        else:
                            if validate:
                                validate_on_pop(tagstack, tag, filename, lineno)
                            tagstack.pop()
                if tagstack:
                    # Record this line's attributes on the innermost open tag.
                    for instance in re.finditer(r'([a-z][a-z_]*[a-z])\s*=(.*)', trimmed):
                        attribute = instance.group(1)
                        value = instance.group(2)
                        if '#' in value:
                            value = value.split("#")[0]
                        tagstack[-1][1][attribute] = value.strip()
                    if validate:
                        validate_stack(tagstack, filename, lineno)
            # Magic comments that switch checking on and off
            if "wmllint: validate-on" in comment:
                validate = True
            if "wmllint: validate-off" in comment:
                validate = False
            if "wmllint: unbalanced-on" in comment:
                unbalanced = True
            if "wmllint: unbalanced-off" in comment:
                unbalanced = False
    # It's an error if the tag stack is nonempty at the end of any file:
    if tagstack:
        print '"%s", line %d: tag stack nonempty (%s) at end of file.' % (filename, lineno, tagstack)
    tagstack = []
    if iswml(filename):
        # Perform semantic sanity checks
        newdata = sanity_check(filename, newdata)
        # OK, now perform WML rewrites
        newdata = hack_syntax(filename, newdata)
    # Run everything together (maps and masks need nothing more than this)
    transformed = "".join(newdata)
    # Character-level scan that reports a newline reached while a quote is
    # still open after "message" text.  The report is suppressed between
    # "wmllint: display on" and "wmllint: display off".
    linecount = 1
    quotecount = 0
    display_state = False
    singleline = False
    # Matching at an offset avoids copying the tail of the text for every
    # character, which made this scan quadratic in the file size.
    display_on = re.compile(" *wmllint: *display +on")
    display_off = re.compile(" *wmllint: *display +off")
    for i in range(len(transformed)):
        if transformed[i] == '\n':
            if singleline:
                singleline = False
                if not display_state and quotecount % 2:
                    print '"%s", line %d: newline within string' % (filename, linecount)
            linecount += 1
        elif transformed[i-7:i] == "message" and not transformed[i] == ']':
            singleline = True
        elif display_on.match(transformed, i):
            display_state = True
        elif display_off.match(transformed, i):
            display_state = False
        elif transformed[i] == '"':
            quotecount += 1
            if quotecount % 2 == 0:
                singleline = False
    # Return None if the transformation functions made no changes.
    if "".join(unmodified) != transformed:
        return transformed
    else:
        return None

# Names of version-control metadata directories.  allcfgfiles() removes
# these from the directory list during its walk so they are not descended.
vctypes = (".svn", ".git")

def interesting(fn):
    "Does this file name denote something to convert (a .cfg file or a map)?"
    if fn.endswith(".cfg"):
        return True
    return is_map(fn)

def allcfgfiles(dir):
    """Return the normalized paths of all interesting files under dir.

    dir may also name a single file: it is returned alone if it is
    interesting and exists, a complaint goes to stderr if it is interesting
    but missing, and anything else yields an empty list.  Directory results
    are sorted before normalization.
    """
    if not os.path.isdir(dir):
        # Not a directory, so treat the argument as a lone file name.
        if not interesting(dir):
            return []
        if not os.path.exists(dir):
            sys.stderr.write("wmllint: %s does not exist\n" % dir)
            return []
        return [os.path.normpath(dir)]
    found = []
    for root, dirs, files in os.walk(dir):
        # Prune version-control directories in place so os.walk skips them.
        dirs[:] = [sub for sub in dirs if sub not in vctypes]
        for name in files:
            candidate = os.path.join(root, name)
            if interesting(candidate):
                found.append(candidate)
    found.sort()    # So diffs for same campaigns will cluster in reports
    return [os.path.normpath(path) for path in found]

def help():
    "Write the command-line usage summary to standard error."
    # Keep this text in step with the getopt declaration in the main
    # section: the long form of -D is --diffs, and --future has the short
    # form -f.
    sys.stderr.write("""\
Usage: wmllint [options] [dir...]
    Convert Battle for Wesnoth WML from older versions to newer ones.
    Takes any number of directories as arguments.  Each directory is converted.
    If no directories are specified, acts on the current directory.
    Options may be any of these:
    -h, --help                 Emit this help message and quit.
    -d, --dryrun               List changes but don't perform them.
    -v, --verbose              -v        lists changes.
                               -v -v     names each file before it's processed.
                               -v -v -v  shows verbose parse details.
    -c, --clean                Clean up -bak files.
    -D, --diffs                Display diffs between converted and unconverted files.
    -r, --revert               Revert the conversion from the -bak files.
    -s, --stripcr              Convert DOS-style CR/LF to Unix-style LF.
    -f, --future               Enable experimental WML conversions.
""")

if __name__ == '__main__':
    # Parse the command line.  Every switch tested in the loop below has to
    # be declared to getopt as well, otherwise getopt rejects it and the
    # handler can never run; -S/--stringfreeze was missing here.
    # NOTE(review): -n is accepted but has no handler, so it is silently
    # ignored -- confirm whether it was meant as a synonym for --dryrun.
    try:
        (options, arguments) = getopt.getopt(sys.argv[1:], "cdDfhnrsSv", [
            "clean",
            "diffs",
            "dryrun",
            "future",
            "help",
            "revert",
            "stringfreeze",
            "stripcr",
            "verbose",
            ])
    except getopt.GetoptError:
        help()
        sys.exit(1)
    # Option defaults
    clean = False
    diffs = False
    dryrun = False
    future = False
    revert = False
    stringfreeze = False
    stripcr = False
    verbose = 0
    for (switch, val) in options:
        if switch in ('-h', '--help'):
            help()
            sys.exit(0)
        elif switch in ('-c', '--clean'):
            clean = True
        elif switch in ('-d', '--dryrun'):
            dryrun = True
            # A dry run is pointless unless the changes are listed.
            verbose = max(1, verbose)
        elif switch in ('-D', '--diffs'):
            diffs = True
        elif switch in ('-f', '--future'):
            future = True
        elif switch in ('-r', '--revert'):
            revert = True
        elif switch in ('-s', '--stripcr'):
            stripcr = True
        elif switch in ('-S', '--stringfreeze'):
            stringfreeze = True
        elif switch in ('-v', '--verbose'):
            verbose += 1
    if clean and revert:
        sys.stderr.write("wmllint: can't do clean and revert together.\n")
        sys.exit(1)

    def hasdigit(str):
        "Report whether any character of the string is a decimal digit 0-9."
        return any(char in "0123456789" for char in str)

    def texttransform(filename, lineno, line):
        "Resource-name transformation on text lines."
        original = line
        # Perform line changes
        if "wmllint: noconvert" not in original:
            for (old, new) in linechanges:
                line = line.replace(old, new)
        # Perform tag renaming for 1.5.  Note: this has to happen before
        # the sanity check, which assumes [unit] has already been
        # mapped to [unit_type].  Also, beware that this test will fail to
        # convert any unit definitions not in conventionally-named
        # directories -- this is necessary in order to avoid stepping
        # on SingleUnitWML in macro files.
        # UnitWML
        if "units" in filename:
            line = line.replace("[unit]", "[unit_type]")
            line = line.replace("[+unit]", "[+unit_type]")
            line = line.replace("[/unit]", "[/unit_type]")
        # Handle SingleUnitWML or Standard Unit Filter or SideWML
        # Also, when macro calls have description= in them, the arg is
        # a SUF being passed in.
        if (under("unit") and not "units" in filename) or \
               standard_unit_filter() or \
               under("side") or \
               re.search("{[A-Z]+.*description=.*}", line):
            if "id" not in tagstack[-1][1] and "_" not in line:
                line = re.sub(r"\bdescription\s*=", "id=", line)
            if "name" not in tagstack[-1][1]:
                line = re.sub(r"user_description\s*=", "name=", line)
        # Now, inside objects...
        if under("object") and "description" not in tagstack[-1][1]:
            line = re.sub(r"user_description\s*=", "description=", line)
        # Alas, WML variable references cannot be converted so
        # automatically.
        if ".description" in line:
            print '"%s", line %d: .description may need hand fixup' % \
                                   (filename, lineno)
        if ".user_description" in line:
            print '"%s", line %d: .user_description may need hand fixup' % \
                                   (filename, lineno)
        # In unit type definitions
        if under("unit_type") or under("female") or under("unit"):
            line = line.replace("unit_description=", "description=")
            line = line.replace("advanceto=", "advances_to=")
        # Inside themes
        if within("theme"):
            line = line.replace("[unit_description]", "[unit_name]")
        # Report the changes
        if verbose > 0 and line != original:
            msg = "%s, line %d: %s -> %s" % \
                  (filename, lineno, original.strip(), line.strip())
            print msg
        return line

    try:
        if not arguments:
            arguments = ["."]

        for dir in arguments:
            ofp = None
            for fn in allcfgfiles(dir):
                if verbose >= 2:
                    print fn + ":"
                backup = fn + "-bak"
                if clean or revert:
                    # Do housekeeping
                    if os.path.exists(backup):
                        if clean:
                            print "wmllint: removing %s" % backup
                            if not dryrun:
                                os.remove(backup)
                        elif revert:
                            print "wmllint: reverting %s" % backup
                            if not dryrun:
                                os.rename(backup, fn)
                elif diffs:
                    # Display diffs
                    if os.path.exists(backup):
                        fromdate = time.ctime(os.stat(backup).st_mtime)
                        todate = time.ctime(os.stat(fn).st_mtime)
                        fromlines = open(backup, 'U').readlines()
                        tolines = open(fn, 'U').readlines()
                        diff = difflib.unified_diff(fromlines, tolines,
                                             backup, fn, fromdate, todate, n=3)
                        sys.stdout.writelines(diff)
                else:
                    # Do file conversions
                    try:
                        changed = translator(fn, [], texttransform)
                        if changed:
                            print "wmllint: converting", fn
                            if not dryrun:
                                os.rename(fn, backup)
                                ofp = open(fn, "w")
                                ofp.write(changed)
                                ofp.close()
                    #except maptransform_error, e:
                    #    sys.stderr.write("wmllint: " + `e` + "\n")
                    except:
                        sys.stderr.write("wmllint: internal error on %s\n" % fn)
                        (exc_type, exc_value, exc_traceback) = sys.exc_info()
                        raise exc_type, exc_value, exc_traceback
        # Constency-check everything we got from the file scans
        consistency_check()
    except KeyboardInterrupt:
        print "Aborted"

# wmllint ends here