
The fix in 9535ed9
didn't work: the script still crashed on that line, because the logic for
working out how many tests in the batch had passed was incorrect.
This commit drops that logic, drops the dependent feature of retrying from
the next test in the batch, and simply reports the entire batch as failed.
The new logic for handling the output of a failed batch is much simpler: the
script doesn't filter it at all, and we assume the developer will just ignore
any line that starts "PASS TEST". Only tests that are expected to pass get
batched, so the output is going to look something like the following:
PASS TEST: test_return
PASS TEST: test_assert
PASS TEST: test_return_2
PASS TEST: test_assert_2
Assertion failed: assert false
FAIL TEST: test_assert_fail
Failure, Wesnoth returned 1 FAIL but we expected 0 PASS
A set of constants is added defining how many -v arguments are needed to
enable a given set of debugging output. The expectation is that level 0 is
the normal mode, and level 1 is for developers who like long-running build
processes to print something to show that they're not stuck.
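
For reference, the levels map onto invocations roughly as follows (a sketch,
assuming the script is invoked as ./run_wml_tests; the names are the
Verbosity constants defined in the script):
./run_wml_tests             level 0: just the one-line result summary
./run_wml_tests -v          level 1: NAMES_OF_TESTS_RUN, a line as each batch starts
./run_wml_tests -v -v       level 2: SCRIPT_DEBUGGING, also print each Wesnoth command line
./run_wml_tests -v -v -v    level 3: OUTPUT_OF_PASSING_TESTS, also print output of passing tests
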
#!/usr/bin/env python3
# encoding: utf-8
"""
This script runs a sequence of wml unit test scenarios.
"""

import argparse, enum, os, re, subprocess, sys

class Verbosity(enum.IntEnum):
    """What to display depending on how many -v arguments were given on the command line."""
    # This doesn't use enum.unique, as more than one of these could have the same int level
    NAMES_OF_TESTS_RUN = 1 # Show progress when running interactively, with a new line each time a new batch starts
    SCRIPT_DEBUGGING = 2 # The command line needed to run Wesnoth directly for a given test is printed
    OUTPUT_OF_PASSING_TESTS = 3 # Print the output even when tests give the expected result

class UnexpectedTestStatusException(Exception):
    """Exception raised when a unit test doesn't return the expected result."""
    pass

class UnitTestResult(enum.Enum):
    """Enum corresponding to game_launcher.hpp's unit_test_result"""
    PASS = 0
    FAIL = 1
    TIMEOUT = 2
    FAIL_LOADING_REPLAY = 3
    FAIL_PLAYING_REPLAY = 4
    FAIL_BROKE_STRICT = 5
    FAIL_WML_EXCEPTION = 6
    FAIL_BY_DEFEAT = 7
    PASS_BY_VICTORY = 8

    def __str__(self):
        return str(self.value) + ' ' + self.name
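
    # For illustration: str(UnitTestResult.FAIL) == "1 FAIL", which is the format
    # seen in the "Failure, Wesnoth returned 1 FAIL but we expected 0 PASS" message.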

class TestCase:
    """Represents a single line of the wml_test_schedule."""
    def __init__(self, status, name):
        self.status = status
        self.name = name

    def __str__(self):
        return "TestCase<{status}, {name}>".format(status=self.status, name=self.name)

class TestResultAccumulator:
    def __init__(self, total):
        self.total = total
        # These lists are created per-instance, so that two accumulators never share state.
        self.passed = []
        self.skipped = []
        self.failed = []
        self.crashed = []

    def pass_test(self, batch):
        """These tests had the expected result - passed if UnitTestResult.PASS was expected, etc."""
        self.passed += batch

    def skip_test(self, batch):
        """These tests were not run at all. For example, if they expect UnitTestResult.TIMEOUT but the run requested no timeouts."""
        self.skipped += batch

    def fail_test(self, batch):
        """These tests did not crash, but did not have the expected result - they passed if UnitTestResult.FAIL was expected, etc."""
        self.failed += batch

    def crash_test(self, batch):
        """Segfaults and similar unusual exits from the program under test."""
        self.crashed += batch

    def __bool__(self):
        """Returns true if everything that was expected to run (based on the command line options) ran and passed."""
        # The logic here is redundant: from the lengths of passed and skipped we should know that failed and crashed are both empty;
        # however a false everything-passed result is much worse than a couple of extra checks here.
        return len(self.passed) + len(self.skipped) == self.total and len(self.failed) == 0 and len(self.crashed) == 0

    def __str__(self):
        result = "Passed {count} of {total} tests\n".format(count=len(self.passed), total=self.total)
        if self.skipped:
            result += "Skipped batches containing {count} tests: ({names})\n".format(count=len(self.skipped),
                names=", ".join([test.name for test in self.skipped]))
        if self.failed:
            result += "Failed batches containing {count} tests: ({names})\n".format(count=len(self.failed),
                names=", ".join([test.name for test in self.failed]))
        if self.crashed:
            result += "Crashed during batches containing {count} tests: ({names})".format(count=len(self.crashed),
                names=", ".join([test.name for test in self.crashed]))
        return result

class TestListParser:
    """Each line in the list of tests should be formatted:
    <expected return code><space><name of unit test scenario>

    For example:
    0 test_functionality

    Lines beginning # are treated as comments.
    """
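    # For illustration (hypothetical name): a line "4 test_replay_breaks" would
    # expect UnitTestResult(4), i.e. FAIL_PLAYING_REPLAY, from that scenario.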
    def __init__(self, options):
        self.verbose = options.verbose
        self.filename = options.list

    def get(self, batcher):
        status_name_re = re.compile(r"^(\d+) ([\w-]+)$")
        test_list = []
        for line in open(self.filename, mode="rt"):
            line = line.strip()
            if line == "" or line.startswith("#"):
                continue

            x = status_name_re.match(line)
            if x is None:
                print("Could not parse test list file: ", line)
                # Skip the malformed line, instead of crashing on x.groups() below.
                continue

            t = TestCase(UnitTestResult(int(x.groups()[0])), x.groups()[1])
            if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
                print(t)
            test_list.append(t)
        return batcher(test_list), TestResultAccumulator(len(test_list))

def run_with_rerun_for_sdl_video(args, timeout):
    """A wrapper for subprocess.run with a workaround for the issue of travis+18.04
    intermittently failing to initialise SDL.
    """
    # Sanity check on the number of retries. It's a rare failure, a single retry would probably
    # be enough.
    sdl_retries = 0
    while sdl_retries < 10:
        res = subprocess.run(args, timeout=timeout, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        retry = False
        if b"Could not initialize SDL_video" in res.stdout:
            retry = True
        if not retry:
            return res
        sdl_retries += 1
        print("Could not initialise SDL_video error, attempt", sdl_retries)

class WesnothRunner:
    def __init__(self, options):
        self.verbose = options.verbose
        if options.path in ["XCode", "xcode", "Xcode"]:
            import glob
            path_list = []
            build = "Debug" if options.debug_bin else "Release"
            pattern = os.path.join("~/Library/Developer/XCode/DerivedData/The_Battle_for_Wesnoth*/Build/Products",
                build, "The Battle for Wesnoth.app/Contents/MacOS/The Battle for Wesnoth")
            path_list.extend(glob.glob(os.path.expanduser(pattern)))
            if len(path_list) == 0:
                raise FileNotFoundError("Couldn't find your xcode build dir")
            if len(path_list) > 1:
                # seems better than choosing one at random
                raise RuntimeError("Found more than one xcode build dir")
            path = path_list[0]
        else:
            if options.path is None:
                path = os.path.split(os.path.realpath(sys.argv[0]))[0]
            else:
                path = options.path
            if os.path.isdir(path):
                path += "/wesnoth"
            if options.debug_bin:
                path += "-debug"
        self.common_args = [path, "--nobanner"]
        if os.name == 'nt':
            self.common_args.append("--wnoconsole")
            self.common_args.append("--wnoredirect")
        if options.strict_mode:
            self.common_args.append("--log-strict=warning")
        if options.clean:
            self.common_args.append("--noaddons")
        if options.additional_arg is not None:
            self.common_args.extend(options.additional_arg)
        self.timeout = options.timeout
        self.batch_timeout = options.batch_timeout
        if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
            print("Options that will be used for all Wesnoth instances:", repr(self.common_args))
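
    # For illustration: with --clean and default strict mode, common_args might be
    # ["/path/to/wesnoth", "--nobanner", "--log-strict=warning", "--noaddons"]
    # (the path is machine-specific).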

    def run_tests(self, test_list, test_summary):
        """Run all of the tests in a single instance of Wesnoth"""
        if len(test_list) == 0:
            raise ValueError("Running an empty test list")
        if len(test_list) > 1:
            for test in test_list:
                if test.status != UnitTestResult.PASS:
                    raise NotImplementedError("run_tests doesn't yet support batching tests with non-zero statuses")
        expected_result = test_list[0].status

        if expected_result == UnitTestResult.TIMEOUT and self.timeout == 0:
            test_summary.skip_test(test_list)
            if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
                print('Skipping test', test_list[0].name, 'because timeout is disabled')
            return
        if expected_result == UnitTestResult.FAIL_BROKE_STRICT and not options.strict_mode:
            test_summary.skip_test(test_list)
            if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
                print('Skipping test', test_list[0].name, 'because strict mode is disabled')
            return

        args = self.common_args.copy()
        for test in test_list:
            args.append("-u")
            args.append(test.name)

        if self.timeout == 0:
            timeout = None
        elif len(test_list) == 1:
            timeout = self.timeout
        else:
            timeout = self.batch_timeout

        if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
            if len(test_list) == 1:
                print("Running test", test_list[0].name)
            else:
                print("Running {count} tests ({names})".format(count=len(test_list),
                    names=", ".join([test.name for test in test_list])))
        if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
            print(repr(args))

        try:
            res = run_with_rerun_for_sdl_video(args, timeout)
        except subprocess.TimeoutExpired as t:
            if expected_result != UnitTestResult.TIMEOUT:
                print("Timed out (killed by Python timeout implementation), while running batch ({names})".format(
                    names=", ".join([test.name for test in test_list])))
            res = subprocess.CompletedProcess(args, UnitTestResult.TIMEOUT.value, t.output or b'')
        if self.verbose >= Verbosity.OUTPUT_OF_PASSING_TESTS:
            print(res.stdout.decode('utf-8'))
            print("Result:", res.returncode)
        if res.returncode < 0:
            print("Wesnoth exited because of signal", -res.returncode)
            if options.backtrace:
                print("Launching GDB for a backtrace...")
                gdb_args = ["gdb", "-q", "-batch", "-ex", "start", "-ex", "continue", "-ex", "bt", "-ex", "quit", "--args"]
                gdb_args.extend(args)
                subprocess.run(gdb_args, timeout=240)
            test_summary.crash_test(test_list)
            raise UnexpectedTestStatusException()
        returned_result = UnitTestResult(res.returncode)
        if returned_result == expected_result:
            test_summary.pass_test(test_list)
        else:
            if self.verbose < Verbosity.OUTPUT_OF_PASSING_TESTS:
                # Batch mode only supports tests that should UnitTestResult.PASS, so the output
                # here is likely to have many 'PASS TEST' lines with the failing test last.
                # If support for batching other UnitTestResults is added then this needs to filter
                # the output from the other tests.
                print(res.stdout.decode('utf-8'))
            print("Failure, Wesnoth returned", returned_result, "but we expected", expected_result)
            test_summary.fail_test(test_list)
            raise UnexpectedTestStatusException()

def test_batcher(test_list):
    """A generator function that collects tests into batches which a single
    instance of Wesnoth can run.
    """
    expected_to_pass = []
    for test in test_list:
        if test.status == UnitTestResult.PASS:
            expected_to_pass.append(test)
        else:
            yield [test]
    if len(expected_to_pass) == 0:
        return
    if options.batch_max == 0:
        yield expected_to_pass
        return
    while len(expected_to_pass) > 0:
        yield expected_to_pass[0:options.batch_max]
        expected_to_pass = expected_to_pass[options.batch_max:]
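
# For illustration (hypothetical tests): with batch_max == 0, the list
# [a PASS, b PASS, c FAIL, d PASS] yields the batch [c] on its own, followed
# by a single batch [a, b, d] of all the tests that are expected to pass.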

def test_nobatcher(test_list):
    """A generator function that provides the same API as test_batcher but
    emits the tests one at a time."""
    for test in test_list:
        yield [test]

if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    # The options that are mandatory to support (because they're used in the Travis script)
    # are the one-letter forms of verbose, clean, timeout and backtrace.
    ap.add_argument("-v", "--verbose", action="count", default=0,
        help="Verbose mode. Additional -v arguments increase the verbosity.")
    ap.add_argument("-c", "--clean", action="store_true",
        help="Clean mode. (Don't load any add-ons. Used for mainline tests.)")
    ap.add_argument("-a", "--additional_arg", action="append",
        help="Additional arguments to pass to wesnoth. For options that start with a hyphen, '--additional_arg --data-dir' will give an error, use '--additional_arg=--data-dir' instead.")
    ap.add_argument("-t", "--timeout", type=int, default=10,
        help="New timer value to use, instead of 10s as default. The value 0 means no timer, and also skips tests that expect a timeout.")
    ap.add_argument("-bt", "--batch-timeout", type=int, default=300,
        help="New timer value to use for batched tests, instead of 300s as default.")
    ap.add_argument("-bm", "--batch-max", type=int, default=0,
        help="Maximum number of tests to run in a batch. Default no limit.")
    ap.add_argument("-bd", "--batch-disable", action="store_const", const=1, dest='batch_max',
        help="Disable test batching, may be useful if debugging a small subset of tests. Equivalent to --batch-max=1")
    ap.add_argument("-s", "--no-strict", dest="strict_mode", action="store_false",
        help="Disable strict mode. By default, we run wesnoth with the option --log-strict=warning to ensure errors result in a failed test.")
    ap.add_argument("-d", "--debug_bin", action="store_true",
        help="Run the wesnoth-debug binary instead of wesnoth.")
    ap.add_argument("-g", "--backtrace", action="store_true",
        help="If we encounter a crash, generate a backtrace using gdb. Must have gdb installed for this option.")
    ap.add_argument("-p", "--path", metavar="dir",
        help="Path to the wesnoth binary. By default assume it is in the same directory as this script."
        + " If running on a Mac, passing 'xcode' as the path will attempt to locate the binary in Xcode derived data directories.")
    ap.add_argument("-l", "--list", metavar="filename",
        help="Loads the list of tests from the given file.",
        default="wml_test_schedule")

    # Workaround for argparse not accepting option values that start with a hyphen,
    # for example "-a --user-data-dir". https://bugs.python.org/issue9334
    # New callers can use "-a=--user-data-dir", but compatibility with the old version
    # of run_wml_tests needs support for "-a --user-data-dir".
    try:
        while True:
            i = sys.argv.index("-a")
            sys.argv[i] = "=".join(["-a", sys.argv.pop(i + 1)])
    except IndexError:
        pass
    except ValueError:
        pass
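
    # For illustration: the loop above rewrites sys.argv so that
    # ["run_wml_tests", "-a", "--user-data-dir"] becomes
    # ["run_wml_tests", "-a=--user-data-dir"] before argparse sees it.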

    options = ap.parse_args()

    if options.verbose > 1:
        print(repr(options))

    batcher = test_nobatcher if options.batch_max == 1 else test_batcher
    test_list, test_summary = TestListParser(options).get(batcher)
    runner = WesnothRunner(options)

    for batch in test_list:
        try:
            runner.run_tests(batch, test_summary)
        except UnexpectedTestStatusException:
            if test_summary:
                raise RuntimeError("Logic error in run_wml_tests - a test has failed, but test_summary says everything passed")

    if options.verbose > 0 or not test_summary:
        print(test_summary)
    else:
        print("WML Unit Tests Result:", len(test_summary.passed), "of", test_summary.total, "tests passed")

    if not test_summary:
        sys.exit(1)