run_wml_tests: Print the correct test name when a test fails

The fix in 9535ed9 didn't work; the script still crashed on that line, because
the logic for working out how many tests in the batch had passed wasn't
correct. This commit drops that logic, drops the dependent feature of retrying
from the next test in the batch, and simply reports that the entire batch
failed.

The new logic for filtering the output of a failed batch is much simpler: the
script doesn't filter it at all, and we assume the developer will simply
ignore any line that starts with "PASS TEST". Only tests that are expected to
pass get batched, so the output will look something like the following (a
sketch of the new reporting follows the example):

	PASS TEST: test_return
	PASS TEST: test_assert
	PASS TEST: test_return_2
	PASS TEST: test_assert_2
	Assertion failed: assert false
	FAIL TEST: test_assert_fail

	Failure, Wesnoth returned 1 FAIL but we expected 0 PASS
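
To make the new behaviour concrete, here is a rough, runnable sketch of the
failure path. The hard-coded values are stand-ins for what the script gets
back from subprocess and for the batch being tracked; the real code in the
diff below also checks the verbosity level first, so the output isn't printed
twice.

	# A runnable sketch of the new failure handling; these values are
	# stand-ins for the subprocess result and for the batch that
	# TestResultAccumulator is tracking.
	batch = ["test_return", "test_assert", "test_assert_fail"]
	expected_result = 0    # every batched test is expected to PASS
	returned_result = 1    # but Wesnoth reported a failure
	stdout = b"PASS TEST: test_return\nPASS TEST: test_assert\nFAIL TEST: test_assert_fail\n"
	failed = []            # the accumulator's list of failed tests

	if returned_result != expected_result:
	    # No filtering: the developer is expected to skim past the "PASS TEST" lines.
	    print(stdout.decode('utf-8'))
	    print("Failure, Wesnoth returned", returned_result,
	          "but we expected", expected_result)
	    # The whole batch is recorded as failed; nothing tries to rerun the
	    # tests that came after the failing one.
	    failed.extend(batch)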

A set of constants is added defining how many -v arguments are needed to
enable a given set of debugging output. The expectation is that level 0 is
normal, and level 1 is for developers who like long-running build processes to
print something to show that they're not stuck.
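
As a rough sketch of how the constants are meant to be read (the enum values
match the diff below; the argument handling here is cut down to the essentials
rather than copied from the script):

	import argparse, enum

	class Verbosity(enum.IntEnum):
	    NAMES_OF_TESTS_RUN = 1       # -v: show progress as each batch starts
	    SCRIPT_DEBUGGING = 2         # -vv: also print the Wesnoth command lines
	    OUTPUT_OF_PASSING_TESTS = 3  # -vvv: also print the output of passing tests

	ap = argparse.ArgumentParser()
	ap.add_argument("-v", "--verbose", action="count", default=0)
	options = ap.parse_args()

	# IntEnum members compare directly against the plain count of -v arguments,
	# so level 0 prints only the final summary.
	if options.verbose >= Verbosity.SCRIPT_DEBUGGING:
	    print("would print the wesnoth command line here")
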
Steve Cotton 2021-04-17 16:31:31 +02:00 committed by Steve Cotton
parent 3a14e47e90
commit fa72014158

@@ -6,6 +6,13 @@ This script runs a sequence of wml unit test scenarios.
import argparse, enum, os, re, subprocess, sys
class Verbosity(enum.IntEnum):
"""What to display depending on how many -v arguments were given on the command line."""
# This doesn't use enum.unique, more than one of these could have the same int level
NAMES_OF_TESTS_RUN = 1 # Show progress when running interactively, with a new line each time a new batch starts
SCRIPT_DEBUGGING = 2 # The command lines needed to run Wesnoth directly for a given test is printed
OUTPUT_OF_PASSING_TESTS = 3 # Print the output even when tests give the expected result
class UnexpectedTestStatusException(Exception):
"""Exception raised when a unit test doesn't return the expected result."""
pass
@@ -35,28 +42,48 @@ class TestCase:
return "TestCase<{status}, {name}>".format(status=self.status, name=self.name)
class TestResultAccumulator:
passed = 0
skipped = 0
failed = 0
crashed = 0
passed = []
skipped = []
failed = []
crashed = []
def __init__(self, total):
self.total = total
def pass_test(self, n = 1):
self.passed = self.passed + n
def pass_test(self, batch):
"""These tests had the expected result - passed if UnitTestResult.PASS was expected, etc."""
self.passed += batch
def skip_test(self, n = 1):
self.skipped = self.skipped + n
def skip_test(self, batch):
"""These tests were not run at all. For example, if they expect UnitTestResult.TIMEOUT but the run requested no timeouts."""
self.skipped += batch
def fail_test(self, n = 1):
self.failed = self.failed + n
def fail_test(self, batch):
"""These tests did not crash, but did not have the expected result - they passed if UnitTestResult.FAIL was expected, etc."""
self.failed += batch
def crash_test(self, n = 1):
self.crashed = self.failed + n
def crash_test(self, batch):
"""Segfaults and similar unusual exits from the program under test."""
self.crashed += batch
def __bool__(self):
return self.passed + self.skipped == self.total
"""Returns true if everything that was expected to run (based on the command line options) ran and passed."""
# The logic here is redundant, from the length of passed and skipped we should know that failed and crashed are both empty;
# however a false everything-passed result is much worse than a couple of extra checks here.
return len(self.passed) + len(self.skipped) == self.total and len(self.failed) == 0 and len(self.crashed) == 0
def __str__(self):
result = "Passed {count} of {total} tests\n".format(count=len(self.passed), total=self.total)
if self.skipped:
result += "Skipped batches containing {count} tests: ({names})\n".format(count=len(self.skipped),
names=", ".join([test.name for test in self.skipped]))
if self.failed:
result += "Failed batches containing {count} tests: ({names})\n".format(count=len(self.failed),
names=", ".join([test.name for test in self.failed]))
if self.crashed:
result += "Crashed during batches containing {count} tests: ({names})".format(count=len(self.crashed),
names=", ".join([test.name for test in self.crashed]))
return result;
class TestListParser:
"""Each line in the list of tests should be formatted:
@@ -84,7 +111,7 @@ class TestListParser:
print("Could not parse test list file: ", line)
t = TestCase(UnitTestResult(int(x.groups()[0])), x.groups()[1])
if self.verbose > 1:
if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
print(t)
test_list.append(t)
return batcher(test_list), TestResultAccumulator(len(test_list))
@@ -102,7 +129,7 @@ def run_with_rerun_for_sdl_video(args, timeout):
if b"Could not initialize SDL_video" in res.stdout:
retry = True
if not retry:
return res
return res
sdl_retries += 1
print("Could not initialise SDL_video error, attempt", sdl_retries)
@@ -143,7 +170,7 @@ class WesnothRunner:
self.common_args.extend(options.additional_arg)
self.timeout = options.timeout
self.batch_timeout = options.batch_timeout
if self.verbose > 1:
if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
print("Options that will be used for all Wesnoth instances:", repr(self.common_args))
def run_tests(self, test_list, test_summary):
@@ -155,47 +182,49 @@ class WesnothRunner:
if test.status != UnitTestResult.PASS:
raise NotImplementedError("run_tests doesn't yet support batching tests with non-zero statuses")
expected_result = test_list[0].status
if expected_result == UnitTestResult.TIMEOUT and self.timeout == 0:
test_summary.skip_test(test_list)
if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
print('Skipping test', test_list[0].name, 'because timeout is disabled')
return
if expected_result == UnitTestResult.FAIL_BROKE_STRICT and not options.strict_mode:
test_summary.skip_test(test_list)
if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
print('Skipping test', test_list[0].name, 'because strict mode is disabled')
return
args = self.common_args.copy()
for test in test_list:
args.append("-u")
args.append(test.name)
if self.timeout == 0:
if test.status == UnitTestResult.TIMEOUT:
test_summary.skip_test()
print('Skipping test', test_list[0].name, 'because timeout is disabled')
return
timeout = None
elif len(test_list) == 1:
timeout = self.timeout
else:
if test.status == UnitTestResult.FAIL_BROKE_STRICT and not options.strict_mode:
test_summary.skip_test()
print('Skipping test', test_list[0].name, 'because strict mode is disabled')
return
if len(test_list) == 1:
timeout = self.timeout
else:
timeout = self.batch_timeout
if self.verbose > 0:
timeout = self.batch_timeout
if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
if len(test_list) == 1:
print("Running test", test_list[0].name)
else:
print("Running {count} tests ({names})".format(count=len(test_list),
names=", ".join([test.name for test in test_list])))
if self.verbose > 1:
print(repr(args))
if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
print(repr(args))
try:
res = run_with_rerun_for_sdl_video(args, timeout)
except subprocess.TimeoutExpired as t:
if self.verbose > 0:
print("Timed out (killed by Python timeout implementation)")
if expected_result != UnitTestResult.TIMEOUT:
print("Timed out (killed by Python timeout implementation), while running batch ({names})".format(
names=", ".join([test.name for test in test_list])))
res = subprocess.CompletedProcess(args, UnitTestResult.TIMEOUT.value, t.output or b'')
if self.verbose > 0:
if self.verbose >= Verbosity.OUTPUT_OF_PASSING_TESTS:
print(res.stdout.decode('utf-8'))
if self.verbose > 1:
print("Result:", res.returncode)
num_passed = 1
if test_list[0].status == UnitTestResult.PASS:
num_passed = res.stdout.count(b"PASS TEST")
test_summary.pass_test(num_passed)
print("Result:", res.returncode)
if res.returncode < 0:
print("Wesnoth exited because of signal", -res.returncode)
if options.backtrace:
@@ -203,22 +232,21 @@ class WesnothRunner:
gdb_args = ["gdb", "-q", "-batch", "-ex", "start", "-ex", "continue", "-ex", "bt", "-ex", "quit", "--args"]
gdb_args.extend(args)
subprocess.run(gdb_args, timeout=240)
test_summary.crash_test()
test_summary.skip_test(len(test_list) - num_passed - 1)
test_summary.crash_test(test_list)
raise UnexpectedTestStatusException()
returned_result = UnitTestResult(res.returncode)
if returned_result != expected_result:
if self.verbose == 0:
for line in res.stdout.decode('utf-8').splitlines():
if expected_result == UnitTestResult.PASS and line.startswith("PASS TEST"):
continue
print(line)
print("Failure, Wesnoth returned", returned_result, "for", test_list[num_passed].name, "but we expected", expected_result)
test_summary.fail_test()
test_summary.skip_test(len(test_list) - num_passed - 1)
if returned_result == expected_result:
test_summary.pass_test(test_list)
else:
if self.verbose < Verbosity.OUTPUT_OF_PASSING_TESTS:
# Batch mode only supports tests that should UnitTestResult.PASS, so the output
# here is likely to have many 'PASS TEST' lines with the failing test last.
# If support for batching other UnitTestResults is added then this needs to filter
# the output from the other tests.
print(res.stdout.decode('utf-8'))
print("Failure, Wesnoth returned", returned_result, "but we expected", expected_result)
test_summary.fail_test(test_list)
raise UnexpectedTestStatusException()
elif test_list[0].status != UnitTestResult.PASS:
test_summary.pass_test()
def test_batcher(test_list):
"""A generator function that collects tests into batches which a single
@@ -250,7 +278,7 @@ if __name__ == '__main__':
# The options that are mandatory to support (because they're used in the Travis script)
# are the one-letter forms of verbose, clean, timeout and backtrace.
ap.add_argument("-v", "--verbose", action="count", default=0,
help="Verbose mode. Use -v twice for very verbose mode.")
help="Verbose mode. Additional -v arguments increase the verbosity.")
ap.add_argument("-c", "--clean", action="store_true",
help="Clean mode. (Don't load any add-ons. Used for mainline tests.)")
ap.add_argument("-a", "--additional_arg", action="append",
@@ -299,27 +327,16 @@ if __name__ == '__main__':
runner = WesnothRunner(options)
for batch in test_list:
while len(batch) > 0:
last_passed_count = test_summary.passed
try:
runner.run_tests(batch, test_summary)
batch = []
except UnexpectedTestStatusException as e:
just_passed = test_summary.passed - last_passed_count
batch = batch[just_passed + 1 :]
test_summary.skip_test(-len(batch))
try:
runner.run_tests(batch, test_summary)
except UnexpectedTestStatusException as e:
if test_summary:
raise RuntimeError("Logic error in run_wml_tests - a test has failed, but test_summary says everything passed")
print("Result:" if options.verbose > 0 else "WML Unit Tests Result:", test_summary.passed, "of", test_summary.total, "tests passed")
if test_summary.passed != test_summary.total:
breakdown = ["{0} passed".format(test_summary.passed)]
if test_summary.failed != 0:
breakdown.append("{0} failed".format(test_summary.failed))
if test_summary.crashed != 0:
breakdown.append("{0} crashed".format(test_summary.crashed))
if test_summary.skipped != 0:
breakdown.append("{0} skipped".format(test_summary.skipped))
print(" ({0})".format(', '.join(breakdown)))
if options.verbose > 0 or not test_summary:
print(test_summary)
else:
print("WML Unit Tests Result:", len(test_summary.passed), "of", test_summary.total, "tests passed")
if not test_summary:
sys.exit(1)