run_wml_tests: Print the correct test name when a test fails

The fix in 9535ed9 didn't work; the script still crashed on that line, because
the logic for working out how many tests in the batch had passed wasn't
correct. This commit drops that logic, drops the dependent feature of retrying
from the next test in the batch, and simply reports that the entire batch
failed.

The new logic for filtering the output of a failed batch is much simpler: the
script doesn't filter it at all, and we assume the developer will simply
ignore any line that starts with "PASS TEST". Only tests that are expected to
pass get batched, so the output will look something like the following (a
sketch of the new reporting follows the example):

	PASS TEST: test_return
	PASS TEST: test_assert
	PASS TEST: test_return_2
	PASS TEST: test_assert_2
	Assertion failed: assert false
	FAIL TEST: test_assert_fail

	Failure, Wesnoth returned 1 FAIL but we expected 0 PASS
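
To make the new behaviour concrete, here is a rough, runnable sketch of the
failure path. The hard-coded values are stand-ins for what the script gets
back from subprocess and for the batch being tracked; the real code in the
diff below also checks the verbosity level first, so the output isn't printed
twice.

	# A runnable sketch of the new failure handling; these values are
	# stand-ins for the subprocess result and for the batch that
	# TestResultAccumulator is tracking.
	batch = ["test_return", "test_assert", "test_assert_fail"]
	expected_result = 0    # every batched test is expected to PASS
	returned_result = 1    # but Wesnoth reported a failure
	stdout = b"PASS TEST: test_return\nPASS TEST: test_assert\nFAIL TEST: test_assert_fail\n"
	failed = []            # the accumulator's list of failed tests

	if returned_result != expected_result:
	    # No filtering: the developer is expected to skim past the "PASS TEST" lines.
	    print(stdout.decode('utf-8'))
	    print("Failure, Wesnoth returned", returned_result,
	          "but we expected", expected_result)
	    # The whole batch is recorded as failed; nothing tries to rerun the
	    # tests that came after the failing one.
	    failed.extend(batch)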

A set of constants is added defining how many -v arguments are needed to
enable a given set of debugging output. The expectation is that level 0 is
normal, and level 1 is for developers who like long-running build processes to
print something to show that they're not stuck.
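
As a rough sketch of how the constants are meant to be read (the enum values
match the diff below; the argument handling here is cut down to the essentials
rather than copied from the script):

	import argparse, enum

	class Verbosity(enum.IntEnum):
	    NAMES_OF_TESTS_RUN = 1       # -v: show progress as each batch starts
	    SCRIPT_DEBUGGING = 2         # -vv: also print the Wesnoth command lines
	    OUTPUT_OF_PASSING_TESTS = 3  # -vvv: also print the output of passing tests

	ap = argparse.ArgumentParser()
	ap.add_argument("-v", "--verbose", action="count", default=0)
	options = ap.parse_args()

	# IntEnum members compare directly against the plain count of -v arguments,
	# so level 0 prints only the final summary.
	if options.verbose >= Verbosity.SCRIPT_DEBUGGING:
	    print("would print the wesnoth command line here")
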
Steve Cotton 2021-04-17 16:31:31 +02:00 committed by Steve Cotton
parent 3a14e47e90
commit fa72014158

@@ -6,6 +6,13 @@ This script runs a sequence of wml unit test scenarios.
import argparse, enum, os, re, subprocess, sys
class Verbosity(enum.IntEnum):
"""What to display depending on how many -v arguments were given on the command line."""
# This doesn't use enum.unique, more than one of these could have the same int level
NAMES_OF_TESTS_RUN = 1 # Show progress when running interactively, with a new line each time a new batch starts
SCRIPT_DEBUGGING = 2 # The command lines needed to run Wesnoth directly for a given test is printed
OUTPUT_OF_PASSING_TESTS = 3 # Print the output even when tests give the expected result
class UnexpectedTestStatusException(Exception):
"""Exception raised when a unit test doesn't return the expected result."""
pass
@@ -35,28 +42,48 @@ class TestCase:
return "TestCase<{status}, {name}>".format(status=self.status, name=self.name)
class TestResultAccumulator:
passed = 0
skipped = 0
failed = 0
crashed = 0
passed = []
skipped = []
failed = []
crashed = []
def __init__(self, total):
self.total = total
def pass_test(self, n = 1):
self.passed = self.passed + n
def pass_test(self, batch):
"""These tests had the expected result - passed if UnitTestResult.PASS was expected, etc."""
self.passed += batch
def skip_test(self, n = 1):
self.skipped = self.skipped + n
def skip_test(self, batch):
"""These tests were not run at all. For example, if they expect UnitTestResult.TIMEOUT but the run requested no timeouts."""
self.skipped += batch
def fail_test(self, n = 1):
self.failed = self.failed + n
def fail_test(self, batch):
"""These tests did not crash, but did not have the expected result - they passed if UnitTestResult.FAIL was expected, etc."""
self.failed += batch
def crash_test(self, n = 1):
self.crashed = self.failed + n
def crash_test(self, batch):
"""Segfaults and similar unusual exits from the program under test."""
self.crashed += batch
def __bool__(self):
return self.passed + self.skipped == self.total
"""Returns true if everything that was expected to run (based on the command line options) ran and passed."""
# The logic here is redundant, from the length of passed and skipped we should know that failed and crashed are both empty;
# however a false everything-passed result is much worse than a couple of extra checks here.
return len(self.passed) + len(self.skipped) == self.total and len(self.failed) == 0 and len(self.crashed) == 0
def __str__(self):
result = "Passed {count} of {total} tests\n".format(count=len(self.passed), total=self.total)
if self.skipped:
result += "Skipped batches containing {count} tests: ({names})\n".format(count=len(self.skipped),
names=", ".join([test.name for test in self.skipped]))
if self.failed:
result += "Failed batches containing {count} tests: ({names})\n".format(count=len(self.failed),
names=", ".join([test.name for test in self.failed]))
if self.crashed:
result += "Crashed during batches containing {count} tests: ({names})".format(count=len(self.crashed),
names=", ".join([test.name for test in self.crashed]))
return result;
class TestListParser:
"""Each line in the list of tests should be formatted:
@@ -84,7 +111,7 @@ class TestListParser:
print("Could not parse test list file: ", line)
t = TestCase(UnitTestResult(int(x.groups()[0])), x.groups()[1])
if self.verbose > 1:
if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
print(t)
test_list.append(t)
return batcher(test_list), TestResultAccumulator(len(test_list))
@@ -102,7 +129,7 @@ def run_with_rerun_for_sdl_video(args, timeout):
if b"Could not initialize SDL_video" in res.stdout:
retry = True
if not retry:
return res
return res
sdl_retries += 1
print("Could not initialise SDL_video error, attempt", sdl_retries)
@@ -143,7 +170,7 @@ class WesnothRunner:
self.common_args.extend(options.additional_arg)
self.timeout = options.timeout
self.batch_timeout = options.batch_timeout
if self.verbose > 1:
if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
print("Options that will be used for all Wesnoth instances:", repr(self.common_args))
def run_tests(self, test_list, test_summary):
@@ -155,47 +182,49 @@ class WesnothRunner:
if test.status != UnitTestResult.PASS:
raise NotImplementedError("run_tests doesn't yet support batching tests with non-zero statuses")
expected_result = test_list[0].status
if expected_result == UnitTestResult.TIMEOUT and self.timeout == 0:
test_summary.skip_test(test_list)
if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
print('Skipping test', test_list[0].name, 'because timeout is disabled')
return
if expected_result == UnitTestResult.FAIL_BROKE_STRICT and not options.strict_mode:
test_summary.skip_test(test_list)
if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
print('Skipping test', test_list[0].name, 'because strict mode is disabled')
return
args = self.common_args.copy()
for test in test_list:
args.append("-u")
args.append(test.name)
if self.timeout == 0:
if test.status == UnitTestResult.TIMEOUT:
test_summary.skip_test()
print('Skipping test', test_list[0].name, 'because timeout is disabled')
return
timeout = None
elif len(test_list) == 1:
timeout = self.timeout
else:
if test.status == UnitTestResult.FAIL_BROKE_STRICT and not options.strict_mode:
test_summary.skip_test()
print('Skipping test', test_list[0].name, 'because strict mode is disabled')
return
if len(test_list) == 1:
timeout = self.timeout
else:
timeout = self.batch_timeout
if self.verbose > 0:
timeout = self.batch_timeout
if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
if len(test_list) == 1:
print("Running test", test_list[0].name)
else:
print("Running {count} tests ({names})".format(count=len(test_list),
names=", ".join([test.name for test in test_list])))
if self.verbose > 1:
print(repr(args))
if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
print(repr(args))
try:
res = run_with_rerun_for_sdl_video(args, timeout)
except subprocess.TimeoutExpired as t:
if self.verbose > 0:
print("Timed out (killed by Python timeout implementation)")
if expected_result != UnitTestResult.TIMEOUT:
print("Timed out (killed by Python timeout implementation), while running batch ({names})".format(
names=", ".join([test.name for test in test_list])))
res = subprocess.CompletedProcess(args, UnitTestResult.TIMEOUT.value, t.output or b'')
if self.verbose > 0:
if self.verbose >= Verbosity.OUTPUT_OF_PASSING_TESTS:
print(res.stdout.decode('utf-8'))
if self.verbose > 1:
print("Result:", res.returncode)
num_passed = 1
if test_list[0].status == UnitTestResult.PASS:
num_passed = res.stdout.count(b"PASS TEST")
test_summary.pass_test(num_passed)
print("Result:", res.returncode)
if res.returncode < 0:
print("Wesnoth exited because of signal", -res.returncode)
if options.backtrace:
@@ -203,22 +232,21 @@ class WesnothRunner:
gdb_args = ["gdb", "-q", "-batch", "-ex", "start", "-ex", "continue", "-ex", "bt", "-ex", "quit", "--args"]
gdb_args.extend(args)
subprocess.run(gdb_args, timeout=240)
test_summary.crash_test()
test_summary.skip_test(len(test_list) - num_passed - 1)
test_summary.crash_test(test_list)
raise UnexpectedTestStatusException()
returned_result = UnitTestResult(res.returncode)
if returned_result != expected_result:
if self.verbose == 0:
for line in res.stdout.decode('utf-8').splitlines():
if expected_result == UnitTestResult.PASS and line.startswith("PASS TEST"):
continue
print(line)
print("Failure, Wesnoth returned", returned_result, "for", test_list[num_passed].name, "but we expected", expected_result)
test_summary.fail_test()
test_summary.skip_test(len(test_list) - num_passed - 1)
if returned_result == expected_result:
test_summary.pass_test(test_list)
else:
if self.verbose < Verbosity.OUTPUT_OF_PASSING_TESTS:
# Batch mode only supports tests that should UnitTestResult.PASS, so the output
# here is likely to have many 'PASS TEST' lines with the failing test last.
# If support for batching other UnitTestResults is added then this needs to filter
# the output from the other tests.
print(res.stdout.decode('utf-8'))
print("Failure, Wesnoth returned", returned_result, "but we expected", expected_result)
test_summary.fail_test(test_list)
raise UnexpectedTestStatusException()
elif test_list[0].status != UnitTestResult.PASS:
test_summary.pass_test()
def test_batcher(test_list):
"""A generator function that collects tests into batches which a single
@@ -250,7 +278,7 @@ if __name__ == '__main__':
# The options that are mandatory to support (because they're used in the Travis script)
# are the one-letter forms of verbose, clean, timeout and backtrace.
ap.add_argument("-v", "--verbose", action="count", default=0,
help="Verbose mode. Use -v twice for very verbose mode.")
help="Verbose mode. Additional -v arguments increase the verbosity.")
ap.add_argument("-c", "--clean", action="store_true",
help="Clean mode. (Don't load any add-ons. Used for mainline tests.)")
ap.add_argument("-a", "--additional_arg", action="append",
@@ -299,27 +327,16 @@ if __name__ == '__main__':
runner = WesnothRunner(options)
for batch in test_list:
while len(batch) > 0:
last_passed_count = test_summary.passed
try:
runner.run_tests(batch, test_summary)
batch = []
except UnexpectedTestStatusException as e:
just_passed = test_summary.passed - last_passed_count
batch = batch[just_passed + 1 :]
test_summary.skip_test(-len(batch))
try:
runner.run_tests(batch, test_summary)
except UnexpectedTestStatusException as e:
if test_summary:
raise RuntimeError("Logic error in run_wml_tests - a test has failed, but test_summary says everything passed")
print("Result:" if options.verbose > 0 else "WML Unit Tests Result:", test_summary.passed, "of", test_summary.total, "tests passed")
if test_summary.passed != test_summary.total:
breakdown = ["{0} passed".format(test_summary.passed)]
if test_summary.failed != 0:
breakdown.append("{0} failed".format(test_summary.failed))
if test_summary.crashed != 0:
breakdown.append("{0} crashed".format(test_summary.crashed))
if test_summary.skipped != 0:
breakdown.append("{0} skipped".format(test_summary.skipped))
print(" ({0})".format(', '.join(breakdown)))
if options.verbose > 0 or not test_summary:
print(test_summary)
else:
print("WML Unit Tests Result:", len(test_summary.passed), "of", test_summary.total, "tests passed")
if not test_summary:
sys.exit(1)