Bug 1760979 - Fix wpt verify jobs, r=karlcow

This was regressed by the changes in PR 32403.

Also add a test to check that we don't regress in the same way
again. This mocks out actually running the tests, which of course
means we aren't testing that we get the right kind of return value
from wptrunner.

Differential Revision: https://phabricator.services.mozilla.com/D142100
James Graham 2022-03-30 07:53:56 +00:00
parent 03343af68c
commit e5b73651f0
3 changed files with 61 additions and 24 deletions
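
The mocking approach described in the commit message reduces to the standalone sketch below, a condensed variant of the unit test added in this commit. It assumes the wptrunner package is importable (e.g. when run from a tools/wptrunner checkout); the logged test names and statuses are illustrative.

    import sys
    from unittest import mock

    from mozlog.formatters import TbplFormatter
    from mozlog.handlers import StreamHandler
    from mozlog.structuredlog import StructuredLogger

    from wptrunner import stability, wptrunner

    logger = StructuredLogger("test-stability")
    logger.add_handler(StreamHandler(sys.stdout, TbplFormatter()))

    def fake_run_tests(**kwargs):
        # Replay the mozlog events a real run would produce, without a browser.
        repeats = kwargs.get("repeat", 1)
        for _ in range(repeats):
            logger.suite_start(tests=[], name="test")
            for _ in range(kwargs.get("rerun", 1)):
                logger.test_start("/example/test.html")
                logger.test_status("/example/test.html", subtest="test1", status="PASS")
                logger.test_end("/example/test.html", status="OK")
            logger.suite_end()
        # Return a TestStatus in the second position; run_step() ignores the first.
        status = wptrunner.TestStatus()
        status.total_tests = 1
        status.repeated_runs = repeats
        status.expected_repeated_runs = repeats
        return None, status

    # Patch run_tests so no browser is launched; check_stability() returns None on success.
    with mock.patch("wptrunner.stability.wptrunner.run_tests", side_effect=fake_run_tests):
        assert stability.check_stability(logger, repeat_loop=10, repeat_restart=5,
                                         chaos_mode=False, output_results=False,
                                         verify_log_full=False) is None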

wptrunner/stability.py

@@ -10,6 +10,8 @@ from mozlog import reader
from mozlog.formatters import JSONFormatter
from mozlog.handlers import BaseHandler, StreamHandler, LogLevelFilter
from . import wptrunner
here = os.path.dirname(__file__)
localpaths = imp.load_source("localpaths", os.path.abspath(os.path.join(here, os.pardir, os.pardir, "localpaths.py")))
from ci.tc.github_checks_output import get_gh_checks_outputter # type: ignore
@@ -262,7 +264,6 @@ def write_results(log, results, iterations, pr_number=None, use_details=False):
def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwargs):
from . import wptrunner
kwargs = copy.deepcopy(kwargs)
if restart_after_iteration:
@@ -274,7 +275,7 @@ def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwarg
kwargs.update(kwargs_extras)
def wrap_handler(x):
if not kwargs["verify_log_full"]:
if not kwargs.get("verify_log_full", False):
x = LogLevelFilter(x, "WARNING")
x = LogActionFilter(x, ["log", "process_output"])
return x
@@ -289,18 +290,16 @@ def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwarg
logger.add_handler(StreamHandler(log, JSONFormatter()))
_, test_status = wptrunner.run_tests(**kwargs)
iterations = test_status.repeated_runs
if not restart_after_iteration:
iterations = kwargs["rerun"]
logger._state.handlers = initial_handlers
logger._state.running_tests = set()
logger._state.suite_started = False
log.seek(0)
results, inconsistent, slow = process_results(log, test_status.repeated_runs)
return test_status, results, inconsistent, slow
total_iterations = test_status.repeated_runs * kwargs.get("rerun", 1)
all_skipped = test_status.all_skipped
results, inconsistent, slow = process_results(log, total_iterations)
return total_iterations, all_skipped, results, inconsistent, slow
def get_steps(logger, repeat_loop, repeat_restart, kwargs_extras):
@@ -363,7 +362,7 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
start_time = datetime.now()
step_results = []
github_checks_outputter = get_gh_checks_outputter(kwargs["github_checks_text_file"])
github_checks_outputter = get_gh_checks_outputter(kwargs.get("github_checks_text_file"))
for desc, step_func, expected_iterations in steps:
if max_time and datetime.now() - start_time > max_time:
@@ -375,15 +374,10 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
logger.info(':::')
logger.info('::: Running test verification step "%s"...' % desc)
logger.info(':::')
test_status, results, inconsistent, slow = step_func(**kwargs)
total_iterations, all_skipped, results, inconsistent, slow = step_func(**kwargs)
# Use the number of iterations of the test suite that were run to process the results.
# if the runs were stopped to avoid hitting the maximum run time.
iterations = test_status.repeated_runs
all_skipped = test_status.all_skipped
logger.info(f"::: Ran {iterations} of expected {expected_iterations} iterations.")
if iterations <= 1 and expected_iterations > 1 and not all_skipped:
logger.info(f"::: Ran {total_iterations} of expected {expected_iterations} iterations.")
if total_iterations <= 1 and expected_iterations > 1 and not all_skipped:
step_results.append((desc, "FAIL"))
logger.info("::: Reached iteration timeout before finishing 2 or more repeat runs.")
logger.info("::: At least 2 successful repeat runs are required to validate stability.")
@@ -391,13 +385,14 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
return 1
if output_results:
write_results(logger.info, results, iterations)
write_results(logger.info, results, total_iterations)
if inconsistent:
step_results.append((desc, "FAIL"))
if github_checks_outputter:
write_github_checks_summary_inconsistent(github_checks_outputter.output, inconsistent, iterations)
write_inconsistent(logger.info, inconsistent, iterations)
write_github_checks_summary_inconsistent(github_checks_outputter.output,
inconsistent, total_iterations)
write_inconsistent(logger.info, inconsistent, total_iterations)
write_summary(logger, step_results, "FAIL")
return 1
@@ -411,8 +406,8 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
# If the tests passed but the number of iterations didn't match the number expected to run,
# it is likely that the runs were stopped early to avoid a timeout.
if iterations != expected_iterations:
result = f"PASS * {iterations}/{expected_iterations} repeats completed"
if total_iterations != expected_iterations:
result = f"PASS * {total_iterations}/{expected_iterations} repeats completed"
step_results.append((desc, result))
else:
step_results.append((desc, "PASS"))
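
For readability, the iteration accounting that the hunks above move into run_step() can be summarized as follows. This is a sketch only; the two example configurations in the asserts are assumptions for illustration, not values read from get_steps().

    def total_iterations(repeated_runs, rerun=1):
        # Mirrors the new expression in run_step():
        #   test_status.repeated_runs * kwargs.get("rerun", 1)
        return repeated_runs * rerun

    # Assumed example: a repeat-loop style step where wptrunner repeats the whole suite.
    assert total_iterations(repeated_runs=10) == 10
    # Assumed example: a rerun style step where each test is rerun within a single suite run.
    assert total_iterations(repeated_runs=1, rerun=5) == 5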

wptrunner/tests/test_stability.py

@@ -1,5 +1,12 @@
from .. import stability
import sys
from collections import OrderedDict, defaultdict
from unittest import mock
from mozlog.structuredlog import StructuredLogger
from mozlog.formatters import TbplFormatter
from mozlog.handlers import StreamHandler
from .. import stability, wptrunner
def test_is_inconsistent():
assert stability.is_inconsistent({"PASS": 10}, 10) is False
@@ -140,3 +147,38 @@ def test_err_string():
{u'OK': 2, u'FAIL': 1, u'SKIP': 1}, 4) == u"FAIL: 1/4, OK: 2/4, SKIP: 1/4"
assert stability.err_string(
{u'FAIL': 1, u'SKIP': 1, u'OK': 2}, 4) == u"FAIL: 1/4, OK: 2/4, SKIP: 1/4"
def test_check_stability_iterations():
logger = StructuredLogger("test-stability")
logger.add_handler(StreamHandler(sys.stdout, TbplFormatter()))
kwargs = {"verify_log_full": False}
def mock_run_tests(**kwargs):
repeats = kwargs.get("repeat", 1)
for _ in range(repeats):
logger.suite_start(tests=[], name="test")
for _ in range(kwargs.get("rerun", 1)):
logger.test_start("/example/test.html")
logger.test_status("/example/test.html", subtest="test1", status="PASS")
logger.test_end("/example/test.html", status="OK")
logger.suite_end()
status = wptrunner.TestStatus()
status.total_tests = 1
status.repeated_runs = repeats
status.expected_repeated_runs = repeats
return (None, status)
# Don't actually load wptrunner, because that will end up starting a browser
# which we don't want to do in this test.
with mock.patch("wptrunner.stability.wptrunner.run_tests") as mock_run:
mock_run.side_effect = mock_run_tests
assert stability.check_stability(logger,
repeat_loop=10,
repeat_restart=5,
chaos_mode=False,
output_results=False,
**kwargs) is None

wptrunner/wptrunner.py

@@ -328,7 +328,7 @@ def run_tests(config, test_paths, product, **kwargs):
test_status = TestStatus()
repeat = kwargs["repeat"]
test_status.expected_repeat = repeat
test_status.expected_repeated_runs = repeat
if len(test_loader.test_ids) == 0 and kwargs["test_list"]:
logger.critical("Unable to find any tests at the path(s):")
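
For reference, a stand-in for the TestStatus attributes this commit relies on, inferred from the hunks above; the real class in wptrunner.py may define more fields and different defaults, so treat this as an assumption-flagged summary rather than its API.

    class TestStatusSketch:
        """Illustrative stand-in, not the real wptrunner.TestStatus."""
        def __init__(self):
            self.total_tests = 0             # set by run_tests(); read by the new unit test
            self.repeated_runs = 0           # suite iterations that actually ran
            self.expected_repeated_runs = 0  # the hunk above renames expected_repeat to this
            self.all_skipped = False         # read by check_stability()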