Bug 1760979 - Fix wpt verify jobs, r=karlcow

This was regressed by the changes in PR 32403.

Also add a test to check that we don't regress in the same way
again. This mocks out actually running the tests, which of course
means we aren't testing that we get the right kind of return value
from wptrunner.

Differential Revision: https://phabricator.services.mozilla.com/D142100
James Graham 2022-03-30 07:53:56 +00:00
parent 03343af68c
commit e5b73651f0
3 changed files with 61 additions and 24 deletions
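
The mocking approach described in the commit message reduces to the standalone sketch below, a condensed variant of the unit test added in this commit. It assumes the wptrunner package is importable (e.g. when run from a tools/wptrunner checkout); the logged test names and statuses are illustrative.

    import sys
    from unittest import mock

    from mozlog.formatters import TbplFormatter
    from mozlog.handlers import StreamHandler
    from mozlog.structuredlog import StructuredLogger

    from wptrunner import stability, wptrunner

    logger = StructuredLogger("test-stability")
    logger.add_handler(StreamHandler(sys.stdout, TbplFormatter()))

    def fake_run_tests(**kwargs):
        # Replay the mozlog events a real run would produce, without a browser.
        repeats = kwargs.get("repeat", 1)
        for _ in range(repeats):
            logger.suite_start(tests=[], name="test")
            for _ in range(kwargs.get("rerun", 1)):
                logger.test_start("/example/test.html")
                logger.test_status("/example/test.html", subtest="test1", status="PASS")
                logger.test_end("/example/test.html", status="OK")
            logger.suite_end()
        # Return a TestStatus in the second position; run_step() ignores the first.
        status = wptrunner.TestStatus()
        status.total_tests = 1
        status.repeated_runs = repeats
        status.expected_repeated_runs = repeats
        return None, status

    # Patch run_tests so no browser is launched; check_stability() returns None on success.
    with mock.patch("wptrunner.stability.wptrunner.run_tests", side_effect=fake_run_tests):
        assert stability.check_stability(logger, repeat_loop=10, repeat_restart=5,
                                         chaos_mode=False, output_results=False,
                                         verify_log_full=False) is None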

wptrunner/stability.py

@@ -10,6 +10,8 @@ from mozlog import reader
from mozlog.formatters import JSONFormatter
from mozlog.handlers import BaseHandler, StreamHandler, LogLevelFilter
from . import wptrunner
here = os.path.dirname(__file__)
localpaths = imp.load_source("localpaths", os.path.abspath(os.path.join(here, os.pardir, os.pardir, "localpaths.py")))
from ci.tc.github_checks_output import get_gh_checks_outputter # type: ignore
@@ -262,7 +264,6 @@ def write_results(log, results, iterations, pr_number=None, use_details=False):
def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwargs):
from . import wptrunner
kwargs = copy.deepcopy(kwargs)
if restart_after_iteration:
@@ -274,7 +275,7 @@ def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwarg
kwargs.update(kwargs_extras)
def wrap_handler(x):
if not kwargs["verify_log_full"]:
if not kwargs.get("verify_log_full", False):
x = LogLevelFilter(x, "WARNING")
x = LogActionFilter(x, ["log", "process_output"])
return x
@@ -289,18 +290,16 @@ def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwarg
logger.add_handler(StreamHandler(log, JSONFormatter()))
_, test_status = wptrunner.run_tests(**kwargs)
iterations = test_status.repeated_runs
if not restart_after_iteration:
iterations = kwargs["rerun"]
logger._state.handlers = initial_handlers
logger._state.running_tests = set()
logger._state.suite_started = False
log.seek(0)
results, inconsistent, slow = process_results(log, test_status.repeated_runs)
return test_status, results, inconsistent, slow
total_iterations = test_status.repeated_runs * kwargs.get("rerun", 1)
all_skipped = test_status.all_skipped
results, inconsistent, slow = process_results(log, total_iterations)
return total_iterations, all_skipped, results, inconsistent, slow
def get_steps(logger, repeat_loop, repeat_restart, kwargs_extras):
@@ -363,7 +362,7 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
start_time = datetime.now()
step_results = []
github_checks_outputter = get_gh_checks_outputter(kwargs["github_checks_text_file"])
github_checks_outputter = get_gh_checks_outputter(kwargs.get("github_checks_text_file"))
for desc, step_func, expected_iterations in steps:
if max_time and datetime.now() - start_time > max_time:
@@ -375,15 +374,10 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
logger.info(':::')
logger.info('::: Running test verification step "%s"...' % desc)
logger.info(':::')
test_status, results, inconsistent, slow = step_func(**kwargs)
total_iterations, all_skipped, results, inconsistent, slow = step_func(**kwargs)
# Use the number of iterations of the test suite that were run to process the results.
# if the runs were stopped to avoid hitting the maximum run time.
iterations = test_status.repeated_runs
all_skipped = test_status.all_skipped
logger.info(f"::: Ran {iterations} of expected {expected_iterations} iterations.")
if iterations <= 1 and expected_iterations > 1 and not all_skipped:
logger.info(f"::: Ran {total_iterations} of expected {expected_iterations} iterations.")
if total_iterations <= 1 and expected_iterations > 1 and not all_skipped:
step_results.append((desc, "FAIL"))
logger.info("::: Reached iteration timeout before finishing 2 or more repeat runs.")
logger.info("::: At least 2 successful repeat runs are required to validate stability.")
@@ -391,13 +385,14 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
return 1
if output_results:
write_results(logger.info, results, iterations)
write_results(logger.info, results, total_iterations)
if inconsistent:
step_results.append((desc, "FAIL"))
if github_checks_outputter:
write_github_checks_summary_inconsistent(github_checks_outputter.output, inconsistent, iterations)
write_inconsistent(logger.info, inconsistent, iterations)
write_github_checks_summary_inconsistent(github_checks_outputter.output,
inconsistent, total_iterations)
write_inconsistent(logger.info, inconsistent, total_iterations)
write_summary(logger, step_results, "FAIL")
return 1
@@ -411,8 +406,8 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
# If the tests passed but the number of iterations didn't match the number expected to run,
# it is likely that the runs were stopped early to avoid a timeout.
if iterations != expected_iterations:
result = f"PASS * {iterations}/{expected_iterations} repeats completed"
if total_iterations != expected_iterations:
result = f"PASS * {total_iterations}/{expected_iterations} repeats completed"
step_results.append((desc, result))
else:
step_results.append((desc, "PASS"))
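
For readability, the iteration accounting that the hunks above move into run_step() can be summarized as follows. This is a sketch only; the two example configurations in the asserts are assumptions for illustration, not values read from get_steps().

    def total_iterations(repeated_runs, rerun=1):
        # Mirrors the new expression in run_step():
        #   test_status.repeated_runs * kwargs.get("rerun", 1)
        return repeated_runs * rerun

    # Assumed example: a repeat-loop style step where wptrunner repeats the whole suite.
    assert total_iterations(repeated_runs=10) == 10
    # Assumed example: a rerun style step where each test is rerun within a single suite run.
    assert total_iterations(repeated_runs=1, rerun=5) == 5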

wptrunner/tests/test_stability.py

@@ -1,5 +1,12 @@
from .. import stability
import sys
from collections import OrderedDict, defaultdict
from unittest import mock
from mozlog.structuredlog import StructuredLogger
from mozlog.formatters import TbplFormatter
from mozlog.handlers import StreamHandler
from .. import stability, wptrunner
def test_is_inconsistent():
assert stability.is_inconsistent({"PASS": 10}, 10) is False
@@ -140,3 +147,38 @@ def test_err_string():
{u'OK': 2, u'FAIL': 1, u'SKIP': 1}, 4) == u"FAIL: 1/4, OK: 2/4, SKIP: 1/4"
assert stability.err_string(
{u'FAIL': 1, u'SKIP': 1, u'OK': 2}, 4) == u"FAIL: 1/4, OK: 2/4, SKIP: 1/4"
def test_check_stability_iterations():
logger = StructuredLogger("test-stability")
logger.add_handler(StreamHandler(sys.stdout, TbplFormatter()))
kwargs = {"verify_log_full": False}
def mock_run_tests(**kwargs):
repeats = kwargs.get("repeat", 1)
for _ in range(repeats):
logger.suite_start(tests=[], name="test")
for _ in range(kwargs.get("rerun", 1)):
logger.test_start("/example/test.html")
logger.test_status("/example/test.html", subtest="test1", status="PASS")
logger.test_end("/example/test.html", status="OK")
logger.suite_end()
status = wptrunner.TestStatus()
status.total_tests = 1
status.repeated_runs = repeats
status.expected_repeated_runs = repeats
return (None, status)
# Don't actually load wptrunner, because that will end up starting a browser
# which we don't want to do in this test.
with mock.patch("wptrunner.stability.wptrunner.run_tests") as mock_run:
mock_run.side_effect = mock_run_tests
assert stability.check_stability(logger,
repeat_loop=10,
repeat_restart=5,
chaos_mode=False,
output_results=False,
**kwargs) is None

wptrunner/wptrunner.py

@@ -328,7 +328,7 @@ def run_tests(config, test_paths, product, **kwargs):
test_status = TestStatus()
repeat = kwargs["repeat"]
test_status.expected_repeat = repeat
test_status.expected_repeated_runs = repeat
if len(test_loader.test_ids) == 0 and kwargs["test_list"]:
logger.critical("Unable to find any tests at the path(s):")
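
For reference, a stand-in for the TestStatus attributes this commit relies on, inferred from the hunks above; the real class in wptrunner.py may define more fields and different defaults, so treat this as an assumption-flagged summary rather than its API.

    class TestStatusSketch:
        """Illustrative stand-in, not the real wptrunner.TestStatus."""
        def __init__(self):
            self.total_tests = 0             # set by run_tests(); read by the new unit test
            self.repeated_runs = 0           # suite iterations that actually ran
            self.expected_repeated_runs = 0  # the hunk above renames expected_repeat to this
            self.all_skipped = False         # read by check_stability()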