mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-12-01 08:42:13 +00:00
Bug 1695972 - Add a mach command for finding fission-specific regressions in wpt, r=kashav,jmaher
This adds a `mach wpt-fission-regressions` command that uses the wpt expectation data to look for tests which have a worse result in fission. With the `--all-json=<path>` argument it will output a JSON file containing details of all the regressions. With the `--untriaged=<path>` argument it will output a file containing a list of failures that have not yet been triaged. It also adds a try job to produce those files as artifacts whenever wpt metadata is changed. The actual implementation is based on reading the wpt expectation data with sample run_info values corresponding to the configurations in which we have fission enabled, but with the "fission" property set to False (to get a baseline result) and True (to get a with-fission result) and then comparing the resulting expectations. The implementation is pretty suboptimal performance-wise since we end up reading the metadata once per configuration i.e. 6 times, and this is slow. It could be optimised by using the conditional metadata backend, reading it once, and then evaluating per configuration. However that would require a little more work and the presumption is that this will be short-lived until fission becomes the default configuration. Differential Revision: https://phabricator.services.mozilla.com/D106954
This commit is contained in:
parent
c6942877da
commit
c186b32f83
@ -33,3 +33,26 @@ summary:
|
||||
- 'testing/web-platform/meta/**'
|
||||
- 'testing/web-platform/mozilla/meta/**'
|
||||
- 'testing/web-platform/metasummary.py'
|
||||
|
||||
|
||||
fission-regression:
|
||||
description: Summarize fission regressions in wpt
|
||||
treeherder:
|
||||
symbol: wpt-fis
|
||||
index:
|
||||
product: source
|
||||
job-name: source-wpt-fission-regressions
|
||||
run:
|
||||
using: mach
|
||||
mach: wpt-fission-regressions --all-json /builds/worker/artifacts/regressions.json --untriaged /builds/worker/artifacts/untriaged.txt
|
||||
worker:
|
||||
artifacts:
|
||||
- type: directory
|
||||
path: /builds/worker/artifacts
|
||||
name: public
|
||||
max-run-time: 2700
|
||||
when:
|
||||
files-changed:
|
||||
- 'testing/web-platform/meta/**'
|
||||
- 'testing/web-platform/mozilla/meta/**'
|
||||
- 'testing/web-platform/fissionregressions.py'
|
||||
|
513
testing/web-platform/fissionregressions.py
Normal file
513
testing/web-platform/fissionregressions.py
Normal file
@ -0,0 +1,513 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from mozlog import commandline
|
||||
|
||||
# Run-info properties that have the same value in every sample configuration.
_COMMON_RUN_INFO = {
    "processor": "x86_64",
    "bits": 64,
    "has_sandbox": True,
    "webrender": True,
    "automation": True,
    "apple_silicon": False,
    "appname": "firefox",
    "artifact": False,
    "asan": False,
    "buildapp": "browser",
    "ccov": False,
    "crashreporter": True,
    "datareporting": True,
    "devedition": False,
    "early_beta_or_earlier": True,
    "healthreport": True,
    "nightly_build": True,
    "normandy": True,
    "official": True,
    "release_or_beta": False,
    "require_signing": False,
    "stylo": True,
    "sync": True,
    "telemetry": False,
    "tests_enabled": True,
    "tsan": False,
    "ubsan": False,
    "updater": True,
    "python_version": 3,
    "product": "firefox",
    "verify": False,
    "wasm": True,
    "e10s": True,
    "headless": False,
    "sw-e10s": True,
    "swgl": False,
}

# Properties shared by both Linux configurations.
_LINUX_RUN_INFO = {
    **_COMMON_RUN_INFO,
    "os": "linux",
    "version": "Ubuntu 18.04",
    "os_version": "18.04",
    "linux_distro": "Ubuntu",
    "bin_suffix": "",
    "cc_type": "clang",
    "non_native_theme": True,
    "platform_guess": "linux64",
    "toolkit": "gtk",
}

# Sample run_info values, keyed by the platform names accepted by --platform.
# run() overrides the "fission" entry on a copy of each dict, so the value
# stored here only acts as a placeholder.
# NOTE(review): "sessionHistoryInParent" is True only for linux-opt; confirm
# whether that difference between platforms is intentional.
run_infos = {
    "linux-opt": {
        **_LINUX_RUN_INFO,
        "buildtype_guess": "pgo",
        "debug": False,
        "pgo": True,
        "fission": True,
        "sessionHistoryInParent": True,
    },
    "linux-debug": {
        **_LINUX_RUN_INFO,
        "buildtype_guess": "debug",
        "debug": True,
        "pgo": False,
        "fission": False,
        "sessionHistoryInParent": False,
    },
    "win-opt": {
        **_COMMON_RUN_INFO,
        "os": "win",
        "version": "10.0.17134",
        "os_version": "10.0",
        "service_pack": "",
        "bin_suffix": ".exe",
        "buildtype_guess": "pgo",
        "cc_type": "clang-cl",
        "debug": False,
        "non_native_theme": False,
        "pgo": True,
        "platform_guess": "win64",
        "toolkit": "windows",
        "fission": False,
        "sessionHistoryInParent": False,
    },
}


# RE that checks for anything containing a three+ digit number
maybe_bug_re = re.compile(r".*\d\d\d+")
|
||||
|
||||
|
||||
def get_parser():
    """Build the command line parser for the fission regression report.

    The parser accepts two optional output paths (``--all-json`` and
    ``--untriaged``, both converted to absolute paths), a repeatable
    ``--platform`` option restricted to the keys of ``run_infos``, and the
    mozlog logging options.
    """
    parser = argparse.ArgumentParser()

    output_options = (
        ("--all-json", "Path to write json output to"),
        ("--untriaged", "Path to write list of regressions with no associated bug"),
    )
    for flag, description in output_options:
        parser.add_argument(flag, type=os.path.abspath, help=description)

    parser.add_argument(
        "--platform",
        dest="platforms",
        action="append",
        choices=list(run_infos),
        help="Configurations to compute fission changes for",
    )

    commandline.add_logging_group(parser)
    return parser
|
||||
|
||||
|
||||
def allowed_results(test, subtest=None):
    """Return the ``(expected, known_intermittent)`` pair for a test.

    Both values are obtained from the ``test`` object; ``subtest`` is passed
    through unchanged and defaults to ``None``.
    """
    expected_status = test.expected(subtest)
    intermittent_statuses = test.known_intermittent(subtest)
    return expected_status, intermittent_statuses
|
||||
|
||||
|
||||
def is_worse(baseline_result, new_result):
    """Return True if ``new_result`` is a worse status than ``baseline_result``.

    Identical statuses and any change to "PASS"/"OK" are never worse. Any
    other change away from "PASS"/"OK" is worse. Among non-passing statuses a
    change is worse unless the baseline was "CRASH".
    """
    passing = ("PASS", "OK")

    if new_result == baseline_result or new_result in passing:
        return False

    if baseline_result in passing:
        return True

    # A crash -> not crash isn't a regression
    return baseline_result != "CRASH"
|
||||
|
||||
|
||||
def is_regression(baseline_result, new_result):
    """Decide whether the new allowed results regress from the baseline.

    Each argument is an ``(expected, known_intermittent)`` pair. The pairs
    are compared as sets of allowed statuses; individual statuses are ranked
    with ``is_worse``.
    """
    if baseline_result == new_result:
        return False

    old_expected, old_flaky = baseline_result
    cur_expected, cur_flaky = new_result

    old_statuses = {old_expected, *old_flaky}
    cur_statuses = {cur_expected, *cur_flaky}

    if old_statuses == cur_statuses:
        return False

    was_flaky = bool(old_flaky)
    now_flaky = bool(cur_flaky)

    if not was_flaky and not now_flaky:
        return is_worse(old_expected, cur_expected)

    if was_flaky and not now_flaky:
        # Intermittent before, stable now: compare the new result against
        # every previous result so that [PASS, FAIL] -> FAIL looks like a
        # regression.
        befores, afters = old_statuses, [cur_expected]
    elif now_flaky and not was_flaky:
        # Stable before, intermittent now: any new result that is worse than
        # the previous one counts.
        befores, afters = [old_expected], cur_statuses
    else:
        # Intermittent before and after: only the results that were not
        # previously allowed are compared against the old results.
        befores, afters = old_statuses, cur_statuses - old_statuses

    return any(is_worse(before, after) for after in afters for before in befores)
|
||||
|
||||
|
||||
def get_meta_prop(test, subtest, name):
    """Return the first value of property ``name`` found for a test.

    The metadata objects yielded by ``test.itermeta(subtest)`` are tried in
    order; one whose ``get`` raises ``KeyError`` is skipped. ``None`` is
    returned when none of them provides the property.
    """
    for node in test.itermeta(subtest):
        try:
            return node.get(name)
        except KeyError:
            continue
    return None
|
||||
|
||||
|
||||
def include_result(result):
    """Return True if ``result`` has anything worth reporting.

    A result is reported when it is disabled or regressed on some platform.
    A ``TestResult`` is also reported when any of its subtest results is.
    """

    def flagged(item):
        return bool(item.disabled or item.regressions)

    if flagged(result):
        return True

    if not isinstance(result, TestResult):
        return False

    return any(flagged(child) for child in result.subtest_results.values())
|
||||
|
||||
|
||||
class Result:
    """Base record of fission-specific changes for one test or subtest.

    Subclasses must implement ``to_json`` and ``is_triaged``.
    """

    def __init__(self):
        # platform name -> {"baseline": [...], "fission": [...]}
        self.regressions = {}
        # platform names recorded as disabled
        self.disabled = set()
        # bug references associated with this result
        self.bugs = set()

    def add_regression(self, platform, baseline_results, fission_results):
        """Record a regression for ``platform``.

        Each results argument is indexed as ``(expected, intermittent_list)``
        and is stored flattened, with the expected status first.
        """
        entry = {}
        for kind, results in (
            ("baseline", baseline_results),
            ("fission", fission_results),
        ):
            entry[kind] = [results[0]] + results[1]
        self.regressions[platform] = entry

    def to_json(self):
        """Return a JSON-compatible representation; subclasses override."""
        raise NotImplementedError

    def is_triaged(self):
        """Return whether this result is triaged; subclasses override."""
        raise NotImplementedError
|
||||
|
||||
|
||||
class TestResult(Result):
    """Result for a whole test, holding the results of its subtests."""

    def __init__(self):
        super().__init__()
        # subtest name -> SubtestResult
        self.subtest_results = {}

    def add_subtest(self, name):
        """Create (or replace) the ``SubtestResult`` stored under ``name``."""
        self.subtest_results[name] = SubtestResult(self)

    def to_json(self):
        """Return a JSON-serializable dict holding only the non-empty fields.

        Subtests are included only when ``include_result`` accepts them. The
        ``disabled`` and ``bugs`` sets are emitted as sorted lists so that the
        generated file does not change between runs because of set ordering.
        """
        rv = {}
        include_subtests = {
            name: item.to_json()
            for name, item in self.subtest_results.items()
            if include_result(item)
        }
        if include_subtests:
            rv["subtest_results"] = include_subtests
        if self.regressions:
            rv["regressions"] = self.regressions
        if self.disabled:
            rv["disabled"] = sorted(self.disabled, key=str)
        if self.bugs:
            rv["bugs"] = sorted(self.bugs, key=str)
        return rv

    def is_triaged(self):
        """Return True if this test has a bug, or needs none.

        Without a bug of its own the test counts as triaged only when it has
        no test-level regression and every subtest result is triaged.
        """
        return bool(self.bugs) or (
            not self.regressions
            and all(
                subtest_result.is_triaged()
                for subtest_result in self.subtest_results.values()
            )
        )
|
||||
|
||||
|
||||
class SubtestResult(Result):
    """Result for a single subtest; ``parent`` is the owning test result."""

    def __init__(self, parent):
        super().__init__()
        self.parent = parent

    def to_json(self):
        """Return a JSON-serializable dict holding only the non-empty fields.

        Bugs already recorded on the parent are left out. The remaining bugs
        and the disabled platforms are emitted as sorted lists, because a
        ``set`` cannot be serialized by ``json.dump``.
        """
        rv = {}
        if self.regressions:
            rv["regressions"] = self.regressions
        if self.disabled:
            rv["disabled"] = sorted(self.disabled, key=str)
        bugs = self.bugs - self.parent.bugs
        if bugs:
            rv["bugs"] = sorted(bugs, key=str)
        return rv

    def is_triaged(self):
        """Return True if nothing about this subtest still needs a bug.

        A subtest with no regression and not disabled has nothing to triage,
        so it must not make its parent, or itself, show up as untriaged.
        Otherwise a bug on either the subtest or its parent is required.
        """
        if not (self.regressions or self.disabled):
            return True
        return bool(self.parent.bugs or self.bugs)
|
||||
|
||||
|
||||
def run(logger, src_root, obj_root, **kwargs):
    """Compare wpt expectations with and without fission and report changes.

    For each selected platform the tests are loaded twice, once with the
    "fission" run_info property set to False (baseline) and once with it set
    to True. Bugs, disabled state and regressions that differ between the two
    are collected per test and per subtest.

    :param logger: mozlog logger, configured here from the ``log_*`` kwargs.
    :param src_root: source directory passed to ``manifestupdate.run``.
    :param obj_root: object directory passed to ``manifestupdate.run``.
    :param kwargs: parsed command line options. ``platforms`` selects keys of
        ``run_infos`` (all of them when None); ``all_json`` and ``untriaged``
        are optional output paths.
    """
    commandline.setup_logging(
        logger, {key: value for key, value in kwargs.items() if key.startswith("log_")}
    )

    import manifestupdate

    sys.path.insert(
        0,
        os.path.abspath(os.path.join(os.path.dirname(__file__), "tests", "tools")),
    )
    from wptrunner import testloader, wpttest

    logger.info("Loading test manifest")
    test_manifests = manifestupdate.run(src_root, obj_root, logger)

    test_results = {}

    platforms = kwargs.get("platforms")
    if platforms is None:
        platforms = run_infos.keys()

    for platform in platforms:
        platform_run_info = run_infos[platform]

        tests = {}

        for kind in ("baseline", "fission"):
            logger.info("Loading tests %s %s" % (platform, kind))
            # Work on a copy so the shared sample configuration is not mutated.
            run_info = platform_run_info.copy()
            run_info["fission"] = kind == "fission"

            test_loader = testloader.TestLoader(
                test_manifests, wpttest.enabled_tests, run_info, manifest_filters=[]
            )
            # Only tests that have expectation metadata can differ.
            tests[kind] = {
                test.id: test
                for _, _, test in test_loader.iter_tests()
                if test._test_metadata is not None
            }

        for test_id, baseline_test in tests["baseline"].items():
            fission_test = tests["fission"][test_id]

            if test_id not in test_results:
                test_results[test_id] = TestResult()

            test_result = test_results[test_id]

            # A bug that only applies with fission marks the test as triaged.
            baseline_bug = get_meta_prop(baseline_test, None, "bug")
            fission_bug = get_meta_prop(fission_test, None, "bug")
            if fission_bug and fission_bug != baseline_bug:
                test_result.bugs.add(fission_bug)

            if fission_test.disabled() and not baseline_test.disabled():
                test_result.disabled.add(platform)
                reason = get_meta_prop(fission_test, None, "disabled")
                # A disabled reason that looks like it contains a bug number
                # is recorded as the bug.
                if reason and maybe_bug_re.match(reason):
                    test_result.bugs.add(reason)

            baseline_results = allowed_results(baseline_test)
            fission_results = allowed_results(fission_test)
            result_is_regression = is_regression(baseline_results, fission_results)

            if baseline_results != fission_results:
                logger.debug(
                    " %s %s %s %s"
                    % (test_id, baseline_results, fission_results, result_is_regression)
                )

            if result_is_regression:
                test_result.add_regression(platform, baseline_results, fission_results)

            fission_subtests = fission_test._test_metadata.subtests
            for (
                name,
                baseline_subtest_meta,
            ) in baseline_test._test_metadata.subtests.items():
                # Must come from the fission metadata: reading it from the
                # baseline test would compare the baseline against itself and
                # never detect a fission-only disable.
                fission_subtest_meta = fission_subtests[name]
                if name not in test_result.subtest_results:
                    test_result.add_subtest(name)

                subtest_result = test_result.subtest_results[name]

                baseline_bug = get_meta_prop(baseline_test, name, "bug")
                fission_bug = get_meta_prop(fission_test, name, "bug")
                if fission_bug and fission_bug != baseline_bug:
                    subtest_result.bugs.add(fission_bug)

                if bool(fission_subtest_meta.disabled) and not bool(
                    baseline_subtest_meta.disabled
                ):
                    subtest_result.disabled.add(platform)
                    if maybe_bug_re.match(fission_subtest_meta.disabled):
                        subtest_result.bugs.add(fission_subtest_meta.disabled)

                baseline_results = allowed_results(baseline_test, name)
                fission_results = allowed_results(fission_test, name)

                result_is_regression = is_regression(baseline_results, fission_results)

                if baseline_results != fission_results:
                    logger.debug(
                        " %s %s %s %s %s"
                        % (
                            test_id,
                            name,
                            baseline_results,
                            fission_results,
                            result_is_regression,
                        )
                    )

                if result_is_regression:
                    subtest_result.add_regression(
                        platform, baseline_results, fission_results
                    )

    # Drop tests for which nothing was recorded on any platform.
    test_results = {
        test_id: result
        for test_id, result in test_results.items()
        if include_result(result)
    }

    if kwargs.get("all_json") is not None:
        write_all(test_results, kwargs["all_json"])

    if kwargs.get("untriaged") is not None:
        write_untriaged(test_results, kwargs["untriaged"])
|
||||
|
||||
|
||||
def write_all(test_results, path):
    """Write every result as JSON to ``path``.

    :param test_results: mapping of test id to an object with ``to_json()``.
    :param path: output file; missing parent directories are created.
    """
    json_data = {test_id: result.to_json() for test_id, result in test_results.items()}

    # dirname is empty for a bare filename, and os.makedirs("") raises.
    dir_name = os.path.dirname(path)
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)

    with open(path, "w") as f:
        json.dump(json_data, f, indent=2)
|
||||
|
||||
|
||||
def write_untriaged(test_results, path):
    """Write the untriaged tests, and their untriaged subtests, to ``path``.

    Each untriaged test id is written on its own line, sorted by id, and is
    followed by one space-prefixed line per untriaged subtest, sorted by name.

    :param test_results: mapping of test id to a result exposing
        ``is_triaged()`` and ``subtest_results``.
    :param path: output file; missing parent directories are created.
    """
    # dirname is empty for a bare filename, and os.makedirs("") raises.
    dir_name = os.path.dirname(path)
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)

    # Sort on the ids only so result objects never need to be comparable.
    untriaged_ids = sorted(
        test_id for test_id, result in test_results.items() if not result.is_triaged()
    )

    # Subtest names are arbitrary text, so do not depend on the locale encoding.
    with open(path, "w", encoding="utf-8") as f:
        for test_id in untriaged_ids:
            f.write(test_id + "\n")
            subtests = test_results[test_id].subtest_results
            for name in sorted(subtests):
                if not subtests[name].is_triaged():
                    f.write(" %s\n" % name)
|
@ -370,6 +370,25 @@ class WebPlatformTestsTestPathsRunner(MozbuildObject):
|
||||
return True
|
||||
|
||||
|
||||
class WebPlatformTestsFissionRegressionsRunner(MozbuildObject):
    """Runs the fission regression report from ``fissionregressions``."""

    def run(self, **kwargs):
        """Run ``fissionregressions.run`` for this source/object directory.

        ``kwargs`` are passed through unchanged. Any exception is printed and
        re-raised so that the command fails instead of reporting success; the
        post-mortem debugger is only entered when stdin is a terminal, since
        an automated job has nobody to answer the prompt.
        """
        import mozlog
        import fissionregressions

        src_root = self.topsrcdir
        obj_root = self.topobjdir
        logger = mozlog.structuredlog.StructuredLogger("web-platform-tests")

        try:
            return fissionregressions.run(logger, src_root, obj_root, **kwargs)
        except Exception:
            import pdb
            import sys
            import traceback

            traceback.print_exc()
            if sys.stdin is not None and sys.stdin.isatty():
                pdb.post_mortem()
            raise
|
||||
|
||||
|
||||
def create_parser_update():
|
||||
from update import updatecommandline
|
||||
|
||||
@ -409,6 +428,12 @@ def create_parser_unittest():
|
||||
return unittestrunner.get_parser()
|
||||
|
||||
|
||||
def create_parser_fission_regressions():
    """Return the argument parser defined by ``fissionregressions``.

    The module is imported inside the function, so it is only loaded when
    the parser is requested.
    """
    from fissionregressions import get_parser

    return get_parser()
|
||||
|
||||
|
||||
def create_parser_testpaths():
|
||||
import argparse
|
||||
from mozboot.util import get_state_dir
|
||||
@ -602,3 +627,14 @@ class MachCommands(MachCommandBase):
|
||||
runner = self._spawn(WebPlatformTestsTestPathsRunner)
|
||||
runner.run(**params)
|
||||
return 0
|
||||
|
||||
@Command(
    "wpt-fission-regressions",
    category="testing",
    description="Dump a list of fission-specific regressions",
    parser=create_parser_fission_regressions,
)
def wpt_fission_regressions(self, **params):
    """Entry point for ``mach wpt-fission-regressions``.

    Spawns the fission regressions runner, forwards the parsed command line
    parameters to it, and returns 0.
    """
    self._spawn(WebPlatformTestsFissionRegressionsRunner).run(**params)
    return 0
|
||||
|
Loading…
Reference in New Issue
Block a user