Bug 1695972 - Add a mach command for finding fission-specific regressions in wpt, r=kashav,jmaher

This adds a `mach wpt-fission-regressions` command that uses the wpt
expectation data to look for tests which have a worse result in
fission. With the `--all-json=<path>` argument it will output a JSON
file containing details of all the regressions. With the
`--untriaged=<path>` argument it will output a file containing a list
of failures that have not yet been triaged.

It also adds a try job to produce those files as artifacts whenever
wpt metadata is changed.

The actual implementation is based on reading the wpt expectation data
with sample run_info values corresponding to the configurations in
which we have fission enabled, but with the "fission" property set to
False (to get a baseline result) and True (to get a with-fission
result) and then comparing the resulting expectations.

The implementation is pretty suboptimal performance-wise since we end
up reading the metadata once per configuration i.e. 6 times, and this
is slow. It could be optimised by using the conditional metadata
backend, reading it once, and then evaluating per
configuration. However that would require a little more work and the
presumption is that this will be short-lived until fission becomes the
default configuration.

Differential Revision: https://phabricator.services.mozilla.com/D106954
This commit is contained in:
James Graham 2021-03-02 21:19:00 +00:00
parent c6942877da
commit c186b32f83
3 changed files with 572 additions and 0 deletions

View File

@ -33,3 +33,26 @@ summary:
- 'testing/web-platform/meta/**'
- 'testing/web-platform/mozilla/meta/**'
- 'testing/web-platform/metasummary.py'
fission-regression:
description: Summarize fission regressions in wpt
treeherder:
symbol: wpt-fis
index:
product: source
job-name: source-wpt-fission-regressions
run:
using: mach
mach: wpt-fission-regressions --all-json /builds/worker/artifacts/regressions.json --untriaged /builds/worker/artifacts/untriaged.txt
worker:
artifacts:
- type: directory
path: /builds/worker/artifacts
name: public
max-run-time: 2700
when:
files-changed:
- 'testing/web-platform/meta/**'
- 'testing/web-platform/mozilla/meta/**'
- 'testing/web-platform/fissionregressions.py'

View File

@ -0,0 +1,513 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import json
import os
import re
import sys
from mozlog import commandline
# Sample run_info dictionaries, keyed by configuration name.
#
# The keys are offered as the valid values of the ``--platform`` option, and
# ``run()`` copies the selected entry twice, forcing ``"fission"`` to False
# (baseline) and True (with fission) before loading the tests. The
# ``"fission"`` value stored here is therefore always overridden.
#
# NOTE(review): ``"sessionHistoryInParent"`` is *not* overridden by ``run()``
# and is True for "linux-opt" but False for the other two entries - confirm
# that this difference is intentional.
run_infos = {
    "linux-opt": {
        "os": "linux",
        "processor": "x86_64",
        "version": "Ubuntu 18.04",
        "os_version": "18.04",
        "bits": 64,
        "has_sandbox": True,
        "webrender": True,
        "automation": True,
        "linux_distro": "Ubuntu",
        "apple_silicon": False,
        "appname": "firefox",
        "artifact": False,
        "asan": False,
        "bin_suffix": "",
        "buildapp": "browser",
        "buildtype_guess": "pgo",
        "cc_type": "clang",
        "ccov": False,
        "crashreporter": True,
        "datareporting": True,
        "debug": False,
        "devedition": False,
        "early_beta_or_earlier": True,
        "healthreport": True,
        "nightly_build": True,
        "non_native_theme": True,
        "normandy": True,
        "official": True,
        "pgo": True,
        "platform_guess": "linux64",
        "release_or_beta": False,
        "require_signing": False,
        "stylo": True,
        "sync": True,
        "telemetry": False,
        "tests_enabled": True,
        "toolkit": "gtk",
        "tsan": False,
        "ubsan": False,
        "updater": True,
        "python_version": 3,
        "product": "firefox",
        "verify": False,
        "wasm": True,
        "e10s": True,
        "headless": False,
        "sw-e10s": True,
        "fission": True,
        "sessionHistoryInParent": True,
        "swgl": False,
    },
    "linux-debug": {
        "os": "linux",
        "processor": "x86_64",
        "version": "Ubuntu 18.04",
        "os_version": "18.04",
        "bits": 64,
        "has_sandbox": True,
        "webrender": True,
        "automation": True,
        "linux_distro": "Ubuntu",
        "apple_silicon": False,
        "appname": "firefox",
        "artifact": False,
        "asan": False,
        "bin_suffix": "",
        "buildapp": "browser",
        "buildtype_guess": "debug",
        "cc_type": "clang",
        "ccov": False,
        "crashreporter": True,
        "datareporting": True,
        "debug": True,
        "devedition": False,
        "early_beta_or_earlier": True,
        "healthreport": True,
        "nightly_build": True,
        "non_native_theme": True,
        "normandy": True,
        "official": True,
        "pgo": False,
        "platform_guess": "linux64",
        "release_or_beta": False,
        "require_signing": False,
        "stylo": True,
        "sync": True,
        "telemetry": False,
        "tests_enabled": True,
        "toolkit": "gtk",
        "tsan": False,
        "ubsan": False,
        "updater": True,
        "python_version": 3,
        "product": "firefox",
        "verify": False,
        "wasm": True,
        "e10s": True,
        "headless": False,
        "sw-e10s": True,
        "fission": False,
        "sessionHistoryInParent": False,
        "swgl": False,
    },
    "win-opt": {
        "os": "win",
        "processor": "x86_64",
        "version": "10.0.17134",
        "os_version": "10.0",
        "bits": 64,
        "has_sandbox": True,
        "webrender": True,
        "automation": True,
        "service_pack": "",
        "apple_silicon": False,
        "appname": "firefox",
        "artifact": False,
        "asan": False,
        "bin_suffix": ".exe",
        "buildapp": "browser",
        "buildtype_guess": "pgo",
        "cc_type": "clang-cl",
        "ccov": False,
        "crashreporter": True,
        "datareporting": True,
        "debug": False,
        "devedition": False,
        "early_beta_or_earlier": True,
        "healthreport": True,
        "nightly_build": True,
        "non_native_theme": False,
        "normandy": True,
        "official": True,
        "pgo": True,
        "platform_guess": "win64",
        "release_or_beta": False,
        "require_signing": False,
        "stylo": True,
        "sync": True,
        "telemetry": False,
        "tests_enabled": True,
        "toolkit": "windows",
        "tsan": False,
        "ubsan": False,
        "updater": True,
        "python_version": 3,
        "product": "firefox",
        "verify": False,
        "wasm": True,
        "e10s": True,
        "headless": False,
        "sw-e10s": True,
        "fission": False,
        "sessionHistoryInParent": False,
        "swgl": False,
    },
}
# Heuristic for spotting a bug reference in a free-form "disabled" reason:
# used with ``.match()``, it succeeds for any string that contains a run of
# three or more consecutive digits (``.`` does not cross a newline, so the
# digits must appear before the first line break).
maybe_bug_re = re.compile(r".*\d\d\d+")
def get_parser():
    """Create the command-line parser for the fission regressions tool.

    The parser accepts two optional output paths (``--all-json`` and
    ``--untriaged``, both normalised to absolute paths), a repeatable
    ``--platform`` option restricted to the keys of ``run_infos``, and the
    mozlog logging options.

    :returns: the configured ``argparse.ArgumentParser``.
    """
    arg_parser = argparse.ArgumentParser()
    add = arg_parser.add_argument

    # Output destinations; each is optional.
    add("--all-json", type=os.path.abspath, help="Path to write json output to")
    add(
        "--untriaged",
        type=os.path.abspath,
        help="Path to write list of regressions with no associated bug",
    )

    # May be given several times; the values accumulate in ``platforms``.
    add(
        "--platform",
        dest="platforms",
        action="append",
        choices=list(run_infos),
        help="Configurations to compute fission changes for",
    )

    commandline.add_logging_group(arg_parser)
    return arg_parser
def allowed_results(test, subtest=None):
    """Return the ``(expected, known_intermittent)`` pair for a test.

    :param test: object exposing ``expected(subtest)`` and
        ``known_intermittent(subtest)``.
    :param subtest: subtest name, or ``None`` for the test itself.
    :returns: a 2-tuple of the expected status and the known intermittent
        statuses, exactly as reported by ``test``.
    """
    expected_status = test.expected(subtest)
    intermittent_statuses = test.known_intermittent(subtest)
    return expected_status, intermittent_statuses
def is_worse(baseline_result, new_result):
    """Decide whether ``new_result`` is a worse status than ``baseline_result``.

    An unchanged status, or a new status of ``"PASS"``/``"OK"``, is never
    worse. Otherwise any change away from ``"PASS"``/``"OK"`` is worse, and
    any other change is worse unless the baseline was ``"CRASH"``.

    :returns: ``True`` if the new status counts as worse, else ``False``.
    """
    passing = ("PASS", "OK")

    if new_result == baseline_result or new_result in passing:
        return False
    if baseline_result in passing:
        return True
    # A crash -> not crash isn't a regression
    return baseline_result != "CRASH"
def is_regression(baseline_result, new_result):
    """Decide whether the fission expectations regress on the baseline.

    Both arguments are ``(expected, known_intermittent)`` pairs. Identical
    pairs, or pairs covering the same set of statuses, are not regressions.
    Otherwise statuses are compared with ``is_worse``; which statuses are
    compared depends on whether each side has intermittent statuses.

    :returns: ``True`` if the new expectations are considered a regression.
    """
    if baseline_result == new_result:
        return False

    old_expected, old_flaky = baseline_result
    cur_expected, cur_flaky = new_result

    old_statuses = {old_expected}.union(old_flaky)
    cur_statuses = {cur_expected}.union(cur_flaky)
    if old_statuses == cur_statuses:
        return False

    if old_flaky:
        if not cur_flaky:
            # Was intermittent and isn't now: compare the new status against
            # every previous status so that [PASS, FAIL] -> FAIL looks like
            # a regression.
            return any(is_worse(old, cur_expected) for old in old_statuses)

        # Intermittent before and after: only statuses that are new can make
        # things worse, and they are compared against every old status.
        added = cur_statuses - old_statuses
        return any(is_worse(old, cur) for cur in added for old in old_statuses)

    if cur_flaky:
        # Was a perma and is now intermittent: is any new status worse than
        # the single previous one?
        return any(is_worse(old_expected, cur) for cur in cur_statuses)

    # Neither side is intermittent: plain comparison.
    return is_worse(old_expected, cur_expected)
def get_meta_prop(test, subtest, name):
    """Look up a metadata property for a test or subtest.

    Walks the metadata objects yielded by ``test.itermeta(subtest)`` in
    order and returns the value from the first one whose ``get(name)`` does
    not raise ``KeyError``.

    :returns: the first value found, or ``None`` if no metadata object has
        the property.
    """
    for node in test.itermeta(subtest):
        try:
            return node.get(name)
        except KeyError:
            # Not set at this level; try the next metadata object.
            continue
    return None
def include_result(result):
    """Report whether a result carries anything worth writing out.

    A result is included when it is disabled on some platform or has
    regressions. A ``TestResult`` is also included when any of its subtest
    results is disabled or has regressions.

    :returns: ``True`` if the result should be kept, else ``False``.
    """
    if result.regressions or result.disabled:
        return True

    if not isinstance(result, TestResult):
        return False

    return any(
        subtest.regressions or subtest.disabled
        for subtest in result.subtest_results.values()
    )
class Result:
    """Base record of fission-specific changes for a test or subtest.

    Holds the associated bugs, the platforms on which the item is disabled,
    and the per-platform regressions. Subclasses must provide ``to_json``
    and ``is_triaged``.
    """

    def __init__(self):
        # platform name -> {"baseline": [...], "fission": [...]}
        self.regressions = {}
        # platform names
        self.disabled = set()
        # bug references
        self.bugs = set()

    def add_regression(self, platform, baseline_results, fission_results):
        """Record a regression for ``platform``.

        Each ``*_results`` argument is an ``(expected, known_intermittent)``
        pair; it is stored flattened as a list with the expected status
        first, followed by the intermittent statuses.
        """
        flattened_baseline = [baseline_results[0]] + baseline_results[1]
        flattened_fission = [fission_results[0]] + fission_results[1]
        self.regressions[platform] = {
            "baseline": flattened_baseline,
            "fission": flattened_fission,
        }

    def to_json(self):
        """Return a JSON-serialisable view; must be overridden."""
        raise NotImplementedError

    def is_triaged(self):
        """Return whether the result has been triaged; must be overridden."""
        raise NotImplementedError
class TestResult(Result):
    """Fission-specific changes for a whole test, including its subtests."""

    def __init__(self):
        super().__init__()
        # subtest name -> SubtestResult
        self.subtest_results = {}

    def add_subtest(self, name):
        """Create (or replace) the ``SubtestResult`` stored under ``name``."""
        self.subtest_results[name] = SubtestResult(self)

    def to_json(self):
        """Return a dict of the non-empty parts of this result.

        Only subtests accepted by ``include_result`` are emitted, and each
        of the ``subtest_results``, ``regressions``, ``disabled`` and
        ``bugs`` keys is present only when it has content.
        """
        reported_subtests = {}
        for subtest_name, subtest in self.subtest_results.items():
            if include_result(subtest):
                reported_subtests[subtest_name] = subtest.to_json()

        rv = {}
        if reported_subtests:
            rv["subtest_results"] = reported_subtests
        if self.regressions:
            rv["regressions"] = self.regressions
        if self.disabled:
            rv["disabled"] = list(self.disabled)
        if self.bugs:
            rv["bugs"] = list(self.bugs)
        return rv

    def is_triaged(self):
        """Return whether this test needs no further triage.

        A test with any bug is triaged. Without a bug, a test that has
        regressions of its own is untriaged; otherwise it is triaged only if
        every subtest result is.
        """
        if self.bugs:
            return True
        if self.regressions:
            return False
        return all(
            subtest.is_triaged() for subtest in self.subtest_results.values()
        )
class SubtestResult(Result):
    """Fission-specific changes for a single subtest.

    :param parent: the ``TestResult`` that owns this subtest; its bugs also
        count as triage for the subtest.
    """

    def __init__(self, parent):
        super().__init__()
        self.parent = parent

    def to_json(self):
        """Return a JSON-serialisable dict of the non-empty parts.

        Bugs already recorded on the parent test are not repeated here.
        """
        rv = {}
        if self.regressions:
            rv["regressions"] = self.regressions
        if self.disabled:
            rv["disabled"] = list(self.disabled)
        bugs = self.bugs - self.parent.bugs
        if bugs:
            # Must be a list: ``json.dump`` cannot serialise a set.
            rv["bugs"] = list(bugs)
        return rv

    def is_triaged(self):
        """Return whether this subtest needs no further triage.

        A result is created for every subtest in the metadata, including
        those with no fission-specific change. Such subtests have nothing to
        triage and must not be reported as untriaged merely because no bug
        is attached to them.
        """
        if not (self.regressions or self.disabled):
            return True
        return bool(self.parent.bugs or self.bugs)
def run(logger, src_root, obj_root, **kwargs):
    """Compute fission-specific regressions from the wpt expectation data.

    For every selected platform the test metadata is loaded twice, once with
    ``fission`` set to False (baseline) and once with it set to True, and
    the expectations of each test and subtest are compared.

    :param logger: mozlog logger used for progress and debug output.
    :param src_root: source directory, passed to ``manifestupdate.run``.
    :param obj_root: object directory, passed to ``manifestupdate.run``.
    :param kwargs: parsed command-line options. ``platforms`` (list of
        ``run_infos`` keys, or ``None`` for all), ``all_json`` and
        ``untriaged`` (output paths or ``None``) are read directly; keys
        starting with ``log_`` are forwarded to the logging setup.
    """
    commandline.setup_logging(
        logger, {key: value for key, value in kwargs.items() if key.startswith("log_")}
    )

    import manifestupdate

    # wptrunner lives under tests/tools next to this file; make it importable.
    sys.path.insert(
        0,
        os.path.abspath(os.path.join(os.path.dirname(__file__), "tests", "tools")),
    )
    from wptrunner import testloader, wpttest

    logger.info("Loading test manifest")
    test_manifests = manifestupdate.run(src_root, obj_root, logger)

    test_results = {}

    platforms = kwargs["platforms"]
    if platforms is None:
        platforms = run_infos.keys()

    for platform in platforms:
        platform_run_info = run_infos[platform]

        tests = {}

        for kind in ("baseline", "fission"):
            logger.info("Loading tests %s %s" % (platform, kind))
            # Same configuration for both loads; only "fission" differs.
            run_info = platform_run_info.copy()
            run_info["fission"] = kind == "fission"
            test_loader = testloader.TestLoader(
                test_manifests, wpttest.enabled_tests, run_info, manifest_filters=[]
            )
            # Tests without metadata have no expectations to compare.
            tests[kind] = {
                test.id: test
                for _, _, test in test_loader.iter_tests()
                if test._test_metadata is not None
            }

        for test_id, baseline_test in tests["baseline"].items():
            fission_test = tests["fission"][test_id]

            if test_id not in test_results:
                test_results[test_id] = TestResult()

            test_result = test_results[test_id]

            # Only a bug that differs from the baseline is fission-specific.
            baseline_bug = get_meta_prop(baseline_test, None, "bug")
            fission_bug = get_meta_prop(fission_test, None, "bug")
            if fission_bug and fission_bug != baseline_bug:
                test_result.bugs.add(fission_bug)

            if fission_test.disabled() and not baseline_test.disabled():
                test_result.disabled.add(platform)
                reason = get_meta_prop(fission_test, None, "disabled")
                if reason and maybe_bug_re.match(reason):
                    test_result.bugs.add(reason)

            baseline_results = allowed_results(baseline_test)
            fission_results = allowed_results(fission_test)
            result_is_regression = is_regression(baseline_results, fission_results)

            if baseline_results != fission_results:
                logger.debug(
                    " %s %s %s %s"
                    % (test_id, baseline_results, fission_results, result_is_regression)
                )

            if result_is_regression:
                test_result.add_regression(platform, baseline_results, fission_results)

            fission_subtests = fission_test._test_metadata.subtests
            for (
                name,
                baseline_subtest_meta,
            ) in baseline_test._test_metadata.subtests.items():
                # Must come from the fission load: reading it from the
                # baseline metadata would make the "disabled only under
                # fission" check below impossible to satisfy.
                fission_subtest_meta = fission_subtests[name]
                if name not in test_result.subtest_results:
                    test_result.add_subtest(name)

                subtest_result = test_result.subtest_results[name]

                baseline_bug = get_meta_prop(baseline_test, name, "bug")
                fission_bug = get_meta_prop(fission_test, name, "bug")
                if fission_bug and fission_bug != baseline_bug:
                    subtest_result.bugs.add(fission_bug)

                if bool(fission_subtest_meta.disabled) and not bool(
                    baseline_subtest_meta.disabled
                ):
                    subtest_result.disabled.add(platform)
                    # Fetch the reason text the same way as for the test
                    # itself. NOTE(review): the ``disabled`` attribute may
                    # not be the reason string (it is only used for
                    # truthiness above) - confirm against wptrunner.
                    reason = get_meta_prop(fission_test, name, "disabled")
                    if isinstance(reason, str) and maybe_bug_re.match(reason):
                        subtest_result.bugs.add(reason)

                baseline_results = allowed_results(baseline_test, name)
                fission_results = allowed_results(fission_test, name)

                result_is_regression = is_regression(baseline_results, fission_results)

                if baseline_results != fission_results:
                    logger.debug(
                        " %s %s %s %s %s"
                        % (
                            test_id,
                            name,
                            baseline_results,
                            fission_results,
                            result_is_regression,
                        )
                    )

                if result_is_regression:
                    subtest_result.add_regression(
                        platform, baseline_results, fission_results
                    )

    # Drop tests that ended up with nothing to report.
    test_results = {
        test_id: result
        for test_id, result in test_results.items()
        if include_result(result)
    }

    if kwargs["all_json"] is not None:
        write_all(test_results, kwargs["all_json"])

    if kwargs["untriaged"] is not None:
        write_untriaged(test_results, kwargs["untriaged"])
def write_all(test_results, path):
    """Write every result to ``path`` as indented JSON.

    :param test_results: mapping of test id to a result object providing
        ``to_json()``.
    :param path: output file; missing parent directories are created.
    """
    json_data = {test_id: result.to_json() for test_id, result in test_results.items()}

    # A bare filename has no directory part, and ``os.makedirs("")`` raises.
    # ``exist_ok`` avoids the race between checking and creating.
    dir_name = os.path.dirname(path)
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)

    with open(path, "w", encoding="utf-8") as f:
        json.dump(json_data, f, indent=2)
def write_untriaged(test_results, path):
    """Write the untriaged tests and subtests to ``path`` as plain text.

    Each untriaged test id is written on its own line, in sorted order,
    followed by one indented line per untriaged subtest (sorted by name).

    :param test_results: mapping of test id to a result object providing
        ``is_triaged()`` and ``subtest_results``.
    :param path: output file; missing parent directories are created.
    """
    # A bare filename has no directory part, and ``os.makedirs("")`` raises.
    # ``exist_ok`` avoids the race between checking and creating.
    dir_name = os.path.dirname(path)
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)

    # Sort on the id alone so result objects are never compared.
    data = sorted(
        (
            (test_id, result)
            for test_id, result in test_results.items()
            if not result.is_triaged()
        ),
        key=lambda item: item[0],
    )

    # Subtest names are arbitrary text, so don't depend on the locale encoding.
    with open(path, "w", encoding="utf-8") as f:
        for test_id, result in data:
            f.write(test_id + "\n")
            for name, subtest_result in sorted(
                result.subtest_results.items(), key=lambda item: item[0]
            ):
                if not subtest_result.is_triaged():
                    f.write(" %s\n" % name)

View File

@ -370,6 +370,25 @@ class WebPlatformTestsTestPathsRunner(MozbuildObject):
return True
class WebPlatformTestsFissionRegressionsRunner(MozbuildObject):
    """Runs the fission regressions report against this source tree."""

    def run(self, **kwargs):
        """Invoke ``fissionregressions.run`` with this tree's directories.

        :param kwargs: parsed command-line options, forwarded unchanged.
        :returns: whatever ``fissionregressions.run`` returns.
        :raises Exception: any failure is re-raised after the traceback has
            been printed, so that the caller (and automation) sees the
            command fail instead of a silent ``None`` result.
        """
        import mozlog
        import fissionregressions

        src_root = self.topsrcdir
        obj_root = self.topobjdir
        logger = mozlog.structuredlog.StructuredLogger("web-platform-tests")

        try:
            return fissionregressions.run(logger, src_root, obj_root, **kwargs)
        except Exception:
            import pdb
            import sys
            import traceback

            traceback.print_exc()
            # Only offer the debugger when someone can actually use it.
            if sys.stdin is not None and sys.stdin.isatty():
                pdb.post_mortem()
            raise
def create_parser_update():
from update import updatecommandline
@ -409,6 +428,12 @@ def create_parser_unittest():
return unittestrunner.get_parser()
def create_parser_fission_regressions():
    """Return the argument parser defined by the ``fissionregressions`` module.

    The module is imported inside the function rather than at file level.
    """
    from fissionregressions import get_parser

    return get_parser()
def create_parser_testpaths():
import argparse
from mozboot.util import get_state_dir
@ -602,3 +627,14 @@ class MachCommands(MachCommandBase):
runner = self._spawn(WebPlatformTestsTestPathsRunner)
runner.run(**params)
return 0
@Command(
    "wpt-fission-regressions",
    category="testing",
    description="Dump a list of fission-specific regressions",
    parser=create_parser_fission_regressions,
)
def wpt_fission_regressions(self, **params):
    """Run the fission regressions report with the parsed options.

    Spawns a ``WebPlatformTestsFissionRegressionsRunner``, forwards
    ``params`` to its ``run`` method, and returns 0.
    """
    self._spawn(WebPlatformTestsFissionRegressionsRunner).run(**params)
    return 0