mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-09 11:25:00 +00:00
Bug 1834866 - change isolate failures to confirm failures. r=taskgraph-reviewers,gbrown,bhearsum
Differential Revision: https://phabricator.services.mozilla.com/D178989
This commit is contained in:
parent
f70ce93a5c
commit
eaedb702ec
@ -5,7 +5,6 @@
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
||||
from taskgraph.util.parameterization import resolve_task_references
|
||||
@ -74,6 +73,7 @@ def get_failures(task_id):
|
||||
break
|
||||
return test_path
|
||||
|
||||
# collect dirs that don't have a specific manifest
|
||||
dirs = set()
|
||||
tests = set()
|
||||
artifacts = list_artifacts(task_id)
|
||||
@ -85,44 +85,41 @@ def get_failures(task_id):
|
||||
if not stream:
|
||||
continue
|
||||
|
||||
# The number of tasks created is determined by the
|
||||
# `times` value and the number of distinct tests and
|
||||
# directories as: times * (1 + len(tests) + len(dirs)).
|
||||
# Since the maximum value of `times` specifiable in the
|
||||
# Treeherder UI is 100, the number of tasks created can
|
||||
# reach a very large value depending on the number of
|
||||
# unique tests. During testing, it was found that 10
|
||||
# distinct tests were sufficient to cause the action task
|
||||
# to exceed the maxRunTime of 1800 seconds resulting in it
|
||||
# being aborted. We limit the number of distinct tests
|
||||
# and thereby the number of distinct test directories to a
|
||||
# maximum of 5 to keep the action task from timing out.
|
||||
|
||||
# We handle the stream as raw bytes because it may contain invalid
|
||||
# UTF-8 characters in portions other than those containing the error
|
||||
# messages we're looking for.
|
||||
for line in stream.read().split(b"\n"):
|
||||
test_path = munge_test_path(line.strip())
|
||||
if not line.strip():
|
||||
continue
|
||||
|
||||
if test_path:
|
||||
l = json.loads(line)
|
||||
if "group_results" in l.keys() and l["status"] != "OK":
|
||||
dirs.add(l["group_results"].group())
|
||||
|
||||
elif "test" in l.keys():
|
||||
test_path = munge_test_path(line.strip())
|
||||
tests.add(test_path.decode("utf-8"))
|
||||
test_dir = os.path.dirname(test_path)
|
||||
if test_dir:
|
||||
dirs.add(test_dir.decode("utf-8"))
|
||||
|
||||
# only run the failing test, not both the test and its directory
|
||||
if l["group"] in dirs:
|
||||
dirs.remove(l["group"])
|
||||
|
||||
if len(tests) > 4:
|
||||
break
|
||||
|
||||
# turn group into dir by stripping off leafname
|
||||
dirs = set([d.split("/")[0:-1] for d in dirs])
|
||||
|
||||
return {"dirs": sorted(dirs), "tests": sorted(tests)}
|
||||
|
||||
|
||||
def create_isolate_failure_tasks(task_definition, failures, level, times):
|
||||
def create_confirm_failure_tasks(task_definition, failures, level):
|
||||
"""
|
||||
Create tasks to re-run the original task plus tasks to test
|
||||
each failing test directory and individual path.
|
||||
|
||||
"""
|
||||
logger.info(f"Isolate task:\n{json.dumps(task_definition, indent=2)}")
|
||||
logger.info(f"Confirm Failures task:\n{json.dumps(task_definition, indent=2)}")
|
||||
|
||||
# Operate on a copy of the original task_definition
|
||||
task_definition = copy_task(task_definition)
|
||||
@ -153,12 +150,9 @@ def create_isolate_failure_tasks(task_definition, failures, level, times):
|
||||
|
||||
command = copy_task(task_definition["payload"]["command"])
|
||||
|
||||
th_dict["groupSymbol"] = th_dict["groupSymbol"] + "-I"
|
||||
th_dict["groupSymbol"] = th_dict["groupSymbol"] + "-cf"
|
||||
th_dict["tier"] = 3
|
||||
|
||||
for i in range(times):
|
||||
create_task_from_def(task_definition, level)
|
||||
|
||||
if repeatable_task:
|
||||
task_definition["payload"]["maxRunTime"] = 3600 * 3
|
||||
|
||||
@ -175,7 +169,7 @@ def create_isolate_failure_tasks(task_definition, failures, level, times):
|
||||
repeat_args = ["--repeat=19"] if repeatable_task else []
|
||||
else:
|
||||
logger.error(
|
||||
"create_isolate_failure_tasks: Unknown failure_group {}".format(
|
||||
"create_confirm_failure_tasks: Unknown failure_group {}".format(
|
||||
failure_group
|
||||
)
|
||||
)
|
||||
@ -203,33 +197,23 @@ def create_isolate_failure_tasks(task_definition, failures, level, times):
|
||||
failure_path, task_definition["payload"]["command"]
|
||||
)
|
||||
)
|
||||
for i in range(times):
|
||||
create_task_from_def(task_definition, level)
|
||||
create_task_from_def(task_definition, level)
|
||||
|
||||
|
||||
@register_callback_action(
|
||||
name="isolate-test-failures",
|
||||
title="Isolate test failures in job",
|
||||
symbol="it",
|
||||
description="Re-run Tests for original manifest, directories and tests for failing tests.",
|
||||
name="confirm-failures",
|
||||
title="Confirm failures in job",
|
||||
symbol="cf",
|
||||
description="Re-run Tests for original manifest, directories or tests for failing tests.",
|
||||
order=150,
|
||||
context=[{"kind": "test"}],
|
||||
schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"times": {
|
||||
"type": "integer",
|
||||
"default": 1,
|
||||
"minimum": 1,
|
||||
"maximum": 100,
|
||||
"title": "Times",
|
||||
"description": "How many times to run each task.",
|
||||
}
|
||||
},
|
||||
"properties": {},
|
||||
"additionalProperties": False,
|
||||
},
|
||||
)
|
||||
def isolate_test_failures(parameters, graph_config, input, task_group_id, task_id):
|
||||
def confirm_failures(parameters, graph_config, input, task_group_id, task_id):
|
||||
task = get_task_definition(task_id)
|
||||
decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
|
||||
parameters, graph_config
|
||||
@ -249,7 +233,5 @@ def isolate_test_failures(parameters, graph_config, input, task_group_id, task_i
|
||||
task_definition.setdefault("dependencies", []).extend(dependencies.values())
|
||||
|
||||
failures = get_failures(task_id)
|
||||
logger.info("isolate_test_failures: %s" % failures)
|
||||
create_isolate_failure_tasks(
|
||||
task_definition, failures, parameters["level"], input["times"]
|
||||
)
|
||||
logger.info("confirm_failures: %s" % failures)
|
||||
create_confirm_failure_tasks(task_definition, failures, parameters["level"])
|
Loading…
Reference in New Issue
Block a user