Bug 1764371 - relpro: check for existing tasks in the task group on rerun r=releng-reviewers,hneiva

Release promotion action tasks aren't atomic, so they may schedule some
tasks and then fail. The scheduled tasks depend on the action task, so
normally they never run and everything's fine.
However, if the action task is rerun and the rerun succeeds, it unblocks
both the tasks it scheduled and the ones scheduled by previous runs,
which may not be safe.
Prevent this by explicitly checking for existing tasks in the group
before anything else, and returning an error if any are incomplete.

Differential Revision: https://phabricator.services.mozilla.com/D144964
This commit is contained in:
Julien Cristau 2022-05-18 08:48:54 +00:00
parent e6dfc9123b
commit 6afece8ec6

View File

@ -6,9 +6,11 @@
import json
import os
import requests
from taskgraph.parameters import Parameters
from taskgraph.taskgraph import TaskGraph
from taskgraph.util.taskcluster import get_artifact
from taskgraph.util.taskcluster import get_artifact, list_task_group_incomplete_tasks
from gecko_taskgraph.actions.registry import register_callback_action
from gecko_taskgraph.util.taskgraph import (
@ -298,6 +300,22 @@ def release_promotion_action(parameters, graph_config, input, task_group_id, tas
"do_not_optimize", promotion_config.get("do-not-optimize", [])
)
# Make sure no pending tasks remain from a previous run
own_task_id = os.environ.get("TASK_ID", "")
try:
for t in list_task_group_incomplete_tasks(own_task_id):
if t == own_task_id:
continue
raise Exception(
"task group has unexpected pre-existing incomplete tasks (e.g. {})".format(
t
)
)
except requests.exceptions.HTTPError as e:
# 404 means the task group doesn't exist yet, and we're fine
if e.response.status_code != 404:
raise
# Build previous_graph_ids from ``previous_graph_ids``, ``revision``,
# or the action parameters.
previous_graph_ids = input.get("previous_graph_ids")