diff --git a/taskcluster/gecko_taskgraph/actions/util.py b/taskcluster/gecko_taskgraph/actions/util.py index 25e0c6b3fcd0..576cdd9e05b7 100644 --- a/taskcluster/gecko_taskgraph/actions/util.py +++ b/taskcluster/gecko_taskgraph/actions/util.py @@ -14,6 +14,7 @@ import jsone import requests from requests.exceptions import HTTPError from slugid import nice as slugid +from taskgraph.optimize.base import optimize_task_graph from taskgraph.taskgraph import TaskGraph from taskgraph.util.taskcluster import ( find_task_id, @@ -27,7 +28,6 @@ from taskgraph.util.taskcluster import ( from gecko_taskgraph import create from gecko_taskgraph.decision import read_artifact, write_artifact, rename_artifact -from gecko_taskgraph.optimize import optimize_task_graph from gecko_taskgraph.util.taskcluster import trigger_hook from gecko_taskgraph.util.taskgraph import find_decision_task @@ -283,6 +283,8 @@ def create_tasks( If you wish to create the tasks in a new group, leave out decision_task_id. Returns an updated label_to_taskid containing the new tasks""" + import gecko_taskgraph.optimize # noqa: triggers registration of strategies + if suffix != "": suffix = f"-{suffix}" to_run = set(to_run) diff --git a/taskcluster/gecko_taskgraph/decision.py b/taskcluster/gecko_taskgraph/decision.py index e9036815395b..02bbb45078a2 100644 --- a/taskcluster/gecko_taskgraph/decision.py +++ b/taskcluster/gecko_taskgraph/decision.py @@ -119,7 +119,7 @@ try_task_config_schema = Schema( "optimize-strategies", description="Alternative optimization strategies to use instead of the default. " "A module path pointing to a dict to be use as the `strategy_override` " - "argument in `gecko_taskgraph.optimize.optimize_task_graph`.", + "argument in `taskgraph.optimize.base.optimize_task_graph`.", ): str, Optional("rebuild"): int, Optional("tasks-regex"): { diff --git a/taskcluster/gecko_taskgraph/generator.py b/taskcluster/gecko_taskgraph/generator.py index 5161157e0451..e8113ba8ac56 100644 --- a/taskcluster/gecko_taskgraph/generator.py +++ b/taskcluster/gecko_taskgraph/generator.py @@ -10,6 +10,7 @@ import attr from taskgraph import filter_tasks from taskgraph.config import GraphConfig from taskgraph.graph import Graph +from taskgraph.optimize.base import optimize_task_graph from taskgraph.parameters import parameters_loader from taskgraph.task import Task from taskgraph.taskgraph import TaskGraph @@ -18,7 +19,6 @@ from taskgraph.util.python_path import find_object from taskgraph.util.yaml import load_yaml from .morph import morph -from .optimize import optimize_task_graph from .util.verify import verifications from .config import load_graph_config diff --git a/taskcluster/gecko_taskgraph/optimize/__init__.py b/taskcluster/gecko_taskgraph/optimize/__init__.py index e34a76a06bbe..2ac8be441df9 100644 --- a/taskcluster/gecko_taskgraph/optimize/__init__.py +++ b/taskcluster/gecko_taskgraph/optimize/__init__.py @@ -11,544 +11,21 @@ task. See ``taskcluster/docs/optimization.rst`` for more information. """ - -import datetime -import logging -from abc import ABCMeta, abstractmethod, abstractproperty -from collections import defaultdict - -from slugid import nice as slugid -from taskgraph.graph import Graph -from taskgraph.taskgraph import TaskGraph -from taskgraph.util.parameterization import ( - resolve_task_references, - resolve_timestamps, +from taskgraph.optimize.base import ( + Alias, + All, + Any, + Not, + register_strategy, + registry, ) from taskgraph.util.python_path import import_sibling_modules -logger = logging.getLogger(__name__) -registry = {} +# Use the gecko_taskgraph version of 'skip-unless-changed' for now. +registry.pop("skip-unless-changed", None) - -def register_strategy(name, args=()): - def wrap(cls): - if name not in registry: - registry[name] = cls(*args) - if not hasattr(registry[name], "description"): - registry[name].description = name - return cls - - return wrap - - -def optimize_task_graph( - target_task_graph, - requested_tasks, - params, - do_not_optimize, - decision_task_id, - existing_tasks=None, - strategy_override=None, -): - """ - Perform task optimization, returning a taskgraph and a map from label to - assigned taskId, including replacement tasks. - """ - label_to_taskid = {} - if not existing_tasks: - existing_tasks = {} - - # instantiate the strategies for this optimization process - strategies = registry.copy() - if strategy_override: - strategies.update(strategy_override) - - optimizations = _get_optimizations(target_task_graph, strategies) - - removed_tasks = remove_tasks( - target_task_graph=target_task_graph, - requested_tasks=requested_tasks, - optimizations=optimizations, - params=params, - do_not_optimize=do_not_optimize, - ) - - replaced_tasks = replace_tasks( - target_task_graph=target_task_graph, - optimizations=optimizations, - params=params, - do_not_optimize=do_not_optimize, - label_to_taskid=label_to_taskid, - existing_tasks=existing_tasks, - removed_tasks=removed_tasks, - ) - - return ( - get_subgraph( - target_task_graph, - removed_tasks, - replaced_tasks, - label_to_taskid, - decision_task_id, - ), - label_to_taskid, - ) - - -def _get_optimizations(target_task_graph, strategies): - def optimizations(label): - task = target_task_graph.tasks[label] - if task.optimization: - opt_by, arg = list(task.optimization.items())[0] - strategy = strategies[opt_by] - if hasattr(strategy, "description"): - opt_by += f" ({strategy.description})" - return (opt_by, strategy, arg) - else: - return ("never", strategies["never"], None) - - return optimizations - - -def _log_optimization(verb, opt_counts, opt_reasons=None): - if opt_reasons: - message = "optimize: {label} {action} because of {reason}" - for label, (action, reason) in opt_reasons.items(): - logger.debug(message.format(label=label, action=action, reason=reason)) - - if opt_counts: - logger.info( - f"{verb.title()} " - + ", ".join(f"{c} tasks by {b}" for b, c in sorted(opt_counts.items())) - + " during optimization." - ) - else: - logger.info(f"No tasks {verb} during optimization") - - -def remove_tasks( - target_task_graph, requested_tasks, params, optimizations, do_not_optimize -): - """ - Implement the "Removing Tasks" phase, returning a set of task labels of all removed tasks. - """ - opt_counts = defaultdict(int) - opt_reasons = {} - removed = set() - dependents_of = target_task_graph.graph.reverse_links_dict() - tasks = target_task_graph.tasks - prune_candidates = set() - - # Traverse graph so dependents (child nodes) are guaranteed to be processed - # first. - for label in target_task_graph.graph.visit_preorder(): - # Dependents that can be pruned away (shouldn't cause this task to run). - # Only dependents that either: - # A) Explicitly reference this task in their 'if_dependencies' list, or - # B) Don't have an 'if_dependencies' attribute (i.e are in 'prune_candidates' - # because they should be removed but have prune_deps themselves) - # should be considered. - prune_deps = { - l - for l in dependents_of[label] - if l in prune_candidates - if not tasks[l].if_dependencies or label in tasks[l].if_dependencies - } - - def _keep(reason): - """Mark a task as being kept in the graph. Also recursively removes - any dependents from `prune_candidates`, assuming they should be - kept because of this task. - """ - opt_reasons[label] = ("kept", reason) - - # Removes dependents that were in 'prune_candidates' from a task - # that ended up being kept (and therefore the dependents should - # also be kept). - queue = list(prune_deps) - while queue: - l = queue.pop() - - # If l is a prune_dep of multiple tasks it could be queued up - # multiple times. Guard against it being already removed. - if l not in prune_candidates: - continue - - # If a task doesn't set 'if_dependencies' itself (rather it was - # added to 'prune_candidates' due to one of its depenendents), - # then we shouldn't remove it. - if not tasks[l].if_dependencies: - continue - - prune_candidates.remove(l) - queue.extend([r for r in dependents_of[l] if r in prune_candidates]) - - def _remove(reason): - """Potentially mark a task as being removed from the graph. If the - task has dependents that can be pruned, add this task to - `prune_candidates` rather than removing it. - """ - if prune_deps: - # If there are prune_deps, unsure if we can remove this task yet. - prune_candidates.add(label) - else: - opt_reasons[label] = ("removed", reason) - opt_counts[reason] += 1 - removed.add(label) - - # if we're not allowed to optimize, that's easy.. - if label in do_not_optimize: - _keep("do not optimize") - continue - - # If there are remaining tasks depending on this one, do not remove. - if any( - l for l in dependents_of[label] if l not in removed and l not in prune_deps - ): - _keep("dependent tasks") - continue - - # Some tasks in the task graph only exist because they were required - # by a task that has just been optimized away. They can now be removed. - if label not in requested_tasks: - _remove("dependents optimized") - continue - - # Call the optimization strategy. - task = tasks[label] - opt_by, opt, arg = optimizations(label) - if opt.should_remove_task(task, params, arg): - _remove(opt_by) - continue - - # Some tasks should only run if their dependency was also run. Since we - # haven't processed dependencies yet, we add them to a list of - # candidate tasks for pruning. - if task.if_dependencies: - opt_reasons[label] = ("kept", opt_by) - prune_candidates.add(label) - else: - _keep(opt_by) - - if prune_candidates: - reason = "if-dependencies pruning" - for label in prune_candidates: - # There's an edge case where a triangle graph can cause a - # dependency to stay in 'prune_candidates' when the dependent - # remains. Do a final check to ensure we don't create any bad - # edges. - dependents = any( - d - for d in dependents_of[label] - if d not in prune_candidates - if d not in removed - ) - if dependents: - opt_reasons[label] = ("kept", "dependent tasks") - continue - removed.add(label) - opt_counts[reason] += 1 - opt_reasons[label] = ("removed", reason) - - _log_optimization("removed", opt_counts, opt_reasons) - return removed - - -def replace_tasks( - target_task_graph, - params, - optimizations, - do_not_optimize, - label_to_taskid, - removed_tasks, - existing_tasks, -): - """ - Implement the "Replacing Tasks" phase, returning a set of task labels of - all replaced tasks. The replacement taskIds are added to label_to_taskid as - a side-effect. - """ - opt_counts = defaultdict(int) - replaced = set() - dependents_of = target_task_graph.graph.reverse_links_dict() - dependencies_of = target_task_graph.graph.links_dict() - - for label in target_task_graph.graph.visit_postorder(): - - # if we're not allowed to optimize, that's easy.. - if label in do_not_optimize: - continue - - # if this task depends on un-replaced, un-removed tasks, do not replace - if any( - l not in replaced and l not in removed_tasks for l in dependencies_of[label] - ): - continue - - # if the task already exists, that's an easy replacement - repl = existing_tasks.get(label) - if repl: - label_to_taskid[label] = repl - replaced.add(label) - opt_counts["existing_tasks"] += 1 - continue - - # call the optimization strategy - task = target_task_graph.tasks[label] - opt_by, opt, arg = optimizations(label) - - # compute latest deadline of dependents (if any) - dependents = [target_task_graph.tasks[l] for l in dependents_of[label]] - deadline = None - if dependents: - now = datetime.datetime.utcnow() - deadline = max( - resolve_timestamps(now, task.task["deadline"]) for task in dependents - ) - repl = opt.should_replace_task(task, params, deadline, arg) - if repl: - if repl is True: - # True means remove this task; get_subgraph will catch any - # problems with removed tasks being depended on - removed_tasks.add(label) - else: - label_to_taskid[label] = repl - replaced.add(label) - opt_counts[opt_by] += 1 - continue - - _log_optimization("replaced", opt_counts) - return replaced - - -def get_subgraph( - target_task_graph, - removed_tasks, - replaced_tasks, - label_to_taskid, - decision_task_id, -): - """ - Return the subgraph of target_task_graph consisting only of - non-optimized tasks and edges between them. - - To avoid losing track of taskIds for tasks optimized away, this method - simultaneously substitutes real taskIds for task labels in the graph, and - populates each task definition's `dependencies` key with the appropriate - taskIds. Task references are resolved in the process. - """ - - # check for any dependency edges from included to removed tasks - bad_edges = [ - (l, r, n) - for l, r, n in target_task_graph.graph.edges - if l not in removed_tasks and r in removed_tasks - ] - if bad_edges: - probs = ", ".join( - f"{l} depends on {r} as {n} but it has been removed" - for l, r, n in bad_edges - ) - raise Exception("Optimization error: " + probs) - - # fill in label_to_taskid for anything not removed or replaced - assert replaced_tasks <= set(label_to_taskid) - for label in sorted( - target_task_graph.graph.nodes - removed_tasks - set(label_to_taskid) - ): - label_to_taskid[label] = slugid() - - # resolve labels to taskIds and populate task['dependencies'] - tasks_by_taskid = {} - named_links_dict = target_task_graph.graph.named_links_dict() - omit = removed_tasks | replaced_tasks - for label, task in target_task_graph.tasks.items(): - if label in omit: - continue - task.task_id = label_to_taskid[label] - named_task_dependencies = { - name: label_to_taskid[label] - for name, label in named_links_dict.get(label, {}).items() - } - - # Add remaining soft dependencies - if task.soft_dependencies: - named_task_dependencies.update( - { - label: label_to_taskid[label] - for label in task.soft_dependencies - if label in label_to_taskid and label not in omit - } - ) - - task.task = resolve_task_references( - task.label, - task.task, - task_id=task.task_id, - decision_task_id=decision_task_id, - dependencies=named_task_dependencies, - ) - deps = task.task.setdefault("dependencies", []) - deps.extend(sorted(named_task_dependencies.values())) - tasks_by_taskid[task.task_id] = task - - # resolve edges to taskIds - edges_by_taskid = ( - (label_to_taskid.get(left), label_to_taskid.get(right), name) - for (left, right, name) in target_task_graph.graph.edges - ) - # ..and drop edges that are no longer entirely in the task graph - # (note that this omits edges to replaced tasks, but they are still in task.dependnecies) - edges_by_taskid = { - (left, right, name) - for (left, right, name) in edges_by_taskid - if left in tasks_by_taskid and right in tasks_by_taskid - } - - return TaskGraph(tasks_by_taskid, Graph(set(tasks_by_taskid), edges_by_taskid)) - - -@register_strategy("never") -class OptimizationStrategy: - def should_remove_task(self, task, params, arg): - """Determine whether to optimize this task by removing it. Returns - True to remove.""" - return False - - def should_replace_task(self, task, params, deadline, arg): - """Determine whether to optimize this task by replacing it. Returns a - taskId to replace this task, True to replace with nothing, or False to - keep the task.""" - return False - - -@register_strategy("always") -class Always(OptimizationStrategy): - def should_remove_task(self, task, params, arg): - return True - - -class CompositeStrategy(OptimizationStrategy, metaclass=ABCMeta): - def __init__(self, *substrategies, **kwargs): - self.substrategies = [] - missing = set() - for sub in substrategies: - if isinstance(sub, str): - if sub not in registry.keys(): - missing.add(sub) - continue - sub = registry[sub] - - self.substrategies.append(sub) - - if missing: - raise TypeError( - "substrategies aren't registered: {}".format( - ", ".join(sorted(missing)) - ) - ) - - self.split_args = kwargs.pop("split_args", None) - if not self.split_args: - self.split_args = lambda arg, substrategies: [arg] * len(substrategies) - if kwargs: - raise TypeError("unexpected keyword args") - - @abstractproperty - def description(self): - """A textual description of the combined substrategies.""" - - @abstractmethod - def reduce(self, results): - """Given all substrategy results as a generator, return the overall - result.""" - - def _generate_results(self, fname, *args): - *passthru, arg = args - for sub, arg in zip( - self.substrategies, self.split_args(arg, self.substrategies) - ): - yield getattr(sub, fname)(*passthru, arg) - - def should_remove_task(self, *args): - results = self._generate_results("should_remove_task", *args) - return self.reduce(results) - - def should_replace_task(self, *args): - results = self._generate_results("should_replace_task", *args) - return self.reduce(results) - - -class Any(CompositeStrategy): - """Given one or more optimization strategies, remove or replace a task if any of them - says to. - - Replacement will use the value returned by the first strategy that says to replace. - """ - - @property - def description(self): - return "-or-".join([s.description for s in self.substrategies]) - - @classmethod - def reduce(cls, results): - for rv in results: - if rv: - return rv - return False - - -class All(CompositeStrategy): - """Given one or more optimization strategies, remove or replace a task if all of them - says to. - - Replacement will use the value returned by the first strategy passed in. - Note the values used for replacement need not be the same, as long as they - all say to replace. - """ - - @property - def description(self): - return "-and-".join([s.description for s in self.substrategies]) - - @classmethod - def reduce(cls, results): - for rv in results: - if not rv: - return rv - return True - - -class Alias(CompositeStrategy): - """Provides an alias to an existing strategy. - - This can be useful to swap strategies in and out without needing to modify - the task transforms. - """ - - def __init__(self, strategy): - super().__init__(strategy) - - @property - def description(self): - return self.substrategies[0].description - - def reduce(self, results): - return next(results) - - -class Not(CompositeStrategy): - """Given a strategy, returns the opposite.""" - - def __init__(self, strategy): - super().__init__(strategy) - - @property - def description(self): - return "not-" + self.substrategies[0].description - - def reduce(self, results): - return not next(results) +# Trigger registration in sibling modules. +import_sibling_modules() def split_bugbug_arg(arg, substrategies): @@ -570,10 +47,6 @@ def split_bugbug_arg(arg, substrategies): return [arg] * index + [{}] * (len(substrategies) - index) -# Trigger registration in sibling modules. -import_sibling_modules() - - # Register composite strategies. register_strategy("build", args=("skip-unless-schedules",))(Alias) register_strategy("test", args=("skip-unless-schedules",))(Alias) diff --git a/taskcluster/gecko_taskgraph/optimize/backstop.py b/taskcluster/gecko_taskgraph/optimize/backstop.py index f2bacab04a4d..7b0c86222b55 100644 --- a/taskcluster/gecko_taskgraph/optimize/backstop.py +++ b/taskcluster/gecko_taskgraph/optimize/backstop.py @@ -3,7 +3,8 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. -from gecko_taskgraph.optimize import All, OptimizationStrategy, register_strategy +from taskgraph.optimize.base import All, OptimizationStrategy, register_strategy + from gecko_taskgraph.util.backstop import BACKSTOP_PUSH_INTERVAL diff --git a/taskcluster/gecko_taskgraph/optimize/bugbug.py b/taskcluster/gecko_taskgraph/optimize/bugbug.py index d4533d0f23d3..32188dbad249 100644 --- a/taskcluster/gecko_taskgraph/optimize/bugbug.py +++ b/taskcluster/gecko_taskgraph/optimize/bugbug.py @@ -7,7 +7,8 @@ from fnmatch import fnmatch from collections import defaultdict from urllib.parse import urlsplit -from gecko_taskgraph.optimize import register_strategy, registry, OptimizationStrategy +from taskgraph.optimize.base import register_strategy, registry, OptimizationStrategy + from gecko_taskgraph.util.bugbug import ( BugbugTimeoutException, push_schedules, diff --git a/taskcluster/gecko_taskgraph/optimize/strategies.py b/taskcluster/gecko_taskgraph/optimize/strategies.py index 741bec52e0c3..5756a48e003a 100644 --- a/taskcluster/gecko_taskgraph/optimize/strategies.py +++ b/taskcluster/gecko_taskgraph/optimize/strategies.py @@ -9,10 +9,10 @@ from datetime import datetime import mozpack.path as mozpath from mozbuild.base import MozbuildObject from mozbuild.util import memoize +from taskgraph.optimize.base import register_strategy, OptimizationStrategy from taskgraph.util.taskcluster import find_task_id from gecko_taskgraph import files_changed -from gecko_taskgraph.optimize import register_strategy, OptimizationStrategy from gecko_taskgraph.util.taskcluster import status_task logger = logging.getLogger(__name__) diff --git a/taskcluster/gecko_taskgraph/test/conftest.py b/taskcluster/gecko_taskgraph/test/conftest.py index 244daeb239c6..2995c308f7b6 100644 --- a/taskcluster/gecko_taskgraph/test/conftest.py +++ b/taskcluster/gecko_taskgraph/test/conftest.py @@ -8,17 +8,17 @@ from mach.logging import LoggingManager from responses import RequestsMock from taskgraph import target_tasks as target_tasks_mod from taskgraph.config import GraphConfig +from taskgraph.optimize import base as optimize_mod +from taskgraph.optimize.base import OptimizationStrategy from taskgraph.parameters import Parameters from gecko_taskgraph import ( GECKO, generator, - optimize as optimize_mod, ) from gecko_taskgraph.actions import render_actions_json from gecko_taskgraph.config import load_graph_config from gecko_taskgraph.generator import TaskGraphGenerator, Kind -from gecko_taskgraph.optimize import OptimizationStrategy from gecko_taskgraph.util.templates import merge @@ -122,6 +122,8 @@ class FakeParameters(dict): class FakeOptimization(OptimizationStrategy): + description = "Fake strategy for testing" + def __init__(self, mode, *args, **kwargs): super().__init__(*args, **kwargs) self.mode = mode diff --git a/taskcluster/gecko_taskgraph/test/python.ini b/taskcluster/gecko_taskgraph/test/python.ini index 1600145f1940..13317673b54a 100644 --- a/taskcluster/gecko_taskgraph/test/python.ini +++ b/taskcluster/gecko_taskgraph/test/python.ini @@ -8,7 +8,6 @@ subsuite = taskgraph [test_generator.py] [test_main.py] [test_morph.py] -[test_optimize.py] [test_optimize_strategies.py] [test_target_tasks.py] [test_taskcluster_yml.py] diff --git a/taskcluster/gecko_taskgraph/test/test_optimize.py b/taskcluster/gecko_taskgraph/test/test_optimize.py deleted file mode 100644 index b91c580ab96f..000000000000 --- a/taskcluster/gecko_taskgraph/test/test_optimize.py +++ /dev/null @@ -1,504 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -from datetime import datetime, timedelta -from functools import partial - -import pytest -from mozunit import main -from taskgraph.graph import Graph -from taskgraph.task import Task -from taskgraph.taskgraph import TaskGraph - -from gecko_taskgraph import optimize -from gecko_taskgraph.optimize import OptimizationStrategy, All, Any, Not - - -class Remove(OptimizationStrategy): - def should_remove_task(self, task, params, arg): - return True - - -class Replace(OptimizationStrategy): - def should_replace_task(self, task, params, deadline, taskid): - expires = datetime.utcnow() + timedelta(days=1) - if deadline and expires < datetime.strptime(deadline, "%Y-%m-%dT%H:%M:%S.%fZ"): - return False - return taskid - - -def default_strategies(): - return { - "never": OptimizationStrategy(), - "remove": Remove(), - "replace": Replace(), - } - - -def make_task( - label, - optimization=None, - task_def=None, - optimized=None, - task_id=None, - dependencies=None, - if_dependencies=None, -): - task_def = task_def or { - "sample": "task-def", - "deadline": {"relative-datestamp": "1 hour"}, - } - task = Task( - kind="test", - label=label, - attributes={}, - task=task_def, - if_dependencies=if_dependencies or [], - ) - task.optimization = optimization - task.task_id = task_id - if dependencies is not None: - task.task["dependencies"] = sorted(dependencies) - return task - - -def make_graph(*tasks_and_edges, **kwargs): - tasks = {t.label: t for t in tasks_and_edges if isinstance(t, Task)} - edges = {e for e in tasks_and_edges if not isinstance(e, Task)} - tg = TaskGraph(tasks, Graph(set(tasks), edges)) - - if kwargs.get("deps", True): - # set dependencies based on edges - for l, r, name in tg.graph.edges: - tg.tasks[l].dependencies[name] = r - - return tg - - -def make_opt_graph(*tasks_and_edges): - tasks = {t.task_id: t for t in tasks_and_edges if isinstance(t, Task)} - edges = {e for e in tasks_and_edges if not isinstance(e, Task)} - return TaskGraph(tasks, Graph(set(tasks), edges)) - - -def make_triangle(deps=True, **opts): - """ - Make a "triangle" graph like this: - - t1 <-------- t3 - `---- t2 --' - """ - return make_graph( - make_task("t1", opts.get("t1")), - make_task("t2", opts.get("t2")), - make_task("t3", opts.get("t3")), - ("t3", "t2", "dep"), - ("t3", "t1", "dep2"), - ("t2", "t1", "dep"), - deps=deps, - ) - - -@pytest.mark.parametrize( - "graph,kwargs,exp_removed", - ( - # A graph full of optimization=never has nothing removed - pytest.param( - make_triangle(), - {}, - # expectations - set(), - id="never", - ), - # A graph full of optimization=remove removes everything - pytest.param( - make_triangle( - t1={"remove": None}, - t2={"remove": None}, - t3={"remove": None}, - ), - {}, - # expectations - {"t1", "t2", "t3"}, - id="all", - ), - # Tasks with the 'any' composite strategy are removed when any substrategy says to - pytest.param( - make_triangle( - t1={"any": None}, - t2={"any": None}, - t3={"any": None}, - ), - {"strategies": lambda: {"any": Any("never", "remove")}}, - # expectations - {"t1", "t2", "t3"}, - id="composite_strategies_any", - ), - # Tasks with the 'all' composite strategy are removed when all substrategies say to - pytest.param( - make_triangle( - t1={"all": None}, - t2={"all": None}, - t3={"all": None}, - ), - {"strategies": lambda: {"all": All("never", "remove")}}, - # expectations - set(), - id="composite_strategies_all", - ), - # Tasks with the 'not' composite strategy are removed when the substrategy says not to - pytest.param( - make_graph( - make_task("t1", {"not-never": None}), - make_task("t2", {"not-remove": None}), - ), - { - "strategies": lambda: { - "not-never": Not("never"), - "not-remove": Not("remove"), - } - }, - # expectations - {"t1"}, - id="composite_strategies_not", - ), - # Removable tasks that are depended on by non-removable tasks are not removed - pytest.param( - make_triangle( - t1={"remove": None}, - t3={"remove": None}, - ), - {}, - # expectations - {"t3"}, - id="blocked", - ), - # Removable tasks that are marked do_not_optimize are not removed - pytest.param( - make_triangle( - t1={"remove": None}, - t2={"remove": None}, # but do_not_optimize - t3={"remove": None}, - ), - {"do_not_optimize": {"t2"}}, - # expectations - {"t3"}, - id="do_not_optimize", - ), - # Tasks with 'if_dependencies' are removed when deps are not run - pytest.param( - make_graph( - make_task("t1", {"remove": None}), - make_task("t2", {"remove": None}), - make_task("t3", {"never": None}, if_dependencies=["t1", "t2"]), - make_task("t4", {"never": None}, if_dependencies=["t1"]), - ("t3", "t2", "dep"), - ("t3", "t1", "dep2"), - ("t2", "t1", "dep"), - ("t4", "t1", "dep3"), - ), - {"requested_tasks": {"t3", "t4"}}, - # expectations - {"t1", "t2", "t3", "t4"}, - id="if_deps_removed", - ), - # Parents of tasks with 'if_dependencies' are also removed even if requested - pytest.param( - make_graph( - make_task("t1", {"remove": None}), - make_task("t2", {"remove": None}), - make_task("t3", {"never": None}, if_dependencies=["t1", "t2"]), - make_task("t4", {"never": None}, if_dependencies=["t1"]), - ("t3", "t2", "dep"), - ("t3", "t1", "dep2"), - ("t2", "t1", "dep"), - ("t4", "t1", "dep3"), - ), - {}, - # expectations - {"t1", "t2", "t3", "t4"}, - id="if_deps_parents_removed", - ), - # Tasks with 'if_dependencies' are kept if at least one of said dependencies are kept - pytest.param( - make_graph( - make_task("t1", {"never": None}), - make_task("t2", {"remove": None}), - make_task("t3", {"never": None}, if_dependencies=["t1", "t2"]), - make_task("t4", {"never": None}, if_dependencies=["t1"]), - ("t3", "t2", "dep"), - ("t3", "t1", "dep2"), - ("t2", "t1", "dep"), - ("t4", "t1", "dep3"), - ), - {}, - # expectations - set(), - id="if_deps_kept", - ), - # Ancestor of task with 'if_dependencies' does not cause it to be kept - pytest.param( - make_graph( - make_task("t1", {"never": None}), - make_task("t2", {"remove": None}), - make_task("t3", {"never": None}, if_dependencies=["t2"]), - ("t3", "t2", "dep"), - ("t2", "t1", "dep2"), - ), - {}, - # expectations - {"t2", "t3"}, - id="if_deps_ancestor_does_not_keep", - ), - # Unhandled edge case where 't1' and 't2' are kept even though they - # don't have any dependents and are not in 'requested_tasks' - pytest.param( - make_graph( - make_task("t1", {"never": None}), - make_task("t2", {"never": None}, if_dependencies=["t1"]), - make_task("t3", {"remove": None}), - make_task("t4", {"never": None}, if_dependencies=["t3"]), - ("t2", "t1", "e1"), - ("t4", "t2", "e2"), - ("t4", "t3", "e3"), - ), - {"requested_tasks": {"t3", "t4"}}, - # expectations - {"t1", "t2", "t3", "t4"}, - id="if_deps_edge_case_1", - marks=pytest.mark.xfail, - ), - ), -) -def test_remove_tasks(monkeypatch, graph, kwargs, exp_removed): - """Tests the `remove_tasks` function. - - Each test case takes three arguments: - - 1. A `TaskGraph` instance. - 2. Keyword arguments to pass into `remove_tasks`. - 3. The set of task labels that are expected to be removed. - """ - # set up strategies - strategies = default_strategies() - monkeypatch.setattr(optimize, "registry", strategies) - extra = kwargs.pop("strategies", None) - if extra: - if callable(extra): - extra = extra() - strategies.update(extra) - - kwargs.setdefault("params", {}) - kwargs.setdefault("do_not_optimize", set()) - kwargs.setdefault("requested_tasks", graph) - - got_removed = optimize.remove_tasks( - target_task_graph=graph, - optimizations=optimize._get_optimizations(graph, strategies), - **kwargs - ) - assert got_removed == exp_removed - - -@pytest.mark.parametrize( - "graph,kwargs,exp_replaced,exp_removed,exp_label_to_taskid", - ( - # A task cannot be replaced if it depends on one that was not replaced - pytest.param( - make_triangle( - t1={"replace": "e1"}, - t3={"replace": "e3"}, - ), - {}, - # expectations - {"t1"}, - set(), - {"t1": "e1"}, - id="blocked", - ), - # A task cannot be replaced if it should not be optimized - pytest.param( - make_triangle( - t1={"replace": "e1"}, - t2={"replace": "xxx"}, # but do_not_optimize - t3={"replace": "e3"}, - ), - {"do_not_optimize": {"t2"}}, - # expectations - {"t1"}, - set(), - {"t1": "e1"}, - id="do_not_optimize", - ), - # No tasks are replaced when strategy is 'never' - pytest.param( - make_triangle(), - {}, - # expectations - set(), - set(), - {}, - id="never", - ), - # All replacable tasks are replaced when strategy is 'replace' - pytest.param( - make_triangle( - t1={"replace": "e1"}, - t2={"replace": "e2"}, - t3={"replace": "e3"}, - ), - {}, - # expectations - {"t1", "t2", "t3"}, - set(), - {"t1": "e1", "t2": "e2", "t3": "e3"}, - id="all", - ), - # A task can be replaced with nothing - pytest.param( - make_triangle( - t1={"replace": "e1"}, - t2={"replace": True}, - t3={"replace": True}, - ), - {}, - # expectations - {"t1"}, - {"t2", "t3"}, - {"t1": "e1"}, - id="tasks_removed", - ), - # A task which expires before a dependents deadline is not a valid replacement. - pytest.param( - make_graph( - make_task("t1", {"replace": "e1"}), - make_task( - "t2", task_def={"deadline": {"relative-datestamp": "2 days"}} - ), - make_task( - "t3", task_def={"deadline": {"relative-datestamp": "1 minute"}} - ), - ("t2", "t1", "dep1"), - ("t3", "t1", "dep2"), - ), - {}, - # expectations - set(), - set(), - {}, - id="deadline", - ), - ), -) -def test_replace_tasks( - graph, - kwargs, - exp_replaced, - exp_removed, - exp_label_to_taskid, -): - """Tests the `replace_tasks` function. - - Each test case takes five arguments: - - 1. A `TaskGraph` instance. - 2. Keyword arguments to pass into `replace_tasks`. - 3. The set of task labels that are expected to be replaced. - 4. The set of task labels that are expected to be removed. - 5. The expected label_to_taskid. - """ - kwargs.setdefault("params", {}) - kwargs.setdefault("do_not_optimize", set()) - kwargs.setdefault("label_to_taskid", {}) - kwargs.setdefault("removed_tasks", set()) - kwargs.setdefault("existing_tasks", {}) - - got_replaced = optimize.replace_tasks( - target_task_graph=graph, - optimizations=optimize._get_optimizations(graph, default_strategies()), - **kwargs - ) - assert got_replaced == exp_replaced - assert kwargs["removed_tasks"] == exp_removed - assert kwargs["label_to_taskid"] == exp_label_to_taskid - - -@pytest.mark.parametrize( - "graph,kwargs,exp_subgraph,exp_label_to_taskid", - ( - # Test get_subgraph returns a similarly-shaped subgraph when nothing is removed - pytest.param( - make_triangle(deps=False), - {}, - make_opt_graph( - make_task("t1", task_id="tid1", dependencies={}), - make_task("t2", task_id="tid2", dependencies={"tid1"}), - make_task("t3", task_id="tid3", dependencies={"tid1", "tid2"}), - ("tid3", "tid2", "dep"), - ("tid3", "tid1", "dep2"), - ("tid2", "tid1", "dep"), - ), - {"t1": "tid1", "t2": "tid2", "t3": "tid3"}, - id="no_change", - ), - # Test get_subgraph returns a smaller subgraph when tasks are removed - pytest.param( - make_triangle(deps=False), - { - "removed_tasks": {"t2", "t3"}, - }, - # expectations - make_opt_graph(make_task("t1", task_id="tid1", dependencies={})), - {"t1": "tid1"}, - id="removed", - ), - # Test get_subgraph returns a smaller subgraph when tasks are replaced - pytest.param( - make_triangle(deps=False), - { - "replaced_tasks": {"t1", "t2"}, - "label_to_taskid": {"t1": "e1", "t2": "e2"}, - }, - # expectations - make_opt_graph(make_task("t3", task_id="tid1", dependencies={"e1", "e2"})), - {"t1": "e1", "t2": "e2", "t3": "tid1"}, - id="replaced", - ), - ), -) -def test_get_subgraph(monkeypatch, graph, kwargs, exp_subgraph, exp_label_to_taskid): - """Tests the `get_subgraph` function. - - Each test case takes 4 arguments: - - 1. A `TaskGraph` instance. - 2. Keyword arguments to pass into `get_subgraph`. - 3. The expected subgraph. - 4. The expected label_to_taskid. - """ - monkeypatch.setattr( - optimize, "slugid", partial(next, ("tid%d" % i for i in range(1, 10))) - ) - - kwargs.setdefault("removed_tasks", set()) - kwargs.setdefault("replaced_tasks", set()) - kwargs.setdefault("label_to_taskid", {}) - kwargs.setdefault("decision_task_id", "DECISION-TASK") - - got_subgraph = optimize.get_subgraph(graph, **kwargs) - assert got_subgraph.graph == exp_subgraph.graph - assert got_subgraph.tasks == exp_subgraph.tasks - assert kwargs["label_to_taskid"] == exp_label_to_taskid - - -def test_get_subgraph_removed_dep(): - "get_subgraph raises an Exception when a task depends on a removed task" - graph = make_triangle() - with pytest.raises(Exception): - optimize.get_subgraph(graph, {"t2"}, set(), {}) - - -if __name__ == "__main__": - main() diff --git a/taskcluster/gecko_taskgraph/test/test_optimize_strategies.py b/taskcluster/gecko_taskgraph/test/test_optimize_strategies.py index 5907bd4dcd44..5ffe6155f0b7 100644 --- a/taskcluster/gecko_taskgraph/test/test_optimize_strategies.py +++ b/taskcluster/gecko_taskgraph/test/test_optimize_strategies.py @@ -8,9 +8,10 @@ from time import mktime import pytest from mozunit import main +from taskgraph.optimize.base import registry from taskgraph.task import Task -from gecko_taskgraph.optimize import project, registry +from gecko_taskgraph.optimize import project from gecko_taskgraph.optimize.strategies import IndexSearch, SkipUnlessSchedules from gecko_taskgraph.optimize.backstop import SkipUnlessBackstop, SkipUnlessPushInterval from gecko_taskgraph.optimize.bugbug import (