From 671dda0818b4921316f7fa1303be21b3c3dc8efa Mon Sep 17 00:00:00 2001 From: "Dustin J. Mitchell" Date: Wed, 23 Aug 2017 16:21:06 +0000 Subject: [PATCH] Bug 1383880: add support for optimizing tasks based on SCHEDULES; r=ahal This adds some new optimization strategies. For tests, we use Either(SETA, SkipUnlessSchedules), thereby giving both mechanisms a chance to skip tasks. On try, SETA is omitted. MozReview-Commit-ID: GL4tlwyeBa6 --HG-- extra : rebase_source : 0c1ce762afc7a691788379d4f4206df106f6df63 --- build/sparse-profiles/taskgraph | 4 ++ taskcluster/taskgraph/optimize.py | 64 ++++++++++++++++++++++++ taskcluster/taskgraph/transforms/task.py | 5 ++ 3 files changed, 73 insertions(+) diff --git a/build/sparse-profiles/taskgraph b/build/sparse-profiles/taskgraph index e4e88d055163..c99d6b78a8dc 100644 --- a/build/sparse-profiles/taskgraph +++ b/build/sparse-profiles/taskgraph @@ -22,6 +22,10 @@ path:tools/lint/ # for new-style try pushes path:try_task_config.json +# Moz.build files are read in filesystem mode +glob:**/moz.build +glob:**/*.mozbuild + # Tooltool manifests also need to be opened. Assume they # are all somewhere in "tooltool-manifests" directories. 
glob:**/tooltool-manifests/** diff --git a/taskcluster/taskgraph/optimize.py b/taskcluster/taskgraph/optimize.py index 537911481b93..85759e8e1a27 100644 --- a/taskcluster/taskgraph/optimize.py +++ b/taskcluster/taskgraph/optimize.py @@ -24,10 +24,14 @@ from .taskgraph import TaskGraph from .util.seta import is_low_value_task from .util.taskcluster import find_task_id from .util.parameterization import resolve_task_references +from mozbuild.util import memoize from slugid import nice as slugid +from mozbuild.frontend import reader logger = logging.getLogger(__name__) +TOPSRCDIR = os.path.abspath(os.path.join(__file__, '../../../')) + def optimize_task_graph(target_task_graph, params, do_not_optimize, existing_tasks=None, strategies=None): @@ -71,6 +75,8 @@ def _make_default_strategies(): 'index-search': IndexSearch(), 'seta': SETA(), 'skip-unless-changed': SkipUnlessChanged(), + 'skip-unless-schedules': SkipUnlessSchedules(), + 'skip-unless-schedules-or-seta': Either(SkipUnlessSchedules(), SETA()), } @@ -244,6 +250,37 @@ class OptimizationStrategy(object): return False +class Either(OptimizationStrategy): + """Given one or more optimization strategies, remove a task if any of them + says to, and replace with a task if any finds a replacement (preferring the + earliest). 
By default, each substrategy gets the same arg, but split_args + can return a list of args for each strategy, if desired.""" + def __init__(self, *substrategies, **kwargs): + self.substrategies = substrategies + self.split_args = kwargs.pop('split_args', None) + if not self.split_args: + self.split_args = lambda arg: [arg] * len(substrategies) + if kwargs: + raise TypeError("unexpected keyword args") + + def _for_substrategies(self, arg, fn): + for sub, arg in zip(self.substrategies, self.split_args(arg)): + rv = fn(sub, arg) + if rv: + return rv + return False + + def should_remove_task(self, task, params, arg): + return self._for_substrategies( + arg, + lambda sub, arg: sub.should_remove_task(task, params, arg)) + + def should_replace_task(self, task, params, arg): + return self._for_substrategies( + arg, + lambda sub, arg: sub.should_replace_task(task, params, arg)) + + class IndexSearch(OptimizationStrategy): def should_remove_task(self, task, params, index_paths): "If this task has no dependencies, don't run it.." 
@@ -300,3 +337,30 @@ class SkipUnlessChanged(OptimizationStrategy): task.label) return True return False + + +class SkipUnlessSchedules(OptimizationStrategy): + + @memoize + def scheduled_by_push(self, repository, revision): + changed_files = files_changed.get_changed_files(repository, revision) + + config = reader.EmptyConfig(TOPSRCDIR) + rdr = reader.BuildReader(config) + components = set() + for p, m in rdr.files_info(changed_files).items(): + components |= set(m['SCHEDULES'].components) + + return components + + def should_remove_task(self, task, params, conditions): + if params.get('pushlog_id') == -1: + return False + + scheduled = self.scheduled_by_push(params['head_repository'], params['head_rev']) + conditions = set(conditions) + # if *any* of the condition components are scheduled, do not optimize + if conditions & scheduled: + return False + + return True diff --git a/taskcluster/taskgraph/transforms/task.py b/taskcluster/taskgraph/transforms/task.py index 947a05fb013d..9b1e068c6f28 100644 --- a/taskcluster/taskgraph/transforms/task.py +++ b/taskcluster/taskgraph/transforms/task.py @@ -17,6 +17,7 @@ import time from copy import deepcopy from mozbuild.util import memoize +from mozbuild import schedules from taskgraph.util.attributes import TRUNK_PROJECTS from taskgraph.util.hash import hash_path from taskgraph.util.treeherder import split_symbol @@ -159,6 +160,10 @@ task_description_schema = Schema({ {'seta': None}, # skip this task if none of the given file patterns match {'skip-unless-changed': [basestring]}, + # skip this task unless the changed files' SCHEDULES contains any of these components + {'skip-unless-schedules': list(schedules.ALL_COMPONENTS)}, + # skip if SETA or skip-unless-schedules says to + {'skip-unless-schedules-or-seta': list(schedules.ALL_COMPONENTS)}, ), # the provisioner-id/worker-type for the task. The following parameters will