Bug 1383880: allow only one optimization per task; r=ahal,glandium

It is not at *all* clear how multiple optimizations for a single task should interact. No simple logical operation is right in all cases, and in fact in most imaginable cases the desired behavior turns out to be independent of all but one of the optimizations. For example, given both `seta` and `skip-unless-files-changed` optimizations, if SETA says to skip a test, it is low value and should be skipped regardless of what files have changed. But if SETA says to run a test, then it has likely been skipped in previous pushes, so it should be run regardless of what has changed in this push. This also adds a bit more output about optimization, that may be useful for anyone wondering why a particular job didn't run. MozReview-Commit-ID: 3OsvRnWjai4 --HG-- extra : rebase_source : ba0aa536e8c474b36c63d1447c83ed9885f1e3e6 extra : source : a3b7bdfdb116300daa3f49e0dfc96177e1369440
2024-11-28 23:31:56 +00:00 · 2017-08-01 20:02:59 +00:00 · 2017-08-01 20:02:59 +00:00 · e8c05596d8
commit e8c05596d8
parent 4ee635be2a
11 changed files with 120 additions and 100 deletions
--- a/taskcluster/ci/build/android-stuff.yml
+++ b/taskcluster/ci/build/android-stuff.yml
@ -39,9 +39,9 @@ android-dependencies/opt:
        custom-build-variant-cfg: api-16-gradle-dependencies
        tooltool-downloads: internal
        job-script: taskcluster/scripts/builder/build-android-dependencies.sh
-    optimizations:
-      - - skip-unless-changed
-        - - "mobile/android/config/**"
+    optimization:
+        skip-unless-changed:
+          - "mobile/android/config/**"
          - "testing/mozharness/configs/builds/releng_sub_android_configs/*gradle_dependencies.py"
          - "**/*.gradle"
          - "taskcluster/docker/android-build/**"
@ -82,9 +82,9 @@ android-test/opt:
        secrets: true
        custom-build-variant-cfg: android-test
        tooltool-downloads: internal
-    optimizations:
-      - - skip-unless-changed
-        - - "mobile/android/base/**"
+    optimization:
+        skip-unless-changed:
+          - "mobile/android/base/**"
          - "mobile/android/config/**"
          - "mobile/android/tests/background/junit4/**"
          - "**/*.gradle"
@ -140,9 +140,9 @@ android-lint/opt:
        secrets: true
        custom-build-variant-cfg: android-lint
        tooltool-downloads: internal
-    optimizations:
-      - - skip-unless-changed
-        - - "mobile/android/**/*.java"
+    optimization:
+        skip-unless-changed:
+          - "mobile/android/**/*.java"
          - "mobile/android/**/*.jpeg"
          - "mobile/android/**/*.jpg"
          - "mobile/android/**/*.png"
@ -192,9 +192,9 @@ android-checkstyle/opt:
        secrets: true
        custom-build-variant-cfg: android-checkstyle
        tooltool-downloads: internal
-    optimizations:
-      - - skip-unless-changed
-        - - "mobile/android/**/checkstyle.xml"
+    optimization:
+        skip-unless-changed:
+          - "mobile/android/**/checkstyle.xml"
          - "mobile/android/**/*.java"
          - "mobile/android/**/Makefile.in"
          - "mobile/android/config/**"
@ -246,9 +246,9 @@ android-findbugs/opt:
        secrets: true
        custom-build-variant-cfg: android-findbugs
        tooltool-downloads: internal
-    optimizations:
-      - - skip-unless-changed
-        - - "mobile/android/**/*.java"
+    optimization:
+        skip-unless-changed:
+          - "mobile/android/**/*.java"
          - "mobile/android/**/Makefile.in"
          - "mobile/android/config/**"
          - "mobile/android/**/moz.build"
--- a/taskcluster/taskgraph/generator.py
+++ b/taskcluster/taskgraph/generator.py
@ -66,7 +66,7 @@ class Kind(object):
                      label=task_dict['label'],
                      attributes=task_dict['attributes'],
                      task=task_dict['task'],
-                      optimizations=task_dict.get('optimizations'),
+                      optimization=task_dict.get('optimization'),
                      dependencies=task_dict.get('dependencies'))
                 for task_dict in transforms(trans_config, inputs)]
        return tasks
--- a/taskcluster/taskgraph/optimize.py
+++ b/taskcluster/taskgraph/optimize.py
@ -7,6 +7,7 @@ from __future__ import absolute_import, print_function, unicode_literals
 import logging
 import os
 import requests
+from collections import defaultdict

 from .graph import Graph
 from . import files_changed
@ -45,17 +46,13 @@ def optimize_task_graph(target_task_graph, params, do_not_optimize, existing_tas

 def optimize_task(task, params):
    """
-    Optimize a single task by running its optimizations in order until one
-    succeeds.
+    Run the optimization for a given task
    """
-    for opt in task.optimizations:
-        opt_type, args = opt[0], opt[1:]
-        opt_fn = _optimizations[opt_type]
-        opt_result = opt_fn(task, params, *args)
-        if opt_result:
-            return opt_result
-
-    return False
+    if not task.optimization:
+        return False
+    opt_type, arg = task.optimization.items()[0]
+    opt_fn = _optimizations[opt_type]
+    return opt_fn(task, params, arg)


 def annotate_task_graph(target_task_graph, params, do_not_optimize,
@ -71,6 +68,7 @@ def annotate_task_graph(target_task_graph, params, do_not_optimize,

    # set .optimized for all tasks, and .task_id for optimized tasks
    # with replacements
+    opt_counts = defaultdict(lambda: {'away': 0, 'replaced': 0})
    for label in target_task_graph.graph.visit_postorder():
        task = target_task_graph.tasks[label]
        named_task_dependencies = named_links_dict.get(label, {})
@ -85,19 +83,23 @@ def annotate_task_graph(target_task_graph, params, do_not_optimize,

        # if this task is blacklisted, don't even consider optimizing
        replacement_task_id = None
+        opt_by = None
        if label in do_not_optimize:
            optimized = False
        # Let's check whether this task has been created before
        elif existing_tasks is not None and label in existing_tasks:
            optimized = True
            replacement_task_id = existing_tasks[label]
+            opt_by = "existing_tasks"
        # otherwise, examine the task itself (which may be an expensive operation)
        else:
            opt_result = optimize_task(task, params)

            # use opt_result to determine values for optimized, replacement_task_id
            optimized = bool(opt_result)
-            replacement_task_id = opt_result if opt_result and opt_result is not True else None
+            if optimized:
+                opt_by = task.optimization.keys()[0]
+                replacement_task_id = opt_result if opt_result is not True else None

        task.optimized = optimized
        task.task_id = replacement_task_id
@ -106,14 +108,25 @@ def annotate_task_graph(target_task_graph, params, do_not_optimize,

        if optimized:
            if replacement_task_id:
+                opt_counts[opt_by]['replaced'] += 1
                logger.debug("optimizing `{}`, replacing with task `{}`"
                             .format(label, replacement_task_id))
            else:
+                opt_counts[opt_by]['away'] += 1
                logger.debug("optimizing `{}` away".format(label))
                # note: any dependent tasks will fail when they see this
+
+    for opt_by in sorted(opt_counts):
+        counts = opt_counts[opt_by]
+        if counts['away'] and not counts['replaced']:
+            msg = "optimized away {} tasks for {}: ".format(counts['away'], opt_by)
+        elif counts['replaced'] and not counts['away']:
+            msg = "optimized {} tasks, replacing with other tasks, for {}: ".format(
+                counts['away'], opt_by)
        else:
-            if replacement_task_id:
-                raise Exception("{}: optimize_task returned False with a taskId".format(label))
+            msg = "optimized {} tasks for {}, replacing {} and optimizing {} away".format(
+                sum(counts.values()), opt_by, counts['replaced'], counts['away'])
+        logger.info(msg)


 def get_subgraph(annotated_task_graph, named_links_dict, label_to_taskid):
@ -168,21 +181,22 @@ def optimization(name):


@optimization('index-search')
-def opt_index_search(task, params, index_path):
-    try:
-        task_id = find_task_id(
-            index_path,
-            use_proxy=bool(os.environ.get('TASK_ID')))
-
-        return task_id or True
-    except requests.exceptions.HTTPError:
-        pass
+def opt_index_search(task, params, index_paths):
+    for index_path in index_paths:
+        try:
+            task_id = find_task_id(
+                index_path,
+                use_proxy=bool(os.environ.get('TASK_ID')))
+            return task_id
+        except requests.exceptions.HTTPError:
+            # 404 will end up here and go on to the next index path
+            pass

    return False


@optimization('seta')
-def opt_seta(task, params):
+def opt_seta(task, params, _):
    bbb_task = False

    # for bbb tasks we need to send in the buildbot buildername
--- a/taskcluster/taskgraph/task.py
+++ b/taskcluster/taskgraph/task.py
@ -13,7 +13,7 @@ class Task(object):
    - label; the label for this task
    - attributes: a dictionary of attributes for this task (used for filtering)
    - task: the task definition (JSON-able dictionary)
-    - optimizations: optimizations to apply to the task (see taskgraph.optimize)
+    - optimization: optimization to apply to the task (see taskgraph.optimize)
    - dependencies: tasks this one depends on, in the form {name: label}, for example
      {'build': 'build-linux64/opt', 'docker-image': 'build-docker-image-desktop-test'}

@ -26,7 +26,7 @@ class Task(object):
    display, comparison, serialization, etc. It has no functionality of its own.
    """
    def __init__(self, kind, label, attributes, task,
-                 optimizations=None, dependencies=None):
+                 optimization=None, dependencies=None):
        self.kind = kind
        self.label = label
        self.attributes = attributes
@ -37,7 +37,7 @@ class Task(object):

        self.attributes['kind'] = kind

-        self.optimizations = optimizations or []
+        self.optimization = optimization
        self.dependencies = dependencies or {}

    def __eq__(self, other):
@ -46,12 +46,12 @@ class Task(object):
            self.attributes == other.attributes and \
            self.task == other.task and \
            self.task_id == other.task_id and \
-            self.optimizations == other.optimizations and \
+            self.optimization == other.optimization and \
            self.dependencies == other.dependencies

    def __repr__(self):
        return ('Task({kind!r}, {label!r}, {attributes!r}, {task!r}, '
-                'optimizations={optimizations!r}, '
+                'optimization={optimization!r}, '
                'dependencies={dependencies!r})'.format(**self.__dict__))

    def to_json(self):
@ -60,7 +60,7 @@ class Task(object):
            'label': self.label,
            'attributes': self.attributes,
            'dependencies': self.dependencies,
-            'optimizations': self.optimizations,
+            'optimization': self.optimization,
            'task': self.task,
        }
        if self.task_id:
@ -79,7 +79,7 @@ class Task(object):
            label=task_dict['label'],
            attributes=task_dict['attributes'],
            task=task_dict['task'],
-            optimizations=task_dict['optimizations'],
+            optimization=task_dict['optimization'],
            dependencies=task_dict.get('dependencies'))
        if 'task_id' in task_dict:
            rv.task_id = task_dict['task_id']
--- a/taskcluster/taskgraph/test/test_optimize.py
+++ b/taskcluster/taskgraph/test/test_optimize.py
@ -61,8 +61,8 @@ class TestOptimize(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # set up some simple optimization functions
-        optimization('no-optimize')(lambda self, params: False)
-        optimization('optimize-away')(lambda self, params: True)
+        optimization('no-optimize')(lambda self, params, arg: False)
+        optimization('optimize-away')(lambda self, params, arg: True)
        optimization('optimize-to-task')(lambda self, params, task: task)

    def make_task(self, label, optimization=None, task_def=None, optimized=None, task_id=None):
@ -70,9 +70,9 @@ class TestOptimize(unittest.TestCase):
        task = Task(kind='test', label=label, attributes={}, task=task_def)
        task.optimized = optimized
        if optimization:
-            task.optimizations = [optimization]
+            task.optimization = optimization
        else:
-            task.optimizations = []
+            task.optimization = None
        task.task_id = task_id
        return task

@ -92,9 +92,9 @@ class TestOptimize(unittest.TestCase):
    def test_annotate_task_graph_no_optimize(self):
        "annotating marks everything as un-optimized if the kind returns that"
        graph = self.make_graph(
-            self.make_task('task1', ['no-optimize']),
-            self.make_task('task2', ['no-optimize']),
-            self.make_task('task3', ['no-optimize']),
+            self.make_task('task1', {'no-optimize': []}),
+            self.make_task('task2', {'no-optimize': []}),
+            self.make_task('task3', {'no-optimize': []}),
            ('task2', 'task1', 'build'),
            ('task2', 'task3', 'image'),
        )
@ -109,8 +109,8 @@ class TestOptimize(unittest.TestCase):
    def test_annotate_task_graph_optimize_away_dependency(self):
        "raises exception if kind optimizes away a task on which another depends"
        graph = self.make_graph(
-            self.make_task('task1', ['optimize-away']),
-            self.make_task('task2', ['no-optimize']),
+            self.make_task('task1', {'optimize-away': []}),
+            self.make_task('task2', {'no-optimize': []}),
            ('task2', 'task1', 'build'),
        )
        self.assertRaises(
@ -121,8 +121,8 @@ class TestOptimize(unittest.TestCase):
    def test_annotate_task_graph_do_not_optimize(self):
        "annotating marks everything as un-optimized if in do_not_optimize"
        graph = self.make_graph(
-            self.make_task('task1', ['optimize-away']),
-            self.make_task('task2', ['optimize-away']),
+            self.make_task('task1', {'optimize-away': True}),
+            self.make_task('task2', {'optimize-away': True}),
            ('task2', 'task1', 'build'),
        )
        label_to_taskid = {}
@ -138,9 +138,9 @@ class TestOptimize(unittest.TestCase):
    def test_annotate_task_graph_nos_do_not_propagate(self):
        "a task with a non-optimized dependency can be optimized"
        graph = self.make_graph(
-            self.make_task('task1', ['no-optimize']),
-            self.make_task('task2', ['optimize-to-task', 'taskid']),
-            self.make_task('task3', ['optimize-to-task', 'taskid']),
+            self.make_task('task1', {'no-optimize': []}),
+            self.make_task('task2', {'optimize-to-task': 'taskid'}),
+            self.make_task('task3', {'optimize-to-task': 'taskid'}),
            ('task2', 'task1', 'build'),
            ('task2', 'task3', 'image'),
        )
@ -233,9 +233,9 @@ class TestOptimize(unittest.TestCase):
    def test_optimize(self):
        "optimize_task_graph annotates and extracts the subgraph from a simple graph"
        input = self.make_graph(
-            self.make_task('task1', ['optimize-to-task', 'dep1']),
-            self.make_task('task2', ['no-optimize']),
-            self.make_task('task3', ['no-optimize']),
+            self.make_task('task1', {'optimize-to-task': 'dep1'}),
+            self.make_task('task2', {'no-optimize': []}),
+            self.make_task('task3', {'no-optimize': []}),
            ('task2', 'task1', 'build'),
            ('task2', 'task3', 'image'),
        )
--- a/taskcluster/taskgraph/test/test_taskgraph.py
+++ b/taskcluster/taskgraph/test/test_taskgraph.py
@ -24,7 +24,7 @@ class TestTaskGraph(unittest.TestCase):
            'b': Task(kind='test', label='b',
                      attributes={},
                      task={'task': 'def'},
-                      optimizations=[['seta']],
+                      optimization={'seta': None},
                      # note that this dep is ignored, superseded by that
                      # from the taskgraph's edges
                      dependencies={'first': 'a'}),
@ -41,7 +41,7 @@ class TestTaskGraph(unittest.TestCase):
                'attributes': {'attr': 'a-task', 'kind': 'test'},
                'task': {'taskdef': True},
                'dependencies': {'edgelabel': 'b'},
-                'optimizations': [],
+                'optimization': None,
            },
            'b': {
                'kind': 'test',
@ -49,7 +49,7 @@ class TestTaskGraph(unittest.TestCase):
                'attributes': {'kind': 'test'},
                'task': {'task': 'def'},
                'dependencies': {},
-                'optimizations': [['seta']],
+                'optimization': {'seta': None},
            }
        })

@ -60,14 +60,14 @@ class TestTaskGraph(unittest.TestCase):
                label='a',
                attributes={},
                dependencies={'prereq': 'b'},  # must match edges, below
-                optimizations=[['seta']],
+                optimization={'seta': None},
                task={'task': 'def'}),
            'b': Task(
                kind='pre',
                label='b',
                attributes={},
                dependencies={},
-                optimizations=[['seta']],
+                optimization={'seta': None},
                task={'task': 'def2'}),
        }, graph=Graph(nodes={'a', 'b'}, edges={('a', 'b', 'prereq')}))

--- a/taskcluster/taskgraph/transforms/docker_image.py
+++ b/taskcluster/taskgraph/transforms/docker_image.py
@ -61,9 +61,9 @@ def fill_template(config, tasks):
        # up on level 3 at some point if most tasks use this as a common image
        # for a given context hash, a worker within Taskcluster does not need to contain
        # the same image per branch.
-        optimizations = [['index-search', '{}.level-{}.{}.hash.{}'.format(
-            INDEX_PREFIX, level, image_name, context_hash)]
-            for level in reversed(range(int(config.params['level']), 4))]
+        optimization = {'index-search': ['{}.level-{}.{}.hash.{}'.format(
+            INDEX_PREFIX, level, image_name, context_hash)
+            for level in reversed(range(int(config.params['level']), 4))]}

        # Adjust the zstandard compression level based on the execution level.
        # We use faster compression for level 1 because we care more about
@ -80,7 +80,7 @@ def fill_template(config, tasks):
            'attributes': {'image_name': image_name},
            'expires-after': '1 year',
            'routes': routes,
-            'optimizations': optimizations,
+            'optimization': optimization,
            'scopes': ['secrets:get:project/taskcluster/gecko/hgfingerprint'],
            'treeherder': {
                'symbol': job_symbol,
--- a/taskcluster/taskgraph/transforms/job/init.py
+++ b/taskcluster/taskgraph/transforms/job/init.py
@ -27,6 +27,7 @@ from voluptuous import (
    Extra,
    Optional,
    Required,
+    Exclusive,
 )

 logger = logging.getLogger(__name__)
@ -59,13 +60,14 @@ job_description_schema = Schema({
    Optional('index'): task_description_schema['index'],
    Optional('run-on-projects'): task_description_schema['run-on-projects'],
    Optional('coalesce'): task_description_schema['coalesce'],
-    Optional('optimizations'): task_description_schema['optimizations'],
+    Exclusive('optimization', 'optimization'): task_description_schema['optimization'],
    Optional('needs-sccache'): task_description_schema['needs-sccache'],

-    # The "when" section contains descriptions of the circumstances
-    # under which this task should be included in the task graph.  This
-    # will be converted into an element in the `optimizations` list.
-    Optional('when'): Any({
+    # The "when" section contains descriptions of the circumstances under which
+    # this task should be included in the task graph.  This will be converted
+    # into an optimization, so it cannot be specified in a job description that
+    # also gives 'optimization'.
+    Exclusive('when', 'optimization'): Any({
        # This task only needs to be run if a file matching one of the given
        # patterns has changed in the push.  The patterns use the mozpack
        # match function (python/mozbuild/mozpack/path.py).
@ -103,11 +105,12 @@ def validate(config, jobs):
 def rewrite_when_to_optimization(config, jobs):
    for job in jobs:
        when = job.pop('when', {})
-        files_changed = when.get('files-changed')
-        if not files_changed:
+        if not when:
            yield job
            continue

+        files_changed = when.get('files-changed')
+
        # add some common files
        files_changed.extend([
            '{}/**'.format(config.path),
@ -118,7 +121,7 @@ def rewrite_when_to_optimization(config, jobs):
                job['worker']['docker-image']['in-tree']))

        # "only when files changed" implies "skip if files have not changed"
-        job.setdefault('optimizations', []).append(['skip-unless-changed', files_changed])
+        job['optimization'] = {'skip-unless-changed': files_changed}

        assert 'when' not in job
        yield job
--- a/taskcluster/taskgraph/transforms/job/toolchain.py
+++ b/taskcluster/taskgraph/transforms/job/toolchain.py
@ -53,7 +53,7 @@ toolchain_run_schema = Schema({
 })


-def add_optimizations(config, run, taskdesc):
+def add_optimization(config, run, taskdesc):
    files = list(run.get('resources', []))
    # This file
    files.append('taskcluster/taskgraph/transforms/job/toolchain.py')
@ -81,13 +81,13 @@ def add_optimizations(config, run, taskdesc):
        'digest': digest,
    }

-    optimizations = taskdesc.setdefault('optimizations', [])
-
    # We'll try to find a cached version of the toolchain at levels above
    # and including the current level, starting at the highest level.
+    index_routes = []
    for level in reversed(range(int(config.params['level']), 4)):
        subs['level'] = level
-        optimizations.append(['index-search', TOOLCHAIN_INDEX.format(**subs)])
+        index_routes.append(TOOLCHAIN_INDEX.format(**subs))
+    taskdesc['optimization'] = {'index-search': index_routes}

    # ... and cache at the lowest level.
    taskdesc.setdefault('routes', []).append(
@ -136,7 +136,7 @@ def docker_worker_toolchain(config, job, taskdesc):
    if 'toolchain-alias' in run:
        attributes['toolchain-alias'] = run['toolchain-alias']

-    add_optimizations(config, run, taskdesc)
+    add_optimization(config, run, taskdesc)


@run_job_using("generic-worker", "toolchain-script", schema=toolchain_run_schema)
@ -183,4 +183,4 @@ def windows_toolchain(config, job, taskdesc):
    if 'toolchain-alias' in run:
        attributes['toolchain-alias'] = run['toolchain-alias']

-    add_optimizations(config, run, taskdesc)
+    add_optimization(config, run, taskdesc)
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@ -167,17 +167,19 @@ task_description_schema = Schema({
        'size': int,
    },

-    # Optimizations to perform on this task during the optimization phase,
-    # specified in order.  These optimizations are defined in
-    # taskcluster/taskgraph/optimize.py.
-    Optional('optimizations'): [Any(
-        # search the index for the given index namespace, and replace this task if found
-        ['index-search', basestring],
+    # Optimization to perform on this task during the optimization phase.
+    # Optimizations are defined in taskcluster/taskgraph/optimize.py.
+    Required('optimization', default=None): Any(
+        # always run this task (default)
+        None,
+        # search the index for the given index namespaces, and replace this task if found
+        # the search occurs in order, with the first match winning
+        {'index-search': [basestring]},
        # consult SETA and skip this task if it is low-value
-        ['seta'],
+        {'seta': None},
        # skip this task if none of the given file patterns match
-        ['skip-unless-changed', [basestring]],
-    )],
+        {'skip-unless-changed': [basestring]},
+    ),

    # the provisioner-id/worker-type for the task.  The following parameters will
    # be substituted in this string:
@ -1232,7 +1234,7 @@ def build_task(config, tasks):
            'task': task_def,
            'dependencies': task.get('dependencies', {}),
            'attributes': attributes,
-            'optimizations': task.get('optimizations', []),
+            'optimization': task.get('optimization', None),
        }


--- a/taskcluster/taskgraph/transforms/tests.py
+++ b/taskcluster/taskgraph/transforms/tests.py
@ -901,7 +901,6 @@ def make_job_description(config, tests):
        jobdesc['name'] = name
        jobdesc['label'] = label
        jobdesc['description'] = test['description']
-        jobdesc['when'] = test.get('when', {})
        jobdesc['attributes'] = attributes
        jobdesc['dependencies'] = {'build': build_label}

@ -931,9 +930,11 @@ def make_job_description(config, tests):
        }

        # run SETA unless this is a try push
-        jobdesc['optimizations'] = optimizations = []
-        if config.params['project'] != 'try':
-            optimizations.append(['seta'])
+        if config.params['project'] == 'try':
+            jobdesc['when'] = test.get('when', {})
+        else:
+            # when SETA is enabled, the 'when' does not apply (optimizations don't mix)
+            jobdesc['optimization'] = {'seta': None}

        run = jobdesc['run'] = {}
        run['using'] = 'mozharness-test'