Bug 1383880: allow only one optimization per task; r=ahal,glandium

It is not at *all* clear how multiple optimizations for a single task should
interact. No simple logical operation is right in all cases, and in fact in
most imaginable cases the desired behavior turns out to be independent of all
but one of the optimizations. For example, given both `seta` and
`skip-unless-files-changed` optimizations, if SETA says to skip a test, it is
low value and should be skipped regardless of what files have changed. But if
SETA says to run a test, then it has likely been skipped in previous pushes, so
it should be run regardless of what has changed in this push.

This also adds a bit more output about optimization, that may be useful for
anyone wondering why a particular job didn't run.

MozReview-Commit-ID: 3OsvRnWjai4

--HG--
extra : rebase_source : ba0aa536e8c474b36c63d1447c83ed9885f1e3e6
extra : source : a3b7bdfdb116300daa3f49e0dfc96177e1369440
This commit is contained in:
Dustin J. Mitchell 2017-08-01 20:02:59 +00:00
parent 4ee635be2a
commit e8c05596d8
11 changed files with 120 additions and 100 deletions

View File

@ -39,9 +39,9 @@ android-dependencies/opt:
custom-build-variant-cfg: api-16-gradle-dependencies
tooltool-downloads: internal
job-script: taskcluster/scripts/builder/build-android-dependencies.sh
optimizations:
- - skip-unless-changed
- - "mobile/android/config/**"
optimization:
skip-unless-changed:
- "mobile/android/config/**"
- "testing/mozharness/configs/builds/releng_sub_android_configs/*gradle_dependencies.py"
- "**/*.gradle"
- "taskcluster/docker/android-build/**"
@ -82,9 +82,9 @@ android-test/opt:
secrets: true
custom-build-variant-cfg: android-test
tooltool-downloads: internal
optimizations:
- - skip-unless-changed
- - "mobile/android/base/**"
optimization:
skip-unless-changed:
- "mobile/android/base/**"
- "mobile/android/config/**"
- "mobile/android/tests/background/junit4/**"
- "**/*.gradle"
@ -140,9 +140,9 @@ android-lint/opt:
secrets: true
custom-build-variant-cfg: android-lint
tooltool-downloads: internal
optimizations:
- - skip-unless-changed
- - "mobile/android/**/*.java"
optimization:
skip-unless-changed:
- "mobile/android/**/*.java"
- "mobile/android/**/*.jpeg"
- "mobile/android/**/*.jpg"
- "mobile/android/**/*.png"
@ -192,9 +192,9 @@ android-checkstyle/opt:
secrets: true
custom-build-variant-cfg: android-checkstyle
tooltool-downloads: internal
optimizations:
- - skip-unless-changed
- - "mobile/android/**/checkstyle.xml"
optimization:
skip-unless-changed:
- "mobile/android/**/checkstyle.xml"
- "mobile/android/**/*.java"
- "mobile/android/**/Makefile.in"
- "mobile/android/config/**"
@ -246,9 +246,9 @@ android-findbugs/opt:
secrets: true
custom-build-variant-cfg: android-findbugs
tooltool-downloads: internal
optimizations:
- - skip-unless-changed
- - "mobile/android/**/*.java"
optimization:
skip-unless-changed:
- "mobile/android/**/*.java"
- "mobile/android/**/Makefile.in"
- "mobile/android/config/**"
- "mobile/android/**/moz.build"

View File

@ -66,7 +66,7 @@ class Kind(object):
label=task_dict['label'],
attributes=task_dict['attributes'],
task=task_dict['task'],
optimizations=task_dict.get('optimizations'),
optimization=task_dict.get('optimization'),
dependencies=task_dict.get('dependencies'))
for task_dict in transforms(trans_config, inputs)]
return tasks

View File

@ -7,6 +7,7 @@ from __future__ import absolute_import, print_function, unicode_literals
import logging
import os
import requests
from collections import defaultdict
from .graph import Graph
from . import files_changed
@ -45,17 +46,13 @@ def optimize_task_graph(target_task_graph, params, do_not_optimize, existing_tas
def optimize_task(task, params):
"""
Optimize a single task by running its optimizations in order until one
succeeds.
Run the optimization for a given task
"""
for opt in task.optimizations:
opt_type, args = opt[0], opt[1:]
opt_fn = _optimizations[opt_type]
opt_result = opt_fn(task, params, *args)
if opt_result:
return opt_result
return False
if not task.optimization:
return False
opt_type, arg = task.optimization.items()[0]
opt_fn = _optimizations[opt_type]
return opt_fn(task, params, arg)
def annotate_task_graph(target_task_graph, params, do_not_optimize,
@ -71,6 +68,7 @@ def annotate_task_graph(target_task_graph, params, do_not_optimize,
# set .optimized for all tasks, and .task_id for optimized tasks
# with replacements
opt_counts = defaultdict(lambda: {'away': 0, 'replaced': 0})
for label in target_task_graph.graph.visit_postorder():
task = target_task_graph.tasks[label]
named_task_dependencies = named_links_dict.get(label, {})
@ -85,19 +83,23 @@ def annotate_task_graph(target_task_graph, params, do_not_optimize,
# if this task is blacklisted, don't even consider optimizing
replacement_task_id = None
opt_by = None
if label in do_not_optimize:
optimized = False
# Let's check whether this task has been created before
elif existing_tasks is not None and label in existing_tasks:
optimized = True
replacement_task_id = existing_tasks[label]
opt_by = "existing_tasks"
# otherwise, examine the task itself (which may be an expensive operation)
else:
opt_result = optimize_task(task, params)
# use opt_result to determine values for optimized, replacement_task_id
optimized = bool(opt_result)
replacement_task_id = opt_result if opt_result and opt_result is not True else None
if optimized:
opt_by = task.optimization.keys()[0]
replacement_task_id = opt_result if opt_result is not True else None
task.optimized = optimized
task.task_id = replacement_task_id
@ -106,14 +108,25 @@ def annotate_task_graph(target_task_graph, params, do_not_optimize,
if optimized:
if replacement_task_id:
opt_counts[opt_by]['replaced'] += 1
logger.debug("optimizing `{}`, replacing with task `{}`"
.format(label, replacement_task_id))
else:
opt_counts[opt_by]['away'] += 1
logger.debug("optimizing `{}` away".format(label))
# note: any dependent tasks will fail when they see this
for opt_by in sorted(opt_counts):
counts = opt_counts[opt_by]
if counts['away'] and not counts['replaced']:
msg = "optimized away {} tasks for {}: ".format(counts['away'], opt_by)
elif counts['replaced'] and not counts['away']:
msg = "optimized {} tasks, replacing with other tasks, for {}: ".format(
counts['away'], opt_by)
else:
if replacement_task_id:
raise Exception("{}: optimize_task returned False with a taskId".format(label))
msg = "optimized {} tasks for {}, replacing {} and optimizing {} away".format(
sum(counts.values()), opt_by, counts['replaced'], counts['away'])
logger.info(msg)
def get_subgraph(annotated_task_graph, named_links_dict, label_to_taskid):
@ -168,21 +181,22 @@ def optimization(name):
@optimization('index-search')
def opt_index_search(task, params, index_path):
try:
task_id = find_task_id(
index_path,
use_proxy=bool(os.environ.get('TASK_ID')))
return task_id or True
except requests.exceptions.HTTPError:
pass
def opt_index_search(task, params, index_paths):
for index_path in index_paths:
try:
task_id = find_task_id(
index_path,
use_proxy=bool(os.environ.get('TASK_ID')))
return task_id
except requests.exceptions.HTTPError:
# 404 will end up here and go on to the next index path
pass
return False
@optimization('seta')
def opt_seta(task, params):
def opt_seta(task, params, _):
bbb_task = False
# for bbb tasks we need to send in the buildbot buildername

View File

@ -13,7 +13,7 @@ class Task(object):
- label; the label for this task
- attributes: a dictionary of attributes for this task (used for filtering)
- task: the task definition (JSON-able dictionary)
- optimizations: optimizations to apply to the task (see taskgraph.optimize)
- optimization: optimization to apply to the task (see taskgraph.optimize)
- dependencies: tasks this one depends on, in the form {name: label}, for example
{'build': 'build-linux64/opt', 'docker-image': 'build-docker-image-desktop-test'}
@ -26,7 +26,7 @@ class Task(object):
display, comparison, serialization, etc. It has no functionality of its own.
"""
def __init__(self, kind, label, attributes, task,
optimizations=None, dependencies=None):
optimization=None, dependencies=None):
self.kind = kind
self.label = label
self.attributes = attributes
@ -37,7 +37,7 @@ class Task(object):
self.attributes['kind'] = kind
self.optimizations = optimizations or []
self.optimization = optimization
self.dependencies = dependencies or {}
def __eq__(self, other):
@ -46,12 +46,12 @@ class Task(object):
self.attributes == other.attributes and \
self.task == other.task and \
self.task_id == other.task_id and \
self.optimizations == other.optimizations and \
self.optimization == other.optimization and \
self.dependencies == other.dependencies
def __repr__(self):
return ('Task({kind!r}, {label!r}, {attributes!r}, {task!r}, '
'optimizations={optimizations!r}, '
'optimization={optimization!r}, '
'dependencies={dependencies!r})'.format(**self.__dict__))
def to_json(self):
@ -60,7 +60,7 @@ class Task(object):
'label': self.label,
'attributes': self.attributes,
'dependencies': self.dependencies,
'optimizations': self.optimizations,
'optimization': self.optimization,
'task': self.task,
}
if self.task_id:
@ -79,7 +79,7 @@ class Task(object):
label=task_dict['label'],
attributes=task_dict['attributes'],
task=task_dict['task'],
optimizations=task_dict['optimizations'],
optimization=task_dict['optimization'],
dependencies=task_dict.get('dependencies'))
if 'task_id' in task_dict:
rv.task_id = task_dict['task_id']

View File

@ -61,8 +61,8 @@ class TestOptimize(unittest.TestCase):
@classmethod
def setUpClass(cls):
# set up some simple optimization functions
optimization('no-optimize')(lambda self, params: False)
optimization('optimize-away')(lambda self, params: True)
optimization('no-optimize')(lambda self, params, arg: False)
optimization('optimize-away')(lambda self, params, arg: True)
optimization('optimize-to-task')(lambda self, params, task: task)
def make_task(self, label, optimization=None, task_def=None, optimized=None, task_id=None):
@ -70,9 +70,9 @@ class TestOptimize(unittest.TestCase):
task = Task(kind='test', label=label, attributes={}, task=task_def)
task.optimized = optimized
if optimization:
task.optimizations = [optimization]
task.optimization = optimization
else:
task.optimizations = []
task.optimization = None
task.task_id = task_id
return task
@ -92,9 +92,9 @@ class TestOptimize(unittest.TestCase):
def test_annotate_task_graph_no_optimize(self):
"annotating marks everything as un-optimized if the kind returns that"
graph = self.make_graph(
self.make_task('task1', ['no-optimize']),
self.make_task('task2', ['no-optimize']),
self.make_task('task3', ['no-optimize']),
self.make_task('task1', {'no-optimize': []}),
self.make_task('task2', {'no-optimize': []}),
self.make_task('task3', {'no-optimize': []}),
('task2', 'task1', 'build'),
('task2', 'task3', 'image'),
)
@ -109,8 +109,8 @@ class TestOptimize(unittest.TestCase):
def test_annotate_task_graph_optimize_away_dependency(self):
"raises exception if kind optimizes away a task on which another depends"
graph = self.make_graph(
self.make_task('task1', ['optimize-away']),
self.make_task('task2', ['no-optimize']),
self.make_task('task1', {'optimize-away': []}),
self.make_task('task2', {'no-optimize': []}),
('task2', 'task1', 'build'),
)
self.assertRaises(
@ -121,8 +121,8 @@ class TestOptimize(unittest.TestCase):
def test_annotate_task_graph_do_not_optimize(self):
"annotating marks everything as un-optimized if in do_not_optimize"
graph = self.make_graph(
self.make_task('task1', ['optimize-away']),
self.make_task('task2', ['optimize-away']),
self.make_task('task1', {'optimize-away': True}),
self.make_task('task2', {'optimize-away': True}),
('task2', 'task1', 'build'),
)
label_to_taskid = {}
@ -138,9 +138,9 @@ class TestOptimize(unittest.TestCase):
def test_annotate_task_graph_nos_do_not_propagate(self):
"a task with a non-optimized dependency can be optimized"
graph = self.make_graph(
self.make_task('task1', ['no-optimize']),
self.make_task('task2', ['optimize-to-task', 'taskid']),
self.make_task('task3', ['optimize-to-task', 'taskid']),
self.make_task('task1', {'no-optimize': []}),
self.make_task('task2', {'optimize-to-task': 'taskid'}),
self.make_task('task3', {'optimize-to-task': 'taskid'}),
('task2', 'task1', 'build'),
('task2', 'task3', 'image'),
)
@ -233,9 +233,9 @@ class TestOptimize(unittest.TestCase):
def test_optimize(self):
"optimize_task_graph annotates and extracts the subgraph from a simple graph"
input = self.make_graph(
self.make_task('task1', ['optimize-to-task', 'dep1']),
self.make_task('task2', ['no-optimize']),
self.make_task('task3', ['no-optimize']),
self.make_task('task1', {'optimize-to-task': 'dep1'}),
self.make_task('task2', {'no-optimize': []}),
self.make_task('task3', {'no-optimize': []}),
('task2', 'task1', 'build'),
('task2', 'task3', 'image'),
)

View File

@ -24,7 +24,7 @@ class TestTaskGraph(unittest.TestCase):
'b': Task(kind='test', label='b',
attributes={},
task={'task': 'def'},
optimizations=[['seta']],
optimization={'seta': None},
# note that this dep is ignored, superseded by that
# from the taskgraph's edges
dependencies={'first': 'a'}),
@ -41,7 +41,7 @@ class TestTaskGraph(unittest.TestCase):
'attributes': {'attr': 'a-task', 'kind': 'test'},
'task': {'taskdef': True},
'dependencies': {'edgelabel': 'b'},
'optimizations': [],
'optimization': None,
},
'b': {
'kind': 'test',
@ -49,7 +49,7 @@ class TestTaskGraph(unittest.TestCase):
'attributes': {'kind': 'test'},
'task': {'task': 'def'},
'dependencies': {},
'optimizations': [['seta']],
'optimization': {'seta': None},
}
})
@ -60,14 +60,14 @@ class TestTaskGraph(unittest.TestCase):
label='a',
attributes={},
dependencies={'prereq': 'b'}, # must match edges, below
optimizations=[['seta']],
optimization={'seta': None},
task={'task': 'def'}),
'b': Task(
kind='pre',
label='b',
attributes={},
dependencies={},
optimizations=[['seta']],
optimization={'seta': None},
task={'task': 'def2'}),
}, graph=Graph(nodes={'a', 'b'}, edges={('a', 'b', 'prereq')}))

View File

@ -61,9 +61,9 @@ def fill_template(config, tasks):
# up on level 3 at some point if most tasks use this as a common image
# for a given context hash, a worker within Taskcluster does not need to contain
# the same image per branch.
optimizations = [['index-search', '{}.level-{}.{}.hash.{}'.format(
INDEX_PREFIX, level, image_name, context_hash)]
for level in reversed(range(int(config.params['level']), 4))]
optimization = {'index-search': ['{}.level-{}.{}.hash.{}'.format(
INDEX_PREFIX, level, image_name, context_hash)
for level in reversed(range(int(config.params['level']), 4))]}
# Adjust the zstandard compression level based on the execution level.
# We use faster compression for level 1 because we care more about
@ -80,7 +80,7 @@ def fill_template(config, tasks):
'attributes': {'image_name': image_name},
'expires-after': '1 year',
'routes': routes,
'optimizations': optimizations,
'optimization': optimization,
'scopes': ['secrets:get:project/taskcluster/gecko/hgfingerprint'],
'treeherder': {
'symbol': job_symbol,

View File

@ -27,6 +27,7 @@ from voluptuous import (
Extra,
Optional,
Required,
Exclusive,
)
logger = logging.getLogger(__name__)
@ -59,13 +60,14 @@ job_description_schema = Schema({
Optional('index'): task_description_schema['index'],
Optional('run-on-projects'): task_description_schema['run-on-projects'],
Optional('coalesce'): task_description_schema['coalesce'],
Optional('optimizations'): task_description_schema['optimizations'],
Exclusive('optimization', 'optimization'): task_description_schema['optimization'],
Optional('needs-sccache'): task_description_schema['needs-sccache'],
# The "when" section contains descriptions of the circumstances
# under which this task should be included in the task graph. This
# will be converted into an element in the `optimizations` list.
Optional('when'): Any({
# The "when" section contains descriptions of the circumstances under which
# this task should be included in the task graph. This will be converted
# into an optimization, so it cannot be specified in a job description that
# also gives 'optimization'.
Exclusive('when', 'optimization'): Any({
# This task only needs to be run if a file matching one of the given
# patterns has changed in the push. The patterns use the mozpack
# match function (python/mozbuild/mozpack/path.py).
@ -103,11 +105,12 @@ def validate(config, jobs):
def rewrite_when_to_optimization(config, jobs):
for job in jobs:
when = job.pop('when', {})
files_changed = when.get('files-changed')
if not files_changed:
if not when:
yield job
continue
files_changed = when.get('files-changed')
# add some common files
files_changed.extend([
'{}/**'.format(config.path),
@ -118,7 +121,7 @@ def rewrite_when_to_optimization(config, jobs):
job['worker']['docker-image']['in-tree']))
# "only when files changed" implies "skip if files have not changed"
job.setdefault('optimizations', []).append(['skip-unless-changed', files_changed])
job['optimization'] = {'skip-unless-changed': files_changed}
assert 'when' not in job
yield job

View File

@ -53,7 +53,7 @@ toolchain_run_schema = Schema({
})
def add_optimizations(config, run, taskdesc):
def add_optimization(config, run, taskdesc):
files = list(run.get('resources', []))
# This file
files.append('taskcluster/taskgraph/transforms/job/toolchain.py')
@ -81,13 +81,13 @@ def add_optimizations(config, run, taskdesc):
'digest': digest,
}
optimizations = taskdesc.setdefault('optimizations', [])
# We'll try to find a cached version of the toolchain at levels above
# and including the current level, starting at the highest level.
index_routes = []
for level in reversed(range(int(config.params['level']), 4)):
subs['level'] = level
optimizations.append(['index-search', TOOLCHAIN_INDEX.format(**subs)])
index_routes.append(TOOLCHAIN_INDEX.format(**subs))
taskdesc['optimization'] = {'index-search': index_routes}
# ... and cache at the lowest level.
taskdesc.setdefault('routes', []).append(
@ -136,7 +136,7 @@ def docker_worker_toolchain(config, job, taskdesc):
if 'toolchain-alias' in run:
attributes['toolchain-alias'] = run['toolchain-alias']
add_optimizations(config, run, taskdesc)
add_optimization(config, run, taskdesc)
@run_job_using("generic-worker", "toolchain-script", schema=toolchain_run_schema)
@ -183,4 +183,4 @@ def windows_toolchain(config, job, taskdesc):
if 'toolchain-alias' in run:
attributes['toolchain-alias'] = run['toolchain-alias']
add_optimizations(config, run, taskdesc)
add_optimization(config, run, taskdesc)

View File

@ -167,17 +167,19 @@ task_description_schema = Schema({
'size': int,
},
# Optimizations to perform on this task during the optimization phase,
# specified in order. These optimizations are defined in
# taskcluster/taskgraph/optimize.py.
Optional('optimizations'): [Any(
# search the index for the given index namespace, and replace this task if found
['index-search', basestring],
# Optimization to perform on this task during the optimization phase.
# Optimizations are defined in taskcluster/taskgraph/optimize.py.
Required('optimization', default=None): Any(
# always run this task (default)
None,
# search the index for the given index namespaces, and replace this task if found
# the search occurs in order, with the first match winning
{'index-search': [basestring]},
# consult SETA and skip this task if it is low-value
['seta'],
{'seta': None},
# skip this task if none of the given file patterns match
['skip-unless-changed', [basestring]],
)],
{'skip-unless-changed': [basestring]},
),
# the provisioner-id/worker-type for the task. The following parameters will
# be substituted in this string:
@ -1232,7 +1234,7 @@ def build_task(config, tasks):
'task': task_def,
'dependencies': task.get('dependencies', {}),
'attributes': attributes,
'optimizations': task.get('optimizations', []),
'optimization': task.get('optimization', None),
}

View File

@ -901,7 +901,6 @@ def make_job_description(config, tests):
jobdesc['name'] = name
jobdesc['label'] = label
jobdesc['description'] = test['description']
jobdesc['when'] = test.get('when', {})
jobdesc['attributes'] = attributes
jobdesc['dependencies'] = {'build': build_label}
@ -931,9 +930,11 @@ def make_job_description(config, tests):
}
# run SETA unless this is a try push
jobdesc['optimizations'] = optimizations = []
if config.params['project'] != 'try':
optimizations.append(['seta'])
if config.params['project'] == 'try':
jobdesc['when'] = test.get('when', {})
else:
# when SETA is enabled, the 'when' does not apply (optimizations don't mix)
jobdesc['optimization'] = {'seta': None}
run = jobdesc['run'] = {}
run['using'] = 'mozharness-test'