Bug 1467359 - Implement fetch-url jobs using a transform; r=tomprince

Previously, the fetch kind was defined by a job "using" flavor.

An upcoming commit will need to derive new tasks from fetch job
definitions. In order to do this, we require a transform. And
this transform would need access to the original data from the
job description. But by the time the "using" transform runs, a
lot of this data is thrown away. It would be possible to stuff
the meaningful metadata inside attributes on the created task.
But this would result in the fetch logic being fragmented across
multiple Python modules (a fetch-specific transform and a job/using-
specific transform). I think it is better to keep all that logic
in a single Python module.

This commit converts the "using: fetch-url" job transform to a
dedicated transform for the fetch kind.

Since we're now using a dedicated transform, we no longer need
to use the normal job schema for defining fetch jobs. So we
refactor the schema a little so it is simpler.

I verified the taskgraph output is nearly identical to before by
diffing the JSON output of `mach taskgraph full`. Aside from the
additions of ['worker']['implementation'] and ['worker']['os']
fields (which seem to be required by a schema somewhere), everything
was identical.

Differential Revision: https://phabricator.services.mozilla.com/D1574

--HG--
rename : taskcluster/taskgraph/transforms/job/fetch.py => taskcluster/taskgraph/transforms/fetch.py
extra : rebase_source : 7186d4b8fe2c6a8dc7dfa7629dfb96ec670411e9
extra : amend_source : bce1f37233d7496841a8a7aa0eaf89542bbba610
This commit is contained in:
Gregory Szorc 2018-06-20 16:25:34 -07:00
parent 489f18fa4a
commit e00e84bea4
4 changed files with 258 additions and 225 deletions

View File

@ -5,20 +5,10 @@
loader: taskgraph.loader.transform:loader
transforms:
- taskgraph.transforms.fetch:transforms
- taskgraph.transforms.try_job:transforms
- taskgraph.transforms.job:transforms
- taskgraph.transforms.task:transforms
job-defaults:
run-on-projects: []
treeherder:
kind: build
platform: fetch/opt
tier: 1
worker-type: aws-provisioner-v1/gecko-{level}-images
worker:
docker-image: {in-tree: fetch}
max-run-time: 900
jobs-from:
- toolchains.yml

View File

@ -1,9 +1,8 @@
binutils-2.25.1:
description: binutils 2.25.1 source code
treeherder:
symbol: binutils2.25.1
run:
using: fetch-url
symbol: binutils2.25.1
fetch:
type: static-url
url: ftp://ftp.gnu.org/gnu/binutils/binutils-2.25.1.tar.bz2
sha256: b5b14added7d78a8d1ca70b5cb75fef57ce2197264f4f5835326b0df22ac9f22
size: 24163561
@ -13,10 +12,9 @@ binutils-2.25.1:
binutils-2.27:
description: binutils 2.27 source code
treeherder:
symbol: binutils2.27
run:
using: fetch-url
symbol: binutils2.27
fetch:
type: static-url
url: ftp://ftp.gnu.org/gnu/binutils/binutils-2.27.tar.bz2
sha256: 369737ce51587f92466041a97ab7d2358c6d9e1b6490b3940eb09fb0a9a6ac88
size: 26099568
@ -26,10 +24,9 @@ binutils-2.27:
binutils-2.28.1:
description: binutils 2.28.1 source code
treeherder:
symbol: binutils2.28.1
run:
using: fetch-url
symbol: binutils2.28.1
fetch:
type: static-url
url: ftp://ftp.gnu.org/gnu/binutils/binutils-2.28.1.tar.xz
sha256: 16328a906e55a3c633854beec8e9e255a639b366436470b4f6245eb0d2fde942
size: 19440112
@ -39,20 +36,18 @@ binutils-2.28.1:
cloog-0.18.1:
description: cloog source code
treeherder:
symbol: cloog0.18.1
run:
using: fetch-url
symbol: cloog0.18.1
fetch:
type: static-url
url: ftp://gcc.gnu.org/pub/gcc/infrastructure/cloog-0.18.1.tar.gz
sha256: 02500a4edd14875f94fe84cbeda4290425cb0c1c2474c6f75d75a303d64b4196
size: 3857324
gcc-4.9.4:
description: GCC 4.9.4 source code
treeherder:
symbol: gcc4.9.4
run:
using: fetch-url
symbol: gcc4.9.4
fetch:
type: static-url
url: ftp://ftp.gnu.org/gnu/gcc/gcc-4.9.4/gcc-4.9.4.tar.bz2
sha256: 6c11d292cd01b294f9f84c9a59c230d80e9e4a47e5c6355f046bb36d4f358092
size: 90097606
@ -62,10 +57,9 @@ gcc-4.9.4:
gcc-6.4.0:
description: GCC 6.4.0 source code
treeherder:
symbol: gcc6.4.0
run:
using: fetch-url
symbol: gcc6.4.0
fetch:
type: static-url
url: ftp://ftp.gnu.org/gnu/gcc/gcc-6.4.0/gcc-6.4.0.tar.xz
sha256: 850bf21eafdfe5cd5f6827148184c08c4a0852a37ccf36ce69855334d2c914d4
size: 76156220
@ -75,10 +69,9 @@ gcc-6.4.0:
gcc-7.3.0:
description: GCC 7.3.0 source code
treeherder:
symbol: gcc7.3.0
run:
using: fetch-url
symbol: gcc7.3.0
fetch:
type: static-url
url: ftp://ftp.gnu.org/gnu/gcc/gcc-7.3.0/gcc-7.3.0.tar.xz
sha256: 832ca6ae04636adbb430e865a1451adf6979ab44ca1c8374f61fba65645ce15c
size: 62462388
@ -88,10 +81,9 @@ gcc-7.3.0:
gmp-5.1.3:
description: GMP 5.1.3 source code
treeherder:
symbol: gmp5.1.3
run:
using: fetch-url
symbol: gmp5.1.3
fetch:
type: static-url
url: https://ftp.gnu.org/gnu/gmp/gmp-5.1.3.tar.bz2
sha256: 752079520b4690531171d0f4532e40f08600215feefede70b24fabdc6f1ab160
size: 2196480
@ -101,10 +93,9 @@ gmp-5.1.3:
gmp-6.1.0:
description: GMP 6.1.0 source code
treeherder:
symbol: gmp6.1.0
run:
using: fetch-url
symbol: gmp6.1.0
fetch:
type: static-url
url: https://ftp.gnu.org/gnu/gmp/gmp-6.1.0.tar.bz2
sha256: 498449a994efeba527885c10405993427995d3f86b8768d8cdf8d9dd7c6b73e8
size: 2383840
@ -114,40 +105,36 @@ gmp-6.1.0:
isl-0.12.2:
description: ISL 0.12.2 source code
treeherder:
symbol: isl0.12.2
run:
using: fetch-url
symbol: isl0.12.2
fetch:
type: static-url
url: ftp://gcc.gnu.org/pub/gcc/infrastructure/isl-0.12.2.tar.bz2
sha256: f4b3dbee9712850006e44f0db2103441ab3d13b406f77996d1df19ee89d11fb4
size: 1319434
isl-0.15:
description: ISL 0.15 source code
treeherder:
symbol: isl0.15
run:
using: fetch-url
symbol: isl0.15
fetch:
type: static-url
url: ftp://gcc.gnu.org/pub/gcc/infrastructure/isl-0.15.tar.bz2
sha256: 8ceebbf4d9a81afa2b4449113cee4b7cb14a687d7a549a963deb5e2a41458b6b
size: 1574964
isl-0.16.1:
description: ISL 0.16.1 source code
treeherder:
symbol: isl0.16.1
run:
using: fetch-url
symbol: isl0.16.1
fetch:
type: static-url
url: ftp://gcc.gnu.org/pub/gcc/infrastructure/isl-0.16.1.tar.bz2
sha256: 412538bb65c799ac98e17e8cfcdacbb257a57362acfaaff254b0fcae970126d2
size: 1626446
mpc-0.8.2:
description: mpc 0.8.2 source code
treeherder:
symbol: mpc0.8.2
run:
using: fetch-url
symbol: mpc0.8.2
fetch:
type: static-url
url: http://www.multiprecision.org/downloads/mpc-0.8.2.tar.gz
sha256: ae79f8d41d8a86456b68607e9ca398d00f8b7342d1d83bcf4428178ac45380c7
size: 548401
@ -157,10 +144,9 @@ mpc-0.8.2:
mpc-1.0.3:
description: mpc 1.0.3 source code
treeherder:
symbol: mpc1.0.3
run:
using: fetch-url
symbol: mpc1.0.3
fetch:
type: static-url
url: http://www.multiprecision.org/downloads/mpc-1.0.3.tar.gz
sha256: 617decc6ea09889fb08ede330917a00b16809b8db88c29c31bfbb49cbf88ecc3
size: 669925
@ -170,10 +156,9 @@ mpc-1.0.3:
mpfr-3.1.4:
description: mpfr 3.1.4 source code
treeherder:
symbol: mpfr3.1.4
run:
using: fetch-url
symbol: mpfr3.1.4
fetch:
type: static-url
url: http://www.mpfr.org/mpfr-3.1.4/mpfr-3.1.4.tar.bz2
sha256: d3103a80cdad2407ed581f3618c4bed04e0c92d1cf771a65ead662cc397f7775
size: 1279284
@ -183,10 +168,9 @@ mpfr-3.1.4:
mpfr-3.1.5:
description: mpfr 3.1.5 source code
treeherder:
symbol: mpfr3.1.5
run:
using: fetch-url
symbol: mpfr3.1.5
fetch:
type: static-url
url: http://www.mpfr.org/mpfr-3.1.5/mpfr-3.1.5.tar.bz2
sha256: ca498c1c7a74dd37a576f353312d1e68d490978de4395fa28f1cbd46a364e658
size: 1279489
@ -196,30 +180,27 @@ mpfr-3.1.5:
grcov-linux-x86_64:
description: grcov binary release
treeherder:
symbol: grcov-linux-x86_64
run:
using: fetch-url
symbol: grcov-linux-x86_64
fetch:
type: static-url
url: https://github.com/mozilla/grcov/releases/download/v0.2.3/grcov-linux-x86_64.tar.bz2
sha256: 5ed6ee891e0a1cbf1d84e27c615ec1f844c89f517f29cbec97379ded7c0894cd
size: 1858593
grcov-osx-x86_64:
description: grcov binary release
treeherder:
symbol: grcov-osx-x86_64
run:
using: fetch-url
symbol: grcov-osx-x86_64
fetch:
type: static-url
url: https://github.com/mozilla/grcov/releases/download/v0.2.3/grcov-osx-x86_64.tar.bz2
sha256: 3db4ef6c0dfaf35d39c9cacf19c60c482dae9413b5fe3bf343fa26667451c893
size: 919638
grcov-win-x86_64:
description: grcov binary release
treeherder:
symbol: grcov-win-x86_64
run:
using: fetch-url
symbol: grcov-win-x86_64
fetch:
type: static-url
url: https://github.com/mozilla/grcov/releases/download/v0.2.3/grcov-win-x86_64.tar.bz2
sha256: baede397959ec6f5e0c9630c8e9e1e59ebe723ec95acea9bb204511a886e0f2f
size: 1009309

View File

@ -0,0 +1,200 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Support for running tasks that download remote content and re-export
# it as task artifacts.
from __future__ import absolute_import, unicode_literals
import os
from voluptuous import (
Any,
Extra,
Optional,
Required,
)
import taskgraph
from .base import (
TransformSequence,
)
from ..util.cached_tasks import (
add_optimization,
)
from ..util.schema import (
Schema,
validate_schema,
)
# Cache type handed to add_optimization() for tasks created by this module.
CACHE_TYPE = 'content.v1'
# Transform sequence for this module; the functions below register
# themselves on it with @transforms.add.
transforms = TransformSequence()
# Schema describing a single fetch job as written in the kind's job files.
FETCH_SCHEMA = Schema({
    # Name of the task.
    Required('name'): basestring,

    # Relative path (from config.path) to the file the task was defined
    # in.
    Optional('job-from'): basestring,

    # Description of the task.
    Required('description'): basestring,

    # Treeherder symbol.
    Required('symbol'): basestring,

    Required('fetch'): Any({
        # Selects the kind of fetch to perform. Marked Required because a
        # plain key is optional: a job omitting ``type`` would otherwise
        # pass validation and only fail later, in the transform that
        # dispatches on this value.
        Required('type'): 'static-url',

        # The URL to download.
        Required('url'): basestring,

        # The SHA-256 of the downloaded content.
        Required('sha256'): basestring,

        # Size of the downloaded entity, in bytes.
        Required('size'): int,

        # GPG signature verification.
        Optional('gpg-signature'): {
            # URL where GPG signature document can be obtained. Can contain
            # the value ``{url}``, which will be substituted with the value
            # from ``url``.
            Required('sig-url'): basestring,

            # Path to file containing GPG public key(s) used to validate
            # download.
            Required('key-path'): basestring,
        },

        # The name to give to the generated artifact.
        Optional('artifact-name'): basestring,

        # IMPORTANT: when adding anything that changes the behavior of the
        # task, it is important to update the digest data used to compute
        # cache hits.
    }),
})
@transforms.add
def validate(config, jobs):
    """Check each fetch job against ``FETCH_SCHEMA`` and pass it through.

    ``validate_schema`` is given a message prefix that names the job, or
    ``'unknown'`` when the job has no ``name`` key. Jobs are yielded
    unchanged, in input order.
    """
    for fetch_job in jobs:
        task_name = fetch_job.get('name', 'unknown')
        prefix = 'In fetch task {!r}:'.format(task_name)
        validate_schema(FETCH_SCHEMA, fetch_job, prefix)
        yield fetch_job
@transforms.add
def process_fetch_job(config, jobs):
    """Convert fetch entries to the job schema.

    Dispatches on ``job['fetch']['type']`` and yields one job description
    per input job.

    Raises:
        ValueError: if a job's fetch type is missing or not supported.
    """
    for job in jobs:
        typ = job['fetch'].get('type')

        if typ == 'static-url':
            yield create_fetch_url_task(config, job)
        else:
            # validate() should have caught this. Raise explicitly rather
            # than ``assert False``: asserts are stripped under ``python -O``,
            # which would silently drop the job from the graph.
            raise ValueError(
                'In fetch task {!r}: unsupported fetch type {!r}'.format(
                    job.get('name', 'unknown'), typ))
def make_base_task(config, name, description, command):
    """Return the job description skeleton shared by fetch tasks.

    ``name`` and ``description`` are copied into the result, ``name`` also
    forms the ``fetch-<name>`` label, and ``command`` becomes the
    ``run-task`` command. Expiry depends on ``config.params['level']``.
    """
    # Fetch tasks are idempotent and immutable, so at level 3 they are kept
    # essentially forever; other levels get a short expiry.
    expires = '1000 years' if config.params['level'] == '3' else '28 days'

    treeherder = {
        'kind': 'build',
        'platform': 'fetch/opt',
        'tier': 1,
    }
    run = {
        'using': 'run-task',
        'checkout': False,
        'command': command,
    }
    worker = {
        'chain-of-trust': True,
        'docker-image': {'in-tree': 'fetch'},
        'env': {},
        'max-run-time': 900,
    }

    return {
        'attributes': {},
        'name': name,
        'description': description,
        'expires-after': expires,
        'label': 'fetch-%s' % name,
        'run-on-projects': [],
        'treeherder': treeherder,
        'run': run,
        'worker-type': 'aws-provisioner-v1/gecko-{level}-images',
        'worker': worker,
    }
def create_fetch_url_task(config, job):
    """Build the job description for a ``static-url`` fetch job.

    The task runs ``fetch-content static-url`` with the expected SHA-256
    and size, writes the download under ``/builds/worker/artifacts`` and
    exposes that directory as the ``public`` artifact. Unless
    ``taskgraph.fast`` is set, the task is also registered with
    ``add_optimization``.
    """
    name = job['name']
    fetch = job['fetch']

    # Without an explicit artifact name, use the last path component of
    # the URL.
    artifact_name = fetch.get('artifact-name') or fetch['url'].split('/')[-1]

    command = [
        '/builds/worker/bin/fetch-content', 'static-url',
        '--sha256', fetch['sha256'],
        '--size', '%d' % fetch['size'],
    ]

    env = {}

    if 'gpg-signature' in fetch:
        gpg = fetch['gpg-signature']
        sig_url = gpg['sig-url'].format(url=fetch['url'])
        key_path = os.path.join(taskgraph.GECKO, gpg['key-path'])

        # The public key is passed to the task through its environment.
        with open(key_path, 'rb') as key_file:
            env['FETCH_GPG_KEY'] = key_file.read()

        command += [
            '--gpg-sig-url', sig_url,
            '--gpg-key-env', 'FETCH_GPG_KEY',
        ]

    command += [
        fetch['url'], '/builds/worker/artifacts/%s' % artifact_name,
    ]

    task = make_base_task(config, name, job['description'], command)
    task['treeherder']['symbol'] = job['symbol']

    worker = task['worker']
    worker['artifacts'] = [{
        'type': 'directory',
        'name': 'public',
        'path': '/builds/worker/artifacts',
    }]
    worker['env'] = env

    task['attributes']['fetch-artifact'] = 'public/%s' % artifact_name

    if taskgraph.fast:
        return task

    kind_prefix = '{}-'.format(config.kind)
    cache_name = task['label'].replace(kind_prefix, '', 1)

    # We don't include the GPG signature in the digest because it isn't
    # materially important for caching: GPG signatures are supplemental
    # trust checking beyond what the shasum already provides.
    digest_data = [fetch['sha256'], '%d' % fetch['size'], artifact_name]

    # This adds the level to the index path automatically.
    add_optimization(
        config,
        task,
        cache_type=CACHE_TYPE,
        cache_name=cache_name,
        digest_data=digest_data,
    )

    return task

View File

@ -1,138 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Support for running tasks that download remote content and re-export
# it as task artifacts.
from __future__ import absolute_import, unicode_literals
import os
from voluptuous import (
Optional,
Required,
)
import taskgraph
from . import (
run_job_using,
)
from ...util.cached_tasks import (
add_optimization,
)
from ...util.schema import (
Schema,
)
# Cache type handed to add_optimization() for tasks created by this module.
CACHE_TYPE = 'content.v1'
# Schema for the ``run`` section of a job that sets ``using: fetch-url``;
# it is passed as ``schema=`` to the run_job_using() registration below.
url_schema = Schema({
Required('using'): 'fetch-url',
# Base work directory used to set up the task.
Required('workdir'): basestring,
# The URL to download.
Required('url'): basestring,
# The SHA-256 of the downloaded content.
Required('sha256'): basestring,
# Size of the downloaded entity, in bytes.
Required('size'): int,
# GPG signature verification.
Optional('gpg-signature'): {
# URL where GPG signature document can be obtained. Can contain the
# value ``{url}``, which will be substituted with the value from
# ``url``.
Required('sig-url'): basestring,
# Path to file containing GPG public key(s) used to validate download.
Required('key-path'): basestring,
},
# The name to give to the generated artifact.
Optional('artifact-name'): basestring,
# IMPORTANT: when adding anything that changes the behavior of the task,
# it is important to update the digest data used to compute cache hits.
})
@run_job_using('docker-worker', 'fetch-url',
schema=url_schema)
def cache_url(config, job, taskdesc):
    """Configure a task to download a URL and expose it as an artifact.

    ``taskdesc`` is updated in place from ``job['run']``: worker settings,
    expiry, the ``fetch-content`` command and the ``fetch-artifact``
    attribute. Unless ``taskgraph.fast`` is set, the task is also
    registered with ``add_optimization``.
    """
    run = job['run']

    worker = taskdesc['worker']
    worker['chain-of-trust'] = True

    # Fetch tasks are idempotent and immutable, so at level 3 they are kept
    # essentially forever; other levels get a short expiry.
    taskdesc['expires-after'] = (
        '1000 years' if config.params['level'] == '3' else '28 days')

    # Without an explicit artifact name, use the last path component of
    # the URL.
    artifact_name = run.get('artifact-name') or run['url'].split('/')[-1]

    worker.setdefault('artifacts', []).append({
        'type': 'directory',
        'name': 'public',
        'path': '/builds/worker/artifacts',
    })

    env = worker.setdefault('env', {})

    fetch_args = [
        '/builds/worker/bin/fetch-content', 'static-url',
        '--sha256', run['sha256'],
        '--size', '%d' % run['size'],
    ]

    if 'gpg-signature' in run:
        gpg = run['gpg-signature']
        sig_url = gpg['sig-url'].format(url=run['url'])
        key_path = os.path.join(taskgraph.GECKO, gpg['key-path'])

        # The public key is passed to the task through its environment.
        with open(key_path, 'rb') as key_file:
            env['FETCH_GPG_KEY'] = key_file.read()

        fetch_args += [
            '--gpg-sig-url', sig_url,
            '--gpg-key-env', 'FETCH_GPG_KEY',
        ]

    fetch_args += [
        run['url'], '/builds/worker/artifacts/%s' % artifact_name,
    ]

    worker['command'] = ['/builds/worker/bin/run-task', '--'] + fetch_args

    attributes = taskdesc.setdefault('attributes', {})
    attributes['fetch-artifact'] = 'public/%s' % artifact_name

    if taskgraph.fast:
        return

    kind_prefix = '{}-'.format(config.kind)
    cache_name = taskdesc['label'].replace(kind_prefix, '', 1)

    # We don't include the GPG signature in the digest because it isn't
    # materially important for caching: GPG signatures are supplemental
    # trust checking beyond what the shasum already provides.
    digest_data = [run['sha256'], '%d' % run['size'], artifact_name]

    # This adds the level to the index path automatically.
    add_optimization(
        config,
        taskdesc,
        cache_type=CACHE_TYPE,
        cache_name=cache_name,
        digest_data=digest_data,
    )