Bug 1415619: Factor out toolchain cache index calculations. r=dustin,mshal

MozReview-Commit-ID: KSvwr94j5QB

--HG--
extra : rebase_source : 2a246ef983069420f54d400fc48eb3b3cfad2886
This commit is contained in:
Tom Prince 2017-11-06 13:37:00 -07:00
parent f6119599d6
commit 74ceb1d825
2 changed files with 73 additions and 24 deletions

View File

@ -7,8 +7,6 @@ Support for running toolchain-building jobs via dedicated scripts
from __future__ import absolute_import, print_function, unicode_literals
import hashlib
from mozbuild.shellutil import quote as shell_quote
from taskgraph.util.schema import Schema
@ -23,9 +21,10 @@ from taskgraph.transforms.job.common import (
)
from taskgraph.util.hash import hash_paths
from taskgraph import GECKO
from taskgraph.util.cached_tasks import add_optimization
TOOLCHAIN_INDEX = 'gecko.cache.level-{level}.toolchains.v1.{name}.{digest}'
CACHE_TYPE = 'toolchains.v1'
toolchain_run_schema = Schema({
Required('using'): 'toolchain-script',
@ -66,7 +65,7 @@ toolchain_run_schema = Schema({
})
def add_optimization(config, run, taskdesc):
def get_digest_data(config, run, taskdesc):
files = list(run.get('resources', []))
# This file
files.append('taskcluster/taskgraph/transforms/job/toolchain.py')
@ -92,24 +91,7 @@ def add_optimization(config, run, taskdesc):
args = run.get('arguments')
if args:
data.extend(args)
label = taskdesc['label']
subs = {
'name': label.replace('%s-' % config.kind, ''),
'digest': hashlib.sha256('\n'.join(data)).hexdigest()
}
# We'll try to find a cached version of the toolchain at levels above
# and including the current level, starting at the highest level.
index_routes = []
for level in reversed(range(int(config.params['level']), 4)):
subs['level'] = level
index_routes.append(TOOLCHAIN_INDEX.format(**subs))
taskdesc['optimization'] = {'index-search': index_routes}
# ... and cache at the lowest level.
taskdesc.setdefault('routes', []).append(
'index.{}'.format(TOOLCHAIN_INDEX.format(**subs)))
return data
@run_job_using("docker-worker", "toolchain-script", schema=toolchain_run_schema)
@ -173,7 +155,13 @@ def docker_worker_toolchain(config, job, taskdesc):
if 'toolchain-alias' in run:
attributes['toolchain-alias'] = run['toolchain-alias']
add_optimization(config, run, taskdesc)
name = taskdesc['label'].replace('{}-'.format(config.kind), '', 1)
add_optimization(
config, taskdesc,
cache_type=CACHE_TYPE,
cache_name=name,
digest_data=get_digest_data(config, run, taskdesc),
)
@run_job_using("generic-worker", "toolchain-script", schema=toolchain_run_schema)
@ -229,4 +217,10 @@ def windows_toolchain(config, job, taskdesc):
if 'toolchain-alias' in run:
attributes['toolchain-alias'] = run['toolchain-alias']
add_optimization(config, run, taskdesc)
name = taskdesc['label'].replace('{}-'.format(config.kind), '', 1)
add_optimization(
config, taskdesc,
cache_type=CACHE_TYPE,
cache_name=name,
digest_data=get_digest_data(config, run, taskdesc),
)

View File

@ -0,0 +1,55 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, print_function, unicode_literals
import hashlib
TARGET_CACHE_INDEX = (
    'gecko.cache.level-{level}.{type}.{name}.{digest}'
)


def add_optimization(config, taskdesc, cache_type, cache_name, digest=None, digest_data=None):
    """
    Mark the result of this task as cacheable.

    Adds index routes to the task so future runs can look it up, plus an
    optimization hint so previously cached artifacts are reused instead of
    being rebuilt. Exactly one of `digest` and `digest_data` must be passed.

    :param TransformConfig config: The configuration for the kind being transformed.
    :param dict taskdesc: The description of the current task.
    :param str cache_type: The type of task result being cached.
    :param str cache_name: The name of the object being cached.
    :param digest: A unique string identifying this version of the artifacts
        being generated. Typically this will be the hash of inputs to the task.
    :type digest: bytes or None
    :param digest_data: A list of bytes representing the inputs of this task.
        They will be concatenated and hashed to create the digest for this
        task.
    :type digest_data: list of bytes or None
    """
    if (digest is None) == (digest_data is None):
        raise Exception("Must pass exactly one of `digest` and `digest_data`.")
    if digest is None:
        digest = hashlib.sha256('\n'.join(digest_data)).hexdigest()

    substitutions = {
        'type': cache_type,
        'name': cache_name,
        'digest': digest,
    }

    # Search for a cached result at every level from the highest (3) down to,
    # and including, the level this push runs at.
    lowest_level = int(config.params['level'])
    search_routes = []
    for candidate_level in range(3, lowest_level - 1, -1):
        substitutions['level'] = candidate_level
        search_routes.append(TARGET_CACHE_INDEX.format(**substitutions))
    taskdesc['optimization'] = {'index-search': search_routes}

    # ... and publish this task's own result at the current (lowest) level,
    # which is where the loop above left `substitutions['level']`.
    taskdesc.setdefault('routes', []).append(
        'index.{}'.format(TARGET_CACHE_INDEX.format(**substitutions)))