Bug 1422133 - Generate runnable-jobs.json.gz file as part of the Gecko decision task run. r=dustin

The runnable jobs API on Treeherder is timing out quite frequently.
This is because it downloads the full-task-graph.json artifact from the Gecko decision task.

This file has grown to be massive (over 30MB) and that takes too long to fetch. The API times out after 20 seconds.

The data we need from the artifact is minimal. We can generate a file with just the minimum amount of data needed.

This code change adds logic to generate a runnable-jobs.json.gz file after the full-task-graph.json artifact is generated.

MozReview-Commit-ID: 9u2H7HbUAcl

--HG--
extra : rebase_source : f450527e7dd565e29cfe2e04dc52761e77827cc0
This commit is contained in:
Armen Zambrano G. 2017-11-30 16:05:53 -05:00
parent f99309301e
commit a97dfc7c07
4 changed files with 120 additions and 3 deletions

View File

@ -145,9 +145,16 @@ So for instance, if you had already requested a build task in the ``try`` comman
and you wish to add a test which depends on this build, the original build task
is re-used.
Action Tasks are currently scheduled by
`pulse_actions <https://github.com/mozilla/pulse_actions>`_. This feature is only
present on ``try`` pushes for now.
Runnable jobs
-------------
As part of the execution of the Gecko decision task we generate a
``public/runnable-jobs.json.gz`` file. It contains a subset of all the data
contained within the ``full-task-graph.json``.
This file has the minimum amount of data needed by Treeherder to show all
tasks that can be scheduled on a push.
Task Parameterization
---------------------

View File

@ -89,6 +89,25 @@ PER_PROJECT_PARAMETERS = {
}
def full_task_graph_to_runnable_jobs(full_task_json):
    """
    Reduce the JSON form of the full task graph to the runnable-jobs data.

    `full_task_json` is a dict mapping each task label to a node whose
    ``task`` entry holds the task definition.  Nodes whose task definition
    has no ``extra.treeherder`` section are left out of the result.

    Returns a dict mapping each remaining label to a dict that always has
    ``symbol`` and, when present in the treeherder section, ``groupName``,
    ``groupSymbol`` and ``collection``.  ``platform`` is copied from
    ``machine.platform`` when that value is set and non-empty.

    Raises KeyError if a treeherder section has no ``symbol``.
    """
    runnable_jobs = {}
    # Use .items() rather than the Python 2-only .iteritems() so that this
    # function runs unchanged on both Python 2 and Python 3.
    for label, node in full_task_json.items():
        if not ('extra' in node['task'] and 'treeherder' in node['task']['extra']):
            continue

        th = node['task']['extra']['treeherder']
        runnable_jobs[label] = {
            'symbol': th['symbol']
        }

        # Optional fields are copied verbatim, and only when they exist.
        for i in ('groupName', 'groupSymbol', 'collection'):
            if i in th:
                runnable_jobs[label][i] = th[i]
        # The nested machine.platform value is flattened to a top-level key.
        if th.get('machine', {}).get('platform'):
            runnable_jobs[label]['platform'] = th['machine']['platform']
    return runnable_jobs
def taskgraph_decision(options, parameters=None):
"""
Run the decision task. This function implements `mach taskgraph decision`,
@ -118,6 +137,9 @@ def taskgraph_decision(options, parameters=None):
full_task_json = tgg.full_task_graph.to_json()
write_artifact('full-task-graph.json', full_task_json)
# write out the public/runnable-jobs.json.gz file
write_artifact('runnable-jobs.json.gz', full_task_graph_to_runnable_jobs(full_task_json))
# this is just a test to check whether the from_json() function is working
_, _ = TaskGraph.from_json(full_task_json)
@ -259,5 +281,9 @@ def write_artifact(filename, data):
elif filename.endswith('.json'):
with open(path, 'w') as f:
json.dump(data, f, sort_keys=True, indent=2, separators=(',', ': '))
elif filename.endswith('.gz'):
import gzip
with gzip.open(path, 'wb') as f:
f.write(json.dumps(data))
else:
raise TypeError("Don't know how to write to {}".format(filename))

View File

@ -18,6 +18,7 @@ subsuite = taskgraph
[test_util_docker.py]
[test_util_parameterization.py]
[test_util_python_path.py]
[test_util_runnable_jobs.py]
[test_util_schema.py]
[test_util_templates.py]
[test_util_time.py]

View File

@ -0,0 +1,83 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import
import unittest
from taskgraph.decision import full_task_graph_to_runnable_jobs
from taskgraph.graph import Graph
from taskgraph.taskgraph import TaskGraph
from taskgraph.task import Task
from mozunit import main
class TestRunnableJobs(unittest.TestCase):
    """Tests for full_task_graph_to_runnable_jobs."""

    # Keyword arguments for Task(**t).  Task 'a' carries only the treeherder
    # symbol; task 'b' carries every optional treeherder field that the
    # conversion copies (collection, groupName, groupSymbol, machine.platform).
    tasks = [
        {
            'kind': 'build',
            'label': 'a',
            'attributes': {},
            'task': {
                'extra': {
                    'treeherder': {
                        'symbol': 'B'
                    }
                },
            }
        },
        {
            'kind': 'test',
            'label': 'b',
            'attributes': {},
            'task': {
                'extra': {
                    'treeherder': {
                        'collection': {
                            'opt': True
                        },
                        'groupName': 'Some group',
                        'groupSymbol': 'GS',
                        'machine': {
                            'platform': 'linux64'
                        },
                        'symbol': 't'
                    }
                },
            }
        },
    ]

    def make_taskgraph(self, tasks):
        """
        Build a TaskGraph without edges from `tasks`, a dict of label -> Task.

        Each task gets the task_id '<label>-tid' assigned.  Returns the
        tuple (taskgraph, label_to_taskid).
        """
        label_to_taskid = {k: k + '-tid' for k in tasks}
        # .items() instead of the Python 2-only .iteritems(), so the test
        # also runs on Python 3.
        for label, task_id in label_to_taskid.items():
            tasks[label].task_id = task_id
        graph = Graph(nodes=set(tasks), edges=set())
        taskgraph = TaskGraph(tasks, graph)
        return taskgraph, label_to_taskid

    def test_taskgraph_to_runnable_jobs(self):
        """The JSON of a task graph is reduced to the expected job data."""
        tg, _ = self.make_taskgraph({
            t['label']: Task(**t) for t in self.tasks
        })

        res = full_task_graph_to_runnable_jobs(tg.to_json())

        self.assertEqual(res, {
            'a': {
                'symbol': 'B'
            },
            'b': {
                'collection': {'opt': True},
                'groupName': 'Some group',
                'groupSymbol': 'GS',
                'symbol': 't',
                'platform': 'linux64'
            }
        })
# When this file is executed directly, hand control to mozunit's main().
if __name__ == '__main__':
    main()