Bug 1648591: Remove taskgraph based cron implementation; r=aki

Differential Revision: https://phabricator.services.mozilla.com/D81270
This commit is contained in:
Tom Prince 2020-07-07 16:04:00 +00:00
parent b8932efddc
commit 909e7275e8
9 changed files with 18 additions and 497 deletions

View File

@ -1,7 +1,6 @@
# Definitions for jobs that run periodically. For details on the format, see
# `taskcluster/taskgraph/cron/schema.py`. For documentation, see
# `taskcluster/docs/cron.rst`.
# `https://hg.mozilla.org/ci/ci-admin/file/tip/build-decision/src/build_decision/cron/schema.yml`.
# For documentation, see `taskcluster/docs/cron.rst`.
---
jobs:

View File

@ -1,6 +1,5 @@
# This file is rendered via JSON-e by
# - mozilla-taskcluster - See
# https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml
# - hg-push - https://hg.mozilla.org/ci/ci-admin/file/tip/build-decision/src/build_decision/hg_push.py
# {
# tasks_for: 'hg-push',
# push: {owner, comment, pushlog_id, pushdate},
@ -10,7 +9,7 @@
# ownTaskId: // taskId of the task that will be created
# }
#
# - cron tasks - See taskcluster/taskgraph/cron/decision.py
# - cron tasks - https://hg.mozilla.org/ci/ci-admin/file/tip/build-decision/src/build_decision/cron/decision.py
# {
# tasks_for: 'cron',
# push: {revision, pushlog_id, pushdate, owner}

View File

@ -11,8 +11,8 @@ In the root of the Gecko directory, you will find `.cron.yml`. This defines
the periodic tasks ("cron jobs") run for Gecko. Each specifies a name, what to
do, and some parameters to determine when the cron job should occur.
See ``taskcluster/taskgraph/cron/schema.py`` for details on the format and
meaning of this file.
See `the schema <https://hg.mozilla.org/ci/ci-admin/file/tip/build-decision/src/build_decision/cron/schema.yml>`_
for details on the format and meaning of this file.
How It Works
------------
@ -20,12 +20,13 @@ How It Works
The `TaskCluster Hooks Service <https://firefox-ci-tc.services.mozilla.com/hooks>`_
has a hook configured for each repository supporting periodic task graphs. The
hook runs every 15 minutes, and the resulting task is referred to as a "cron task".
That cron task runs `./mach taskgraph cron` in a checkout of the Gecko source
tree.
That cron task runs the `build-decision
<https://hg.mozilla.org/ci/ci-admin/file/tip/build-decision>`_ image in a
checkout of the Gecko source tree.
The mach subcommand reads ``.cron.yml``, then consults the current time
(actually the time the cron task was created, rounded down to the nearest 15
minutes) and creates tasks for any cron jobs scheduled at that time.
The task reads ``.cron.yml``, then consults the current time (actually the time
the cron task was created, rounded down to the nearest 15 minutes) and creates
tasks for any cron jobs scheduled at that time.
Each cron job in ``.cron.yml`` specifies a ``job.type``, corresponding to a
function responsible for creating TaskCluster tasks when the job runs.

View File

@ -208,43 +208,13 @@ class MachCommands(MachCommandBase):
sys.exit(1)
@SubCommand('taskgraph', 'cron',
description="Run the cron task")
@CommandArgument('--base-repository',
required=False,
help='(ignored)')
@CommandArgument('--head-repository',
required=True,
help='URL for "head" repository to fetch')
@CommandArgument('--head-ref',
required=False,
help='(ignored)')
@CommandArgument('--project',
required=True,
help='Project to use for creating tasks. Example: --project=mozilla-central')
@CommandArgument('--level',
required=True,
help='SCM level of this repository')
@CommandArgument('--force-run',
required=False,
help='If given, force this cronjob to run regardless of time, '
'and run no others')
@CommandArgument('--no-create',
required=False,
action='store_true',
help='Do not actually create tasks')
@CommandArgument('--root', '-r',
required=False,
help="root of the repository to get cron task definitions from")
description="Provide a pointer to the new `.cron.yml` handler.")
def taskgraph_cron(self, **options):
"""Run the cron task; this task creates zero or more decision tasks. It is run
from the hooks service on a regular basis."""
import taskgraph.cron
try:
self.setup_logging()
return taskgraph.cron.taskgraph_cron(options)
except Exception:
traceback.print_exc()
sys.exit(1)
print(
'Handling of ".cron.yml" files has move to '
"https://hg.mozilla.org/ci/ci-admin/file/tip/build-decision."
)
sys.exit(1)
@SubCommand('taskgraph', 'action-callback',
description='Run action callback used by action tasks')

View File

@ -1,157 +0,0 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, print_function, unicode_literals
import datetime
import json
import logging
import os
import traceback
from . import decision, schema
from .util import match_utc
from ..create import create_task
from .. import GECKO
from taskgraph.util.attributes import match_run_on_projects
from taskgraph.util.hg import calculate_head_rev
from taskgraph.util.schema import resolve_keyed_by
from taskgraph.util.taskcluster import get_session
from taskgraph.util.yaml import load_yaml
# Dispatch table mapping each `job.type` accepted in `.cron.yml` to the
# function that handles it. Handlers are called with the contents of the
# `job` property from `.cron.yml`, the cron parameters and `root=`, and should
# return a sequence of (taskId, task) tuples which will subsequently be fed to
# create_task.
JOB_TYPES = {
'decision-task': decision.run_decision_task,
}
logger = logging.getLogger(__name__)
def load_jobs(params, root):
    """Load and validate ``.cron.yml`` from ``root`` and index its jobs by name.

    Returns a dict mapping each job's ``name`` to the full job definition.
    Keyed-by fields are left untouched here. ``params`` is accepted but not
    used.
    """
    config = load_yaml(root, '.cron.yml')
    schema.validate(config)

    jobs_by_name = {}
    for entry in config['jobs']:
        # a later entry with the same name replaces an earlier one
        jobs_by_name[entry['name']] = entry
    return jobs_by_name
def should_run(job, params):
    """Return True if ``job`` applies to this project and is due at
    ``params['time']``.

    A job without ``run-on-projects`` is treated as running on ``all``
    projects; a job without ``when`` never matches.
    """
    projects = job.get('run-on-projects', ['all'])
    if match_run_on_projects(params['project'], projects):
        # The `when` key is only resolved once we know the job is wanted on
        # this branch, so it is not required for projects that never run it.
        resolve_keyed_by(job, 'when', 'Cron job ' + job['name'],
                         project=params['project'])

        for sched in job.get('when', []):
            if match_utc(params, sched=sched):
                return True

    return False
def run_job(job_name, job, params, root):
    """Run a single cron job: build its tasks, then create (or just log) them.

    Works on a copy of ``params`` extended with ``job_name``. Any exception is
    printed and logged rather than propagated, so one failing job does not
    prevent the remaining jobs from running.
    """
    job_params = params.copy()
    job_params['job_name'] = job_name

    try:
        job_type = job['job']['type']
        if job_type not in JOB_TYPES:
            raise Exception("job type {} not recognized".format(job_type))
        tasks = JOB_TYPES[job_type](job['job'], job_params, root=root)

        dry_run = job_params['no_create']
        for task_id, task in tasks:
            if dry_run:
                logger.info("Not creating task {} (--no-create):\n".format(task_id) +
                            json.dumps(task, sort_keys=True, indent=4, separators=(',', ': ')))
            else:
                create_task(get_session(), task_id, job_name, task)
    except Exception:
        # Report the failure but carry on: failing the whole cron task would
        # leave other jobs un-run. NOTE: job failure could be reported to a
        # responsible person here via tc-notify.
        traceback.print_exc()
        logger.error("cron job {} run failed; continuing to next job".format(
            job_params['job_name']))
def calculate_time(options):
    """Return the time this cron run is scheduled for, as a naive UTC datetime
    rounded down to the nearest 15 minutes.

    When ``$TASK_ID`` is not set (a development environment), the time comes
    from ``$CRON_TIME`` (an integer timestamp) if present, otherwise from the
    current time. When ``$TASK_ID`` is set, the task definition is fetched
    from the queue and its ``created`` timestamp is used.

    ``options`` is accepted but not used.

    Raises whatever ``res.raise_for_status()`` raises when the queue request
    fails, and ``ValueError`` if ``$CRON_TIME`` or the task's ``created``
    field cannot be parsed.
    """
    if 'TASK_ID' not in os.environ:
        # running in a development environment, so look for CRON_TIME or use
        # the current time
        if 'CRON_TIME' in os.environ:
            logger.warning("setting params['time'] based on $CRON_TIME")
            when = datetime.datetime.utcfromtimestamp(
                int(os.environ['CRON_TIME']))
        else:
            logger.warning("using current time for params['time']; try setting $CRON_TIME "
                           "to a timestamp")
            when = datetime.datetime.utcnow()
    else:
        # fetch this task from the queue
        res = get_session().get(
            'http://taskcluster/queue/v1/task/' + os.environ['TASK_ID'])
        if res.status_code != 200:
            # log the most specific message available before raising
            try:
                logger.error(res.json()['message'])
            except Exception:
                logger.error(res.text)
            res.raise_for_status()
        # the task's `created` time is close to when the hook ran, although that
        # may be some time ago if task execution was delayed
        created = res.json()['created']
        when = datetime.datetime.strptime(created, '%Y-%m-%dT%H:%M:%S.%fZ')

    # round down to the nearest 15m
    minute = when.minute - (when.minute % 15)
    when = when.replace(minute=minute, second=0, microsecond=0)
    # the calculated time is reported here, so no separate debug print is needed
    logger.info("calculated cron schedule time is {}".format(when))
    return when
def taskgraph_cron(options):
    """Run the cron task: create tasks for every ``.cron.yml`` job that is due.

    ``options`` is a dict providing ``head_repository``, ``project``,
    ``level``, ``no_create`` and ``force_run``, plus an optional ``root``
    (defaulting to ``GECKO``). When ``force_run`` names a job, only that job
    is run, regardless of its schedule.
    """
    root = options.get('root') or GECKO

    params = {
        # repositories
        'repository_url': options['head_repository'],

        # *calculated* head_rev; this is based on the current meaning of this
        # reference in the working copy
        'head_rev': calculate_head_rev(root),

        # the project (short name for the repository) and its SCM level
        'project': options['project'],
        'level': options['level'],

        # if true, tasks will not actually be created
        'no_create': options['no_create'],

        # the time that this cron task was created (as a UTC datetime object)
        'time': calculate_time(options),
    }

    jobs = load_jobs(params, root=root)

    forced = options['force_run']
    if forced:
        logger.info("force-running cron job {}".format(forced))
        run_job(forced, jobs[forced], params, root)
        return

    # visit jobs in name order
    for job_name in sorted(jobs):
        job = jobs[job_name]
        if not should_run(job, params):
            logger.info("not running cron job {}".format(job_name))
            continue
        logger.info("running cron job {}".format(job_name))
        run_job(job_name, job, params, root)

View File

@ -1,84 +0,0 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, print_function, unicode_literals
import jsone
import pipes
import os
import slugid
from taskgraph.util.time import current_json_time
from taskgraph.util.hg import find_hg_revision_push_info
from taskgraph.util.yaml import load_yaml
def run_decision_task(job, params, root):
    """Build the decision task for a ``decision-task`` cron job.

    ``job`` is the ``job`` section of a ``.cron.yml`` entry; its settings are
    translated into command-line arguments for the decision task. Returns a
    one-element list holding the ``(task_id, task)`` tuple produced by
    ``make_decision_task``.
    """
    arguments = []
    if 'target-tasks-method' in job:
        arguments.append('--target-tasks-method={}'.format(job['target-tasks-method']))
    if job.get('optimize-target-tasks') is not None:
        arguments.append('--optimize-target-tasks={}'.format(
            str(job['optimize-target-tasks']).lower(),
        ))
    # `include-push-tasks` is a boolean: only pass the flag when it is true, so
    # that an explicit `include-push-tasks: false` is honoured rather than
    # treated the same as `true`.
    if job.get('include-push-tasks'):
        arguments.append('--include-push-tasks')
    if 'rebuild-kinds' in job:
        for kind in job['rebuild-kinds']:
            arguments.append('--rebuild-kind={}'.format(kind))
    return [
        make_decision_task(
            params,
            symbol=job['treeherder-symbol'],
            arguments=arguments,
            root=root),
    ]
def make_decision_task(params, root, symbol, arguments=None):
    """Generate a basic decision task, based on the root .taskcluster.yml

    ``.taskcluster.yml`` is loaded from ``root`` and rendered with JSON-e using
    a ``tasks_for: 'cron'`` context built from ``params``, which must provide
    ``repository_url``, ``head_rev``, ``project``, ``level`` and ``job_name``.
    ``symbol`` is passed through as ``cron.job_symbol``. ``arguments`` is an
    optional iterable of strings which are shell-quoted and joined into
    ``cron.quoted_args``.

    Returns a ``(task_id, task)`` tuple, with ``taskId`` removed from the task
    definition. Raises ``Exception`` unless rendering yields exactly one task.
    """
    # None rather than a mutable `[]` default, which would be shared by calls
    if arguments is None:
        arguments = []

    taskcluster_yml = load_yaml(root, '.taskcluster.yml')

    push_info = find_hg_revision_push_info(
        params['repository_url'],
        params['head_rev'])

    # provide a similar JSON-e context to what mozilla-taskcluster provides:
    # https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml
    # but with a different tasks_for and an extra `cron` section
    context = {
        'tasks_for': 'cron',
        'repository': {
            'url': params['repository_url'],
            'project': params['project'],
            'level': params['level'],
        },
        'push': {
            'revision': params['head_rev'],
            # pushlog_id and pushdate come from the push info looked up above;
            # the owner is a fixed placeholder. The decision task expects all
            # of them.
            'pushlog_id': push_info['pushid'],
            'pushdate': push_info['pushdate'],
            'owner': 'cron',
        },
        'cron': {
            'task_id': os.environ.get('TASK_ID', '<cron task id>'),
            'job_name': params['job_name'],
            'job_symbol': symbol,
            # args are shell-quoted since they are given to `bash -c`
            'quoted_args': ' '.join(pipes.quote(a) for a in arguments),
        },
        'now': current_json_time(),
        'ownTaskId': slugid.nice(),
    }

    rendered = jsone.render(taskcluster_yml, context)
    if len(rendered['tasks']) != 1:
        raise Exception("Expected .taskcluster.yml to only produce one cron task")
    task = rendered['tasks'][0]

    # the task id is returned separately from the task definition
    task_id = task.pop('taskId')
    return (task_id, task)

View File

@ -1,89 +0,0 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, print_function, unicode_literals
from six import text_type
from voluptuous import Any, Required, All, Optional
from taskgraph.util.schema import (
optionally_keyed_by,
validate_schema,
Schema,
)
def even_15_minutes(minutes):
    """Validate that ``minutes`` is a multiple of 15 and return it unchanged.

    Used as a schema validator, so the accepted value is returned rather than
    ``None``; otherwise a chained validator would replace the value with
    ``None``.

    Raises ``ValueError`` if ``minutes`` is not evenly divisible by 15.
    """
    if minutes % 15 != 0:
        raise ValueError("minutes must be evenly divisible by 15")
    return minutes
# Schema for the contents of `.cron.yml`: a top-level `jobs` list in which each
# entry names a job, describes what to run and says when to run it. It is
# applied by `validate()` below.
cron_yml_schema = Schema({
'jobs': [{
# Name of the crontask (must be unique)
Required('name'): text_type,
# what to run
# Description of the job to run, keyed by 'type'. 'decision-task' is the
# only type accepted here.
Required('job'): {
Required('type'): 'decision-task',
# Treeherder symbol for the cron task
Required('treeherder-symbol'): text_type,
# --target-tasks-method './mach taskgraph decision' argument
Required('target-tasks-method'): text_type,
Optional(
'optimize-target-tasks',
description='If specified, this indicates whether the target '
'tasks are eligible for optimization. Otherwise, '
'the default for the project is used.',
): bool,
Optional(
'include-push-tasks',
description='Whether tasks from the on-push graph should be re-used '
'in the cron graph.',
): bool,
Optional(
'rebuild-kinds',
description='Kinds that should not be re-used from the on-push graph.',
): [text_type],
},
# when to run it
# Optional set of projects on which this job should run; if omitted, this will
# run on all projects for which cron tasks are set up. This works just like the
# `run_on_projects` attribute, where strings like "release" and "integration" are
# expanded to cover multiple repositories. (taskcluster/docs/attributes.rst)
'run-on-projects': [text_type],
# Array of times at which this task should run. These *must* be a
# multiple of 15 minutes, the minimum scheduling interval. This field
# can be keyed by project so that each project has a different schedule
# for the same job.
'when': optionally_keyed_by(
'project',
[
{
'hour': int,
# Must be an integer that passes even_15_minutes (divisible by 15).
'minute': All(int, even_15_minutes),
# You probably don't want both day and weekday.
'day': int, # Day of the month, as used by datetime.
# Only these capitalized English day names are accepted.
'weekday': Any('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
'Saturday', 'Sunday')
}
]
),
}],
})
def validate(cron_yml):
    """Check a parsed ``.cron.yml`` document against ``cron_yml_schema``.

    Delegates to ``validate_schema``, passing "Invalid .cron.yml:" as the
    accompanying message.
    """
    message = "Invalid .cron.yml:"
    validate_schema(cron_yml_schema, cron_yml, message)

View File

@ -1,40 +0,0 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, print_function, unicode_literals
from six import string_types
def match_utc(params, sched):
    """Return True if params['time'] matches the given schedule.

    If minute is not specified, then every multiple of fifteen minutes will match.
    Times not an even multiple of fifteen minutes will result in an exception
    (since they would never run).

    If hour is not specified, any hour will match. Similar for day and weekday.

    A weekday, when given, must be a string holding an English day name
    ("Monday" through "Sunday", compared case-insensitively); any other kind
    of value never matches.
    """
    # English day names indexed by datetime.weekday() (Monday == 0). A fixed
    # table is used instead of strftime('%A'), whose output depends on the
    # process locale and would stop matching under a non-English locale.
    weekday_names = (
        'monday', 'tuesday', 'wednesday', 'thursday', 'friday',
        'saturday', 'sunday',
    )

    minute = sched.get('minute')
    if minute and minute % 15 != 0:
        raise Exception("cron jobs only run on multiples of 15 minutes past the hour")

    if minute is not None and minute != params['time'].minute:
        return False

    hour = sched.get('hour')
    if hour is not None and hour != params['time'].hour:
        return False

    day = sched.get('day')
    if day is not None and day != params['time'].day:
        return False

    weekday = sched.get('weekday')
    if weekday is not None:
        try:
            wanted = weekday.lower()
        except AttributeError:
            # don't accept non-string values.
            return False
        if wanted != weekday_names[params['time'].weekday()]:
            return False

    return True

View File

@ -1,78 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, unicode_literals
import datetime
import unittest
from mozunit import main
from taskgraph.cron.util import (
match_utc,
)
class TestMatchUtc(unittest.TestCase):
    """Checks for match_utc against fixed times on Thursday 2017-01-26."""

    def test_hour_minute(self):
        """Hour and minute must both agree with the time."""
        at_1630 = {'time': datetime.datetime(2017, 1, 26, 16, 30, 0)}
        self.assertFalse(match_utc(at_1630, {'hour': 4, 'minute': 30}))
        self.assertTrue(match_utc(at_1630, {'hour': 16, 'minute': 30}))
        self.assertFalse(match_utc(at_1630, {'hour': 16, 'minute': 0}))

    def test_hour_only(self):
        """An hour-only schedule matches at every quarter of that hour."""
        for minute in (0, 15, 30, 45):
            params = {'time': datetime.datetime(2017, 1, 26, 16, minute, 0)}
            self.assertFalse(match_utc(params, {'hour': 0}))
            self.assertFalse(match_utc(params, {'hour': 4}))
            self.assertTrue(match_utc(params, {'hour': 16}))

    def test_minute_only(self):
        """A minute-only schedule matches only that minute."""
        at_1300 = {'time': datetime.datetime(2017, 1, 26, 13, 0, 0)}
        self.assertTrue(match_utc(at_1300, {'minute': 0}))
        for minute in (15, 30, 45):
            self.assertFalse(match_utc(at_1300, {'minute': minute}))

    def test_zeroes(self):
        """Zero is a real hour/minute value, not 'unspecified'."""
        midnight = {'time': datetime.datetime(2017, 1, 26, 0, 0, 0)}
        self.assertTrue(match_utc(midnight, {'minute': 0}))
        self.assertTrue(match_utc(midnight, {'hour': 0}))
        self.assertFalse(match_utc(midnight, {'hour': 1}))
        for minute in (15, 30, 45):
            self.assertFalse(match_utc(midnight, {'minute': minute}))

    def test_invalid_minute(self):
        """A minute that is not a multiple of 15 raises."""
        at_1300 = {'time': datetime.datetime(2017, 1, 26, 13, 0, 0)}
        with self.assertRaises(Exception):
            match_utc(at_1300, {'minute': 1})

    def test_day_hour_minute(self):
        """The day of the month is matched along with hour and minute."""
        at_1630 = {'time': datetime.datetime(2017, 1, 26, 16, 30, 0)}
        self.assertFalse(match_utc(at_1630, {'day': 25, 'hour': 16, 'minute': 30}))
        self.assertTrue(match_utc(at_1630, {'day': 26, 'hour': 16, 'minute': 30}))
        self.assertFalse(match_utc(at_1630, {'day': 26, 'hour': 16, 'minute': 0}))

    def test_weekday_hour_minute(self):
        """The weekday name is matched along with hour and minute."""
        at_1630 = {'time': datetime.datetime(2017, 1, 26, 16, 30, 0)}
        self.assertFalse(
            match_utc(at_1630, {'weekday': 'Wednesday', 'hour': 16, 'minute': 30}))
        self.assertTrue(
            match_utc(at_1630, {'weekday': 'Thursday', 'hour': 16, 'minute': 30}))
        self.assertFalse(
            match_utc(at_1630, {'weekday': 'Thursday', 'hour': 16, 'minute': 0}))
if __name__ == '__main__':
main()