Bug 1648723 - Use the util/hg.py module in the backstop module to avoid code duplication. r=ahal

Differential Revision: https://phabricator.services.mozilla.com/D85276
This commit is contained in:
Marco Castelluccio 2020-07-30 17:14:09 +00:00
parent f3f890a7de
commit f4e638b22d
2 changed files with 28 additions and 77 deletions

View File

@@ -304,15 +304,29 @@ def test_bugbug_fallback(monkeypatch, responses, params):
assert not opt.should_remove_task(default_tasks[1], params, None)
def test_backstop(params):
def test_backstop(responses, params):
all_labels = {t.label for t in default_tasks}
opt = Backstop(10, 60, {'try'}) # every 10th push or 1 hour
responses.add(
responses.GET,
"https://hg.mozilla.org/integration/autoland/json-pushes/?version=2&startID=6&endID=7", # noqa
json={"pushes": {"7": {}}},
status=200,
)
# If there's no previous push date, run tasks
params['pushlog_id'] = 8
scheduled = {t.label for t in default_tasks if not opt.should_remove_task(t, params, None)}
assert scheduled == all_labels
responses.add(
responses.GET,
"https://hg.mozilla.org/integration/autoland/json-pushes/?version=2&startID=7&endID=8", # noqa
json={"pushes": {"8": {"date": params['pushdate']}}},
status=200,
)
# Only multiples of 10 schedule tasks. Pushdate from push 8 was cached.
params['pushlog_id'] = 9
params['pushdate'] += 3599
@@ -324,6 +338,13 @@ def test_backstop(params):
scheduled = {t.label for t in default_tasks if not opt.should_remove_task(t, params, None)}
assert scheduled == all_labels
responses.add(
responses.GET,
"https://hg.mozilla.org/integration/autoland/json-pushes/?version=2&startID=9&endID=10", # noqa
json={"pushes": {"10": {"date": params['pushdate']}}},
status=200,
)
# Tasks are also scheduled if an hour has passed.
params['pushlog_id'] = 11
params['pushdate'] += 3600

View File

@@ -5,22 +5,12 @@
from __future__ import absolute_import, print_function, unicode_literals
import logging
from collections import defaultdict
import requests
from mozbuild.util import memoize
from redo import retry
from taskgraph.util.hg import get_push_data
BACKSTOP_PUSH_INTERVAL = 10
BACKSTOP_TIME_INTERVAL = 60 # minutes
PUSH_ENDPOINT = (
"{head_repository}/json-pushes/?startID={push_id_start}&endID={push_id_end}"
)
# cached push dates by project
PUSH_DATES = defaultdict(dict)
# cached push_ids that failed to retrieve datetime for
FAILED_JSON_PUSH_CALLS = []
logger = logging.getLogger(__name__)
@@ -60,79 +50,19 @@ def is_backstop(
return False
@memoize
def minutes_between_pushes(
time_interval, repository, project, cur_push_id, cur_push_date
):
def minutes_between_pushes(time_interval, repository, project, cur_push_id, cur_push_date):
# figure out the minutes that have elapsed between the current push and previous one
# defaulting to max min so if we can't get value, defaults to run the task
min_between_pushes = time_interval
prev_push_id = cur_push_id - 1
# cache the pushdate for the current push so we can use it next time
PUSH_DATES[project].update({cur_push_id: cur_push_date})
data = get_push_data(repository, project, prev_push_id, prev_push_id)
# check if we already have the previous push id's datetime cached
prev_push_date = PUSH_DATES[project].get(prev_push_id, 0)
# we have datetime of current and previous push, so return elapsed minutes and bail
if cur_push_date > 0 and prev_push_date > 0:
return (cur_push_date - prev_push_date) / 60
# datetime for previous pushid not cached, so must retrieve it
# if we already tried to retrieve the datetime for this pushid
# before and the json-push request failed, don't try it again
if prev_push_id in FAILED_JSON_PUSH_CALLS:
return min_between_pushes
url = PUSH_ENDPOINT.format(
head_repository=repository,
push_id_start=prev_push_id - 1,
push_id_end=prev_push_id,
)
try:
response = retry(
requests.get,
attempts=2,
sleeptime=10,
args=(url,),
kwargs={"timeout": 60, "headers": {"User-Agent": "TaskCluster"}},
)
prev_push_date = response.json().get(str(prev_push_id), {}).get("date", 0)
# cache it for next time
PUSH_DATES[project].update({prev_push_id: prev_push_date})
if data is not None:
prev_push_date = data[prev_push_id].get('date', 0)
# now have datetime of current and previous push
if cur_push_date > 0 and prev_push_date > 0:
min_between_pushes = (cur_push_date - prev_push_date) / 60
# In the event of request times out, requests will raise a TimeoutError.
except requests.exceptions.Timeout:
logger.warning("json-pushes timeout, enabling backstop")
FAILED_JSON_PUSH_CALLS.append(prev_push_id)
# In the event of a network problem (e.g. DNS failure, refused connection, etc),
# requests will raise a ConnectionError.
except requests.exceptions.ConnectionError:
logger.warning("json-pushes connection error, enabling backstop")
FAILED_JSON_PUSH_CALLS.append(prev_push_id)
# In the event of the rare invalid HTTP response(e.g 404, 401),
# requests will raise an HTTPError exception
except requests.exceptions.HTTPError:
logger.warning("Bad Http response, enabling backstop")
FAILED_JSON_PUSH_CALLS.append(prev_push_id)
# When we get invalid JSON (i.e. 500 error), it results in a ValueError (bug 1313426)
except ValueError as error:
logger.warning("Invalid JSON, possible server error: {}".format(error))
FAILED_JSON_PUSH_CALLS.append(prev_push_id)
# We just print the error out as a debug message if we failed to catch the exception above
except requests.exceptions.RequestException as error:
logger.warning(error)
FAILED_JSON_PUSH_CALLS.append(prev_push_id)
return min_between_pushes