Bug 1633866 - [taskgraph.optimize] Move logic to query 'bugbug' service to a utility file, r=marco

With dynamic-test-selection, we'll also need to query the bugbug service from the transforms. Let's move the querying logic to a utility file to share it more easily.

Differential Revision: https://phabricator.services.mozilla.com/D73088
2024-11-29 07:42:04 +00:00 · 2020-04-30 13:53:01 +00:00 · 2020-04-30 13:53:01 +00:00 · 984515c753
commit 984515c753
parent 49a25c7035
3 changed files with 84 additions and 58 deletions
--- a/taskcluster/taskgraph/optimize/bugbug.py
+++ b/taskcluster/taskgraph/optimize/bugbug.py
@@ -4,28 +4,17 @@

 from __future__ import absolute_import, print_function, unicode_literals

-import json
-import logging
-import time
 from collections import defaultdict

-import requests
-from mozbuild.util import memoize, memoized_property
 from six.moves.urllib.parse import urlsplit

 from taskgraph.optimize import register_strategy, OptimizationStrategy
-from taskgraph.util.taskcluster import requests_retry_session
-
-logger = logging.getLogger(__name__)
-
-# Preset confidence thresholds.
-CT_LOW = 0.5
-CT_MEDIUM = 0.7
-CT_HIGH = 0.9
-
-
-class BugbugTimeoutException(Exception):
-    pass
+from taskgraph.util.bugbug import (
+    push_schedules,
+    CT_HIGH,
+    CT_MEDIUM,
+    CT_LOW,
+)


@register_strategy("bugbug", args=(CT_MEDIUM,))
@@ -46,50 +35,15 @@ class BugBugPushSchedules(OptimizationStrategy):
            groups within a task to find the overall task confidence. Otherwise
            the maximum confidence threshold is used (default: False).
    """
-    BUGBUG_BASE_URL = "https://bugbug.herokuapp.com"
-    RETRY_TIMEOUT = 4 * 60  # seconds
-    RETRY_INTERVAL = 5      # seconds
-
    def __init__(self, confidence_threshold, use_reduced_tasks=False, combine_weights=False):
        self.confidence_threshold = confidence_threshold
        self.use_reduced_tasks = use_reduced_tasks
        self.combine_weights = combine_weights

-    @memoized_property
-    def session(self):
-        s = requests.Session()
-        s.headers.update({'X-API-KEY': 'gecko-taskgraph'})
-        return requests_retry_session(retries=5, session=s)
-
-    @memoize
-    def run_query(self, query, data=None):
-        url = self.BUGBUG_BASE_URL + query
-
-        attempts = self.RETRY_TIMEOUT / self.RETRY_INTERVAL
-        i = 0
-        while i < attempts:
-            r = self.session.get(url)
-            r.raise_for_status()
-
-            if r.status_code != 202:
-                break
-
-            time.sleep(self.RETRY_INTERVAL)
-            i += 1
-
-        data = r.json()
-        logger.debug("Bugbug scheduler service returns:\n{}".format(
-                     json.dumps(data, indent=2)))
-
-        if r.status_code == 202:
-            raise BugbugTimeoutException("Timed out waiting for result from '{}'".format(url))
-
-        return data
-
    def should_remove_task(self, task, params, importance):
        branch = urlsplit(params['head_repository']).path.strip('/')
        rev = params['head_rev']
-        data = self.run_query('/push/{branch}/{rev}/schedules'.format(branch=branch, rev=rev))
+        data = push_schedules(branch, rev)

        key = "reduced_tasks" if self.use_reduced_tasks else "tasks"
        tasks = set(
--- a/taskcluster/taskgraph/test/test_optimize_strategies.py
+++ b/taskcluster/taskgraph/test/test_optimize_strategies.py
@@ -4,20 +4,29 @@
 from __future__ import absolute_import

 import time
+from datetime import datetime
+from time import mktime

 import pytest
-from datetime import datetime
 from mozunit import main
-from time import mktime

 from taskgraph.optimize.backstop import Backstop
 from taskgraph.optimize.bugbug import (
    BugBugPushSchedules,
-    BugbugTimeoutException,
    DisperseGroups,
    SkipUnlessDebug,
 )
 from taskgraph.task import Task
+from taskgraph.util.bugbug import (
+    BUGBUG_BASE_URL,
+    BugbugTimeoutException,
+    push_schedules,
+)
+
+
+@pytest.fixture(autouse=True)
+def clear_push_schedules_memoize():
+    push_schedules.clear()


@pytest.fixture(scope='module')
@@ -206,7 +215,7 @@ def test_optimization_strategy(responses, params, opt, tasks, arg, expected):
 ], ids=idfn)
 def test_bugbug_push_schedules(responses, params, args, data, expected):
    query = "/push/{branch}/{head_rev}/schedules".format(**params)
-    url = BugBugPushSchedules.BUGBUG_BASE_URL + query
+    url = BUGBUG_BASE_URL + query

    responses.add(
        responses.GET,
@@ -222,7 +231,7 @@ def test_bugbug_push_schedules(responses, params, args, data, expected):

 def test_bugbug_timeout(monkeypatch, responses, params):
    query = "/push/{branch}/{head_rev}/schedules".format(**params)
-    url = BugBugPushSchedules.BUGBUG_BASE_URL + query
+    url = BUGBUG_BASE_URL + query
    responses.add(
        responses.GET,
        url,
--- a/taskcluster/taskgraph/util/bugbug.py
+++ b/taskcluster/taskgraph/util/bugbug.py
@@ -0,0 +1,63 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import json
+import logging
+import time
+
+import requests
+from mozbuild.util import memoize
+
+from taskgraph.util.taskcluster import requests_retry_session
+
+logger = logging.getLogger(__name__)
+
+BUGBUG_BASE_URL = "https://bugbug.herokuapp.com"
+RETRY_TIMEOUT = 4 * 60  # seconds
+RETRY_INTERVAL = 5      # seconds
+
+# Preset confidence thresholds.
+CT_LOW = 0.5
+CT_MEDIUM = 0.7
+CT_HIGH = 0.9
+
+
+class BugbugTimeoutException(Exception):
+    pass
+
+
+@memoize
+def get_session():
+    s = requests.Session()
+    s.headers.update({'X-API-KEY': 'gecko-taskgraph'})
+    return requests_retry_session(retries=5, session=s)
+
+
+@memoize
+def push_schedules(branch, rev):
+    url = BUGBUG_BASE_URL + '/push/{branch}/{rev}/schedules'.format(branch=branch, rev=rev)
+
+    session = get_session()
+    attempts = RETRY_TIMEOUT / RETRY_INTERVAL
+    i = 0
+    while i < attempts:
+        r = session.get(url)
+        r.raise_for_status()
+
+        if r.status_code != 202:
+            break
+
+        time.sleep(RETRY_INTERVAL)
+        i += 1
+
+    data = r.json()
+    logger.debug("Bugbug scheduler service returns:\n{}".format(
+                 json.dumps(data, indent=2)))
+
+    if r.status_code == 202:
+        raise BugbugTimeoutException("Timed out waiting for result from '{}'".format(url))
+
+    return data