From 461fbf199403fb67f222bdec369bcc4a2e46acc7 Mon Sep 17 00:00:00 2001 From: Johan Lorenzo Date: Wed, 7 Sep 2022 09:13:40 +0000 Subject: [PATCH] Bug 1784232 - Bump taskgraph to v3.0.0 r=ahal Differential Revision: https://phabricator.services.mozilla.com/D155978 --- .taskcluster.yml | 4 +- taskcluster/docs/parameters.rst | 6 + .../gecko_taskgraph/actions/registry.py | 4 + taskcluster/gecko_taskgraph/decision.py | 27 ++ taskcluster/gecko_taskgraph/main.py | 9 + .../gecko_taskgraph/test/test_decision.py | 28 +- .../test/test_taskcluster_yml.py | 3 + third_party/python/poetry.lock | 8 +- third_party/python/requirements.in | 2 +- third_party/python/requirements.txt | 6 +- .../LICENSE | 0 .../METADATA | 6 +- .../RECORD | 44 +-- .../WHEEL | 0 .../entry_points.txt | 1 + .../top_level.txt | 0 .../taskgraph/actions/util.py | 1 + .../taskgraph/decision.py | 81 ++++- .../taskgraph/files_changed.py | 40 ++- .../taskgraph/generator.py | 6 +- .../taskcluster_taskgraph/taskgraph/main.py | 39 ++- .../taskcluster_taskgraph/taskgraph/morph.py | 17 +- .../taskgraph/optimize/strategies.py | 9 +- .../taskgraph/parameters.py | 9 +- .../taskgraph/run-task/run-task | 73 +++- .../taskgraph/transforms/base.py | 4 +- .../taskgraph/transforms/cached_tasks.py | 2 +- .../taskgraph/transforms/code_review.py | 2 +- .../taskgraph/transforms/docker_image.py | 2 +- .../taskgraph/transforms/job/__init__.py | 6 +- .../taskgraph/util/vcs.py | 321 +++++++++++++++++- .../taskgraph/util/verify.py | 18 +- 32 files changed, 674 insertions(+), 104 deletions(-) rename third_party/python/taskcluster_taskgraph/{taskcluster_taskgraph-2.0.0.dist-info => taskcluster_taskgraph-3.0.0.dist-info}/LICENSE (100%) rename third_party/python/taskcluster_taskgraph/{taskcluster_taskgraph-2.0.0.dist-info => taskcluster_taskgraph-3.0.0.dist-info}/METADATA (94%) rename third_party/python/taskcluster_taskgraph/{taskcluster_taskgraph-2.0.0.dist-info => taskcluster_taskgraph-3.0.0.dist-info}/RECORD (71%) rename 
third_party/python/taskcluster_taskgraph/{taskcluster_taskgraph-2.0.0.dist-info => taskcluster_taskgraph-3.0.0.dist-info}/WHEEL (100%) rename third_party/python/taskcluster_taskgraph/{taskcluster_taskgraph-2.0.0.dist-info => taskcluster_taskgraph-3.0.0.dist-info}/entry_points.txt (98%) rename third_party/python/taskcluster_taskgraph/{taskcluster_taskgraph-2.0.0.dist-info => taskcluster_taskgraph-3.0.0.dist-info}/top_level.txt (100%) diff --git a/.taskcluster.yml b/.taskcluster.yml index 1ac68a86042d..cba3a824d91b 100644 --- a/.taskcluster.yml +++ b/.taskcluster.yml @@ -37,7 +37,7 @@ # # { # tasks_for: 'action', -# push: {owner, pushlog_id, revision}, +# push: {owner, pushlog_id, revision, base_revision}, # repository: {url, project, level}, # input, # taskId, // targetted taskId @@ -196,6 +196,7 @@ tasks: # to `mach taskgraph decision` are all on the command line. $merge: - GECKO_BASE_REPOSITORY: 'https://hg.mozilla.org/mozilla-unified' + GECKO_BASE_REV: '${push.base_revision}' GECKO_HEAD_REPOSITORY: '${repoUrl}' GECKO_HEAD_REF: '${push.revision}' GECKO_HEAD_REV: '${push.revision}' @@ -253,6 +254,7 @@ tasks: --tasks-for='${tasks_for}' --repository-type=hg --base-repository="$GECKO_BASE_REPOSITORY" + --base-rev="$GECKO_BASE_REV" --head-repository="$GECKO_HEAD_REPOSITORY" --head-ref="$GECKO_HEAD_REF" --head-rev="$GECKO_HEAD_REV" diff --git a/taskcluster/docs/parameters.rst b/taskcluster/docs/parameters.rst index d0b0f958d0bf..0f5aeae5e3ab 100644 --- a/taskcluster/docs/parameters.rst +++ b/taskcluster/docs/parameters.rst @@ -36,9 +36,15 @@ Push Information ``base_repository`` in cases where ``base_repository`` is likely to be cached and only a few additional commits are needed from ``head_repository``. +``base_rev`` + The revision that ``head_rev`` got merged into (i.e. the previous revision). This can be a short revision string. + ``head_rev`` The revision to check out; this can be a short revision string +``base_ref`` + Reference that ``head_rev`` got merged into. 
It is usually a branch or a tag. + ``head_ref`` For Mercurial repositories, this is the same as ``head_rev``. For git repositories, which do not allow pulling explicit revisions, this gives diff --git a/taskcluster/gecko_taskgraph/actions/registry.py b/taskcluster/gecko_taskgraph/actions/registry.py index 6437fb7b51cb..a94dfa529f8d 100644 --- a/taskcluster/gecko_taskgraph/actions/registry.py +++ b/taskcluster/gecko_taskgraph/actions/registry.py @@ -183,10 +183,14 @@ def register_callback_action( revision = parameters[ "{}head_rev".format(graph_config["project-repo-param-prefix"]) ] + base_revision = parameters[ + "{}base_rev".format(graph_config["project-repo-param-prefix"]) + ] push = { "owner": "mozilla-taskcluster-maintenance@mozilla.com", "pushlog_id": parameters["pushlog_id"], "revision": revision, + "base_revision": base_revision, } match = re.match( diff --git a/taskcluster/gecko_taskgraph/decision.py b/taskcluster/gecko_taskgraph/decision.py index 87bc59ed18c6..1d47b35bc406 100644 --- a/taskcluster/gecko_taskgraph/decision.py +++ b/taskcluster/gecko_taskgraph/decision.py @@ -14,11 +14,19 @@ from collections import defaultdict import yaml from redo import retry from taskgraph.create import create_tasks, testing +from taskgraph.decision import ( + # TODO: Let standalone taskgraph generate parameters instead + # of calling internals + _determine_more_accurate_base_ref, + _determine_more_accurate_base_rev, + _get_env_prefix, +) from taskgraph.parameters import Parameters from taskgraph.taskgraph import TaskGraph from taskgraph.util.python_path import find_object from taskgraph.util.schema import Schema, validate_schema from taskgraph.util.taskcluster import get_artifact +from taskgraph.util.vcs import get_repository from taskgraph.util.yaml import load_yaml from voluptuous import Any, Optional, Required @@ -286,6 +294,8 @@ def get_decision_parameters(graph_config, options): n: options[n] for n in [ "base_repository", + "base_ref", + "base_rev", 
"head_repository", "head_rev", "head_ref", @@ -313,6 +323,23 @@ def get_decision_parameters(graph_config, options): commit_message = get_hg_commit_message(os.path.join(GECKO, product_dir)) + repo_path = os.getcwd() + repo = get_repository(repo_path) + parameters["base_ref"] = _determine_more_accurate_base_ref( + repo, + candidate_base_ref=options.get("base_ref"), + head_ref=options.get("head_ref"), + base_rev=options.get("base_rev"), + ) + + parameters["base_rev"] = _determine_more_accurate_base_rev( + repo, + base_ref=parameters["base_ref"], + candidate_base_rev=options.get("base_rev"), + head_rev=options.get("head_rev"), + env_prefix=_get_env_prefix(graph_config), + ) + # Define default filter list, as most configurations shouldn't need # custom filters. parameters["filters"] = [ diff --git a/taskcluster/gecko_taskgraph/main.py b/taskcluster/gecko_taskgraph/main.py index fe0f2dbee769..e1fe20fb0c98 100644 --- a/taskcluster/gecko_taskgraph/main.py +++ b/taskcluster/gecko_taskgraph/main.py @@ -595,6 +595,15 @@ def image_digest(args): help='Type of repository, either "hg" or "git"', ) @argument("--base-repository", required=True, help='URL for "base" repository to clone') +@argument( + "--base-ref", default="", help='Reference of the revision in the "base" repository' +) +@argument( + "--base-rev", + default="", + help="Taskgraph decides what to do based on the revision range between " + "`--base-rev` and `--head-rev`. 
Value is determined automatically if not provided", +) @argument( "--head-repository", required=True, diff --git a/taskcluster/gecko_taskgraph/test/test_decision.py b/taskcluster/gecko_taskgraph/test/test_decision.py index 15af6e814545..9c5d1324e768 100644 --- a/taskcluster/gecko_taskgraph/test/test_decision.py +++ b/taskcluster/gecko_taskgraph/test/test_decision.py @@ -75,8 +75,12 @@ class TestGetDecisionParameters(unittest.TestCase): } @patch("gecko_taskgraph.decision.get_hg_revision_branch") - def test_simple_options(self, mock_get_hg_revision_branch): + @patch("gecko_taskgraph.decision._determine_more_accurate_base_rev") + def test_simple_options( + self, mock_determine_more_accurate_base_rev, mock_get_hg_revision_branch + ): mock_get_hg_revision_branch.return_value = "default" + mock_determine_more_accurate_base_rev.return_value = "baserev" with MockedOpen({self.ttc_file: None}): params = decision.get_decision_parameters(FAKE_GRAPH_CONFIG, self.options) self.assertEqual(params["pushlog_id"], "143") @@ -88,8 +92,12 @@ class TestGetDecisionParameters(unittest.TestCase): self.assertEqual(params["try_task_config"], {}) @patch("gecko_taskgraph.decision.get_hg_revision_branch") - def test_no_email_owner(self, mock_get_hg_revision_branch): + @patch("gecko_taskgraph.decision._determine_more_accurate_base_rev") + def test_no_email_owner( + self, mock_determine_more_accurate_base_rev, mock_get_hg_revision_branch + ): mock_get_hg_revision_branch.return_value = "default" + mock_determine_more_accurate_base_rev.return_value = "baserev" self.options["owner"] = "ffxbld" with MockedOpen({self.ttc_file: None}): params = decision.get_decision_parameters(FAKE_GRAPH_CONFIG, self.options) @@ -97,9 +105,16 @@ class TestGetDecisionParameters(unittest.TestCase): @patch("gecko_taskgraph.decision.get_hg_revision_branch") @patch("gecko_taskgraph.decision.get_hg_commit_message") - def test_try_options(self, mock_get_hg_commit_message, mock_get_hg_revision_branch): + 
@patch("gecko_taskgraph.decision._determine_more_accurate_base_rev") + def test_try_options( + self, + mock_determine_more_accurate_base_rev, + mock_get_hg_commit_message, + mock_get_hg_revision_branch, + ): mock_get_hg_commit_message.return_value = "try: -b do -t all --artifact" mock_get_hg_revision_branch.return_value = "default" + mock_determine_more_accurate_base_rev.return_value = "baserev" self.options["project"] = "try" with MockedOpen({self.ttc_file: None}): params = decision.get_decision_parameters(FAKE_GRAPH_CONFIG, self.options) @@ -117,11 +132,16 @@ class TestGetDecisionParameters(unittest.TestCase): @patch("gecko_taskgraph.decision.get_hg_revision_branch") @patch("gecko_taskgraph.decision.get_hg_commit_message") + @patch("gecko_taskgraph.decision._determine_more_accurate_base_rev") def test_try_task_config( - self, mock_get_hg_commit_message, mock_get_hg_revision_branch + self, + mock_determine_more_accurate_base_rev, + mock_get_hg_commit_message, + mock_get_hg_revision_branch, ): mock_get_hg_commit_message.return_value = "Fuzzy query=foo" mock_get_hg_revision_branch.return_value = "default" + mock_determine_more_accurate_base_rev.return_value = "baserev" ttc = {"tasks": ["a", "b"]} self.options["project"] = "try" with MockedOpen({self.ttc_file: json.dumps(ttc)}): diff --git a/taskcluster/gecko_taskgraph/test/test_taskcluster_yml.py b/taskcluster/gecko_taskgraph/test/test_taskcluster_yml.py index 19490d4d3c9f..480da51bb8db 100644 --- a/taskcluster/gecko_taskgraph/test/test_taskcluster_yml.py +++ b/taskcluster/gecko_taskgraph/test/test_taskcluster_yml.py @@ -24,6 +24,7 @@ class TestTaskclusterYml(unittest.TestCase): "tasks_for": "hg-push", "push": { "revision": "e8d2d9aff5026ef1f1777b781b47fdcbdb9d8f20", + "base_revision": "e8aebe488b2f2e567940577de25013d00e818f7c", "owner": "dustin@mozilla.com", "pushlog_id": 1556565286, "pushdate": 112957, @@ -51,6 +52,7 @@ class TestTaskclusterYml(unittest.TestCase): }, "push": { "revision": 
"e8aebe488b2f2e567940577de25013d00e818f7c", + "base_revision": "54cbb3745cdb9a8aa0a4428d405b3b2e1c7d13c2", "pushlog_id": -1, "pushdate": 0, "owner": "cron", @@ -80,6 +82,7 @@ class TestTaskclusterYml(unittest.TestCase): }, "push": { "revision": "e8d2d9aff5026ef1f1777b781b47fdcbdb9d8f20", + "base_revision": "e8aebe488b2f2e567940577de25013d00e818f7c", "owner": "dustin@mozilla.com", "pushlog_id": 1556565286, "pushdate": 112957, diff --git a/third_party/python/poetry.lock b/third_party/python/poetry.lock index 47b5ad10fdcc..7fef0d4c2685 100644 --- a/third_party/python/poetry.lock +++ b/third_party/python/poetry.lock @@ -615,7 +615,7 @@ test = ["pytest", "pytest-cov", "pytest-mock", "httmock", "mock", "setuptools-li [[package]] name = "taskcluster-taskgraph" -version = "2.0.0" +version = "3.0.0" description = "Build taskcluster taskgraphs" category = "main" optional = false @@ -737,7 +737,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pyt [metadata] lock-version = "1.1" python-versions = "^3.6" -content-hash = "9d5442add586f045a8bac2403afaade45b7836ae851e906fd598d48c23075eb1" +content-hash = "5aad78b786ff3b9e2ca7bd0bce0708e6b52584f26bd430f88e0e87b89d9d9af3" [metadata.files] aiohttp = [ @@ -1125,8 +1125,8 @@ taskcluster = [ {file = "taskcluster-44.2.2.tar.gz", hash = "sha256:0266a6a901e1a2ec838984a7f24e7adb6d58f9f2e221a7f613388f8f23f786fc"}, ] taskcluster-taskgraph = [ - {file = "taskcluster-taskgraph-2.0.0.tar.gz", hash = "sha256:93eff40ba39a29cd290fc25a2124ed9bf5806d87891edd7e8de35df568708141"}, - {file = "taskcluster_taskgraph-2.0.0-py3-none-any.whl", hash = "sha256:3d22ab488071ddc82997b33fc6c1c524a44bdc7e14b30a274d99dbbdd7389502"}, + {file = "taskcluster-taskgraph-3.0.0.tar.gz", hash = "sha256:fba61e84ba3624056dd84c79d9c1796d4f50d1d25715f3217681c737f7ed3f03"}, + {file = "taskcluster_taskgraph-3.0.0-py3-none-any.whl", hash = "sha256:566e46ba9beeb42e6884f7594ceb5b512b82bb93097ae4abc395e4d513dbe556"}, ] taskcluster-urls = [ {file = 
"taskcluster-urls-13.0.1.tar.gz", hash = "sha256:b25e122ecec249c4299ac7b20b08db76e3e2025bdaeb699a9d444556de5fd367"}, diff --git a/third_party/python/requirements.in b/third_party/python/requirements.in index 1a16631bac56..a42a499882d0 100644 --- a/third_party/python/requirements.in +++ b/third_party/python/requirements.in @@ -39,7 +39,7 @@ setuptools==51.2.0 six==1.13.0 slugid==2.0.0 taskcluster==44.2.2 -taskcluster-taskgraph==2.0.0 +taskcluster-taskgraph==3.0.0 taskcluster-urls==13.0.1 tqdm==4.62.3 urllib3==1.26 diff --git a/third_party/python/requirements.txt b/third_party/python/requirements.txt index 1ec3c78c5595..c313ec91c7f2 100644 --- a/third_party/python/requirements.txt +++ b/third_party/python/requirements.txt @@ -322,9 +322,9 @@ six==1.13.0; (python_version >= "2.6" and python_full_version < "3.0.0") or (pyt slugid==2.0.0 \ --hash=sha256:aec8b0e01c4ad32e38e12d609eab3ec912fd129aaf6b2ded0199b56a5f8fd67c \ --hash=sha256:a950d98b72691178bdd4d6c52743c4a2aa039207cf7a97d71060a111ff9ba297 -taskcluster-taskgraph==2.0.0 \ - --hash=sha256:93eff40ba39a29cd290fc25a2124ed9bf5806d87891edd7e8de35df568708141 \ - --hash=sha256:3d22ab488071ddc82997b33fc6c1c524a44bdc7e14b30a274d99dbbdd7389502 +taskcluster-taskgraph==3.0.0 \ + --hash=sha256:fba61e84ba3624056dd84c79d9c1796d4f50d1d25715f3217681c737f7ed3f03 \ + --hash=sha256:566e46ba9beeb42e6884f7594ceb5b512b82bb93097ae4abc395e4d513dbe556 taskcluster-urls==13.0.1 \ --hash=sha256:b25e122ecec249c4299ac7b20b08db76e3e2025bdaeb699a9d444556de5fd367 \ --hash=sha256:5e25e7e6818e8877178b175ff43d2e6548afad72694aa125f404a7329ece0973 \ diff --git a/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/LICENSE b/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/LICENSE similarity index 100% rename from third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/LICENSE rename to third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/LICENSE diff 
--git a/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/METADATA b/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/METADATA similarity index 94% rename from third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/METADATA rename to third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/METADATA index 76d0a5f36146..d224019d770f 100644 --- a/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/METADATA +++ b/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/METADATA @@ -1,8 +1,10 @@ Metadata-Version: 2.1 Name: taskcluster-taskgraph -Version: 2.0.0 +Version: 3.0.0 Summary: Build taskcluster taskgraphs Home-page: https://github.com/taskcluster/taskgraph +License: UNKNOWN +Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable Classifier: Environment :: Console Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0) @@ -24,3 +26,5 @@ Requires-Dist: slugid (>=2.0) Requires-Dist: taskcluster-urls (>=11.0) Requires-Dist: voluptuous (>=0.12.1) +UNKNOWN + diff --git a/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/RECORD b/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/RECORD similarity index 71% rename from third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/RECORD rename to third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/RECORD index 39880d4e16c5..f4bff0b139f1 100644 --- a/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/RECORD +++ b/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/RECORD @@ -1,15 +1,15 @@ taskgraph/__init__.py,sha256=jwOtU7TkmU317LP_IsgIswpj2T1OPUXXgMRv4sIU7nE,707 taskgraph/config.py,sha256=MoFLjKPUViWYGALi_acWDVXZs7M8cy0zQpUKsJSlBMs,4411 
taskgraph/create.py,sha256=1z2AyLvHMkZfDkmPy6um86HG9xTRhE0Sphnbpd-kuEg,5190 -taskgraph/decision.py,sha256=Q59gOkEfi0Dc_bgpwOF2nQfid_7LuOVOfPpr33Fk_co,9887 +taskgraph/decision.py,sha256=X94bfSp6LyYkO7hpi4A0ytWSfHl9YtkRLNaJR8loAWQ,12758 taskgraph/docker.py,sha256=hsMIvRVXiqC8DIGD34WwQrC1JnjaYHSvVWq_lEeNQEE,7471 -taskgraph/files_changed.py,sha256=VElSrr-5dVHUH4N4dPSNbQc1I07XgsalhEiPpeXyWXY,2161 +taskgraph/files_changed.py,sha256=W3_gEgUT-mVH9DaaU_8X6gYpftrqBU3kgveGbzPLziU,2793 taskgraph/filter_tasks.py,sha256=R7tYXiaVPGIkQ6O1c9-QJrKZ59m9pFXCloUlPraVnZU,866 -taskgraph/generator.py,sha256=Mq6t9jEwY-I_Wzb1Hw6r493ePkBDsgKcoT_T71MK0Cc,15106 +taskgraph/generator.py,sha256=ZfSb8dek6tQRxfpHbvQP2KMxXFzmhqwN821tOlNcvzo,15118 taskgraph/graph.py,sha256=9tE3bSSBRHvRLgJzK4dTieGT3RrzQZdR1YbKizEhzlw,4667 -taskgraph/main.py,sha256=ot8nMrW8hyQCicRpPCg-eT2CfiDE4OFqstop6i0bRSE,23238 -taskgraph/morph.py,sha256=ASOaCed_YuLTWdj_pB8qH1_3RFJTu1V6VDwcX3JLn8w,9567 -taskgraph/parameters.py,sha256=NEtKH_kVXKRLlqyQhZ_UJy1hYb11MYfdKveA4NdJtgE,11472 +taskgraph/main.py,sha256=E7dC1q14L4psrNfUe-PMC8QH4cYjsIs91I-aVmzeBaI,23551 +taskgraph/morph.py,sha256=8qxYdruEQkbHGqv7dh3e1OWhH9Y5i6bFUKzDMs-Ctnw,9625 +taskgraph/parameters.py,sha256=rye7dxD3A_Voh9w0Ru28zgZ8rGVv5enUu-k5lE7HvEk,11725 taskgraph/target_tasks.py,sha256=41BIVwiATy8DCQujPduTtnFmgHlKOfw6RPGL4b20WO8,3324 taskgraph/task.py,sha256=QCrOzMaTsy5QHShKUo89XgjJVMl3cSZGZJPLuHCXItE,3132 taskgraph/taskgraph.py,sha256=tfj0ZMqjuwEQDET0W57EcP-_KBEbqkxJci9Z6DkeOEQ,2397 @@ -19,25 +19,25 @@ taskgraph/actions/cancel.py,sha256=UQSt_6y3S6PXNmUo_mNaUOuDvK2bixWjzdjTKXieEEg,1 taskgraph/actions/cancel_all.py,sha256=-ETWKl8BHkk5HjGZRIJpUsFOySE6co0pL0dBDupolu8,1947 taskgraph/actions/registry.py,sha256=p-YTqnhRPSouOqhSoRL5QgUkpO_ab4XIMSFKreu7E_8,13252 taskgraph/actions/retrigger.py,sha256=awSC8XRtPJxADz5tbEWTKdNEudG8SpwUOM7z2lXxH1U,9382 -taskgraph/actions/util.py,sha256=1oxB-1JWMSUoNCtqW4mI2uJGwohqxuf0Vufm0wNGi20,10657 
+taskgraph/actions/util.py,sha256=jA5xXehV8N2G542LZOEci_gMHEFN-BrIjkA55On0kc0,10673 taskgraph/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 taskgraph/loader/transform.py,sha256=olUBPjxk3eEIg25sduxlcyqhjoig4ts5kPlT_zs6g9g,2147 taskgraph/optimize/__init__.py,sha256=Oqpq1RW8QzOcu7zaMlNQ3BHT9ws9e_93FWfCqzNcQps,123 taskgraph/optimize/base.py,sha256=WvoDNewyHG46IQbG3th-aau9OxSKegsYNfvdOEmunbA,18341 -taskgraph/optimize/strategies.py,sha256=OT6ibuI8w66pFVN2czT99HygB1MGwCZiBxEFxPPvr04,2501 +taskgraph/optimize/strategies.py,sha256=Y5fS-f_3xsQNfFjCXIwDxrwXBvyp4yZxdPVNh49c7XU,2381 taskgraph/run-task/fetch-content,sha256=uUoyua3OdIgynY5Q9K6EojBwuaM2zo2OiN9bmNS646Q,24291 taskgraph/run-task/hgrc,sha256=BybWLDR89bWi3pE5T05UqmDHs02CbLypE-omLZWU6Uk,896 taskgraph/run-task/robustcheckout.py,sha256=xc24zaBd6dyuoga1ace0M27jo14K4UXNwhqcbHutJ7U,28977 -taskgraph/run-task/run-task,sha256=wUGP0QDP_uSeMrtxdJJAhtOf7FBVMTyNWqPCHP_vq8A,44701 +taskgraph/run-task/run-task,sha256=TVjIoZO9kbpaG-GCMJV_wjlR9H2xk8vJi0wB_rFleEg,46953 taskgraph/transforms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 -taskgraph/transforms/base.py,sha256=7eyfKlexzz3I4HmVUKYRu2fADeAVWeYZaVSnVUw-e8g,5276 -taskgraph/transforms/cached_tasks.py,sha256=In7Dj-vfOaeQx_e9JXbYN167TuInY6XEhFBuObztngc,2598 -taskgraph/transforms/code_review.py,sha256=du7npIGOpVMK9QZtdkHrkNecRiaqd-fHjHavDVkfL58,698 -taskgraph/transforms/docker_image.py,sha256=JzGRAmuiyv_Q6qJnjhAqAruTNtVNcd6rTRkcJ-Oyycg,7558 +taskgraph/transforms/base.py,sha256=N9ec4kw65V_J2KY4C4QRPlbIREbRDYwTlhClstYmOBU,5285 +taskgraph/transforms/cached_tasks.py,sha256=Z10VD1kEBVXJvj8qSsNTq2mYpklh0V1EN8OT6QK3v_E,2607 +taskgraph/transforms/code_review.py,sha256=eE2xrDtdD_n3HT3caQ2HGAkPm6Uutdm4hDCpCoFjEps,707 +taskgraph/transforms/docker_image.py,sha256=ADiOUB-Ngm9Y6uwzGDpQsDJ_-4w6-ZYwLCxQ-0b16E0,7567 taskgraph/transforms/fetch.py,sha256=jxJw7wlEh_WxAa1Bmy2WIHfpdvL79PDsKwC1DFymbBQ,9584 
taskgraph/transforms/release_notifications.py,sha256=jrb9CCT-z_etDf690T-AeCvdzIoVWBAeM_FGoW7FIzA,3305 taskgraph/transforms/task.py,sha256=n73lD8XtzpJm2BqJpZb_oiGqNHBJzTcT7GWX6jk7Xqc,47839 -taskgraph/transforms/job/__init__.py,sha256=CXEDt7ESCO9cXVymgJxIjG2HjxkahJPwY52bvum1MpY,16910 +taskgraph/transforms/job/__init__.py,sha256=GKYODycxov7u05owF_ZWgczd7WHi2yHTd8L5Ftvxge0,16929 taskgraph/transforms/job/common.py,sha256=onHnerPcmmvbSk0oHt8mvJmOo7AnjHQya0ombgMNLG8,7106 taskgraph/transforms/job/index_search.py,sha256=Ngh9FFu1bx2kHVTChW2vcrbnb3SzMneRHopXk18RfB4,1220 taskgraph/transforms/job/run_task.py,sha256=oRR-is7dRKRrSCY3WntmJ-pKK3wx9-BMJpY9qru2FWY,8654 @@ -62,13 +62,13 @@ taskgraph/util/taskgraph.py,sha256=ecKEvTfmLVvEKLPO_0g34CqVvc0iCzuNMh3064BZNrE,1 taskgraph/util/templates.py,sha256=Dqxfl244u-PX7dnsk3_vYyzDwpDgJtANK6NmZwN3Qow,1417 taskgraph/util/time.py,sha256=dmR9Y0IGKuE1eHfFZjDuBUroK63XLBxEMM5ploO4li4,3490 taskgraph/util/treeherder.py,sha256=XrdE-Je0ZvXe6_8f0DvvqNbrHherUk-hUuxirImPEIo,2138 -taskgraph/util/vcs.py,sha256=uDQtziKfA7UvYADW8NoL_tf_yANb-U01p4wuAF-uXH8,6492 -taskgraph/util/verify.py,sha256=AXnb3OEgjHaoNxeYg0Sr5xjgW03uxZyK03dQaOsqOLI,8272 +taskgraph/util/vcs.py,sha256=nCmvO_hHJIM4vIJ0vlpbQjdIFRtkpRImCikYde-C_R0,17328 +taskgraph/util/verify.py,sha256=YETuZVkwnfYe57GRPx2x_vedstgqdGiH46HLWAdcks8,8827 taskgraph/util/workertypes.py,sha256=5g2mgIbEKMzDpZNnmPMoMNyy7Wahi-jmWcV1amDAcPo,2341 taskgraph/util/yaml.py,sha256=hfKI_D8Q7dimq4_VvO3WEh8CJsTrsIMwN6set7HIQbY,990 -taskcluster_taskgraph-2.0.0.dist-info/LICENSE,sha256=HyVuytGSiAUQ6ErWBHTqt1iSGHhLmlC8fO7jTCuR8dU,16725 -taskcluster_taskgraph-2.0.0.dist-info/METADATA,sha256=HB2014Uod8iLubVBX2WDR_NfoTz8cVTdhqtMkJ0u-do,973 -taskcluster_taskgraph-2.0.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92 -taskcluster_taskgraph-2.0.0.dist-info/entry_points.txt,sha256=2hxDzE3qq_sHh-J3ROqwpxgQgxO-196phWAQREl2-XA,50 
-taskcluster_taskgraph-2.0.0.dist-info/top_level.txt,sha256=3JNeYn_hNiNXC7DrdH_vcv-WYSE7QdgGjdvUYvSjVp0,10 -taskcluster_taskgraph-2.0.0.dist-info/RECORD,, +taskcluster_taskgraph-3.0.0.dist-info/LICENSE,sha256=HyVuytGSiAUQ6ErWBHTqt1iSGHhLmlC8fO7jTCuR8dU,16725 +taskcluster_taskgraph-3.0.0.dist-info/METADATA,sha256=rDJwBZW7nHDBPBcMH7n9eTnb2GONIfgG_YHTgsiB7no,1017 +taskcluster_taskgraph-3.0.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92 +taskcluster_taskgraph-3.0.0.dist-info/entry_points.txt,sha256=VoXNtZpN4LvyXYB1wq47AU9CO-DMYMJ0VktKxjugzbY,51 +taskcluster_taskgraph-3.0.0.dist-info/top_level.txt,sha256=3JNeYn_hNiNXC7DrdH_vcv-WYSE7QdgGjdvUYvSjVp0,10 +taskcluster_taskgraph-3.0.0.dist-info/RECORD,, diff --git a/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/WHEEL b/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/WHEEL similarity index 100% rename from third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/WHEEL rename to third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/WHEEL diff --git a/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/entry_points.txt b/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/entry_points.txt similarity index 98% rename from third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/entry_points.txt rename to third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/entry_points.txt index dec40df69f93..086555b5ccf5 100644 --- a/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/entry_points.txt +++ b/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/entry_points.txt @@ -1,2 +1,3 @@ [console_scripts] taskgraph = taskgraph.main:main + diff --git a/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/top_level.txt 
b/third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/top_level.txt similarity index 100% rename from third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-2.0.0.dist-info/top_level.txt rename to third_party/python/taskcluster_taskgraph/taskcluster_taskgraph-3.0.0.dist-info/top_level.txt diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py index 4aea487a3c22..dd3248d20920 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py @@ -160,6 +160,7 @@ def create_tasks( target_task_graph.for_each_task(update_dependencies) optimized_task_graph, label_to_taskid = optimize_task_graph( target_task_graph, + to_run, params, to_run, decision_task_id, diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/decision.py b/third_party/python/taskcluster_taskgraph/taskgraph/decision.py index 224ba74d7589..38071f1df657 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/decision.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/decision.py @@ -21,7 +21,7 @@ from taskgraph.parameters import Parameters, get_version from taskgraph.taskgraph import TaskGraph from taskgraph.util.python_path import find_object from taskgraph.util.schema import Schema, validate_schema -from taskgraph.util.vcs import get_repository +from taskgraph.util.vcs import Repository, get_repository from taskgraph.util.yaml import load_yaml logger = logging.getLogger(__name__) @@ -143,6 +143,8 @@ def get_decision_parameters(graph_config, options): n: options[n] for n in [ "base_repository", + "base_ref", + "base_rev", "head_repository", "head_rev", "head_ref", @@ -166,6 +168,21 @@ def get_decision_parameters(graph_config, options): except UnicodeDecodeError: commit_message = "" + parameters["base_ref"] = _determine_more_accurate_base_ref( + repo, + 
candidate_base_ref=options.get("base_ref"), + head_ref=options.get("head_ref"), + base_rev=options.get("base_rev"), + ) + + parameters["base_rev"] = _determine_more_accurate_base_rev( + repo, + base_ref=parameters["base_ref"], + candidate_base_rev=options.get("base_rev"), + head_rev=options.get("head_rev"), + env_prefix=_get_env_prefix(graph_config), + ) + # Define default filter list, as most configurations shouldn't need # custom filters. parameters["filters"] = [ @@ -236,6 +253,68 @@ def get_decision_parameters(graph_config, options): return result +def _determine_more_accurate_base_ref(repo, candidate_base_ref, head_ref, base_rev): + base_ref = candidate_base_ref + + if not candidate_base_ref: + base_ref = repo.default_branch + elif candidate_base_ref == head_ref and base_rev == Repository.NULL_REVISION: + logger.info( + "base_ref and head_ref are identical but base_rev equals the null revision. " + "This is a new branch but Github didn't identify its actual base." + ) + base_ref = repo.default_branch + + if base_ref != candidate_base_ref: + logger.info( + f'base_ref has been reset from "{candidate_base_ref}" to "{base_ref}".' + ) + + return base_ref + + +def _determine_more_accurate_base_rev( + repo, base_ref, candidate_base_rev, head_rev, env_prefix +): + if not candidate_base_rev: + logger.info("base_rev is not set.") + base_ref_or_rev = base_ref + elif candidate_base_rev == Repository.NULL_REVISION: + logger.info("base_rev equals the null revision. This branch is a new one.") + base_ref_or_rev = base_ref + elif not repo.does_revision_exist_locally(candidate_base_rev): + logger.warning( + "base_rev does not exist locally. It is likely because the branch was force-pushed. " + "taskgraph is not able to assess how many commits were changed and assumes it is only " + f"the last one. Please set the {env_prefix.upper()}_BASE_REV environment variable " + "in the decision task and provide `--base-rev` to taskgraph." 
+ ) + base_ref_or_rev = base_ref + else: + base_ref_or_rev = candidate_base_rev + + if base_ref_or_rev == base_ref: + logger.info( + f'Using base_ref "{base_ref}" to determine latest common revision...' + ) + + base_rev = repo.find_latest_common_revision(base_ref_or_rev, head_rev) + if base_rev != candidate_base_rev: + if base_ref_or_rev == candidate_base_rev: + logger.info("base_rev is not an ancestor of head_rev.") + + logger.info( + f'base_rev has been reset from "{candidate_base_rev}" to "{base_rev}".' + ) + + return base_rev + + +def _get_env_prefix(graph_config): + repo_keys = list(graph_config["taskgraph"].get("repositories", {}).keys()) + return repo_keys[0] if repo_keys else "" + + def set_try_config(parameters, task_config_file): if os.path.isfile(task_config_file): logger.info(f"using try tasks from {task_config_file}") diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py b/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py index 82219b46e92b..6be6e5eeee57 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py @@ -8,23 +8,43 @@ Support for optimizing tasks based on the set of files that have changed. import logging +import os import requests from redo import retry from .util.memoize import memoize from .util.path import match as match_path +from .util.vcs import get_repository logger = logging.getLogger(__name__) @memoize -def get_changed_files(repository, revision): +def get_changed_files(head_repository_url, head_rev, base_rev=None): """ - Get the set of files changed in the push headed by the given revision. + Get the set of files changed between revisions. Responses are cached, so multiple calls with the same arguments are OK. 
""" - url = "{}/json-automationrelevance/{}".format(repository.rstrip("/"), revision) + repo_path = os.getcwd() + repository = get_repository(repo_path) + + if repository.tool == "hg": + # TODO Use VCS version once tested enough + return _get_changed_files_json_automationrelevance( + head_repository_url, head_rev + ) + + return repository.get_changed_files(rev=head_rev, base_rev=base_rev) + + +def _get_changed_files_json_automationrelevance(head_repository_url, head_rev): + """ + Get the set of files changed in the push headed by the given revision. + """ + url = "{}/json-automationrelevance/{}".format( + head_repository_url.rstrip("/"), head_rev + ) logger.debug("Querying version control for metadata: %s", url) def get_automationrelevance(): @@ -48,18 +68,20 @@ def get_changed_files(repository, revision): def check(params, file_patterns): - """Determine whether any of the files changed in the indicated push to - https://hg.mozilla.org match any of the given file patterns.""" - repository = params.get("head_repository") - revision = params.get("head_rev") - if not repository or not revision: + """Determine whether any of the files changed between 2 revisions + match any of the given file patterns.""" + + head_repository_url = params.get("head_repository") + head_rev = params.get("head_rev") + if not head_repository_url or not head_rev: logger.warning( "Missing `head_repository` or `head_rev` parameters; " "assuming all files have changed" ) return True - changed_files = get_changed_files(repository, revision) + base_rev = params.get("base_rev") + changed_files = get_changed_files(head_repository_url, head_rev, base_rev) for pattern in file_patterns: for path in changed_files: diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/generator.py b/third_party/python/taskcluster_taskgraph/taskgraph/generator.py index 1cfddb0cc7ac..d9b6d8ad3a31 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/generator.py +++ 
b/third_party/python/taskcluster_taskgraph/taskgraph/generator.py @@ -51,9 +51,9 @@ class Kind: config = copy.deepcopy(self.config) kind_dependencies = config.get("kind-dependencies", []) - kind_dependencies_tasks = [ - task for task in loaded_tasks if task.kind in kind_dependencies - ] + kind_dependencies_tasks = { + task.label: task for task in loaded_tasks if task.kind in kind_dependencies + } inputs = loader(self.name, self.path, config, parameters, loaded_tasks) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/main.py b/third_party/python/taskcluster_taskgraph/taskgraph/main.py index 82cfbd42da2c..dc06b4ed5727 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/main.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/main.py @@ -343,7 +343,7 @@ def show_taskgraph(options): logging.root.setLevel(logging.DEBUG) repo = None - cur_ref = None + cur_rev = None diffdir = None output_file = options["output_file"] @@ -361,16 +361,16 @@ def show_taskgraph(options): # as best we can after we're done. In all known cases, using # branch or bookmark (which are both available on the VCS object) # as `branch` is preferable to a specific revision. 
- cur_ref = repo.branch or repo.head_ref[:12] + cur_rev = repo.branch or repo.head_rev[:12] diffdir = tempfile.mkdtemp() atexit.register( shutil.rmtree, diffdir ) # make sure the directory gets cleaned up options["output_file"] = os.path.join( - diffdir, f"{options['graph_attr']}_{cur_ref}" + diffdir, f"{options['graph_attr']}_{cur_rev}" ) - print(f"Generating {options['graph_attr']} @ {cur_ref}", file=sys.stderr) + print(f"Generating {options['graph_attr']} @ {cur_rev}", file=sys.stderr) parameters: List[Any[str, Parameters]] = options.pop("parameters") if not parameters: @@ -418,33 +418,33 @@ def show_taskgraph(options): del sys.modules[mod] if options["diff"] == "default": - base_ref = repo.base_ref + base_rev = repo.base_rev else: - base_ref = options["diff"] + base_rev = options["diff"] try: - repo.update(base_ref) - base_ref = repo.head_ref[:12] + repo.update(base_rev) + base_rev = repo.head_rev[:12] options["output_file"] = os.path.join( - diffdir, f"{options['graph_attr']}_{base_ref}" + diffdir, f"{options['graph_attr']}_{base_rev}" ) - print(f"Generating {options['graph_attr']} @ {base_ref}", file=sys.stderr) + print(f"Generating {options['graph_attr']} @ {base_rev}", file=sys.stderr) generate_taskgraph(options, parameters, logdir) finally: - repo.update(cur_ref) + repo.update(cur_rev) # Generate diff(s) diffcmd = [ "diff", "-U20", "--report-identical-files", - f"--label={options['graph_attr']}@{base_ref}", - f"--label={options['graph_attr']}@{cur_ref}", + f"--label={options['graph_attr']}@{base_rev}", + f"--label={options['graph_attr']}@{cur_rev}", ] for spec in parameters: - base_path = os.path.join(diffdir, f"{options['graph_attr']}_{base_ref}") - cur_path = os.path.join(diffdir, f"{options['graph_attr']}_{cur_ref}") + base_path = os.path.join(diffdir, f"{options['graph_attr']}_{base_rev}") + cur_path = os.path.join(diffdir, f"{options['graph_attr']}_{cur_rev}") params_name = None if len(parameters) > 1: @@ -593,6 +593,15 @@ def image_digest(args): 
help='Type of repository, either "hg" or "git"', ) @argument("--base-repository", required=True, help='URL for "base" repository to clone') +@argument( + "--base-ref", default="", help='Reference of the revision in the "base" repository' +) +@argument( + "--base-rev", + default="", + help="Taskgraph decides what to do based on the revision range between " + "`--base-rev` and `--head-rev`. Value is determined automatically if not provided", +) @argument( "--head-repository", required=True, diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/morph.py b/third_party/python/taskcluster_taskgraph/taskgraph/morph.py index a28250c3772d..c48831778241 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/morph.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/morph.py @@ -33,6 +33,12 @@ here = os.path.abspath(os.path.dirname(__file__)) logger = logging.getLogger(__name__) MAX_ROUTES = 10 +registered_morphs = [] + + +def register_morph(func): + registered_morphs.append(func) + def amend_taskgraph(taskgraph, label_to_taskid, to_add): """Add the given tasks to the taskgraph, returning a new taskgraph""" @@ -156,6 +162,7 @@ def make_index_task(parent_task, taskgraph, label_to_taskid, parameters, graph_c return task, taskgraph, label_to_taskid +@register_morph def add_index_tasks(taskgraph, label_to_taskid, parameters, graph_config): """ The TaskCluster queue only allows 10 routes on a task, but we have tasks @@ -196,8 +203,9 @@ def _get_morph_url(): return f"{taskgraph_repo}/raw-file/{taskgraph_rev}/src/taskgraph/morph.py" +@register_morph def add_code_review_task(taskgraph, label_to_taskid, parameters, graph_config): - logger.debug("Morphing: adding index tasks") + logger.debug("Morphing: adding code review task") review_config = parameters.get("code-review") if not review_config: @@ -256,12 +264,7 @@ def add_code_review_task(taskgraph, label_to_taskid, parameters, graph_config): def morph(taskgraph, label_to_taskid, parameters, graph_config): 
"""Apply all morphs""" - morphs = [ - add_index_tasks, - add_code_review_task, - ] - - for m in morphs: + for m in registered_morphs: taskgraph, label_to_taskid = m( taskgraph, label_to_taskid, parameters, graph_config ) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py index 2f6bb376dec7..c6846e60c556 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py @@ -50,13 +50,8 @@ class IndexSearch(OptimizationStrategy): @register_strategy("skip-unless-changed") class SkipUnlessChanged(OptimizationStrategy): def should_remove_task(self, task, params, file_patterns): - if params.get("repository_type") != "hg": - raise RuntimeError( - "SkipUnlessChanged optimization only works with mercurial repositories" - ) - - # pushlog_id == -1 - this is the case when run from a cron.yml job - if params.get("pushlog_id") == -1: + # pushlog_id == -1 - this is the case when run from a cron.yml job or on a git repository + if params.get("repository_type") == "hg" and params.get("pushlog_id") == -1: return False changed = files_changed.check(params, file_patterns) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py b/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py index 9ce3c0650be4..f6297e0f9478 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py @@ -31,6 +31,8 @@ class ParameterMismatch(Exception): base_schema = Schema( { Required("base_repository"): str, + Required("base_ref"): str, + Required("base_rev"): str, Required("build_date"): int, Required("build_number"): int, Required("do_not_optimize"): [str], @@ -83,16 +85,19 @@ def _get_defaults(repo_root=None): repo_url = "" project = "" + default_base_ref = repo.default_branch 
return { "base_repository": repo_url, + "base_ref": default_base_ref, + "base_rev": repo.find_latest_common_revision(default_base_ref, repo.head_rev), "build_date": int(time.time()), "build_number": 1, "do_not_optimize": [], "existing_tasks": {}, "filters": ["target_tasks_method"], - "head_ref": repo.head_ref, + "head_ref": repo.branch or repo.head_rev, "head_repository": repo_url, - "head_rev": repo.head_ref, + "head_rev": repo.head_rev, "head_tag": "", "level": "3", "moz_build_date": datetime.now().strftime("%Y%m%d%H%M%S"), diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task index e847bc1352a2..6f54a81f72cc 100755 --- a/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task +++ b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task @@ -106,6 +106,13 @@ IS_MACOSX = sys.platform == 'darwin' IS_POSIX = os.name == 'posix' IS_WINDOWS = os.name == 'nt' +# Both mercurial and git use sha1 as revision identifiers. Luckily, both define +# the same value as the null revision.
+# +# https://github.com/git/git/blob/dc04167d378fb29d30e1647ff6ff51dd182bc9a3/t/oid-info/hash-info#L7 +# https://www.mercurial-scm.org/repo/hg-stable/file/82efc31bd152/mercurial/node.py#l30 +NULL_REVISION = "0000000000000000000000000000000000000000" + def print_line(prefix, m): now = datetime.datetime.utcnow().isoformat().encode('utf-8') @@ -557,6 +564,8 @@ def git_checkout( destination_path: str, head_repo: str, base_repo: Optional[str], + base_ref: Optional[str], + base_rev: Optional[str], ref: Optional[str], commit: Optional[str], ssh_key_file: Optional[Path], @@ -591,6 +600,42 @@ def git_checkout( retry_required_command(b'vcs', args, extra_env=env) + if base_ref: + args = [ + 'git', + 'fetch', + 'origin', + base_ref + ] + + retry_required_command(b'vcs', args, cwd=destination_path, extra_env=env) + + # Create local branch so that taskgraph is able to compute differences + # between the head branch and the base one, if needed + args = [ + 'git', + 'checkout', + base_ref + ] + + retry_required_command(b'vcs', args, cwd=destination_path, extra_env=env) + + # When commits are force-pushed (like on a testing branch), base_rev doesn't + # exist on base_ref. Fetching it allows taskgraph to compute differences + # between the previous state before the force-push and the current state. + # + # Unlike base_ref just above, there is no need to checkout the revision: + # it's immediately available after the fetch.
+ if base_rev and base_rev != NULL_REVISION: + args = [ + 'git', + 'fetch', + 'origin', + base_rev + ] + + retry_required_command(b'vcs', args, cwd=destination_path, extra_env=env) + # If a ref isn't provided, we fetch all refs from head_repo, which may be slow args = [ 'git', @@ -606,11 +651,31 @@ def git_checkout( 'git', 'checkout', '-f', - commit if commit else ref ] + if ref: + args.extend(['-B', ref]) + args.append(commit if commit else ref) + run_required_command(b'vcs', args, cwd=destination_path) + if os.path.exists(os.path.join(destination_path, '.gitmodules')): + args = [ + 'git', + 'submodule', + 'init', + ] + + run_required_command(b'vcs', args, cwd=destination_path) + + args = [ + 'git', + 'submodule', + 'update', + ] + + run_required_command(b'vcs', args, cwd=destination_path) + _clean_git_checkout(destination_path) args = [ @@ -818,6 +883,8 @@ def collect_vcs_options(args, project, name): repo_type = os.environ.get('%s_REPOSITORY_TYPE' % env_prefix) base_repo = os.environ.get('%s_BASE_REPOSITORY' % env_prefix) + base_ref = os.environ.get('%s_BASE_REF' % env_prefix) + base_rev = os.environ.get('%s_BASE_REV' % env_prefix) head_repo = os.environ.get('%s_HEAD_REPOSITORY' % env_prefix) revision = os.environ.get('%s_HEAD_REV' % env_prefix) ref = os.environ.get('%s_HEAD_REF' % env_prefix) @@ -849,6 +916,8 @@ def collect_vcs_options(args, project, name): 'checkout': checkout, 'sparse-profile': sparse_profile, 'base-repo': base_repo, + 'base-ref': base_ref, + 'base-rev': base_rev, 'head-repo': head_repo, 'revision': revision, 'ref': ref, @@ -896,6 +965,8 @@ def vcs_checkout_from_args(options, *, hgmo_fingerprint): options['checkout'], options['head-repo'], options['base-repo'], + options['base-ref'], + options['base-rev'], ref, revision, ssh_key_file, diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py index 8fe147cafac9..383e6a47981e 100644 --- 
a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py @@ -46,9 +46,9 @@ class TransformConfig: # the parameters for this task-graph generation run params = attr.ib(type=Parameters) - # a list of all the tasks associated with the kind dependencies of the + # a dict of all the tasks associated with the kind dependencies of the # current kind - kind_dependencies_tasks = attr.ib() + kind_dependencies_tasks = attr.ib(type=dict) # Global configuration of the taskgraph graph_config = attr.ib(type=GraphConfig) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py index 2e8d052b6931..57a55dffb340 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py @@ -57,7 +57,7 @@ def cache_task(config, tasks): return digests = {} - for task in config.kind_dependencies_tasks: + for task in config.kind_dependencies_tasks.values(): if "cached_task" in task.attributes: digests[task.label] = format_task_digest(task.attributes["cached_task"]) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py index 2260f8efea08..bdb655b97d57 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py @@ -17,7 +17,7 @@ def add_dependencies(config, jobs): job.setdefault("soft-dependencies", []) job["soft-dependencies"] += [ dep_task.label - for dep_task in config.kind_dependencies_tasks + for dep_task in config.kind_dependencies_tasks.values() if dep_task.attributes.get("code-review") is True ] yield job diff --git 
a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py index fa4d27b21455..dd7c01e5a999 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py @@ -67,7 +67,7 @@ transforms.add_validate(docker_image_schema) @transforms.add def fill_template(config, tasks): available_packages = set() - for task in config.kind_dependencies_tasks: + for task in config.kind_dependencies_tasks.values(): if task.kind != "packages": continue name = task.label.replace("packages-", "") diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py index 534721bf0e40..01c482937968 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py @@ -211,7 +211,7 @@ def use_fetches(config, jobs): if value: aliases[f"{config.kind}-{value}"] = label - for task in config.kind_dependencies_tasks: + for task in config.kind_dependencies_tasks.values(): if task.kind in ("fetch", "toolchain"): get_attribute( artifact_names, @@ -275,8 +275,8 @@ def use_fetches(config, jobs): else: dep_tasks = [ task - for task in config.kind_dependencies_tasks - if task.label == dep_label + for label, task in config.kind_dependencies_tasks.items() + if label == dep_label ] if len(dep_tasks) != 1: raise Exception( diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py index 35b1ee4be71a..31703cd86cc8 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py @@ -4,6 +4,7 @@ import os +import re import subprocess from 
abc import ABC, abstractmethod, abstractproperty from shutil import which @@ -17,36 +18,59 @@ PUSHLOG_TMPL = "{}/json-pushes?version=2&changeset={}&tipsonly=1&full=1" class Repository(ABC): + # Both mercurial and git use sha1 as revision identifiers. Luckily, both define + # the same value as the null revision. + # + # https://github.com/git/git/blob/dc04167d378fb29d30e1647ff6ff51dd182bc9a3/t/oid-info/hash-info#L7 + # https://www.mercurial-scm.org/repo/hg-stable/file/82efc31bd152/mercurial/node.py#l30 + NULL_REVISION = "0000000000000000000000000000000000000000" + def __init__(self, path): self.path = path self.binary = which(self.tool) if self.binary is None: raise OSError(f"{self.tool} not found!") + self._valid_diff_filter = ("m", "a", "d") self._env = os.environ.copy() def run(self, *args: str, **kwargs): + return_codes = kwargs.pop("return_codes", []) cmd = (self.binary,) + args - return subprocess.check_output( - cmd, cwd=self.path, env=self._env, encoding="utf-8", **kwargs - ) + + try: + return subprocess.check_output( + cmd, cwd=self.path, env=self._env, encoding="utf-8", **kwargs + ) + except subprocess.CalledProcessError as e: + if e.returncode in return_codes: + return "" + raise @abstractproperty def tool(self) -> str: """Version control system being used, either 'hg' or 'git'.""" @abstractproperty - def head_ref(self) -> str: + def head_rev(self) -> str: """Hash of HEAD revision.""" @abstractproperty - def base_ref(self): + def base_rev(self): """Hash of revision the current topic branch is based on.""" @abstractproperty def branch(self): """Current branch or bookmark the checkout has active.""" + @abstractproperty + def remote_name(self): + """Name of the remote repository.""" + + @abstractproperty + def default_branch(self): + """Name of the default branch.""" + @abstractmethod def get_url(self, remote=None): """Get URL of the upstream repository.""" @@ -55,6 +79,43 @@ class Repository(ABC): def get_commit_message(self, revision=None): """Commit message
of specified revision or current commit.""" + @abstractmethod + def get_changed_files(self, diff_filter, mode="unstaged", rev=None, base_rev=None): + """Return a list of files that are changed in: + * either this repository's working copy, + * or at a given revision (``rev``) + * or between 2 revisions (``base_rev`` and ``rev``) + + ``diff_filter`` controls which kinds of modifications are returned. + It is a string which may only contain the following characters: + + A - Include files that were added + D - Include files that were deleted + M - Include files that were modified + + By default, all three will be included. + + ``mode`` can be one of 'unstaged', 'staged' or 'all'. Only has an + effect on git. Defaults to 'unstaged'. + + ``rev`` is a specifier for which changesets to consider for + changes. The exact meaning depends on the vcs system being used. + + ``base_rev`` specifies the range of changesets. This parameter cannot + be used without ``rev``. The range includes ``rev`` but excludes + ``base_rev``. + """ + + @abstractmethod + def get_outgoing_files(self, diff_filter, upstream): + """Return a list of changed files compared to upstream. + + ``diff_filter`` works the same as `get_changed_files`. + ``upstream`` is a remote ref to compare against. If unspecified, + this will be determined automatically. If there is no remote ref, + a MissingUpstreamRepo exception will be raised. + """ + @abstractmethod def working_directory_clean(self, untracked=False, ignored=False): """Determine if the working directory is free of modifications. 
@@ -71,6 +132,18 @@ class Repository(ABC): def update(self, ref): """Update the working directory to the specified reference.""" + @abstractmethod + def find_latest_common_revision(self, base_ref_or_rev, head_rev): + """Find the latest revision that is common to both the given + ``head_rev`` and ``base_ref_or_rev``""" + + @abstractmethod + def does_revision_exist_locally(self, revision): + """Check whether this revision exists in the local repository. + + If this function returns an unexpected value, then make sure + the revision was fetched from the remote repository.""" + class HgRepository(Repository): tool = "hg" @@ -80,11 +153,11 @@ class HgRepository(Repository): self._env["HGPLAIN"] = "1" @property - def head_ref(self): + def head_rev(self): return self.run("log", "-r", ".", "-T", "{node}").strip() @property - def base_ref(self): + def base_rev(self): return self.run("log", "-r", "last(ancestors(.) and public())", "-T", "{node}") @property @@ -97,13 +170,85 @@ class HgRepository(Repository): return None + @property + def remote_name(self): + remotes = self.run("paths", "--quiet").splitlines() + if len(remotes) == 1: + return remotes[0] + + if "default" in remotes: + return "default" + + raise RuntimeError( + f"Cannot determine remote repository name. 
Candidate remotes: {remotes}" + ) + + @property + def default_branch(self): + # Mercurial recommends keeping "default" + # https://www.mercurial-scm.org/wiki/StandardBranching#Don.27t_use_a_name_other_than_default_for_your_main_development_branch + return "default" + def get_url(self, remote="default"): return self.run("path", "-T", "{url}", remote).strip() def get_commit_message(self, revision=None): - revision = revision or self.head_ref + revision = revision or self.head_rev return self.run("log", "-r", ".", "-T", "{desc}") + def _format_diff_filter(self, diff_filter, for_status=False): + df = diff_filter.lower() + assert all(f in self._valid_diff_filter for f in df) + + # When looking at the changes in the working directory, the hg status + # command uses 'd' for files that have been deleted with a non-hg + # command, and 'r' for files that have been `hg rm`ed. Use both. + return df.replace("d", "dr") if for_status else df + + def _files_template(self, diff_filter): + template = "" + df = self._format_diff_filter(diff_filter) + if "a" in df: + template += "{file_adds % '{file}\\n'}" + if "d" in df: + template += "{file_dels % '{file}\\n'}" + if "m" in df: + template += "{file_mods % '{file}\\n'}" + return template + + def get_changed_files( + self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None + ): + if rev is None: + if base_rev is not None: + raise ValueError("Cannot specify `base_rev` without `rev`") + # Use --no-status to print just the filename. 
+ df = self._format_diff_filter(diff_filter, for_status=True) + return self.run("status", "--no-status", f"-{df}").splitlines() + else: + template = self._files_template(diff_filter) + revision_argument = rev if base_rev is None else f"{base_rev}~-1::{rev}" + return self.run("log", "-r", revision_argument, "-T", template).splitlines() + + def get_outgoing_files(self, diff_filter="ADM", upstream=None): + template = self._files_template(diff_filter) + + if not upstream: + return self.run( + "log", "-r", "draft() and ancestors(.)", "--template", template + ).split() + + return self.run( + "outgoing", + "-r", + ".", + "--quiet", + "--template", + template, + upstream, + return_codes=(1,), + ).split() + def working_directory_clean(self, untracked=False, ignored=False): args = ["status", "--modified", "--added", "--removed", "--deleted"] if untracked: @@ -118,34 +263,173 @@ class HgRepository(Repository): def update(self, ref): return self.run("update", "--check", ref) + def find_latest_common_revision(self, base_ref_or_rev, head_rev): + return self.run( + "log", + "-r", + f"last(ancestors('{base_ref_or_rev}') and ancestors('{head_rev}'))", + "--template", + "{node}", + ).strip() + + def does_revision_exist_locally(self, revision): + try: + return self.run("log", "-r", revision).strip() != "" + except subprocess.CalledProcessError as e: + # Error code 255 comes with the message: + # "abort: unknown revision $REVISION" + if e.returncode == 255: + return False + raise + class GitRepository(Repository): tool = "git" + _LS_REMOTE_PATTERN = re.compile(r"ref:\s+refs/heads/(?P<branch_name>\S+)\s+HEAD") + @property - def head_ref(self): + def head_rev(self): return self.run("rev-parse", "--verify", "HEAD").strip() @property - def base_ref(self): + def base_rev(self): refs = self.run( "rev-list", "HEAD", "--topo-order", "--boundary", "--not", "--remotes" ).splitlines() if refs: return refs[-1][1:] # boundary starts with a prefix `-` - return self.head_ref + return self.head_rev @property def
branch(self): return self.run("branch", "--show-current").strip() or None + @property + def remote_name(self): + try: + remote_branch_name = self.run( + "rev-parse", "--verify", "--abbrev-ref", "--symbolic-full-name", "@{u}" + ).strip() + return remote_branch_name.split("/")[0] + except subprocess.CalledProcessError as e: + # Error code 128 comes with the message: + # "fatal: no upstream configured for branch $BRANCH" + if e.returncode != 128: + raise + + remotes = self.run("remote").splitlines() + if len(remotes) == 1: + return remotes[0] + + if "origin" in remotes: + return "origin" + + raise RuntimeError( + f"Cannot determine remote repository name. Candidate remotes: {remotes}" + ) + + @property + def default_branch(self): + try: + # this one works if the current repo was cloned from an existing + # repo elsewhere + return self._get_default_branch_from_cloned_metadata() + except (subprocess.CalledProcessError, RuntimeError): + pass + + try: + # This call works if you have (network) access to the repo + return self._get_default_branch_from_remote_query() + except (subprocess.CalledProcessError, RuntimeError): + pass + + # this one is the last resort in case the remote is not accessible and + # the local repo is where `git init` was made + return self._guess_default_branch() + + def _get_default_branch_from_remote_query(self): + # This function requires network access to the repo + output = self.run("ls-remote", "--symref", self.remote_name, "HEAD") + matches = self._LS_REMOTE_PATTERN.search(output) + if not matches: + raise RuntimeError( + f'Could not find the default branch of remote repository "{self.remote_name}". 
' + "Got: {output}" + ) + + return matches.group("branch_name") + + def _get_default_branch_from_cloned_metadata(self): + output = self.run( + "rev-parse", "--abbrev-ref", f"{self.remote_name}/HEAD" + ).strip() + return "/".join(output.split("/")[1:]) + + def _guess_default_branch(self): + branches = [ + candidate_branch + for line in self.run( + "branch", "--all", "--no-color", "--format=%(refname:short)" + ).splitlines() + for candidate_branch in ("main", "master") + if candidate_branch == line.strip() + ] + + if branches: + return branches[0] + + raise RuntimeError(f"Unable to find default branch. Got: {branches}") + def get_url(self, remote="origin"): return self.run("remote", "get-url", remote).strip() def get_commit_message(self, revision=None): - revision = revision or self.head_ref + revision = revision or self.head_rev return self.run("log", "-n1", "--format=%B") + def get_changed_files( + self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None + ): + assert all(f.lower() in self._valid_diff_filter for f in diff_filter) + + if rev is None: + if base_rev is not None: + raise ValueError("Cannot specify `base_rev` without `rev`") + cmd = ["diff"] + if mode == "staged": + cmd.append("--cached") + elif mode == "all": + cmd.append("HEAD") + else: + revision_argument = ( + f"{rev}~1..{rev}" if base_rev is None else f"{base_rev}..{rev}" + ) + cmd = ["log", "--format=format:", revision_argument] + + cmd.append("--name-only") + cmd.append("--diff-filter=" + diff_filter.upper()) + + files = self.run(*cmd).splitlines() + return [f for f in files if f] + + def get_outgoing_files(self, diff_filter="ADM", upstream=None): + assert all(f.lower() in self._valid_diff_filter for f in diff_filter) + + not_condition = upstream if upstream else "--remotes" + + files = self.run( + "log", + "--name-only", + f"--diff-filter={diff_filter.upper()}", + "--oneline", + "--pretty=format:", + "HEAD", + "--not", + not_condition, + ).splitlines() + return [f for f in files if f] + 
def working_directory_clean(self, untracked=False, ignored=False): args = ["status", "--porcelain"] @@ -167,6 +451,19 @@ class GitRepository(Repository): def update(self, ref): self.run("checkout", ref) + def find_latest_common_revision(self, base_ref_or_rev, head_rev): + return self.run("merge-base", base_ref_or_rev, head_rev).strip() + + def does_revision_exist_locally(self, revision): + try: + return self.run("cat-file", "-t", revision).strip() == "commit" + except subprocess.CalledProcessError as e: + # Error code 128 comes with the message: + # "git cat-file: could not get object info" + if e.returncode == 128: + return False + raise + def get_repository(path): """Get a repository object for the repository at `path`. diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py index 2324efcfc983..5911914f135a 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py @@ -13,6 +13,7 @@ from taskgraph.config import GraphConfig from taskgraph.parameters import Parameters from taskgraph.taskgraph import TaskGraph from taskgraph.util.attributes import match_run_on_projects +from taskgraph.util.treeherder import join_symbol logger = logging.getLogger(__name__) @@ -131,15 +132,26 @@ def verify_task_graph_symbol(task, taskgraph, scratch_pad, graph_config, paramet treeherder = extra["treeherder"] collection_keys = tuple(sorted(treeherder.get("collection", {}).keys())) + if len(collection_keys) != 1: + raise Exception( + "Task {} can't be in multiple treeherder collections " + "(the part of the platform after `/`): {}".format( + task.label, collection_keys + ) + ) platform = treeherder.get("machine", {}).get("platform") group_symbol = treeherder.get("groupSymbol") symbol = treeherder.get("symbol") - key = (collection_keys, platform, group_symbol, symbol) + key = (platform, collection_keys[0], 
group_symbol, symbol) if key in scratch_pad: raise Exception( - "conflict between `{}`:`{}` for values `{}`".format( - task.label, scratch_pad[key], key + "Duplicate treeherder platform and symbol in tasks " + "`{}`and `{}`: {} {}".format( + task.label, + scratch_pad[key], + f"{platform}/{collection_keys[0]}", + join_symbol(group_symbol, symbol), ) ) else: