Bug 1733950 - Vendor 'taskcluster-taskgraph==1.0.1' into mozilla-central, r=taskgraph-reviewers,aki

This will slowly start being used by 'taskcluster/gecko_taskgraph'.

Differential Revision: https://phabricator.services.mozilla.com/D127872
This commit is contained in:
Andrew Halberstadt 2021-10-08 04:05:53 +00:00
parent d72ad5288b
commit a7a0fc9117
72 changed files with 12719 additions and 4 deletions

View File

@@ -107,6 +107,7 @@ vendored:third_party/python/sentry_sdk
vendored:third_party/python/six
vendored:third_party/python/slugid
vendored:third_party/python/taskcluster
vendored:third_party/python/taskcluster_taskgraph
vendored:third_party/python/taskcluster_urls
vendored:third_party/python/typing_extensions
vendored:third_party/python/urllib3

View File

@@ -34,6 +34,7 @@ sentry-sdk==0.14.3
six==1.13.0
slugid==2.0.0
taskcluster==44.2.2
taskcluster-taskgraph==1.0.1
taskcluster-urls==13.0.1
voluptuous==0.12.1
yamllint==1.23

View File

@@ -43,6 +43,7 @@ appdirs==1.4.4 \
# via
# -r requirements-mach-vendor-python.in
# glean-parser
# taskcluster-taskgraph
async-timeout==3.0.1 \
--hash=sha256:0c3c816a028d47f659d6ff5c745cb2acf1f966da1fe5c19c77a70282b25f4c5f \
--hash=sha256:4291ca197d287d274d0b6cb5d6f8f8f82d434ed288f962539ff18cc9012f9ea3
@@ -57,6 +58,7 @@ attrs==19.1.0 \
# aiohttp
# jsonschema
# mozilla-version
# taskcluster-taskgraph
blessings==1.7 \
--hash=sha256:98e5854d805f50a5b58ac2333411b0482516a8210f23f43308baeb58d77c157d \
--hash=sha256:b1fdd7e7a675295630f9ae71527a8ebc10bfefa236b3d6aa4932ee4462c17ba3 \
@@ -187,7 +189,9 @@ jsmin==2.1.0 \
# via -r requirements-mach-vendor-python.in
json-e==2.7.0 \
--hash=sha256:d8c1ec3f5bbc7728c3a504ebe58829f283c64eca230871e4eefe974b4cdaae4a
# via -r requirements-mach-vendor-python.in
# via
# -r requirements-mach-vendor-python.in
# taskcluster-taskgraph
jsonschema==3.2.0 \
--hash=sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163 \
--hash=sha256:c8a85b28d377cc7737e46e2d9f2b4f44ee3c0e1deac6bf46ddefc7187d30797a
@@ -355,15 +359,20 @@ pyyaml==5.4.1 \
# via
# -r requirements-mach-vendor-python.in
# glean-parser
# taskcluster-taskgraph
# yamllint
redo==2.0.3 \
--hash=sha256:36784bf8ae766e14f9db0e377ccfa02835d648321d2007b6ae0bf4fd612c0f94 \
--hash=sha256:71161cb0e928d824092a5f16203939bbc0867ce4c4685db263cf22c3ae7634a8
# via -r requirements-mach-vendor-python.in
# via
# -r requirements-mach-vendor-python.in
# taskcluster-taskgraph
requests-unixsocket==0.2.0 \
--hash=sha256:014d07bfb66dc805a011a8b4b306cf4ec96d2eddb589f6b2b5765e626f0dc0cc \
--hash=sha256:9e5c1a20afc3cf786197ae59c79bcdb0e7565f218f27df5f891307ee8817c1ea
# via -r requirements-mach-vendor-python.in
# via
# -r requirements-mach-vendor-python.in
# taskcluster-taskgraph
requests==2.25.1 \
--hash=sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804 \
--hash=sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e
@@ -372,6 +381,7 @@ requests==2.25.1 \
# requests-unixsocket
# responses
# taskcluster
# taskcluster-taskgraph
responses==0.10.6 \
--hash=sha256:502d9c0c8008439cfcdef7e251f507fcfdd503b56e8c0c87c3c3e3393953f790 \
--hash=sha256:97193c0183d63fba8cd3a041c75464e4b09ea0aff6328800d1546598567dde0b
@@ -403,6 +413,11 @@ slugid==2.0.0 \
# via
# -r requirements-mach-vendor-python.in
# taskcluster
# taskcluster-taskgraph
taskcluster-taskgraph==1.0.1 \
--hash=sha256:99376afaee4ca52ea2b98eee6467409aae86fa9c1bd300d028e85032f961edef \
--hash=sha256:9ee1ba6063ad6c367cf67f80904faf872b722b63843b6c8d36525e75a03b9588
# via -r requirements-mach-vendor-python.in
taskcluster-urls==13.0.1 \
--hash=sha256:5e25e7e6818e8877178b175ff43d2e6548afad72694aa125f404a7329ece0973 \
--hash=sha256:b25e122ecec249c4299ac7b20b08db76e3e2025bdaeb699a9d444556de5fd367 \
@@ -410,6 +425,7 @@ taskcluster-urls==13.0.1 \
# via
# -r requirements-mach-vendor-python.in
# taskcluster
# taskcluster-taskgraph
taskcluster==44.2.2 \
--hash=sha256:0266a6a901e1a2ec838984a7f24e7adb6d58f9f2e221a7f613388f8f23f786fc \
--hash=sha256:846d73c597f0f47dd8525c85c8d9bc41111d5200b090690d3f16b2f57c56a2e1 \
@@ -433,7 +449,9 @@ urllib3==1.25.9 \
voluptuous==0.12.1 \
--hash=sha256:663572419281ddfaf4b4197fd4942d181630120fb39b333e3adad70aeb56444b \
--hash=sha256:8ace33fcf9e6b1f59406bfaf6b8ec7bcc44266a9f29080b4deb4fe6ff2492386
# via -r requirements-mach-vendor-python.in
# via
# -r requirements-mach-vendor-python.in
# taskcluster-taskgraph
yamllint==1.23 \
--hash=sha256:0fa69bf8a86182b7fe14918bdd3a30354c869966bbc7cbfff176af71bda9c806 \
--hash=sha256:59f3ff77f44e7f46be6aecdb985830f73a1c51e290b7082a7d38c2ae1940f4a9

View File

@@ -0,0 +1,27 @@
Metadata-Version: 2.1
Name: taskcluster-taskgraph
Version: 1.0.1
Summary: Build taskcluster taskgraphs
Home-page: https://hg.mozilla.org/ci/taskgraph
Author: UNKNOWN
Author-email: UNKNOWN
License: UNKNOWN
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Requires-Dist: appdirs (>=1.4)
Requires-Dist: attrs (>=19.1.0)
Requires-Dist: json-e (>=2.7)
Requires-Dist: PyYAML (>=5.4)
Requires-Dist: redo (>=2.0)
Requires-Dist: requests (>=2.25)
Requires-Dist: requests-unixsocket (>=0.2)
Requires-Dist: slugid (>=2.0)
Requires-Dist: taskcluster-urls (>=11.0)
Requires-Dist: voluptuous (>=0.12.1)
UNKNOWN

View File

@@ -0,0 +1,100 @@
taskgraph/__init__.py,sha256=jwOtU7TkmU317LP_IsgIswpj2T1OPUXXgMRv4sIU7nE,707
taskgraph/config.py,sha256=moiD9PJPXQS6c0Bxy9GpKEM3K2dMb9pmDVp6Jt2Oywk,4678
taskgraph/create.py,sha256=VeAYxtLK8f5HufgUSjzDRP7WBSwQza--_O7RzRRhzA4,5190
taskgraph/decision.py,sha256=hfs6UQtgyfJLy0blDLUkLFh2yAz_37ZNcnYW5OHUYtg,9346
taskgraph/docker.py,sha256=HlCEtfW2coUAyGce8ToVfn8OdLfQJT7Vq_mYQntO4gQ,7485
taskgraph/files_changed.py,sha256=30ONF-m1hbGQnQ31E-TCsHG1jhdNjLTOolSXu2jzOCs,2159
taskgraph/filter_tasks.py,sha256=ty4SK0ujcZ8824F4ikwdCaGX1TRSq_90TFArDogJHTo,875
taskgraph/generator.py,sha256=Axwun0FeuEXKZpCKD3F4HHCk5K3STeYrR2gr15DY9DM,14549
taskgraph/graph.py,sha256=x5WnB9a-1OoY1BETREq1rPll9w1coruGHHlXYUA-daE,4535
taskgraph/main.py,sha256=-i-80OYYUroGlQsSpw5Vn62sJgQ_F_6aBMYuoBN8Cas,22449
taskgraph/morph.py,sha256=EH_kP5FAMb0F8Oap4VZeVpMNXGr0QhDVz_lxiY-ksgA,9562
taskgraph/optimize.py,sha256=dxeS3BVaRpPrhC0oK4vmZpcSfdMQTeKiWqvb_Lnx1dM,12518
taskgraph/parameters.py,sha256=fnKAhpIehAudhm8KbbKxIJinyGljalfokiicUFTwMHc,9790
taskgraph/target_tasks.py,sha256=mspItlKD-HCuQR1x_UD6HT_Qd1v5kEHvgRWIUbweRDg,3166
taskgraph/task.py,sha256=OiYCsYhVukqLdQR2olQSG-l7oP2Z_2XaIZxovBPqxCc,2733
taskgraph/taskgraph.py,sha256=Sp6Z68pMGmGVpmaRqUNMRRlK9X7YaGd8lyUGDxOxd3M,2276
taskgraph/actions/__init__.py,sha256=lVP1e0YyELg7-_42MWWDbT0cKv_p53BApVE6vWOiPww,416
taskgraph/actions/add_new_jobs.py,sha256=mX_DFDJaQUHetjyMNi5b8zPCCeqfzDrCjDg5DxTaA-I,1831
taskgraph/actions/cancel.py,sha256=vrCVtbkpYTCyW9s9IHCHYI18yuRs1C8g5x8DNRaBnW8,1307
taskgraph/actions/cancel_all.py,sha256=aa8rcM5-Wee8UcDsGAAsfc3AvwBbnM-ac19Lb-G_tXs,1945
taskgraph/actions/registry.py,sha256=YLz8LeDiy8Lugq3K4M_VozG2Y_dJjY8IhdxaPsu4t_E,13233
taskgraph/actions/retrigger.py,sha256=TAhq1yDqkGz0z4Di40WT79RhFhNJdhQajHHg1lDUKSQ,9385
taskgraph/actions/util.py,sha256=OjajY7GqT7K3kEM_fVT9rhVG8vKLh93dNshIvUsRqgs,10660
taskgraph/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
taskgraph/loader/transform.py,sha256=6DNQ5xIp4RFPJobCzDhisCzarH92YVoGFZOqLbJAFqY,2086
taskgraph/run-task/fetch-content,sha256=auxWFlOG8lSSIKRJZsia7irmA45AoyPiOOK2tdanq0E,23771
taskgraph/run-task/hgrc,sha256=BybWLDR89bWi3pE5T05UqmDHs02CbLypE-omLZWU6Uk,896
taskgraph/run-task/robustcheckout.py,sha256=P41ZGrec5aa8hVTEqOkKJ9wTygsgaXtjoQnl7hkfW-k,28978
taskgraph/run-task/run-task,sha256=AY-hEgsz-vxTPdkqhnek840z67thIOQP4ucL6Rxmpfk,38667
taskgraph/test/__init__.py,sha256=7LTScvkVcLPqivjL-wCb_Znk2GGajaJi1fJ4SjcLMoA,289
taskgraph/test/automationrelevance.json,sha256=ttGejNZeVhcTWXFdoU0xME0YEYRYvaYcAr8zBz0ewrw,17980
taskgraph/test/conftest.py,sha256=MiCe5gyqGDIARdzauK4wgB4jlyJzhfwiAElBxfTYtlg,3462
taskgraph/test/mockedopen.py,sha256=Ccr2qGJSLeWIUYd03Poy8eKKRSW2aTpRGI-0AA7xYrw,4055
taskgraph/test/test_create.py,sha256=oY7DeVW8usjVNe-QPPyTrE3hsvwMo9HvMxPSCllOsMQ,3657
taskgraph/test/test_decision.py,sha256=dxlMnRtpKZrOqSZo-znPLw6TwlFJOB9Yp0sadEnJdaQ,2658
taskgraph/test/test_files_changed.py,sha256=MoHr_M-qtbi9PbKQ9loDlKOOzecUUyE1N0SgxjXfP5Y,2608
taskgraph/test/test_generator.py,sha256=6-ZpG45F3YlTWTCILrktz7LeSs3tEfQWCzekN6OHYHw,4284
taskgraph/test/test_graph.py,sha256=IEly2SS9NZTN3F0AM4VhxxYx0WTj5u7x2UFyojr1Ddg,7064
taskgraph/test/test_main.py,sha256=zUc6GiYUzVQJxjGYJdHKOUjuSf7KDhXrYPhGnPQ5F2A,1703
taskgraph/test/test_morph.py,sha256=XJVuhNQkGxhd3yCOs2AmsZgOFH_j_-V50wxpUpm1V-4,2195
taskgraph/test/test_optimize.py,sha256=-ij-8enLi66ed0mJzb4K_7GXcWWq-s56Yx50c2pDLv0,9124
taskgraph/test/test_parameters.py,sha256=HfL6C-XacHDbPIcMarSH_E2dIBj6ey2LB-p6V1d8mjk,5876
taskgraph/test/test_target_tasks.py,sha256=AzvuEw1NI4b_f14rPFkWuK0bJyxknkPIQi5KI6k2K8A,12046
taskgraph/test/test_taskgraph.py,sha256=KuBulN0QGHON3_ksVE3hhNyenk_QkjWjoGYghrB-VxU,3596
taskgraph/test/test_transforms_base.py,sha256=Vo9slzCB2GePvMoLmkrSdhYVWh2nQYn5bRxMjsx40Mw,873
taskgraph/test/test_transforms_job.py,sha256=pHO_Ea32b3OjCSIYhRHK3CqHsugJizD43_a3jMFtG_A,4490
taskgraph/test/test_util_attributes.py,sha256=K_Wro-p5oA-S4yrsLqT8HMBlOAN4L0krQQQ82WYgGAQ,3596
taskgraph/test/test_util_docker.py,sha256=j1eIYDcL8cch9OjCH-bQrK9t9ljPVZeTpaQNdKJ2nq8,8803
taskgraph/test/test_util_memoize.py,sha256=yq-PTegHBdQZkRm6Iv1NdCRcKCVzsb4pmZPvIz0P6j8,2340
taskgraph/test/test_util_parameterization.py,sha256=rXZhIDDAGyZH5p55AhNzjuYdkekAN6HftRRN41YcIOg,7948
taskgraph/test/test_util_path.py,sha256=icJyH1DyMNVuZ5xfPXvrEQwQ0pQYTg4ORlZq3RK6_V8,5906
taskgraph/test/test_util_python_path.py,sha256=VQo4hwsJ0It-jLIqe4nErPmZn9AQ7rliN25h2oO_zMg,1216
taskgraph/test/test_util_readonlydict.py,sha256=KRgjLvSBsZZj4EUhwcqeUsM1T--iGklVE0QJuC6Xv4o,1234
taskgraph/test/test_util_schema.py,sha256=6lLB-ToEyEt6OC6qQFPZ_yKcNAdv5hyApK-skA4B-KA,5683
taskgraph/test/test_util_taskcluster.py,sha256=yRF_wUpPjYDe6lrrDhTs6HONead97AMNEEBC2slgwug,481
taskgraph/test/test_util_templates.py,sha256=u3ckrzmx1eyk7vXiqRiQlETtVZvSx4FHEXN5xB4GZDQ,1676
taskgraph/test/test_util_time.py,sha256=SG4WmSupTQiL1UhE2UMsMNZEugfPdoaxO4aWSxLGXBM,1803
taskgraph/test/test_util_treeherder.py,sha256=20zzGcMd0BL0ayTFQj6Etj39afdxZPgtZxSGUZ0iL5M,912
taskgraph/test/test_util_vcs.py,sha256=qcmqkgxjIlP164BAgpqlwC1Itk468FnKpZvvftmn0Vk,5699
taskgraph/test/test_util_yaml.py,sha256=zymZxaAZBIBn5u-p91QsA---IqCH_CVVk3YqMoshLlQ,1019
taskgraph/transforms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
taskgraph/transforms/base.py,sha256=oIQdBKrHG_FZXAoRyiazxgLa8zbAI4TEVlduqz8H12I,5277
taskgraph/transforms/cached_tasks.py,sha256=fMCbxqA-HPSMG6mILYCfdIpnUg9uSKlQGKkUcWG9w28,2597
taskgraph/transforms/code_review.py,sha256=du7npIGOpVMK9QZtdkHrkNecRiaqd-fHjHavDVkfL58,698
taskgraph/transforms/docker_image.py,sha256=14FPWvQ1cAOpSMX1hDV2vTQw3-E99DKI78GnDBIWCo8,7590
taskgraph/transforms/fetch.py,sha256=z-SAZTQSOcVRiFa2E8z0dAEhkIUhdOJdwQgdUah2LzA,9400
taskgraph/transforms/task.py,sha256=Db9dGB6-ppmH0EXPbOWnBmmBG4e1sodDQx2nBMicfxU,44650
taskgraph/transforms/job/__init__.py,sha256=imtb3MHVQbKtcCngSnvgumtBfOwxOPiRsJDwHKUtYn0,16891
taskgraph/transforms/job/common.py,sha256=onHnerPcmmvbSk0oHt8mvJmOo7AnjHQya0ombgMNLG8,7106
taskgraph/transforms/job/index_search.py,sha256=zPldmHSalHJjvULAMF9_QAeOZzIeWpr89kOVeP2IJAE,1220
taskgraph/transforms/job/run_task.py,sha256=GqR1ZPMnpoFi_d8HlzpomxB7nWIHctgthYZ_ve3jM6M,8871
taskgraph/transforms/job/toolchain.py,sha256=z2Z7sxI4yn_dI8zzcMWcrcmfTHeK6mgfSNSM6MAgrCU,4649
taskgraph/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
taskgraph/util/archive.py,sha256=Hcm8YHsCtazX7efDSd8vXm8Pw87Z1UP_Bo-gIUEzb_4,2856
taskgraph/util/attributes.py,sha256=zSaRws02rdF1TgvOoFzVNLg2XmwbtVVCTsp4M_qm3RI,2617
taskgraph/util/cached_tasks.py,sha256=lvPtfs9mpm0Wv7Mbajen0vUxuiRAP-ZPRNKVlMgCoXE,3408
taskgraph/util/decision.py,sha256=E2Vm1F--yB2iTIA3ePpMsxOk6Qw5zl9lEgs6BKlqZPI,2432
taskgraph/util/docker.py,sha256=kj9V58ZqE12qtNDeRZjz6mxmgoJzZp_eZTzHQoU5lVA,11676
taskgraph/util/hash.py,sha256=_59JUSZeuSu6fo4XjP36Ubs4vbQ5_4RBv61mcmau-t8,1560
taskgraph/util/keyed_by.py,sha256=cgBH4tG8eH5UUrm5q4ODG7A4fzkGAOI7feVoZy3V8Ho,3419
taskgraph/util/memoize.py,sha256=XDlwc-56gzoY8QTwOoiCOYL-igX7JoMcY-9Ih80Euc8,1331
taskgraph/util/parameterization.py,sha256=H3VLBBER63zvgD5yZxx7H542JZZiSPk2_--9-tbREVg,3181
taskgraph/util/path.py,sha256=s1Mt4MWjkI14QUQn817eALrP0yjzYScwuCiAV48NOKM,4359
taskgraph/util/python_path.py,sha256=93R0mADSe1MeTTOsrDWEjLTW6MVpf2COuf1jXbxuQOk,821
taskgraph/util/readonlydict.py,sha256=XzTG-gqGqWVlSkDxSyOL6Ur7Z0ONhIJ9DVLWV3q4q1w,787
taskgraph/util/schema.py,sha256=ZaxLYnqu9_GmlfuGfNtYN029ZocuxwcDuxGhN5M4-H4,6825
taskgraph/util/taskcluster.py,sha256=Xn4Oly9XStt4j0zVO5QmWb2SN9UF_tFRNyWEPeKOlrc,11042
taskgraph/util/taskgraph.py,sha256=OfaclpaZzVfxViMXaBIAYmSJlI-ZFsBudGVndgH7znw,1982
taskgraph/util/templates.py,sha256=Dqxfl244u-PX7dnsk3_vYyzDwpDgJtANK6NmZwN3Qow,1417
taskgraph/util/time.py,sha256=cMRYsBiz7rgPwgZk77p0P7h9JzeEJENBZCoetBaEHqY,3490
taskgraph/util/treeherder.py,sha256=oCSNiT6l44-c_06H0jZiibsrTsJCae7WJGDXYdA48FE,1993
taskgraph/util/vcs.py,sha256=J_S1jLoNDxwjw4Zzn5e2mQOQuNzfvqha2vLI7J-vNQI,6035
taskgraph/util/verify.py,sha256=e0j_Ec2gDDIVlnEO9tHQ0gjxzGm00NJbPVI5q4BuiHc,5653
taskgraph/util/workertypes.py,sha256=5g2mgIbEKMzDpZNnmPMoMNyy7Wahi-jmWcV1amDAcPo,2341
taskgraph/util/yaml.py,sha256=hfKI_D8Q7dimq4_VvO3WEh8CJsTrsIMwN6set7HIQbY,990
taskcluster_taskgraph-1.0.1.dist-info/METADATA,sha256=fYrLv6KhYlhGunFcIk9utnMZyfg0pBo_gIdePV7MtQE,769
taskcluster_taskgraph-1.0.1.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
taskcluster_taskgraph-1.0.1.dist-info/entry_points.txt,sha256=VoXNtZpN4LvyXYB1wq47AU9CO-DMYMJ0VktKxjugzbY,51
taskcluster_taskgraph-1.0.1.dist-info/top_level.txt,sha256=3JNeYn_hNiNXC7DrdH_vcv-WYSE7QdgGjdvUYvSjVp0,10
taskcluster_taskgraph-1.0.1.dist-info/RECORD,,

View File

@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.0)
Root-Is-Purelib: true
Tag: py3-none-any

View File

@@ -0,0 +1,3 @@
[console_scripts]
taskgraph = taskgraph.main:main
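
The console-script entry above exposes a taskgraph command that dispatches to taskgraph.main:main. A minimal sketch of the equivalent programmatic invocation, assuming main() falls back to sys.argv when called without arguments (as console-script targets conventionally do):

# Hedged sketch: roughly what the `taskgraph` console script resolves to.
# Assumes taskgraph.main.main() reads its arguments from sys.argv when
# called with no explicit argument list.
import sys

from taskgraph.main import main  # entry-point target declared above

if __name__ == "__main__":
    sys.exit(main())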

View File

@@ -0,0 +1,15 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Maximum number of dependencies a single task can have
# https://docs.taskcluster.net/reference/platform/taskcluster-queue/references/api#createTask
# specifies 100, but we also optionally add the decision task id as a dep in
# taskgraph.create, so let's set this to 99.
MAX_DEPENDENCIES = 99
# Enable fast task generation for local debugging
# This is normally switched on via the --fast/-F flag to `mach taskgraph`
# Currently this skips toolchain task optimizations and schema validation
fast = False
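
As a hypothetical illustration (not part of the vendored package) of how MAX_DEPENDENCIES can be consumed by code that builds task definitions:

# Hypothetical helper, not part of taskcluster-taskgraph: split a long
# list of dependency task ids into chunks that respect MAX_DEPENDENCIES.
from taskgraph import MAX_DEPENDENCIES


def chunk_dependencies(dep_task_ids):
    """Yield lists of at most MAX_DEPENDENCIES task ids each."""
    for i in range(0, len(dep_task_ids), MAX_DEPENDENCIES):
        yield dep_task_ids[i : i + MAX_DEPENDENCIES]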

View File

@@ -0,0 +1,16 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .registry import (
register_callback_action,
render_actions_json,
trigger_action_callback,
)
__all__ = [
"register_callback_action",
"render_actions_json",
"trigger_action_callback",
]

View File

@@ -0,0 +1,64 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from taskgraph.actions.registry import register_callback_action
from taskgraph.actions.util import (
combine_task_graph_files,
create_tasks,
fetch_graph_and_labels,
)
@register_callback_action(
name="add-new-jobs",
title="Add new jobs",
generic=True,
symbol="add-new",
description="Add new jobs using task labels.",
order=100,
context=[],
schema={
"type": "object",
"properties": {
"tasks": {
"type": "array",
"description": "An array of task labels",
"items": {"type": "string"},
},
"times": {
"type": "integer",
"default": 1,
"minimum": 1,
"maximum": 100,
"title": "Times",
"description": "How many times to run each task.",
},
},
},
)
def add_new_jobs_action(parameters, graph_config, input, task_group_id, task_id):
decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
parameters, graph_config
)
to_run = []
for elem in input["tasks"]:
if elem in full_task_graph.tasks:
to_run.append(elem)
else:
raise Exception(f"{elem} was not found in the task-graph")
times = input.get("times", 1)
for i in range(times):
create_tasks(
graph_config,
to_run,
full_task_graph,
label_to_taskid,
parameters,
decision_task_id,
i,
)
combine_task_graph_files(list(range(times)))
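
For reference, an input payload matching the add-new-jobs schema above could look like this (the task labels are illustrative, not real labels):

# Illustrative input for the add-new-jobs action; labels are made up.
example_input = {
    "tasks": ["build-linux64/opt", "test-linux64/xpcshell-1"],
    "times": 2,
}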

View File

@@ -0,0 +1,40 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import logging
import requests
from taskgraph.util.taskcluster import cancel_task
from .registry import register_callback_action
logger = logging.getLogger(__name__)
@register_callback_action(
title="Cancel Task",
name="cancel",
symbol="cx",
generic=True,
description=("Cancel the given task"),
order=350,
context=[{}],
)
def cancel_action(parameters, graph_config, input, task_group_id, task_id):
# Note that this is limited by the scopes afforded to generic actions to
# only cancel tasks with the level-specific schedulerId.
try:
cancel_task(task_id, use_proxy=True)
except requests.HTTPError as e:
if e.response.status_code == 409:
# A 409 response indicates that this task is past its deadline. It
# cannot be cancelled at this time, but it's also not running
# anymore, so we can ignore this error.
logger.info(
'Task "{}" is past its deadline and cannot be cancelled.'.format(
task_id
)
)
return
raise

View File

@@ -0,0 +1,59 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import concurrent.futures as futures
import logging
import os
import requests
from taskgraph.util.taskcluster import (
list_task_group_incomplete_tasks,
cancel_task,
CONCURRENCY,
)
from .registry import register_callback_action
logger = logging.getLogger(__name__)
@register_callback_action(
title="Cancel All",
name="cancel-all",
generic=True,
symbol="cAll",
description=(
"Cancel all running and pending tasks created by the decision task "
"this action task is associated with."
),
order=400,
context=[],
)
def cancel_all_action(parameters, graph_config, input, task_group_id, task_id):
def do_cancel_task(task_id):
logger.info(f"Cancelling task {task_id}")
try:
cancel_task(task_id, use_proxy=True)
except requests.HTTPError as e:
if e.response.status_code == 409:
# A 409 response indicates that this task is past its deadline. It
# cannot be cancelled at this time, but it's also not running
# anymore, so we can ignore this error.
logger.info(
"Task {} is past its deadline and cannot be cancelled.".format(
task_id
)
)
return
raise
own_task_id = os.environ.get("TASK_ID", "")
to_cancel = [
t for t in list_task_group_incomplete_tasks(task_group_id) if t != own_task_id
]
logger.info(f"Cancelling {len(to_cancel)} tasks")
with futures.ThreadPoolExecutor(CONCURRENCY) as e:
cancel_futs = [e.submit(do_cancel_task, t) for t in to_cancel]
for f in futures.as_completed(cancel_futs):
f.result()

View File

@@ -0,0 +1,363 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import os
from types import FunctionType
from collections import namedtuple
from taskgraph import create
from taskgraph.config import load_graph_config
from taskgraph.util import taskcluster, yaml, hash
from taskgraph.parameters import Parameters
from taskgraph.util.memoize import memoize
actions = []
callbacks = {}
Action = namedtuple("Action", ["order", "cb_name", "generic", "action_builder"])
def is_json(data):
"""Return ``True``, if ``data`` is a JSON serializable data structure."""
try:
json.dumps(data)
except ValueError:
return False
return True
@memoize
def read_taskcluster_yml(filename):
"""Load and parse .taskcluster.yml, memoized to save some time"""
return yaml.load_yaml(filename)
@memoize
def hash_taskcluster_yml(filename):
"""
Generate a hash of the given .taskcluster.yml. This is the first 10 digits
of the sha256 of the file's content, and is used by administrative scripts
to create a hook based on this content.
"""
return hash.hash_path(filename)[:10]
def register_callback_action(
name,
title,
symbol,
description,
order=10000,
context=[],
available=lambda parameters: True,
schema=None,
generic=True,
cb_name=None,
):
"""
Register an action callback that can be triggered from supporting
user interfaces, such as Treeherder.
This function is to be used as a decorator for a callback that takes
parameters as follows:
``parameters``:
Decision task parameters, see ``taskgraph.parameters.Parameters``.
``input``:
Input matching specified JSON schema, ``None`` if no ``schema``
parameter is given to ``register_callback_action``.
``task_group_id``:
The id of the task-group this was triggered for.
``task_id`` and ``task``:
task identifier and task definition for the task the action was triggered
for, ``None`` if no ``context`` parameter was given to
``register_callback_action``.
Parameters
----------
name : str
An identifier for this action, used by UIs to find the action.
title : str
A human readable title for the action to be used as label on a button
or text on a link for triggering the action.
symbol : str
Treeherder symbol for the action callback, this is the symbol that the
task calling your callback will be displayed as. This is usually 1-3
letters abbreviating the action title.
description : str
A human readable description of the action in **markdown**.
This will be displayed as a tooltip and in the dialog window when the
action is triggered. This is a good place to describe how to use the action.
order : int
Order of the action in menus, this is relative to the ``order`` of
other actions declared.
context : list of dict
List of tag-sets specifying which tasks the action can take as input.
If no tag-sets are specified, the action applies to the entire
task-group and won't be triggered for a given task.
Otherwise, ``context = [{'k': 'b', 'p': 'l'}, {'k': 't'}]`` will only
be displayed in the context menu for tasks that have
``task.tags.k == 'b' && task.tags.p == 'l'`` or ``task.tags.k == 't'``.
Essentially, this allows filtering on ``task.tags``.
If this is a function, it is given the decision parameters and must return
a value of the form described above.
available : function
An optional function that given decision parameters decides if the
action is available. Defaults to a function that always returns ``True``.
schema : dict
JSON schema specifying input accepted by the action.
This is optional and can be left ``null`` if no input is taken.
generic : boolean
Whether this is a generic action or has its own permissions.
cb_name : string
The name under which this function should be registered, defaulting to
`name`. This is used to generate actionPerm for non-generic hook
actions, and thus appears in ci-configuration and various role and hook
names. Unlike `name`, which can appear multiple times, cb_name must be
unique among all registered callbacks.
Returns
-------
function
To be used as decorator for the callback function.
"""
mem = {"registered": False} # workaround nonlocal missing in 2.x
assert isinstance(title, str), "title must be a string"
assert isinstance(description, str), "description must be a string"
title = title.strip()
description = description.strip()
# ensure that context is callable
if not callable(context):
context_value = context
context = lambda params: context_value # noqa
def register_callback(cb, cb_name=cb_name):
assert isinstance(name, str), "name must be a string"
assert isinstance(order, int), "order must be an integer"
assert callable(schema) or is_json(
schema
), "schema must be a JSON compatible object"
assert isinstance(cb, FunctionType), "callback must be a function"
# Allow for json-e > 25 chars in the symbol.
if "$" not in symbol:
assert 1 <= len(symbol) <= 25, "symbol must be between 1 and 25 characters"
assert isinstance(symbol, str), "symbol must be a string"
assert not mem[
"registered"
], "register_callback_action must be used as decorator"
if not cb_name:
cb_name = name
assert cb_name not in callbacks, "callback name {} is not unique".format(
cb_name
)
def action_builder(parameters, graph_config, decision_task_id):
if not available(parameters):
return None
actionPerm = "generic" if generic else cb_name
# gather up the common decision-task-supplied data for this action
repo_param = "head_repository"
repository = {
"url": parameters[repo_param],
"project": parameters["project"],
"level": parameters["level"],
}
revision = parameters["head_rev"]
push = {
"owner": "mozilla-taskcluster-maintenance@mozilla.com",
"pushlog_id": parameters["pushlog_id"],
"revision": revision,
}
branch = parameters.get("head_ref")
if branch:
push["branch"] = branch
action = {
"name": name,
"title": title,
"description": description,
# target taskGroupId (the task group this decision task is creating)
"taskGroupId": decision_task_id,
"cb_name": cb_name,
"symbol": symbol,
}
rv = {
"name": name,
"title": title,
"description": description,
"context": context(parameters),
}
if schema:
rv["schema"] = (
schema(graph_config=graph_config) if callable(schema) else schema
)
trustDomain = graph_config["trust-domain"]
level = parameters["level"]
tcyml_hash = hash_taskcluster_yml(graph_config.taskcluster_yml)
# the tcyml_hash is prefixed with `/` in the hookId, so users will be granted
# hooks:trigger-hook:project-gecko/in-tree-action-3-myaction/*; if another
# action was named `myaction/release`, then the `*` in the scope would also
# match that action. To prevent such an accident, we prohibit `/` in hook
# names.
if "/" in actionPerm:
raise Exception("`/` is not allowed in action names; use `-`")
rv.update(
{
"kind": "hook",
"hookGroupId": f"project-{trustDomain}",
"hookId": "in-tree-action-{}-{}/{}".format(
level, actionPerm, tcyml_hash
),
"hookPayload": {
# provide the decision-task parameters as context for triggerHook
"decision": {
"action": action,
"repository": repository,
"push": push,
},
# and pass everything else through from our own context
"user": {
"input": {"$eval": "input"},
"taskId": {"$eval": "taskId"}, # target taskId (or null)
"taskGroupId": {
"$eval": "taskGroupId"
}, # target task group
},
},
"extra": {
"actionPerm": actionPerm,
},
}
)
return rv
actions.append(Action(order, cb_name, generic, action_builder))
mem["registered"] = True
callbacks[cb_name] = cb
return cb
return register_callback
def render_actions_json(parameters, graph_config, decision_task_id):
"""
Render JSON object for the ``public/actions.json`` artifact.
Parameters
----------
parameters : taskgraph.parameters.Parameters
Decision task parameters.
Returns
-------
dict
JSON object representation of the ``public/actions.json`` artifact.
"""
assert isinstance(parameters, Parameters), "requires instance of Parameters"
actions = []
for action in sorted(_get_actions(graph_config), key=lambda action: action.order):
action = action.action_builder(parameters, graph_config, decision_task_id)
if action:
assert is_json(action), "action must be a JSON compatible object"
actions.append(action)
return {
"version": 1,
"variables": {},
"actions": actions,
}
def sanity_check_task_scope(callback, parameters, graph_config):
"""
If this action is not generic, then verify that this task has the necessary
scope to run the action. This serves as a backstop preventing abuse by
running non-generic actions using generic hooks. While scopes should
prevent serious damage from such abuse, it's never a valid thing to do.
"""
for action in _get_actions(graph_config):
if action.cb_name == callback:
break
else:
raise Exception(f"No action with cb_name {callback}")
actionPerm = "generic" if action.generic else action.cb_name
repo_param = "head_repository"
head_repository = parameters[repo_param]
if not head_repository.startswith(("https://hg.mozilla.org", "https://github.com")):
raise Exception(
"{} is neither https://hg.mozilla.org nor https://github.com !".format(head_repository)
)
expected_scope = f"assume:repo:{head_repository[8:]}:action:{actionPerm}"
# the scope should appear literally; no need for a satisfaction check. The use of
# get_current_scopes here calls the auth service through the Taskcluster Proxy, giving
# the precise scopes available to this task.
if expected_scope not in taskcluster.get_current_scopes():
raise Exception(f"Expected task scope {expected_scope} for this action")
def trigger_action_callback(
task_group_id, task_id, input, callback, parameters, root, test=False
):
"""
Trigger action callback with the given inputs. If `test` is true, then run
the action callback in testing mode, without actually creating tasks.
"""
graph_config = load_graph_config(root)
graph_config.register()
callbacks = _get_callbacks(graph_config)
cb = callbacks.get(callback, None)
if not cb:
raise Exception(
"Unknown callback: {}. Known callbacks: {}".format(
callback, ", ".join(callbacks)
)
)
if test:
create.testing = True
taskcluster.testing = True
if not test:
sanity_check_task_scope(callback, parameters, graph_config)
cb(Parameters(**parameters), graph_config, input, task_group_id, task_id)
def _load(graph_config):
# Load all modules from this folder, relying on the side-effects of register_
# functions to populate the action registry.
actions_dir = os.path.dirname(__file__)
for f in os.listdir(actions_dir):
if f.endswith(".py") and f not in ("__init__.py", "registry.py", "util.py"):
__import__("taskgraph.actions." + f[:-3])
return callbacks, actions
def _get_callbacks(graph_config):
return _load(graph_config)[0]
def _get_actions(graph_config):
return _load(graph_config)[1]
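
The decorator-based registration above is what the bundled action modules use. A minimal sketch of registering a custom callback, with a made-up name, symbol, and body:

# Minimal sketch of using register_callback_action; the action name,
# symbol, and callback body here are hypothetical.
from taskgraph.actions.registry import register_callback_action


@register_callback_action(
    name="hello",
    title="Hello",
    symbol="hi",
    description="Log a greeting from an action task.",
    order=10000,
    context=[],
)
def hello_action(parameters, graph_config, input, task_group_id, task_id):
    print(f"Hello from action task group {task_group_id}")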

View File

@@ -0,0 +1,300 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import logging
import sys
import textwrap
from slugid import nice as slugid
from .util import (
combine_task_graph_files,
create_tasks,
fetch_graph_and_labels,
relativize_datestamps,
create_task_from_def,
)
from .registry import register_callback_action
from taskgraph.util import taskcluster
logger = logging.getLogger(__name__)
RERUN_STATES = ("exception", "failed")
def _should_retrigger(task_graph, label):
"""
Return whether a given task in the taskgraph should be retriggered.
This handles the case where the task isn't there by assuming it should not be.
"""
if label not in task_graph:
logger.info(
"Task {} not in full taskgraph, assuming task should not be retriggered.".format(
label
)
)
return False
return task_graph[label].attributes.get("retrigger", False)
@register_callback_action(
title="Retrigger",
name="retrigger",
symbol="rt",
cb_name="retrigger-decision",
description=textwrap.dedent(
"""\
Create a clone of the task (retriggering decision, action, and cron tasks requires
special scopes)."""
),
order=11,
context=[
{"kind": "decision-task"},
{"kind": "action-callback"},
{"kind": "cron-task"},
],
)
def retrigger_decision_action(parameters, graph_config, input, task_group_id, task_id):
"""For a single task, we try to just run exactly the same task once more.
It's quite possible that we don't have the scopes to do so (especially for
an action), but this is best-effort."""
# make all of the timestamps relative; they will then be turned back into
# absolute timestamps relative to the current time.
task = taskcluster.get_task_definition(task_id)
task = relativize_datestamps(task)
create_task_from_def(slugid(), task, parameters["level"])
@register_callback_action(
title="Retrigger",
name="retrigger",
symbol="rt",
generic=True,
description=("Create a clone of the task."),
order=19, # must be greater than other orders in this file, as this is the fallback version
context=[{"retrigger": "true"}],
schema={
"type": "object",
"properties": {
"downstream": {
"type": "boolean",
"description": (
"If true, downstream tasks from this one will be cloned as well. "
"The dependencies will be updated to work with the new task at the root."
),
"default": False,
},
"times": {
"type": "integer",
"default": 1,
"minimum": 1,
"maximum": 100,
"title": "Times",
"description": "How many times to run each task.",
},
},
},
)
@register_callback_action(
title="Retrigger (disabled)",
name="retrigger",
cb_name="retrigger-disabled",
symbol="rt",
generic=True,
description=(
"Create a clone of the task.\n\n"
"This type of task should typically be re-run instead of re-triggered."
),
order=20, # must be greater than other orders in this file, as this is the fallback version
context=[{}],
schema={
"type": "object",
"properties": {
"downstream": {
"type": "boolean",
"description": (
"If true, downstream tasks from this one will be cloned as well. "
"The dependencies will be updated to work with the new task at the root."
),
"default": False,
},
"times": {
"type": "integer",
"default": 1,
"minimum": 1,
"maximum": 100,
"title": "Times",
"description": "How many times to run each task.",
},
"force": {
"type": "boolean",
"default": False,
"description": (
"This task should not be re-triggered. "
"This can be overridden by passing `true` here."
),
},
},
},
)
def retrigger_action(parameters, graph_config, input, task_group_id, task_id):
decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
parameters, graph_config
)
task = taskcluster.get_task_definition(task_id)
label = task["metadata"]["name"]
with_downstream = " "
to_run = [label]
if not input.get("force", None) and not _should_retrigger(full_task_graph, label):
logger.info(
"Not retriggering task {}, task should not be retrigged "
"and force not specified.".format(label)
)
sys.exit(1)
if input.get("downstream"):
to_run = full_task_graph.graph.transitive_closure(
set(to_run), reverse=True
).nodes
to_run = to_run & set(label_to_taskid.keys())
with_downstream = " (with downstream) "
times = input.get("times", 1)
for i in range(times):
create_tasks(
graph_config,
to_run,
full_task_graph,
label_to_taskid,
parameters,
decision_task_id,
i,
)
logger.info(f"Scheduled {label}{with_downstream}(time {i + 1}/{times})")
combine_task_graph_files(list(range(times)))
@register_callback_action(
title="Rerun",
name="rerun",
generic=True,
symbol="rr",
description=(
"Rerun a task.\n\n"
"This only works on failed or exception tasks in the original taskgraph,"
" and is CoT friendly."
),
order=300,
context=[{}],
schema={"type": "object", "properties": {}},
)
def rerun_action(parameters, graph_config, input, task_group_id, task_id):
task = taskcluster.get_task_definition(task_id)
parameters = dict(parameters)
decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
parameters, graph_config
)
label = task["metadata"]["name"]
if task_id not in label_to_taskid.values():
logger.error(
"Refusing to rerun {}: taskId {} not in decision task {} label_to_taskid!".format(
label, task_id, decision_task_id
)
)
_rerun_task(task_id, label)
def _rerun_task(task_id, label):
status = taskcluster.status_task(task_id)
if status not in RERUN_STATES:
logger.warning(
"No need to rerun {}: state '{}' not in {}!".format(
label, status, RERUN_STATES
)
)
return
taskcluster.rerun_task(task_id)
logger.info(f"Reran {label}")
@register_callback_action(
title="Retrigger",
name="retrigger-multiple",
symbol="rt",
generic=True,
description=("Create a clone of the task."),
context=[],
schema={
"type": "object",
"properties": {
"requests": {
"type": "array",
"items": {
"tasks": {
"type": "array",
"description": "An array of task labels",
"items": {"type": "string"},
},
"times": {
"type": "integer",
"minimum": 1,
"maximum": 100,
"title": "Times",
"description": "How many times to run each task.",
},
"additionalProperties": False,
},
},
"additionalProperties": False,
},
},
)
def retrigger_multiple(parameters, graph_config, input, task_group_id, task_id):
decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
parameters, graph_config
)
suffixes = []
for i, request in enumerate(input.get("requests", [])):
times = request.get("times", 1)
rerun_tasks = [
label
for label in request.get("tasks")
if not _should_retrigger(full_task_graph, label)
]
retrigger_tasks = [
label
for label in request.get("tasks")
if _should_retrigger(full_task_graph, label)
]
for label in rerun_tasks:
# XXX we should not re-run tasks pulled in from other pushes
# In practice, this shouldn't matter, as only completed tasks
# are pulled in from other pushes and treeherder won't pass
# those labels.
_rerun_task(label_to_taskid[label], label)
for j in range(times):
suffix = f"{i}-{j}"
suffixes.append(suffix)
create_tasks(
graph_config,
retrigger_tasks,
full_task_graph,
label_to_taskid,
parameters,
decision_task_id,
suffix,
)
combine_task_graph_files(suffixes)
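
The retrigger-multiple callback above iterates over the "requests" list in its input, splitting each request's labels into tasks to rerun and tasks to retrigger. A payload of that shape, with hypothetical labels, might be:

# Illustrative input for the retrigger-multiple action; labels are made up.
example_input = {
    "requests": [
        {"tasks": ["test-linux64/opt-xpcshell-1"], "times": 3},
        {"tasks": ["build-win64/debug"]},  # times defaults to 1
    ],
}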

View File

@@ -0,0 +1,282 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import concurrent.futures as futures
import copy
import logging
import os
import re
from functools import reduce
from requests.exceptions import HTTPError
from taskgraph import create
from taskgraph.decision import read_artifact, write_artifact, rename_artifact
from taskgraph.taskgraph import TaskGraph
from taskgraph.optimize import optimize_task_graph
from taskgraph.util.taskcluster import (
get_session,
get_artifact,
list_tasks,
parse_time,
CONCURRENCY,
)
from taskgraph.util.taskgraph import (
find_decision_task,
)
logger = logging.getLogger(__name__)
def get_parameters(decision_task_id):
return get_artifact(decision_task_id, "public/parameters.yml")
def fetch_graph_and_labels(parameters, graph_config):
decision_task_id = find_decision_task(parameters, graph_config)
# First grab the graph and labels generated during the initial decision task
full_task_graph = get_artifact(decision_task_id, "public/full-task-graph.json")
_, full_task_graph = TaskGraph.from_json(full_task_graph)
label_to_taskid = get_artifact(decision_task_id, "public/label-to-taskid.json")
# fetch everything in parallel; this avoids serializing any delay in downloading
# each artifact (such as waiting for the artifact to be mirrored locally)
with futures.ThreadPoolExecutor(CONCURRENCY) as e:
fetches = []
# fetch any modifications made by action tasks and swap out new tasks
# for old ones
def fetch_action(task_id):
logger.info(f"fetching label-to-taskid.json for action task {task_id}")
try:
run_label_to_id = get_artifact(task_id, "public/label-to-taskid.json")
label_to_taskid.update(run_label_to_id)
except HTTPError as e:
if e.response.status_code != 404:
raise
logger.debug(f"No label-to-taskid.json found for {task_id}: {e}")
namespace = "{}.v2.{}.pushlog-id.{}.actions".format(
graph_config["trust-domain"],
parameters["project"],
parameters["pushlog_id"],
)
for task_id in list_tasks(namespace):
fetches.append(e.submit(fetch_action, task_id))
# Similarly for cron tasks..
def fetch_cron(task_id):
logger.info(f"fetching label-to-taskid.json for cron task {task_id}")
try:
run_label_to_id = get_artifact(task_id, "public/label-to-taskid.json")
label_to_taskid.update(run_label_to_id)
except HTTPError as e:
if e.response.status_code != 404:
raise
logger.debug(f"No label-to-taskid.json found for {task_id}: {e}")
namespace = "{}.v2.{}.revision.{}.cron".format(
graph_config["trust-domain"], parameters["project"], parameters["head_rev"]
)
for task_id in list_tasks(namespace):
fetches.append(e.submit(fetch_cron, task_id))
# now wait for each fetch to complete, raising an exception if there
# were any issues
for f in futures.as_completed(fetches):
f.result()
return (decision_task_id, full_task_graph, label_to_taskid)
def create_task_from_def(task_id, task_def, level):
"""Create a new task from a definition rather than from a label
that is already in the full-task-graph. The task definition will
have {'relative-datestamp': '..'} rendered just like in a decision task.
Use this for entirely new tasks or ones that change internals of the task.
It is useful if you want to "edit" the full_task_graph and then hand
it to this function. No dependencies will be scheduled. You must handle
this yourself. Seeing how create_tasks handles it might prove helpful."""
task_def["schedulerId"] = f"gecko-level-{level}"
label = task_def["metadata"]["name"]
session = get_session()
create.create_task(session, task_id, label, task_def)
def update_parent(task, graph):
task.task.setdefault("extra", {})["parent"] = os.environ.get("TASK_ID", "")
return task
def update_dependencies(task, graph):
if os.environ.get("TASK_ID"):
task.task.setdefault("dependencies", []).append(os.environ["TASK_ID"])
return task
def create_tasks(
graph_config,
to_run,
full_task_graph,
label_to_taskid,
params,
decision_task_id=None,
suffix="",
modifier=lambda t: t,
):
"""Create new tasks. The task definition will have {relative-datestamp':
'..'} rendered just like in a decision task. Action callbacks should use
this function to create new tasks,
allowing easy debugging with `mach taskgraph action-callback --test`.
This builds up all of the tasks required in order to run the requested tasks.
Optionally this function takes a `modifier` function that is passed in each
task before it is put into a new graph. It should return a valid task. Note
that this is passed _all_ tasks in the graph, not just the set in to_run. You
may want to skip modifying tasks not in your to_run list.
If `suffix` is given, then it is used to give unique names to the resulting
artifacts. If you call this function multiple times in the same action,
pass a different suffix each time to avoid overwriting artifacts.
If you wish to create the tasks in a new group, leave out decision_task_id.
Returns an updated label_to_taskid containing the new tasks"""
if suffix != "":
suffix = f"-{suffix}"
to_run = set(to_run)
# Copy to avoid side-effects later
full_task_graph = copy.deepcopy(full_task_graph)
label_to_taskid = label_to_taskid.copy()
target_graph = full_task_graph.graph.transitive_closure(to_run)
target_task_graph = TaskGraph(
{l: modifier(full_task_graph[l]) for l in target_graph.nodes}, target_graph
)
target_task_graph.for_each_task(update_parent)
if decision_task_id and decision_task_id != os.environ.get("TASK_ID"):
target_task_graph.for_each_task(update_dependencies)
optimized_task_graph, label_to_taskid = optimize_task_graph(
target_task_graph,
params,
to_run,
decision_task_id,
existing_tasks=label_to_taskid,
)
write_artifact(f"task-graph{suffix}.json", optimized_task_graph.to_json())
write_artifact(f"label-to-taskid{suffix}.json", label_to_taskid)
write_artifact(f"to-run{suffix}.json", list(to_run))
create.create_tasks(
graph_config,
optimized_task_graph,
label_to_taskid,
params,
decision_task_id,
)
return label_to_taskid
def _update_reducer(accumulator, new_value):
"similar to set or dict `update` method, but returning the modified object"
accumulator.update(new_value)
return accumulator
def combine_task_graph_files(suffixes):
"""Combine task-graph-{suffix}.json files into a single task-graph.json file.
Since Chain of Trust verification requires a task-graph.json file that
contains all children tasks, we can combine the various task-graph-0.json
type files into a master task-graph.json file at the end.
Actions also look for various artifacts, so we combine those in a similar
fashion.
In the case where there is only one suffix, we simply rename it to avoid the
additional cost of uploading two copies of the same data.
"""
if len(suffixes) == 1:
for filename in ["task-graph", "label-to-taskid", "to-run"]:
rename_artifact(f"{filename}-{suffixes[0]}.json", f"{filename}.json")
return
def combine(file_contents, base):
return reduce(_update_reducer, file_contents, base)
files = [read_artifact(f"task-graph-{suffix}.json") for suffix in suffixes]
write_artifact("task-graph.json", combine(files, dict()))
files = [read_artifact(f"label-to-taskid-{suffix}.json") for suffix in suffixes]
write_artifact("label-to-taskid.json", combine(files, dict()))
files = [read_artifact(f"to-run-{suffix}.json") for suffix in suffixes]
write_artifact("to-run.json", list(combine(files, set())))
def relativize_datestamps(task_def):
"""
Given a task definition as received from the queue, convert all datestamps
to {relative_datestamp: ..} format, with the task creation time as "now".
The result is useful for handing to ``create_task``.
"""
base = parse_time(task_def["created"])
# borrowed from https://github.com/epoberezkin/ajv/blob/master/lib/compile/formats.js
ts_pattern = re.compile(
r"^\d\d\d\d-[0-1]\d-[0-3]\d[t\s]"
r"(?:[0-2]\d:[0-5]\d:[0-5]\d|23:59:60)(?:\.\d+)?"
r"(?:z|[+-]\d\d:\d\d)$",
re.I,
)
def recurse(value):
if isinstance(value, str):
if ts_pattern.match(value):
value = parse_time(value)
diff = value - base
return {"relative-datestamp": f"{int(diff.total_seconds())} seconds"}
if isinstance(value, list):
return [recurse(e) for e in value]
if isinstance(value, dict):
return {k: recurse(v) for k, v in value.items()}
return value
return recurse(task_def)
def add_args_to_command(cmd_parts, extra_args=[]):
"""
Add custom command line args to a given command.
args:
cmd_parts: the raw command as seen by taskcluster
extra_args: array of args we want to add
"""
cmd_type = "default"
if len(cmd_parts) == 1 and isinstance(cmd_parts[0], dict):
# windows has single cmd part as dict: 'task-reference', with long string
cmd_parts = cmd_parts[0]["task-reference"].split(" ")
cmd_type = "dict"
elif len(cmd_parts) == 1 and isinstance(cmd_parts[0], str):
# windows has single cmd part as a long string
cmd_parts = cmd_parts[0].split(" ")
cmd_type = "unicode"
elif len(cmd_parts) == 1 and isinstance(cmd_parts[0], list):
# osx has an single value array with an array inside
cmd_parts = cmd_parts[0]
cmd_type = "subarray"
cmd_parts.extend(extra_args)
if cmd_type == "dict":
cmd_parts = [{"task-reference": " ".join(cmd_parts)}]
elif cmd_type == "unicode":
cmd_parts = [" ".join(cmd_parts)]
elif cmd_type == "subarray":
cmd_parts = [cmd_parts]
return cmd_parts
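
A hedged illustration of the add_args_to_command branches above, using a made-up worker command (the 'dict' shape):

# Illustration only; the command string is hypothetical.
from taskgraph.actions.util import add_args_to_command

cmd = [{"task-reference": "run-task --fetch foo"}]
cmd = add_args_to_command(cmd, extra_args=["--verbose"])
# cmd is now [{"task-reference": "run-task --fetch foo --verbose"}]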

View File

@@ -0,0 +1,142 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import os
import logging
import sys
import attr
from .util import path
from .util.python_path import find_object
from .util.schema import validate_schema, Schema, optionally_keyed_by
from voluptuous import Required, Extra, Any, Optional, Length, All
from .util.yaml import load_yaml
logger = logging.getLogger(__name__)
graph_config_schema = Schema(
{
# The trust-domain for this graph.
# (See https://firefox-source-docs.mozilla.org/taskcluster/taskcluster/taskgraph.html#taskgraph-trust-domain) # noqa
Required("trust-domain"): str,
Required("task-priority"): optionally_keyed_by(
"project",
Any(
"highest",
"very-high",
"high",
"medium",
"low",
"very-low",
"lowest",
),
),
Required("workers"): {
Required("aliases"): {
str: {
Required("provisioner"): optionally_keyed_by("level", str),
Required("implementation"): str,
Required("os"): str,
Required("worker-type"): optionally_keyed_by("level", str),
}
},
},
Required("taskgraph"): {
Optional(
"register",
description="Python function to call to register extensions.",
): str,
Optional("decision-parameters"): str,
Optional(
"cached-task-prefix",
description="The taskcluster index prefix to use for caching tasks. "
"Defaults to `trust-domain`.",
): str,
Required("repositories"): All(
{
str: {
Required("name"): str,
Optional("project-regex"): str,
Optional("ssh-secret-name"): str,
# FIXME
Extra: str,
}
},
Length(min=1),
),
},
Extra: object,
}
)
@attr.s(frozen=True, cmp=False)
class GraphConfig:
_config = attr.ib()
root_dir = attr.ib()
_PATH_MODIFIED = False
def __getitem__(self, name):
return self._config[name]
def __contains__(self, name):
return name in self._config
def register(self):
"""
Add the project's taskgraph directory to the python path, and register
any extensions present.
"""
modify_path = os.path.dirname(self.root_dir)
if GraphConfig._PATH_MODIFIED:
if GraphConfig._PATH_MODIFIED == modify_path:
# Already modified path with the same root_dir.
# We currently need to do this to enable actions to call
# taskgraph_decision, e.g. relpro.
return
raise Exception("Can't register multiple directories on python path.")
GraphConfig._PATH_MODIFIED = modify_path
sys.path.insert(0, modify_path)
register_path = self["taskgraph"].get("register")
if register_path:
find_object(register_path)(self)
@property
def vcs_root(self):
if path.split(self.root_dir)[-2:] != ["taskcluster", "ci"]:
raise Exception(
"Not guessing path to vcs root. "
"Graph config in non-standard location."
)
return os.path.dirname(os.path.dirname(self.root_dir))
@property
def taskcluster_yml(self):
if path.split(self.root_dir)[-2:] != ["taskcluster", "ci"]:
raise Exception(
"Not guessing path to `.taskcluster.yml`. "
"Graph config in non-standard location."
)
return os.path.join(
os.path.dirname(os.path.dirname(self.root_dir)),
".taskcluster.yml",
)
def validate_graph_config(config):
validate_schema(graph_config_schema, config, "Invalid graph configuration:")
def load_graph_config(root_dir):
config_yml = os.path.join(root_dir, "config.yml")
if not os.path.exists(config_yml):
raise Exception(f"Couldn't find taskgraph configuration: {config_yml}")
logger.debug(f"loading config from `{config_yml}`")
config = load_yaml(config_yml)
validate_graph_config(config)
return GraphConfig(config=config, root_dir=root_dir)
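
As a rough sketch, a config.yml that satisfies graph_config_schema above would deserialize to something like the dict below; every name in it is a placeholder, not a real configuration:

# Hypothetical minimal graph configuration; all values are placeholders.
from taskgraph.config import validate_graph_config

example_config = {
    "trust-domain": "example",
    "task-priority": "low",
    "workers": {
        "aliases": {
            "b-linux": {
                "provisioner": "example-provisioner",
                "implementation": "docker-worker",
                "os": "linux",
                "worker-type": "b-linux",
            },
        },
    },
    "taskgraph": {
        "repositories": {
            "example": {"name": "Example Repository"},
        },
    },
}

validate_graph_config(example_config)  # raises on mismatch (unless taskgraph.fast skips validation)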

View File

@@ -0,0 +1,132 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import concurrent.futures as futures
import json
import sys
import logging
from slugid import nice as slugid
from taskgraph.util.parameterization import resolve_timestamps
from taskgraph.util.time import current_json_time
from taskgraph.util.taskcluster import get_session, CONCURRENCY
logger = logging.getLogger(__name__)
# this is set to true for `mach taskgraph action-callback --test`
testing = False
def create_tasks(graph_config, taskgraph, label_to_taskid, params, decision_task_id):
taskid_to_label = {t: l for l, t in label_to_taskid.items()}
# when running as an actual decision task, we use the decision task's
# taskId as the taskGroupId. The process that created the decision task
# helpfully placed it in this same taskGroup. If there is no $TASK_ID,
# fall back to a slugid
scheduler_id = "{}-level-{}".format(graph_config["trust-domain"], params["level"])
# Add the taskGroupId, schedulerId and optionally the decision task
# dependency
for task_id in taskgraph.graph.nodes:
task_def = taskgraph.tasks[task_id].task
# if this task has no dependencies *within* this taskgraph, make it
# depend on this decision task. If it has another dependency within
# the taskgraph, then it already implicitly depends on the decision
# task. The result is that tasks do not start immediately. If this
# loop fails halfway through, none of the already-created tasks run.
if not any(t in taskgraph.tasks for t in task_def.get("dependencies", [])):
task_def.setdefault("dependencies", []).append(decision_task_id)
task_def["taskGroupId"] = decision_task_id
task_def["schedulerId"] = scheduler_id
# If `testing` is True, then run without parallelization
concurrency = CONCURRENCY if not testing else 1
session = get_session()
with futures.ThreadPoolExecutor(concurrency) as e:
fs = {}
# We can't submit a task until its dependencies have been submitted.
# So our strategy is to walk the graph and submit tasks once all
# their dependencies have been submitted.
tasklist = set(taskgraph.graph.visit_postorder())
alltasks = tasklist.copy()
def schedule_tasks():
# bail out early if any futures have failed
if any(f.done() and f.exception() for f in fs.values()):
return
to_remove = set()
new = set()
def submit(task_id, label, task_def):
fut = e.submit(create_task, session, task_id, label, task_def)
new.add(fut)
fs[task_id] = fut
for task_id in tasklist:
task_def = taskgraph.tasks[task_id].task
# If we haven't finished submitting all our dependencies yet,
# come back to this later.
# Some dependencies aren't in our graph, so make sure to filter
# those out
deps = set(task_def.get("dependencies", [])) & alltasks
if any((d not in fs or not fs[d].done()) for d in deps):
continue
submit(task_id, taskid_to_label[task_id], task_def)
to_remove.add(task_id)
# Schedule tasks as many times as task_duplicates indicates
attributes = taskgraph.tasks[task_id].attributes
for i in range(1, attributes.get("task_duplicates", 1)):
# We use slugid() since we want a distinct task id
submit(slugid(), taskid_to_label[task_id], task_def)
tasklist.difference_update(to_remove)
# as each of those futures complete, try to schedule more tasks
for f in futures.as_completed(new):
schedule_tasks()
# start scheduling tasks and run until everything is scheduled
schedule_tasks()
# check the result of each future, raising an exception if it failed
for f in futures.as_completed(fs.values()):
f.result()
def create_task(session, task_id, label, task_def):
# create the task using 'http://taskcluster/queue', which is proxied to the queue service
# with credentials appropriate to this job.
# Resolve timestamps
now = current_json_time(datetime_format=True)
task_def = resolve_timestamps(now, task_def)
if testing:
json.dump(
[task_id, task_def],
sys.stdout,
sort_keys=True,
indent=4,
separators=(",", ": "),
)
# add a newline
print("")
return
logger.info(f"Creating task with taskId {task_id} for {label}")
res = session.put(f"http://taskcluster/queue/v1/task/{task_id}", json=task_def)
if res.status_code != 200:
try:
logger.error(res.json()["message"])
except Exception:
logger.error(res.text)
res.raise_for_status()

View File

@@ -0,0 +1,282 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import os
import json
import logging
import time
import yaml
from .actions import render_actions_json
from .create import create_tasks
from .generator import TaskGraphGenerator
from .parameters import Parameters
from .taskgraph import TaskGraph
from taskgraph.util.python_path import find_object
from taskgraph.util.vcs import get_repository
from .util.schema import validate_schema, Schema
from taskgraph.util.yaml import load_yaml
from voluptuous import Optional
logger = logging.getLogger(__name__)
ARTIFACTS_DIR = "artifacts"
# For each project, this gives a set of parameters specific to the project.
# See `taskcluster/docs/parameters.rst` for information on parameters.
PER_PROJECT_PARAMETERS = {
# the default parameters are used for projects that do not match above.
"default": {
"target_tasks_method": "default",
}
}
try_task_config_schema_v2 = Schema(
{
Optional("parameters"): {str: object},
}
)
def full_task_graph_to_runnable_jobs(full_task_json):
runnable_jobs = {}
for label, node in full_task_json.items():
if not ("extra" in node["task"] and "treeherder" in node["task"]["extra"]):
continue
th = node["task"]["extra"]["treeherder"]
runnable_jobs[label] = {"symbol": th["symbol"]}
for i in ("groupName", "groupSymbol", "collection"):
if i in th:
runnable_jobs[label][i] = th[i]
if th.get("machine", {}).get("platform"):
runnable_jobs[label]["platform"] = th["machine"]["platform"]
return runnable_jobs
def taskgraph_decision(options, parameters=None):
"""
Run the decision task. This function implements `mach taskgraph decision`,
and is responsible for
* processing decision task command-line options into parameters
* running task-graph generation exactly the same way the other `mach
taskgraph` commands do
* generating a set of artifacts to memorialize the graph
* calling TaskCluster APIs to create the graph
"""
parameters = parameters or (
lambda graph_config: get_decision_parameters(graph_config, options)
)
decision_task_id = os.environ["TASK_ID"]
# create a TaskGraphGenerator instance
tgg = TaskGraphGenerator(
root_dir=options.get("root"),
parameters=parameters,
decision_task_id=decision_task_id,
write_artifacts=True,
)
# write out the parameters used to generate this graph
write_artifact("parameters.yml", dict(**tgg.parameters))
# write out the public/actions.json file
write_artifact(
"actions.json",
render_actions_json(tgg.parameters, tgg.graph_config, decision_task_id),
)
# write out the full graph for reference
full_task_json = tgg.full_task_graph.to_json()
write_artifact("full-task-graph.json", full_task_json)
# write out the public/runnable-jobs.json file
write_artifact(
"runnable-jobs.json", full_task_graph_to_runnable_jobs(full_task_json)
)
# this is just a test to check whether the from_json() function is working
_, _ = TaskGraph.from_json(full_task_json)
# write out the target task set to allow reproducing this as input
write_artifact("target-tasks.json", list(tgg.target_task_set.tasks.keys()))
# write out the optimized task graph to describe what will actually happen,
# and the map of labels to taskids
write_artifact("task-graph.json", tgg.morphed_task_graph.to_json())
write_artifact("label-to-taskid.json", tgg.label_to_taskid)
# actually create the graph
create_tasks(
tgg.graph_config,
tgg.morphed_task_graph,
tgg.label_to_taskid,
tgg.parameters,
decision_task_id=decision_task_id,
)
def get_decision_parameters(graph_config, options):
"""
Load parameters from the command-line options for 'taskgraph decision'.
This also applies per-project parameters, based on the given project.
"""
parameters = {
n: options[n]
for n in [
"base_repository",
"head_repository",
"head_rev",
"head_ref",
"head_tag",
"project",
"pushlog_id",
"pushdate",
"repository_type",
"owner",
"level",
"target_tasks_method",
"tasks_for",
]
if n in options
}
repo = get_repository(os.getcwd())
commit_message = repo.get_commit_message()
# Define default filter list, as most configurations shouldn't need
# custom filters.
parameters["filters"] = [
"target_tasks_method",
]
parameters["optimize_target_tasks"] = True
parameters["existing_tasks"] = {}
parameters["do_not_optimize"] = []
# owner must be an email, but sometimes (e.g., for ffxbld) it is not, in which
# case, fake it
if "@" not in parameters["owner"]:
parameters["owner"] += "@noreply.mozilla.org"
# use the pushdate as build_date if given, else use current time
parameters["build_date"] = parameters["pushdate"] or int(time.time())
# moz_build_date is the build identifier based on build_date
parameters["moz_build_date"] = time.strftime(
"%Y%m%d%H%M%S", time.gmtime(parameters["build_date"])
)
project = parameters["project"]
try:
parameters.update(PER_PROJECT_PARAMETERS[project])
except KeyError:
logger.warning(
"using default project parameters; add {} to "
"PER_PROJECT_PARAMETERS in {} to customize behavior "
"for this project".format(project, __file__)
)
parameters.update(PER_PROJECT_PARAMETERS["default"])
# `target_tasks_method` has higher precedence than `project` parameters
if options.get("target_tasks_method"):
parameters["target_tasks_method"] = options["target_tasks_method"]
# ..but can be overridden by the commit message: if it contains the special
# string "DONTBUILD" and this is an on-push decision task, then use the
# special 'nothing' target task method.
if "DONTBUILD" in commit_message and options["tasks_for"] == "hg-push":
parameters["target_tasks_method"] = "nothing"
if options.get("optimize_target_tasks") is not None:
parameters["optimize_target_tasks"] = options["optimize_target_tasks"]
if "decision-parameters" in graph_config["taskgraph"]:
find_object(graph_config["taskgraph"]["decision-parameters"])(
graph_config, parameters
)
if options.get("try_task_config_file"):
task_config_file = os.path.abspath(options.get("try_task_config_file"))
else:
# if try_task_config.json is present, load it
task_config_file = os.path.join(os.getcwd(), "try_task_config.json")
# load try settings
if ("try" in project and options["tasks_for"] == "hg-push") or options[
"tasks_for"
] == "github-pull-request":
set_try_config(parameters, task_config_file)
result = Parameters(**parameters)
result.check()
return result
def set_try_config(parameters, task_config_file):
if os.path.isfile(task_config_file):
logger.info(f"using try tasks from {task_config_file}")
with open(task_config_file) as fh:
task_config = json.load(fh)
task_config_version = task_config.pop("version")
if task_config_version == 2:
validate_schema(
try_task_config_schema_v2,
task_config,
"Invalid v2 `try_task_config.json`.",
)
parameters.update(task_config["parameters"])
return
else:
raise Exception(
f"Unknown `try_task_config.json` version: {task_config_version}"
)
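# Editor's sketch (hedged): a minimal `try_task_config.json` accepted by the v2
# schema above looks roughly like the dict below; the parameter names shown are
# illustrative, not an authoritative list.
EXAMPLE_TRY_TASK_CONFIG_V2 = {
    "version": 2,
    "parameters": {
        "target_tasks_method": "default",
        "optimize_target_tasks": True,
    },
}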
def write_artifact(filename, data):
logger.info(f"writing artifact file `{filename}`")
if not os.path.isdir(ARTIFACTS_DIR):
os.mkdir(ARTIFACTS_DIR)
path = os.path.join(ARTIFACTS_DIR, filename)
if filename.endswith(".yml"):
with open(path, "w") as f:
yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False)
elif filename.endswith(".json"):
with open(path, "w") as f:
json.dump(data, f, sort_keys=True, indent=2, separators=(",", ": "))
elif filename.endswith(".gz"):
import gzip
with gzip.open(path, "wb") as f:
f.write(json.dumps(data).encode("utf-8"))
else:
raise TypeError(f"Don't know how to write to {filename}")
def read_artifact(filename):
path = os.path.join(ARTIFACTS_DIR, filename)
if filename.endswith(".yml"):
return load_yaml(path, filename)
elif filename.endswith(".json"):
with open(path) as f:
return json.load(f)
elif filename.endswith(".gz"):
import gzip
with gzip.open(path, "rb") as f:
return json.load(f)
else:
raise TypeError(f"Don't know how to read {filename}")
def rename_artifact(src, dest):
os.rename(os.path.join(ARTIFACTS_DIR, src), os.path.join(ARTIFACTS_DIR, dest))

View File

@@ -0,0 +1,204 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import os
import tarfile
from io import BytesIO
from taskgraph.util import docker
from taskgraph.util.taskcluster import (
get_artifact_url,
get_session,
)
def get_image_digest(image_name):
from taskgraph.generator import load_tasks_for_kind
from taskgraph.parameters import Parameters
params = Parameters(
level=os.environ.get("MOZ_SCM_LEVEL", "3"),
strict=False,
)
tasks = load_tasks_for_kind(params, "docker-image")
task = tasks[f"build-docker-image-{image_name}"]
return task.attributes["cached_task"]["digest"]
def load_image_by_name(image_name, tag=None):
from taskgraph.generator import load_tasks_for_kind
from taskgraph.optimize import IndexSearch
from taskgraph.parameters import Parameters
params = Parameters(
level=os.environ.get("MOZ_SCM_LEVEL", "3"),
strict=False,
)
tasks = load_tasks_for_kind(params, "docker-image")
task = tasks[f"build-docker-image-{image_name}"]
task_id = IndexSearch().should_replace_task(
task, {}, task.optimization.get("index-search", [])
)
if task_id in (True, False):
print(
"Could not find artifacts for a docker image "
"named `{image_name}`. Local commits and other changes "
"in your checkout may cause this error. Try "
"updating to a fresh checkout of mozilla-central "
"to download image.".format(image_name=image_name)
)
return False
return load_image_by_task_id(task_id, tag)
def load_image_by_task_id(task_id, tag=None):
artifact_url = get_artifact_url(task_id, "public/image.tar.zst")
result = load_image(artifact_url, tag)
print("Found docker image: {}:{}".format(result["image"], result["tag"]))
if tag:
print(f"Re-tagged as: {tag}")
else:
tag = "{}:{}".format(result["image"], result["tag"])
print(f"Try: docker run -ti --rm {tag} bash")
return True
def build_context(name, outputFile, args=None):
"""Build a context.tar for image with specified name."""
if not name:
raise ValueError("must provide a Docker image name")
if not outputFile:
raise ValueError("must provide a outputFile")
image_dir = docker.image_path(name)
if not os.path.isdir(image_dir):
raise Exception("image directory does not exist: %s" % image_dir)
docker.create_context_tar(".", image_dir, outputFile, args)
def build_image(name, tag, args=None):
"""Build a Docker image of specified name.
Output from image building process will be printed to stdout.
"""
if not name:
raise ValueError("must provide a Docker image name")
image_dir = docker.image_path(name)
if not os.path.isdir(image_dir):
raise Exception("image directory does not exist: %s" % image_dir)
tag = tag or docker.docker_image(name, by_tag=True)
buf = BytesIO()
docker.stream_context_tar(".", image_dir, buf, "", args)
docker.post_to_docker(buf.getvalue(), "/build", nocache=1, t=tag)
print(f"Successfully built {name} and tagged with {tag}")
if tag.endswith(":latest"):
print("*" * 50)
print("WARNING: no VERSION file found in image directory.")
print("Image is not suitable for deploying/pushing.")
print("Create an image suitable for deploying/pushing by creating")
print("a VERSION file in the image directory.")
print("*" * 50)
def load_image(url, imageName=None, imageTag=None):
"""
Load a docker image from a URL as imageName:tag. If no imageName or tag is given,
it will use whatever is inside the zstd-compressed tarball.
Returns an object with properties 'image', 'tag' and 'layer'.
"""
import zstandard as zstd
# If imageName is given and we don't have an imageTag
# we parse out the imageTag from imageName, or default it to 'latest'
# if no imageName and no imageTag is given, 'repositories' won't be rewritten
if imageName and not imageTag:
if ":" in imageName:
imageName, imageTag = imageName.split(":", 1)
else:
imageTag = "latest"
info = {}
def download_and_modify_image():
# This function downloads and edits the downloaded tar file on the fly.
# It emits chunked buffers of the edited tar file, as a generator.
print(f"Downloading from {url}")
# get_session() gets us a requests.Session set to retry several times.
req = get_session().get(url, stream=True)
req.raise_for_status()
with zstd.ZstdDecompressor().stream_reader(req.raw) as ifh:
tarin = tarfile.open(
mode="r|",
fileobj=ifh,
bufsize=zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
)
# Stream through each member of the downloaded tar file individually.
for member in tarin:
# Non-file members only need a tar header. Emit one.
if not member.isfile():
yield member.tobuf(tarfile.GNU_FORMAT)
continue
# Open stream reader for the member
reader = tarin.extractfile(member)
# If member is `repositories`, we parse and possibly rewrite the
# image tags.
if member.name == "repositories":
# Read and parse repositories
repos = json.loads(reader.read())
reader.close()
# If there is more than one image or tag, we can't handle it
# here.
if len(repos.keys()) > 1:
raise Exception("file contains more than one image")
info["image"] = image = list(repos.keys())[0]
if len(repos[image].keys()) > 1:
raise Exception("file contains more than one tag")
info["tag"] = tag = list(repos[image].keys())[0]
info["layer"] = layer = repos[image][tag]
# Rewrite the repositories file
data = json.dumps({imageName or image: {imageTag or tag: layer}})
reader = BytesIO(data.encode("utf-8"))
member.size = len(data)
# Emit the tar header for this member.
yield member.tobuf(tarfile.GNU_FORMAT)
# Then emit its content.
remaining = member.size
while remaining:
length = min(remaining, zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
buf = reader.read(length)
remaining -= len(buf)
yield buf
# Pad to fill a 512 bytes block, per tar format.
remainder = member.size % 512
if remainder:
yield ("\0" * (512 - remainder)).encode("utf-8")
reader.close()
docker.post_to_docker(download_and_modify_image(), "/images/load", quiet=0)
# Check that we found a repositories file
if not info.get("image") or not info.get("tag") or not info.get("layer"):
raise Exception("No repositories file found!")
return info
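# Editor's sketch (hedged): the `repositories` rewrite performed above, shown in
# isolation. The image and tag names below are illustrative only.
def _example_repositories_rewrite():
    original = {"upstream-image": {"upstream-tag": "layer-id"}}
    image = list(original.keys())[0]
    tag = list(original[image].keys())[0]
    layer = original[image][tag]
    # With imageName="my-image" and imageTag="my-tag", the member becomes:
    return {"my-image": {"my-tag": layer}}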

View File

@@ -0,0 +1,67 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Support for optimizing tasks based on the set of files that have changed.
"""
import logging
import requests
from redo import retry
from .util.path import match as match_path
from .util.memoize import memoize
logger = logging.getLogger(__name__)
@memoize
def get_changed_files(repository, revision):
"""
Get the set of files changed in the push headed by the given revision.
Responses are cached, so multiple calls with the same arguments are OK.
"""
url = "{}/json-automationrelevance/{}".format(repository.rstrip("/"), revision)
logger.debug("Querying version control for metadata: %s", url)
def get_automationrelevance():
response = requests.get(url, timeout=30)
return response.json()
contents = retry(get_automationrelevance, attempts=10, sleeptime=10)
logger.debug(
"{} commits influencing task scheduling:".format(len(contents["changesets"]))
)
changed_files = set()
for c in contents["changesets"]:
desc = "" # Support empty desc
if c["desc"]:
desc = c["desc"].splitlines()[0].encode("ascii", "ignore")
logger.debug(" {cset} {desc}".format(cset=c["node"][0:12], desc=desc))
changed_files |= set(c["files"])
return changed_files
def check(params, file_patterns):
"""Determine whether any of the files changed in the indicated push to
https://hg.mozilla.org match any of the given file patterns."""
repository = params.get("head_repository")
revision = params.get("head_rev")
if not repository or not revision:
logger.warning(
"Missing `head_repository` or `head_rev` parameters; "
"assuming all files have changed"
)
return True
changed_files = get_changed_files(repository, revision)
for pattern in file_patterns:
for path in changed_files:
if match_path(path, pattern):
return True
return False
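# Editor's sketch (hedged): an optimization strategy might use check() like
# this; the parameter values and path patterns below are illustrative.
def _example_check_usage():
    params = {
        "head_repository": "https://hg.mozilla.org/mozilla-central",
        "head_rev": "abcdef123456",
    }
    # True if any file changed in the push matches either pattern.
    return check(params, ["taskcluster/**", "**/*.py"])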

View File

@@ -0,0 +1,36 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import logging
from . import (
target_tasks,
)
logger = logging.getLogger(__name__)
filter_task_functions = {}
def filter_task(name):
"""Generator to declare a task filter function."""
def wrap(func):
filter_task_functions[name] = func
return func
return wrap
@filter_task("target_tasks_method")
def filter_target_tasks(graph, parameters, graph_config):
"""Proxy filter to use legacy target tasks code.
This should go away once target_tasks are converted to filters.
"""
attr = parameters.get("target_tasks_method", "all_tasks")
fn = target_tasks.get_method(attr)
return fn(graph, parameters, graph_config)
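# Editor's sketch (hedged): a custom filter would be declared with the decorator
# above and selected via the `filters` parameter; the name and logic below are
# hypothetical.
@filter_task("example_only_docker_images")
def _example_filter_docker_images(graph, parameters, graph_config):
    return [
        label for label, task in graph.tasks.items() if task.kind == "docker-image"
    ]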

View File

@@ -0,0 +1,422 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import logging
import os
import copy
import attr
from typing import AnyStr
from . import filter_tasks
from .graph import Graph
from .taskgraph import TaskGraph
from .task import Task
from .optimize import optimize_task_graph
from .parameters import Parameters
from .morph import morph
from .util.python_path import find_object
from .transforms.base import TransformSequence, TransformConfig
from .util.verify import (
verifications,
)
from .util.yaml import load_yaml
from .config import load_graph_config, GraphConfig
logger = logging.getLogger(__name__)
class KindNotFound(Exception):
"""
Raised when trying to load kind from a directory without a kind.yml.
"""
@attr.s(frozen=True)
class Kind:
name = attr.ib(type=AnyStr)
path = attr.ib(type=AnyStr)
config = attr.ib(type=dict)
graph_config = attr.ib(type=GraphConfig)
def _get_loader(self):
try:
loader = self.config["loader"]
except KeyError:
raise KeyError(f"{self.path!r} does not define `loader`")
return find_object(loader)
def load_tasks(self, parameters, loaded_tasks, write_artifacts):
loader = self._get_loader()
config = copy.deepcopy(self.config)
kind_dependencies = config.get("kind-dependencies", [])
kind_dependencies_tasks = [
task for task in loaded_tasks if task.kind in kind_dependencies
]
inputs = loader(self.name, self.path, config, parameters, loaded_tasks)
transforms = TransformSequence()
for xform_path in config["transforms"]:
transform = find_object(xform_path)
transforms.add(transform)
# perform the transformations on the loaded inputs
trans_config = TransformConfig(
self.name,
self.path,
config,
parameters,
kind_dependencies_tasks,
self.graph_config,
write_artifacts=write_artifacts,
)
tasks = [
Task(
self.name,
label=task_dict["label"],
attributes=task_dict["attributes"],
task=task_dict["task"],
optimization=task_dict.get("optimization"),
dependencies=task_dict.get("dependencies"),
soft_dependencies=task_dict.get("soft-dependencies"),
)
for task_dict in transforms(trans_config, inputs)
]
return tasks
@classmethod
def load(cls, root_dir, graph_config, kind_name):
path = os.path.join(root_dir, kind_name)
kind_yml = os.path.join(path, "kind.yml")
if not os.path.exists(kind_yml):
raise KindNotFound(kind_yml)
logger.debug(f"loading kind `{kind_name}` from `{path}`")
config = load_yaml(kind_yml)
return cls(kind_name, path, config, graph_config)
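# Editor's sketch (hedged): the kind.yml loaded above, expressed as the parsed
# config dict. The loader/transform paths and the job name are assumptions for
# illustration and are not verified against this exact version.
EXAMPLE_KIND_CONFIG = {
    "loader": "taskgraph.loader.transform:loader",
    "transforms": [
        "taskgraph.transforms.job:transforms",
        "taskgraph.transforms.task:transforms",
    ],
    "kind-dependencies": ["docker-image"],
    "jobs": {
        "hello": {"description": "example job"},
    },
}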
class TaskGraphGenerator:
"""
The central controller for taskgraph. This handles all phases of graph
generation. The task graph is generated from all of the kinds defined in
subdirectories of the generator's root directory.
Access to the results of this generation, as well as intermediate values at
various phases of generation, is available via properties. This encourages
the provision of all generation inputs at instance construction time.
"""
# Task-graph generation is implemented as a Python generator that yields
# each "phase" of generation. This allows some mach subcommands to short-
# circuit generation of the entire graph by never completing the generator.
def __init__(
self,
root_dir,
parameters,
decision_task_id="DECISION-TASK",
write_artifacts=False,
):
"""
@param root_dir: root directory, with subdirectories for each kind
@param parameters: parameters for this task-graph generation, or callable
taking a `GraphConfig` and returning parameters
@type parameters: Union[Parameters, Callable[[GraphConfig], Parameters]]
"""
if root_dir is None:
root_dir = "taskcluster/ci"
self.root_dir = root_dir
self._parameters = parameters
self._decision_task_id = decision_task_id
self._write_artifacts = write_artifacts
# start the generator
self._run = self._run()
self._run_results = {}
@property
def parameters(self):
"""
The parameters used for this graph.
@type: Parameters
"""
return self._run_until("parameters")
@property
def full_task_set(self):
"""
The full task set: all tasks defined by any kind (a graph without edges)
@type: TaskGraph
"""
return self._run_until("full_task_set")
@property
def full_task_graph(self):
"""
The full task graph: the full task set, with edges representing
dependencies.
@type: TaskGraph
"""
return self._run_until("full_task_graph")
@property
def target_task_set(self):
"""
The set of targeted tasks (a graph without edges)
@type: TaskGraph
"""
return self._run_until("target_task_set")
@property
def target_task_graph(self):
"""
The set of targeted tasks and all of their dependencies
@type: TaskGraph
"""
return self._run_until("target_task_graph")
@property
def optimized_task_graph(self):
"""
The set of targeted tasks and all of their dependencies; tasks that
have been optimized out are either omitted or replaced with a Task
instance containing only a task_id.
@type: TaskGraph
"""
return self._run_until("optimized_task_graph")
@property
def label_to_taskid(self):
"""
A dictionary mapping task label to assigned taskId. This property helps
in interpreting `optimized_task_graph`.
@type: dictionary
"""
return self._run_until("label_to_taskid")
@property
def morphed_task_graph(self):
"""
The optimized task graph, with any subsequent morphs applied. This graph
will have the same meaning as the optimized task graph, but be in a form
more palatable to TaskCluster.
@type: TaskGraph
"""
return self._run_until("morphed_task_graph")
@property
def graph_config(self):
"""
The configuration for this graph.
@type: GraphConfig
"""
return self._run_until("graph_config")
def _load_kinds(self, graph_config, target_kind=None):
if target_kind:
# docker-image is an implicit dependency that never appears in
# kind-dependencies.
queue = [target_kind, "docker-image"]
seen_kinds = set()
while queue:
kind_name = queue.pop()
if kind_name in seen_kinds:
continue
seen_kinds.add(kind_name)
kind = Kind.load(self.root_dir, graph_config, kind_name)
yield kind
queue.extend(kind.config.get("kind-dependencies", []))
else:
for kind_name in os.listdir(self.root_dir):
try:
yield Kind.load(self.root_dir, graph_config, kind_name)
except KindNotFound:
continue
def _run(self):
logger.info("Loading graph configuration.")
graph_config = load_graph_config(self.root_dir)
yield ("graph_config", graph_config)
graph_config.register()
if callable(self._parameters):
parameters = self._parameters(graph_config)
else:
parameters = self._parameters
logger.info("Using {}".format(parameters))
logger.debug("Dumping parameters:\n{}".format(repr(parameters)))
filters = parameters.get("filters", [])
# Always add legacy target tasks method until we deprecate that API.
if "target_tasks_method" not in filters:
filters.insert(0, "target_tasks_method")
filters = [filter_tasks.filter_task_functions[f] for f in filters]
yield ("parameters", parameters)
logger.info("Loading kinds")
# put the kinds into a graph and sort topologically so that kinds are loaded
# in post-order
if parameters.get("target-kind"):
target_kind = parameters["target-kind"]
logger.info(
"Limiting kinds to {target_kind} and dependencies".format(
target_kind=target_kind
)
)
kinds = {
kind.name: kind
for kind in self._load_kinds(graph_config, parameters.get("target-kind"))
}
edges = set()
for kind in kinds.values():
for dep in kind.config.get("kind-dependencies", []):
edges.add((kind.name, dep, "kind-dependency"))
kind_graph = Graph(set(kinds), edges)
if parameters.get("target-kind"):
kind_graph = kind_graph.transitive_closure({target_kind, "docker-image"})
logger.info("Generating full task set")
all_tasks = {}
for kind_name in kind_graph.visit_postorder():
logger.debug(f"Loading tasks for kind {kind_name}")
kind = kinds[kind_name]
try:
new_tasks = kind.load_tasks(
parameters,
list(all_tasks.values()),
self._write_artifacts,
)
except Exception:
logger.exception(f"Error loading tasks for kind {kind_name}:")
raise
for task in new_tasks:
if task.label in all_tasks:
raise Exception("duplicate tasks with label " + task.label)
all_tasks[task.label] = task
logger.info(f"Generated {len(new_tasks)} tasks for kind {kind_name}")
full_task_set = TaskGraph(all_tasks, Graph(set(all_tasks), set()))
yield verifications("full_task_set", full_task_set, graph_config)
logger.info("Generating full task graph")
edges = set()
for t in full_task_set:
for depname, dep in t.dependencies.items():
edges.add((t.label, dep, depname))
full_task_graph = TaskGraph(all_tasks, Graph(full_task_set.graph.nodes, edges))
logger.info(
"Full task graph contains %d tasks and %d dependencies"
% (len(full_task_set.graph.nodes), len(edges))
)
yield verifications("full_task_graph", full_task_graph, graph_config)
logger.info("Generating target task set")
target_task_set = TaskGraph(
dict(all_tasks), Graph(set(all_tasks.keys()), set())
)
for fltr in filters:
old_len = len(target_task_set.graph.nodes)
target_tasks = set(fltr(target_task_set, parameters, graph_config))
target_task_set = TaskGraph(
{l: all_tasks[l] for l in target_tasks}, Graph(target_tasks, set())
)
logger.info(
"Filter %s pruned %d tasks (%d remain)"
% (fltr.__name__, old_len - len(target_tasks), len(target_tasks))
)
yield verifications("target_task_set", target_task_set, graph_config)
logger.info("Generating target task graph")
# include all docker-image build tasks here, in case they are needed for a graph morph
docker_image_tasks = {
t.label
for t in full_task_graph.tasks.values()
if t.attributes["kind"] == "docker-image"
}
# include all tasks with `always_target` set
always_target_tasks = {
t.label
for t in full_task_graph.tasks.values()
if t.attributes.get("always_target")
}
logger.info(
"Adding %d tasks with `always_target` attribute"
% (len(always_target_tasks) - len(always_target_tasks & target_tasks))
)
target_graph = full_task_graph.graph.transitive_closure(
target_tasks | docker_image_tasks | always_target_tasks
)
target_task_graph = TaskGraph(
{l: all_tasks[l] for l in target_graph.nodes}, target_graph
)
yield verifications("target_task_graph", target_task_graph, graph_config)
logger.info("Generating optimized task graph")
existing_tasks = parameters.get("existing_tasks")
do_not_optimize = set(parameters.get("do_not_optimize", []))
if not parameters.get("optimize_target_tasks", True):
do_not_optimize = set(target_task_set.graph.nodes).union(do_not_optimize)
optimized_task_graph, label_to_taskid = optimize_task_graph(
target_task_graph,
parameters,
do_not_optimize,
self._decision_task_id,
existing_tasks=existing_tasks,
)
yield verifications("optimized_task_graph", optimized_task_graph, graph_config)
morphed_task_graph, label_to_taskid = morph(
optimized_task_graph, label_to_taskid, parameters, graph_config
)
yield "label_to_taskid", label_to_taskid
yield verifications("morphed_task_graph", morphed_task_graph, graph_config)
def _run_until(self, name):
while name not in self._run_results:
try:
k, v = next(self._run)
except StopIteration:
raise AttributeError(f"No such run result {name}")
self._run_results[k] = v
return self._run_results[name]
def load_tasks_for_kind(parameters, kind, root_dir=None):
"""
Get all the tasks of a given kind.
This function is designed to be called from outside of taskgraph.
"""
# make parameters read-write
parameters = dict(parameters)
parameters["target-kind"] = kind
parameters = Parameters(strict=False, **parameters)
tgg = TaskGraphGenerator(root_dir=root_dir, parameters=parameters)
return {
task.task["metadata"]["name"]: task
for task in tgg.full_task_set
if task.kind == kind
}
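# Editor's sketch (hedged): typical external usage of load_tasks_for_kind,
# mirroring taskgraph.docker.get_image_digest above; the kind name and the
# attribute access are illustrative.
def _example_load_tasks_for_kind():
    params = Parameters(level="3", strict=False)
    tasks = load_tasks_for_kind(params, "docker-image")
    return {name: task.attributes for name, task in tasks.items()}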

View File

@@ -0,0 +1,131 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import attr
import collections
@attr.s(frozen=True)
class Graph:
"""
Generic representation of a directed acyclic graph with labeled edges
connecting the nodes. Graph operations are implemented in a functional
manner, so the data structure is immutable.
It permits at most one edge of a given name between any pair of nodes. The
graph is not checked for cycles, and methods may hang or otherwise fail if
given a cyclic graph.
The `nodes` and `edges` attributes may be accessed in a read-only fashion.
The `nodes` attribute is a set of node names, while `edges` is a set of
`(left, right, name)` tuples representing an edge named `name` going from
node `left` to node `right`.
"""
nodes = attr.ib(converter=frozenset)
edges = attr.ib(converter=frozenset)
def transitive_closure(self, nodes, reverse=False):
"""
Return the transitive closure of <nodes>: the graph containing all
specified nodes as well as any nodes reachable from them, and any
intervening edges.
If `reverse` is true, the "reachability" will be reversed and this
will return the set of nodes that can reach the specified nodes.
Example
-------
a ------> b ------> c
|
`-------> d
transitive_closure([b]).nodes == set([a, b])
transitive_closure([c]).nodes == set([c, b, a])
transitive_closure([c], reverse=True).nodes == set([c])
transitive_closure([b], reverse=True).nodes == set([b, c, d])
"""
assert isinstance(nodes, set)
assert nodes <= self.nodes
# generate a new graph by expanding along edges until reaching a fixed
# point
new_nodes, new_edges = nodes, set()
nodes, edges = set(), set()
while (new_nodes, new_edges) != (nodes, edges):
nodes, edges = new_nodes, new_edges
add_edges = {
(left, right, name)
for (left, right, name) in self.edges
if (right if reverse else left) in nodes
}
add_nodes = {(left if reverse else right) for (left, right, _) in add_edges}
new_nodes = nodes | add_nodes
new_edges = edges | add_edges
return Graph(new_nodes, new_edges)
def _visit(self, reverse):
queue = collections.deque(sorted(self.nodes))
links_by_node = self.reverse_links_dict() if reverse else self.links_dict()
seen = set()
while queue:
node = queue.popleft()
if node in seen:
continue
links = links_by_node[node]
if all((n in seen) for n in links):
seen.add(node)
yield node
else:
queue.extend(n for n in links if n not in seen)
queue.append(node)
def visit_postorder(self):
"""
Generate a sequence of nodes in postorder, such that every node is
visited *after* any nodes it links to.
Behavior is undefined (read: it will hang) if the graph contains a
cycle.
"""
return self._visit(False)
def visit_preorder(self):
"""
Like visit_postorder, but in reverse: every node is visited *before*
any nodes it links to.
"""
return self._visit(True)
def links_dict(self):
"""
Return a dictionary mapping each node to a set of the nodes it links to
(omitting edge names)
"""
links = collections.defaultdict(set)
for left, right, _ in self.edges:
links[left].add(right)
return links
def named_links_dict(self):
"""
Return a two-level dictionary mapping each node to a dictionary mapping
edge names to labels.
"""
links = collections.defaultdict(dict)
for left, right, name in self.edges:
links[left][name] = right
return links
def reverse_links_dict(self):
"""
Return a dictionary mapping each node to a set of the nodes linking to
it (omitting edge names)
"""
links = collections.defaultdict(set)
for left, right, _ in self.edges:
links[right].add(left)
return links
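# Editor's sketch (hedged): building the small graph from the docstring above
# and querying it; edges follow the (left, right, name) convention.
def _example_graph_usage():
    g = Graph(
        nodes={"a", "b", "c", "d"},
        edges={("a", "b", "dep1"), ("b", "c", "dep2"), ("b", "d", "dep3")},
    )
    reachable_from_b = g.transitive_closure({"b"}).nodes
    reaching_b = g.transitive_closure({"b"}, reverse=True).nodes
    order = list(g.visit_postorder())  # every node appears after the nodes it links to
    return reachable_from_b, reaching_b, order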

View File

@@ -0,0 +1,59 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import logging
from ..util.templates import merge
from ..util.yaml import load_yaml
logger = logging.getLogger(__name__)
def loader(kind, path, config, params, loaded_tasks):
"""
Get the input elements that will be transformed into tasks in a generic
way. The elements themselves are free-form, and become the input to the
first transform.
By default, this reads jobs from the `jobs` key, or from yaml files
named by `jobs-from`. The entities are read from mappings, and the
keys to those mappings are added in the `name` key of each entity.
If there is a `job-defaults` config, then every job is merged with it.
This provides a simple way to set default values for all jobs of a kind.
The `job-defaults` key can also be specified in a yaml file pointed to by
`jobs-from`. In this case it will only apply to tasks defined in the same
file.
Other kind implementations can use a different loader function to
produce inputs and hand them to `transform_inputs`.
"""
def jobs():
defaults = config.get("job-defaults")
for name, job in config.get("jobs", {}).items():
if defaults:
job = merge(defaults, job)
job["job-from"] = "kind.yml"
yield name, job
for filename in config.get("jobs-from", []):
tasks = load_yaml(path, filename)
file_defaults = tasks.pop("job-defaults", None)
if defaults:
file_defaults = merge(defaults, file_defaults or {})
for name, job in tasks.items():
if file_defaults:
job = merge(file_defaults, job)
job["job-from"] = filename
yield name, job
for name, job in jobs():
job["name"] = name
logger.debug(f"Generating tasks for {kind} {name}")
yield job
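# Editor's sketch (hedged): given a config like the dict below, the loader
# above yields each job merged with `job-defaults`; names are illustrative.
def _example_default_loader():
    config = {
        "job-defaults": {"run-on-projects": ["all"]},
        "jobs": {
            "lint": {"description": "run linters"},
            "docs": {"description": "build the docs"},
        },
    }
    # each yielded job also carries its `name` and `job-from` keys
    return list(loader("example-kind", "path/to/kind", config, {}, []))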

View File

@@ -0,0 +1,723 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import atexit
import os
import re
import shutil
import subprocess
import sys
import tempfile
import traceback
import argparse
import logging
import json
from collections import namedtuple
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path
from typing import List, Union
import appdirs
import yaml
Command = namedtuple("Command", ["func", "args", "kwargs", "defaults"])
commands = {}
def command(*args, **kwargs):
defaults = kwargs.pop("defaults", {})
def decorator(func):
commands[args[0]] = Command(func, args, kwargs, defaults)
return func
return decorator
def argument(*args, **kwargs):
def decorator(func):
if not hasattr(func, "args"):
func.args = []
func.args.append((args, kwargs))
return func
return decorator
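# Editor's sketch (hedged): a new subcommand would be registered with the
# decorators above; the command name and option are hypothetical.
@command("example-hello", help="Print a greeting (editor's illustration).")
@argument("--name", default="world", help="who to greet")
def _example_hello(options):
    print("hello, {}".format(options["name"]))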
def format_taskgraph_labels(taskgraph):
return "\n".join(
taskgraph.tasks[index].label for index in taskgraph.graph.visit_postorder()
)
def format_taskgraph_json(taskgraph):
return json.dumps(
taskgraph.to_json(), sort_keys=True, indent=2, separators=(",", ": ")
)
def format_taskgraph_yaml(taskgraph):
return yaml.safe_dump(taskgraph.to_json(), default_flow_style=False)
def get_filtered_taskgraph(taskgraph, tasksregex):
"""
Filter all the tasks on the basis of a regular expression
and return a new TaskGraph object.
"""
from taskgraph.graph import Graph
from taskgraph.taskgraph import TaskGraph
# return original taskgraph if no regular expression is passed
if not tasksregex:
return taskgraph
named_links_dict = taskgraph.graph.named_links_dict()
filteredtasks = {}
filterededges = set()
regexprogram = re.compile(tasksregex)
for key in taskgraph.graph.visit_postorder():
task = taskgraph.tasks[key]
if regexprogram.match(task.label):
filteredtasks[key] = task
for depname, dep in named_links_dict[key].items():
if regexprogram.match(dep):
filterededges.add((key, dep, depname))
filtered_taskgraph = TaskGraph(
filteredtasks, Graph(set(filteredtasks), filterededges)
)
return filtered_taskgraph
FORMAT_METHODS = {
"labels": format_taskgraph_labels,
"json": format_taskgraph_json,
"yaml": format_taskgraph_yaml,
}
def get_taskgraph_generator(root, parameters):
"""Helper function to make testing a little easier."""
from taskgraph.generator import TaskGraphGenerator
return TaskGraphGenerator(root_dir=root, parameters=parameters)
def format_taskgraph(options, parameters, logfile=None):
import taskgraph
from taskgraph.parameters import parameters_loader
if logfile:
oldhandler = logging.root.handlers[-1]
logging.root.removeHandler(oldhandler)
handler = logging.FileHandler(logfile, mode="w")
handler.setFormatter(oldhandler.formatter)
logging.root.addHandler(handler)
if options["fast"]:
taskgraph.fast = True
if isinstance(parameters, str):
parameters = parameters_loader(
parameters,
overrides={"target-kind": options.get("target_kind")},
strict=False,
)
tgg = get_taskgraph_generator(options.get("root"), parameters)
tg = getattr(tgg, options["graph_attr"])
tg = get_filtered_taskgraph(tg, options["tasks_regex"])
format_method = FORMAT_METHODS[options["format"] or "labels"]
return format_method(tg)
def dump_output(out, path=None, params_spec=None):
from taskgraph.parameters import Parameters
params_name = Parameters.format_spec(params_spec)
fh = None
if path:
# Substitute params name into file path if necessary
if params_spec and "{params}" not in path:
name, ext = os.path.splitext(path)
name += "_{params}"
path = name + ext
path = path.format(params=params_name)
fh = open(path, "w")
else:
print(
"Dumping result with parameters from {}:".format(params_name),
file=sys.stderr,
)
print(out + "\n", file=fh)
def generate_taskgraph(options, parameters, logdir):
from taskgraph.parameters import Parameters
def logfile(spec):
"""Determine logfile given a parameters specification."""
if logdir is None:
return None
return os.path.join(
logdir,
"{}_{}.log".format(options["graph_attr"], Parameters.format_spec(spec)),
)
# Don't bother using futures if there's only one parameter. This can make
# tracebacks a little more readable and avoids additional process overhead.
if len(parameters) == 1:
spec = parameters[0]
out = format_taskgraph(options, spec, logfile(spec))
dump_output(out, options["output_file"])
return
futures = {}
with ProcessPoolExecutor() as executor:
for spec in parameters:
f = executor.submit(format_taskgraph, options, spec, logfile(spec))
futures[f] = spec
for future in as_completed(futures):
output_file = options["output_file"]
spec = futures[future]
e = future.exception()
if e:
out = "".join(traceback.format_exception(type(e), e, e.__traceback__))
if options["diff"]:
# Dump to console so we don't accidentally diff the tracebacks.
output_file = None
else:
out = future.result()
dump_output(
out,
path=output_file,
params_spec=spec if len(parameters) > 1 else None,
)
@command(
"tasks",
help="Show all tasks in the taskgraph.",
defaults={"graph_attr": "full_task_set"},
)
@command(
"full", help="Show the full taskgraph.", defaults={"graph_attr": "full_task_graph"}
)
@command(
"target",
help="Show the set of target tasks.",
defaults={"graph_attr": "target_task_set"},
)
@command(
"target-graph",
help="Show the target graph.",
defaults={"graph_attr": "target_task_graph"},
)
@command(
"optimized",
help="Show the optimized graph.",
defaults={"graph_attr": "optimized_task_graph"},
)
@command(
"morphed",
help="Show the morphed graph.",
defaults={"graph_attr": "morphed_task_graph"},
)
@argument("--root", "-r", help="root of the taskgraph definition relative to topsrcdir")
@argument("--quiet", "-q", action="store_true", help="suppress all logging output")
@argument(
"--verbose", "-v", action="store_true", help="include debug-level logging output"
)
@argument(
"--json",
"-J",
action="store_const",
dest="format",
const="json",
help="Output task graph as a JSON object",
)
@argument(
"--yaml",
"-Y",
action="store_const",
dest="format",
const="yaml",
help="Output task graph as a YAML object",
)
@argument(
"--labels",
"-L",
action="store_const",
dest="format",
const="labels",
help="Output the label for each task in the task graph (default)",
)
@argument(
"--parameters",
"-p",
default=None,
action="append",
help="Parameters to use for the generation. Can be a path to file (.yml or "
".json; see `taskcluster/docs/parameters.rst`), a directory (containing "
"parameters files), a url, of the form `project=mozilla-central` to download "
"latest parameters file for the specified project from CI, or of the form "
"`task-id=<decision task id>` to download parameters from the specified "
"decision task. Can be specified multiple times, in which case multiple "
"generations will happen from the same invocation (one per parameters "
"specified).",
)
@argument(
"--no-optimize",
dest="optimize",
action="store_false",
default="true",
help="do not remove tasks from the graph that are found in the "
"index (a.k.a. optimize the graph)",
)
@argument(
"-o",
"--output-file",
default=None,
help="file path to store generated output.",
)
@argument(
"--tasks-regex",
"--tasks",
default=None,
help="only return tasks with labels matching this regular " "expression.",
)
@argument(
"--target-kind",
default=None,
help="only return tasks that are of the given kind, or their dependencies.",
)
@argument(
"-F",
"--fast",
default=False,
action="store_true",
help="enable fast task generation for local debugging.",
)
@argument(
"--diff",
const="default",
nargs="?",
default=None,
help="Generate and diff the current taskgraph against another revision. "
"Without args the base revision will be used. A revision specifier such as "
"the hash or `.~1` (hg) or `HEAD~1` (git) can be used as well.",
)
def show_taskgraph(options):
from taskgraph.parameters import Parameters
from taskgraph.util.vcs import get_repository
if options.pop("verbose", False):
logging.root.setLevel(logging.DEBUG)
repo = None
cur_ref = None
diffdir = None
output_file = options["output_file"]
if options["diff"]:
repo = get_repository(os.getcwd())
if not repo.working_directory_clean():
print(
"abort: can't diff taskgraph with dirty working directory",
file=sys.stderr,
)
return 1
# We want to return the working directory to the current state
# as best we can after we're done. In all known cases, using
# branch or bookmark (which are both available on the VCS object)
# as `branch` is preferable to a specific revision.
cur_ref = repo.branch or repo.head_ref[:12]
diffdir = tempfile.mkdtemp()
atexit.register(
shutil.rmtree, diffdir
) # make sure the directory gets cleaned up
options["output_file"] = os.path.join(
diffdir, f"{options['graph_attr']}_{cur_ref}"
)
print(f"Generating {options['graph_attr']} @ {cur_ref}", file=sys.stderr)
parameters: List[Union[str, Parameters]] = options.pop("parameters")
if not parameters:
kwargs = {
"target-kind": options.get("target_kind"),
}
parameters = [Parameters(strict=False, **kwargs)] # will use default values
for param in parameters[:]:
if isinstance(param, str) and os.path.isdir(param):
parameters.remove(param)
parameters.extend(
[
p.as_posix()
for p in Path(param).iterdir()
if p.suffix in (".yml", ".json")
]
)
logdir = None
if len(parameters) > 1:
# Log to separate files for each process instead of stderr to
# avoid interleaving.
basename = os.path.basename(os.getcwd())
logdir = os.path.join(appdirs.user_log_dir("taskgraph"), basename)
if not os.path.isdir(logdir):
os.makedirs(logdir)
else:
# Only setup logging if we have a single parameter spec. Otherwise
# logging will go to files. This is also used as a hook for Gecko
# to setup its `mach` based logging.
setup_logging()
generate_taskgraph(options, parameters, logdir)
if options["diff"]:
assert diffdir is not None
assert repo is not None
# Some transforms use global state for checks, so will fail
# when running taskgraph a second time in the same session.
# Reload all taskgraph modules to avoid this.
for mod in sys.modules.copy():
if mod != __name__ and mod.startswith("taskgraph"):
del sys.modules[mod]
if options["diff"] == "default":
base_ref = repo.base_ref
else:
base_ref = options["diff"]
try:
repo.update(base_ref)
base_ref = repo.head_ref[:12]
options["output_file"] = os.path.join(
diffdir, f"{options['graph_attr']}_{base_ref}"
)
print(f"Generating {options['graph_attr']} @ {base_ref}", file=sys.stderr)
generate_taskgraph(options, parameters, logdir)
finally:
repo.update(cur_ref)
# Generate diff(s)
diffcmd = [
"diff",
"-U20",
"--report-identical-files",
f"--label={options['graph_attr']}@{base_ref}",
f"--label={options['graph_attr']}@{cur_ref}",
]
for spec in parameters:
base_path = os.path.join(diffdir, f"{options['graph_attr']}_{base_ref}")
cur_path = os.path.join(diffdir, f"{options['graph_attr']}_{cur_ref}")
params_name = None
if len(parameters) > 1:
params_name = Parameters.format_spec(spec)
base_path += f"_{params_name}"
cur_path += f"_{params_name}"
try:
proc = subprocess.run(
diffcmd + [base_path, cur_path],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
check=True,
)
diff_output = proc.stdout
returncode = 0
except subprocess.CalledProcessError as e:
# returncode 1 simply means diffs were found
if e.returncode != 1:
print(e.stderr, file=sys.stderr)
raise
diff_output = e.output
returncode = e.returncode
dump_output(
diff_output,
# Don't bother saving file if no diffs were found. Log to
# console in this case instead.
path=None if returncode == 0 else output_file,
params_spec=spec if len(parameters) > 1 else None,
)
if options["format"] != "json":
print(
"If you were expecting differences in task bodies "
'you should pass "-J"\n',
file=sys.stderr,
)
if len(parameters) > 1:
print("See '{}' for logs".format(logdir), file=sys.stderr)
@command("build-image", help="Build a Docker image")
@argument("image_name", help="Name of the image to build")
@argument(
"-t", "--tag", help="tag that the image should be built as.", metavar="name:tag"
)
@argument(
"--context-only",
help="File name the context tarball should be written to."
"with this option it will only build the context.tar.",
metavar="context.tar",
)
def build_image(args):
from taskgraph.docker import build_image, build_context
if args["context_only"] is None:
build_image(args["image_name"], args["tag"], os.environ)
else:
build_context(args["image_name"], args["context_only"], os.environ)
@command(
"load-image",
help="Load a pre-built Docker image. Note that you need to "
"have docker installed and running for this to work.",
)
@argument(
"--task-id",
help="Load the image at public/image.tar.zst in this task, "
"rather than searching the index",
)
@argument(
"-t",
"--tag",
help="tag that the image should be loaded as. If not "
"image will be loaded with tag from the tarball",
metavar="name:tag",
)
@argument(
"image_name",
nargs="?",
help="Load the image of this name based on the current "
"contents of the tree (as built for mozilla-central "
"or mozilla-inbound)",
)
def load_image(args):
from taskgraph.docker import load_image_by_name, load_image_by_task_id
if not args.get("image_name") and not args.get("task_id"):
print("Specify either IMAGE-NAME or TASK-ID")
sys.exit(1)
try:
if args["task_id"]:
ok = load_image_by_task_id(args["task_id"], args.get("tag"))
else:
ok = load_image_by_name(args["image_name"], args.get("tag"))
if not ok:
sys.exit(1)
except Exception:
traceback.print_exc()
sys.exit(1)
@command("image-digest", help="Print the digest of a docker image.")
@argument(
"image_name",
help="Print the digest of the image of this name based on the current "
"contents of the tree.",
)
def image_digest(args):
from taskgraph.docker import get_image_digest
try:
digest = get_image_digest(args["image_name"])
print(digest)
except Exception:
traceback.print_exc()
sys.exit(1)
@command("decision", help="Run the decision task")
@argument("--root", "-r", help="root of the taskgraph definition relative to topsrcdir")
@argument(
"--message",
required=False,
help=argparse.SUPPRESS,
)
@argument(
"--project",
required=True,
help="Project to use for creating task graph. Example: --project=try",
)
@argument("--pushlog-id", dest="pushlog_id", required=True, default="0")
@argument("--pushdate", dest="pushdate", required=True, type=int, default=0)
@argument("--owner", required=True, help="email address of who owns this graph")
@argument("--level", required=True, help="SCM level of this repository")
@argument(
"--target-tasks-method", help="method for selecting the target tasks to generate"
)
@argument(
"--repository-type",
required=True,
help='Type of repository, either "hg" or "git"',
)
@argument("--base-repository", required=True, help='URL for "base" repository to clone')
@argument(
"--head-repository",
required=True,
help='URL for "head" repository to fetch revision from',
)
@argument(
"--head-ref", required=True, help="Reference (this is same as rev usually for hg)"
)
@argument(
"--head-rev", required=True, help="Commit revision to use from head repository"
)
@argument("--head-tag", help="Tag attached to the revision", default="")
@argument(
"--tasks-for", required=True, help="the tasks_for value used to generate this task"
)
@argument("--try-task-config-file", help="path to try task configuration file")
def decision(options):
from taskgraph.decision import taskgraph_decision
taskgraph_decision(options)
@command("action-callback", description="Run action callback used by action tasks")
@argument(
"--root",
"-r",
default="taskcluster/ci",
help="root of the taskgraph definition relative to topsrcdir",
)
def action_callback(options):
from taskgraph.actions import trigger_action_callback
from taskgraph.actions.util import get_parameters
try:
# the target task for this action (or null if it's a group action)
task_id = json.loads(os.environ.get("ACTION_TASK_ID", "null"))
# the target task group for this action
task_group_id = os.environ.get("ACTION_TASK_GROUP_ID", None)
input = json.loads(os.environ.get("ACTION_INPUT", "null"))
callback = os.environ.get("ACTION_CALLBACK", None)
root = options["root"]
parameters = get_parameters(task_group_id)
return trigger_action_callback(
task_group_id=task_group_id,
task_id=task_id,
input=input,
callback=callback,
parameters=parameters,
root=root,
test=False,
)
except Exception:
traceback.print_exc()
sys.exit(1)
@command("test-action-callback", description="Run an action callback in a testing mode")
@argument(
"--root",
"-r",
default="taskcluster/ci",
help="root of the taskgraph definition relative to topsrcdir",
)
@argument(
"--parameters",
"-p",
default="",
help="parameters file (.yml or .json; see " "`taskcluster/docs/parameters.rst`)`",
)
@argument("--task-id", default=None, help="TaskId to which the action applies")
@argument(
"--task-group-id", default=None, help="TaskGroupId to which the action applies"
)
@argument("--input", default=None, help="Action input (.yml or .json)")
@argument("callback", default=None, help="Action callback name (Python function name)")
def test_action_callback(options):
import taskgraph.parameters
import taskgraph.actions
from taskgraph.util import yaml
from taskgraph.config import load_graph_config
def load_data(filename):
with open(filename) as f:
if filename.endswith(".yml"):
return yaml.load_stream(f)
elif filename.endswith(".json"):
return json.load(f)
else:
raise Exception(f"unknown filename {filename}")
try:
task_id = options["task_id"]
if options["input"]:
input = load_data(options["input"])
else:
input = None
root = options["root"]
graph_config = load_graph_config(root)
trust_domain = graph_config["trust-domain"]
graph_config.register()
parameters = taskgraph.parameters.load_parameters_file(
options["parameters"], strict=False, trust_domain=trust_domain
)
parameters.check()
return taskgraph.actions.trigger_action_callback(
task_group_id=options["task_group_id"],
task_id=task_id,
input=input,
callback=options["callback"],
parameters=parameters,
root=root,
test=True,
)
except Exception:
traceback.print_exc()
sys.exit(1)
def create_parser():
parser = argparse.ArgumentParser(description="Interact with taskgraph")
subparsers = parser.add_subparsers()
for _, (func, args, kwargs, defaults) in commands.items():
subparser = subparsers.add_parser(*args, **kwargs)
for arg in func.args:
subparser.add_argument(*arg[0], **arg[1])
subparser.set_defaults(command=func, **defaults)
return parser
def setup_logging():
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
)
def main(args=sys.argv[1:]):
setup_logging()
parser = create_parser()
args = parser.parse_args(args)
try:
args.command(vars(args))
except Exception:
traceback.print_exc()
sys.exit(1)

View File

@@ -0,0 +1,268 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Graph morphs are modifications to task-graphs that take place *after* the
optimization phase.
These graph morphs are largely invisible to developers running `./mach`
locally, so they should be limited to changes that do not modify the meaning of
the graph.
"""
# Note that the translation of `{'task-reference': '..'}` and
# `artifact-reference` are handled in the optimization phase (since
# optimization involves dealing with taskIds directly). Similarly,
# `{'relative-datestamp': '..'}` is handled at the last possible moment during
# task creation.
import logging
import os
import re
from slugid import nice as slugid
from .task import Task
from .graph import Graph
from .taskgraph import TaskGraph
from .util.workertypes import get_worker_type
here = os.path.abspath(os.path.dirname(__file__))
logger = logging.getLogger(__name__)
MAX_ROUTES = 10
def amend_taskgraph(taskgraph, label_to_taskid, to_add):
"""Add the given tasks to the taskgraph, returning a new taskgraph"""
new_tasks = taskgraph.tasks.copy()
new_edges = set(taskgraph.graph.edges)
for task in to_add:
new_tasks[task.task_id] = task
assert task.label not in label_to_taskid
label_to_taskid[task.label] = task.task_id
for depname, dep in task.dependencies.items():
new_edges.add((task.task_id, dep, depname))
taskgraph = TaskGraph(new_tasks, Graph(set(new_tasks), new_edges))
return taskgraph, label_to_taskid
def derive_index_task(task, taskgraph, label_to_taskid, parameters, graph_config):
"""Create the shell of a task that depends on `task` and on the given docker
image."""
purpose = "index-task"
label = f"{purpose}-{task.label}"
provisioner_id, worker_type = get_worker_type(
graph_config, "misc", parameters["level"]
)
task_def = {
"provisionerId": provisioner_id,
"workerType": worker_type,
"dependencies": [task.task_id],
"created": {"relative-datestamp": "0 seconds"},
"deadline": task.task["deadline"],
# no point existing past the parent task's deadline
"expires": task.task["deadline"],
"metadata": {
"name": label,
"description": "{} for {}".format(
purpose, task.task["metadata"]["description"]
),
"owner": task.task["metadata"]["owner"],
"source": task.task["metadata"]["source"],
},
"scopes": [],
"payload": {
"image": {
"path": "public/image.tar.zst",
"namespace": "taskgraph.cache.level-3.docker-images.v2.index-task.latest",
"type": "indexed-image",
},
"features": {
"taskclusterProxy": True,
},
"maxRunTime": 600,
},
}
# only include the docker-image dependency here if it is actually in the
# taskgraph (has not been optimized). It is included in
# task_def['dependencies'] unconditionally.
dependencies = {"parent": task.task_id}
task = Task(
kind="misc",
label=label,
attributes={},
task=task_def,
dependencies=dependencies,
)
task.task_id = slugid()
return task, taskgraph, label_to_taskid
# these regular expressions capture route prefixes for which we have a star
# scope, allowing them to be summarized. Each should correspond to a star scope
# in each Gecko `assume:repo:hg.mozilla.org/...` role.
_SCOPE_SUMMARY_REGEXPS = [
# TODO Bug 1631839 - Remove these scopes once the migration is done
re.compile(r"(index:insert-task:project\.mobile\.fenix\.v2\.[^.]*\.).*"),
re.compile(
r"(index:insert-task:project\.mobile\.reference-browser\.v3\.[^.]*\.).*"
),
]
def make_index_task(parent_task, taskgraph, label_to_taskid, parameters, graph_config):
index_paths = [
r.split(".", 1)[1] for r in parent_task.task["routes"] if r.startswith("index.")
]
parent_task.task["routes"] = [
r for r in parent_task.task["routes"] if not r.startswith("index.")
]
task, taskgraph, label_to_taskid = derive_index_task(
parent_task, taskgraph, label_to_taskid, parameters, graph_config
)
# we need to "summarize" the scopes, otherwise a particularly
# namespace-heavy index task might have more scopes than can fit in a
# temporary credential.
scopes = set()
domain_scope_regex = re.compile(
r"(index:insert-task:{trust_domain}\.v2\.[^.]*\.).*".format(
trust_domain=re.escape(graph_config["trust-domain"])
)
)
all_scopes_summary_regexps = _SCOPE_SUMMARY_REGEXPS + [domain_scope_regex]
for path in index_paths:
scope = f"index:insert-task:{path}"
for summ_re in all_scopes_summary_regexps:
match = summ_re.match(scope)
if match:
scope = match.group(1) + "*"
break
scopes.add(scope)
task.task["scopes"] = sorted(scopes)
task.task["payload"]["command"] = ["insert-indexes.js"] + index_paths
task.task["payload"]["env"] = {
"TARGET_TASKID": parent_task.task_id,
"INDEX_RANK": parent_task.task.get("extra", {}).get("index", {}).get("rank", 0),
}
return task, taskgraph, label_to_taskid
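# Editor's sketch (hedged): how the summarization above collapses a scope,
# assuming a trust-domain of "gecko" purely for illustration.
def _example_summarize_scope():
    pattern = re.compile(r"(index:insert-task:gecko\.v2\.[^.]*\.).*")
    scope = "index:insert-task:gecko.v2.mozilla-central.latest.example"
    # yields "index:insert-task:gecko.v2.mozilla-central.*"
    return pattern.match(scope).group(1) + "*"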
def add_index_tasks(taskgraph, label_to_taskid, parameters, graph_config):
"""
The TaskCluster queue only allows 10 routes on a task, but we have tasks
with many more routes, for purposes of indexing. This graph morph adds
"index tasks" that depend on such tasks and do the index insertions
directly, avoiding the limits on task.routes.
"""
logger.debug("Morphing: adding index tasks")
added = []
for label, task in taskgraph.tasks.items():
if len(task.task.get("routes", [])) <= MAX_ROUTES:
continue
task, taskgraph, label_to_taskid = make_index_task(
task, taskgraph, label_to_taskid, parameters, graph_config
)
added.append(task)
if added:
taskgraph, label_to_taskid = amend_taskgraph(taskgraph, label_to_taskid, added)
logger.info(f"Added {len(added)} index tasks")
return taskgraph, label_to_taskid
def _get_morph_url():
"""
Guess a URL for the current file, for source metadata for created tasks.
If we checked out the taskgraph code with run-task in the decision task,
we can use TASKGRAPH_* to find the right version, which covers the
existing use case.
"""
taskgraph_repo = os.environ.get(
"TASKGRAPH_HEAD_REPOSITORY", "https://hg.mozilla.org/ci/taskgraph"
)
taskgraph_rev = os.environ.get("TASKGRAPH_HEAD_REV", "default")
return f"{taskgraph_repo}/raw-file/{taskgraph_rev}/src/taskgraph/morph.py"
def add_code_review_task(taskgraph, label_to_taskid, parameters, graph_config):
logger.debug("Morphing: adding index tasks")
review_config = parameters.get("code-review")
if not review_config:
return taskgraph, label_to_taskid
code_review_tasks = {}
for label, task in taskgraph.tasks.items():
if task.attributes.get("code-review"):
code_review_tasks[task.label] = task.task_id
if code_review_tasks:
code_review_task_def = {
"provisionerId": "built-in",
"workerType": "succeed",
"dependencies": sorted(code_review_tasks.values()),
# This option permits running the task
# regardless of the dependency tasks' exit status,
# as we are interested in the task failures
"requires": "all-resolved",
"created": {"relative-datestamp": "0 seconds"},
"deadline": {"relative-datestamp": "1 day"},
# no point existing past the parent task's deadline
"expires": {"relative-datestamp": "1 day"},
"metadata": {
"name": "code-review",
"description": "List all issues found in static analysis and linting tasks",
"owner": parameters["owner"],
"source": _get_morph_url(),
},
"scopes": [],
"payload": {},
"routes": ["project.relman.codereview.v1.try_ending"],
"extra": {
"code-review": {
"phabricator-build-target": review_config[
"phabricator-build-target"
],
"repository": parameters["head_repository"],
"revision": parameters["head_rev"],
}
},
}
task = Task(
kind="misc",
label="code-review",
attributes={},
task=code_review_task_def,
dependencies=code_review_tasks,
)
task.task_id = slugid()
taskgraph, label_to_taskid = amend_taskgraph(taskgraph, label_to_taskid, [task])
logger.info("Added code review task.")
return taskgraph, label_to_taskid
def morph(taskgraph, label_to_taskid, parameters, graph_config):
"""Apply all morphs"""
morphs = [
add_index_tasks,
add_code_review_task,
]
for m in morphs:
taskgraph, label_to_taskid = m(
taskgraph, label_to_taskid, parameters, graph_config
)
return taskgraph, label_to_taskid

View File

@ -0,0 +1,376 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
The objective of optimization is to remove as many tasks from the graph as
possible, as efficiently as possible, thereby delivering useful results as
quickly as possible. For example, ideally if only a test script is modified in
a push, then the resulting graph contains only the corresponding test suite
task.
See ``taskcluster/docs/optimization.rst`` for more information.
"""
import logging
import os
from collections import defaultdict
from slugid import nice as slugid
from .graph import Graph
from . import files_changed
from .taskgraph import TaskGraph
from .util.taskcluster import find_task_id
from .util.parameterization import resolve_task_references
logger = logging.getLogger(__name__)
TOPSRCDIR = os.path.abspath(os.path.join(__file__, "../../../"))
def optimize_task_graph(
target_task_graph,
params,
do_not_optimize,
decision_task_id,
existing_tasks=None,
strategies=None,
):
"""
Perform task optimization, returning a taskgraph and a map from label to
assigned taskId, including replacement tasks.
"""
label_to_taskid = {}
if not existing_tasks:
existing_tasks = {}
# instantiate the strategies for this optimization process
if not strategies:
strategies = _make_default_strategies()
optimizations = _get_optimizations(target_task_graph, strategies)
removed_tasks = remove_tasks(
target_task_graph=target_task_graph,
optimizations=optimizations,
params=params,
do_not_optimize=do_not_optimize,
)
replaced_tasks = replace_tasks(
target_task_graph=target_task_graph,
optimizations=optimizations,
params=params,
do_not_optimize=do_not_optimize,
label_to_taskid=label_to_taskid,
existing_tasks=existing_tasks,
removed_tasks=removed_tasks,
)
return (
get_subgraph(
target_task_graph,
removed_tasks,
replaced_tasks,
label_to_taskid,
decision_task_id,
),
label_to_taskid,
)
def _make_default_strategies():
return {
"never": OptimizationStrategy(), # "never" is the default behavior
"index-search": IndexSearch(),
"skip-unless-changed": SkipUnlessChanged(),
}
def _get_optimizations(target_task_graph, strategies):
def optimizations(label):
task = target_task_graph.tasks[label]
if task.optimization:
opt_by, arg = list(task.optimization.items())[0]
return (opt_by, strategies[opt_by], arg)
else:
return ("never", strategies["never"], None)
return optimizations
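# For example (task data hypothetical): a task whose `optimization` attribute
# is {"skip-unless-changed": ["docs/**"]} resolves to
# ("skip-unless-changed", strategies["skip-unless-changed"], ["docs/**"]),
# while a task with no optimization resolves to ("never", strategies["never"], None).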
def _log_optimization(verb, opt_counts):
if opt_counts:
logger.info(
"{} {} during optimization.".format(
verb.title(),
", ".join(f"{c} tasks by {b}" for b, c in sorted(opt_counts.items())),
)
)
else:
logger.info(f"No tasks {verb} during optimization")
def remove_tasks(target_task_graph, params, optimizations, do_not_optimize):
"""
Implement the "Removing Tasks" phase, returning a set of task labels of all removed tasks.
"""
opt_counts = defaultdict(int)
removed = set()
reverse_links_dict = target_task_graph.graph.reverse_links_dict()
for label in target_task_graph.graph.visit_preorder():
# if we're not allowed to optimize, that's easy..
if label in do_not_optimize:
continue
# if there are remaining tasks depending on this one, do not remove..
if any(l not in removed for l in reverse_links_dict[label]):
continue
# call the optimization strategy
task = target_task_graph.tasks[label]
opt_by, opt, arg = optimizations(label)
if opt.should_remove_task(task, params, arg):
removed.add(label)
opt_counts[opt_by] += 1
continue
_log_optimization("removed", opt_counts)
return removed
def replace_tasks(
target_task_graph,
params,
optimizations,
do_not_optimize,
label_to_taskid,
removed_tasks,
existing_tasks,
):
"""
Implement the "Replacing Tasks" phase, returning a set of task labels of
all replaced tasks. The replacement taskIds are added to label_to_taskid as
a side-effect.
"""
opt_counts = defaultdict(int)
replaced = set()
links_dict = target_task_graph.graph.links_dict()
for label in target_task_graph.graph.visit_postorder():
# if we're not allowed to optimize, that's easy..
if label in do_not_optimize:
continue
# if this task depends on un-replaced, un-removed tasks, do not replace
if any(l not in replaced and l not in removed_tasks for l in links_dict[label]):
continue
# if the task already exists, that's an easy replacement
repl = existing_tasks.get(label)
if repl:
label_to_taskid[label] = repl
replaced.add(label)
opt_counts["existing_tasks"] += 1
continue
# call the optimization strategy
task = target_task_graph.tasks[label]
opt_by, opt, arg = optimizations(label)
repl = opt.should_replace_task(task, params, arg)
if repl:
if repl is True:
# True means remove this task; get_subgraph will catch any
# problems with removed tasks being depended on
removed_tasks.add(label)
else:
label_to_taskid[label] = repl
replaced.add(label)
opt_counts[opt_by] += 1
continue
_log_optimization("replaced", opt_counts)
return replaced
def get_subgraph(
target_task_graph,
removed_tasks,
replaced_tasks,
label_to_taskid,
decision_task_id,
):
"""
Return the subgraph of target_task_graph consisting only of
non-optimized tasks and edges between them.
To avoid losing track of taskIds for tasks optimized away, this method
simultaneously substitutes real taskIds for task labels in the graph, and
populates each task definition's `dependencies` key with the appropriate
taskIds. Task references are resolved in the process.
"""
# check for any dependency edges from included to removed tasks
bad_edges = [
(l, r, n)
for l, r, n in target_task_graph.graph.edges
if l not in removed_tasks and r in removed_tasks
]
if bad_edges:
probs = ", ".join(
f"{l} depends on {r} as {n} but it has been removed"
for l, r, n in bad_edges
)
raise Exception("Optimization error: " + probs)
# fill in label_to_taskid for anything not removed or replaced
assert replaced_tasks <= set(label_to_taskid)
for label in sorted(
target_task_graph.graph.nodes - removed_tasks - set(label_to_taskid)
):
label_to_taskid[label] = slugid()
# resolve labels to taskIds and populate task['dependencies']
tasks_by_taskid = {}
named_links_dict = target_task_graph.graph.named_links_dict()
omit = removed_tasks | replaced_tasks
for label, task in target_task_graph.tasks.items():
if label in omit:
continue
task.task_id = label_to_taskid[label]
named_task_dependencies = {
name: label_to_taskid[label]
for name, label in named_links_dict.get(label, {}).items()
}
# Add remaining soft dependencies
if task.soft_dependencies:
named_task_dependencies.update(
{
label: label_to_taskid[label]
for label in task.soft_dependencies
if label in label_to_taskid and label not in omit
}
)
task.task = resolve_task_references(
task.label,
task.task,
task_id=task.task_id,
decision_task_id=decision_task_id,
dependencies=named_task_dependencies,
)
deps = task.task.setdefault("dependencies", [])
deps.extend(sorted(named_task_dependencies.values()))
tasks_by_taskid[task.task_id] = task
# resolve edges to taskIds
edges_by_taskid = (
(label_to_taskid.get(left), label_to_taskid.get(right), name)
for (left, right, name) in target_task_graph.graph.edges
)
# ..and drop edges that are no longer entirely in the task graph
# (note that this omits edges to replaced tasks, but they are still in task.dependencies)
edges_by_taskid = {
(left, right, name)
for (left, right, name) in edges_by_taskid
if left in tasks_by_taskid and right in tasks_by_taskid
}
return TaskGraph(tasks_by_taskid, Graph(set(tasks_by_taskid), edges_by_taskid))
class OptimizationStrategy:
def should_remove_task(self, task, params, arg):
"""Determine whether to optimize this task by removing it. Returns
True to remove."""
return False
def should_replace_task(self, task, params, arg):
"""Determine whether to optimize this task by replacing it. Returns a
taskId to replace this task, True to replace with nothing, or False to
keep the task."""
return False
class Either(OptimizationStrategy):
"""Given one or more optimization strategies, remove a task if any of them
says to, and replace with a task if any finds a replacement (preferring the
earliest). By default, each substrategy gets the same arg, but split_args
can return a list of args for each strategy, if desired."""
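# Usage sketch (the strategy composition and arg keys are hypothetical):
#     strategies["index-or-skip"] = Either(
#         IndexSearch(), SkipUnlessChanged(),
#         split_args=lambda arg: [arg["index-paths"], arg["file-patterns"]])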
def __init__(self, *substrategies, **kwargs):
self.substrategies = substrategies
self.split_args = kwargs.pop("split_args", None)
if not self.split_args:
self.split_args = lambda arg: [arg] * len(substrategies)
if kwargs:
raise TypeError("unexpected keyword args")
def _for_substrategies(self, arg, fn):
for sub, arg in zip(self.substrategies, self.split_args(arg)):
rv = fn(sub, arg)
if rv:
return rv
return False
def should_remove_task(self, task, params, arg):
return self._for_substrategies(
arg, lambda sub, arg: sub.should_remove_task(task, params, arg)
)
def should_replace_task(self, task, params, arg):
return self._for_substrategies(
arg, lambda sub, arg: sub.should_replace_task(task, params, arg)
)
class IndexSearch(OptimizationStrategy):
# A task with no dependencies remaining after optimization will be replaced
# if artifacts exist for the corresponding index_paths.
# Otherwise, we're in one of the following cases:
# - the task has un-optimized dependencies
# - the artifacts have expired
# - some changes altered the index_paths and new artifacts need to be
# created.
# In any of those cases, we need to run the task to create or refresh
# artifacts.
def should_replace_task(self, task, params, index_paths):
"Look for a task with one of the given index paths"
for index_path in index_paths:
try:
task_id = find_task_id(
index_path, use_proxy=bool(os.environ.get("TASK_ID"))
)
return task_id
except KeyError:
# 404 will end up here and go on to the next index path
pass
return False
class SkipUnlessChanged(OptimizationStrategy):
def should_remove_task(self, task, params, file_patterns):
if params.get("repository_type") != "hg":
raise RuntimeError(
"SkipUnlessChanged optimization only works with mercurial repositories"
)
# pushlog_id == -1 - this is the case when run from a cron.yml job
if params.get("pushlog_id") == -1:
return False
changed = files_changed.check(params, file_patterns)
if not changed:
logger.debug(
'no files found matching a pattern in `skip-unless-changed` for "{}"'.format(
task.label
)
)
return True
return False

View File

@ -0,0 +1,305 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import hashlib
import json
import os
import time
from datetime import datetime
from pprint import pformat
from urllib.parse import urlparse
from urllib.request import urlopen
from taskgraph.util.memoize import memoize
from taskgraph.util.readonlydict import ReadOnlyDict
from taskgraph.util.schema import validate_schema
from taskgraph.util.vcs import get_repository
from voluptuous import (
ALLOW_EXTRA,
Required,
Optional,
Schema,
)
class ParameterMismatch(Exception):
"""Raised when a parameters.yml has extra or missing parameters."""
@memoize
def _repo():
return get_repository(os.getcwd())
# Please keep this list sorted and in sync with taskcluster/docs/parameters.rst
base_schema = Schema(
{
Required("base_repository"): str,
Required("build_date"): int,
Required("do_not_optimize"): [str],
Required("existing_tasks"): {str: str},
Required("filters"): [str],
Required("head_ref"): str,
Required("head_repository"): str,
Required("head_rev"): str,
Required("head_tag"): str,
Required("level"): str,
Required("moz_build_date"): str,
Required("optimize_target_tasks"): bool,
Required("owner"): str,
Required("project"): str,
Required("pushdate"): int,
Required("pushlog_id"): str,
Required("repository_type"): str,
# target-kind is not included, since it should never be
# used at run-time
Required("target_tasks_method"): str,
Required("tasks_for"): str,
Optional("code-review"): {
Required("phabricator-build-target"): str,
},
}
)
def extend_parameters_schema(schema):
"""
Extend the schema for parameters to include per-project configuration.
This should be called by the `taskgraph.register` function in the
graph-configuration.
"""
global base_schema
base_schema = base_schema.extend(schema)
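# Usage sketch, typically from a project's `taskgraph.register` hook (the
# parameter name here is hypothetical):
#     extend_parameters_schema({Optional("my_project_flag"): bool})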
class Parameters(ReadOnlyDict):
"""An immutable dictionary with nicer KeyError messages on failure"""
def __init__(self, strict=True, **kwargs):
self.strict = strict
self.spec = kwargs.pop("spec", None)
self._id = None
if not self.strict:
# apply defaults to missing parameters
kwargs = Parameters._fill_defaults(**kwargs)
ReadOnlyDict.__init__(self, **kwargs)
@property
def id(self):
if not self._id:
self._id = hashlib.sha256(
json.dumps(self, sort_keys=True).encode("utf-8")
).hexdigest()[:12]
return self._id
@staticmethod
def format_spec(spec):
"""
Get a friendly identifier from a parameters specifier.
Args:
spec (str): Parameters specifier.
Returns:
str: Name to identify parameters by.
"""
if spec is None:
return "defaults"
if any(spec.startswith(s) for s in ("task-id=", "project=")):
return spec
result = urlparse(spec)
if result.scheme in ("http", "https"):
spec = result.path
return os.path.splitext(os.path.basename(spec))[0]
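# Examples (URL is illustrative):
#     format_spec(None)                                   -> "defaults"
#     format_spec("project=mozilla-central")              -> "project=mozilla-central"
#     format_spec("https://example.com/a/parameters.yml") -> "parameters"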
@staticmethod
def _fill_defaults(**kwargs):
defaults = {
"base_repository": _repo().get_url(),
"build_date": int(time.time()),
"do_not_optimize": [],
"existing_tasks": {},
"filters": ["target_tasks_method"],
"head_ref": _repo().head_ref,
"head_repository": _repo().get_url(),
"head_rev": _repo().head_ref,
"head_tag": "",
"level": "3",
"moz_build_date": datetime.now().strftime("%Y%m%d%H%M%S"),
"optimize_target_tasks": True,
"owner": "nobody@mozilla.com",
"project": _repo().get_url().rsplit("/", 1)[1],
"pushdate": int(time.time()),
"pushlog_id": "0",
"repository_type": _repo().tool,
"target_tasks_method": "default",
"tasks_for": "",
}
for name, default in defaults.items():
if name not in kwargs:
kwargs[name] = default
return kwargs
def check(self):
schema = (
base_schema if self.strict else base_schema.extend({}, extra=ALLOW_EXTRA)
)
try:
validate_schema(schema, self.copy(), "Invalid parameters:")
except Exception as e:
raise ParameterMismatch(str(e))
def __getitem__(self, k):
try:
return super().__getitem__(k)
except KeyError:
raise KeyError(f"taskgraph parameter {k!r} not found")
def is_try(self):
"""
Determine whether this graph is being built on a try project or for
`mach try fuzzy`.
"""
return "try" in self["project"] or self["tasks_for"] == "github-pull-request"
@property
def moz_build_date(self):
# XXX self["moz_build_date"] is left as a string because:
# * of backward compatibility
# * parameters are output in a YAML file
return datetime.strptime(self["moz_build_date"], "%Y%m%d%H%M%S")
def file_url(self, path, pretty=False):
"""
Determine the VCS URL for viewing a file in the tree, suitable for
viewing by a human.
:param str path: The path, relative to the root of the repository.
:param bool pretty: Whether to return a link to a formatted version of the
file, or the raw file version.
:return str: The URL displaying the given path.
"""
if self["repository_type"] == "hg":
if path.startswith("comm/"):
path = path[len("comm/") :]
repo = self["comm_head_repository"]
rev = self["comm_head_rev"]
else:
repo = self["head_repository"]
rev = self["head_rev"]
endpoint = "file" if pretty else "raw-file"
return f"{repo}/{endpoint}/{rev}/{path}"
elif self["repository_type"] == "git":
# For getting the file URL for git repositories, we only support a Github HTTPS remote
repo = self["head_repository"]
if repo.startswith("https://github.com/"):
if repo.endswith("/"):
repo = repo[:-1]
rev = self["head_rev"]
endpoint = "blob" if pretty else "raw"
return f"{repo}/{endpoint}/{rev}/{path}"
elif repo.startswith("git@github.com:"):
if repo.endswith(".git"):
repo = repo[:-4]
rev = self["head_rev"]
endpoint = "blob" if pretty else "raw"
return "{}/{}/{}/{}".format(
repo.replace("git@github.com:", "https://github.com/"),
endpoint,
rev,
path,
)
else:
raise ParameterMismatch(
"Don't know how to determine file URL for non-github"
"repo: {}".format(repo)
)
else:
raise RuntimeError(
'Only the "git" and "hg" repository types are supported for using file_url()'
)
def __str__(self):
return f"Parameters(id={self.id}) (from {self.format_spec(self.spec)})"
def __repr__(self):
return pformat(dict(self), indent=2)
def load_parameters_file(spec, strict=True, overrides=None, trust_domain=None):
"""
Load parameters from a path, url, decision task-id or project.
Examples:
task-id=fdtgsD5DQUmAQZEaGMvQ4Q
project=mozilla-central
"""
from taskgraph.util.taskcluster import get_artifact_url, find_task_id
from taskgraph.util import yaml
if overrides is None:
overrides = {}
overrides["spec"] = spec
if not spec:
return Parameters(strict=strict, **overrides)
try:
# reading parameters from a local parameters.yml file
f = open(spec)
except OSError:
# fetching parameters.yml using task-id, project or supplied url
task_id = None
if spec.startswith("task-id="):
task_id = spec.split("=")[1]
elif spec.startswith("project="):
if trust_domain is None:
raise ValueError(
"Can't specify parameters by project "
"if trust domain isn't supplied.",
)
index = "{trust_domain}.v2.{project}.latest.taskgraph.decision".format(
trust_domain=trust_domain,
project=spec.split("=")[1],
)
task_id = find_task_id(index)
if task_id:
spec = get_artifact_url(task_id, "public/parameters.yml")
f = urlopen(spec)
if spec.endswith(".yml"):
kwargs = yaml.load_stream(f)
elif spec.endswith(".json"):
kwargs = json.load(f)
else:
raise TypeError(f"Parameters file `{spec}` is not JSON or YAML")
kwargs.update(overrides)
return Parameters(strict=strict, **kwargs)
def parameters_loader(spec, strict=True, overrides=None):
def get_parameters(graph_config):
parameters = load_parameters_file(
spec,
strict=strict,
overrides=overrides,
trust_domain=graph_config["trust-domain"],
)
parameters.check()
return parameters
return get_parameters
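# Illustrative use by a consumer that already holds a graph_config:
#     get_params = parameters_loader("parameters.yml", strict=False)
#     parameters = get_params(graph_config)  # loaded, defaulted and checked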

View File

@ -0,0 +1,675 @@
#!/usr/bin/python3 -u
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import bz2
import concurrent.futures
import contextlib
import datetime
import gzip
import hashlib
import json
import lzma
import multiprocessing
import os
import pathlib
import random
import stat
import subprocess
import sys
import tarfile
import tempfile
import time
import urllib.parse
import urllib.request
import zipfile
try:
import zstandard
except ImportError:
zstandard = None
CONCURRENCY = multiprocessing.cpu_count()
def log(msg):
print(msg, file=sys.stderr)
sys.stderr.flush()
class IntegrityError(Exception):
"""Represents an integrity error when downloading a URL."""
def ZstdCompressor(*args, **kwargs):
if not zstandard:
raise ValueError('zstandard Python package not available')
return zstandard.ZstdCompressor(*args, **kwargs)
def ZstdDecompressor(*args, **kwargs):
if not zstandard:
raise ValueError('zstandard Python package not available')
return zstandard.ZstdDecompressor(*args, **kwargs)
@contextlib.contextmanager
def rename_after_close(fname, *args, **kwargs):
"""
Context manager that opens a temporary file to use as a writer,
and closes the file on context exit, renaming it to the expected
file name in case of success, or removing it in case of failure.
Takes the same options as open(), but must be used as a context
manager.
"""
path = pathlib.Path(fname)
tmp = path.with_name('%s.tmp' % path.name)
try:
with tmp.open(*args, **kwargs) as fh:
yield fh
except Exception:
tmp.unlink()
raise
else:
tmp.rename(fname)
# The following is copied from
# https://github.com/mozilla-releng/redo/blob/6d07678a014e0c525e54a860381a165d34db10ff/redo/__init__.py#L15-L85
def retrier(attempts=5, sleeptime=10, max_sleeptime=300, sleepscale=1.5, jitter=1):
"""
A generator function that sleeps between retries, handles exponential
backoff and jitter. The action you are retrying is meant to run after
retrier yields.
At each iteration, we sleep for sleeptime + random.randint(-jitter, jitter).
Afterwards sleeptime is multiplied by sleepscale for the next iteration.
Args:
attempts (int): maximum number of times to try; defaults to 5
sleeptime (float): how many seconds to sleep between tries; defaults to
10s (ten seconds)
max_sleeptime (float): the longest we'll sleep, in seconds; defaults to
300s (five minutes)
sleepscale (float): how much to multiply the sleep time by each
iteration; defaults to 1.5
jitter (int): random jitter to introduce to sleep time each iteration.
the amount is chosen at random between [-jitter, +jitter]
defaults to 1
Yields:
None, a maximum of `attempts` number of times
Example:
>>> n = 0
>>> for _ in retrier(sleeptime=0, jitter=0):
... if n == 3:
... # We did the thing!
... break
... n += 1
>>> n
3
>>> n = 0
>>> for _ in retrier(sleeptime=0, jitter=0):
... if n == 6:
... # We did the thing!
... break
... n += 1
... else:
... print("max tries hit")
max tries hit
"""
jitter = jitter or 0 # py35 barfs on the next line if jitter is None
if jitter > sleeptime:
# To prevent negative sleep times
raise Exception('jitter ({}) must be less than sleep time ({})'.format(jitter, sleeptime))
sleeptime_real = sleeptime
for _ in range(attempts):
log("attempt %i/%i" % (_ + 1, attempts))
yield sleeptime_real
if jitter:
sleeptime_real = sleeptime + random.randint(-jitter, jitter)
# our jitter should scale along with the sleeptime
jitter = int(jitter * sleepscale)
else:
sleeptime_real = sleeptime
sleeptime *= sleepscale
if sleeptime_real > max_sleeptime:
sleeptime_real = max_sleeptime
# Don't need to sleep the last time
if _ < attempts - 1:
log("sleeping for %.2fs (attempt %i/%i)" % (sleeptime_real, _ + 1, attempts))
time.sleep(sleeptime_real)
def stream_download(url, sha256=None, size=None):
"""Download a URL to a generator, optionally with content verification.
If ``sha256`` or ``size`` are defined, the downloaded URL will be
validated against those requirements and ``IntegrityError`` will be
raised if expectations do not match.
Because verification cannot occur until the file is completely downloaded
it is recommended for consumers to not do anything meaningful with the
data if content verification is being used. To securely handle retrieved
content, it should be streamed to a file or memory and only operated
on after the generator is exhausted without raising.
"""
log('Downloading %s' % url)
h = hashlib.sha256()
length = 0
t0 = time.time()
with urllib.request.urlopen(url) as fh:
if not url.endswith('.gz') and fh.info().get('Content-Encoding') == 'gzip':
fh = gzip.GzipFile(fileobj=fh)
while True:
chunk = fh.read(65536)
if not chunk:
break
h.update(chunk)
length += len(chunk)
yield chunk
duration = time.time() - t0
digest = h.hexdigest()
log('%s resolved to %d bytes with sha256 %s in %.3fs' % (
url, length, digest, duration))
if size:
if size == length:
log('Verified size of %s' % url)
else:
raise IntegrityError('size mismatch on %s: wanted %d; got %d' % (
url, size, length))
if sha256:
if digest == sha256:
log('Verified sha256 integrity of %s' % url)
else:
raise IntegrityError('sha256 mismatch on %s: wanted %s; got %s' % (
url, sha256, digest))
def download_to_path(url, path, sha256=None, size=None):
"""Download a URL to a filesystem path, possibly with verification."""
# We download to a temporary file and rename at the end so there's
# no chance of the final file being partially written or containing
# bad data.
try:
path.unlink()
except FileNotFoundError:
pass
for _ in retrier(attempts=5, sleeptime=60):
try:
log('Downloading %s to %s' % (url, path))
with rename_after_close(path, 'wb') as fh:
for chunk in stream_download(url, sha256=sha256, size=size):
fh.write(chunk)
return
except IntegrityError:
raise
except Exception as e:
log("Download failed: {}".format(e))
continue
raise Exception("Download failed, no more retries!")
def gpg_verify_path(path: pathlib.Path, public_key_data: bytes,
signature_data: bytes):
"""Verify that a filesystem path verifies using GPG.
Takes a Path defining a file to verify. ``public_key_data`` contains
bytes with GPG public key data. ``signature_data`` contains a signed
GPG document to use with ``gpg --verify``.
"""
log('Validating GPG signature of %s' % path)
log('GPG key data:\n%s' % public_key_data.decode('ascii'))
with tempfile.TemporaryDirectory() as td:
try:
# --batch since we're running unattended.
gpg_args = ['gpg', '--homedir', td, '--batch']
log('Importing GPG key...')
subprocess.run(gpg_args + ['--import'],
input=public_key_data,
check=True)
log('Verifying GPG signature...')
subprocess.run(gpg_args + ['--verify', '-', '%s' % path],
input=signature_data,
check=True)
log('GPG signature verified!')
finally:
# There is a race between the agent self-terminating and
# shutil.rmtree() from the temporary directory cleanup that can
# lead to exceptions. Kill the agent before cleanup to prevent this.
env = dict(os.environ)
env['GNUPGHOME'] = td
subprocess.run(['gpgconf', '--kill', 'gpg-agent'], env=env)
def open_tar_stream(path: pathlib.Path):
""""""
if path.suffix == '.bz2':
return bz2.open(str(path), 'rb')
elif path.suffix == '.gz':
return gzip.open(str(path), 'rb')
elif path.suffix == '.xz':
return lzma.open(str(path), 'rb')
elif path.suffix == '.zst':
dctx = ZstdDecompressor()
return dctx.stream_reader(path.open('rb'))
elif path.suffix == '.tar':
return path.open('rb')
else:
raise ValueError('unknown archive format for tar file: %s' % path)
def archive_type(path: pathlib.Path):
"""Attempt to identify a path as an extractable archive."""
if path.suffixes[-2:-1] == ['.tar']:
return 'tar'
elif path.suffix == '.zip':
return 'zip'
else:
return None
def extract_archive(path, dest_dir, typ):
"""Extract an archive to a destination directory."""
# Resolve paths to absolute variants.
path = path.resolve()
dest_dir = dest_dir.resolve()
log('Extracting %s to %s' % (path, dest_dir))
t0 = time.time()
# We pipe input to the decompressor program so that we can apply
# custom decompressors that the program may not know about.
if typ == 'tar':
ifh = open_tar_stream(path)
# On Windows, the tar program doesn't support things like symbolic
# links, even though Windows itself does. The tarfile module in
# Python does support them, so use that. But since tarfile is
# significantly slower than the tar program on Linux, only use
# tarfile on Windows (where it is not much slower than tar,
# presumably because of Windows' notoriously bad I/O).
if sys.platform == 'win32':
tar = tarfile.open(fileobj=ifh, mode='r|')
tar.extractall(str(dest_dir))
args = []
else:
args = ['tar', 'xf', '-']
pipe_stdin = True
elif typ == 'zip':
# unzip from stdin has wonky behavior. We don't use a pipe for it.
ifh = open(os.devnull, 'rb')
args = ['unzip', '-o', str(path)]
pipe_stdin = False
else:
raise ValueError('unknown archive format: %s' % path)
if args:
with ifh, subprocess.Popen(args, cwd=str(dest_dir), bufsize=0,
stdin=subprocess.PIPE) as p:
while True:
if not pipe_stdin:
break
chunk = ifh.read(131072)
if not chunk:
break
p.stdin.write(chunk)
if p.returncode:
raise Exception('%r exited %d' % (args, p.returncode))
log('%s extracted in %.3fs' % (path, time.time() - t0))
def repack_archive(orig: pathlib.Path, dest: pathlib.Path,
strip_components=0, prefix=''):
assert orig != dest
log('Repacking as %s' % dest)
orig_typ = archive_type(orig)
typ = archive_type(dest)
if not orig_typ:
raise Exception('Archive type not supported for %s' % orig.name)
if not typ:
raise Exception('Archive type not supported for %s' % dest.name)
if dest.suffixes[-2:] != ['.tar', '.zst']:
raise Exception('Only producing .tar.zst archives is supported.')
if strip_components or prefix:
def filter(name):
if strip_components:
stripped = '/'.join(name.split('/')[strip_components:])
if not stripped:
raise Exception(
'Stripping %d components would remove files'
% strip_components)
name = stripped
return prefix + name
else:
filter = None
with rename_after_close(dest, 'wb') as fh:
ctx = ZstdCompressor()
if orig_typ == 'zip':
assert typ == 'tar'
zip = zipfile.ZipFile(orig)
# Convert the zip stream to a tar on the fly.
with ctx.stream_writer(fh) as compressor, \
tarfile.open(fileobj=compressor, mode='w:') as tar:
for zipinfo in zip.infolist():
if zipinfo.is_dir():
continue
tarinfo = tarfile.TarInfo()
filename = zipinfo.filename
tarinfo.name = filter(filename) if filter else filename
tarinfo.size = zipinfo.file_size
# Zip files don't record the timezone they were created
# in, which makes it hard to reliably convert their
# timestamps. We care about reproducibility rather than
# accuracy, so we pick UTC.
time = datetime.datetime(
*zipinfo.date_time, tzinfo=datetime.timezone.utc)
tarinfo.mtime = time.timestamp()
# 0 is MS-DOS, 3 is UNIX. Only in the latter case do we
# get anything useful for the tar file mode.
if zipinfo.create_system == 3:
mode = zipinfo.external_attr >> 16
else:
mode = 0o0644
tarinfo.mode = stat.S_IMODE(mode)
if stat.S_ISLNK(mode):
tarinfo.type = tarfile.SYMTYPE
tarinfo.linkname = zip.read(filename).decode()
tar.addfile(tarinfo, zip.open(filename))
elif stat.S_ISREG(mode) or stat.S_IFMT(mode) == 0:
tar.addfile(tarinfo, zip.open(filename))
else:
raise Exception('Unsupported file mode %o'
% stat.S_IFMT(mode))
elif orig_typ == 'tar':
if typ == 'zip':
raise Exception('Repacking a tar to zip is not supported')
assert typ == 'tar'
ifh = open_tar_stream(orig)
if filter:
# To apply the filter, we need to open the tar stream and
# tweak it.
origtar = tarfile.open(fileobj=ifh, mode='r|')
with ctx.stream_writer(fh) as compressor, \
tarfile.open(fileobj=compressor, mode='w:') as tar:
for tarinfo in origtar:
if tarinfo.isdir():
continue
tarinfo.name = filter(tarinfo.name)
tar.addfile(tarinfo, origtar.extractfile(tarinfo))
else:
# We only change compression here. The tar stream is unchanged.
ctx.copy_stream(ifh, fh)
def fetch_and_extract(url, dest_dir, extract=True, sha256=None, size=None):
"""Fetch a URL and extract it to a destination path.
If the downloaded URL is an archive, it is extracted automatically
and the archive is deleted. Otherwise the file remains in place in
the destination directory.
"""
basename = urllib.parse.urlparse(url).path.split('/')[-1]
dest_path = dest_dir / basename
download_to_path(url, dest_path, sha256=sha256, size=size)
if not extract:
return
typ = archive_type(dest_path)
if typ:
extract_archive(dest_path, dest_dir, typ)
log('Removing %s' % dest_path)
dest_path.unlink()
def fetch_urls(downloads):
"""Fetch URLs pairs to a pathlib.Path."""
with concurrent.futures.ThreadPoolExecutor(CONCURRENCY) as e:
fs = []
for download in downloads:
fs.append(e.submit(fetch_and_extract, *download))
for f in fs:
f.result()
def git_checkout_archive(dest_path: pathlib.Path, repo: str, commit: str,
prefix=None):
"""Produce an archive of the files comprising a Git checkout."""
dest_path.parent.mkdir(parents=True, exist_ok=True)
if dest_path.suffixes[-2:] != ['.tar', '.zst']:
raise Exception('Only producing .tar.zst archives is supported.')
with tempfile.TemporaryDirectory() as td:
temp_dir = pathlib.Path(td)
if not prefix:
prefix = repo.rstrip('/').rsplit('/', 1)[-1]
git_dir = temp_dir / prefix
# This could be faster with a shallow clone. However, Git requires a ref
# to initiate a clone. Since the commit-ish may not refer to a ref, we
# simply perform a full clone followed by a checkout.
print('cloning %s to %s' % (repo, git_dir))
subprocess.run(['git', 'clone', '--recurse-submodules', repo, str(git_dir)],
check=True)
subprocess.run(['git', 'checkout', '--recurse-submodules', commit],
cwd=str(git_dir), check=True)
print('creating archive %s of commit %s' % (dest_path, commit))
proc = subprocess.Popen([
'tar', 'cf', '-', '--exclude=.git', '-C', str(temp_dir), prefix,
], stdout=subprocess.PIPE)
with rename_after_close(dest_path, 'wb') as out:
ctx = ZstdCompressor()
ctx.copy_stream(proc.stdout, out)
proc.wait()
def command_git_checkout_archive(args):
dest = pathlib.Path(args.dest)
try:
git_checkout_archive(dest, args.repo, args.commit,
prefix=args.path_prefix)
except Exception:
try:
dest.unlink()
except FileNotFoundError:
pass
raise
def command_static_url(args):
gpg_sig_url = args.gpg_sig_url
gpg_env_key = args.gpg_key_env
if bool(gpg_sig_url) != bool(gpg_env_key):
print('--gpg-sig-url and --gpg-key-env must both be defined')
return 1
if gpg_sig_url:
gpg_signature = b''.join(stream_download(gpg_sig_url))
gpg_key = os.environb[gpg_env_key.encode('ascii')]
dest = pathlib.Path(args.dest)
dest.parent.mkdir(parents=True, exist_ok=True)
basename = urllib.parse.urlparse(args.url).path.split('/')[-1]
if basename.endswith(''.join(dest.suffixes)):
dl_dest = dest
else:
dl_dest = dest.parent / basename
try:
download_to_path(args.url, dl_dest, sha256=args.sha256, size=args.size)
if gpg_sig_url:
gpg_verify_path(dl_dest, gpg_key, gpg_signature)
if dl_dest != dest or args.strip_components or args.add_prefix:
repack_archive(dl_dest, dest, args.strip_components, args.add_prefix)
except Exception:
try:
dl_dest.unlink()
except FileNotFoundError:
pass
raise
if dl_dest != dest:
log('Removing %s' % dl_dest)
dl_dest.unlink()
def api(root_url, service, version, path):
# taskcluster-lib-urls is not available when this script runs, so
# simulate its behavior:
if root_url == 'https://taskcluster.net':
return 'https://{service}.taskcluster.net/{version}/{path}'.format(
service=service, version=version, path=path)
return '{root_url}/api/{service}/{version}/{path}'.format(
root_url=root_url, service=service, version=version, path=path)
def command_task_artifacts(args):
start = time.monotonic()
fetches = json.loads(os.environ['MOZ_FETCHES'])
downloads = []
for fetch in fetches:
extdir = pathlib.Path(args.dest)
if 'dest' in fetch:
extdir = extdir.joinpath(fetch['dest'])
extdir.mkdir(parents=True, exist_ok=True)
root_url = os.environ['TASKCLUSTER_ROOT_URL']
if fetch['artifact'].startswith('public/'):
path = 'task/{task}/artifacts/{artifact}'.format(
task=fetch['task'], artifact=fetch['artifact'])
url = api(root_url, 'queue', 'v1', path)
else:
url = ('{proxy_url}/api/queue/v1/task/{task}/artifacts/{artifact}').format(
proxy_url=os.environ['TASKCLUSTER_PROXY_URL'],
task=fetch['task'],
artifact=fetch['artifact'])
downloads.append((url, extdir, fetch['extract']))
fetch_urls(downloads)
end = time.monotonic()
perfherder_data = {
'framework': {'name': 'build_metrics'},
'suites': [{
'name': 'fetch_content',
'value': end - start,
'lowerIsBetter': True,
'shouldAlert': False,
'subtests': [],
}],
}
print('PERFHERDER_DATA: {}'.format(json.dumps(perfherder_data)), file=sys.stderr)
def main():
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(title='sub commands')
git_checkout = subparsers.add_parser(
'git-checkout-archive',
help='Obtain an archive of files from a Git repository checkout')
git_checkout.set_defaults(func=command_git_checkout_archive)
git_checkout.add_argument('--path-prefix',
help='Prefix for paths in produced archive')
git_checkout.add_argument('repo',
help='URL to Git repository to be cloned')
git_checkout.add_argument('commit',
help='Git commit to check out')
git_checkout.add_argument('dest',
help='Destination path of archive')
url = subparsers.add_parser('static-url', help='Download a static URL')
url.set_defaults(func=command_static_url)
url.add_argument('--sha256', required=True,
help='SHA-256 of downloaded content')
url.add_argument('--size', required=True, type=int,
help='Size of downloaded content, in bytes')
url.add_argument('--gpg-sig-url',
help='URL containing signed GPG document validating '
'URL to fetch')
url.add_argument('--gpg-key-env',
help='Environment variable containing GPG key to validate')
url.add_argument('--strip-components', type=int, default=0,
help='Number of leading components to strip from file '
'names in the downloaded archive')
url.add_argument('--add-prefix', default='',
help='Prefix to add to file names in the downloaded '
'archive')
url.add_argument('url', help='URL to fetch')
url.add_argument('dest', help='Destination path')
artifacts = subparsers.add_parser('task-artifacts',
help='Fetch task artifacts')
artifacts.set_defaults(func=command_task_artifacts)
artifacts.add_argument('-d', '--dest', default=os.environ.get('MOZ_FETCHES_DIR'),
help='Destination directory which will contain all '
'artifacts (defaults to $MOZ_FETCHES_DIR)')
args = parser.parse_args()
if not args.dest:
parser.error('no destination directory specified, either pass in --dest '
'or set $MOZ_FETCHES_DIR')
return args.func(args)
if __name__ == '__main__':
sys.exit(main())

View File

@ -0,0 +1,33 @@
# By default the progress bar starts after 3s and updates every 0.1s. We
# change this so it shows and updates every 1.0s.
# We also tell progress to assume a TTY is present so updates are printed
# even if there is no known TTY.
[progress]
delay = 1.0
refresh = 1.0
assume-tty = true
[extensions]
share =
sparse =
robustcheckout = /usr/local/mercurial/robustcheckout.py
[hostsecurity]
# When running a modern Python, Mercurial will default to TLS 1.1+.
# When running on a legacy Python, Mercurial will default to TLS 1.0+.
# There is no good reason we shouldn't be running a modern Python
# capable of speaking TLS 1.2. And the only Mercurial servers we care
# about should be running TLS 1.2. So make TLS 1.2 the minimum.
minimumprotocol = tls1.2
# Settings to make 1-click loaners more useful.
[extensions]
histedit =
rebase =
[diff]
git = 1
showfunc = 1
[pager]
pager = LESS=FRSXQ less

View File

@ -0,0 +1,714 @@
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Robustly perform a checkout.
This extension provides the ``hg robustcheckout`` command for
ensuring a working directory is updated to the specified revision
from a source repo using best practices to ensure optimal clone
times and storage efficiency.
"""
import contextlib
import json
import os
import random
import re
import socket
import ssl
import time
from mercurial.i18n import _
from mercurial.node import hex, nullid
from mercurial import (
commands,
configitems,
error,
exchange,
extensions,
hg,
match as matchmod,
pycompat,
registrar,
scmutil,
urllibcompat,
util,
vfs,
)
# Causes worker to purge caches on process exit and for task to retry.
EXIT_PURGE_CACHE = 72
testedwith = b'4.5 4.6 4.7 4.8 4.9 5.0 5.1 5.2 5.3 5.4 5.5'
minimumhgversion = b'4.5'
cmdtable = {}
command = registrar.command(cmdtable)
configtable = {}
configitem = registrar.configitem(configtable)
configitem(b'robustcheckout', b'retryjittermin', default=configitems.dynamicdefault)
configitem(b'robustcheckout', b'retryjittermax', default=configitems.dynamicdefault)
def getsparse():
from mercurial import sparse
return sparse
def peerlookup(remote, v):
# TRACKING hg46 4.6 added commandexecutor API.
if util.safehasattr(remote, 'commandexecutor'):
with remote.commandexecutor() as e:
return e.callcommand(b'lookup', {b'key': v}).result()
else:
return remote.lookup(v)
@command(b'robustcheckout', [
(b'', b'upstream', b'', b'URL of upstream repo to clone from'),
(b'r', b'revision', b'', b'Revision to check out'),
(b'b', b'branch', b'', b'Branch to check out'),
(b'', b'purge', False, b'Whether to purge the working directory'),
(b'', b'sharebase', b'', b'Directory where shared repos should be placed'),
(b'', b'networkattempts', 3, b'Maximum number of attempts for network '
b'operations'),
(b'', b'sparseprofile', b'', b'Sparse checkout profile to use (path in repo)'),
(b'U', b'noupdate', False, b'the clone will include an empty working directory\n'
b'(only a repository)'),
],
b'[OPTION]... URL DEST',
norepo=True)
def robustcheckout(ui, url, dest, upstream=None, revision=None, branch=None,
purge=False, sharebase=None, networkattempts=None,
sparseprofile=None, noupdate=False):
"""Ensure a working copy has the specified revision checked out.
Repository data is automatically pooled into the common directory
specified by ``--sharebase``, which is a required argument. It is required
because pooling storage prevents excessive cloning, which makes operations
complete faster.
One of ``--revision`` or ``--branch`` must be specified. ``--revision``
is preferred, as it is deterministic and there is no ambiguity as to which
revision will actually be checked out.
If ``--upstream`` is used, the repo at that URL is used to perform the
initial clone instead of cloning from the repo where the desired revision
is located.
``--purge`` controls whether to remove untracked and ignored files from
the working directory. If used, the end state of the working directory
should only contain files explicitly under version control for the requested
revision.
``--sparseprofile`` can be used to specify a sparse checkout profile to use.
The sparse checkout profile corresponds to a file in the revision to be
checked out. If a previous sparse profile or config is present, it will be
replaced by this sparse profile. We choose not to "widen" the sparse config
so operations are as deterministic as possible. If an existing checkout
is present and it isn't using a sparse checkout, we error. This is to
prevent accidentally enabling sparse on a repository that may have
clients that aren't sparse aware. Sparse checkout support requires Mercurial
4.3 or newer and the ``sparse`` extension must be enabled.
"""
if not revision and not branch:
raise error.Abort(b'must specify one of --revision or --branch')
if revision and branch:
raise error.Abort(b'cannot specify both --revision and --branch')
# Require revision to look like a SHA-1.
if revision:
if len(revision) < 12 or len(revision) > 40 or not re.match(b'^[a-f0-9]+$', revision):
raise error.Abort(b'--revision must be a SHA-1 fragment 12-40 '
b'characters long')
sharebase = sharebase or ui.config(b'share', b'pool')
if not sharebase:
raise error.Abort(b'share base directory not defined; refusing to operate',
hint=b'define share.pool config option or pass --sharebase')
# Sparse profile support was added in Mercurial 4.3, where it was highly
# experimental. Because of the fragility of it, we only support sparse
# profiles on 4.3. When 4.4 is released, we'll need to opt in to sparse
# support. We /could/ silently fall back to non-sparse when not supported.
# However, given that sparse has performance implications, we want to fail
# fast if we can't satisfy the desired checkout request.
if sparseprofile:
try:
extensions.find(b'sparse')
except KeyError:
raise error.Abort(b'sparse extension must be enabled to use '
b'--sparseprofile')
ui.warn(b'(using Mercurial %s)\n' % util.version())
# worker.backgroundclose only makes things faster if running anti-virus,
# which our automation doesn't. Disable it.
ui.setconfig(b'worker', b'backgroundclose', False)
# By default the progress bar starts after 3s and updates every 0.1s. We
# change this so it shows and updates every 1.0s.
# We also tell progress to assume a TTY is present so updates are printed
# even if there is no known TTY.
# We make the config change here instead of in a config file because
# otherwise we're at the whim of whatever configs are used in automation.
ui.setconfig(b'progress', b'delay', 1.0)
ui.setconfig(b'progress', b'refresh', 1.0)
ui.setconfig(b'progress', b'assume-tty', True)
sharebase = os.path.realpath(sharebase)
optimes = []
behaviors = set()
start = time.time()
try:
return _docheckout(ui, url, dest, upstream, revision, branch, purge,
sharebase, optimes, behaviors, networkattempts,
sparse_profile=sparseprofile, noupdate=noupdate)
finally:
overall = time.time() - start
# We store the overall time multiple ways in order to help differentiate
# the various "flavors" of operations.
# ``overall`` is always the total operation time.
optimes.append(('overall', overall))
def record_op(name):
# If special behaviors due to "corrupt" storage occur, we vary the
# name to convey that.
if 'remove-store' in behaviors:
name += '_rmstore'
if 'remove-wdir' in behaviors:
name += '_rmwdir'
optimes.append((name, overall))
# We break out overall operations primarily by their network interaction
# We have variants within for working directory operations.
if 'clone' in behaviors and 'create-store' in behaviors:
record_op('overall_clone')
if 'sparse-update' in behaviors:
record_op('overall_clone_sparsecheckout')
else:
record_op('overall_clone_fullcheckout')
elif 'pull' in behaviors or 'clone' in behaviors:
record_op('overall_pull')
if 'sparse-update' in behaviors:
record_op('overall_pull_sparsecheckout')
else:
record_op('overall_pull_fullcheckout')
if 'empty-wdir' in behaviors:
record_op('overall_pull_emptywdir')
else:
record_op('overall_pull_populatedwdir')
else:
record_op('overall_nopull')
if 'sparse-update' in behaviors:
record_op('overall_nopull_sparsecheckout')
else:
record_op('overall_nopull_fullcheckout')
if 'empty-wdir' in behaviors:
record_op('overall_nopull_emptywdir')
else:
record_op('overall_nopull_populatedwdir')
server_url = urllibcompat.urlreq.urlparse(url).netloc
if 'TASKCLUSTER_INSTANCE_TYPE' in os.environ:
perfherder = {
'framework': {
'name': 'vcs',
},
'suites': [],
}
for op, duration in optimes:
perfherder['suites'].append({
'name': op,
'value': duration,
'lowerIsBetter': True,
'shouldAlert': False,
'serverUrl': server_url.decode('utf-8'),
'hgVersion': util.version().decode('utf-8'),
'extraOptions': [os.environ['TASKCLUSTER_INSTANCE_TYPE']],
'subtests': [],
})
ui.write(b'PERFHERDER_DATA: %s\n' %
pycompat.bytestr(json.dumps(perfherder, sort_keys=True)))
def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
optimes, behaviors, networkattemptlimit, networkattempts=None,
sparse_profile=None, noupdate=False):
if not networkattempts:
networkattempts = [1]
def callself():
return _docheckout(ui, url, dest, upstream, revision, branch, purge,
sharebase, optimes, behaviors, networkattemptlimit,
networkattempts=networkattempts,
sparse_profile=sparse_profile,
noupdate=noupdate)
@contextlib.contextmanager
def timeit(op, behavior):
behaviors.add(behavior)
errored = False
try:
start = time.time()
yield
except Exception:
errored = True
raise
finally:
elapsed = time.time() - start
if errored:
op += '_errored'
optimes.append((op, elapsed))
ui.write(b'ensuring %s@%s is available at %s\n' % (url, revision or branch,
dest))
# We assume that we're the only process on the machine touching the
# repository paths that we were told to use. This means our recovery
# scenario when things aren't "right" is to just nuke things and start
# from scratch. This is easier to implement than verifying the state
# of the data and attempting recovery. And in some scenarios (such as
# potential repo corruption), it is probably faster, since verifying
# repos can take a while.
destvfs = vfs.vfs(dest, audit=False, realpath=True)
def deletesharedstore(path=None):
storepath = path or destvfs.read(b'.hg/sharedpath').strip()
if storepath.endswith(b'.hg'):
storepath = os.path.dirname(storepath)
storevfs = vfs.vfs(storepath, audit=False)
storevfs.rmtree(forcibly=True)
if destvfs.exists() and not destvfs.exists(b'.hg'):
raise error.Abort(b'destination exists but no .hg directory')
# Refuse to enable sparse checkouts on existing checkouts. The reasoning
# here is that another consumer of this repo may not be sparse aware. If we
# enabled sparse, we would lock them out.
if destvfs.exists() and sparse_profile and not destvfs.exists(b'.hg/sparse'):
raise error.Abort(b'cannot enable sparse profile on existing '
b'non-sparse checkout',
hint=b'use a separate working directory to use sparse')
# And the other direction for symmetry.
if not sparse_profile and destvfs.exists(b'.hg/sparse'):
raise error.Abort(b'cannot use non-sparse checkout on existing sparse '
b'checkout',
hint=b'use a separate working directory to use sparse')
# Require checkouts to be tied to shared storage, for efficiency.
if destvfs.exists(b'.hg') and not destvfs.exists(b'.hg/sharedpath'):
ui.warn(b'(destination is not shared; deleting)\n')
with timeit('remove_unshared_dest', 'remove-wdir'):
destvfs.rmtree(forcibly=True)
# Verify the shared path exists and is using modern pooled storage.
if destvfs.exists(b'.hg/sharedpath'):
storepath = destvfs.read(b'.hg/sharedpath').strip()
ui.write(b'(existing repository shared store: %s)\n' % storepath)
if not os.path.exists(storepath):
ui.warn(b'(shared store does not exist; deleting destination)\n')
with timeit('removed_missing_shared_store', 'remove-wdir'):
destvfs.rmtree(forcibly=True)
elif not re.search(br'[a-f0-9]{40}/\.hg$', storepath.replace(b'\\', b'/')):
ui.warn(b'(shared store does not belong to pooled storage; '
b'deleting destination to improve efficiency)\n')
with timeit('remove_unpooled_store', 'remove-wdir'):
destvfs.rmtree(forcibly=True)
if destvfs.isfileorlink(b'.hg/wlock'):
ui.warn(b'(dest has an active working directory lock; assuming it is '
b'left over from a previous process and that the destination '
b'is corrupt; deleting it just to be sure)\n')
with timeit('remove_locked_wdir', 'remove-wdir'):
destvfs.rmtree(forcibly=True)
def handlerepoerror(e):
if pycompat.bytestr(e) == _(b'abandoned transaction found'):
ui.warn(b'(abandoned transaction found; trying to recover)\n')
repo = hg.repository(ui, dest)
if not repo.recover():
ui.warn(b'(could not recover repo state; '
b'deleting shared store)\n')
with timeit('remove_unrecovered_shared_store', 'remove-store'):
deletesharedstore()
ui.warn(b'(attempting checkout from beginning)\n')
return callself()
raise
# At this point we either have an existing working directory using
# shared, pooled storage or we have nothing.
def handlenetworkfailure():
if networkattempts[0] >= networkattemptlimit:
raise error.Abort(b'reached maximum number of network attempts; '
b'giving up\n')
ui.warn(b'(retrying after network failure on attempt %d of %d)\n' %
(networkattempts[0], networkattemptlimit))
# Do a backoff on retries to mitigate the thundering herd
# problem. This is an exponential backoff with a multiplier
# plus random jitter thrown in for good measure.
# With the default settings, backoffs will be:
# 1) 2.5 - 6.5
# 2) 5.5 - 9.5
# 3) 11.5 - 15.5
backoff = (2 ** networkattempts[0] - 1) * 1.5
jittermin = ui.configint(b'robustcheckout', b'retryjittermin', 1000)
jittermax = ui.configint(b'robustcheckout', b'retryjittermax', 5000)
backoff += float(random.randint(jittermin, jittermax)) / 1000.0
ui.warn(b'(waiting %.2fs before retry)\n' % backoff)
time.sleep(backoff)
networkattempts[0] += 1
def handlepullerror(e):
"""Handle an exception raised during a pull.
Returns True if caller should call ``callself()`` to retry.
"""
if isinstance(e, error.Abort):
if e.args[0] == _(b'repository is unrelated'):
ui.warn(b'(repository is unrelated; deleting)\n')
destvfs.rmtree(forcibly=True)
return True
elif e.args[0].startswith(_(b'stream ended unexpectedly')):
ui.warn(b'%s\n' % e.args[0])
# Will raise if failure limit reached.
handlenetworkfailure()
return True
# TODO test this branch
elif isinstance(e, error.ResponseError):
if e.args[0].startswith(_(b'unexpected response from remote server:')):
ui.warn(b'(unexpected response from remote server; retrying)\n')
destvfs.rmtree(forcibly=True)
# Will raise if failure limit reached.
handlenetworkfailure()
return True
elif isinstance(e, ssl.SSLError):
# Assume all SSL errors are due to the network, as Mercurial
# should convert non-transport errors like cert validation failures
# to error.Abort.
ui.warn(b'ssl error: %s\n' % e)
handlenetworkfailure()
return True
elif isinstance(e, urllibcompat.urlerr.urlerror):
if isinstance(e.reason, socket.error):
ui.warn(b'socket error: %s\n' % pycompat.bytestr(e.reason))
handlenetworkfailure()
return True
else:
ui.warn(b'unhandled URLError; reason type: %s; value: %s\n' % (
e.reason.__class__.__name__, e.reason))
else:
ui.warn(b'unhandled exception during network operation; type: %s; '
b'value: %s\n' % (e.__class__.__name__, e))
return False
# Perform sanity checking of store. We may or may not know the path to the
# local store. It depends on whether we have an existing destvfs pointing to a
# share. To ensure we always find a local store, perform the same logic
# that Mercurial's pooled storage does to resolve the local store path.
cloneurl = upstream or url
try:
clonepeer = hg.peer(ui, {}, cloneurl)
rootnode = peerlookup(clonepeer, b'0')
except error.RepoLookupError:
raise error.Abort(b'unable to resolve root revision from clone '
b'source')
except (error.Abort, ssl.SSLError, urllibcompat.urlerr.urlerror) as e:
if handlepullerror(e):
return callself()
raise
if rootnode == nullid:
raise error.Abort(b'source repo appears to be empty')
storepath = os.path.join(sharebase, hex(rootnode))
storevfs = vfs.vfs(storepath, audit=False)
if storevfs.isfileorlink(b'.hg/store/lock'):
ui.warn(b'(shared store has an active lock; assuming it is left '
b'over from a previous process and that the store is '
b'corrupt; deleting store and destination just to be '
b'sure)\n')
if destvfs.exists():
with timeit('remove_dest_active_lock', 'remove-wdir'):
destvfs.rmtree(forcibly=True)
with timeit('remove_shared_store_active_lock', 'remove-store'):
storevfs.rmtree(forcibly=True)
if storevfs.exists() and not storevfs.exists(b'.hg/requires'):
ui.warn(b'(shared store missing requires file; this is a really '
b'odd failure; deleting store and destination)\n')
if destvfs.exists():
with timeit('remove_dest_no_requires', 'remove-wdir'):
destvfs.rmtree(forcibly=True)
with timeit('remove_shared_store_no_requires', 'remove-store'):
storevfs.rmtree(forcibly=True)
if storevfs.exists(b'.hg/requires'):
requires = set(storevfs.read(b'.hg/requires').splitlines())
# FUTURE when we require generaldelta, this is where we can check
# for that.
required = {b'dotencode', b'fncache'}
missing = required - requires
if missing:
ui.warn(b'(shared store missing requirements: %s; deleting '
b'store and destination to ensure optimal behavior)\n' %
b', '.join(sorted(missing)))
if destvfs.exists():
with timeit('remove_dest_missing_requires', 'remove-wdir'):
destvfs.rmtree(forcibly=True)
with timeit('remove_shared_store_missing_requires', 'remove-store'):
storevfs.rmtree(forcibly=True)
created = False
if not destvfs.exists():
# Ensure parent directories of destination exist.
# Mercurial 3.8 removed ensuredirs and made makedirs race safe.
if util.safehasattr(util, 'ensuredirs'):
makedirs = util.ensuredirs
else:
makedirs = util.makedirs
makedirs(os.path.dirname(destvfs.base), notindexed=True)
makedirs(sharebase, notindexed=True)
if upstream:
ui.write(b'(cloning from upstream repo %s)\n' % upstream)
if not storevfs.exists():
behaviors.add(b'create-store')
try:
with timeit('clone', 'clone'):
shareopts = {b'pool': sharebase, b'mode': b'identity'}
res = hg.clone(ui, {}, clonepeer, dest=dest, update=False,
shareopts=shareopts,
stream=True)
except (error.Abort, ssl.SSLError, urllibcompat.urlerr.urlerror) as e:
if handlepullerror(e):
return callself()
raise
except error.RepoError as e:
return handlerepoerror(e)
except error.RevlogError as e:
ui.warn(b'(repo corruption: %s; deleting shared store)\n' % e)
with timeit('remove_shared_store_revlogerror', 'remove-store'):
deletesharedstore()
return callself()
# TODO retry here.
if res is None:
raise error.Abort(b'clone failed')
# Verify it is using shared pool storage.
if not destvfs.exists(b'.hg/sharedpath'):
raise error.Abort(b'clone did not create a shared repo')
created = True
# The destination .hg directory should exist. Now make sure we have the
# wanted revision.
repo = hg.repository(ui, dest)
# We only pull if we are using symbolic names or the requested revision
# doesn't exist.
havewantedrev = False
if revision:
try:
ctx = scmutil.revsingle(repo, revision)
except error.RepoLookupError:
ctx = None
if ctx:
if not ctx.hex().startswith(revision):
raise error.Abort(b'--revision argument is ambiguous',
hint=b'must be the first 12+ characters of a '
b'SHA-1 fragment')
checkoutrevision = ctx.hex()
havewantedrev = True
if not havewantedrev:
ui.write(b'(pulling to obtain %s)\n' % (revision or branch,))
remote = None
try:
remote = hg.peer(repo, {}, url)
pullrevs = [peerlookup(remote, revision or branch)]
checkoutrevision = hex(pullrevs[0])
if branch:
ui.warn(b'(remote resolved %s to %s; '
b'result is not deterministic)\n' %
(branch, checkoutrevision))
if checkoutrevision in repo:
ui.warn(b'(revision already present locally; not pulling)\n')
else:
with timeit('pull', 'pull'):
pullop = exchange.pull(repo, remote, heads=pullrevs)
if not pullop.rheads:
raise error.Abort(b'unable to pull requested revision')
except (error.Abort, ssl.SSLError, urllibcompat.urlerr.urlerror) as e:
if handlepullerror(e):
return callself()
raise
except error.RepoError as e:
return handlerepoerror(e)
except error.RevlogError as e:
ui.warn(b'(repo corruption: %s; deleting shared store)\n' % e)
deletesharedstore()
return callself()
finally:
if remote:
remote.close()
# Now we should have the wanted revision in the store. Perform
# working directory manipulation.
# Avoid any working directory manipulations if `-U`/`--noupdate` was passed
if noupdate:
ui.write(b'(skipping update since `-U` was passed)\n')
return None
# Purge if requested. We purge before update because this way we're
# guaranteed to not have conflicts on `hg update`.
if purge and not created:
ui.write(b'(purging working directory)\n')
purgeext = extensions.find(b'purge')
# Mercurial 4.3 doesn't purge files outside the sparse checkout.
# See https://bz.mercurial-scm.org/show_bug.cgi?id=5626. Force
# purging by monkeypatching the sparse matcher.
try:
old_sparse_fn = getattr(repo.dirstate, '_sparsematchfn', None)
if old_sparse_fn is not None:
# TRACKING hg50
# Arguments passed to `matchmod.always` were unused and have been removed
if util.versiontuple(n=2) >= (5, 0):
repo.dirstate._sparsematchfn = lambda: matchmod.always()
else:
repo.dirstate._sparsematchfn = lambda: matchmod.always(repo.root, '')
with timeit('purge', 'purge'):
if purgeext.purge(ui, repo, all=True, abort_on_err=True,
# The function expects all arguments to be
# defined.
**{'print': None,
'print0': None,
'dirs': None,
'files': None}):
raise error.Abort(b'error purging')
finally:
if old_sparse_fn is not None:
repo.dirstate._sparsematchfn = old_sparse_fn
# Update the working directory.
if repo[b'.'].node() == nullid:
behaviors.add('empty-wdir')
else:
behaviors.add('populated-wdir')
if sparse_profile:
sparsemod = getsparse()
# By default, Mercurial will ignore unknown sparse profiles. This could
# lead to a full checkout. Be more strict.
try:
repo.filectx(sparse_profile, changeid=checkoutrevision).data()
except error.ManifestLookupError:
raise error.Abort(b'sparse profile %s does not exist at revision '
b'%s' % (sparse_profile, checkoutrevision))
# TRACKING hg48 - parseconfig takes `action` param
if util.versiontuple(n=2) >= (4, 8):
old_config = sparsemod.parseconfig(repo.ui, repo.vfs.tryread(b'sparse'), b'sparse')
else:
old_config = sparsemod.parseconfig(repo.ui, repo.vfs.tryread(b'sparse'))
old_includes, old_excludes, old_profiles = old_config
if old_profiles == {sparse_profile} and not old_includes and not \
old_excludes:
ui.write(b'(sparse profile %s already set; no need to update '
b'sparse config)\n' % sparse_profile)
else:
if old_includes or old_excludes or old_profiles:
ui.write(b'(replacing existing sparse config with profile '
b'%s)\n' % sparse_profile)
else:
ui.write(b'(setting sparse config to profile %s)\n' %
sparse_profile)
# If doing an incremental update, this will perform two updates:
# one to change the sparse profile and another to update to the new
# revision. This is not desired. But there's not a good API in
# Mercurial to do this as one operation.
with repo.wlock(), timeit('sparse_update_config',
'sparse-update-config'):
fcounts = map(len, sparsemod._updateconfigandrefreshwdir(
repo, [], [], [sparse_profile], force=True))
repo.ui.status(b'%d files added, %d files dropped, '
b'%d files conflicting\n' % tuple(fcounts))
ui.write(b'(sparse refresh complete)\n')
op = 'update_sparse' if sparse_profile else 'update'
behavior = 'update-sparse' if sparse_profile else 'update'
with timeit(op, behavior):
if commands.update(ui, repo, rev=checkoutrevision, clean=True):
raise error.Abort(b'error updating')
ui.write(b'updated to %s\n' % checkoutrevision)
return None
def extsetup(ui):
# Ensure required extensions are loaded.
for ext in (b'purge', b'share'):
try:
extensions.find(ext)
except KeyError:
extensions.load(ui, ext, None)

File diff suppressed because it is too large

View File

@ -0,0 +1,102 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from taskgraph.util.attributes import (
match_run_on_projects,
match_run_on_tasks_for,
match_run_on_git_branches,
)
_target_task_methods = {}
_GIT_REFS_HEADS_PREFIX = "refs/heads/"
def _target_task(name):
def wrap(func):
_target_task_methods[name] = func
return func
return wrap
def get_method(method):
"""Get a target_task_method to pass to a TaskGraphGenerator."""
return _target_task_methods[method]
def filter_out_cron(task, parameters):
"""
Filter out tasks that run via cron.
"""
return not task.attributes.get("cron")
def filter_for_project(task, parameters):
"""Filter tasks by project. Optionally enable nightlies."""
run_on_projects = set(task.attributes.get("run_on_projects", []))
return match_run_on_projects(parameters["project"], run_on_projects)
def filter_for_tasks_for(task, parameters):
run_on_tasks_for = set(task.attributes.get("run_on_tasks_for", ["all"]))
return match_run_on_tasks_for(parameters["tasks_for"], run_on_tasks_for)
def filter_for_git_branch(task, parameters):
"""Filter tasks by git branch.
If `run_on_git_branches` is not defined, then the task runs on all branches."""
# We cannot filter on git branches if we are not on a git repository
if parameters.get("repository_type") != "git":
return True
# Pull requests usually have arbitrary names, let's not filter git branches on them.
if parameters["tasks_for"] == "github-pull-request":
return True
run_on_git_branches = set(task.attributes.get("run_on_git_branches", ["all"]))
git_branch = parameters["head_ref"]
if git_branch.startswith(_GIT_REFS_HEADS_PREFIX):
git_branch = git_branch[len(_GIT_REFS_HEADS_PREFIX) :]
return match_run_on_git_branches(git_branch, run_on_git_branches)
def standard_filter(task, parameters):
return all(
filter_func(task, parameters)
for filter_func in (
filter_out_cron,
filter_for_project,
filter_for_tasks_for,
filter_for_git_branch,
)
)
@_target_task("default")
def target_tasks_default(full_task_graph, parameters, graph_config):
"""Target the tasks which have indicated they should be run on this project
via the `run_on_projects` attributes."""
return [
l for l, t in full_task_graph.tasks.items() if standard_filter(t, parameters)
]
@_target_task("codereview")
def target_tasks_codereview(full_task_graph, parameters, graph_config):
"""Target the tasks which have indicated they should be run on this project
via the `run_on_projects` attributes and that are flagged for code review."""
return [
l
for l, t in full_task_graph.tasks.items()
if standard_filter(t, parameters) and t.attributes.get("code-review")
]
@_target_task("nothing")
def target_tasks_nothing(full_task_graph, parameters, graph_config):
"""Select nothing, for DONTBUILD pushes"""
return []
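# A minimal usage sketch (illustrative only, not part of the vendored module):
# `_target_task` keeps a registry keyed by name and `get_method` resolves that
# name into a callable. A project could register its own method like this; the
# "custom-nightly" name and the "nightly" attribute below are hypothetical.
from taskgraph.target_tasks import _target_task, get_method, standard_filter


@_target_task("custom-nightly")
def target_tasks_custom_nightly(full_task_graph, parameters, graph_config):
    """Select standard tasks that also carry a hypothetical `nightly` attribute."""
    return [
        label
        for label, task in full_task_graph.tasks.items()
        if standard_filter(task, parameters) and task.attributes.get("nightly")
    ]


# The generator later looks the method up by the name given in parameters:
assert get_method("custom-nightly") is target_tasks_custom_nightly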

View File

@ -0,0 +1,76 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import attr
@attr.s
class Task:
"""
Representation of a task in a TaskGraph. Each Task has, at creation:
- kind: the name of the task kind
- label: the label for this task
- attributes: a dictionary of attributes for this task (used for filtering)
- task: the task definition (JSON-able dictionary)
- optimization: optimization to apply to the task (see taskgraph.optimize)
- dependencies: tasks this one depends on, in the form {name: label}, for example
{'build': 'build-linux64/opt', 'docker-image': 'build-docker-image-desktop-test'}
- soft_dependencies: tasks this one may depend on if they are available post
optimization. They are specified as a list of task labels.
And later, as the task-graph processing proceeds:
- task_id -- TaskCluster taskId under which this task will be created
This class is just a convenience wrapper for the data type and managing
display, comparison, serialization, etc. It has no functionality of its own.
"""
kind = attr.ib()
label = attr.ib()
attributes = attr.ib()
task = attr.ib()
task_id = attr.ib(default=None, init=False)
optimization = attr.ib(default=None)
dependencies = attr.ib(factory=dict)
soft_dependencies = attr.ib(factory=list)
def __attrs_post_init__(self):
self.attributes["kind"] = self.kind
def to_json(self):
rv = {
"kind": self.kind,
"label": self.label,
"attributes": self.attributes,
"dependencies": self.dependencies,
"soft_dependencies": self.soft_dependencies,
"optimization": self.optimization,
"task": self.task,
}
if self.task_id:
rv["task_id"] = self.task_id
return rv
@classmethod
def from_json(cls, task_dict):
"""
Given a data structure as produced by taskgraph.to_json, re-construct
the original Task object. This is used to "resume" the task-graph
generation process, for example in Action tasks.
"""
rv = cls(
kind=task_dict["kind"],
label=task_dict["label"],
attributes=task_dict["attributes"],
task=task_dict["task"],
optimization=task_dict["optimization"],
dependencies=task_dict.get("dependencies"),
soft_dependencies=task_dict.get("soft_dependencies"),
)
if "task_id" in task_dict:
rv.task_id = task_dict["task_id"]
return rv
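# A minimal round-trip sketch (illustrative only, not part of the vendored
# module); the label, payload and dependency names below are invented.
from taskgraph.task import Task

build = Task(
    kind="build",
    label="build-linux64/opt",
    attributes={},
    task={"metadata": {"name": "build-linux64/opt"}},
    dependencies={"docker-image": "build-docker-image-base"},
)
restored = Task.from_json(build.to_json())
assert restored.label == "build-linux64/opt"
assert restored.attributes["kind"] == "build"  # added by __attrs_post_init__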

View File

@ -0,0 +1,69 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .graph import Graph
from .task import Task
import attr
@attr.s(frozen=True)
class TaskGraph:
"""
Representation of a task graph.
A task graph is a combination of a Graph and a dictionary of tasks indexed
by label. TaskGraph instances should be treated as immutable.
"""
tasks = attr.ib()
graph = attr.ib()
def __attrs_post_init__(self):
assert set(self.tasks) == self.graph.nodes
def for_each_task(self, f, *args, **kwargs):
for task_label in self.graph.visit_postorder():
task = self.tasks[task_label]
f(task, self, *args, **kwargs)
def __getitem__(self, label):
"Get a task by label"
return self.tasks[label]
def __contains__(self, label):
return label in self.tasks
def __iter__(self):
"Iterate over tasks in undefined order"
return iter(self.tasks.values())
def to_json(self):
"Return a JSON-able object representing the task graph, as documented"
named_links_dict = self.graph.named_links_dict()
# this dictionary may be keyed by label or by taskid, so let's just call it 'key'
tasks = {}
for key in self.graph.visit_postorder():
tasks[key] = self.tasks[key].to_json()
# overwrite dependencies with the information in the taskgraph's edges.
tasks[key]["dependencies"] = named_links_dict.get(key, {})
return tasks
@classmethod
def from_json(cls, tasks_dict):
"""
This code is used to generate a TaskGraph from a dictionary
representation of that TaskGraph, as produced by to_json.
"""
tasks = {}
edges = set()
for key, value in tasks_dict.items():
tasks[key] = Task.from_json(value)
if "task_id" in value:
tasks[key].task_id = value["task_id"]
for depname, dep in value["dependencies"].items():
edges.add((key, dep, depname))
task_graph = cls(tasks, Graph(set(tasks), edges))
return tasks, task_graph
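# A minimal sketch (illustrative only, not part of the vendored module) that
# rebuilds a two-task graph from its JSON form; note that from_json returns
# both the task dictionary and the TaskGraph. Labels and payloads are invented.
from taskgraph.taskgraph import TaskGraph

graph_json = {
    "build": {
        "kind": "build", "label": "build", "attributes": {}, "task": {},
        "optimization": None, "dependencies": {}, "soft_dependencies": [],
    },
    "test": {
        "kind": "test", "label": "test", "attributes": {}, "task": {},
        "optimization": None, "dependencies": {"build": "build"},
        "soft_dependencies": [],
    },
}
tasks, task_graph = TaskGraph.from_json(graph_json)
assert task_graph["test"].dependencies == {"build": "build"}
assert set(task_graph.graph.nodes) == {"build", "test"}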

View File

@ -0,0 +1,158 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import re
from typing import AnyStr
import attr
from ..config import GraphConfig
from ..parameters import Parameters
from ..util.schema import Schema, validate_schema
from ..util.memoize import memoize
@attr.s(frozen=True)
class RepoConfig:
prefix = attr.ib(type=str)
name = attr.ib(type=str)
base_repository = attr.ib(type=str)
head_repository = attr.ib(type=str)
head_ref = attr.ib(type=str)
type = attr.ib(type=str)
path = attr.ib(type=str, default="")
head_rev = attr.ib(type=str, default=None)
ssh_secret_name = attr.ib(type=str, default=None)
@attr.s(frozen=True, cmp=False)
class TransformConfig:
"""
A container for configuration affecting transforms. The `config` argument
to transforms is an instance of this class.
"""
# the name of the current kind
kind = attr.ib()
# the path to the kind configuration directory
path = attr.ib(type=AnyStr)
# the parsed contents of kind.yml
config = attr.ib(type=dict)
# the parameters for this task-graph generation run
params = attr.ib(type=Parameters)
# a list of all the tasks associated with the kind dependencies of the
# current kind
kind_dependencies_tasks = attr.ib()
# Global configuration of the taskgraph
graph_config = attr.ib(type=GraphConfig)
# whether to write out artifacts for the decision task
write_artifacts = attr.ib(type=bool)
@property
@memoize
def repo_configs(self):
repositories = self.graph_config["taskgraph"]["repositories"]
if len(repositories) == 1:
current_prefix = list(repositories.keys())[0]
else:
project = self.params["project"]
matching_repos = {
repo_prefix: repo
for (repo_prefix, repo) in repositories.items()
if re.match(repo["project-regex"], project)
}
if len(matching_repos) != 1:
raise Exception(
f"Couldn't find repository matching project `{project}`"
)
current_prefix = list(matching_repos.keys())[0]
repo_configs = {
current_prefix: RepoConfig(
prefix=current_prefix,
name=repositories[current_prefix]["name"],
base_repository=self.params["base_repository"],
head_repository=self.params["head_repository"],
head_ref=self.params["head_ref"],
head_rev=self.params["head_rev"],
type=self.params["repository_type"],
ssh_secret_name=repositories[current_prefix].get("ssh-secret-name"),
),
}
if len(repositories) != 1:
repo_configs.update(
{
repo_prefix: RepoConfig(
prefix=repo_prefix,
name=repo["name"],
base_repository=repo["default-repository"],
head_repository=repo["default-repository"],
head_ref=repo["default-ref"],
type=repo["type"],
ssh_secret_name=repo.get("ssh-secret-name"),
)
for (repo_prefix, repo) in repositories.items()
if repo_prefix != current_prefix
}
)
return repo_configs
@attr.s()
class TransformSequence:
"""
Container for a sequence of transforms. Each transform is represented as a
callable taking (config, items) and returning a generator which will yield
transformed items. The resulting sequence has the same interface.
This is convenient to use in a file full of transforms, as it provides a
decorator, @transforms.add, that will add the decorated function to the
sequence.
"""
_transforms = attr.ib(factory=list)
def __call__(self, config, items):
for xform in self._transforms:
items = xform(config, items)
if items is None:
raise Exception(f"Transform {xform} is not a generator")
return items
def add(self, func):
self._transforms.append(func)
return func
def add_validate(self, schema):
self.add(ValidateSchema(schema))
@attr.s
class ValidateSchema:
schema = attr.ib(type=Schema)
def __call__(self, config, tasks):
for task in tasks:
if "name" in task:
error = "In {kind} kind task {name!r}:".format(
kind=config.kind, name=task["name"]
)
elif "label" in task:
error = "In job {label!r}:".format(label=task["label"])
elif "primary-dependency" in task:
error = "In {kind} kind task for {dependency!r}:".format(
kind=config.kind, dependency=task["primary-dependency"].label
)
else:
error = "In unknown task:"
validate_schema(self.schema, task, error)
yield task
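# A minimal sketch (illustrative only, not part of the vendored module) of the
# pattern the TransformSequence docstring describes: register a validation
# schema, then add generator transforms. The schema fields and the transform
# body are invented.
from voluptuous import Required

from taskgraph.transforms.base import TransformSequence
from taskgraph.util.schema import Schema

example_transforms = TransformSequence()
example_transforms.add_validate(
    Schema({Required("name"): str, Required("command"): str})
)


@example_transforms.add
def add_description(config, tasks):
    for task in tasks:
        task.setdefault("description", "run {}".format(task["command"]))
        yield task


# With a real TransformConfig, the kind loader would simply call
# `example_transforms(config, tasks)` and iterate the result.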

View File

@ -0,0 +1,89 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from collections import deque
import taskgraph
from taskgraph.transforms.base import TransformSequence
from taskgraph.util.cached_tasks import add_optimization
transforms = TransformSequence()
def order_tasks(config, tasks):
"""Iterate image tasks in an order where parent tasks come first."""
if config.kind == "docker-image":
kind_prefix = "build-docker-image-"
else:
kind_prefix = config.kind + "-"
pending = deque(tasks)
task_labels = {task["label"] for task in pending}
emitted = set()
while True:
try:
task = pending.popleft()
except IndexError:
break
parents = {
task
for task in task.get("dependencies", {}).values()
if task.startswith(kind_prefix)
}
if parents and not emitted.issuperset(parents & task_labels):
pending.append(task)
continue
emitted.add(task["label"])
yield task
def format_task_digest(cached_task):
return "/".join(
[
cached_task["type"],
cached_task["name"],
cached_task["digest"],
]
)
@transforms.add
def cache_task(config, tasks):
if taskgraph.fast:
for task in tasks:
yield task
return
digests = {}
for task in config.kind_dependencies_tasks:
if "cached_task" in task.attributes:
digests[task.label] = format_task_digest(task.attributes["cached_task"])
for task in order_tasks(config, tasks):
cache = task.pop("cache", None)
if cache is None:
yield task
continue
dependency_digests = []
for p in task.get("dependencies", {}).values():
if p in digests:
dependency_digests.append(digests[p])
else:
raise Exception(
"Cached task {} has uncached parent task: {}".format(
task["label"], p
)
)
digest_data = cache["digest-data"] + sorted(dependency_digests)
add_optimization(
config,
task,
cache_type=cache["type"],
cache_name=cache["name"],
digest_data=digest_data,
)
digests[task["label"]] = format_task_digest(task["attributes"]["cached_task"])
yield task
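# A sketch of what a task entering cache_task above might look like
# (illustrative only, not part of the vendored module): the transform pops the
# "cache" stanza, folds the digests of cached dependencies into the digest
# data, and records the result via add_optimization. All names and values
# below are invented.
example_cached_task = {
    "label": "toolchain-linux64-example",
    "attributes": {},
    "dependencies": {"fetch": "fetch-example-source"},
    "cache": {
        "type": "toolchains.v3",
        "name": "linux64-example",
        "digest-data": ["<hash of the build inputs>"],
    },
}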

View File

@ -0,0 +1,23 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Add soft dependencies and configuration to code-review tasks.
"""
from taskgraph.transforms.base import TransformSequence
transforms = TransformSequence()
@transforms.add
def add_dependencies(config, jobs):
for job in jobs:
job.setdefault("soft-dependencies", [])
job["soft-dependencies"] += [
dep_task.label
for dep_task in config.kind_dependencies_tasks
if dep_task.attributes.get("code-review") is True
]
yield job

View File

@ -0,0 +1,219 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import logging
import os
import re
import taskgraph
from taskgraph.transforms.base import TransformSequence
from taskgraph.util.docker import (
generate_context_hash,
create_context_tar,
)
from taskgraph.util.schema import (
Schema,
)
from voluptuous import (
Optional,
Required,
)
from .task import task_description_schema
logger = logging.getLogger(__name__)
CONTEXTS_DIR = "docker-contexts"
DIGEST_RE = re.compile("^[0-9a-f]{64}$")
IMAGE_BUILDER_IMAGE = (
"taskcluster/image_builder:4.0.0"
"@sha256:"
"866c304445334703b68653e1390816012c9e6bdabfbd1906842b5b229e8ed044"
)
transforms = TransformSequence()
docker_image_schema = Schema(
{
# Name of the docker image.
Required("name"): str,
# Name of the parent docker image.
Optional("parent"): str,
# Treeherder symbol.
Optional("symbol"): str,
# relative path (from config.path) to the file the docker image was defined
# in.
Optional("job-from"): str,
# Arguments to use for the Dockerfile.
Optional("args"): {str: str},
# Name of the docker image definition under taskcluster/docker, when
# different from the docker image name.
Optional("definition"): str,
# List of package tasks this docker image depends on.
Optional("packages"): [str],
Optional(
"index",
description="information for indexing this build so its artifacts can be discovered",
): task_description_schema["index"],
Optional(
"cache",
description="Whether this image should be cached based on inputs.",
): bool,
}
)
transforms.add_validate(docker_image_schema)
@transforms.add
def fill_template(config, tasks):
available_packages = set()
for task in config.kind_dependencies_tasks:
if task.kind != "packages":
continue
name = task.label.replace("packages-", "")
available_packages.add(name)
context_hashes = {}
tasks = list(tasks)
if not taskgraph.fast and config.write_artifacts:
if not os.path.isdir(CONTEXTS_DIR):
os.makedirs(CONTEXTS_DIR)
for task in tasks:
image_name = task.pop("name")
job_symbol = task.pop("symbol", None)
args = task.pop("args", {})
definition = task.pop("definition", image_name)
packages = task.pop("packages", [])
parent = task.pop("parent", None)
for p in packages:
if p not in available_packages:
raise Exception(
"Missing package job for {}-{}: {}".format(
config.kind, image_name, p
)
)
if not taskgraph.fast:
context_path = os.path.join("taskcluster", "docker", definition)
topsrcdir = os.path.dirname(config.graph_config.taskcluster_yml)
if config.write_artifacts:
context_file = os.path.join(CONTEXTS_DIR, f"{image_name}.tar.gz")
logger.info(f"Writing {context_file} for docker image {image_name}")
context_hash = create_context_tar(
topsrcdir,
context_path,
context_file,
args,
)
else:
context_hash = generate_context_hash(topsrcdir, context_path, args)
else:
if config.write_artifacts:
raise Exception("Can't write artifacts if `taskgraph.fast` is set.")
context_hash = "0" * 40
digest_data = [context_hash]
digest_data += [json.dumps(args, sort_keys=True)]
context_hashes[image_name] = context_hash
description = "Build the docker image {} for use by dependent tasks".format(
image_name
)
args["DOCKER_IMAGE_PACKAGES"] = " ".join(f"<{p}>" for p in packages)
# Adjust the zstandard compression level based on the execution level.
# We use faster compression for level 1 because we care more about
# end-to-end times. We use slower/better compression for other levels
# because images are read more often and it is worth the trade-off to
# burn more CPU once to reduce image size.
zstd_level = "3" if int(config.params["level"]) == 1 else "10"
# include some information that is useful in reconstructing this task
# from JSON
taskdesc = {
"label": "build-docker-image-" + image_name,
"description": description,
"attributes": {
"image_name": image_name,
"artifact_prefix": "public",
},
"expires-after": "28 days" if config.params.is_try() else "1 year",
"scopes": [],
"run-on-projects": [],
"worker-type": "images",
"worker": {
"implementation": "docker-worker",
"os": "linux",
"artifacts": [
{
"type": "file",
"path": "/workspace/image.tar.zst",
"name": "public/image.tar.zst",
}
],
"env": {
"CONTEXT_TASK_ID": {"task-reference": "<decision>"},
"CONTEXT_PATH": "public/docker-contexts/{}.tar.gz".format(
image_name
),
"HASH": context_hash,
"PROJECT": config.params["project"],
"IMAGE_NAME": image_name,
"DOCKER_IMAGE_ZSTD_LEVEL": zstd_level,
"DOCKER_BUILD_ARGS": {
"task-reference": json.dumps(args),
},
"VCS_BASE_REPOSITORY": config.params["base_repository"],
"VCS_HEAD_REPOSITORY": config.params["head_repository"],
"VCS_HEAD_REV": config.params["head_rev"],
"VCS_REPOSITORY_TYPE": config.params["repository_type"],
},
"chain-of-trust": True,
"max-run-time": 7200,
},
}
if "index" in task:
taskdesc["index"] = task["index"]
if job_symbol:
taskdesc["treeherder"] = {
"symbol": job_symbol,
"platform": "taskcluster-images/opt",
"kind": "other",
"tier": 1,
}
worker = taskdesc["worker"]
worker["docker-image"] = IMAGE_BUILDER_IMAGE
digest_data.append(f"image-builder-image:{IMAGE_BUILDER_IMAGE}")
if packages:
deps = taskdesc.setdefault("dependencies", {})
for p in sorted(packages):
deps[p] = f"packages-{p}"
if parent:
deps = taskdesc.setdefault("dependencies", {})
deps["parent"] = f"build-docker-image-{parent}"
worker["env"]["PARENT_TASK_ID"] = {
"task-reference": "<parent>",
}
if task.get("cache", True) and not taskgraph.fast:
taskdesc["cache"] = {
"type": "docker-images.v2",
"name": image_name,
"digest-data": digest_data,
}
yield taskdesc
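# A sketch of an entry as it might appear in a docker-image kind before
# fill_template runs, shown as the equivalent Python dict (illustrative only,
# not part of the vendored module); it must satisfy docker_image_schema above.
# The image, parent and package names are invented.
example_image = {
    "name": "custom-build",
    "symbol": "I(cb)",
    "parent": "base",
    "definition": "custom-build",
    "packages": ["deb11-example-package"],
    "args": {"ARCH": "x86_64"},
    "cache": True,
}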

View File

@ -0,0 +1,318 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Support for running tasks that download remote content and re-export
# it as task artifacts.
import attr
import os
import re
from voluptuous import (
Optional,
Required,
Extra,
)
import taskgraph
from .base import (
TransformSequence,
)
from ..util.cached_tasks import (
add_optimization,
)
from ..util.schema import Schema, validate_schema
from ..util.treeherder import (
join_symbol,
)
from ..util import path
CACHE_TYPE = "content.v1"
FETCH_SCHEMA = Schema(
{
# Name of the task.
Required("name"): str,
# Relative path (from config.path) to the file the task was defined
# in.
Optional("job-from"): str,
# Description of the task.
Required("description"): str,
Optional("docker-image"): object,
Optional(
"fetch-alias",
description="An alias that can be used instead of the real fetch job name in "
"fetch stanzas for jobs.",
): str,
Optional(
"artifact-prefix",
description="The prefix of the taskcluster artifact being uploaded. "
"Defaults to `public/`; if it starts with something other than "
"`public/` the artifact will require scopes to access.",
): str,
Optional("attributes"): {str: object},
Required("fetch"): {
Required("type"): str,
Extra: object,
},
}
)
# define a collection of payload builders, depending on the worker implementation
fetch_builders = {}
@attr.s(frozen=True)
class FetchBuilder:
schema = attr.ib(type=Schema)
builder = attr.ib()
def fetch_builder(name, schema):
schema = Schema({Required("type"): name}).extend(schema)
def wrap(func):
fetch_builders[name] = FetchBuilder(schema, func)
return func
return wrap
transforms = TransformSequence()
transforms.add_validate(FETCH_SCHEMA)
@transforms.add
def process_fetch_job(config, jobs):
# Converts fetch-url entries to the job schema.
for job in jobs:
typ = job["fetch"]["type"]
name = job["name"]
fetch = job.pop("fetch")
if typ not in fetch_builders:
raise Exception(f"Unknown fetch type {typ} in fetch {name}")
validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:")
job.update(configure_fetch(config, typ, name, fetch))
yield job
def configure_fetch(config, typ, name, fetch):
if typ not in fetch_builders:
raise Exception(f"No fetch type {typ} in fetch {name}")
validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:")
return fetch_builders[typ].builder(config, name, fetch)
@transforms.add
def make_task(config, jobs):
# Fetch tasks are idempotent and immutable. Have them live for
# essentially forever.
if config.params["level"] == "3":
expires = "1000 years"
else:
expires = "28 days"
for job in jobs:
name = job["name"]
artifact_prefix = job.get("artifact-prefix", "public")
env = job.get("env", {})
env.update({"UPLOAD_DIR": "/builds/worker/artifacts"})
attributes = job.get("attributes", {})
attributes["fetch-artifact"] = path.join(artifact_prefix, job["artifact_name"])
alias = job.get("fetch-alias")
if alias:
attributes["fetch-alias"] = alias
task = {
"attributes": attributes,
"name": name,
"description": job["description"],
"expires-after": expires,
"label": "fetch-%s" % name,
"run-on-projects": [],
"run": {
"using": "run-task",
"checkout": False,
"command": job["command"],
},
"worker-type": "images",
"worker": {
"chain-of-trust": True,
"docker-image": job.get("docker-image", {"in-tree": "fetch"}),
"env": env,
"max-run-time": 900,
"artifacts": [
{
"type": "directory",
"name": artifact_prefix,
"path": "/builds/worker/artifacts",
}
],
},
}
if "treeherder" in config.graph_config:
task["treeherder"] = {
"symbol": join_symbol("Fetch", name),
"kind": "build",
"platform": "fetch/opt",
"tier": 1,
}
if not taskgraph.fast:
cache_name = task["label"].replace(f"{config.kind}-", "", 1)
# This adds the level to the index path automatically.
add_optimization(
config,
task,
cache_type=CACHE_TYPE,
cache_name=cache_name,
digest_data=job["digest_data"],
)
yield task
@fetch_builder(
"static-url",
schema={
# The URL to download.
Required("url"): str,
# The SHA-256 of the downloaded content.
Required("sha256"): str,
# Size of the downloaded entity, in bytes.
Required("size"): int,
# GPG signature verification.
Optional("gpg-signature"): {
# URL where GPG signature document can be obtained. Can contain the
# value ``{url}``, which will be substituted with the value from
# ``url``.
Required("sig-url"): str,
# Path to file containing GPG public key(s) used to validate
# download.
Required("key-path"): str,
},
# The name to give to the generated artifact. Defaults to the file
# portion of the URL. Using a different extension converts the
# archive to the given type. Only conversion to .tar.zst is
# supported.
Optional("artifact-name"): str,
# Strip the given number of path components at the beginning of
# each file entry in the archive.
# Requires an artifact-name ending with .tar.zst.
Optional("strip-components"): int,
# Add the given prefix to each file entry in the archive.
# Requires an artifact-name ending with .tar.zst.
Optional("add-prefix"): str,
# IMPORTANT: when adding anything that changes the behavior of the task,
# it is important to update the digest data used to compute cache hits.
},
)
def create_fetch_url_task(config, name, fetch):
artifact_name = fetch.get("artifact-name")
if not artifact_name:
artifact_name = fetch["url"].split("/")[-1]
command = [
"fetch-content",
"static-url",
]
# Arguments that matter to the cache digest
args = [
"--sha256",
fetch["sha256"],
"--size",
"%d" % fetch["size"],
]
if fetch.get("strip-components"):
args.extend(["--strip-components", "%d" % fetch["strip-components"]])
if fetch.get("add-prefix"):
args.extend(["--add-prefix", fetch["add-prefix"]])
command.extend(args)
env = {}
if "gpg-signature" in fetch:
sig_url = fetch["gpg-signature"]["sig-url"].format(url=fetch["url"])
key_path = os.path.join(taskgraph.GECKO, fetch["gpg-signature"]["key-path"])
with open(key_path, "r") as fh:
gpg_key = fh.read()
env["FETCH_GPG_KEY"] = gpg_key
command.extend(
[
"--gpg-sig-url",
sig_url,
"--gpg-key-env",
"FETCH_GPG_KEY",
]
)
command.extend(
[
fetch["url"],
"/builds/worker/artifacts/%s" % artifact_name,
]
)
return {
"command": command,
"artifact_name": artifact_name,
"env": env,
# We don't include the GPG signature in the digest because it isn't
# materially important for caching: GPG signatures are supplemental
# trust checking beyond what the shasum already provides.
"digest_data": args + [artifact_name],
}
@fetch_builder(
"git",
schema={
Required("repo"): str,
Required("revision"): str,
Optional("artifact-name"): str,
Optional("path-prefix"): str,
},
)
def create_git_fetch_task(config, name, fetch):
path_prefix = fetch.get("path-prefix")
if not path_prefix:
path_prefix = fetch["repo"].rstrip("/").rsplit("/", 1)[-1]
artifact_name = fetch.get("artifact-name")
if not artifact_name:
artifact_name = f"{path_prefix}.tar.zst"
if not re.match(r"[0-9a-fA-F]{40}", fetch["revision"]):
raise Exception(f'Revision is not a sha1 in fetch task "{name}"')
args = [
"fetch-content",
"git-checkout-archive",
"--path-prefix",
path_prefix,
fetch["repo"],
fetch["revision"],
"/builds/worker/artifacts/%s" % artifact_name,
]
return {
"command": args,
"artifact_name": artifact_name,
"digest_data": [fetch["revision"], path_prefix, artifact_name],
}
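# A sketch of a fetch definition matching FETCH_SCHEMA and the "static-url"
# builder above (illustrative only, not part of the vendored module); the URL,
# digest and size are placeholders rather than real values.
example_fetch = {
    "name": "example-tool",
    "description": "Example tool source archive",
    "fetch": {
        "type": "static-url",
        "url": "https://example.com/example-tool-1.0.tar.xz",
        "sha256": "0" * 64,
        "size": 1048576,
        "artifact-name": "example-tool.tar.zst",
    },
}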

View File

@ -0,0 +1,444 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Convert a job description into a task description.
Job descriptions are similar to task descriptions, but they specify how to run
the job at a higher level, using a "run" field that can be interpreted by
run-using handlers in `taskcluster/taskgraph/transforms/job`.
"""
import copy
import logging
import json
import os
from taskgraph.transforms.base import TransformSequence
from taskgraph.util import path as mozpath
from taskgraph.util.schema import (
validate_schema,
Schema,
)
from taskgraph.util.taskcluster import get_artifact_prefix
from taskgraph.util.workertypes import worker_type_implementation
from taskgraph.transforms.cached_tasks import order_tasks
from taskgraph.transforms.task import task_description_schema
from voluptuous import (
Extra,
Any,
Optional,
Required,
Exclusive,
)
logger = logging.getLogger(__name__)
# Schema for a build description
job_description_schema = Schema(
{
# The name of the job and the job's label. At least one must be specified,
# and the label will be generated from the name if necessary, by prepending
# the kind.
Optional("name"): str,
Optional("label"): str,
# the following fields are passed directly through to the task description,
# possibly modified by the run implementation. See
# taskcluster/taskgraph/transforms/task.py for the schema details.
Required("description"): task_description_schema["description"],
Optional("attributes"): task_description_schema["attributes"],
Optional("job-from"): task_description_schema["job-from"],
Optional("dependencies"): task_description_schema["dependencies"],
Optional("soft-dependencies"): task_description_schema["soft-dependencies"],
Optional("requires"): task_description_schema["requires"],
Optional("expires-after"): task_description_schema["expires-after"],
Optional("routes"): task_description_schema["routes"],
Optional("scopes"): task_description_schema["scopes"],
Optional("tags"): task_description_schema["tags"],
Optional("extra"): task_description_schema["extra"],
Optional("treeherder"): task_description_schema["treeherder"],
Optional("index"): task_description_schema["index"],
Optional("run-on-projects"): task_description_schema["run-on-projects"],
Optional("run-on-tasks-for"): task_description_schema["run-on-tasks-for"],
Optional("run-on-git-branches"): task_description_schema["run-on-git-branches"],
Optional("always-target"): task_description_schema["always-target"],
Exclusive("optimization", "optimization"): task_description_schema[
"optimization"
],
Optional("needs-sccache"): task_description_schema["needs-sccache"],
# The "when" section contains descriptions of the circumstances under which
# this task should be included in the task graph. This will be converted
# into an optimization, so it cannot be specified in a job description that
# also gives 'optimization'.
Exclusive("when", "optimization"): {
# This task only needs to be run if a file matching one of the given
# patterns has changed in the push. The patterns use the mozpack
# match function (python/mozbuild/mozpack/path.py).
Optional("files-changed"): [str],
},
# A list of artifacts to install from 'fetch' tasks.
Optional("fetches"): {
Any("toolchain", "fetch"): [str],
str: [
str,
{
Required("artifact"): str,
Optional("dest"): str,
Optional("extract"): bool,
},
],
},
# A description of how to run this job.
"run": {
# The key to a job implementation in a peer module to this one
"using": str,
# Base work directory used to set up the task.
Optional("workdir"): str,
# Any remaining content is verified against that job implementation's
# own schema.
Extra: object,
},
Required("worker-type"): task_description_schema["worker-type"],
# This object will be passed through to the task description, with additions
# provided by the job's run-using function
Optional("worker"): dict,
}
)
transforms = TransformSequence()
transforms.add_validate(job_description_schema)
@transforms.add
def rewrite_when_to_optimization(config, jobs):
for job in jobs:
when = job.pop("when", {})
if not when:
yield job
continue
files_changed = when.get("files-changed")
# implicitly add task config directory.
files_changed.append(f"{config.path}/**")
# "only when files changed" implies "skip if files have not changed"
job["optimization"] = {"skip-unless-changed": files_changed}
assert "when" not in job
yield job
@transforms.add
def set_implementation(config, jobs):
for job in jobs:
impl, os = worker_type_implementation(config.graph_config, job["worker-type"])
if os:
job.setdefault("tags", {})["os"] = os
if impl:
job.setdefault("tags", {})["worker-implementation"] = impl
worker = job.setdefault("worker", {})
assert "implementation" not in worker
worker["implementation"] = impl
if os:
worker["os"] = os
yield job
@transforms.add
def set_label(config, jobs):
for job in jobs:
if "label" not in job:
if "name" not in job:
raise Exception("job has neither a name nor a label")
job["label"] = "{}-{}".format(config.kind, job["name"])
if job.get("name"):
del job["name"]
yield job
@transforms.add
def add_resource_monitor(config, jobs):
for job in jobs:
if job.get("attributes", {}).get("resource-monitor"):
worker_implementation, worker_os = worker_type_implementation(
config.graph_config, job["worker-type"]
)
# Normalise worker os so that linux-bitbar and similar use linux tools.
worker_os = worker_os.split("-")[0]
if "win7" in job["worker-type"]:
arch = "32"
else:
arch = "64"
job.setdefault("fetches", {})
job["fetches"].setdefault("toolchain", [])
job["fetches"]["toolchain"].append(f"{worker_os}{arch}-resource-monitor")
if worker_implementation == "docker-worker":
artifact_source = "/builds/worker/monitoring/resource-monitor.json"
else:
artifact_source = "monitoring/resource-monitor.json"
job["worker"].setdefault("artifacts", [])
job["worker"]["artifacts"].append(
{
"name": "public/monitoring/resource-monitor.json",
"type": "file",
"path": artifact_source,
}
)
# Set env for output file
job["worker"].setdefault("env", {})
job["worker"]["env"]["RESOURCE_MONITOR_OUTPUT"] = artifact_source
yield job
def get_attribute(dict, key, attributes, attribute_name):
"""Get `attribute_name` from the given `attributes` dict, and if there
is a corresponding value, set `key` in `dict` to that value."""
value = attributes.get(attribute_name)
if value:
dict[key] = value
@transforms.add
def use_fetches(config, jobs):
artifact_names = {}
aliases = {}
if config.kind in ("toolchain", "fetch"):
jobs = list(jobs)
for job in jobs:
run = job.get("run", {})
label = job["label"]
get_attribute(artifact_names, label, run, "toolchain-artifact")
value = run.get(f"{config.kind}-alias")
if value:
aliases[f"{config.kind}-{value}"] = label
for task in config.kind_dependencies_tasks:
if task.kind in ("fetch", "toolchain"):
get_attribute(
artifact_names,
task.label,
task.attributes,
f"{task.kind}-artifact",
)
value = task.attributes.get(f"{task.kind}-alias")
if value:
aliases[f"{task.kind}-{value}"] = task.label
artifact_prefixes = {}
for job in order_tasks(config, jobs):
artifact_prefixes[job["label"]] = get_artifact_prefix(job)
fetches = job.pop("fetches", None)
if not fetches:
yield job
continue
job_fetches = []
name = job.get("name", job.get("label"))
dependencies = job.setdefault("dependencies", {})
worker = job.setdefault("worker", {})
prefix = get_artifact_prefix(job)
for kind, artifacts in fetches.items():
if kind in ("fetch", "toolchain"):
for fetch_name in artifacts:
label = f"{kind}-{fetch_name}"
label = aliases.get(label, label)
if label not in artifact_names:
raise Exception(
"Missing fetch job for {kind}-{name}: {fetch}".format(
kind=config.kind, name=name, fetch=fetch_name
)
)
path = artifact_names[label]
dependencies[label] = label
job_fetches.append(
{
"artifact": path,
"task": f"<{label}>",
"extract": True,
}
)
else:
if kind not in dependencies:
raise Exception(
"{name} can't fetch {kind} artifacts because "
"it has no {kind} dependencies!".format(name=name, kind=kind)
)
dep_label = dependencies[kind]
if dep_label in artifact_prefixes:
prefix = artifact_prefixes[dep_label]
else:
dep_tasks = [
task
for task in config.kind_dependencies_tasks
if task.label == dep_label
]
if len(dep_tasks) != 1:
raise Exception(
"{name} can't fetch {kind} artifacts because "
"there are {tasks} with label {label} in kind dependencies!".format(
name=name,
kind=kind,
label=dependencies[kind],
tasks="no tasks"
if len(dep_tasks) == 0
else "multiple tasks",
)
)
prefix = get_artifact_prefix(dep_tasks[0])
for artifact in artifacts:
if isinstance(artifact, str):
path = artifact
dest = None
extract = True
else:
path = artifact["artifact"]
dest = artifact.get("dest")
extract = artifact.get("extract", True)
fetch = {
"artifact": f"{prefix}/{path}",
"task": f"<{kind}>",
"extract": extract,
}
if dest is not None:
fetch["dest"] = dest
job_fetches.append(fetch)
job_artifact_prefixes = {
mozpath.dirname(fetch["artifact"])
for fetch in job_fetches
if not fetch["artifact"].startswith("public/")
}
if job_artifact_prefixes:
# Use taskcluster-proxy and request appropriate scope. For example, add
# 'scopes: [queue:get-artifact:path/to/*]' for 'path/to/artifact.tar.xz'.
worker["taskcluster-proxy"] = True
for prefix in sorted(job_artifact_prefixes):
scope = f"queue:get-artifact:{prefix}/*"
if scope not in job.setdefault("scopes", []):
job["scopes"].append(scope)
env = worker.setdefault("env", {})
env["MOZ_FETCHES"] = {"task-reference": json.dumps(job_fetches, sort_keys=True)}
env.setdefault("MOZ_FETCHES_DIR", "fetches")
yield job
@transforms.add
def make_task_description(config, jobs):
"""Given a build description, create a task description"""
# import plugin modules first, before iterating over jobs
import_all()
for job in jobs:
# always-optimized tasks never execute, so have no workdir
if job["worker"]["implementation"] in ("docker-worker", "generic-worker"):
job["run"].setdefault("workdir", "/builds/worker")
taskdesc = copy.deepcopy(job)
# fill in some empty defaults to make run implementations easier
taskdesc.setdefault("attributes", {})
taskdesc.setdefault("dependencies", {})
taskdesc.setdefault("soft-dependencies", [])
taskdesc.setdefault("routes", [])
taskdesc.setdefault("scopes", [])
taskdesc.setdefault("extra", {})
# give the function for job.run.using on this worker implementation a
# chance to set up the task description.
configure_taskdesc_for_run(
config, job, taskdesc, job["worker"]["implementation"]
)
del taskdesc["run"]
# yield only the task description, discarding the job description
yield taskdesc
# A registry of all functions decorated with run_job_using
registry = {}
def run_job_using(worker_implementation, run_using, schema=None, defaults={}):
"""Register the decorated function as able to set up a task description for
jobs with the given worker implementation and `run.using` property. If
`schema` is given, the job's run field will be verified to match it.
The decorated function should have the signature `using_foo(config, job, taskdesc)`
and should modify the task description in-place. The skeleton of
the task description is already set up, but without a payload."""
def wrap(func):
for_run_using = registry.setdefault(run_using, {})
if worker_implementation in for_run_using:
raise Exception(
"run_job_using({!r}, {!r}) already exists: {!r}".format(
run_using, worker_implementation, for_run_using[worker_implementation]
)
)
for_run_using[worker_implementation] = (func, schema, defaults)
return func
return wrap
@run_job_using(
"always-optimized", "always-optimized", Schema({"using": "always-optimized"})
)
def always_optimized(config, job, taskdesc):
pass
def configure_taskdesc_for_run(config, job, taskdesc, worker_implementation):
"""
Run the appropriate function for this job against the given task
description.
This will raise an appropriate error if no function exists, or if the job's
run is not valid according to the schema.
"""
run_using = job["run"]["using"]
if run_using not in registry:
raise Exception(f"no functions for run.using {run_using!r}")
if worker_implementation not in registry[run_using]:
raise Exception(
"no functions for run.using {!r} on {!r}".format(
run_using, worker_implementation
)
)
func, schema, defaults = registry[run_using][worker_implementation]
for k, v in defaults.items():
job["run"].setdefault(k, v)
if schema:
validate_schema(
schema,
job["run"],
"In job.run using {!r}/{!r} for job {!r}:".format(
job["run"]["using"], worker_implementation, job["label"]
),
)
func(config, job, taskdesc)
def import_all():
"""Import all modules that are siblings of this one, triggering the decorator
above in the process."""
for f in os.listdir(os.path.dirname(__file__)):
if f.endswith(".py") and f not in ("commmon.py", "__init__.py"):
__import__("taskgraph.transforms.job." + f[:-3])
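# A minimal sketch (illustrative only, not part of the vendored module) of
# registering a new run-using handler, as the run_job_using docstring above
# describes. The "shell" run-using name and its schema are invented; real
# handlers live in sibling modules such as run_task.py.
from voluptuous import Required

from taskgraph.transforms.job import run_job_using
from taskgraph.util.schema import Schema


@run_job_using(
    "docker-worker",
    "shell",
    schema=Schema({Required("using"): "shell", Required("command"): str}),
)
def docker_worker_shell(config, job, taskdesc):
    # Configure the docker-worker payload from the job's `run` section.
    worker = taskdesc["worker"] = job["worker"]
    worker["command"] = ["bash", "-cx", job["run"]["command"]]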

View File

@ -0,0 +1,201 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Common support for various job types. These functions are all named after the
worker implementation they operate on, and take the same three parameters, for
consistency.
"""
import hashlib
import json
from taskgraph.util.taskcluster import get_artifact_prefix
def get_vcsdir_name(os):
if os == "windows":
return "src"
else:
return "vcs"
def add_cache(job, taskdesc, name, mount_point, skip_untrusted=False):
"""Adds a cache based on the worker's implementation.
Args:
job (dict): Task's job description.
taskdesc (dict): Target task description to modify.
name (str): Name of the cache.
mount_point (path): Path on the host to mount the cache.
skip_untrusted (bool): Whether cache is used in untrusted environments
(default: False). Only applies to docker-worker.
"""
if not job["run"].get("use-caches", True):
return
worker = job["worker"]
if worker["implementation"] == "docker-worker":
taskdesc["worker"].setdefault("caches", []).append(
{
"type": "persistent",
"name": name,
"mount-point": mount_point,
"skip-untrusted": skip_untrusted,
}
)
elif worker["implementation"] == "generic-worker":
taskdesc["worker"].setdefault("mounts", []).append(
{
"cache-name": name,
"directory": mount_point,
}
)
else:
# Caches not implemented
pass
def docker_worker_add_workspace_cache(config, job, taskdesc, extra=None):
"""Add the workspace cache.
Args:
config (TransformConfig): Transform configuration object.
job (dict): Task's job description.
taskdesc (dict): Target task description to modify.
extra (str): Optional context passed in that supports extending the cache
key name to avoid undesired conflicts with other caches.
"""
cache_name = "{}-build-{}-{}-workspace".format(
config.params["project"],
taskdesc["attributes"]["build_platform"],
taskdesc["attributes"]["build_type"],
)
if extra:
cache_name = f"{cache_name}-{extra}"
mount_point = "{workdir}/workspace".format(**job["run"])
# Don't enable the workspace cache when we can't guarantee its
# behavior, like on Try.
add_cache(job, taskdesc, cache_name, mount_point, skip_untrusted=True)
def add_artifacts(config, job, taskdesc, path):
taskdesc["worker"].setdefault("artifacts", []).append(
{
"name": get_artifact_prefix(taskdesc),
"path": path,
"type": "directory",
}
)
def docker_worker_add_artifacts(config, job, taskdesc):
"""Adds an artifact directory to the task"""
path = "{workdir}/artifacts/".format(**job["run"])
taskdesc["worker"]["env"]["UPLOAD_DIR"] = path
add_artifacts(config, job, taskdesc, path)
def generic_worker_add_artifacts(config, job, taskdesc):
"""Adds an artifact directory to the task"""
# The path is the location on disk; it doesn't necessarily
# mean the artifacts will be public or private; that is set via the name
# attribute in add_artifacts.
add_artifacts(config, job, taskdesc, path=get_artifact_prefix(taskdesc))
def support_vcs_checkout(config, job, taskdesc, repo_configs, sparse=False):
"""Update a job/task with parameters to enable a VCS checkout.
This can only be used with ``run-task`` tasks, as the cache name is
reserved for ``run-task`` tasks.
"""
worker = job["worker"]
is_mac = worker["os"] == "macosx"
is_win = worker["os"] == "windows"
is_linux = worker["os"] == "linux"
is_docker = worker["implementation"] == "docker-worker"
assert is_mac or is_win or is_linux
if is_win:
checkoutdir = "./build"
hgstore = "y:/hg-shared"
elif is_docker:
checkoutdir = "{workdir}/checkouts".format(**job["run"])
hgstore = f"{checkoutdir}/hg-store"
else:
checkoutdir = "./checkouts"
hgstore = f"{checkoutdir}/hg-shared"
vcsdir = checkoutdir + "/" + get_vcsdir_name(worker["os"])
cache_name = "checkouts"
# Robust checkout does not clean up subrepositories, so ensure that tasks
# that checkout different sets of paths have separate caches.
# See https://bugzilla.mozilla.org/show_bug.cgi?id=1631610
if len(repo_configs) > 1:
checkout_paths = {
"\t".join([repo_config.path, repo_config.prefix])
for repo_config in sorted(
repo_configs.values(), key=lambda repo_config: repo_config.path
)
}
checkout_paths_str = "\n".join(checkout_paths).encode("utf-8")
digest = hashlib.sha256(checkout_paths_str).hexdigest()
cache_name += f"-repos-{digest}"
# Sparse checkouts need their own cache because they can interfere
# with clients that aren't sparse aware.
if sparse:
cache_name += "-sparse"
# Workers using Mercurial >= 5.8 will enable revlog-compression-zstd, which
# workers using older versions can't understand, so they can't share cache.
# At the moment, only docker workers use the newer version.
if is_docker:
cache_name += "-hg58"
add_cache(job, taskdesc, cache_name, checkoutdir)
env = taskdesc["worker"].setdefault("env", {})
env.update(
{
"HG_STORE_PATH": hgstore,
"REPOSITORIES": json.dumps(
{repo.prefix: repo.name for repo in repo_configs.values()}
),
"VCS_PATH": vcsdir,
}
)
for repo_config in repo_configs.values():
env.update(
{
f"{repo_config.prefix.upper()}_{key}": value
for key, value in {
"BASE_REPOSITORY": repo_config.base_repository,
"HEAD_REPOSITORY": repo_config.head_repository,
"HEAD_REV": repo_config.head_rev,
"HEAD_REF": repo_config.head_ref,
"REPOSITORY_TYPE": repo_config.type,
"SSH_SECRET_NAME": repo_config.ssh_secret_name,
}.items()
if value is not None
}
)
if repo_config.ssh_secret_name:
taskdesc["scopes"].append(f"secrets:get:{repo_config.ssh_secret_name}")
if any(repo_config.type == "hg" for repo_config in repo_configs.values()):
# Give task access to hgfingerprint secret so it can pin the certificate
# for hg.mozilla.org.
taskdesc["scopes"].append("secrets:get:project/taskcluster/gecko/hgfingerprint")
# only some worker platforms have taskcluster-proxy enabled
if job["worker"]["implementation"] in ("docker-worker",):
taskdesc["worker"]["taskcluster-proxy"] = True

View File

@ -0,0 +1,37 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
This transform allows including indexed tasks from other projects in the
current taskgraph. The transform takes a list of indexes, and the optimization
phase will replace the task with the task from the other graph.
"""
from taskgraph.transforms.base import TransformSequence
from taskgraph.util.schema import Schema
from taskgraph.transforms.job import run_job_using
from voluptuous import Required
transforms = TransformSequence()
run_task_schema = Schema(
{
Required("using"): "index-search",
Required(
"index-search",
"A list of indexes in decreasing order of priority at which to lookup for this "
"task. This is interpolated with the graph parameters.",
): [str],
}
)
@run_job_using("always-optimized", "index-search", schema=run_task_schema)
def fill_template(config, job, taskdesc):
run = job["run"]
taskdesc["optimization"] = {
"index-search": [index.format(**config.params) for index in run["index-search"]]
}
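# A sketch of a job `run` section that fill_template above would handle
# (illustrative only, not part of the vendored module); the index route is
# invented and is interpolated with the graph parameters.
example_run = {
    "using": "index-search",
    "index-search": [
        "example.v2.{project}.revision.{head_rev}.build",
    ],
}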

View File

@ -0,0 +1,247 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Support for running jobs that are invoked via the `run-task` script.
"""
import os
import attr
from taskgraph.transforms.task import taskref_or_string
from taskgraph.transforms.job import run_job_using
from taskgraph.util import path
from taskgraph.util.schema import Schema
from taskgraph.transforms.job.common import support_vcs_checkout
from voluptuous import Required, Any, Optional
run_task_schema = Schema(
{
Required("using"): "run-task",
# if true, add a cache at ~worker/.cache, which is where things like pip
# tend to hide their caches. This cache is never added for level-1 jobs.
# TODO Once bug 1526028 is fixed, this and 'use-caches' should be merged.
Required("cache-dotcache"): bool,
# Whether or not to use caches.
Optional("use-caches"): bool,
# if true (the default), perform a checkout on the worker
Required("checkout"): Any(bool, {str: dict}),
Optional(
"cwd",
description="Path to run command in. If a checkout is present, the path "
"to the checkout will be interpolated with the key `checkout`",
): str,
# The sparse checkout profile to use. Value is the filename relative to the
# directory where sparse profiles are defined (build/sparse-profiles/).
Required("sparse-profile"): Any(str, None),
# The command arguments to pass to the `run-task` script, after the
# checkout arguments. If a list, it will be passed directly; otherwise
# it will be included in a single argument to `bash -cx`.
Required("command"): Any([taskref_or_string], taskref_or_string),
# Context to substitute into the command using format string
# substitution (e.g {value}). This is useful if certain aspects of the
# command need to be generated in transforms.
Optional("command-context"): dict,
# Base work directory used to set up the task.
Required("workdir"): str,
# Whether to run as root. (defaults to False)
Optional("run-as-root"): bool,
}
)
def common_setup(config, job, taskdesc, command):
run = job["run"]
if run["checkout"]:
repo_configs = config.repo_configs
if len(repo_configs) > 1 and run["checkout"] is True:
raise Exception("Must explicitly sepcify checkouts with multiple repos.")
elif run["checkout"] is not True:
repo_configs = {
repo: attr.evolve(repo_configs[repo], **config)
for (repo, config) in run["checkout"].items()
}
vcs_path = support_vcs_checkout(
config,
job,
taskdesc,
repo_configs=repo_configs,
sparse=bool(run["sparse-profile"]),
)
vcs_path = taskdesc["worker"]["env"]["VCS_PATH"]
for repo_config in repo_configs.values():
checkout_path = path.join(vcs_path, repo_config.path)
command.append(f"--{repo_config.prefix}-checkout={checkout_path}")
if run["sparse-profile"]:
command.append(
"--{}-sparse-profile=build/sparse-profiles/{}".format(
repo_config.prefix,
run["sparse-profile"],
)
)
if "cwd" in run:
run["cwd"] = path.normpath(run["cwd"].format(checkout=vcs_path))
elif "cwd" in run and "{checkout}" in run["cwd"]:
raise Exception(
"Found `{{checkout}}` interpolation in `cwd` for task {name} "
"but the task doesn't have a checkout: {cwd}".format(
cwd=run["cwd"], name=job.get("name", job.get("label"))
)
)
if "cwd" in run:
command.extend(("--task-cwd", run["cwd"]))
taskdesc["worker"].setdefault("env", {})["MOZ_SCM_LEVEL"] = config.params["level"]
worker_defaults = {
"cache-dotcache": False,
"checkout": True,
"sparse-profile": None,
"run-as-root": False,
}
def script_url(config, script):
# This logic is a bit of a hack, and should be replaced by something better.
# TASK_ID is used as a proxy for running in automation. In that case, we
# want to use the run-task/fetch-content corresponding to the taskgraph
# version we are running, and otherwise, we aren't going to run the task we
# generate, so the exact version doesn't matter.
# If we checked out the taskgraph code with run-task in the decision task,
# we can use TASKGRAPH_* to find the right version, which covers the
# existing use case.
if "TASK_ID" in os.environ:
if (
"TASKGRAPH_HEAD_REPOSITORY" not in os.environ
or "TASKGRAPH_HEAD_REV" not in os.environ
):
raise Exception(
"Must specify 'TASKGRAPH_HEAD_REPOSITORY' and 'TASKGRAPH_HEAD_REV' "
"to use run-task on generic-worker."
)
taskgraph_repo = os.environ.get(
"TASKGRAPH_HEAD_REPOSITORY", "https://hg.mozilla.org/ci/taskgraph"
)
taskgraph_rev = os.environ.get("TASKGRAPH_HEAD_REV", "default")
return "{}/raw-file/{}/src/taskgraph/run-task/{}".format(
taskgraph_repo, taskgraph_rev, script
)
@run_job_using(
"docker-worker", "run-task", schema=run_task_schema, defaults=worker_defaults
)
def docker_worker_run_task(config, job, taskdesc):
run = job["run"]
worker = taskdesc["worker"] = job["worker"]
command = ["/usr/local/bin/run-task"]
common_setup(config, job, taskdesc, command)
if run.get("cache-dotcache"):
worker["caches"].append(
{
"type": "persistent",
"name": "{project}-dotcache".format(**config.params),
"mount-point": "{workdir}/.cache".format(**run),
"skip-untrusted": True,
}
)
run_command = run["command"]
command_context = run.get("command-context")
if command_context:
run_command = run_command.format(**command_context)
# dict is for the case of `{'task-reference': str}`.
if isinstance(run_command, str) or isinstance(run_command, dict):
run_command = ["bash", "-cx", run_command]
command.append("--fetch-hgfingerprint")
if run["run-as-root"]:
command.extend(("--user", "root", "--group", "root"))
command.append("--")
command.extend(run_command)
worker["command"] = command
@run_job_using(
"generic-worker", "run-task", schema=run_task_schema, defaults=worker_defaults
)
def generic_worker_run_task(config, job, taskdesc):
run = job["run"]
worker = taskdesc["worker"] = job["worker"]
is_win = worker["os"] == "windows"
is_mac = worker["os"] == "macosx"
is_bitbar = worker["os"] == "linux-bitbar"
if is_win:
command = ["C:/mozilla-build/python3/python3.exe", "run-task"]
elif is_mac:
command = ["/tools/python36/bin/python3", "run-task"]
else:
command = ["./run-task"]
common_setup(config, job, taskdesc, command)
worker.setdefault("mounts", [])
if run.get("cache-dotcache"):
worker["mounts"].append(
{
"cache-name": "{project}-dotcache".format(**config.params),
"directory": "{workdir}/.cache".format(**run),
}
)
worker["mounts"].append(
{
"content": {
"url": script_url(config, "run-task"),
},
"file": "./run-task",
}
)
if worker.get("env", {}).get("MOZ_FETCHES"):
worker["mounts"].append(
{
"content": {
"url": script_url(config, "fetch-content"),
},
"file": "./fetch-content",
}
)
run_command = run["command"]
if isinstance(run_command, str):
if is_win:
run_command = f'"{run_command}"'
run_command = ["bash", "-cx", run_command]
command_context = run.get("command-context")
if command_context:
for i in range(len(run_command)):
run_command[i] = run_command[i].format(**command_context)
if run["run-as-root"]:
command.extend(("--user", "root", "--group", "root"))
command.append("--")
if is_bitbar:
# Use the bitbar wrapper script which sets up the device and adb
# environment variables
command.append("/builds/taskcluster/script.py")
command.extend(run_command)
if is_win:
worker["command"] = [" ".join(command)]
else:
worker["command"] = [
["chmod", "+x", "run-task"],
command,
]

View File

@ -0,0 +1,136 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Support for running toolchain-building jobs via dedicated scripts
"""
from taskgraph.util.schema import Schema
from voluptuous import Optional, Required, Any
from taskgraph.transforms.job import (
configure_taskdesc_for_run,
run_job_using,
)
from taskgraph.transforms.job.common import (
docker_worker_add_artifacts,
get_vcsdir_name,
)
from taskgraph.util.hash import hash_paths
import taskgraph
CACHE_TYPE = "toolchains.v3"
toolchain_run_schema = Schema(
{
Required("using"): "toolchain-script",
# The script (in taskcluster/scripts/misc) to run.
Required("script"): str,
# Arguments to pass to the script.
Optional("arguments"): [str],
# Sparse profile to give to checkout using `run-task`. If given,
# a filename in `build/sparse-profiles`. Defaults to
# "toolchain-build", i.e., to
# `build/sparse-profiles/toolchain-build`. If `None`, instructs
# `run-task` to not use a sparse profile at all.
Required("sparse-profile"): Any(str, None),
# Paths/patterns pointing to files that influence the outcome of a
# toolchain build.
Optional("resources"): [str],
# Path to the artifact produced by the toolchain job
Required("toolchain-artifact"): str,
Optional(
"toolchain-alias",
description="An alias that can be used instead of the real toolchain job name in "
"fetch stanzas for jobs.",
): str,
# Base work directory used to set up the task.
Required("workdir"): str,
}
)
def get_digest_data(config, run, taskdesc):
files = list(run.pop("resources", []))
# The script
files.append("taskcluster/scripts/toolchain/{}".format(run["script"]))
# Accumulate dependency hashes for index generation.
data = [hash_paths(config.graph_config.vcs_root, files)]
# If the task uses an in-tree docker image, we want it to influence
# the index path as well. Ideally, the content of the docker image itself
# should have an influence, but at the moment, we can't get that
# information here. So use the docker image name as a proxy. Not a lot of
# changes to docker images actually have an impact on the resulting
# toolchain artifact, so we'll just rely on such important changes to be
# accompanied by a docker image name change.
image = taskdesc["worker"].get("docker-image", {}).get("in-tree")
if image:
data.append(image)
# Likewise script arguments should influence the index.
args = run.get("arguments")
if args:
data.extend(args)
return data
toolchain_defaults = {
"sparse-profile": "toolchain-build",
}
@run_job_using(
"docker-worker",
"toolchain-script",
schema=toolchain_run_schema,
defaults=toolchain_defaults,
)
def docker_worker_toolchain(config, job, taskdesc):
run = job["run"]
worker = taskdesc["worker"] = job["worker"]
worker["chain-of-trust"] = True
srcdir = get_vcsdir_name(worker["os"])
# If the task doesn't have a docker-image, set a default
worker.setdefault("docker-image", {"in-tree": "toolchain-build"})
# Allow the job to specify where artifacts come from, but add
# public/build if it's not there already.
artifacts = worker.setdefault("artifacts", [])
if not any(artifact.get("name") == "public/build" for artifact in artifacts):
docker_worker_add_artifacts(config, job, taskdesc)
env = worker["env"]
env.update(
{
"MOZ_BUILD_DATE": config.params["moz_build_date"],
"MOZ_SCM_LEVEL": config.params["level"],
}
)
attributes = taskdesc.setdefault("attributes", {})
attributes["toolchain-artifact"] = run.pop("toolchain-artifact")
if "toolchain-alias" in run:
attributes["toolchain-alias"] = run.pop("toolchain-alias")
if not taskgraph.fast:
name = taskdesc["label"].replace(f"{config.kind}-", "", 1)
taskdesc["cache"] = {
"type": CACHE_TYPE,
"name": name,
"digest-data": get_digest_data(config, run, taskdesc),
}
run["using"] = "run-task"
run["cwd"] = "{checkout}/.."
run["command"] = [
"{}/taskcluster/scripts/toolchain/{}".format(srcdir, run.pop("script"))
] + run.pop("arguments", [])
configure_taskdesc_for_run(config, job, taskdesc, worker["implementation"])

File diff suppressed because it is too large

View File

@ -0,0 +1,87 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import os
import gzip
import stat
import tarfile
# 2016-01-01T00:00:00+0000
DEFAULT_MTIME = 1451606400
def create_tar_from_files(fp, files):
"""Create a tar file deterministically.
Receives a dict mapping names of files in the archive to local filesystem
paths or ``mozpack.files.BaseFile`` instances.
The files will be archived and written to the passed file handle opened
for writing.
Only regular files can be written.
FUTURE accept a filename argument (or create APIs to write files)
"""
with tarfile.open(name="", mode="w", fileobj=fp, dereference=True) as tf:
for archive_path, f in sorted(files.items()):
if isinstance(f, str):
mode = os.stat(f).st_mode
f = open(f, "rb")
else:
mode = 0o0644
ti = tarfile.TarInfo(archive_path)
ti.mode = mode
ti.type = tarfile.REGTYPE
if not ti.isreg():
raise ValueError("not a regular file: %s" % f)
# Disallow setuid and setgid bits. This is an arbitrary restriction.
# However, since we set uid/gid to root:root, setuid and setgid
# would be a glaring security hole if the archive were
# uncompressed as root.
if ti.mode & (stat.S_ISUID | stat.S_ISGID):
raise ValueError("cannot add file with setuid or setgid set: " "%s" % f)
# Set uid, gid, username, and group as deterministic values.
ti.uid = 0
ti.gid = 0
ti.uname = ""
ti.gname = ""
# Set mtime to a constant value.
ti.mtime = DEFAULT_MTIME
f.seek(0, 2)
ti.size = f.tell()
f.seek(0, 0)
# tarfile wants to pass a size argument to read(). So just
# wrap/buffer in a proper file object interface.
tf.addfile(ti, f)
def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
"""Create a tar.gz file deterministically from files.
This is a glorified wrapper around ``create_tar_from_files`` that
adds gzip compression.
The passed file handle should be opened for writing in binary mode.
When the function returns, all data has been written to the handle.
"""
# Offset 3-7 in the gzip header contains an mtime. Pin it to a known
# value so output is deterministic.
gf = gzip.GzipFile(
filename=filename or "",
mode="wb",
fileobj=fp,
compresslevel=compresslevel,
mtime=DEFAULT_MTIME,
)
with gf:
create_tar_from_files(gf, files)
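
if __name__ == "__main__":
    # Hedged usage sketch (hypothetical file contents, not part of the vendored
    # module): because uid/gid, mtime and member ordering are pinned, building
    # the same inputs twice yields byte-identical archives.
    import io

    def _example_archive():
        out = io.BytesIO()
        create_tar_gz_from_files(
            out,
            {"greeting.txt": io.BytesIO(b"hello\n")},
            filename="example.tar.gz",
        )
        return out.getvalue()

    assert _example_archive() == _example_archive()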

View File

@ -0,0 +1,84 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import re
def attrmatch(attributes, **kwargs):
"""Determine whether the given set of task attributes matches. The
conditions are given as keyword arguments, where each keyword names an
attribute. The keyword value can be a literal, a set, or a callable. A
literal must match the attribute exactly. Given a set, the attribute value
must be in the set. A callable is called with the attribute value. If an
attribute is specified as a keyword argument but not present in the
attributes, the result is False."""
for kwkey, kwval in kwargs.items():
if kwkey not in attributes:
return False
attval = attributes[kwkey]
if isinstance(kwval, set):
if attval not in kwval:
return False
elif callable(kwval):
if not kwval(attval):
return False
elif kwval != attributes[kwkey]:
return False
return True
def keymatch(attributes, target):
"""Determine if any keys in attributes are a match to target, then return
a list of matching values. First exact matches will be checked. Failing
that, regex matches and finally a default key.
"""
# exact match
if target in attributes:
return [attributes[target]]
# regular expression match
matches = [v for k, v in attributes.items() if re.match(k + "$", target)]
if matches:
return matches
# default
if "default" in attributes:
return [attributes["default"]]
return []
def _match_run_on(key, run_on):
"""
Determine whether the given parameter is included in the corresponding `run-on-attribute`.
"""
if "all" in run_on:
return True
return key in run_on
match_run_on_projects = _match_run_on
match_run_on_tasks_for = _match_run_on
def match_run_on_git_branches(git_branch, run_on_git_branches):
"""
Determine whether the given git branch is included in the `run-on-git-branches` parameter.
Allows 'all'.
"""
if "all" in run_on_git_branches:
return True
for expected_git_branch_pattern in run_on_git_branches:
if re.match(expected_git_branch_pattern, git_branch):
return True
return False
def sorted_unique_list(*args):
"""Join one or more lists, and return a sorted list of unique members"""
combined = set().union(*args)
return sorted(combined)
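
if __name__ == "__main__":
    # Hedged usage sketch with made-up attributes (not part of the vendored
    # module): literal, set and callable conditions for attrmatch, and the
    # exact / regex / default resolution order of keymatch.
    assert attrmatch({"kind": "build", "tier": 1}, kind="build")
    assert attrmatch({"kind": "build"}, kind={"build", "test"})
    assert attrmatch({"tier": 1}, tier=lambda t: t < 2)
    assert not attrmatch({"kind": "build"}, platform="linux64")

    assert keymatch({"linux64": 1, "default": 0}, "linux64") == [1]
    assert keymatch({"linux.*": 2, "default": 0}, "linux64-shippable") == [2]
    assert keymatch({"default": 0}, "win64") == [0]
    assert keymatch({"macosx64": 3}, "win64") == []

    assert sorted_unique_list(["b", "a"], ["a", "c"]) == ["a", "b", "c"]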

View File

@ -0,0 +1,87 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import hashlib
import time
TARGET_CACHE_INDEX = "{cache_prefix}.cache.level-{level}.{type}.{name}.hash.{digest}"
EXTRA_CACHE_INDEXES = [
"{cache_prefix}.cache.level-{level}.{type}.{name}.latest",
"{cache_prefix}.cache.level-{level}.{type}.{name}.pushdate.{build_date_long}",
]
def add_optimization(
config, taskdesc, cache_type, cache_name, digest=None, digest_data=None
):
"""
Allow the results of this task to be cached. This adds index routes to the
task so it can be looked up for future runs, and optimization hints so that
cached artifacts can be found. Exactly one of `digest` and `digest_data`
must be passed.
:param TransformConfig config: The configuration for the kind being transformed.
:param dict taskdesc: The description of the current task.
:param str cache_type: The type of task result being cached.
:param str cache_name: The name of the object being cached.
:param digest: A unique string identifying this version of the artifacts
being generated. Typically this will be the hash of inputs to the task.
:type digest: bytes or None
:param digest_data: A list of bytes representing the inputs of this task.
They will be concatenated and hashed to create the digest for this
task.
:type digest_data: list of bytes or None
"""
if (digest is None) == (digest_data is None):
raise Exception("Must pass exactly one of `digest` and `digest_data`.")
if digest is None:
digest = hashlib.sha256("\n".join(digest_data).encode("utf-8")).hexdigest()
if "cached-task-prefix" in config.graph_config["taskgraph"]:
cache_prefix = config.graph_config["taskgraph"]["cached-task-prefix"]
else:
cache_prefix = config.graph_config["trust-domain"]
subs = {
"cache_prefix": cache_prefix,
"type": cache_type,
"name": cache_name,
"digest": digest,
}
# We'll try to find a cached version of the toolchain at levels above and
# including the current level, starting at the highest level.
# Chain-of-trust doesn't handle tasks not built on the tip of a
# pull-request, so don't look for level-1 tasks if building a pull-request.
index_routes = []
min_level = int(config.params["level"])
if config.params["tasks_for"] == "github-pull-request":
min_level = max(min_level, 3)
for level in reversed(range(min_level, 4)):
subs["level"] = level
index_routes.append(TARGET_CACHE_INDEX.format(**subs))
taskdesc["optimization"] = {"index-search": index_routes}
# ... and cache at the lowest level.
subs["level"] = config.params["level"]
taskdesc.setdefault("routes", []).append(
f"index.{TARGET_CACHE_INDEX.format(**subs)}"
)
# ... and add some extra routes for humans
subs["build_date_long"] = time.strftime(
"%Y.%m.%d.%Y%m%d%H%M%S", time.gmtime(config.params["build_date"])
)
taskdesc["routes"].extend(
[f"index.{route.format(**subs)}" for route in EXTRA_CACHE_INDEXES]
)
taskdesc["attributes"]["cached_task"] = {
"type": cache_type,
"name": cache_name,
"digest": digest,
}
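
if __name__ == "__main__":
    # Hedged sketch with made-up values (not part of the vendored module):
    # the index route a level-3 cached toolchain would be registered under,
    # before add_optimization prepends the "index." prefix.
    route = TARGET_CACHE_INDEX.format(
        cache_prefix="example-trust-domain",
        level=3,
        type="toolchains.v3",
        name="linux64-example",
        digest="0" * 64,
    )
    assert route == (
        "example-trust-domain.cache.level-3.toolchains.v3.linux64-example.hash."
        + "0" * 64
    )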

View File

@ -0,0 +1,78 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Utilities for generating a decision task from :file:`.taskcluster.yml`.
"""
import jsone
import yaml
import os
import slugid
from .vcs import find_hg_revision_push_info
from .templates import merge
from .time import current_json_time
def make_decision_task(params, root, context, head_rev=None):
"""Generate a basic decision task, based on the root .taskcluster.yml"""
with open(os.path.join(root, ".taskcluster.yml"), "rb") as f:
taskcluster_yml = yaml.safe_load(f)
if not head_rev:
head_rev = params["head_rev"]
if params["repository_type"] == "hg":
pushlog = find_hg_revision_push_info(params["repository_url"], head_rev)
hg_push_context = {
"pushlog_id": pushlog["pushid"],
"pushdate": pushlog["pushdate"],
"owner": pushlog["user"],
}
else:
hg_push_context = {}
slugids = {}
def as_slugid(name):
# https://github.com/taskcluster/json-e/issues/164
name = name[0]
if name not in slugids:
slugids[name] = slugid.nice()
return slugids[name]
# provide a similar JSON-e context to what mozilla-taskcluster provides:
# https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml
# but with a different tasks_for and an extra `cron` section
context = merge(
{
"repository": {
"url": params["repository_url"],
"project": params["project"],
"level": params["level"],
},
"push": merge(
{
"revision": params["head_rev"],
# remainder are fake values, but the decision task expects them anyway
"comment": " ",
},
hg_push_context,
),
"now": current_json_time(),
"as_slugid": as_slugid,
},
context,
)
rendered = jsone.render(taskcluster_yml, context)
if len(rendered["tasks"]) != 1:
raise Exception("Expected .taskcluster.yml to only produce one cron task")
task = rendered["tasks"][0]
task_id = task.pop("taskId")
return (task_id, task)

View File

@ -0,0 +1,342 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import hashlib
import io
import json
import os
import re
import requests_unixsocket
import sys
import urllib.parse
from .archive import create_tar_gz_from_files
from .memoize import memoize
IMAGE_DIR = os.path.join(".", "taskcluster", "docker")
from .yaml import load_yaml
def docker_url(path, **kwargs):
docker_socket = os.environ.get("DOCKER_SOCKET", "/var/run/docker.sock")
return urllib.parse.urlunparse(
(
"http+unix",
urllib.parse.quote(docker_socket, safe=""),
path,
"",
urllib.parse.urlencode(kwargs),
"",
)
)
def post_to_docker(tar, api_path, **kwargs):
"""POSTs a tar file to a given docker API path.
The tar argument can be anything that can be passed to requests.post()
as data (e.g. iterator or file object).
The extra keyword arguments are passed as arguments to the docker API.
"""
req = requests_unixsocket.Session().post(
docker_url(api_path, **kwargs),
data=tar,
stream=True,
headers={"Content-Type": "application/x-tar"},
)
if req.status_code != 200:
message = req.json().get("message")
if not message:
message = f"docker API returned HTTP code {req.status_code}"
raise Exception(message)
status_line = {}
buf = b""
for content in req.iter_content(chunk_size=None):
if not content:
continue
# Sometimes, a chunk of content is not a complete JSON document, so we
# combine it with leftovers from previous iterations.
buf += content
try:
data = json.loads(buf)
except Exception:
continue
buf = b""
# data is sometimes an empty dict.
if not data:
continue
# Mimic how docker itself presents the output. This code was tested
# with API version 1.18 and 1.26.
if "status" in data:
if "id" in data:
if sys.stderr.isatty():
total_lines = len(status_line)
line = status_line.setdefault(data["id"], total_lines)
n = total_lines - line
if n > 0:
# Move the cursor up n lines.
sys.stderr.write(f"\033[{n}A")
# Clear line and move the cursor to the beginning of it.
sys.stderr.write("\033[2K\r")
sys.stderr.write(
"{}: {} {}\n".format(
data["id"], data["status"], data.get("progress", "")
)
)
if n > 1:
# Move the cursor down n - 1 lines, which, considering
# the carriage return on the last write, gets us back
# where we started.
sys.stderr.write(f"\033[{n - 1}B")
else:
status = status_line.get(data["id"])
# Only print status changes.
if status != data["status"]:
sys.stderr.write("{}: {}\n".format(data["id"], data["status"]))
status_line[data["id"]] = data["status"]
else:
status_line = {}
sys.stderr.write("{}\n".format(data["status"]))
elif "stream" in data:
sys.stderr.write(data["stream"])
elif "aux" in data:
sys.stderr.write(repr(data["aux"]))
elif "error" in data:
sys.stderr.write("{}\n".format(data["error"]))
# Sadly, docker doesn't give more than a plain string for errors,
# so the best we can do to propagate the error code from the command
# that failed is to parse the error message...
errcode = 1
m = re.search(r"returned a non-zero code: (\d+)", data["error"])
if m:
errcode = int(m.group(1))
sys.exit(errcode)
else:
raise NotImplementedError(repr(data))
sys.stderr.flush()
def docker_image(name, by_tag=False):
"""
Resolve in-tree prebuilt docker image to ``<registry>/<repository>@sha256:<digest>``,
or ``<registry>/<repository>:<tag>`` if `by_tag` is `True`.
"""
try:
with open(os.path.join(IMAGE_DIR, name, "REGISTRY")) as f:
registry = f.read().strip()
except OSError:
with open(os.path.join(IMAGE_DIR, "REGISTRY")) as f:
registry = f.read().strip()
if not by_tag:
hashfile = os.path.join(IMAGE_DIR, name, "HASH")
try:
with open(hashfile) as f:
return f"{registry}/{name}@{f.read().strip()}"
except OSError:
raise Exception(f"Failed to read HASH file {hashfile}")
try:
with open(os.path.join(IMAGE_DIR, name, "VERSION")) as f:
tag = f.read().strip()
except OSError:
tag = "latest"
return f"{registry}/{name}:{tag}"
class VoidWriter:
"""A file object with write capabilities that does nothing with the written
data."""
def write(self, buf):
pass
def generate_context_hash(topsrcdir, image_path, args=None):
"""Generates a sha256 hash for context directory used to build an image."""
return stream_context_tar(topsrcdir, image_path, VoidWriter(), args=args)
class HashingWriter:
"""A file object with write capabilities that hashes the written data at
the same time it passes down to a real file object."""
def __init__(self, writer):
self._hash = hashlib.sha256()
self._writer = writer
def write(self, buf):
self._hash.update(buf)
self._writer.write(buf)
def hexdigest(self):
return self._hash.hexdigest()
def create_context_tar(topsrcdir, context_dir, out_path, args=None):
"""Create a context tarball.
A directory ``context_dir`` containing a Dockerfile will be assembled into
a gzipped tar file at ``out_path``.
We also scan the source Dockerfile for special syntax that influences
context generation.
If a line in the Dockerfile has the form ``# %include <path>``,
the relative path specified on that line will be matched against
files in the source repository and added to the context under the
path ``topsrcdir/``. If an entry is a directory, we add all files
under that directory.
If a line in the Dockerfile has the form ``# %ARG <name>``, occurrences of
the string ``$<name>`` in subsequent lines are replaced with the value
found in the ``args`` argument. Exception: this doesn't apply to VOLUME
definitions.
Returns the SHA-256 hex digest of the created archive.
"""
with open(out_path, "wb") as fh:
return stream_context_tar(
topsrcdir,
context_dir,
fh,
image_name=os.path.basename(out_path),
args=args,
)
RUN_TASK_ROOT = os.path.join(os.path.dirname(os.path.dirname(__file__)), "run-task")
RUN_TASK_FILES = {
f"run-task/{path}": os.path.join(RUN_TASK_ROOT, path)
for path in [
"run-task",
"fetch-content",
"hgrc",
"robustcheckout.py",
]
}
RUN_TASK_SNIPPET = [
"COPY run-task/run-task /usr/local/bin/run-task\n",
"COPY run-task/fetch-content /usr/local/bin/fetch-content\n",
"COPY run-task/robustcheckout.py /usr/local/mercurial/robustcheckout.py\n"
"COPY run-task/hgrc /etc/mercurial/hgrc.d/mozilla.rc\n",
]
def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=None):
"""Like create_context_tar, but streams the tar file to the `out_file` file
object."""
archive_files = {}
replace = []
content = []
topsrcdir = os.path.abspath(topsrcdir)
context_dir = os.path.join(topsrcdir, context_dir)
for root, dirs, files in os.walk(context_dir):
for f in files:
source_path = os.path.join(root, f)
archive_path = source_path[len(context_dir) + 1 :]
archive_files[archive_path] = open(source_path, "rb")
# Parse Dockerfile for special syntax of extra files to include.
content = []
with open(os.path.join(context_dir, "Dockerfile"), "r") as fh:
for line in fh:
if line.startswith("# %ARG"):
p = line[len("# %ARG ") :].strip()
if not args or p not in args:
raise Exception(f"missing argument: {p}")
replace.append((re.compile(fr"\${p}\b"), args[p]))
continue
for regexp, s in replace:
line = re.sub(regexp, s, line)
content.append(line)
if not line.startswith("# %include"):
continue
if line.strip() == "# %include-run-task":
content.extend(RUN_TASK_SNIPPET)
archive_files.update(RUN_TASK_FILES)
continue
p = line[len("# %include ") :].strip()
if os.path.isabs(p):
raise Exception("extra include path cannot be absolute: %s" % p)
fs_path = os.path.normpath(os.path.join(topsrcdir, p))
# Check for filesystem traversal exploits.
if not fs_path.startswith(topsrcdir):
raise Exception("extra include path outside topsrcdir: %s" % p)
if not os.path.exists(fs_path):
raise Exception("extra include path does not exist: %s" % p)
if os.path.isdir(fs_path):
for root, dirs, files in os.walk(fs_path):
for f in files:
source_path = os.path.join(root, f)
rel = source_path[len(fs_path) + 1 :]
archive_path = os.path.join("topsrcdir", p, rel)
archive_files[archive_path] = source_path
else:
archive_path = os.path.join("topsrcdir", p)
archive_files[archive_path] = fs_path
archive_files["Dockerfile"] = io.BytesIO("".join(content).encode("utf-8"))
writer = HashingWriter(out_file)
create_tar_gz_from_files(writer, archive_files, image_name)
return writer.hexdigest()
@memoize
def image_paths():
"""Return a map of image name to paths containing their Dockerfile."""
config = load_yaml("taskcluster", "ci", "docker-image", "kind.yml")
return {
k: os.path.join(IMAGE_DIR, v.get("definition", k))
for k, v in config["jobs"].items()
}
def image_path(name):
paths = image_paths()
if name in paths:
return paths[name]
return os.path.join(IMAGE_DIR, name)
@memoize
def parse_volumes(image):
"""Parse VOLUME entries from a Dockerfile for an image."""
volumes = set()
path = image_path(image)
with open(os.path.join(path, "Dockerfile"), "rb") as fh:
for line in fh:
line = line.strip()
# We assume VOLUME definitions don't use %ARGS.
if not line.startswith(b"VOLUME "):
continue
v = line.split(None, 1)[1]
if v.startswith(b"["):
raise ValueError(
"cannot parse array syntax for VOLUME; "
"convert to multiple entries"
)
volumes |= {volume.decode("utf-8") for volume in v.split()}
return volumes
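
if __name__ == "__main__":
    # Hedged sketch (not part of the vendored module): the http+unix URL used to
    # reach the local docker daemon, assuming the default socket path below.
    os.environ["DOCKER_SOCKET"] = "/var/run/docker.sock"
    assert (
        docker_url("/images/json", all=1)
        == "http+unix://%2Fvar%2Frun%2Fdocker.sock/images/json?all=1"
    )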

View File

@ -0,0 +1,54 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import hashlib
from pathlib import Path
from taskgraph.util.memoize import memoize
from taskgraph.util import path as mozpath
@memoize
def hash_path(path):
"""Hash a single file.
Returns the SHA-256 hash in hex form.
"""
with open(path, "rb") as fh:
return hashlib.sha256(fh.read()).hexdigest()
def _find_files(base_path):
for path in Path(base_path).rglob("*"):
if path.is_file():
yield str(path)
def hash_paths(base_path, patterns):
"""
Given a list of path patterns, return a digest of the contents of all
the corresponding files, similarly to git tree objects or mercurial
manifests.
Each file is hashed. The list of all hashes and file paths is then
itself hashed to produce the result.
"""
h = hashlib.sha256()
found = set()
for pattern in patterns:
files = _find_files(base_path)
matches = [path for path in files if mozpath.match(path, pattern)]
if matches:
found.update(matches)
else:
raise Exception("%s did not match anything" % pattern)
for path in sorted(found):
h.update(
"{} {}\n".format(
hash_path(mozpath.abspath(mozpath.join(base_path, path))),
mozpath.normsep(path),
).encode("utf-8")
)
return h.hexdigest()

View File

@ -0,0 +1,97 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .attributes import keymatch
def evaluate_keyed_by(
value, item_name, attributes, defer=None, enforce_single_match=True
):
"""
For values which can either accept a literal value, or be keyed by some
attributes, perform that lookup and return the result.
For example, given item::
by-test-platform:
macosx-10.11/debug: 13
win.*: 6
default: 12
a call to `evaluate_keyed_by(item, 'thing-name', {'test-platform': 'linux96'})`
would return `12`.
Items can be nested as deeply as desired::
by-test-platform:
win.*:
by-project:
ash: ..
cedar: ..
linux: 13
default: 12
Args:
value (dict or literal): The value to perform the evaluation on; either a literal or a nested `by-*` dict.
item_name (str): Used to generate useful error messages.
attributes (dict): Dictionary of attributes used to lookup 'by-<key>' with.
defer (list):
Allows evaluating a by-* entry at a later time. In the example
above it's possible that the project attribute hasn't been set yet,
in which case we'd want to stop before resolving that subkey and
then call this function again later. This can be accomplished by
setting `defer=["project"]` in this example.
enforce_single_match (bool):
If True (default), each task may only match a single arm of the
evaluation.
"""
while True:
if not isinstance(value, dict) or len(value) != 1:
return value
value_key = next(iter(value))
if not value_key.startswith("by-"):
return value
keyed_by = value_key[3:] # strip off 'by-' prefix
if defer and keyed_by in defer:
return value
key = attributes.get(keyed_by)
alternatives = next(iter(value.values()))
if len(alternatives) == 1 and "default" in alternatives:
# Error out when 'default' is the only alternative specified, because
# the by-{keyed_by} indirection is unnecessary there.
raise Exception(
"Keyed-by '{}' unnecessary with only value 'default' "
"found, when determining item {}".format(keyed_by, item_name)
)
if key is None:
if "default" in alternatives:
value = alternatives["default"]
continue
else:
raise Exception(
"No attribute {} and no value for 'default' found "
"while determining item {}".format(keyed_by, item_name)
)
matches = keymatch(alternatives, key)
if enforce_single_match and len(matches) > 1:
raise Exception(
"Multiple matching values for {} {!r} found while "
"determining item {}".format(keyed_by, key, item_name)
)
elif matches:
value = matches[0]
continue
raise Exception(
"No {} matching {!r} nor 'default' found while determining item {}".format(
keyed_by, key, item_name
)
)
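
if __name__ == "__main__":
    # Hedged sketch with hypothetical values (not part of the vendored module):
    # the regex arms and the "default" fall-back behave as documented above.
    chunks = evaluate_keyed_by(
        {"by-test-platform": {"macosx.*": 13, "win.*": 6, "default": 12}},
        "chunks",
        {"test-platform": "linux1804-64/opt"},
    )
    assert chunks == 12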

View File

@ -0,0 +1,40 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
# Imported from
# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozbuild/util.py#923-949
import functools
class memoize(dict):
"""A decorator to memoize the results of function calls depending
on its arguments.
Both functions and instance methods are handled, although in the
instance method case, the results are cached in the instance itself.
"""
def __init__(self, func):
self.func = func
functools.update_wrapper(self, func)
def __call__(self, *args):
if args not in self:
self[args] = self.func(*args)
return self[args]
def method_call(self, instance, *args):
name = "_%s" % self.func.__name__
if not hasattr(instance, name):
setattr(instance, name, {})
cache = getattr(instance, name)
if args not in cache:
cache[args] = self.func(instance, *args)
return cache[args]
def __get__(self, instance, cls):
return functools.update_wrapper(
functools.partial(self.method_call, instance), self.func
)
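
if __name__ == "__main__":
    # Hedged usage sketch (not part of the vendored module): the second call is
    # served from the cache, so the wrapped function only runs once.
    calls = []

    @memoize
    def square(x):
        calls.append(x)
        return x * x

    assert square(3) == 9
    assert square(3) == 9
    assert calls == [3]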

View File

@ -0,0 +1,98 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import re
from taskgraph.util.time import json_time_from_now
from taskgraph.util.taskcluster import get_artifact_url
TASK_REFERENCE_PATTERN = re.compile("<([^>]+)>")
ARTIFACT_REFERENCE_PATTERN = re.compile("<([^/]+)/([^>]+)>")
def _recurse(val, param_fns):
def recurse(val):
if isinstance(val, list):
return [recurse(v) for v in val]
elif isinstance(val, dict):
if len(val) == 1:
for param_key, param_fn in param_fns.items():
if set(val.keys()) == {param_key}:
return param_fn(val[param_key])
return {k: recurse(v) for k, v in val.items()}
else:
return val
return recurse(val)
def resolve_timestamps(now, task_def):
"""Resolve all instances of `{'relative-datestamp': '..'}` in the given task definition"""
return _recurse(
task_def,
{
"relative-datestamp": lambda v: json_time_from_now(v, now),
},
)
def resolve_task_references(label, task_def, task_id, decision_task_id, dependencies):
"""Resolve all instances of
{'task-reference': '..<..>..'}
and
{'artifact-reference`: '..<dependency/artifact/path>..'}
in the given task definition, using the given dependencies"""
def task_reference(val):
def repl(match):
key = match.group(1)
if key == "self":
return task_id
elif key == "decision":
return decision_task_id
try:
return dependencies[key]
except KeyError:
# handle escaping '<'
if key == "<":
return key
raise KeyError(f"task '{label}' has no dependency named '{key}'")
return TASK_REFERENCE_PATTERN.sub(repl, val)
def artifact_reference(val):
def repl(match):
dependency, artifact_name = match.group(1, 2)
if dependency == "self":
raise KeyError(f"task '{label}' can't reference artifacts of self")
elif dependency == "decision":
task_id = decision_task_id
else:
try:
task_id = dependencies[dependency]
except KeyError:
raise KeyError(
"task '{}' has no dependency named '{}'".format(
label, dependency
)
)
assert artifact_name.startswith(
"public/"
), "artifact-reference only supports public artifacts, not `{}`".format(
artifact_name
)
return get_artifact_url(task_id, artifact_name)
return ARTIFACT_REFERENCE_PATTERN.sub(repl, val)
return _recurse(
task_def,
{
"task-reference": task_reference,
"artifact-reference": artifact_reference,
},
)
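
if __name__ == "__main__":
    # Hedged sketch with made-up task ids (not part of the vendored module):
    # "<build>" resolves to the matching dependency, while "self" and
    # "decision" resolve to the ids passed in explicitly.
    task_def = {"payload": {"env": {"PARENT_TASK_ID": {"task-reference": "<build>"}}}}
    resolved = resolve_task_references(
        "example-label",
        task_def,
        task_id="SelfTaskId00",
        decision_task_id="DecisionId00",
        dependencies={"build": "BuildTaskId0"},
    )
    assert resolved["payload"]["env"]["PARENT_TASK_ID"] == "BuildTaskId0"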

View File

@ -0,0 +1,155 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Like :py:mod:`os.path`, with a reduced set of functions, and with normalized path
separators (always use forward slashes).
Also contains a few additional utilities not found in :py:mod:`os.path`.
"""
# Imported from
# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozpack/path.py
import posixpath
import os
import re
def normsep(path):
"""
Normalize path separators, by using forward slashes instead of whatever
:py:const:`os.sep` is.
"""
if os.sep != "/":
path = path.replace(os.sep, "/")
if os.altsep and os.altsep != "/":
path = path.replace(os.altsep, "/")
return path
def relpath(path, start):
rel = normsep(os.path.relpath(path, start))
return "" if rel == "." else rel
def realpath(path):
return normsep(os.path.realpath(path))
def abspath(path):
return normsep(os.path.abspath(path))
def join(*paths):
return normsep(os.path.join(*paths))
def normpath(path):
return posixpath.normpath(normsep(path))
def dirname(path):
return posixpath.dirname(normsep(path))
def commonprefix(paths):
return posixpath.commonprefix([normsep(path) for path in paths])
def basename(path):
return os.path.basename(path)
def splitext(path):
return posixpath.splitext(normsep(path))
def split(path):
"""
Return the normalized path as a list of its components.
``split('foo/bar/baz')`` returns ``['foo', 'bar', 'baz']``
"""
return normsep(path).split("/")
def basedir(path, bases):
"""
Given a list of directories (`bases`), return which one contains the given
path. If several matches are found, the deepest base directory is returned.
``basedir('foo/bar/baz', ['foo', 'baz', 'foo/bar'])`` returns ``'foo/bar'``
(`'foo'` and `'foo/bar'` both match, but `'foo/bar'` is the deepest match)
"""
path = normsep(path)
bases = [normsep(b) for b in bases]
if path in bases:
return path
for b in sorted(bases, reverse=True):
if b == "" or path.startswith(b + "/"):
return b
re_cache = {}
# Python versions < 3.7 return r'\/' for re.escape('/').
if re.escape("/") == "/":
MATCH_STAR_STAR_RE = re.compile(r"(^|/)\\\*\\\*/")
MATCH_STAR_STAR_END_RE = re.compile(r"(^|/)\\\*\\\*$")
else:
MATCH_STAR_STAR_RE = re.compile(r"(^|\\\/)\\\*\\\*\\\/")
MATCH_STAR_STAR_END_RE = re.compile(r"(^|\\\/)\\\*\\\*$")
def match(path, pattern):
"""
Return whether the given path matches the given pattern.
An asterisk can be used to match any string, including the null string, in
one part of the path:
``foo`` matches ``*``, ``f*`` or ``fo*o``
However, an asterisk matching a subdirectory may not match the null string:
``foo/bar`` does *not* match ``foo/*/bar``
If the pattern matches one of the ancestor directories of the path, the
path is considered matching:
``foo/bar`` matches ``foo``
Two adjacent asterisks can be used to match files and zero or more
directories and subdirectories.
``foo/bar`` matches ``foo/**/bar``, or ``**/bar``
"""
if not pattern:
return True
if pattern not in re_cache:
p = re.escape(pattern)
p = MATCH_STAR_STAR_RE.sub(r"\1(?:.+/)?", p)
p = MATCH_STAR_STAR_END_RE.sub(r"(?:\1.+)?", p)
p = p.replace(r"\*", "[^/]*") + "(?:/.*)?$"
re_cache[pattern] = re.compile(p)
return re_cache[pattern].match(path) is not None
def rebase(oldbase, base, relativepath):
"""
Return `relativepath` relative to `base` instead of `oldbase`.
"""
if base == oldbase:
return relativepath
if len(base) < len(oldbase):
assert basedir(oldbase, [base]) == base
relbase = relpath(oldbase, base)
result = join(relbase, relativepath)
else:
assert basedir(base, [oldbase]) == oldbase
relbase = relpath(base, oldbase)
result = relpath(relativepath, relbase)
result = normpath(result)
if relativepath.endswith("/") and not result.endswith("/"):
result += "/"
return result
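
if __name__ == "__main__":
    # Hedged examples of the pattern semantics documented in match() above
    # (hypothetical paths, not part of the vendored module).
    assert match("foo/bar/baz", "foo/**/baz")
    assert match("foo/bar", "foo")  # matching an ancestor directory is enough
    assert not match("foo/bar", "foo/*/bar")  # '*' for a subdirectory may not be empty
    assert basedir("foo/bar/baz", ["foo", "foo/bar"]) == "foo/bar"
    assert rebase("foo", "foo/bar", "bar/baz") == "baz"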

View File

@ -0,0 +1,24 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
def find_object(path):
"""
Find a Python object given a path of the form <modulepath>:<objectpath>.
Conceptually equivalent to
def find_object(modulepath, objectpath):
import <modulepath> as mod
return mod.<objectpath>
"""
if path.count(":") != 1:
raise ValueError(f'python path {path!r} does not have the form "module:object"')
modulepath, objectpath = path.split(":")
obj = __import__(modulepath)
for a in modulepath.split(".")[1:]:
obj = getattr(obj, a)
for a in objectpath.split("."):
obj = getattr(obj, a)
return obj
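
if __name__ == "__main__":
    # Hedged usage sketch (not part of the vendored module): resolve a
    # "module:object" path to the object itself.
    import os.path

    assert find_object("os.path:join") is os.path.join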

View File

@ -0,0 +1,22 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
# Imported from
# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozbuild/util.py#115-127
class ReadOnlyDict(dict):
"""A read-only dictionary."""
def __init__(self, *args, **kwargs):
dict.__init__(self, *args, **kwargs)
def __delitem__(self, key):
raise Exception("Object does not support deletion.")
def __setitem__(self, key, value):
raise Exception("Object does not support assignment.")
def update(self, *args, **kwargs):
raise Exception("Object does not support update.")

View File

@ -0,0 +1,215 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import re
import pprint
import collections
import voluptuous
import taskgraph
from .keyed_by import evaluate_keyed_by
def validate_schema(schema, obj, msg_prefix):
"""
Validate that object satisfies schema. If not, generate a useful exception
beginning with msg_prefix.
"""
if taskgraph.fast:
return
try:
schema(obj)
except voluptuous.MultipleInvalid as exc:
msg = [msg_prefix]
for error in exc.errors:
msg.append(str(error))
raise Exception("\n".join(msg) + "\n" + pprint.pformat(obj))
def optionally_keyed_by(*arguments):
"""
Mark a schema value as optionally keyed by any of a number of fields. The
schema is the last argument, and the remaining fields are taken to be the
field names. For example:
'some-value': optionally_keyed_by(
'test-platform', 'build-platform',
Any('a', 'b', 'c'))
The resulting schema will allow nesting of `by-test-platform` and
`by-build-platform` in either order.
"""
schema = arguments[-1]
fields = arguments[:-1]
# build the nestable schema by generating schema = Any(schema,
# by-fld1, by-fld2, by-fld3) once for each field. So we don't allow
# infinite nesting, but one level of nesting for each field.
for _ in arguments:
options = [schema]
for field in fields:
options.append({"by-" + field: {str: schema}})
schema = voluptuous.Any(*options)
return schema
def resolve_keyed_by(item, field, item_name, **extra_values):
"""
For values which can either accept a literal value, or be keyed by some
other attribute of the item, perform that lookup and replacement in-place
(modifying `item` directly). The field is specified using dotted notation
to traverse dictionaries.
For example, given item::
job:
test-platform: linux128
chunks:
by-test-platform:
macosx-10.11/debug: 13
win.*: 6
default: 12
a call to `resolve_keyed_by(item, 'job.chunks', item['thing-name'])`
would mutate item in-place to::
job:
test-platform: linux128
chunks: 12
The `item_name` parameter is used to generate useful error messages.
If extra_values are supplied, they represent additional values available
for reference from by-<field>.
Items can be nested as deeply as the schema will allow::
chunks:
by-test-platform:
win.*:
by-project:
ash: ..
cedar: ..
linux: 13
default: 12
"""
# find the field, returning the item unchanged if anything goes wrong
container, subfield = item, field
while "." in subfield:
f, subfield = subfield.split(".", 1)
if f not in container:
return item
container = container[f]
if not isinstance(container, dict):
return item
if subfield not in container:
return item
container[subfield] = evaluate_keyed_by(
value=container[subfield],
item_name=f"`{field}` in `{item_name}`",
attributes=dict(item, **extra_values),
)
return item
# Schemas for YAML files should use dashed identifiers by default. If there are
# components of the schema for which there is a good reason to use another format,
# they can be whitelisted here.
WHITELISTED_SCHEMA_IDENTIFIERS = [
# upstream-artifacts and artifact-map are handed directly to scriptWorker,
# which expects interCaps
lambda path: any(
exc in path for exc in ("['upstream-artifacts']", "['artifact-map']")
)
]
def check_schema(schema):
identifier_re = re.compile("^[a-z][a-z0-9-]*$")
def whitelisted(path):
return any(f(path) for f in WHITELISTED_SCHEMA_IDENTIFIERS)
def iter(path, sch):
def check_identifier(path, k):
if k in (str, voluptuous.Extra):
pass
elif isinstance(k, voluptuous.NotIn):
pass
elif isinstance(k, str):
if not identifier_re.match(k) and not whitelisted(path):
raise RuntimeError(
"YAML schemas should use dashed lower-case identifiers, "
"not {!r} @ {}".format(k, path)
)
elif isinstance(k, (voluptuous.Optional, voluptuous.Required)):
check_identifier(path, k.schema)
elif isinstance(k, (voluptuous.Any, voluptuous.All)):
for v in k.validators:
check_identifier(path, v)
elif not whitelisted(path):
raise RuntimeError(
"Unexpected type in YAML schema: {} @ {}".format(
type(k).__name__, path
)
)
if isinstance(sch, collections.Mapping):
for k, v in sch.items():
child = f"{path}[{k!r}]"
check_identifier(child, k)
iter(child, v)
elif isinstance(sch, (list, tuple)):
for i, v in enumerate(sch):
iter(f"{path}[{i}]", v)
elif isinstance(sch, voluptuous.Any):
for v in sch.validators:
iter(path, v)
iter("schema", schema.schema)
class Schema(voluptuous.Schema):
"""
Operates identically to voluptuous.Schema, but applying some taskgraph-specific checks
in the process.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
check_schema(self)
def extend(self, *args, **kwargs):
schema = super().extend(*args, **kwargs)
check_schema(schema)
# We want a twice-extended schema to be checked too.
schema.__class__ = Schema
return schema
def __getitem__(self, item):
return self.schema[item]
OptimizationSchema = voluptuous.Any(
# always run this task (default)
None,
# search the index for the given index namespaces, and replace this task if found
# the search occurs in order, with the first match winning
{"index-search": [str]},
# skip this task if none of the given file patterns match
{"skip-unless-changed": [str]},
)
# shortcut for a string where task references are allowed
taskref_or_string = voluptuous.Any(
str,
{voluptuous.Required("task-reference"): str},
{voluptuous.Required("artifact-reference"): str},
)
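
if __name__ == "__main__":
    # Hedged sketch with a hypothetical job snippet (not part of the vendored
    # module): "chunks" may be either a literal or keyed by test-platform, and
    # resolve_keyed_by collapses it in place.
    job = {
        "test-platform": "win10-64/opt",
        "chunks": {"by-test-platform": {"win.*": 6, "default": 12}},
    }
    resolve_keyed_by(job, "chunks", item_name="example-job")
    assert job["chunks"] == 6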

View File

@ -0,0 +1,314 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import os
import datetime
import functools
import requests
import logging
import taskcluster_urls as liburls
from requests.packages.urllib3.util.retry import Retry
from taskgraph.task import Task
from taskgraph.util.memoize import memoize
from taskgraph.util import yaml
logger = logging.getLogger(__name__)
# this is set to true for `mach taskgraph action-callback --test`
testing = False
# Default rootUrl to use if none is given in the environment; this should point
# to the production Taskcluster deployment used for CI.
PRODUCTION_TASKCLUSTER_ROOT_URL = "https://taskcluster.net"
# the maximum number of parallel Taskcluster API calls to make
CONCURRENCY = 50
@memoize
def get_root_url(use_proxy):
"""Get the current TASKCLUSTER_ROOT_URL. When running in a task, this must
come from $TASKCLUSTER_ROOT_URL; when run on the command line, we apply a
default that points to the production deployment of Taskcluster. If use_proxy
is set, this attempts to get TASKCLUSTER_PROXY_URL instead, failing if it
is not set."""
if use_proxy:
try:
return os.environ["TASKCLUSTER_PROXY_URL"]
except KeyError:
if "TASK_ID" not in os.environ:
raise RuntimeError(
"taskcluster-proxy is not available when not executing in a task"
)
else:
raise RuntimeError("taskcluster-proxy is not enabled for this task")
if "TASKCLUSTER_ROOT_URL" not in os.environ:
if "TASK_ID" in os.environ:
raise RuntimeError(
"$TASKCLUSTER_ROOT_URL must be set when running in a task"
)
else:
logger.debug("Using default TASKCLUSTER_ROOT_URL (Firefox CI production)")
return PRODUCTION_TASKCLUSTER_ROOT_URL
logger.debug(
"Running in Taskcluster instance {}{}".format(
os.environ["TASKCLUSTER_ROOT_URL"],
" with taskcluster-proxy" if "TASKCLUSTER_PROXY_URL" in os.environ else "",
)
)
return os.environ["TASKCLUSTER_ROOT_URL"]
@memoize
def get_session():
session = requests.Session()
retry = Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504])
# Default HTTPAdapter uses 10 connections. Mount custom adapter to increase
# that limit. Connections are established as needed, so using a large value
# should not negatively impact performance.
http_adapter = requests.adapters.HTTPAdapter(
pool_connections=CONCURRENCY, pool_maxsize=CONCURRENCY, max_retries=retry
)
session.mount("https://", http_adapter)
session.mount("http://", http_adapter)
return session
def _do_request(url, force_get=False, **kwargs):
session = get_session()
if kwargs and not force_get:
response = session.post(url, **kwargs)
else:
response = session.get(url, stream=True, **kwargs)
if response.status_code >= 400:
# Consume content before raise_for_status, so that the connection can be
# reused.
response.content
response.raise_for_status()
return response
def _handle_artifact(path, response):
if path.endswith(".json"):
return response.json()
if path.endswith(".yml"):
return yaml.load_stream(response.text)
response.raw.read = functools.partial(response.raw.read, decode_content=True)
return response.raw
def get_artifact_url(task_id, path, use_proxy=False):
artifact_tmpl = liburls.api(
get_root_url(False), "queue", "v1", "task/{}/artifacts/{}"
)
data = artifact_tmpl.format(task_id, path)
if use_proxy:
# Until Bug 1405889 is deployed, we can't download directly
# from the taskcluster-proxy. Work around by using the /bewit
# endpoint instead.
# The bewit URL is the body of a 303 redirect, which we don't
# want to follow (which fetches a potentially large resource).
response = _do_request(
os.environ["TASKCLUSTER_PROXY_URL"] + "/bewit",
data=data,
allow_redirects=False,
)
return response.text
return data
def get_artifact(task_id, path, use_proxy=False):
"""
Returns the artifact with the given path for the given task id.
If the path ends with ".json" or ".yml", the content is deserialized as,
respectively, json or yaml, and the corresponding python data (usually
dict) is returned.
For other types of content, a file-like object is returned.
"""
response = _do_request(get_artifact_url(task_id, path, use_proxy))
return _handle_artifact(path, response)
def list_artifacts(task_id, use_proxy=False):
response = _do_request(get_artifact_url(task_id, "", use_proxy).rstrip("/"))
return response.json()["artifacts"]
def get_artifact_prefix(task):
prefix = None
if isinstance(task, dict):
prefix = task.get("attributes", {}).get("artifact_prefix")
elif isinstance(task, Task):
prefix = task.attributes.get("artifact_prefix")
else:
raise Exception(f"Can't find artifact-prefix of non-task: {task}")
return prefix or "public/build"
def get_artifact_path(task, path):
return f"{get_artifact_prefix(task)}/{path}"
def get_index_url(index_path, use_proxy=False, multiple=False):
index_tmpl = liburls.api(get_root_url(use_proxy), "index", "v1", "task{}/{}")
return index_tmpl.format("s" if multiple else "", index_path)
def find_task_id(index_path, use_proxy=False):
try:
response = _do_request(get_index_url(index_path, use_proxy))
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
raise KeyError(f"index path {index_path} not found")
raise
return response.json()["taskId"]
def get_artifact_from_index(index_path, artifact_path, use_proxy=False):
full_path = index_path + "/artifacts/" + artifact_path
response = _do_request(get_index_url(full_path, use_proxy))
return _handle_artifact(full_path, response)
def list_tasks(index_path, use_proxy=False):
"""
Returns a list of task_ids where each task_id is indexed under a path
in the index. Results are sorted by expiration date from oldest to newest.
"""
results = []
data = {}
while True:
response = _do_request(
get_index_url(index_path, use_proxy, multiple=True), json=data
)
response = response.json()
results += response["tasks"]
if response.get("continuationToken"):
data = {"continuationToken": response.get("continuationToken")}
else:
break
# We can sort on expires because in the general case
# all of these tasks should be created with the same expires time so they end up in
# order from earliest to latest action. If more correctness is needed, consider
# fetching each task and sorting on the created date.
results.sort(key=lambda t: parse_time(t["expires"]))
return [t["taskId"] for t in results]
def parse_time(timestamp):
"""Turn a "JSON timestamp" as used in TC APIs into a datetime"""
return datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ")
def get_task_url(task_id, use_proxy=False):
task_tmpl = liburls.api(get_root_url(use_proxy), "queue", "v1", "task/{}")
return task_tmpl.format(task_id)
def get_task_definition(task_id, use_proxy=False):
response = _do_request(get_task_url(task_id, use_proxy))
return response.json()
def cancel_task(task_id, use_proxy=False):
"""Cancels a task given a task_id. In testing mode, just logs that it would
have cancelled."""
if testing:
logger.info(f"Would have cancelled {task_id}.")
else:
_do_request(get_task_url(task_id, use_proxy) + "/cancel", json={})
def status_task(task_id, use_proxy=False):
"""Gets the status of a task given a task_id. In testing mode, just logs that it would
have retrieved status."""
if testing:
logger.info(f"Would have gotten status for {task_id}.")
else:
resp = _do_request(get_task_url(task_id, use_proxy) + "/status")
status = resp.json().get("status", {}).get("state") or "unknown"
return status
def rerun_task(task_id):
"""Reruns a task given a task_id. In testing mode, just logs that it would
have rerun."""
if testing:
logger.info(f"Would have rerun {task_id}.")
else:
_do_request(get_task_url(task_id, use_proxy=True) + "/rerun", json={})
def get_current_scopes():
"""Get the current scopes. This only makes sense in a task with the Taskcluster
proxy enabled, where it returns the actual scopes accorded to the task."""
auth_url = liburls.api(get_root_url(True), "auth", "v1", "scopes/current")
resp = _do_request(auth_url)
return resp.json().get("scopes", [])
def get_purge_cache_url(provisioner_id, worker_type, use_proxy=False):
url_tmpl = liburls.api(
get_root_url(use_proxy), "purge-cache", "v1", "purge-cache/{}/{}"
)
return url_tmpl.format(provisioner_id, worker_type)
def purge_cache(provisioner_id, worker_type, cache_name, use_proxy=False):
"""Requests a cache purge from the purge-caches service."""
if testing:
logger.info(
"Would have purged {}/{}/{}.".format(
provisioner_id, worker_type, cache_name
)
)
else:
logger.info(f"Purging {provisioner_id}/{worker_type}/{cache_name}.")
purge_cache_url = get_purge_cache_url(provisioner_id, worker_type, use_proxy)
_do_request(purge_cache_url, json={"cacheName": cache_name})
def send_email(address, subject, content, link, use_proxy=False):
"""Sends an email using the notify service"""
logger.info(f"Sending email to {address}.")
url = liburls.api(get_root_url(use_proxy), "notify", "v1", "email")
_do_request(
url,
json={
"address": address,
"subject": subject,
"content": content,
"link": link,
},
)
def list_task_group_incomplete_tasks(task_group_id):
"""Generate the incomplete tasks in a task group"""
params = {}
while True:
url = liburls.api(
get_root_url(False),
"queue",
"v1",
f"task-group/{task_group_id}/list",
)
resp = _do_request(url, force_get=True, params=params).json()
for task in [t["status"] for t in resp["tasks"]]:
if task["state"] in ["running", "pending", "unscheduled"]:
yield task["taskId"]
if resp.get("continuationToken"):
params = {"continuationToken": resp.get("continuationToken")}
else:
break
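
if __name__ == "__main__":
    # Hedged sketch (not part of the vendored module): parse_time handles the
    # JSON timestamps returned by the Taskcluster APIs.
    assert parse_time("2021-10-08T04:05:53.123Z") == datetime.datetime(
        2021, 10, 8, 4, 5, 53, 123000
    )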

View File

@ -0,0 +1,57 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Tools for interacting with existing taskgraphs.
"""
from taskgraph.util.taskcluster import (
find_task_id,
get_artifact,
)
def find_decision_task(parameters, graph_config):
"""Given the parameters for this action, find the taskId of the decision
task"""
if parameters.get("repository_type", "hg") == "hg":
return find_task_id(
"{}.v2.{}.pushlog-id.{}.decision".format(
graph_config["trust-domain"],
parameters["project"],
parameters["pushlog_id"],
)
)
elif parameters["repository_type"] == "git":
return find_task_id(
"{}.v2.{}.revision.{}.taskgraph.decision".format(
graph_config["trust-domain"],
parameters["project"],
parameters["head_rev"],
)
)
else:
raise Exception(
"Unknown repository_type {}!".format(parameters["repository_type"])
)
def find_existing_tasks_from_previous_kinds(
full_task_graph, previous_graph_ids, rebuild_kinds
):
"""Given a list of previous decision/action taskIds and kinds to ignore
from the previous graphs, return a dictionary of labels-to-taskids to use
as ``existing_tasks`` in the optimization step."""
existing_tasks = {}
for previous_graph_id in previous_graph_ids:
label_to_taskid = get_artifact(previous_graph_id, "public/label-to-taskid.json")
kind_labels = {
t.label
for t in full_task_graph.tasks.values()
if t.attributes["kind"] not in rebuild_kinds
}
for label in set(label_to_taskid.keys()).intersection(kind_labels):
existing_tasks[label] = label_to_taskid[label]
return existing_tasks

View File

@ -0,0 +1,50 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import copy
def merge_to(source, dest):
"""
Merge dict and arrays (override scalar values)
Keys from source override keys from dest, and elements from lists in source
are appended to lists in dest.
:param dict source: to copy from
:param dict dest: to copy to (modified in place)
"""
for key, value in source.items():
# Override mismatching or empty types
if type(value) != type(dest.get(key)): # noqa
dest[key] = source[key]
continue
# Merge dict
if isinstance(value, dict):
merge_to(value, dest[key])
continue
if isinstance(value, list):
dest[key] = dest[key] + source[key]
continue
dest[key] = source[key]
return dest
def merge(*objects):
"""
Merge the given objects, using the semantics described for merge_to, with
objects later in the list taking precedence. From an inheritance
perspective, "parents" should be listed before "children".
Returns the result without modifying any arguments.
"""
if len(objects) == 1:
return copy.deepcopy(objects[0])
return merge_to(objects[-1], merge(*objects[:-1]))
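
if __name__ == "__main__":
    # Hedged sketch with made-up values (not part of the vendored module):
    # nested dicts merge, lists concatenate, scalars from later objects win,
    # and the inputs themselves are left unmodified.
    parent = {"env": {"LEVEL": "1"}, "routes": ["index.a"], "tier": 3}
    child = {"env": {"PROJECT": "try"}, "routes": ["index.b"], "tier": 1}
    assert merge(parent, child) == {
        "env": {"LEVEL": "1", "PROJECT": "try"},
        "routes": ["index.a", "index.b"],
        "tier": 1,
    }
    assert parent == {"env": {"LEVEL": "1"}, "routes": ["index.a"], "tier": 3}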

View File

@ -0,0 +1,117 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Python port of the ms.js node module. This is not a direct port: some things
# are more complicated or less precise, and we lean on timedelta here.
import re
import datetime
PATTERN = re.compile(r"((?:\d+)?\.?\d+) *([a-z]+)")
def seconds(value):
return datetime.timedelta(seconds=int(value))
def minutes(value):
return datetime.timedelta(minutes=int(value))
def hours(value):
return datetime.timedelta(hours=int(value))
def days(value):
return datetime.timedelta(days=int(value))
def months(value):
# See warning in years(), below
return datetime.timedelta(days=int(value) * 30)
def years(value):
# Warning here "years" are vague don't use this for really sensitive date
# computation the idea is to give you a absolute amount of time in the
# future which is not the same thing as "precisely on this date next year"
return datetime.timedelta(days=int(value) * 365)
ALIASES = {}
ALIASES["seconds"] = ALIASES["second"] = ALIASES["s"] = seconds
ALIASES["minutes"] = ALIASES["minute"] = ALIASES["min"] = minutes
ALIASES["hours"] = ALIASES["hour"] = ALIASES["h"] = hours
ALIASES["days"] = ALIASES["day"] = ALIASES["d"] = days
ALIASES["months"] = ALIASES["month"] = ALIASES["mo"] = months
ALIASES["years"] = ALIASES["year"] = ALIASES["y"] = years
class InvalidString(Exception):
pass
class UnknownTimeMeasurement(Exception):
pass
def value_of(input_str):
"""
    Convert a duration string into a ``datetime.timedelta``.
    :param str input_str: duration expression (ex: 1d, 2d, 6years, 2 seconds)
    :returns: the corresponding ``datetime.timedelta``
"""
matches = PATTERN.search(input_str)
if matches is None or len(matches.groups()) < 2:
        raise InvalidString(f"'{input_str}' is an invalid string")
value, unit = matches.groups()
if unit not in ALIASES:
raise UnknownTimeMeasurement(
"{} is not a valid time measure use one of {}".format(
unit, sorted(ALIASES.keys())
)
)
return ALIASES[unit](value)
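# Example (illustrative sketch): the return value is a timedelta, e.g.
#
#     value_of("2 hours")  # -> datetime.timedelta(seconds=7200)
#     value_of("1d")       # -> datetime.timedelta(days=1)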
def json_time_from_now(input_str, now=None, datetime_format=False):
"""
    :param str input_str: Input string (see ``value_of``)
:param datetime now: Optionally set the definition of `now`
:param boolean datetime_format: Set `True` to get a `datetime` output
:returns: JSON string representation of time in future.
"""
if now is None:
now = datetime.datetime.utcnow()
time = now + value_of(input_str)
if datetime_format is True:
return time
else:
# Sorta a big hack but the json schema validator for date does not like the
# ISO dates until 'Z' (for timezone) is added...
# the [:23] ensures only whole seconds or milliseconds are included,
# not microseconds (see bug 1381801)
return time.isoformat()[:23] + "Z"
def current_json_time(datetime_format=False):
"""
:param boolean datetime_format: Set `True` to get a `datetime` output
:returns: JSON string representation of the current time.
"""
if datetime_format is True:
return datetime.datetime.utcnow()
else:
# the [:23] ensures only whole seconds or milliseconds are included,
# not microseconds (see bug 1381801)
return datetime.datetime.utcnow().isoformat()[:23] + "Z"
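# Example (illustrative sketch; `now` is pinned so the output is deterministic):
#
#     json_time_from_now("1 hour", now=datetime.datetime(2021, 1, 1))
#     # -> '2021-01-01T01:00:00Z'
#
#     current_json_time()  # -> e.g. '2021-10-08T04:05:53.123Z'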

View File

@ -0,0 +1,59 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import re
def split_symbol(treeherder_symbol):
"""Split a symbol expressed as grp(sym) into its two parts. If no group is
given, the returned group is '?'"""
groupSymbol = "?"
symbol = treeherder_symbol
if "(" in symbol:
groupSymbol, symbol = re.match(r"([^(]*)\(([^)]*)\)", symbol).groups()
return groupSymbol, symbol
def join_symbol(group, symbol):
"""Perform the reverse of split_symbol, combining the given group and
symbol. If the group is '?', then it is omitted."""
if group == "?":
return symbol
return f"{group}({symbol})"
def add_suffix(treeherder_symbol, suffix):
"""Add a suffix to a treeherder symbol that may contain a group."""
group, symbol = split_symbol(treeherder_symbol)
symbol += str(suffix)
return join_symbol(group, symbol)
def replace_group(treeherder_symbol, new_group):
"""Add a suffix to a treeherder symbol that may contain a group."""
_, symbol = split_symbol(treeherder_symbol)
return join_symbol(new_group, symbol)
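# Example (illustrative sketch; the symbols are arbitrary):
#
#     split_symbol("M(gl)")        # -> ("M", "gl")
#     split_symbol("Mn")           # -> ("?", "Mn")
#     join_symbol("M", "gl")       # -> "M(gl)"
#     add_suffix("M(gl)", "-1")    # -> "M(gl-1)"
#     replace_group("M(gl)", "W")  # -> "W(gl)"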
def inherit_treeherder_from_dep(job, dep_job):
"""Inherit treeherder defaults from dep_job"""
treeherder = job.get("treeherder", {})
dep_th_platform = (
dep_job.task.get("extra", {})
.get("treeherder", {})
.get("machine", {})
.get("platform", "")
)
dep_th_collection = list(
dep_job.task.get("extra", {}).get("treeherder", {}).get("collection", {}).keys()
)[0]
# XXX Doesn't yet support non-opt
treeherder.setdefault("platform", f"{dep_th_platform}/{dep_th_collection}")
treeherder.setdefault(
"tier", dep_job.task.get("extra", {}).get("treeherder", {}).get("tier", 1)
)
# Does not set symbol
treeherder.setdefault("kind", "build")
return treeherder
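# Example (illustrative sketch; the platform and collection values are
# hypothetical): if dep_job advertises platform "linux64" and collection
# {"opt": True}, the returned defaults look like
#
#     {"platform": "linux64/opt", "tier": 1, "kind": "build"}
#
# merged with whatever job["treeherder"] already contains.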

View File

@ -0,0 +1,197 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import os
import subprocess
from abc import ABC, abstractproperty, abstractmethod
from shutil import which
import requests
from redo import retry
PUSHLOG_TMPL = "{}/json-pushes?version=2&changeset={}&tipsonly=1&full=1"
class Repository(ABC):
def __init__(self, path):
self.path = path
self.binary = which(self.tool)
if self.binary is None:
raise OSError(f"{self.tool} not found!")
def run(self, *args: str):
cmd = (self.binary,) + args
return subprocess.check_output(cmd, cwd=self.path, universal_newlines=True)
@abstractproperty
def tool(self) -> str:
"""Version control system being used, either 'hg' or 'git'."""
@abstractproperty
def head_ref(self) -> str:
"""Hash of HEAD revision."""
@abstractproperty
def base_ref(self):
"""Hash of revision the current topic branch is based on."""
@abstractproperty
def branch(self):
"""Current branch or bookmark the checkout has active."""
@abstractmethod
def get_url(self, remote=None):
"""Get URL of the upstream repository."""
@abstractmethod
def get_commit_message(self, revision=None):
"""Commit message of specified revision or current commit."""
@abstractmethod
def working_directory_clean(self, untracked=False, ignored=False):
"""Determine if the working directory is free of modifications.
Returns True if the working directory does not have any file
modifications. False otherwise.
By default, untracked and ignored files are not considered. If
``untracked`` or ``ignored`` are set, they influence the clean check
to factor these file classes into consideration.
"""
@abstractmethod
def update(self, ref):
"""Update the working directory to the specified reference."""
class HgRepository(Repository):
tool = "hg"
@property
def head_ref(self):
return self.run("log", "-r", ".", "-T", "{node}").strip()
@property
def base_ref(self):
return self.run("log", "-r", "last(ancestors(.) and public())", "-T", "{node}")
@property
def branch(self):
bookmarks_fn = os.path.join(self.path, ".hg", "bookmarks.current")
if os.path.exists(bookmarks_fn):
with open(bookmarks_fn) as f:
bookmark = f.read()
return bookmark or None
return None
def get_url(self, remote="default"):
return self.run("path", "-T", "{url}", remote).strip()
def get_commit_message(self, revision=None):
revision = revision or self.head_ref
return self.run("log", "-r", ".", "-T", "{desc}")
def working_directory_clean(self, untracked=False, ignored=False):
args = ["status", "--modified", "--added", "--removed", "--deleted"]
if untracked:
args.append("--unknown")
if ignored:
args.append("--ignored")
# If output is empty, there are no entries of requested status, which
# means we are clean.
return not len(self.run(*args).strip())
def update(self, ref):
return self.run("update", "--check", ref)
class GitRepository(Repository):
tool = "git"
@property
def head_ref(self):
return self.run("rev-parse", "--verify", "HEAD").strip()
@property
def base_ref(self):
refs = self.run(
"rev-list", "HEAD", "--topo-order", "--boundary", "--not", "--remotes"
).splitlines()
if refs:
return refs[-1][1:] # boundary starts with a prefix `-`
return self.head_ref
@property
def branch(self):
return self.run("branch", "--show-current").strip() or None
def get_url(self, remote="origin"):
return self.run("remote", "get-url", remote).strip()
def get_commit_message(self, revision=None):
revision = revision or self.head_ref
return self.run("log", "-n1", "--format=%B")
def working_directory_clean(self, untracked=False, ignored=False):
args = ["status", "--porcelain"]
# Even in --porcelain mode, behavior is affected by the
# ``status.showUntrackedFiles`` option, which means we need to be
# explicit about how to treat untracked files.
if untracked:
args.append("--untracked-files=all")
else:
args.append("--untracked-files=no")
        if ignored:
            args.append("--ignored")
        # If output is empty, there are no entries of requested status, which
        # means we are clean.
        return not len(self.run(*args).strip())
def update(self, ref):
self.run("checkout", ref)
def get_repository(path):
"""Get a repository object for the repository at `path`.
If `path` is not a known VCS repository, raise an exception.
"""
if os.path.isdir(os.path.join(path, ".hg")):
return HgRepository(path)
elif os.path.exists(os.path.join(path, ".git")):
return GitRepository(path)
    raise RuntimeError("Current directory is neither a git nor an hg repository")
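# Example (illustrative sketch; the attribute values shown are hypothetical):
#
#     repo = get_repository(".")      # HgRepository or GitRepository
#     repo.tool                       # "hg" or "git"
#     repo.head_ref                   # revision hash of the current checkout
#     repo.working_directory_clean()  # True when tracked files are unmodified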
def find_hg_revision_push_info(repository, revision):
"""Given the parameters for this action and a revision, find the
pushlog_id of the revision."""
pushlog_url = PUSHLOG_TMPL.format(repository, revision)
def query_pushlog(url):
        r = requests.get(url, timeout=60)
r.raise_for_status()
return r
r = retry(
query_pushlog,
args=(pushlog_url,),
attempts=5,
sleeptime=10,
)
pushes = r.json()["pushes"]
if len(pushes) != 1:
raise RuntimeError(
"Unable to find a single pushlog_id for {} revision {}: {}".format(
repository, revision, pushes
)
)
pushid = list(pushes.keys())[0]
return {
"pushdate": pushes[pushid]["date"],
"pushid": pushid,
"user": pushes[pushid]["user"],
}
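# Example (illustrative sketch; the repository URL and returned values are
# hypothetical):
#
#     find_hg_revision_push_info("https://hg.mozilla.org/mozilla-central", rev)
#     # GETs <repository>/json-pushes?version=2&changeset=<rev>&tipsonly=1&full=1
#     # -> {"pushdate": 1633665953, "pushid": "12345", "user": "user@example.com"}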

View File

@ -0,0 +1,169 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import logging
import sys
import attr
logger = logging.getLogger(__name__)
@attr.s(frozen=True)
class VerificationSequence:
"""
Container for a sequence of verifications over a TaskGraph. Each
verification is represented as a callable taking (task, taskgraph,
scratch_pad), called for each task in the taskgraph, and one more
time with no task but with the taskgraph and the same scratch_pad
that was passed for each task.
"""
_verifications = attr.ib(factory=dict)
def __call__(self, graph_name, graph, graph_config):
for verification in self._verifications.get(graph_name, []):
scratch_pad = {}
graph.for_each_task(
verification, scratch_pad=scratch_pad, graph_config=graph_config
)
verification(
None, graph, scratch_pad=scratch_pad, graph_config=graph_config
)
return graph_name, graph
def add(self, graph_name):
def wrap(func):
self._verifications.setdefault(graph_name, []).append(func)
return func
return wrap
verifications = VerificationSequence()
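# Example (illustrative sketch; verify_something is hypothetical): additional
# verifications are registered with the decorator below. Each one is called
# once per task and then once more with task=None after the whole graph has
# been visited:
#
#     @verifications.add("full_task_graph")
#     def verify_something(task, taskgraph, scratch_pad, graph_config):
#         if task is None:
#             return  # final, graph-wide call
#         ...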
@verifications.add("full_task_graph")
def verify_task_graph_symbol(task, taskgraph, scratch_pad, graph_config):
"""
    This function verifies that the tuple
(collection.keys(), machine.platform, groupSymbol, symbol) is unique
for a target task graph.
"""
if task is None:
return
task_dict = task.task
if "extra" in task_dict:
extra = task_dict["extra"]
if "treeherder" in extra:
treeherder = extra["treeherder"]
collection_keys = tuple(sorted(treeherder.get("collection", {}).keys()))
platform = treeherder.get("machine", {}).get("platform")
group_symbol = treeherder.get("groupSymbol")
symbol = treeherder.get("symbol")
key = (collection_keys, platform, group_symbol, symbol)
if key in scratch_pad:
raise Exception(
"conflict between `{}`:`{}` for values `{}`".format(
task.label, scratch_pad[key], key
)
)
else:
scratch_pad[key] = task.label
@verifications.add("full_task_graph")
def verify_trust_domain_v2_routes(task, taskgraph, scratch_pad, graph_config):
"""
This function ensures that any two tasks have distinct ``index.{trust-domain}.v2`` routes.
"""
if task is None:
return
route_prefix = "index.{}.v2".format(graph_config["trust-domain"])
task_dict = task.task
routes = task_dict.get("routes", [])
for route in routes:
if route.startswith(route_prefix):
if route in scratch_pad:
raise Exception(
"conflict between {}:{} for route: {}".format(
task.label, scratch_pad[route], route
)
)
else:
scratch_pad[route] = task.label
@verifications.add("full_task_graph")
def verify_routes_notification_filters(task, taskgraph, scratch_pad, graph_config):
"""
This function ensures that only understood filters for notifications are
specified.
See: https://docs.taskcluster.net/reference/core/taskcluster-notify/docs/usage
"""
if task is None:
return
route_prefix = "notify."
valid_filters = ("on-any", "on-completed", "on-failed", "on-exception")
task_dict = task.task
routes = task_dict.get("routes", [])
for route in routes:
if route.startswith(route_prefix):
# Get the filter of the route
route_filter = route.split(".")[-1]
if route_filter not in valid_filters:
raise Exception(
"{} has invalid notification filter ({})".format(
task.label, route_filter
)
)
@verifications.add("full_task_graph")
def verify_dependency_tiers(task, taskgraph, scratch_pad, graph_config):
tiers = scratch_pad
if task is not None:
tiers[task.label] = (
task.task.get("extra", {}).get("treeherder", {}).get("tier", sys.maxsize)
)
else:
def printable_tier(tier):
if tier == sys.maxsize:
return "unknown"
return tier
for task in taskgraph.tasks.values():
tier = tiers[task.label]
for d in task.dependencies.values():
if taskgraph[d].task.get("workerType") == "always-optimized":
continue
if "dummy" in taskgraph[d].kind:
continue
if tier < tiers[d]:
raise Exception(
"{} (tier {}) cannot depend on {} (tier {})".format(
task.label,
printable_tier(tier),
d,
printable_tier(tiers[d]),
)
)
@verifications.add("optimized_task_graph")
def verify_always_optimized(task, taskgraph, scratch_pad, graph_config):
"""
This function ensures that always-optimized tasks have been optimized.
"""
if task is None:
return
if task.task.get("workerType") == "always-optimized":
raise Exception(f"Could not optimize the task {task.label!r}")

View File

@ -0,0 +1,75 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import attr
from .keyed_by import evaluate_keyed_by
from .memoize import memoize
@attr.s
class _BuiltinWorkerType:
provisioner = attr.ib(str)
worker_type = attr.ib(str)
@property
def implementation(self):
"""
Since the list of built-in worker-types is small and fixed, we can get
away with punning the implementation name (in
`taskgraph.transforms.task`) and the worker_type.
"""
return self.worker_type
_BUILTIN_TYPES = {
"always-optimized": _BuiltinWorkerType("invalid", "always-optimized"),
"succeed": _BuiltinWorkerType("built-in", "succeed"),
}
@memoize
def worker_type_implementation(graph_config, worker_type):
"""Get the worker implementation and OS for the given workerType, where the
OS represents the host system, not the target OS, in the case of
cross-compiles."""
if worker_type in _BUILTIN_TYPES:
        # For the built-in worker-types, we use an implementation that matches
        # the worker-type.
return _BUILTIN_TYPES[worker_type].implementation, None
worker_config = evaluate_keyed_by(
{"by-worker-type": graph_config["workers"]["aliases"]},
"worker-types.yml",
{"worker-type": worker_type},
)
return worker_config["implementation"], worker_config.get("os")
@memoize
def get_worker_type(graph_config, alias, level):
"""
    Get the provisioner and worker type for the given alias, evaluating
    aliases from the graph config.
"""
if alias in _BUILTIN_TYPES:
builtin_type = _BUILTIN_TYPES[alias]
return builtin_type.provisioner, builtin_type.worker_type
level = str(level)
worker_config = evaluate_keyed_by(
{"by-alias": graph_config["workers"]["aliases"]},
"graph_config.workers.aliases",
{"alias": alias},
)
provisioner = evaluate_keyed_by(
worker_config["provisioner"],
alias,
{"level": level},
).format(level=level)
worker_type = evaluate_keyed_by(
worker_config["worker-type"],
alias,
{"level": level},
).format(level=level, alias=alias)
return provisioner, worker_type
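# Example (illustrative sketch; the alias and the graph-config snippet are
# hypothetical):
#
#     # workers:
#     #   aliases:
#     #     b-linux:
#     #       provisioner: 'proj-example-level-{level}'
#     #       implementation: docker-worker
#     #       os: linux
#     #       worker-type: 'b-linux'
#
#     get_worker_type(graph_config, "b-linux", level=3)
#     # -> ("proj-example-level-3", "b-linux")
#     worker_type_implementation(graph_config, "b-linux")
#     # -> ("docker-worker", "linux")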

View File

@ -0,0 +1,36 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import os
from yaml.loader import SafeLoader
class UnicodeLoader(SafeLoader):
def construct_yaml_str(self, node):
return self.construct_scalar(node)
UnicodeLoader.add_constructor("tag:yaml.org,2002:str", UnicodeLoader.construct_yaml_str)
def load_stream(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
"""
loader = UnicodeLoader(stream)
try:
return loader.get_single_data()
finally:
loader.dispose()
def load_yaml(*parts):
"""Convenience function to load a YAML file in the given path. This is
useful for loading kind configuration files from the kind path."""
filename = os.path.join(*parts)
with open(filename, "rb") as f:
return load_stream(f)
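# Example (illustrative sketch; the path components are hypothetical):
#
#     kind_config = load_yaml("taskcluster", "ci", "build", "kind.yml")
#     # parses taskcluster/ci/build/kind.yml with the SafeLoader subclass above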