Bug 1729060 - [taskgraph] Sync standalone taskgraph's --diff implementation, r=taskgraph-reviewers,bhearsum

This syncs the patches from bug 1720715 in standalone taskgraph.

Differential Revision: https://phabricator.services.mozilla.com/D124832
This commit is contained in:
Andrew Halberstadt 2021-09-08 20:31:45 +00:00
parent b8ec12a7f7
commit 2484082a96
5 changed files with 285 additions and 47 deletions

View File

@ -264,6 +264,9 @@ class TaskGraphGenerator:
parameters = self._parameters
self.verify_parameters(parameters)
logger.info("Using {}".format(parameters))
logger.debug("Dumping parameters:\n{}".format(repr(parameters)))
filters = parameters.get("filters", [])
# Always add legacy target tasks method until we deprecate that API.
if "target_tasks_method" not in filters:

View File

@ -2,16 +2,23 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import atexit
import os
import re
import shutil
import subprocess
import sys
import tempfile
import traceback
import argparse
import logging
import json
from collections import namedtuple
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path
from typing import Any, List
import appdirs
import yaml
Command = namedtuple("Command", ["func", "args", "kwargs", "defaults"])
@ -90,33 +97,79 @@ FORMAT_METHODS = {
}
def format_taskgraph(options, parameters, logfile=None):
    """Generate a single taskgraph and return it in the requested format.

    Args:
        options (dict): Command options. The keys read here are ``fast``,
            ``target_kind``, ``root``, ``graph_attr``, ``tasks_regex`` and
            ``format``.
        parameters: Parameters specifier handed to ``parameters_loader``
            (``None`` is passed through unchanged).
        logfile (str, optional): When given, root-logger output is written to
            this file (opened in ``"w"`` mode) instead of going through the
            last handler currently attached to the root logger.

    Returns:
        The value produced by the ``FORMAT_METHODS`` entry selected by
        ``options["format"]`` (``"labels"`` when that option is falsy).
    """
    import taskgraph
    from taskgraph.generator import TaskGraphGenerator
    from taskgraph.parameters import parameters_loader

    if logfile:
        handler = logging.FileHandler(logfile, mode="w")

        # The root logger may have no handlers at all in this process;
        # indexing ``handlers[-1]`` unconditionally would raise IndexError.
        # Only swap out (and inherit the formatter of) a handler that exists.
        if logging.root.handlers:
            oldhandler = logging.root.handlers[-1]
            logging.root.removeHandler(oldhandler)
            handler.setFormatter(oldhandler.formatter)

        logging.root.addHandler(handler)

    if options["fast"]:
        taskgraph.fast = True

    parameters = parameters_loader(
        parameters,
        overrides={"target-kind": options.get("target_kind")},
        strict=False,
    )

    tgg = TaskGraphGenerator(root_dir=options.get("root"), parameters=parameters)

    # ``graph_attr`` names the generator attribute holding the wanted graph.
    tg = getattr(tgg, options["graph_attr"])
    tg = get_filtered_taskgraph(tg, options["tasks_regex"])

    format_method = FORMAT_METHODS[options["format"] or "labels"]
    return format_method(tg)
def generate_taskgraph(options, parameters, logdir):
    """Generate one taskgraph per parameters specifier and dump each result.

    Each specifier is submitted to a ``ProcessPoolExecutor`` running
    ``format_taskgraph``. The output (or the formatted traceback, if the
    worker raised) is written to ``options["output_file"]`` when that is set,
    otherwise printed to stdout after a header line on stderr.

    Args:
        options (dict): Command options. ``graph_attr`` and ``output_file``
            are read here; the whole dict is forwarded to
            ``format_taskgraph``.
        parameters (list): Parameters specifiers, one generation per entry.
        logdir (str or None): When truthy, each worker is given a log file
            named ``<graph_attr>_<params name>.log`` inside this directory.
    """
    from taskgraph.parameters import Parameters

    futures = {}
    logfile = None

    with ProcessPoolExecutor() as executor:
        for spec in parameters:
            if logdir:
                logfile = os.path.join(
                    logdir,
                    "{}_{}.log".format(
                        options["graph_attr"], Parameters.format_spec(spec)
                    ),
                )
            f = executor.submit(format_taskgraph, options, spec, logfile)
            futures[f] = spec

    for future in as_completed(futures):
        spec = futures[future]
        e = future.exception()
        if e:
            out = "".join(traceback.format_exception(type(e), e, e.__traceback__))
        else:
            out = future.result()

        params_name = Parameters.format_spec(spec)

        path = options["output_file"]
        if path:
            # Substitute params name into file path if necessary
            if len(parameters) > 1 and "{params}" not in path:
                name, ext = os.path.splitext(path)
                name += "_{params}"
                path = name + ext

            path = path.format(params=params_name)
            # Close the file as soon as it is written rather than leaving
            # one open handle per parameters spec.
            with open(path, "w") as fh:
                print(out + "\n", file=fh)
        else:
            print(
                "Dumping result with parameters from {}:".format(params_name),
                file=sys.stderr,
            )
            print(out + "\n")
@command(
@ -179,8 +232,16 @@ def format_taskgraph(options):
@argument(
"--parameters",
"-p",
default="",
help="parameters file (.yml or .json; see " "`taskcluster/docs/parameters.rst`)`",
default=None,
action="append",
help="Parameters to use for the generation. Can be a path to file (.yml or "
".json; see `taskcluster/docs/parameters.rst`), a directory (containing "
"parameters files), a url, of the form `project=mozilla-central` to download "
"latest parameters file for the specified project from CI, or of the form "
"`task-id=<decision task id>` to download parameters from the specified "
"decision task. Can be specified multiple times, in which case multiple "
"generations will happen from the same invocation (one per parameters "
"specified).",
)
@argument(
"--no-optimize",
@ -210,18 +271,147 @@ def format_taskgraph(options):
@argument(
"-F",
"--fast",
dest="fast",
default=False,
action="store_true",
help="enable fast task generation for local debugging.",
)
@argument(
"--diff",
const="default",
nargs="?",
default=None,
help="Generate and diff the current taskgraph against another revision. "
"Without args the base revision will be used. A revision specifier such as "
"the hash or `.~1` (hg) or `HEAD~1` (git) can be used as well.",
)
def show_taskgraph(options):
    """Generate taskgraph(s) and optionally diff them against another revision.

    Args:
        options (dict): Parsed command options. ``verbose`` and
            ``parameters`` are popped from it; ``diff``, ``graph_attr`` and
            ``output_file`` are read, and ``output_file`` is overwritten
            while diffing.

    Returns:
        ``1`` when ``--diff`` was requested but the working directory is not
        clean; otherwise ``None``.
    """
    from typing import Optional

    from mozversioncontrol import get_repository_object as get_repository
    from taskgraph.parameters import Parameters

    if options.pop("verbose", False):
        logging.root.setLevel(logging.DEBUG)

    repo = None
    cur_ref = None
    diffdir = None
    if options["diff"]:
        repo = get_repository(os.getcwd())

        if not repo.working_directory_clean():
            print("abort: can't diff taskgraph with dirty working directory")
            return 1

        # We want to return the working directory to the current state
        # as best we can after we're done. In all known cases, using
        # branch or bookmark (which are both available on the VCS object)
        # as `branch` is preferable to a specific revision.
        cur_ref = repo.branch or repo.head_ref[:12]

        diffdir = tempfile.mkdtemp()
        atexit.register(
            shutil.rmtree, diffdir
        )  # make sure the directory gets cleaned up
        options["output_file"] = os.path.join(
            diffdir, f"{options['graph_attr']}_{cur_ref}"
        )
        print(f"Generating {options['graph_attr']} @ {cur_ref}", file=sys.stderr)

    # Each entry is a parameters specifier, or None for "use the defaults".
    parameters: List[Optional[str]] = options.pop("parameters")
    if not parameters:
        parameters = [None]  # will use default values

    # Expand directory arguments into the parameters files they contain.
    # Iterate over a copy because the list is mutated inside the loop.
    for param in parameters[:]:
        if param is None or not os.path.isdir(param):
            continue

        parameters.remove(param)
        parameters.extend(
            [
                p.as_posix()
                for p in Path(param).iterdir()
                if p.suffix in (".yml", ".json")
            ]
        )

    logdir = None
    if len(parameters) > 1:
        # Log to separate files for each process instead of stderr to
        # avoid interleaving.
        logdir = appdirs.user_log_dir("taskgraph")
        os.makedirs(logdir, exist_ok=True)

    generate_taskgraph(options, parameters, logdir)

    if options["diff"]:
        assert diffdir is not None
        assert repo is not None

        # Some transforms use global state for checks, so will fail
        # when running taskgraph a second time in the same session.
        # Reload all taskgraph modules to avoid this.
        for mod in sys.modules.copy():
            if mod != __name__ and mod.startswith("taskgraph"):
                del sys.modules[mod]

        if options["diff"] == "default":
            base_ref = repo.base_ref
        else:
            base_ref = options["diff"]

        try:
            repo.update(base_ref)
            base_ref = repo.head_ref[:12]
            options["output_file"] = os.path.join(
                diffdir, f"{options['graph_attr']}_{base_ref}"
            )
            print(f"Generating {options['graph_attr']} @ {base_ref}", file=sys.stderr)
            generate_taskgraph(options, parameters, logdir)
        finally:
            # Always put the checkout back where it started.
            repo.update(cur_ref)

        # Generate diff(s)
        diffcmd = [
            "diff",
            "-U20",
            "--report-identical-files",
            f"--label={options['graph_attr']}@{base_ref}",
            f"--label={options['graph_attr']}@{cur_ref}",
        ]

        for spec in parameters:
            base_path = os.path.join(diffdir, f"{options['graph_attr']}_{base_ref}")
            cur_path = os.path.join(diffdir, f"{options['graph_attr']}_{cur_ref}")

            params_name = None
            if len(parameters) > 1:
                params_name = Parameters.format_spec(spec)
                base_path += f"_{params_name}"
                cur_path += f"_{params_name}"

            try:
                diff_output = subprocess.run(
                    diffcmd + [base_path, cur_path],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    universal_newlines=True,
                    check=True,
                ).stdout
            except subprocess.CalledProcessError as e:
                # returncode 1 simply means diffs were found
                if e.returncode != 1:
                    print(e.stderr, file=sys.stderr)
                    raise
                diff_output = e.output

            if len(parameters) > 1:
                assert params_name is not None
                print(f"Diff from {params_name}:")
            print(diff_output)

    if len(parameters) > 1:
        print("See '{}' for logs".format(logdir), file=sys.stderr)
@command("build-image", help="Build a Docker image")

View File

@ -2,11 +2,13 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import logging
import os.path
import hashlib
import json
import logging
import os
from datetime import datetime
from pprint import pformat
from urllib.parse import urlparse
from mozbuild.util import ReadOnlyDict, memoize
from mozversioncontrol import get_repository_object
@ -115,6 +117,8 @@ class Parameters(ReadOnlyDict):
def __init__(self, strict=True, **kwargs):
self.strict = strict
self.spec = kwargs.pop("spec", "defaults")
self._id = None
if not self.strict:
# apply defaults to missing parameters
@ -122,6 +126,38 @@ class Parameters(ReadOnlyDict):
ReadOnlyDict.__init__(self, **kwargs)
@property
def id(self):
    """Short identifier for this set of parameters.

    Computed on first access as the first 12 hex characters of the SHA-256
    digest of the parameters serialized to JSON with sorted keys, then
    cached on ``self._id``.
    """
    if self._id:
        return self._id

    serialized = json.dumps(self, sort_keys=True)
    digest = hashlib.sha256(serialized.encode("utf-8")).hexdigest()
    self._id = digest[:12]
    return self._id
@staticmethod
def format_spec(spec):
"""
Get a friendly identifier from a parameters specifier.
Args:
spec (str): Parameters specifier.
Returns:
str: Name to identify parameters by.
"""
if spec is None:
return "defaults"
if any(spec.startswith(s) for s in ("task-id=", "project=")):
return spec
result = urlparse(spec)
if result.scheme in ("http", "https"):
spec = result.path
return os.path.splitext(os.path.basename(spec))[0]
@staticmethod
def _fill_defaults(**kwargs):
now = datetime.utcnow()
@ -227,8 +263,14 @@ class Parameters(ReadOnlyDict):
"""
return release_level(self["project"])
def __str__(self):
    """One-line summary: the parameters id plus the name of their source spec."""
    ident = self.id
    origin = self.format_spec(self.spec)
    return f"Parameters(id={ident}) (from {origin})"
def load_parameters_file(filename, strict=True, overrides=None, trust_domain=None):
def __repr__(self):
    """Pretty-printed dump of the parameters as a plain dict (indent of 2)."""
    plain = dict(self)
    return pformat(plain, indent=2)
def load_parameters_file(spec, strict=True, overrides=None, trust_domain=None):
"""
Load parameters from a path, url, decision task-id or project.
@ -243,18 +285,18 @@ def load_parameters_file(filename, strict=True, overrides=None, trust_domain=Non
if overrides is None:
overrides = {}
if not filename:
if not spec:
return Parameters(strict=strict, **overrides)
try:
# reading parameters from a local parameters.yml file
f = open(filename)
f = open(spec)
except OSError:
# fetching parameters.yml using task task-id, project or supplied url
task_id = None
if filename.startswith("task-id="):
task_id = filename.split("=")[1]
elif filename.startswith("project="):
if spec.startswith("task-id="):
task_id = spec.split("=")[1]
elif spec.startswith("project="):
if trust_domain is None:
raise ValueError(
"Can't specify parameters by project "
@ -262,33 +304,33 @@ def load_parameters_file(filename, strict=True, overrides=None, trust_domain=Non
)
index = "{trust_domain}.v2.{project}.latest.taskgraph.decision".format(
trust_domain=trust_domain,
project=filename.split("=")[1],
project=spec.split("=")[1],
)
task_id = find_task_id(index)
if task_id:
filename = get_artifact_url(task_id, "public/parameters.yml")
logger.info(f"Loading parameters from {filename}")
resp = requests.get(filename, stream=True)
spec = get_artifact_url(task_id, "public/parameters.yml")
logger.info(f"Loading parameters from {spec}")
resp = requests.get(spec, stream=True)
resp.raise_for_status()
f = resp.raw
if filename.endswith(".yml"):
if spec.endswith(".yml"):
kwargs = yaml.load_stream(f)
elif filename.endswith(".json"):
elif spec.endswith(".json"):
kwargs = json.load(f)
else:
raise TypeError(f"Parameters file `{filename}` is not JSON or YAML")
raise TypeError(f"Parameters file `{spec}` is not JSON or YAML")
kwargs.update(overrides)
return Parameters(strict=strict, **kwargs)
def parameters_loader(filename, strict=True, overrides=None):
def parameters_loader(spec, strict=True, overrides=None):
def get_parameters(graph_config):
parameters = load_parameters_file(
filename,
spec,
strict=strict,
overrides=overrides,
trust_domain=graph_config["trust-domain"],

View File

@ -16,6 +16,7 @@
# If not you may need to specify `--no-binary :<package1>,<package2>:` to get
# the source distribution instead for those particular packages.
appdirs==1.4.4
attrs==19.1.0
blessings==1.7
compare-locales==8.1.0

View File

@ -1,7 +1,9 @@
appdirs==1.4.4 \
--hash=sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41 \
--hash=sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128
# via glean-parser
# via
# -r requirements-mach-vendor-python.in
# glean-parser
atomicwrites==1.1.5 \
--hash=sha256:240831ea22da9ab882b551b31d4225591e5e447a68c5e188db5b89ca1d487585 \
--hash=sha256:a24da68318b08ac9c9c45029f4a10371ab5b20e4226738e150e6e7c571630ae6