mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-23 13:50:11 +00:00
b71edfaa4e
This is the first commit in a series that will reformat all the python files in the LLVM repository. Reformatting is done with `black`. See more information here: https://discourse.llvm.org/t/rfc-document-and-standardize-python-code-style Reviewed By: jhenderson, JDevlieghere, MatzeB Differential Revision: https://reviews.llvm.org/D150545
489 lines
16 KiB
Python
Executable File
489 lines
16 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
This script:
|
|
- Builds clang with user-defined flags
|
|
- Uses that clang to build an instrumented clang, which can be used to collect
|
|
PGO samples
|
|
- Builds a user-defined set of sources (default: clang) to act as a
|
|
"benchmark" to generate a PGO profile
|
|
- Builds clang once more with the PGO profile generated above
|
|
|
|
This is a total of four clean builds of clang (by default). This may take a
|
|
while. :)
|
|
|
|
This script duplicates https://llvm.org/docs/AdvancedBuilds.html#multi-stage-pgo
|
|
Eventually, it will be updated to instead call the cmake cache mentioned there.
|
|
"""
|
|
|
|
import argparse
|
|
import collections
|
|
import multiprocessing
|
|
import os
|
|
import shlex
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
|
|
### User configuration
|
|
|
|
|
|
# If you want to use a different 'benchmark' than building clang, make this
|
|
# function do what you want. out_dir is the build directory for clang, so all
|
|
# of the clang binaries will live under "${out_dir}/bin/". Using clang in
|
|
# ${out_dir} will magically have the profiles go to the right place.
|
|
#
|
|
# You may assume that out_dir is a freshly-built directory that you can reach
|
|
# in to build more things, if you'd like.
|
|
def _run_benchmark(env, out_dir, include_debug_info):
    """Drive the clang found in `out_dir` through a profile-generating workload.

    The workload is: run the `check-llvm` and `check-clang` targets inside
    `out_dir`, then configure and build the `all` target in a fresh
    "instrumentation_run" output subdirectory, using the compilers from
    `out_dir`.

    Args:
        env: the Env describing this run.
        out_dir: build directory whose "bin/" holds the clang to exercise.
        include_debug_info: when true, the benchmark build is configured with
            CMAKE_BUILD_TYPE=RelWithDebInfo.
    """
    # The test suites are a cheap way to widen coverage: `check-llvm` touches
    # the non-x86 backends a little (if they are configured), and
    # `check-clang` feeds more C through the compiler, including diagnostic
    # paths. The `if (stuff_is_broken) { diag() ... }` branches should still
    # be weighted heavily towards not-taken, since all of LLVM/etc is built.
    test_targets = ["check-llvm", "check-clang"]
    _build_things_in(env, out_dir, what=test_targets)

    # Building the tablegens gives us coverage, so do not reuse prebuilt ones.
    # (out_dir may not have them anyway, but that is less of an issue.)
    bootstrap = _get_cmake_invocation_for_bootstrap_from(
        env, out_dir, skip_tablegens=False
    )
    if include_debug_info:
        bootstrap.add_flag("CMAKE_BUILD_TYPE", "RelWithDebInfo")

    benchmark_dir = env.output_subdir("instrumentation_run")
    _run_fresh_cmake(env, bootstrap, benchmark_dir)

    # Build everything: the more data we collect, the better.
    _build_things_in(env, benchmark_dir, what=["all"])
|
|
|
|
|
|
### Script
|
|
|
|
|
|
class CmakeInvocation:
    """Collects the generator, source directory and -D flags of a cmake call.

    Compiler and linker flag variables are pre-registered as lists so that
    repeated additions accumulate; every other flag holds a single string.
    """

    _cflags = ["CMAKE_C_FLAGS", "CMAKE_CXX_FLAGS"]
    _ldflags = [
        "CMAKE_EXE_LINKER_FLAGS",
        "CMAKE_MODULE_LINKER_FLAGS",
        "CMAKE_SHARED_LINKER_FLAGS",
    ]

    def __init__(self, cmake, maker, cmake_dir):
        """Start an invocation of `cmake` using generator `maker` on `cmake_dir`."""
        self._prefix = [cmake, "-G", maker, cmake_dir]

        # Map of str -> (list|str). The list-valued entries are created up
        # front so add_cflags/add_ldflags can always extend them.
        self._flags = {
            name: [] for name in CmakeInvocation._cflags + CmakeInvocation._ldflags
        }

    def add_new_flag(self, key, value):
        """Like add_flag, but refuse to replace an existing scalar value."""
        self.add_flag(key, value, allow_overwrites=False)

    def add_flag(self, key, value, allow_overwrites=True):
        """Set `key` to `value`, or append `value` if `key` holds a list.

        Raises:
            ValueError: if `key` already holds a non-list value and
                `allow_overwrites` is false.
        """
        already_set = key in self._flags
        if already_set and isinstance(self._flags[key], list):
            self._flags[key].append(value)
        elif already_set and not allow_overwrites:
            raise ValueError("Invalid overwrite of %s requested" % key)
        else:
            self._flags[key] = value

    def _extend_each(self, names, flags):
        """Append every element of `flags` to each list-valued flag in `names`."""
        # A bare string would be split into characters ('-', 'O', '2', ...),
        # which is never what the caller wants.
        assert not isinstance(flags, str)
        for name in names:
            self._flags[name].extend(flags)

    def add_cflags(self, flags):
        """Append the iterable `flags` to both the C and C++ compiler flags."""
        self._extend_each(CmakeInvocation._cflags, flags)

    def add_ldflags(self, flags):
        """Append the iterable `flags` to every linker flag variable."""
        self._extend_each(CmakeInvocation._ldflags, flags)

    def to_args(self):
        """Return the argv list for this invocation, with flags sorted by name."""
        argv = list(self._prefix)
        for name in sorted(self._flags):
            setting = self._flags[name]
            if isinstance(setting, list):
                # List-valued flags are preloaded; emit nothing for the ones
                # that never received a value.
                if not setting:
                    continue
                setting = " ".join(setting)

            if setting == "":
                argv.append("-D" + name)
            else:
                argv.append("-D" + name + "=" + setting)
        return argv
|
|
|
|
|
|
class Env:
    """Holds the settings for one run of the script and runs its commands."""

    def __init__(self, llvm_dir, use_make, output_dir, default_cmake_args, dry_run):
        """Record the run settings.

        `default_cmake_args` is copied, so later changes to the caller's
        mapping do not affect this object.
        """
        self.dry_run = dry_run
        self.use_make = use_make
        self.llvm_dir = llvm_dir
        self.output_dir = output_dir
        self.default_cmake_args = default_cmake_args.copy()

    def get_default_cmake_args_kv(self):
        """Return the (name, value) pairs of the default cmake arguments."""
        return self.default_cmake_args.items()

    def get_cmake_maker(self):
        """Return the cmake generator name matching the chosen build tool."""
        return "Unix Makefiles" if self.use_make else "Ninja"

    def get_make_command(self):
        """Return the argv prefix of the build tool (targets get appended)."""
        if not self.use_make:
            return ["ninja"]
        # make gets one job per CPU reported by multiprocessing.
        return ["make", "-j" + str(multiprocessing.cpu_count())]

    def output_subdir(self, name):
        """Return the path of `name` inside the output directory."""
        return os.path.join(self.output_dir, name)

    def has_llvm_subproject(self, name):
        """Return whether subproject `name` exists as a sibling of llvm_dir.

        Raises:
            ValueError: if `name` is neither "compiler-rt" nor "clang".
        """
        if name not in ("compiler-rt", "clang"):
            raise ValueError("Unknown subproject: %s" % name)

        sibling = os.path.join(self.llvm_dir, "../" + name)
        return os.path.isdir(sibling)

    # Note that we don't allow capturing stdout/stderr. This works quite nicely
    # with dry_run.
    def run_command(self, cmd, cwd=None, check=False, silent_unless_error=False):
        """Announce `cmd`, then run it unless this is a dry run.

        Args:
            cmd: argv list handed to subprocess.Popen.
            cwd: directory to run in; None means the current directory.
            check: raise CalledProcessError on a nonzero exit status.
            silent_unless_error: capture stdout and stderr together and print
                them only if the command exits with a nonzero status.
        """
        print("Running `%s` in %s" % (cmd, shlex.quote(cwd or os.getcwd())))
        if self.dry_run:
            return

        if silent_unless_error:
            out_target, err_target = subprocess.PIPE, subprocess.STDOUT
        else:
            out_target = err_target = None

        # Don't use subprocess.run because it's >= py3.5 only, and it's not too
        # much extra effort to get what it gives us anyway.
        proc = subprocess.Popen(
            cmd,
            stdin=subprocess.DEVNULL,
            stdout=out_target,
            stderr=err_target,
            cwd=cwd,
        )
        captured, _ = proc.communicate()
        exit_status = proc.wait(timeout=0)

        if exit_status:
            if silent_unless_error:
                print(captured.decode("utf-8", "ignore"))
            if check:
                raise subprocess.CalledProcessError(
                    returncode=exit_status, cmd=cmd, output=captured, stderr=None
                )
|
|
|
|
|
|
def _get_default_cmake_invocation(env):
    """Build a CmakeInvocation for env.llvm_dir carrying env's default flags.

    Each default is added with add_new_flag, so it may not overwrite a scalar
    flag that is already set.
    """
    invocation = CmakeInvocation(
        cmake="cmake", maker=env.get_cmake_maker(), cmake_dir=env.llvm_dir
    )
    for flag_name, flag_value in env.get_default_cmake_args_kv():
        invocation.add_new_flag(flag_name, flag_value)
    return invocation
|
|
|
|
|
|
def _get_cmake_invocation_for_bootstrap_from(env, out_dir, skip_tablegens=True):
    """Return a default cmake invocation that compiles with out_dir's clang.

    Args:
        env: the Env describing this run.
        out_dir: build directory whose "bin/" supplies clang and clang++.
        skip_tablegens: when true, point LLVM_TABLEGEN / CLANG_TABLEGEN at the
            binaries already present in out_dir instead of rebuilding them.
    """
    bin_dir = os.path.join(out_dir, "bin")
    clang = os.path.join(bin_dir, "clang")

    cmake = _get_default_cmake_invocation(env)
    cmake.add_new_flag("CMAKE_C_COMPILER", clang)
    cmake.add_new_flag("CMAKE_CXX_COMPILER", clang + "++")

    # We often get no value out of building new tblgens; the previous build
    # should have them. It's still correct to build them, just slower.
    if skip_tablegens:
        prebuilt_tablegens = (
            ("LLVM_TABLEGEN", "llvm-tblgen"),
            ("CLANG_TABLEGEN", "clang-tblgen"),
        )
        for flag_name, binary in prebuilt_tablegens:
            candidate = os.path.join(bin_dir, binary)
            # Only reuse a tablegen that exists, since the user may supply
            # their own stage1 directory (generally where everything is
            # sourced from). Dry runs hope for the best from the user.
            if env.dry_run or os.path.exists(candidate):
                cmake.add_new_flag(flag_name, candidate)

    return cmake
|
|
|
|
|
|
def _build_things_in(env, target_dir, what):
    """Run env's build tool in `target_dir` for the list of targets `what`.

    The command is run with check=True, so a failing build raises.
    """
    build_cmd = env.get_make_command() + what
    env.run_command(build_cmd, check=True, cwd=target_dir)
|
|
|
|
|
|
def _run_fresh_cmake(env, cmake, target_dir):
    """Configure `cmake` in a newly created, empty `target_dir`.

    Unless this is a dry run, any existing `target_dir` is removed and the
    directory is recreated before cmake runs. cmake's output is shown only if
    it fails, and a failure raises.
    """
    wipe_and_recreate = not env.dry_run
    if wipe_and_recreate:
        try:
            shutil.rmtree(target_dir)
        except FileNotFoundError:
            # Nothing to clean up: the directory did not exist yet.
            pass
        os.makedirs(target_dir, mode=0o755)

    env.run_command(
        cmake.to_args(), cwd=target_dir, check=True, silent_unless_error=True
    )
|
|
|
|
|
|
def _build_stage1_clang(env):
    """Configure and build the stage1 toolchain; return its build directory.

    The "clang", "llvm-profdata" and "profile" targets are built in the
    "stage1" output subdirectory using the default cmake invocation.
    """
    stage1_dir = env.output_subdir("stage1")
    _run_fresh_cmake(env, _get_default_cmake_invocation(env), stage1_dir)

    stage1_targets = ["clang", "llvm-profdata", "profile"]
    _build_things_in(env, stage1_dir, what=stage1_targets)
    return stage1_dir
|
|
|
|
|
|
def _generate_instrumented_clang_profile(env, stage1_dir, profile_dir, output_file):
    """Merge the raw profiles in `profile_dir` into `output_file`.

    Runs `llvm-profdata merge` from `stage1_dir`/bin over every "*.profraw"
    file directly inside `profile_dir`. The inputs are passed in sorted order
    so the command is the same from run to run regardless of the order in
    which the OS lists the directory.

    Args:
        env: the Env describing this run.
        stage1_dir: build directory providing "bin/llvm-profdata".
        profile_dir: directory holding the ".profraw" files.
        output_file: path the merged profile is written to.

    Raises:
        ValueError: if this is not a dry run and `profile_dir` contains no
            ".profraw" files.
        subprocess.CalledProcessError: if llvm-profdata fails (raised by
            env.run_command, which is called with check=True).
    """
    llvm_profdata = os.path.join(stage1_dir, "bin", "llvm-profdata")
    if env.dry_run:
        # The directory may not exist in a dry run, so just show a glob.
        profiles = [os.path.join(profile_dir, "*.profraw")]
    else:
        profiles = sorted(
            os.path.join(profile_dir, f)
            for f in os.listdir(profile_dir)
            if f.endswith(".profraw")
        )
        if not profiles:
            # Fail with a clear message instead of an llvm-profdata invocation
            # that has no inputs.
            raise ValueError(
                "No .profraw files found in %s; nothing to merge" % profile_dir
            )

    cmd = [llvm_profdata, "merge", "-output=" + output_file] + profiles
    env.run_command(cmd, check=True)
|
|
|
|
|
|
def _build_instrumented_clang(env, stage1_dir):
    """Build an IR-instrumented clang and lld using the stage1 toolchain.

    Args:
        env: the Env describing this run.
        stage1_dir: absolute path of the stage1 build directory.

    Returns:
        A (build_dir, profiles_dir) tuple, where profiles_dir is the
        "profiles" directory inside the instrumented build directory.
    """
    assert os.path.isabs(stage1_dir)

    instrumented_dir = os.path.join(env.output_dir, "instrumented")
    invocation = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)

    extra_flags = (
        ("LLVM_BUILD_INSTRUMENTED", "IR"),
        # libcxx's configure step messes with our link order: we'll link
        # libclang_rt.profile after libgcc, and the former requires atexit
        # from the latter. So, configure checks fail.
        #
        # Since we don't need libcxx or compiler-rt anyway, just disable them.
        ("LLVM_BUILD_RUNTIME", "No"),
    )
    for flag_name, flag_value in extra_flags:
        invocation.add_new_flag(flag_name, flag_value)

    _run_fresh_cmake(env, invocation, instrumented_dir)
    _build_things_in(env, instrumented_dir, what=["clang", "lld"])

    return instrumented_dir, os.path.join(instrumented_dir, "profiles")
|
|
|
|
|
|
def _build_optimized_clang(env, stage1_dir, profdata_file):
    """Build the final clang with the merged profile; return its build dir.

    Args:
        env: the Env describing this run.
        stage1_dir: build directory of the toolchain used to compile.
        profdata_file: merged profile passed to cmake as LLVM_PROFDATA_FILE.

    Raises:
        ValueError: if this is not a dry run and `profdata_file` is missing.
    """
    if not (env.dry_run or os.path.exists(profdata_file)):
        raise ValueError(
            "Looks like the profdata file at %s doesn't exist" % profdata_file
        )

    optimized_dir = os.path.join(env.output_dir, "optimized")
    invocation = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    invocation.add_new_flag("LLVM_PROFDATA_FILE", os.path.abspath(profdata_file))

    # Hash mismatches in `main` in tools/etc. produce complaints; silence
    # them.
    invocation.add_cflags(["-Wno-backend-plugin"])

    _run_fresh_cmake(env, invocation, optimized_dir)
    _build_things_in(env, optimized_dir, what=["clang"])
    return optimized_dir
|
|
|
|
|
|
# Options that steer _main, as produced by _parse_args.
Args = collections.namedtuple(
    typename="Args",
    field_names=(
        # Whether to run the final, PGO-optimized build.
        "do_optimized_build",
        # Whether the benchmark build should include debug info.
        "include_debug_info",
        # Where the merged profile is written.
        "profile_location",
        # Existing stage1 build directory, or None to build one.
        "stage1_dir",
    ),
)
|
|
|
|
|
|
def _parse_args():
    """Parse the command line into an (Env, Args) pair.

    Returns:
        A tuple (env, result_args): `env` carries the directories, build tool
        choice, dry-run setting and default cmake arguments; `result_args`
        carries the remaining options as an Args tuple.

    Raises:
        ValueError: if a --cmake-extra-arg value starts with "-" but is not a
            "-D" argument.
    """
    parser = argparse.ArgumentParser(
        description="Builds LLVM and Clang with instrumentation, collects "
        "instrumentation profiles for them, and (optionally) builds things "
        "with these PGO profiles. By default, it's assumed that you're "
        "running this from your LLVM root, and all build artifacts will be "
        "saved to $PWD/out."
    )
    parser.add_argument(
        "--cmake-extra-arg",
        action="append",
        default=[],
        help="an extra arg to pass to all cmake invocations. Note that this "
        "is interpreted as a -D argument, e.g. --cmake-extra-arg FOO=BAR will "
        "be passed as -DFOO=BAR. This may be specified multiple times.",
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="print commands instead of running them"
    )
    parser.add_argument(
        "--llvm-dir",
        default=".",
        help="directory containing an LLVM checkout (default: $PWD)",
    )
    parser.add_argument(
        "--no-optimized-build",
        action="store_true",
        help="disable the final, PGO-optimized build",
    )
    parser.add_argument(
        "--out-dir", help="directory to write artifacts to (default: $llvm_dir/out)"
    )
    parser.add_argument(
        "--profile-output",
        help="where to output the profile (default is $out/pgo_profile.prof)",
    )
    parser.add_argument(
        "--stage1-dir",
        help="instead of having an initial build of everything, use the given "
        "directory. It is expected that this directory will have clang, "
        "llvm-profdata, and the appropriate libclang_rt.profile already built",
    )
    parser.add_argument(
        "--use-debug-info-in-benchmark",
        action="store_true",
        # This flag switches the benchmark build to RelWithDebInfo.
        help="use a RelWithDebInfo build instead of a regular build in the "
        "benchmark. "
        "This increases benchmark execution time and disk space requirements, "
        "but gives more coverage over debuginfo bits in LLVM and clang.",
    )
    parser.add_argument(
        "--use-make",
        action="store_true",
        # Fall back to make automatically when ninja is not on PATH.
        default=shutil.which("ninja") is None,
        help="use Makefiles instead of ninja",
    )

    args = parser.parse_args()

    llvm_dir = os.path.abspath(args.llvm_dir)
    if args.out_dir is None:
        output_dir = os.path.join(llvm_dir, "out")
    else:
        output_dir = os.path.abspath(args.out_dir)

    # Defaults; any of these can be replaced through --cmake-extra-arg.
    extra_args = {
        "CMAKE_BUILD_TYPE": "Release",
        "LLVM_ENABLE_PROJECTS": "clang;compiler-rt;lld",
    }
    for arg in args.cmake_extra_arg:
        if arg.startswith("-D"):
            arg = arg[2:]
        elif arg.startswith("-"):
            raise ValueError(
                "Unknown not- -D arg encountered; you may need "
                "to tweak the source..."
            )
        # "KEY=VALUE" splits at the first "="; a bare "KEY" gets an empty
        # value.
        key, _, val = arg.partition("=")
        extra_args[key] = val

    env = Env(
        default_cmake_args=extra_args,
        dry_run=args.dry_run,
        llvm_dir=llvm_dir,
        output_dir=output_dir,
        use_make=args.use_make,
    )

    if args.profile_output is not None:
        profile_location = args.profile_output
    else:
        profile_location = os.path.join(env.output_dir, "pgo_profile.prof")

    # The instrumented build asserts that the stage1 directory is absolute,
    # so resolve a user-supplied relative path here, like the other
    # directories above.
    if args.stage1_dir is not None:
        stage1_dir = os.path.abspath(args.stage1_dir)
    else:
        stage1_dir = None

    result_args = Args(
        do_optimized_build=not args.no_optimized_build,
        include_debug_info=args.use_debug_info_in_benchmark,
        profile_location=profile_location,
        stage1_dir=stage1_dir,
    )

    return env, result_args
|
|
|
|
|
|
def _looks_like_llvm_dir(directory):
    """Arbitrary set of heuristics to determine if `directory` is an llvm dir.

    Errs on the side of false-positives.

    Returns True when `directory` contains all of the expected top-level
    entries and its "include" subdirectory contains an "llvm" entry. Returns
    False otherwise, including when `directory` does not exist or is not a
    directory.
    """
    try:
        contents = set(os.listdir(directory))
    except (FileNotFoundError, NotADirectoryError):
        # A missing or non-directory path is simply "not an LLVM dir"; let the
        # caller report that instead of crashing with a traceback.
        return False

    expected_contents = [
        "CODE_OWNERS.TXT",
        "cmake",
        "docs",
        "include",
        "utils",
    ]

    if not all(c in contents for c in expected_contents):
        return False

    try:
        include_listing = os.listdir(os.path.join(directory, "include"))
    except NotADirectoryError:
        # "include" exists but is a plain file.
        return False

    return "llvm" in include_listing
|
|
|
|
|
|
def _die(*args, **kwargs):
    """Print a message to stderr, then exit the process with status 1.

    Arguments are forwarded to print(); any `file` keyword supplied by the
    caller is replaced with sys.stderr.
    """
    print(*args, **dict(kwargs, file=sys.stderr))
    sys.exit(1)
|
|
|
|
|
|
def _main():
    """Run the whole pipeline: stage1, instrumented build, benchmark, profile
    merge and (unless disabled) the final PGO-optimized build.

    Exits with status 1 if the LLVM directory does not look like an LLVM
    checkout or a required sibling subproject is missing.
    """
    env, args = _parse_args()

    if not _looks_like_llvm_dir(env.llvm_dir):
        _die("Looks like %s isn't an LLVM directory; please see --help" % env.llvm_dir)

    # Subprojects are looked up as siblings of the LLVM directory, so report
    # the location that was actually checked.
    for subproject in ("clang", "compiler-rt"):
        if not env.has_llvm_subproject(subproject):
            expected = os.path.normpath(os.path.join(env.llvm_dir, "..", subproject))
            _die("Need a %s checkout at %s" % (subproject, expected))

    def status(*args):
        # Progress goes to stderr; stdout is kept for the final results.
        print(*args, file=sys.stderr)

    if args.stage1_dir is None:
        status("*** Building stage1 clang...")
        stage1_out = _build_stage1_clang(env)
    else:
        # The instrumented build requires an absolute stage1 directory.
        stage1_out = os.path.abspath(args.stage1_dir)

    status("*** Building instrumented clang...")
    instrumented_out, profile_dir = _build_instrumented_clang(env, stage1_out)
    status("*** Running profdata benchmarks...")
    _run_benchmark(env, instrumented_out, args.include_debug_info)
    status("*** Generating profile...")
    _generate_instrumented_clang_profile(
        env, stage1_out, profile_dir, args.profile_location
    )

    print("Final profile:", args.profile_location)
    if args.do_optimized_build:
        status("*** Building PGO-optimized binaries...")
        optimized_out = _build_optimized_clang(env, stage1_out, args.profile_location)
        print("Final build directory:", optimized_out)
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    _main()
|