mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-23 12:51:06 +00:00
0f9fb873ff
Instead, position MOZ_OPTIMIZE_FLAGS to -O3 under PGO on Linux. This creates a dependency on PGO from toolchain.configure, break it by splitting moz_optimize in two functions, one for the optimization status and one for the optimization flags. Differential Revision: https://phabricator.services.mozilla.com/D214843
447 lines
14 KiB
Python
447 lines
14 KiB
Python
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
|
|
# vim: set filetype=python:
|
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
# PGO
|
|
# ==============================================================
|
|
# Look up the llvm-profdata tool along clang_search_path. allow_missing=True
# is passed, so the tool is treated as optional at this point.
llvm_profdata = check_prog(
    "LLVM_PROFDATA",
    ["llvm-profdata"],
    paths=clang_search_path,
    allow_missing=True,
)
|
|
|
|
|
|
@depends_if(llvm_profdata)
@checking("whether llvm-profdata supports 'order' subcommand")
def llvm_profdata_order(profdata):
    """Return True when `<profdata> order --help` exits with status 0."""
    # Only the first element of the result (the return code) is of interest;
    # the other values returned by get_cmd_output are ignored.
    result = get_cmd_output(profdata, "order", "--help")
    return result[0] == 0
|
|
|
|
|
|
# --enable-profile-generate takes an optional value; "cross" is the only
# accepted one.
option(
    "--enable-profile-generate",
    env="MOZ_PROFILE_GENERATE",
    choices=("cross",),
    nargs="?",
    help="Build a PGO instrumented binary",
)


@depends_if("--enable-profile-generate")
def enable_profile_generate(_):
    """Collapse the option value to True, whatever value was passed."""
    return True


# An instrumented build implies MOZ_PGO, and is exposed both as a config
# value and as a define.
imply_option("MOZ_PGO", enable_profile_generate)

set_config("MOZ_PROFILE_GENERATE", enable_profile_generate)

set_define("MOZ_PROFILE_GENERATE", enable_profile_generate)
|
|
|
|
# --enable-profile-use takes an optional value; "cross" is the only accepted
# one.
option(
    "--enable-profile-use",
    env="MOZ_PROFILE_USE",
    choices=("cross",),
    nargs="?",
    help="Use a generated profile during the build",
)


@depends_if("--enable-profile-use")
def enable_profile_use(_):
    """Collapse the option value to True, whatever value was passed."""
    return True


# A profile-use build implies MOZ_PGO and is exposed as a config value.
imply_option("MOZ_PGO", enable_profile_use)
set_config("MOZ_PROFILE_USE", enable_profile_use)
|
|
|
|
|
|
# Exactly one value is expected (nargs=1); it is validated by
# pgo_profile_path.
option(
    "--with-pgo-profile-path",
    nargs=1,
    help=(
        "Path to the directory with unmerged profile data to use during "
        "the build, or to a merged profdata file"
    ),
)
|
|
|
|
|
|
@depends(
    "--with-pgo-profile-path",
    "--enable-profile-use",
    llvm_profdata,
    build_environment,
)
@imports("os")
def pgo_profile_path(path, pgo_use, profdata, build_env):
    """Resolve the profile data path for the build.

    When --with-pgo-profile-path is not given, the result is
    <topobjdir>/instrumented/merged.profdata, with a trailing "/js/src"
    removed from topobjdir first. Otherwise the given value is validated
    (it requires --enable-profile-use and llvm-profdata, and must be an
    existing file given as an absolute path) and returned; any failed check
    aborts through die().
    """
    js_src_suffix = "/js/src"
    objdir = build_env.topobjdir
    if objdir.endswith(js_src_suffix):
        objdir = objdir[: -len(js_src_suffix)]

    if not path:
        return os.path.join(objdir, "instrumented", "merged.profdata")

    # From here on an explicit path was provided.
    if not pgo_use:
        die("Pass --enable-profile-use to use --with-pgo-profile-path.")
    if not profdata:
        die("LLVM_PROFDATA must be set to process the pgo profile.")

    profile = path[0]
    if not os.path.isfile(profile):
        die("Argument to --with-pgo-profile-path must be a file.")
    if not os.path.isabs(profile):
        die("Argument to --with-pgo-profile-path must be an absolute path.")
    return profile


set_config("PGO_PROFILE_PATH", pgo_profile_path)
|
|
|
|
|
|
@depends(
    "--enable-profile-use",
    pgo_profile_path,
    llvm_profdata,
    llvm_profdata_order,
    build_environment,
)
def orderfile_path(profile_use, path, profdata, profdata_order, build_env):
    """Generate <topobjdir>/orderfile.txt from the profile and return its path.

    Returns None when --enable-profile-use is off or when llvm-profdata was
    not found to support the `order` subcommand. Otherwise runs
    `<profdata> order <path> -o <orderfile>` and returns the orderfile path.
    """
    if not (profile_use and profdata_order):
        return None

    # NOTE(review): `os` is used without an @imports("os") decorator here;
    # presumably the configure sandbox supplies os.path.join -- confirm.
    orderfile = os.path.join(build_env.topobjdir, "orderfile.txt")
    check_cmd_output(profdata, "order", path, "-o", orderfile)
    return orderfile
|
|
|
|
|
|
# Only evaluated for --enable-profile-generate builds: try compiling with the
# temporal instrumentation flags to see whether the C compiler accepts them.
pgo_temporal = c_compiler.try_compile(
    when="--enable-profile-generate",
    check_msg="whether the C compiler supports temporal instrumentation",
    flags=["-fprofile-generate", "-mllvm", "-pgo-temporal-instrumentation"],
)
|
|
|
|
|
|
@depends(
    c_compiler,
    select_linker,
    target,
    pgo_profile_path,
    orderfile_path,
    target_is_windows,
    pgo_temporal,
)
def pgo_flags(
    compiler, linker, target, profdata, orderfile, target_is_windows, pgo_temporal
):
    """Compute the compiler and linker flags for PGO builds.

    Returns a namespace with `gen_cflags`/`gen_ldflags` (instrumented build)
    and `use_cflags`/`use_ldflags` (profile-use build) for gcc, clang and
    clang-cl. For any other compiler type nothing is returned (None).

    `profdata` is the value of pgo_profile_path and `orderfile` the value of
    orderfile_path; when an orderfile is available, linker-specific ordering
    flags are added to `use_ldflags`.
    """
    if compiler.type == "gcc":
        return namespace(
            gen_cflags=["-fprofile-generate"],
            gen_ldflags=["-fprofile-generate"],
            use_cflags=["-fprofile-use", "-fprofile-correction", "-Wcoverage-mismatch"],
            use_ldflags=["-fprofile-use"],
        )

    if compiler.type in ("clang-cl", "clang"):
        is_clang_cl = compiler.type == "clang-cl"
        # With clang-cl the profile flags are passed behind a "/clang:" prefix
        # and no link-time generate flags are set.
        prefix = "/clang:" if is_clang_cl else ""
        gen_ldflags = None if is_clang_cl else ["-fprofile-generate"]

        use_ldflags = []
        if orderfile:
            if is_clang_cl:
                use_ldflags += [
                    "-ORDER:@" + orderfile,
                    "/ignore:4037",  # Disable warn missing order symbol
                ]
            elif linker.KIND == "ld64" or (linker.KIND == "lld" and target.os == "OSX"):
                use_ldflags += ["-Wl,-order_file", orderfile]
            elif linker.KIND == "lld":
                use_ldflags += [
                    "-Wl,--symbol-ordering-file",
                    orderfile,
                    "-Wl,--no-warn-symbol-ordering",
                ]

            # Other linkers get no ordering flags, so only log when some were
            # actually added.
            if use_ldflags:
                log.info("Activating PGO-based orderfile")

        gen_cflags = [prefix + "-fprofile-generate"]

        if pgo_temporal:
            gen_cflags += ["-mllvm", "-pgo-temporal-instrumentation"]

        if target_is_windows:
            # native llvm-profdata.exe on Windows can't read profile data
            # if name compression is enabled (which cross-compiling enables
            # by default)
            gen_cflags += ["-mllvm", "-enable-name-compression=false"]

        return namespace(
            gen_cflags=gen_cflags,
            gen_ldflags=gen_ldflags,
            use_cflags=[
                prefix + "-fprofile-use=%s" % profdata,
                # Some error messages about mismatched profile data
                # come in via -Wbackend-plugin, so disable those too.
                "-Wno-error=backend-plugin",
            ],
            use_ldflags=use_ldflags,
        )
|
|
|
|
|
|
# Publish each field of the pgo_flags namespace under its own config name.
set_config("PROFILE_GEN_CFLAGS", pgo_flags.gen_cflags)
set_config("PROFILE_GEN_LDFLAGS", pgo_flags.gen_ldflags)
set_config("PROFILE_USE_CFLAGS", pgo_flags.use_cflags)
set_config("PROFILE_USE_LDFLAGS", pgo_flags.use_ldflags)

option(
    "--with-pgo-jarlog",
    nargs=1,
    help="Use the provided jarlog file when packaging during a profile-use build",
)

# The option value is passed through unchanged when the option is given.
set_config(
    "PGO_JARLOG_PATH",
    depends_if("--with-pgo-jarlog")(lambda p: p),
)
|
|
|
|
|
|
@depends("MOZ_PGO", "--enable-profile-use", "--enable-profile-generate", c_compiler)
def moz_pgo_rust(pgo, profile_use, profile_generate, c_compiler):
    """Return True when cross-language PGO was requested and is usable.

    Nothing is returned (None) when MOZ_PGO is off, when neither
    --enable-profile-use nor --enable-profile-generate was passed, or when
    neither of them carries the "cross" value. Passing both options, or
    asking for "cross" with GCC, aborts through die().
    """
    if not pgo:
        return

    # Enabling PGO through MOZ_PGO only and not --enable* flags.
    if not (profile_use or profile_generate):
        return

    if profile_use and profile_generate:
        die("Cannot build with --enable-profile-use and --enable-profile-generate.")

    # Both options take an optional value, so guard the index with len().
    use_is_cross = len(profile_use) and profile_use[0] == "cross"
    generate_is_cross = len(profile_generate) and profile_generate[0] == "cross"
    if not (use_is_cross or generate_is_cross):
        return

    if c_compiler.type == "gcc":
        die("Cannot use cross-language PGO with GCC.")

    return True


set_config("MOZ_PGO_RUST", moz_pgo_rust)
|
|
|
|
# LTO
|
|
# ==============================================================
|
|
|
|
# --enable-lto accepts any number of values among "full", "thin" and "cross";
# the combinations are validated in lto().
option(
    "--enable-lto",
    env="MOZ_LTO",
    choices=("full", "thin", "cross"),
    nargs="*",
    help="Enable LTO",
)

# Environment-only option (no command-line flag is declared for it).
option(
    env="MOZ_LD64_KNOWN_GOOD",
    nargs=1,
    help="Indicate that ld64 is free of symbol aliasing bugs.",
)

# MOZ_LD64_KNOWN_GOOD is implied whenever MOZ_AUTOMATION is set.
imply_option(
    "MOZ_LD64_KNOWN_GOOD",
    depends_if("MOZ_AUTOMATION")(lambda _: True),
)
|
|
|
|
|
|
@depends(
    "--enable-lto",
    c_compiler,
    select_linker,
    "MOZ_LD64_KNOWN_GOOD",
    target,
    "--enable-profile-generate",
    pass_manager.enabled,
    "--enable-profile-use",
    "MOZ_AUTOMATION",
)
@imports("multiprocessing")
def lto(
    values,
    c_compiler,
    select_linker,
    ld64_known_good,
    target,
    instrumented_build,
    pass_manager,
    pgo_build,
    moz_automation,
):
    """Compute the LTO configuration from --enable-lto and the toolchain.

    Returns None when --enable-lto is not in effect, or when
    --enable-profile-generate is specified (LTO is then disabled with a
    warning). Otherwise returns a namespace with:
      - enabled: always True,
      - cflags / ldflags: lists of compiler and linker flags,
      - rust_lto: "full" or "thin" when "cross" was requested, "" otherwise.

    Invalid combinations (full together with thin, cross or thin with GCC,
    cross with an ld64 not known to be good on Darwin) abort through die().
    """
    if not values:
        return

    # Sanitize LTO modes.
    if "full" in values and "thin" in values:
        die("incompatible --enable-lto choices 'full' and 'thin'")

    # If a value was given to --enable-lto, use that. Otherwise, make the lto
    # mode explicit, using full with gcc, and full or thin with clang depending
    # on the performance benefit.
    # Defaulting to full LTO is costly in terms of compilation time, so we only
    # default to it if MOZ_AUTOMATION and PGO are on, and for some platforms.
    # Based on speedometer3 scores, full lto + pgo is beneficial for Linux and
    # Windows for x86_64 targets.
    if values == () or values == ("cross",):
        if c_compiler.type == "gcc":
            values += ("full",)
        elif (
            pgo_build
            and moz_automation
            and target.os in ("WINNT", "GNU")
            and target.cpu == "x86_64"
        ):
            values += ("full",)
        else:
            values += ("thin",)

    if instrumented_build:
        log.warning("Disabling LTO because --enable-profile-generate is specified")
        return

    if c_compiler.type == "gcc":
        if "cross" in values:
            die("Cross-language LTO is not supported with GCC.")
        if "thin" in values:
            die(
                "gcc does not support thin LTO. Use `--enable-lto` "
                "to enable full LTO for gcc."
            )

    if (
        target.kernel == "Darwin"
        and "cross" in values
        and select_linker.KIND == "ld64"
        and not ld64_known_good
    ):
        die(
            "The Mac linker is known to have a bug that affects cross-language "
            "LTO. If you know that your linker is free from this bug, please "
            "set the environment variable `MOZ_LD64_KNOWN_GOOD=1` and re-run "
            "configure."
        )

    # All validation passed; start collecting flags.
    cflags = []
    ldflags = []

    if c_compiler.type == "clang":
        if "full" in values:
            cflags.append("-flto")
            ldflags.append("-flto")
        else:
            cflags.append("-flto=thin")
            ldflags.append("-flto=thin")

        if target.os == "Android" and "cross" in values:
            # Work around https://github.com/rust-lang/rust/issues/90088
            # by enabling the highest level of SSE the rust targets default
            # to.
            # https://github.com/rust-lang/rust/blob/bdfcb88e8b6203ccb46a2fb6649979b773efc8ac/compiler/rustc_target/src/spec/i686_linux_android.rs#L13
            # https://github.com/rust-lang/rust/blob/8d1083e319841624f64400e1524805a40d725439/compiler/rustc_target/src/spec/x86_64_linux_android.rs#L7
            if target.cpu == "x86":
                ldflags.append("-Wl,-plugin-opt=-mattr=+ssse3")
            elif target.cpu == "x86_64":
                ldflags.append("-Wl,-plugin-opt=-mattr=+sse4.2")
    elif c_compiler.type == "clang-cl":
        if "full" in values:
            cflags.append("-flto")
        else:
            cflags.append("-flto=thin")
        # With clang-cl, -flto can only be used with -c or -fuse-ld=lld.
        # AC_TRY_LINKs during configure don't have -c, so pass -fuse-ld=lld.
        cflags.append("-fuse-ld=lld")

        # Explicitly set the CPU to optimize for so the linker doesn't
        # choose a poor default. Rust compilation by default uses the
        # pentium4 CPU on x86:
        #
        # https://github.com/rust-lang/rust/blob/049a49b91151a88c95fa0d62a53fd0a0ac2c3af9/compiler/rustc_target/src/spec/i686_pc_windows_msvc.rs#L5
        #
        # which specifically supports "long" (multi-byte) nops. See
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1568450#c8 for details.
        #
        # The pentium4 seems like kind of a weird CPU to optimize for, but
        # it seems to have worked out OK thus far. LLVM does not seem to
        # specifically schedule code for the pentium4's deep pipeline, so
        # that probably contributes to it being an OK default for our
        # purposes.
        if target.cpu == "x86":
            ldflags.append("-mllvm:-mcpu=pentium4")
        # This is also the CPU that Rust uses. The LLVM source code
        # recommends this as the "generic 64-bit specific x86 processor model":
        #
        # https://github.com/llvm/llvm-project/blob/e7694f34ab6a12b8bb480cbfcb396d0a64fe965f/llvm/lib/Target/X86/X86.td#L1165-L1187
        if target.cpu == "x86_64":
            ldflags.append("-mllvm:-mcpu=x86-64")
        # We do not need special flags for arm64. Hooray for fixed-length
        # instruction sets.
    else:
        # Any compiler type other than clang/clang-cl: the link step gets
        # -flto=<number of CPUs reported by multiprocessing>.
        num_cores = multiprocessing.cpu_count()
        cflags.append("-flto")
        cflags.append("-flifetime-dse=1")

        ldflags.append("-flto=%s" % num_cores)
        ldflags.append("-flifetime-dse=1")

    # Tell LTO not to inline functions above a certain size, to mitigate
    # binary size growth while still getting good performance.
    # (For hot functions, PGO will put a multiplier on this limit.)
    if target.os == "WINNT":
        ldflags.append("-mllvm:-import-instr-limit=10")
    elif target.kernel == "Darwin":
        ldflags.append("-Wl,-mllvm,-import-instr-limit=10")
    elif c_compiler.type == "clang":
        ldflags.append("-Wl,-plugin-opt=-import-instr-limit=10")

    # If we're using the new pass manager, we can also enable the new PM
    # during LTO. Further we can use the resulting size savings to increase
    # the import limit in hot functions.
    if pass_manager:
        if target.os == "WINNT":
            if c_compiler.version >= "12.0.0" and c_compiler.version < "13.0.0":
                ldflags.append("-opt:ltonewpassmanager")
            if c_compiler.version >= "12.0.0":
                ldflags.append("-mllvm:-import-hot-multiplier=30")
        elif target.kernel == "Darwin":
            ldflags.append("-Wl,-mllvm,-import-hot-multiplier=30")
        else:
            if c_compiler.version < "13.0.0":
                ldflags.append("-Wl,-plugin-opt=new-pass-manager")
            ldflags.append("-Wl,-plugin-opt=-import-hot-multiplier=30")

    # Pick Rust LTO mode in case of cross LTO. Thin is the default.
    if "cross" in values:
        rust_lto = "full" if "full" in values else "thin"
    else:
        rust_lto = ""

    return namespace(
        enabled=True,
        cflags=cflags,
        ldflags=ldflags,
        rust_lto=rust_lto,
    )
|
|
|
|
|
|
@depends(
    dso_flags,
    when=building_with_gnu_compatible_cc
    & gcc_use_gnu_ld
    & ~developer_options
    & ~enable_profile_generate,
)
def remove_dead_symbols(dso_flags):
    """Append -Wl,--gc-sections to the DSO linker options.

    Per the `when` condition this only applies with a GNU-compatible
    compiler and GNU ld, outside developer-options builds and outside
    profile-generate builds. The list is mutated in place; nothing is
    returned.
    """
    link_options = dso_flags.ldopts
    link_options.append("-Wl,--gc-sections")
|
|
|
|
|
|
# Export the lto() results: MOZ_LTO goes to old-configure, the config and the
# defines; the flag lists go to the config and to old-configure; the Rust
# cross-LTO mode goes to the config only.
add_old_configure_assignment("MOZ_LTO", lto.enabled)
set_config("MOZ_LTO", lto.enabled)
set_define("MOZ_LTO", lto.enabled)

set_config("MOZ_LTO_CFLAGS", lto.cflags)
set_config("MOZ_LTO_LDFLAGS", lto.ldflags)
set_config("MOZ_LTO_RUST_CROSS", lto.rust_lto)

add_old_configure_assignment("MOZ_LTO_CFLAGS", lto.cflags)
add_old_configure_assignment("MOZ_LTO_LDFLAGS", lto.ldflags)
|