mirror of
https://github.com/RPCS3/llvm.git
synced 2026-01-31 01:25:19 +01:00
Summary: It does not currently make sense to use WebAssembly features in some functions but not others, so this CL adds an IR pass that takes the union of all used feature sets and applies it to each function in the module. This allows us to prevent atomics from being lowered away if some function has opted in to using them. When atomics is not enabled anywhere, we detect whether there exists any atomic operations or thread local storage that would be stripped and disallow linking with objects that contain atomics if and only if atomics or tls are stripped. When atomics is enabled, mark it as used but do not require it of other objects in the link. These changes allow libraries that do not use atomics to be built once and linked into both single-threaded and multithreaded binaries. Reviewers: aheejin, sbc100, dschuff Subscribers: jgravelle-google, hiraditya, sunfish, jfb, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59625 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357226 91177308-0d34-0410-b5e6-96231b3b80d8
486 lines
18 KiB
C++
486 lines
18 KiB
C++
//===- WebAssemblyTargetMachine.cpp - Define TargetMachine for WebAssembly -==//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
///
|
|
/// \file
|
|
/// This file defines the WebAssembly-specific subclass of TargetMachine.
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "WebAssemblyTargetMachine.h"
|
|
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
|
|
#include "WebAssembly.h"
|
|
#include "WebAssemblyMachineFunctionInfo.h"
|
|
#include "WebAssemblyTargetObjectFile.h"
|
|
#include "WebAssemblyTargetTransformInfo.h"
|
|
#include "llvm/CodeGen/MIRParser/MIParser.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/CodeGen/Passes.h"
|
|
#include "llvm/CodeGen/RegAllocRegistry.h"
|
|
#include "llvm/CodeGen/TargetPassConfig.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/Support/TargetRegistry.h"
|
|
#include "llvm/Target/TargetOptions.h"
|
|
#include "llvm/Transforms/Scalar.h"
|
|
#include "llvm/Transforms/Scalar/LowerAtomic.h"
|
|
#include "llvm/Transforms/Utils.h"
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "wasm"
|
|
|
|
// Emscripten's asm.js-style exception handling
|
|
static cl::opt<bool> EnableEmException(
|
|
"enable-emscripten-cxx-exceptions",
|
|
cl::desc("WebAssembly Emscripten-style exception handling"),
|
|
cl::init(false));
|
|
|
|
// Emscripten's asm.js-style setjmp/longjmp handling
|
|
static cl::opt<bool> EnableEmSjLj(
|
|
"enable-emscripten-sjlj",
|
|
cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"),
|
|
cl::init(false));
|
|
|
|
extern "C" void LLVMInitializeWebAssemblyTarget() {
|
|
// Register the target.
|
|
RegisterTargetMachine<WebAssemblyTargetMachine> X(
|
|
getTheWebAssemblyTarget32());
|
|
RegisterTargetMachine<WebAssemblyTargetMachine> Y(
|
|
getTheWebAssemblyTarget64());
|
|
|
|
// Register backend passes
|
|
auto &PR = *PassRegistry::getPassRegistry();
|
|
initializeWebAssemblyAddMissingPrototypesPass(PR);
|
|
initializeWebAssemblyLowerEmscriptenEHSjLjPass(PR);
|
|
initializeLowerGlobalDtorsPass(PR);
|
|
initializeFixFunctionBitcastsPass(PR);
|
|
initializeOptimizeReturnedPass(PR);
|
|
initializeWebAssemblyArgumentMovePass(PR);
|
|
initializeWebAssemblySetP2AlignOperandsPass(PR);
|
|
initializeWebAssemblyReplacePhysRegsPass(PR);
|
|
initializeWebAssemblyPrepareForLiveIntervalsPass(PR);
|
|
initializeWebAssemblyOptimizeLiveIntervalsPass(PR);
|
|
initializeWebAssemblyMemIntrinsicResultsPass(PR);
|
|
initializeWebAssemblyRegStackifyPass(PR);
|
|
initializeWebAssemblyRegColoringPass(PR);
|
|
initializeWebAssemblyExplicitLocalsPass(PR);
|
|
initializeWebAssemblyFixIrreducibleControlFlowPass(PR);
|
|
initializeWebAssemblyLateEHPreparePass(PR);
|
|
initializeWebAssemblyExceptionInfoPass(PR);
|
|
initializeWebAssemblyCFGSortPass(PR);
|
|
initializeWebAssemblyCFGStackifyPass(PR);
|
|
initializeWebAssemblyLowerBrUnlessPass(PR);
|
|
initializeWebAssemblyRegNumberingPass(PR);
|
|
initializeWebAssemblyPeepholePass(PR);
|
|
initializeWebAssemblyCallIndirectFixupPass(PR);
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// WebAssembly Lowering public interface.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Returns the relocation model to use: the caller's choice when one was
/// supplied, otherwise Reloc::Static.
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
  // The static relocation model is the default. It should always be more
  // optimal than PIC since the static linker can determine all global
  // addresses and assume direct function calls.
  return RM.hasValue() ? *RM : Reloc::Static;
}
|
|
|
|
/// Create an WebAssembly architecture model.
|
|
///
|
|
WebAssemblyTargetMachine::WebAssemblyTargetMachine(
|
|
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
|
|
const TargetOptions &Options, Optional<Reloc::Model> RM,
|
|
Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
|
|
: LLVMTargetMachine(T,
|
|
TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128"
|
|
: "e-m:e-p:32:32-i64:64-n32:64-S128",
|
|
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
|
|
getEffectiveCodeModel(CM, CodeModel::Large), OL),
|
|
TLOF(new WebAssemblyTargetObjectFile()) {
|
|
// WebAssembly type-checks instructions, but a noreturn function with a return
|
|
// type that doesn't match the context will cause a check failure. So we lower
|
|
// LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's
|
|
// 'unreachable' instructions which is meant for that case.
|
|
this->Options.TrapUnreachable = true;
|
|
|
|
// WebAssembly treats each function as an independent unit. Force
|
|
// -ffunction-sections, effectively, so that we can emit them independently.
|
|
this->Options.FunctionSections = true;
|
|
this->Options.DataSections = true;
|
|
this->Options.UniqueSectionNames = true;
|
|
|
|
initAsmInfo();
|
|
|
|
// Note that we don't use setRequiresStructuredCFG(true). It disables
|
|
// optimizations than we're ok with, and want, such as critical edge
|
|
// splitting and tail merging.
|
|
}
|
|
|
|
// Out-of-line defaulted destructor. The trailing "anchor" note presumably
// refers to pinning the class's vtable to this translation unit.
// NOTE(review): confirm against the class declaration in the header.
WebAssemblyTargetMachine::~WebAssemblyTargetMachine() = default; // anchor.
|
|
|
|
/// Returns the subtarget for the given CPU and feature string, creating and
/// caching it in SubtargetMap (keyed by CPU + FS) on first request.
const WebAssemblySubtarget *
WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU,
                                           std::string FS) const {
  auto &Cached = SubtargetMap[CPU + FS];
  if (Cached)
    return Cached.get();

  // First request for this CPU/feature combination: build it now.
  Cached =
      llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
  return Cached.get();
}
|
|
|
|
/// Returns the subtarget for function \p F, using its "target-cpu" and
/// "target-features" attributes where present and the target machine's own
/// CPU / feature string otherwise.
const WebAssemblySubtarget *
WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
  Attribute CPUAttr = F.getFnAttribute("target-cpu");
  Attribute FSAttr = F.getFnAttribute("target-features");

  // Start from the machine-wide defaults, then let the function override them.
  std::string CPU = TargetCPU;
  if (!CPUAttr.hasAttribute(Attribute::None))
    CPU = CPUAttr.getValueAsString().str();

  std::string FS = TargetFS;
  if (!FSAttr.hasAttribute(Attribute::None))
    FS = FSAttr.getValueAsString().str();

  // This must happen before a new subtarget is created, since any creation
  // will depend on the TM and the code generation flags on the function that
  // reside in TargetOptions.
  resetTargetOptions(F);

  return getSubtargetImpl(CPU, FS);
}
|
|
|
|
namespace {
|
|
|
|
class CoalesceFeaturesAndStripAtomics final : public ModulePass {
|
|
// Take the union of all features used in the module and use it for each
|
|
// function individually, since having multiple feature sets in one module
|
|
// currently does not make sense for WebAssembly. If atomics are not enabled,
|
|
// also strip atomic operations and thread local storage.
|
|
static char ID;
|
|
WebAssemblyTargetMachine *WasmTM;
|
|
|
|
public:
|
|
CoalesceFeaturesAndStripAtomics(WebAssemblyTargetMachine *WasmTM)
|
|
: ModulePass(ID), WasmTM(WasmTM) {}
|
|
|
|
bool runOnModule(Module &M) override {
|
|
FeatureBitset Features = coalesceFeatures(M);
|
|
|
|
std::string FeatureStr = getFeatureString(Features);
|
|
for (auto &F : M)
|
|
replaceFeatures(F, FeatureStr);
|
|
|
|
bool Stripped = false;
|
|
if (!Features[WebAssembly::FeatureAtomics]) {
|
|
Stripped |= stripAtomics(M);
|
|
Stripped |= stripThreadLocals(M);
|
|
}
|
|
|
|
recordFeatures(M, Features, Stripped);
|
|
|
|
// Conservatively assume we have made some change
|
|
return true;
|
|
}
|
|
|
|
private:
|
|
FeatureBitset coalesceFeatures(const Module &M) {
|
|
FeatureBitset Features =
|
|
WasmTM
|
|
->getSubtargetImpl(WasmTM->getTargetCPU(),
|
|
WasmTM->getTargetFeatureString())
|
|
->getFeatureBits();
|
|
for (auto &F : M)
|
|
Features |= WasmTM->getSubtargetImpl(F)->getFeatureBits();
|
|
return Features;
|
|
}
|
|
|
|
std::string getFeatureString(const FeatureBitset &Features) {
|
|
std::string Ret;
|
|
for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
|
|
if (Features[KV.Value])
|
|
Ret += (StringRef("+") + KV.Key + ",").str();
|
|
}
|
|
return Ret;
|
|
}
|
|
|
|
void replaceFeatures(Function &F, const std::string &Features) {
|
|
F.removeFnAttr("target-features");
|
|
F.removeFnAttr("target-cpu");
|
|
F.addFnAttr("target-features", Features);
|
|
}
|
|
|
|
bool stripAtomics(Module &M) {
|
|
// Detect whether any atomics will be lowered, since there is no way to tell
|
|
// whether the LowerAtomic pass lowers e.g. stores.
|
|
bool Stripped = false;
|
|
for (auto &F : M) {
|
|
for (auto &B : F) {
|
|
for (auto &I : B) {
|
|
if (I.isAtomic()) {
|
|
Stripped = true;
|
|
goto done;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
done:
|
|
if (!Stripped)
|
|
return false;
|
|
|
|
LowerAtomicPass Lowerer;
|
|
FunctionAnalysisManager FAM;
|
|
for (auto &F : M)
|
|
Lowerer.run(F, FAM);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool stripThreadLocals(Module &M) {
|
|
bool Stripped = false;
|
|
for (auto &GV : M.globals()) {
|
|
if (GV.getThreadLocalMode() !=
|
|
GlobalValue::ThreadLocalMode::NotThreadLocal) {
|
|
Stripped = true;
|
|
GV.setThreadLocalMode(GlobalValue::ThreadLocalMode::NotThreadLocal);
|
|
}
|
|
}
|
|
return Stripped;
|
|
}
|
|
|
|
void recordFeatures(Module &M, const FeatureBitset &Features, bool Stripped) {
|
|
for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
|
|
std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str();
|
|
if (KV.Value == WebAssembly::FeatureAtomics && Stripped) {
|
|
// "atomics" is special: code compiled without atomics may have had its
|
|
// atomics lowered to nonatomic operations. In that case, atomics is
|
|
// disallowed to prevent unsafe linking with atomics-enabled objects.
|
|
assert(!Features[WebAssembly::FeatureAtomics]);
|
|
M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey,
|
|
wasm::WASM_FEATURE_PREFIX_DISALLOWED);
|
|
} else if (Features[KV.Value]) {
|
|
// Otherwise features are marked Used or not mentioned
|
|
M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey,
|
|
wasm::WASM_FEATURE_PREFIX_USED);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
char CoalesceFeaturesAndStripAtomics::ID = 0;
|
|
|
|
/// WebAssembly Code Generator Pass Configuration Options.
|
|
class WebAssemblyPassConfig final : public TargetPassConfig {
|
|
public:
|
|
WebAssemblyPassConfig(WebAssemblyTargetMachine &TM, PassManagerBase &PM)
|
|
: TargetPassConfig(TM, PM) {}
|
|
|
|
WebAssemblyTargetMachine &getWebAssemblyTargetMachine() const {
|
|
return getTM<WebAssemblyTargetMachine>();
|
|
}
|
|
|
|
FunctionPass *createTargetRegisterAllocator(bool) override;
|
|
|
|
void addIRPasses() override;
|
|
bool addInstSelector() override;
|
|
void addPostRegAlloc() override;
|
|
bool addGCPasses() override { return false; }
|
|
void addPreEmitPass() override;
|
|
|
|
// No reg alloc
|
|
bool addRegAssignmentFast() override { return false; }
|
|
|
|
// No reg alloc
|
|
bool addRegAssignmentOptimized() override { return false; }
|
|
};
|
|
} // end anonymous namespace
|
|
|
|
/// Builds the TargetTransformInfo for \p F, backed by the WebAssembly TTI
/// implementation.
TargetTransformInfo
WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) {
  return TargetTransformInfo(WebAssemblyTTIImpl(this, F));
}
|
|
|
|
/// Allocates the WebAssembly pass configuration for pass manager \p PM. The
/// returned object is heap-allocated with `new`.
TargetPassConfig *
WebAssemblyTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new WebAssemblyPassConfig(*this, PM);
}
|
|
|
|
/// WebAssembly performs no register allocation, so no allocator pass is
/// created regardless of the argument.
FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
  // No reg alloc.
  return nullptr;
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// The following functions are called from lib/CodeGen/Passes.cpp to modify
|
|
// the CodeGen pass sequence.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
void WebAssemblyPassConfig::addIRPasses() {
|
|
// Runs LowerAtomicPass if necessary
|
|
addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine()));
|
|
|
|
// This is a no-op if atomics are not used in the module
|
|
addPass(createAtomicExpandPass());
|
|
|
|
// Add signatures to prototype-less function declarations
|
|
addPass(createWebAssemblyAddMissingPrototypes());
|
|
|
|
// Lower .llvm.global_dtors into .llvm_global_ctors with __cxa_atexit calls.
|
|
addPass(createWebAssemblyLowerGlobalDtors());
|
|
|
|
// Fix function bitcasts, as WebAssembly requires caller and callee signatures
|
|
// to match.
|
|
addPass(createWebAssemblyFixFunctionBitcasts());
|
|
|
|
// Optimize "returned" function attributes.
|
|
if (getOptLevel() != CodeGenOpt::None)
|
|
addPass(createWebAssemblyOptimizeReturned());
|
|
|
|
// If exception handling is not enabled and setjmp/longjmp handling is
|
|
// enabled, we lower invokes into calls and delete unreachable landingpad
|
|
// blocks. Lowering invokes when there is no EH support is done in
|
|
// TargetPassConfig::addPassesToHandleExceptions, but this runs after this
|
|
// function and SjLj handling expects all invokes to be lowered before.
|
|
if (!EnableEmException &&
|
|
TM->Options.ExceptionModel == ExceptionHandling::None) {
|
|
addPass(createLowerInvokePass());
|
|
// The lower invoke pass may create unreachable code. Remove it in order not
|
|
// to process dead blocks in setjmp/longjmp handling.
|
|
addPass(createUnreachableBlockEliminationPass());
|
|
}
|
|
|
|
// Handle exceptions and setjmp/longjmp if enabled.
|
|
if (EnableEmException || EnableEmSjLj)
|
|
addPass(createWebAssemblyLowerEmscriptenEHSjLj(EnableEmException,
|
|
EnableEmSjLj));
|
|
|
|
TargetPassConfig::addIRPasses();
|
|
}
|
|
|
|
/// Adds instruction selection and the fix-up passes that must directly follow
/// it. Always returns false.
bool WebAssemblyPassConfig::addInstSelector() {
  // The base implementation's result is intentionally discarded.
  (void)TargetPassConfig::addInstSelector();

  addPass(
      createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel()));

  // Run the argument-move pass immediately after the ScheduleDAG scheduler so
  // that the ARGUMENT instructions are fixed up before anything else sees
  // them in the wrong place.
  addPass(createWebAssemblyArgumentMove());

  // Set the p2align operands. This information is present during ISel, but it
  // is inconvenient to collect there. Collect it now, and update the
  // immediate operands.
  addPass(createWebAssemblySetP2AlignOperands());

  return false;
}
|
|
|
|
void WebAssemblyPassConfig::addPostRegAlloc() {
|
|
// TODO: The following CodeGen passes don't currently support code containing
|
|
// virtual registers. Consider removing their restrictions and re-enabling
|
|
// them.
|
|
|
|
// These functions all require the NoVRegs property.
|
|
disablePass(&MachineCopyPropagationID);
|
|
disablePass(&PostRAMachineSinkingID);
|
|
disablePass(&PostRASchedulerID);
|
|
disablePass(&FuncletLayoutID);
|
|
disablePass(&StackMapLivenessID);
|
|
disablePass(&LiveDebugValuesID);
|
|
disablePass(&PatchableFunctionID);
|
|
disablePass(&ShrinkWrapID);
|
|
|
|
// This pass hurts code size for wasm because it can generate irreducible
|
|
// control flow.
|
|
disablePass(&MachineBlockPlacementID);
|
|
|
|
TargetPassConfig::addPostRegAlloc();
|
|
}
|
|
|
|
void WebAssemblyPassConfig::addPreEmitPass() {
|
|
TargetPassConfig::addPreEmitPass();
|
|
|
|
// Rewrite pseudo call_indirect instructions as real instructions.
|
|
// This needs to run before register stackification, because we change the
|
|
// order of the arguments.
|
|
addPass(createWebAssemblyCallIndirectFixup());
|
|
|
|
// Eliminate multiple-entry loops.
|
|
addPass(createWebAssemblyFixIrreducibleControlFlow());
|
|
|
|
// Do various transformations for exception handling.
|
|
// Every CFG-changing optimizations should come before this.
|
|
addPass(createWebAssemblyLateEHPrepare());
|
|
|
|
// Now that we have a prologue and epilogue and all frame indices are
|
|
// rewritten, eliminate SP and FP. This allows them to be stackified,
|
|
// colored, and numbered with the rest of the registers.
|
|
addPass(createWebAssemblyReplacePhysRegs());
|
|
|
|
// Preparations and optimizations related to register stackification.
|
|
if (getOptLevel() != CodeGenOpt::None) {
|
|
// LiveIntervals isn't commonly run this late. Re-establish preconditions.
|
|
addPass(createWebAssemblyPrepareForLiveIntervals());
|
|
|
|
// Depend on LiveIntervals and perform some optimizations on it.
|
|
addPass(createWebAssemblyOptimizeLiveIntervals());
|
|
|
|
// Prepare memory intrinsic calls for register stackifying.
|
|
addPass(createWebAssemblyMemIntrinsicResults());
|
|
|
|
// Mark registers as representing wasm's value stack. This is a key
|
|
// code-compression technique in WebAssembly. We run this pass (and
|
|
// MemIntrinsicResults above) very late, so that it sees as much code as
|
|
// possible, including code emitted by PEI and expanded by late tail
|
|
// duplication.
|
|
addPass(createWebAssemblyRegStackify());
|
|
|
|
// Run the register coloring pass to reduce the total number of registers.
|
|
// This runs after stackification so that it doesn't consider registers
|
|
// that become stackified.
|
|
addPass(createWebAssemblyRegColoring());
|
|
}
|
|
|
|
// Insert explicit local.get and local.set operators.
|
|
addPass(createWebAssemblyExplicitLocals());
|
|
|
|
// Sort the blocks of the CFG into topological order, a prerequisite for
|
|
// BLOCK and LOOP markers.
|
|
addPass(createWebAssemblyCFGSort());
|
|
|
|
// Insert BLOCK and LOOP markers.
|
|
addPass(createWebAssemblyCFGStackify());
|
|
|
|
// Lower br_unless into br_if.
|
|
addPass(createWebAssemblyLowerBrUnless());
|
|
|
|
// Perform the very last peephole optimizations on the code.
|
|
if (getOptLevel() != CodeGenOpt::None)
|
|
addPass(createWebAssemblyPeephole());
|
|
|
|
// Create a mapping from LLVM CodeGen virtual registers to wasm registers.
|
|
addPass(createWebAssemblyRegNumbering());
|
|
}
|
|
|
|
/// Allocates a default-constructed YAML representation of the WebAssembly
/// machine function info. The result is heap-allocated with `new`.
yaml::MachineFunctionInfo *
WebAssemblyTargetMachine::createDefaultFuncInfoYAML() const {
  return new yaml::WebAssemblyFunctionInfo();
}
|
|
|
|
/// Allocates a YAML representation built from the WebAssemblyFunctionInfo
/// attached to \p MF. The result is heap-allocated with `new`.
yaml::MachineFunctionInfo *WebAssemblyTargetMachine::convertFuncInfoToYAML(
    const MachineFunction &MF) const {
  const WebAssemblyFunctionInfo *FuncInfo =
      MF.getInfo<WebAssemblyFunctionInfo>();
  return new yaml::WebAssemblyFunctionInfo(*FuncInfo);
}
|
|
|
|
bool WebAssemblyTargetMachine::parseMachineFunctionInfo(
|
|
const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
|
|
SMDiagnostic &Error, SMRange &SourceRange) const {
|
|
const auto &YamlMFI =
|
|
reinterpret_cast<const yaml::WebAssemblyFunctionInfo &>(MFI);
|
|
MachineFunction &MF = PFS.MF;
|
|
MF.getInfo<WebAssemblyFunctionInfo>()->initializeBaseYamlFields(YamlMFI);
|
|
return false;
|
|
}
|