[WebAssembly] Support for register stackifying with load and store instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254076 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dan Gohman 2015-11-25 16:55:01 +00:00
parent cb9cb80629
commit dcdd1c138c
18 changed files with 439 additions and 77 deletions

View File

@ -20,11 +20,14 @@ add_llvm_target(WebAssemblyCodeGen
WebAssemblyInstrInfo.cpp
WebAssemblyMachineFunctionInfo.cpp
WebAssemblyMCInstLower.cpp
WebAssemblyOptimizeReturned.cpp
WebAssemblyPeephole.cpp
WebAssemblyRegisterInfo.cpp
WebAssemblyRegColoring.cpp
WebAssemblyRegNumbering.cpp
WebAssemblyRegStackify.cpp
WebAssemblySelectionDAGInfo.cpp
WebAssemblyStoreResults.cpp
WebAssemblySubtarget.cpp
WebAssemblyTargetMachine.cpp
WebAssemblyTargetTransformInfo.cpp

View File

@ -22,7 +22,7 @@ Interesting work that remains to be done:
//===---------------------------------------------------------------------===//
set_local and store instructions have a return value. We should (a) model this,
set_local instructions have a return value. We should (a) model this,
and (b) write optimizations which take advantage of it. Keep in mind that
many set_local instructions are implicit!

View File

@ -23,13 +23,17 @@ namespace llvm {
class WebAssemblyTargetMachine;
class FunctionPass;
FunctionPass *createWebAssemblyOptimizeReturned();
FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createWebAssemblyStoreResults();
FunctionPass *createWebAssemblyRegStackify();
FunctionPass *createWebAssemblyRegColoring();
FunctionPass *createWebAssemblyCFGStackify();
FunctionPass *createWebAssemblyRegNumbering();
FunctionPass *createWebAssemblyPeephole();
FunctionPass *createWebAssemblyRelooper();

View File

@ -74,6 +74,7 @@ private:
// Custom lowering hooks.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
};

View File

@ -0,0 +1,73 @@
//===-- WebAssemblyOptimizeReturned.cpp - Optimize "returned" attributes --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Optimize calls with "returned" attributes for WebAssembly.
///
//===----------------------------------------------------------------------===//
#include "WebAssembly.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-optimize-returned"
namespace {
// IR-level pass: when a call argument carries the 'returned' attribute, the
// call's result is known to equal that argument, so uses of the argument that
// are dominated by the call can be rewritten to use the call result instead.
class OptimizeReturned final : public FunctionPass,
                               public InstVisitor<OptimizeReturned> {
  const char *getPassName() const override {
    return "WebAssembly Optimize Returned";
  }

  // Only operands are rewritten; the CFG is untouched, so the dominator
  // tree is both required (for the dominance queries) and preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override;

  // Dominator tree of the current function; set by runOnFunction before any
  // visitCallSite call reads it.
  DominatorTree *DT;

public:
  static char ID;
  OptimizeReturned() : FunctionPass(ID), DT(nullptr) {}

  // InstVisitor hook, invoked for every call/invoke in the function.
  void visitCallSite(CallSite CS);
};
} // End anonymous namespace
char OptimizeReturned::ID = 0;

// Factory used by WebAssemblyTargetMachine to schedule this IR pass.
FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
  return new OptimizeReturned();
}
// For each argument marked 'returned', rewrite uses of that argument that
// are dominated by the call to use the call's result instead.
void OptimizeReturned::visitCallSite(CallSite CS) {
  for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i)
    // Attribute indices are 1-based here; index 0 refers to the return value.
    if (CS.paramHasAttr(1 + i, Attribute::Returned)) {
      Instruction *Inst = CS.getInstruction();
      Value *Arg = CS.getArgOperand(i);
      // Like replaceDominatedUsesWith but using Instruction/Use dominance.
      // The iterator is advanced before U.set() so that mutating the use
      // list cannot invalidate it.
      for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) {
        Use &U = *UI++;
        if (DT->dominates(Inst, U))
          U.set(Inst);
      }
    }
}
// Cache the dominator tree, then let InstVisitor walk every instruction;
// visitCallSite does the actual rewriting. Conservatively reports the
// function as modified.
bool OptimizeReturned::runOnFunction(Function &F) {
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  visit(F);
  return true;
}

View File

@ -0,0 +1,77 @@
//===-- WebAssemblyPeephole.cpp - WebAssembly Peephole Optimizations ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Late peephole optimizations for WebAssembly.
///
//===----------------------------------------------------------------------===//
#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-peephole"
namespace {
// Late machine-level peephole pass; scheduled in addPreEmitPass after
// register numbering, to clean up patterns the earlier passes expose.
class WebAssemblyPeephole final : public MachineFunctionPass {
  const char *getPassName() const override {
    return "WebAssembly late peephole optimizer";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

public:
  static char ID;
  WebAssemblyPeephole() : MachineFunctionPass(ID) {}
};
} // end anonymous namespace
char WebAssemblyPeephole::ID = 0;

// Factory used by WebAssemblyTargetMachine to schedule this pass.
FunctionPass *llvm::createWebAssemblyPeephole() {
  return new WebAssemblyPeephole();
}
// Scan for store instructions whose result register is the same as their
// value operand, and retarget the result to a fresh dead register so the
// printer can use $discard for it.
//
// Returns true iff any instruction was modified.
bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();

  for (auto &MBB : MF)
    for (auto &MI : MBB)
      switch (MI.getOpcode()) {
      default:
        break;
      case WebAssembly::STORE8_I32:
      case WebAssembly::STORE16_I32:
      case WebAssembly::STORE8_I64:
      case WebAssembly::STORE16_I64:
      case WebAssembly::STORE32_I64:
      case WebAssembly::STORE_F32:
      case WebAssembly::STORE_F64:
      case WebAssembly::STORE_I32:
      case WebAssembly::STORE_I64: {
        // Store instructions return their value operand. If we ended up using
        // the same register for both, replace it with a dead def so that it
        // can use $discard instead.
        MachineOperand &MO = MI.getOperand(0);
        unsigned OldReg = MO.getReg();
        if (OldReg == MI.getOperand(2).getReg()) {
          unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
          MO.setReg(NewReg);
          MO.setIsDead();
          MFI.stackifyVReg(NewReg);
          // Bug fix: Changed was previously never updated, so the pass
          // always reported "no modification" even after rewriting the
          // instruction's def operand.
          Changed = true;
        }
        break;
      }
      }
  return Changed;
}

View File

@ -23,6 +23,7 @@
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@ -40,6 +41,7 @@ class WebAssemblyRegStackify final : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<MachineBlockFrequencyInfo>();
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@ -70,6 +72,24 @@ static void ImposeStackOrdering(MachineInstr *MI) {
/*isImp=*/true));
}
// Test whether it's safe to move Def to just before Insert. Note that this
// doesn't account for physical register dependencies, because WebAssembly
// doesn't have any (other than special ones like EXPR_STACK).
// NOTE(review): the backward walk below assumes Def and Insert are in the
// same basic block with Def preceding Insert — confirm at the call site.
// TODO: Compute memory dependencies in a way that doesn't require always
// walking the block.
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
// more precise.
static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
                         AliasAnalysis &AA) {
  bool SawStore = false, SawSideEffects = false;
  MachineBasicBlock::const_iterator D(Def), I(Insert);
  // Walk the instructions strictly between Def and Insert, accumulating
  // whether any of them store (via SawStore) or are otherwise unsafe to
  // move across (SawSideEffects).
  for (--I; I != D; --I)
    SawSideEffects |= I->isSafeToMove(&AA, SawStore);
  // Unsafe if a non-invariant load would be reordered past an intervening
  // store, or if Def itself can't move across the observed side effects.
  return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) &&
         !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore));
}
bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** Register Stackifying **********\n"
"********** Function: "
@ -78,6 +98,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
MachineRegisterInfo &MRI = MF.getRegInfo();
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
// Walk the instructions from the bottom up. Currently we don't look past
// block boundaries, and the blocks aren't ordered so the block visitation
@ -90,12 +111,17 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
if (Insert->getOpcode() == TargetOpcode::PHI)
break;
// Don't nest anything inside an inline asm, because we don't have
// constraints for $push inputs.
if (Insert->getOpcode() == TargetOpcode::INLINEASM)
break;
// Iterate through the inputs in reverse order, since we'll be pulling
// operands off the stack in FIFO order.
bool AnyStackified = false;
for (MachineOperand &Op : reverse(Insert->uses())) {
// We're only interested in explicit virtual register operands.
if (!Op.isReg() || Op.isImplicit())
if (!Op.isReg() || Op.isImplicit() || !Op.isUse())
continue;
unsigned Reg = Op.getReg();
@ -112,6 +138,15 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF)
continue;
// Don't nest an INLINE_ASM def into anything, because we don't have
// constraints for $pop outputs.
if (Def->getOpcode() == TargetOpcode::INLINEASM)
continue;
// Don't nest PHIs inside of anything.
if (Def->getOpcode() == TargetOpcode::PHI)
continue;
// Argument instructions represent live-in registers and not real
// instructions.
if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
@ -124,8 +159,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// they be trivially clonable.
// TODO: Eventually we'll relax this, to take advantage of set_local
// returning its result.
bool OneUse = MRI.hasOneUse(Reg);
if (!OneUse && !Def->isMoveImmediate())
if (!MRI.hasOneUse(Reg))
continue;
// For now, be conservative and don't look across block boundaries,
@ -134,35 +168,19 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
if (Def->getParent() != &MBB && !Def->isMoveImmediate())
continue;
// For now, be simple and don't reorder loads, stores, or side effects.
// TODO: Be more aggressive.
if ((Def->mayLoad() || Def->mayStore() ||
Def->hasUnmodeledSideEffects()))
// Don't move instructions that have side effects or memory dependencies
// or other complications.
if (!IsSafeToMove(Def, Insert, AA))
continue;
Changed = true;
AnyStackified = true;
if (OneUse) {
// Move the def down and nest it in the current instruction.
MBB.insert(MachineBasicBlock::instr_iterator(Insert),
Def->removeFromParent());
MFI.stackifyVReg(Reg);
ImposeStackOrdering(Def);
Insert = Def;
} else {
// Clone the def down and nest it in the current instruction.
MachineInstr *Clone = MF.CloneMachineInstr(Def);
unsigned OldReg = Def->getOperand(0).getReg();
unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
assert(Op.getReg() == OldReg);
assert(Clone->getOperand(0).getReg() == OldReg);
Op.setReg(NewReg);
Clone->getOperand(0).setReg(NewReg);
MBB.insert(MachineBasicBlock::instr_iterator(Insert), Clone);
MFI.stackifyVReg(Reg);
ImposeStackOrdering(Clone);
Insert = Clone;
}
// Move the def down and nest it in the current instruction.
MBB.insert(MachineBasicBlock::instr_iterator(Insert),
Def->removeFromParent());
MFI.stackifyVReg(Reg);
ImposeStackOrdering(Def);
Insert = Def;
}
if (AnyStackified)
ImposeStackOrdering(&MI);

View File

@ -0,0 +1,102 @@
//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements an optimization pass using store result values.
///
/// WebAssembly's store instructions return the stored value, specifically to
/// enable the optimization of reducing get_local/set_local traffic, which is
/// what we're doing here.
///
//===----------------------------------------------------------------------===//
#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-store-results"
namespace {
// Machine pass that exploits WebAssembly stores returning their stored
// value: dominated uses of the stored register are rewritten to use the
// store's result register, reducing get_local/set_local traffic.
class WebAssemblyStoreResults final : public MachineFunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid
  WebAssemblyStoreResults() : MachineFunctionPass(ID) {}

  const char *getPassName() const override {
    return "WebAssembly Store Results";
  }

  // NOTE(review): MachineBlockFrequencyInfo is required/preserved here but
  // is not referenced in runOnMachineFunction — confirm whether it is still
  // needed.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineBlockFrequencyInfo>();
    AU.addPreserved<MachineBlockFrequencyInfo>();
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

private:
};
} // end anonymous namespace
char WebAssemblyStoreResults::ID = 0;

// Factory used by WebAssemblyTargetMachine to schedule this pass.
FunctionPass *llvm::createWebAssemblyStoreResults() {
  return new WebAssemblyStoreResults();
}
// For every store, rewrite dominated uses of the stored value's register to
// use the store's result register instead.
bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
  DEBUG({
    dbgs() << "********** Store Results **********\n"
           << "********** Function: " << MF.getName() << '\n';
  });

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();

  for (auto &MBB : MF)
    for (auto &MI : MBB)
      switch (MI.getOpcode()) {
      default:
        break;
      case WebAssembly::STORE8_I32:
      case WebAssembly::STORE16_I32:
      case WebAssembly::STORE8_I64:
      case WebAssembly::STORE16_I64:
      case WebAssembly::STORE32_I64:
      case WebAssembly::STORE_F32:
      case WebAssembly::STORE_F64:
      case WebAssembly::STORE_I32:
      case WebAssembly::STORE_I64:
        // ToReg is the store's result; FromReg is the value being stored.
        unsigned ToReg = MI.getOperand(0).getReg();
        unsigned FromReg = MI.getOperand(2).getReg();
        for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
          // Advance the iterator before mutating the use, since setReg
          // moves the operand to another use list.
          MachineOperand &O = *I++;
          MachineInstr *Where = O.getParent();
          // For a PHI use, dominance must be checked against the end of the
          // corresponding incoming predecessor block, not against the PHI
          // itself. The MBB operand follows its paired value operand.
          // NOTE(review): getFirstTerminator() may be the block's end
          // iterator when there is no terminator — confirm that case.
          if (Where->getOpcode() == TargetOpcode::PHI)
            Where = Where->getOperand(&O - &Where->getOperand(0) + 1)
                        .getMBB()
                        ->getFirstTerminator();
          if (&MI == Where || !MDT.dominates(&MI, Where))
            continue;
          O.setReg(ToReg);
        }
      }
  // Conservatively report the function as modified.
  return true;
}

View File

@ -143,6 +143,9 @@ void WebAssemblyPassConfig::addIRPasses() {
// control specifically what gets lowered.
addPass(createAtomicExpandPass(TM));
// Optimize "returned" function attributes.
addPass(createWebAssemblyOptimizeReturned());
TargetPassConfig::addIRPasses();
}
@ -157,6 +160,9 @@ bool WebAssemblyPassConfig::addInstSelector() {
bool WebAssemblyPassConfig::addILPOpts() { return true; }
void WebAssemblyPassConfig::addPreRegAlloc() {
// Prepare store instructions for register stackifying.
addPass(createWebAssemblyStoreResults());
// Mark registers as representing wasm's expression stack.
addPass(createWebAssemblyRegStackify());
}
@ -183,4 +189,5 @@ void WebAssemblyPassConfig::addPreSched2() {}
void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyCFGStackify());
addPass(createWebAssemblyRegNumbering());
addPass(createWebAssemblyPeephole());
}

View File

@ -174,7 +174,7 @@ exit:
; CHECK-LABEL: single_block:
; CHECK-NOT: br
; CHECK: return ${{[0-9]+}}{{$}}
; CHECK: return $pop{{[0-9]+}}{{$}}
define i32 @single_block(i32* %p) {
entry:
store volatile i32 0, i32* %p

View File

@ -11,8 +11,8 @@ target triple = "wasm32-unknown-unknown"
; CHECK: foo:
; CHECK: i32.const $push0=, answer{{$}}
; CHECK-NEXT: i32.load $0=, $pop0{{$}}
; CHECK-NEXT: return $0{{$}}
; CHECK-NEXT: i32.load $push1=, $pop0{{$}}
; CHECK-NEXT: return $pop1{{$}}
define i32 @foo() {
%a = load i32, i32* @answer
ret i32 %a

View File

@ -6,8 +6,8 @@ target datalayout = "e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: sext_i8_i32:
; CHECK: i32.load8_s $0=, $0{{$}}
; CHECK-NEXT: return $0{{$}}
; CHECK: i32.load8_s $push0=, $0{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @sext_i8_i32(i8 *%p) {
%v = load i8, i8* %p
%e = sext i8 %v to i32
@ -15,8 +15,8 @@ define i32 @sext_i8_i32(i8 *%p) {
}
; CHECK-LABEL: zext_i8_i32:
; CHECK: i32.load8_u $0=, $0{{$}}
; CHECK-NEXT: return $0{{$}}
; CHECK: i32.load8_u $push0=, $0{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @zext_i8_i32(i8 *%p) {
%v = load i8, i8* %p
%e = zext i8 %v to i32
@ -24,8 +24,8 @@ define i32 @zext_i8_i32(i8 *%p) {
}
; CHECK-LABEL: sext_i16_i32:
; CHECK: i32.load16_s $0=, $0{{$}}
; CHECK-NEXT: return $0{{$}}
; CHECK: i32.load16_s $push0=, $0{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @sext_i16_i32(i16 *%p) {
%v = load i16, i16* %p
%e = sext i16 %v to i32
@ -33,8 +33,8 @@ define i32 @sext_i16_i32(i16 *%p) {
}
; CHECK-LABEL: zext_i16_i32:
; CHECK: i32.load16_u $0=, $0{{$}}
; CHECK-NEXT: return $0{{$}}
; CHECK: i32.load16_u $push0=, $0{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @zext_i16_i32(i16 *%p) {
%v = load i16, i16* %p
%e = zext i16 %v to i32
@ -42,8 +42,8 @@ define i32 @zext_i16_i32(i16 *%p) {
}
; CHECK-LABEL: sext_i8_i64:
; CHECK: i64.load8_s $1=, $0{{$}}
; CHECK-NEXT: return $1{{$}}
; CHECK: i64.load8_s $push0=, $0{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @sext_i8_i64(i8 *%p) {
%v = load i8, i8* %p
%e = sext i8 %v to i64
@ -51,8 +51,8 @@ define i64 @sext_i8_i64(i8 *%p) {
}
; CHECK-LABEL: zext_i8_i64:
; CHECK: i64.load8_u $1=, $0{{$}}
; CHECK-NEXT: return $1{{$}}
; CHECK: i64.load8_u $push0=, $0{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @zext_i8_i64(i8 *%p) {
%v = load i8, i8* %p
%e = zext i8 %v to i64
@ -60,8 +60,8 @@ define i64 @zext_i8_i64(i8 *%p) {
}
; CHECK-LABEL: sext_i16_i64:
; CHECK: i64.load16_s $1=, $0{{$}}
; CHECK-NEXT: return $1{{$}}
; CHECK: i64.load16_s $push0=, $0{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @sext_i16_i64(i16 *%p) {
%v = load i16, i16* %p
%e = sext i16 %v to i64
@ -69,8 +69,8 @@ define i64 @sext_i16_i64(i16 *%p) {
}
; CHECK-LABEL: zext_i16_i64:
; CHECK: i64.load16_u $1=, $0{{$}}
; CHECK-NEXT: return $1{{$}}
; CHECK: i64.load16_u $push0=, $0{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @zext_i16_i64(i16 *%p) {
%v = load i16, i16* %p
%e = zext i16 %v to i64
@ -78,8 +78,8 @@ define i64 @zext_i16_i64(i16 *%p) {
}
; CHECK-LABEL: sext_i32_i64:
; CHECK: i64.load32_s $1=, $0{{$}}
; CHECK-NEXT: return $1{{$}}
; CHECK: i64.load32_s $push0=, $0{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @sext_i32_i64(i32 *%p) {
%v = load i32, i32* %p
%e = sext i32 %v to i64
@ -87,8 +87,8 @@ define i64 @sext_i32_i64(i32 *%p) {
}
; CHECK-LABEL: zext_i32_i64:
; CHECK: i64.load32_u $1=, $0{{$}}
; CHECK: return $1{{$}}
; CHECK: i64.load32_u $push0=, $0{{$}}
; CHECK: return $pop0{{$}}
define i64 @zext_i32_i64(i32 *%p) {
%v = load i32, i32* %p
%e = zext i32 %v to i64

View File

@ -6,8 +6,8 @@ target datalayout = "e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: load_u_i1_i32:
; CHECK: i32.load8_u $[[NUM0:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: return $[[NUM0]]{{$}}
; CHECK: i32.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: return $pop[[NUM0]]{{$}}
define i32 @load_u_i1_i32(i1* %p) {
%v = load i1, i1* %p
%e = zext i1 %v to i32
@ -15,9 +15,9 @@ define i32 @load_u_i1_i32(i1* %p) {
}
; CHECK-LABEL: load_s_i1_i32:
; CHECK: i32.load8_u $[[NUM0:[0-9]+]]=, $0{{$}}
; CHECK: i32.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: i32.const $[[NUM1:[0-9]+]]=, 31{{$}}
; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $[[NUM0]], $[[NUM1]]{{$}}
; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $[[NUM1]]{{$}}
; CHECK-NEXT: shr_s $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $[[NUM1]]{{$}}
; CHECK-NEXT: return $pop[[NUM3]]{{$}}
define i32 @load_s_i1_i32(i1* %p) {
@ -27,8 +27,8 @@ define i32 @load_s_i1_i32(i1* %p) {
}
; CHECK-LABEL: load_u_i1_i64:
; CHECK: i64.load8_u $[[NUM0:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: return $[[NUM0]]{{$}}
; CHECK: i64.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: return $pop[[NUM0]]{{$}}
define i64 @load_u_i1_i64(i1* %p) {
%v = load i1, i1* %p
%e = zext i1 %v to i64
@ -36,9 +36,9 @@ define i64 @load_u_i1_i64(i1* %p) {
}
; CHECK-LABEL: load_s_i1_i64:
; CHECK: i64.load8_u $[[NUM0:[0-9]+]]=, $0{{$}}
; CHECK: i64.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: i64.const $[[NUM1:[0-9]+]]=, 63{{$}}
; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $[[NUM0]], $[[NUM1]]{{$}}
; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $[[NUM1]]{{$}}
; CHECK-NEXT: shr_s $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $[[NUM1]]{{$}}
; CHECK-NEXT: return $pop[[NUM3]]{{$}}
define i64 @load_s_i1_i64(i1* %p) {

View File

@ -8,8 +8,8 @@ target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: ldi32:
; CHECK-NEXT: .param i32{{$}}
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: i32.load $[[NUM:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: return $[[NUM]]{{$}}
; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i32 @ldi32(i32 *%p) {
%v = load i32, i32* %p
ret i32 %v
@ -18,9 +18,8 @@ define i32 @ldi32(i32 *%p) {
; CHECK-LABEL: ldi64:
; CHECK-NEXT: .param i32{{$}}
; CHECK-NEXT: .result i64{{$}}
; CHECK-NEXT: .local i64{{$}}
; CHECK-NEXT: i64.load $[[NUM:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: return $[[NUM]]{{$}}
; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i64 @ldi64(i64 *%p) {
%v = load i64, i64* %p
ret i64 %v
@ -29,9 +28,8 @@ define i64 @ldi64(i64 *%p) {
; CHECK-LABEL: ldf32:
; CHECK-NEXT: .param i32{{$}}
; CHECK-NEXT: .result f32{{$}}
; CHECK-NEXT: .local f32{{$}}
; CHECK-NEXT: f32.load $[[NUM:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: return $[[NUM]]{{$}}
; CHECK-NEXT: f32.load $push[[NUM:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define float @ldf32(float *%p) {
%v = load float, float* %p
ret float %v
@ -40,9 +38,8 @@ define float @ldf32(float *%p) {
; CHECK-LABEL: ldf64:
; CHECK-NEXT: .param i32{{$}}
; CHECK-NEXT: .result f64{{$}}
; CHECK-NEXT: .local f64{{$}}
; CHECK-NEXT: f64.load $[[NUM:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: return $[[NUM]]{{$}}
; CHECK-NEXT: f64.load $push[[NUM:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define double @ldf64(double *%p) {
%v = load double, double* %p
ret double %v

View File

@ -10,9 +10,8 @@ declare void @llvm.wasm.grow.memory.i32(i32) nounwind
; CHECK-LABEL: memory_size:
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: .local i32{{$}}
; CHECK-NEXT: memory_size $0={{$}}
; CHECK-NEXT: return $0{{$}}
; CHECK-NEXT: memory_size $push0={{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @memory_size() {
%a = call i32 @llvm.wasm.memory.size.i32()
ret i32 %a

View File

@ -10,9 +10,8 @@ declare void @llvm.wasm.grow.memory.i64(i64) nounwind
; CHECK-LABEL: memory_size:
; CHECK-NEXT: .result i64{{$}}
; CHECK-NEXT: .local i64{{$}}
; CHECK-NEXT: memory_size $0={{$}}
; CHECK-NEXT: return $0{{$}}
; CHECK-NEXT: memory_size $push0={{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @memory_size() {
%a = call i64 @llvm.wasm.memory.size.i64()
ret i64 %a

View File

@ -0,0 +1,47 @@
; RUN: llc < %s -asm-verbose=false | FileCheck %s

; Test the register stackifier pass.
; Negative tests expect a plain register return ($1); positive tests expect
; the load to be stackified so the return uses a pushed value ($pop0).

target datalayout = "e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; No because of pointer aliasing.
; CHECK-LABEL: no0:
; CHECK: return $1{{$}}
define i32 @no0(i32* %p, i32* %q) {
  %t = load i32, i32* %q
  store i32 0, i32* %p
  ret i32 %t
}

; No because of side effects.
; CHECK-LABEL: no1:
; CHECK: return $1{{$}}
define i32 @no1(i32* %p, i32* dereferenceable(4) %q) {
  %t = load volatile i32, i32* %q, !invariant.load !0
  store volatile i32 0, i32* %p
  ret i32 %t
}

; Yes because of invariant load and no side effects.
; CHECK-LABEL: yes0:
; CHECK: return $pop0{{$}}
define i32 @yes0(i32* %p, i32* dereferenceable(4) %q) {
  %t = load i32, i32* %q, !invariant.load !0
  store i32 0, i32* %p
  ret i32 %t
}

; Yes because of no intervening side effects.
; CHECK-LABEL: yes1:
; CHECK: return $pop0{{$}}
define i32 @yes1(i32* %q) {
  %t = load volatile i32, i32* %q
  ret i32 %t
}

; Empty metadata node used as the !invariant.load marker above.
!0 = !{}

View File

@ -0,0 +1,35 @@
; RUN: llc < %s -asm-verbose=false | FileCheck %s

; Test that the "returned" attribute is optimized effectively.

target datalayout = "e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; The constructor call returns its argument, so the final return should use
; the (stackified) call result rather than a separate local.
; CHECK-LABEL: _Z3foov:
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: i32.const $push0=, 1{{$}}
; CHECK-NEXT: call $push1=, _Znwm, $pop0{{$}}
; CHECK-NEXT: call $push2=, _ZN5AppleC1Ev, $pop1{{$}}
; CHECK-NEXT: return $pop2{{$}}
%class.Apple = type { i8 }
declare noalias i8* @_Znwm(i32)
declare %class.Apple* @_ZN5AppleC1Ev(%class.Apple* returned)
define %class.Apple* @_Z3foov() {
entry:
  %call = tail call noalias i8* @_Znwm(i32 1)
  %0 = bitcast i8* %call to %class.Apple*
  %call1 = tail call %class.Apple* @_ZN5AppleC1Ev(%class.Apple* %0)
  ret %class.Apple* %0
}

; memcpy returns its first argument, so `ret i8* %p` can use the call result.
; CHECK-LABEL: _Z3barPvS_l:
; CHECK-NEXT: .param i32, i32, i32{{$}}
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: call $push0=, memcpy, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
declare i8* @memcpy(i8* returned, i8*, i32)
define i8* @_Z3barPvS_l(i8* %p, i8* %s, i32 %n) {
entry:
  %call = tail call i8* @memcpy(i8* %p, i8* %s, i32 %n)
  ret i8* %p
}