[RISCV] Add machine function pass to merge base + offset

Summary:
   In r333455 we added a peephole to fix the corner cases that result
   from separating base + offset lowering of global address. The
   peephole didn't handle some of the cases because it only has a basic
   block view instead of a function level view.

   This patch replaces that logic with a machine function pass. In
   addition to handling the original cases it handles uses of the global
   address across blocks in a function and folding an offset from an LW/SW
   instruction. This pass won't run for OptNone compilation, so there
   will be a negative impact overall vs the old approach at O0.

Reviewers: asb, apazos, mgrang

Reviewed By: asb

Subscribers: MartinMosbeck, brucehoult, the_o, rogfer01, mgorny, rbar, johnrusso, simoncook, niosHD, kito-cheng, shiva0217, zzheng, llvm-commits, edward-jones

Differential Revision: https://reviews.llvm.org/D47857

llvm-svn: 335786
This commit is contained in:
Sameer AbuAsal 2018-06-27 20:51:42 +00:00
parent ca4ba5cdc7
commit 92233f14e9
6 changed files with 334 additions and 245 deletions

View File

@ -20,6 +20,7 @@ add_llvm_target(RISCVCodeGen
RISCVISelDAGToDAG.cpp
RISCVISelLowering.cpp
RISCVMCInstLower.cpp
RISCVMergeBaseOffset.cpp
RISCVRegisterInfo.cpp
RISCVSubtarget.cpp
RISCVTargetMachine.cpp

View File

@ -25,6 +25,7 @@ class MCInst;
class MCOperand;
class MachineInstr;
class MachineOperand;
class PassRegistry;
void LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
const AsmPrinter &AP);
@ -32,6 +33,9 @@ bool LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
MCOperand &MCOp, const AsmPrinter &AP);
FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM);
FunctionPass *createRISCVMergeBaseOffsetOptPass();
void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
}
#endif

View File

@ -56,14 +56,12 @@ public:
private:
void doPeepholeLoadStoreADDI();
void doPeepholeGlobalAddiLuiOffset();
void doPeepholeBuildPairF64SplitF64();
};
}
// Post-process the selected DAG by running each target peephole in turn.
// The peepholes are invoked in the order listed below.
void RISCVDAGToDAGISel::PostprocessISelDAG() {
doPeepholeLoadStoreADDI();
// Folds a constant offset into a lowered global address (LUI %hi / ADDI %lo).
doPeepholeGlobalAddiLuiOffset();
doPeepholeBuildPairF64SplitF64();
}
@ -130,212 +128,6 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
return false;
}
// Search the operands of Tail for a lowered global address:
//   HiLUI:  LUI  %hi(global)
//   LoADDI: ADDI HiLUI, %lo(global)
//
// On success returns true with:
//   Idx    - index of the Tail operand that is LoADDI,
//   LoADDI - the ADDI node, HiLUI - the LUI node,
//   GAlo / GAhi - the global address operands of the ADDI / LUI.
// Both global address operands must carry a zero offset, and both the ADDI
// and the LUI must have a single use. Once an ADDI candidate has been found,
// any later mismatch ends the search (remaining operands are not examined).
static bool detectLuiAddiGlobal(SDNode *Tail, unsigned &Idx, SDValue &LoADDI,
                                SDValue &HiLUI, GlobalAddressSDNode *&GAlo,
                                GlobalAddressSDNode *&GAhi) {
  const unsigned NumOps = Tail->getNumOperands();
  for (Idx = 0; Idx != NumOps; ++Idx) {
    LoADDI = Tail->getOperand(Idx);

    // A candidate is a single-use ADDI whose second operand is a global.
    const bool IsCandidate =
        LoADDI->isMachineOpcode() &&
        LoADDI->getMachineOpcode() == RISCV::ADDI &&
        isa<GlobalAddressSDNode>(LoADDI->getOperand(1)) &&
        LoADDI->hasOneUse();
    if (!IsCandidate)
      continue;

    // The ADDI operand must be %lo(global) with no offset folded in yet.
    GAlo = cast<GlobalAddressSDNode>(LoADDI->getOperand(1));
    if (GAlo->getTargetFlags() != RISCVII::MO_LO || GAlo->getOffset() != 0)
      return false;

    // The ADDI must be fed by a single-use LUI of a global address.
    HiLUI = LoADDI->getOperand(0);
    const bool IsGlobalLUI =
        HiLUI->isMachineOpcode() &&
        HiLUI->getMachineOpcode() == RISCV::LUI &&
        isa<GlobalAddressSDNode>(HiLUI->getOperand(0)) && HiLUI->hasOneUse();
    if (!IsGlobalLUI)
      return false;

    // The LUI operand must be %hi(global), again with a zero offset.
    GAhi = cast<GlobalAddressSDNode>(HiLUI->getOperand(0));
    return GAhi->getTargetFlags() == RISCVII::MO_HI && GAhi->getOffset() == 0;
  }
  return false;
}
// Match an offset materialised by a lone "LUI imm20".
// On success Offset is set to the LUI immediate shifted left by 12.
static bool matchLuiOffset(SDValue &OffsetLUI, int64_t &Offset) {
  const bool IsConstantLUI =
      OffsetLUI->isMachineOpcode() &&
      OffsetLUI->getMachineOpcode() == RISCV::LUI &&
      isa<ConstantSDNode>(OffsetLUI->getOperand(0));
  if (!IsConstantLUI)
    return false;

  const int64_t Hi20 =
      cast<ConstantSDNode>(OffsetLUI->getOperand(0))->getSExtValue();
  Offset = Hi20 << 12;
  LLVM_DEBUG(dbgs() << " Detected \" LUI Offset_hi\"\n");
  return true;
}
// Match an offset materialised as "ADDI (LUI hi20), lo12".
//
// Both the ADDI and the LUI feeding it must be single-use machine nodes with
// constant operands. On success Offset is set to (hi20 << 12) + lo12.
//
// The ADDI immediate is a signed 12-bit value, so it is read sign-extended:
// an offset such as 73568 is materialised as "lui 18; addi -160", and reading
// the -160 zero-extended would reconstruct the wrong offset.
static bool matchAddiLuiOffset(SDValue &OffsetLoADDI, int64_t &Offset) {
  // LoADDI should only be used by the tail instruction.
  if (!OffsetLoADDI->isMachineOpcode() ||
      !(OffsetLoADDI->getMachineOpcode() == RISCV::ADDI) ||
      !isa<ConstantSDNode>(OffsetLoADDI->getOperand(1)) ||
      !OffsetLoADDI->hasOneUse())
    return false;
  int64_t OffLo =
      cast<ConstantSDNode>(OffsetLoADDI->getOperand(1))->getSExtValue();

  // HiLUI should only be used by the LoADDI.
  SDValue OffsetHiLUI = (OffsetLoADDI->getOperand(0));
  if (!OffsetHiLUI->isMachineOpcode() ||
      !(OffsetHiLUI->getMachineOpcode() == RISCV::LUI) ||
      !isa<ConstantSDNode>(OffsetHiLUI->getOperand(0)) ||
      !OffsetHiLUI->hasOneUse())
    return false;
  int64_t OffHi =
      cast<ConstantSDNode>(OffsetHiLUI->getOperand(0))->getSExtValue();

  Offset = (OffHi << 12) + OffLo;
  LLVM_DEBUG(dbgs() << " Detected \" ADDI (LUI Offset_hi), Offset_lo\"\n");
  return true;
}
// Rewrite the global address lowering pair so that both %hi and %lo refer to
// "global + Offset", then redirect every user of Tail's result to
// GlobalLoADDI. Tail is left without users once this returns.
static void updateTailInstrUsers(SDNode *Tail, SelectionDAG *CurDAG,
                                 GlobalAddressSDNode *GAhi,
                                 GlobalAddressSDNode *GAlo,
                                 SDValue &GlobalHiLUI, SDValue &GlobalLoADDI,
                                 int64_t Offset) {
  SDLoc DL(Tail->getOperand(1));

  // Build fresh target global address operands carrying the offset.
  SDValue NewHiSym = CurDAG->getTargetGlobalAddress(
      GAhi->getGlobal(), DL, GlobalHiLUI.getValueType(), Offset,
      RISCVII::MO_HI);
  SDValue NewLoSym = CurDAG->getTargetGlobalAddress(
      GAlo->getGlobal(), DL, GlobalLoADDI.getValueType(), Offset,
      RISCVII::MO_LO);

  // Install them on the LUI and on the ADDI respectively.
  CurDAG->UpdateNodeOperands(GlobalHiLUI.getNode(), NewHiSym);
  CurDAG->UpdateNodeOperands(GlobalLoADDI.getNode(), GlobalHiLUI, NewLoSym);

  // Everything that consumed Tail now consumes the ADDI directly.
  SDValue TailResult(Tail, 0);
  CurDAG->ReplaceAllUsesOfValuesWith(&TailResult, &GlobalLoADDI, 1);
}
// TODO: This transformation might be better implemented in a Machine Function
// Pass as discussed here: https://reviews.llvm.org/D45748.
//
// Merge the offset of address calculation into the offset field
// of a global address node in a global address lowering sequence ("LUI
// %hi(global) --> add %lo(global)") under the following conditions: 1) The
// offset field in the global address lowering sequence is zero. 2) The lowered
// global address is only used in one node, referred to as "Tail".
// This peephole does the following transformations to merge the offset:
// 1) ADDI (ADDI (LUI %hi(global)) %lo(global)), offset
// --->
// ADDI (LUI %hi(global + offset)) %lo(global + offset).
//
// This generates:
// lui a0, hi (global + offset)
// addi a0, a0, lo (global + offset)
// Instead of
// lui a0, hi (global)
// addi a0, lo (global)
// addi a0, offset
// This pattern is for cases when the offset is small enough to fit in the
// immediate field of ADDI (a 12-bit signed immediate).
// 2) ADD ((ADDI (LUI %hi(global)) %lo(global)), (LUI hi_offset))
// --->
// offset = hi_offset << 12
// ADDI (LUI %hi(global + offset)) %lo(global + offset)
// Which generates the ASM:
// lui a0, hi(global + offset)
// addi a0, lo(global + offset)
// Instead of:
// lui a0, hi(global)
// addi a0, lo(global)
// lui a1, (offset)
// add a0, a0, a1
// This pattern is for cases when the offset doesn't fit in an immediate field
// of ADDI but the lower 12 bits are all zeros.
// 3) ADD ((ADDI (LUI %hi(global)) %lo(global)), (ADDI lo_offset, (LUI
// hi_offset)))
// --->
// ADDI (LUI %hi(global + offset)) %lo(global + offset)
// Which generates the ASM:
// lui a1, %hi(global + offhi20<<12 + offlo12)
// addi a1, %lo(global + offhi20<<12 + offlo12)
// Instead of:
// lui a0, hi(global)
// addi a0, lo(global)
// lui a1, (offhi20)
// addi a1, (offlo12)
// add a0, a0, a1
// This pattern is for cases when the offset doesn't fit in an immediate field
// of ADDI and both the lower 12 bits and high 20 bits are non zero.
void RISCVDAGToDAGISel::doPeepholeGlobalAddiLuiOffset() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
SelectionDAG::allnodes_iterator Begin(CurDAG->allnodes_begin());
while (Position != Begin) {
SDNode *Tail = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (Tail->use_empty() || !Tail->isMachineOpcode())
continue;
// The tail instruction can be an ADD or an ADDI.
if (!Tail->isMachineOpcode() || !(Tail->getMachineOpcode() == RISCV::ADD ||
Tail->getMachineOpcode() == RISCV::ADDI))
continue;
// First detect the global address part of pattern:
// (lui %hi(global) --> Addi %lo(global))
unsigned GlobalLoADDiIdx;
SDValue GlobalLoADDI;
SDValue GlobalHiLUI;
GlobalAddressSDNode *GAhi;
GlobalAddressSDNode *GAlo;
if (!detectLuiAddiGlobal(Tail, GlobalLoADDiIdx, GlobalLoADDI, GlobalHiLUI,
GAlo, GAhi))
continue;
LLVM_DEBUG(dbgs() << " Detected \"ADDI LUI %hi(global), %lo(global)\n");
// Detect the offset part for the address calculation by looking at the
// other operand of the tail instruction:
int64_t Offset;
if (Tail->getMachineOpcode() == RISCV::ADD) {
// If the Tail is an ADD instruction, the offset can be in two forms:
// 1) LUI hi_Offset followed by:
// ADDI lo_offset
// This happens in case the offset has non zero bits in
// both hi 20 and lo 12 bits.
// 2) LUI (offset20)
// This happens in case the lower 12 bits of the offset are zeros.
SDValue OffsetVal = Tail->getOperand(1 - GlobalLoADDiIdx);
if (!matchAddiLuiOffset(OffsetVal, Offset) &&
!matchLuiOffset(OffsetVal, Offset))
continue;
} else
// The Tail is an ADDI instruction:
Offset = cast<ConstantSDNode>(Tail->getOperand(1 - GlobalLoADDiIdx))
->getSExtValue();
LLVM_DEBUG(
dbgs()
<< " Fold offset value into global offset of LUI %hi and ADDI %lo\n");
LLVM_DEBUG(dbgs() << "\tTail:");
LLVM_DEBUG(Tail->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\tGlobalHiLUI:");
LLVM_DEBUG(GlobalHiLUI->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\tGlobalLoADDI:");
LLVM_DEBUG(GlobalLoADDI->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\n");
updateTailInstrUsers(Tail, CurDAG, GAhi, GAlo, GlobalHiLUI, GlobalLoADDI,
Offset);
}
CurDAG->RemoveDeadNodes();
}
// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (add base, off), 0) -> (load base, off)
// (store val, (add base, off)) -> (store val, base, off)

View File

@ -0,0 +1,286 @@
//===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Merge the offset of address calculation into the offset field
// of instructions in a global address lowering sequence. This pass transforms:
// lui vreg1, %hi(s)
// addi vreg2, vreg1, %lo(s)
// addi vreg3, vreg2, Offset
//
// Into:
// lui vreg1, %hi(s+Offset)
// addi vreg2, vreg1, %lo(s+Offset)
//
// The transformation is carried out under certain conditions:
// 1) The offset field in the base of global address lowering sequence is zero.
// 2) The lowered global address has only one use.
//
// The offset field can be in a different form. This pass handles all of them.
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include <set>
using namespace llvm;
#define DEBUG_TYPE "riscv-merge-base-offset"
#define RISCV_MERGE_BASE_OFFSET_NAME "RISCV Merge Base Offset"
namespace {

/// Machine function pass that folds a constant offset into the %hi/%lo
/// operands of a lowered global address (LUI + ADDI pair), removing the
/// separate instructions that computed "base + offset".
///
/// The pass requires the function to be in SSA form.
struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
  static char ID;
  // Not referenced by any member function visible in this file.
  const MachineFunction *MF = nullptr;

  /// Pass entry point; scans every instruction of Fn for foldable patterns.
  bool runOnMachineFunction(MachineFunction &Fn) override;

  /// Returns true if LUI starts a "lui %hi(s); addi %lo(s)" pair with zero
  /// offsets and single uses; ADDI receives the user of LUI.
  bool detectLuiAddiGlobal(MachineInstr &LUI, MachineInstr *&ADDI);

  /// Inspects the single user of LoADDI and folds its offset if possible.
  /// Returns true if a fold was performed.
  bool detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI);

  /// Writes Offset into HiLUI/LoADDI and replaces Tail's result by LoADDI's.
  void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail,
                  int64_t Offset);

  /// Matches the offset operand of an ADD whose other operand is GAReg.
  /// On success Offset holds the constant that was materialised.
  bool matchLargeOffset(MachineInstr &TailAdd, unsigned GAReg, int64_t &Offset);

  RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::IsSSA);
  }

  StringRef getPassName() const override {
    return RISCV_MERGE_BASE_OFFSET_NAME;
  }

private:
  // Register info of the function being processed; set in
  // runOnMachineFunction().
  MachineRegisterInfo *MRI = nullptr;
  // Instructions made redundant by folding; erased at the end of the run.
  std::set<MachineInstr *> DeadInstrs;
};
} // end anonymous namespace
// Pass identification; the address of ID is what identifies the pass.
char RISCVMergeBaseOffsetOpt::ID = 0;

// Register the pass under the same name that is used as its debug type.
INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
                RISCV_MERGE_BASE_OFFSET_NAME, false, false)
// Recognise a lowered global address:
//   HiLUI:  lui  vreg1, %hi(s)
//   LoADDI: addi vreg2, vreg1, %lo(s)
//
// The pair is accepted only when all of the following hold:
//   1) HiLUI is a LUI whose operand is a global address flagged %hi with a
//      zero offset, and its result has exactly one use.
//   2) That single user is an ADDI whose immediate operand is a global
//      address flagged %lo with a zero offset.
//   3) The result of the ADDI has exactly one use.
// LoADDI is set to the user of HiLUI as soon as condition 1 is satisfied.
bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI,
                                                  MachineInstr *&LoADDI) {
  if (HiLUI.getOpcode() != RISCV::LUI)
    return false;

  const MachineOperand &HiSym = HiLUI.getOperand(1);
  if (HiSym.getTargetFlags() != RISCVII::MO_HI)
    return false;
  if (HiSym.getType() != MachineOperand::MO_GlobalAddress)
    return false;
  if (HiSym.getOffset() != 0)
    return false;

  const unsigned HiLuiDestReg = HiLUI.getOperand(0).getReg();
  if (!MRI->hasOneUse(HiLuiDestReg))
    return false;

  // The one and only user of the LUI result.
  LoADDI = MRI->use_begin(HiLuiDestReg)->getParent();
  if (LoADDI->getOpcode() != RISCV::ADDI)
    return false;

  const MachineOperand &LoSym = LoADDI->getOperand(2);
  if (LoSym.getTargetFlags() != RISCVII::MO_LO)
    return false;
  if (LoSym.getType() != MachineOperand::MO_GlobalAddress)
    return false;
  if (LoSym.getOffset() != 0)
    return false;

  return MRI->hasOneUse(LoADDI->getOperand(0).getReg());
}
// Fold Offset into the global address lowering pair and retire Tail:
//   - the symbol operands of HiLUI and LoADDI receive Offset,
//   - Tail is queued for deletion,
//   - every use of Tail's result register is redirected to LoADDI's result.
void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &HiLUI,
                                         MachineInstr &LoADDI,
                                         MachineInstr &Tail, int64_t Offset) {
  // %hi and %lo must refer to the same "symbol + offset" expression.
  MachineOperand &HiSym = HiLUI.getOperand(1);
  MachineOperand &LoSym = LoADDI.getOperand(2);
  HiSym.setOffset(Offset);
  LoSym.setOffset(Offset);

  // Tail no longer computes anything new; its users read the ADDI instead.
  DeadInstrs.insert(&Tail);
  const unsigned TailDestReg = Tail.getOperand(0).getReg();
  const unsigned AddrDestReg = LoADDI.getOperand(0).getReg();
  MRI->replaceRegWith(TailDestReg, AddrDestReg);

  LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
                    << " " << HiLUI << " " << LoADDI;);
}
// Detect patterns for large offsets that are passed into an ADD instruction.
//
// Base address lowering is of the form:
//   HiLUI:  lui  vreg1, %hi(s)
//   LoADDI: addi vreg2, vreg1, %lo(s)
//
// The large offset added to it can take one of two forms:
//   1) Offset that has non zero bits in both the lower 12 bits and the
//      upper 20 bits:
//        OffsetLui:  lui  vreg3, 4
//        OffsetTail: addi voff, vreg3, 188
//   2) Offset that has non zero bits in the upper 20 bits only:
//        OffsetTail: lui  voff, 128
//
// Either form feeds:
//   TailAdd: add vreg4, vreg2, voff
//
// TailAdd is the ADD, GAReg the register holding the lowered global address
// (the result of LoADDI). On success the constant is returned in Offset and
// the instructions that materialised it are queued in DeadInstrs. Returns
// false, leaving DeadInstrs untouched, when the pattern does not match.
bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd,
                                               unsigned GAReg,
                                               int64_t &Offset) {
  assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!");
  unsigned Rs = TailAdd.getOperand(1).getReg();
  unsigned Rt = TailAdd.getOperand(2).getReg();
  unsigned Reg = Rs == GAReg ? Rt : Rs;

  // Can't fold if the register has more than one use.
  if (!MRI->hasOneUse(Reg))
    return false;

  // getVRegDef() yields null when no defining instruction is found; there is
  // nothing to match in that case.
  MachineInstr *OffsetTail = MRI->getVRegDef(Reg);
  if (!OffsetTail)
    return false;

  // The definition can be an ADDI or a LUI.
  if (OffsetTail->getOpcode() == RISCV::ADDI) {
    // The offset value has non zero bits in both %hi and %lo parts.
    // Detect an ADDI that feeds from a LUI instruction.
    MachineOperand &AddiBaseOp = OffsetTail->getOperand(1);
    MachineOperand &AddiImmOp = OffsetTail->getOperand(2);
    // The ADDI must be "reg + plain immediate": reject a non-register base
    // (e.g. a frame index) and any non-immediate or relocated operand.
    if (!AddiBaseOp.isReg() || !AddiImmOp.isImm() ||
        AddiImmOp.getTargetFlags() != RISCVII::MO_None)
      return false;
    int64_t OffLo = AddiImmOp.getImm();

    // Confirm the producer really is a LUI before looking at its operands.
    MachineInstr *OffsetLui = MRI->getVRegDef(AddiBaseOp.getReg());
    if (!OffsetLui || OffsetLui->getOpcode() != RISCV::LUI)
      return false;
    MachineOperand &LuiImmOp = OffsetLui->getOperand(1);
    if (!LuiImmOp.isImm() || LuiImmOp.getTargetFlags() != RISCVII::MO_None ||
        !MRI->hasOneUse(OffsetLui->getOperand(0).getReg()))
      return false;
    int64_t OffHi = LuiImmOp.getImm();

    Offset = (OffHi << 12) + OffLo;
    LLVM_DEBUG(dbgs() << " Offset Instrs: " << *OffsetTail
                      << " " << *OffsetLui);
    DeadInstrs.insert(OffsetTail);
    DeadInstrs.insert(OffsetLui);
    return true;
  }

  if (OffsetTail->getOpcode() == RISCV::LUI) {
    // The offset value has all zero bits in the lower 12 bits. Only LUI
    // exists. Its operand must be a plain immediate, not a symbol.
    MachineOperand &LuiImmOp = OffsetTail->getOperand(1);
    if (!LuiImmOp.isImm())
      return false;
    LLVM_DEBUG(dbgs() << " Offset Instr: " << *OffsetTail);
    Offset = LuiImmOp.getImm() << 12;
    DeadInstrs.insert(OffsetTail);
    return true;
  }
  return false;
}
// Examine the single user ("Tail") of the lowered global address and, when
// that user merely adds a constant to it or accesses memory relative to it,
// fold the constant into the %hi/%lo operands.
//
// HiLUI / LoADDI are the pair accepted by detectLuiAddiGlobal(). Returns true
// if a fold was performed, false if the user is not a supported pattern.
bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
                                                  MachineInstr &LoADDI) {
  const unsigned AddrReg = LoADDI.getOperand(0).getReg();
  assert(MRI->hasOneUse(AddrReg) && "expected one use for LoADDI");
  // The only user of the lowered address.
  MachineInstr &Tail = *MRI->use_begin(AddrReg)->getParent();

  switch (Tail.getOpcode()) {
  case RISCV::ADDI: {
    // Small offset: it is the immediate operand of the ADDI.
    const int64_t Offset = Tail.getOperand(2).getImm();
    LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
    foldOffset(HiLUI, LoADDI, Tail, Offset);
    return true;
  }

  case RISCV::ADD: {
    // Large offset that does not fit the ADDI immediate. It is built by
    //   LUI hi_offset ; ADDI lo_offset   (non zero hi 20 and lo 12 bits), or
    //   LUI offset20                     (lower 12 bits are zero).
    int64_t Offset;
    if (!matchLargeOffset(Tail, AddrReg, Offset))
      return false;
    foldOffset(HiLUI, LoADDI, Tail, Offset);
    return true;
  }

  case RISCV::LB:
  case RISCV::LH:
  case RISCV::LW:
  case RISCV::LBU:
  case RISCV::LHU:
  case RISCV::LWU:
  case RISCV::LD:
  case RISCV::FLW:
  case RISCV::FLD:
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SW:
  case RISCV::SD:
  case RISCV::FSW:
  case RISCV::FSD: {
    // Transforms the sequence:            Into:
    // HiLUI:  lui vreg1, %hi(foo)          ---> lui vreg1, %hi(foo+8)
    // LoADDI: addi vreg2, vreg1, %lo(foo)  ---> lw vreg3, lo(foo+8)(vreg1)
    // Tail:   lw vreg3, 8(vreg2)
    if (Tail.getOperand(1).isFI())
      return false;
    // The lowered address must be the base of the access; if it is used in
    // any other position no folding is possible.
    if (Tail.getOperand(1).getReg() != AddrReg)
      return false;

    const int64_t Offset = Tail.getOperand(2).getImm();

    // Move the access offset into both symbol operands.
    HiLUI.getOperand(1).setOffset(Offset);
    MachineOperand &LoSym = LoADDI.getOperand(2);
    LoSym.setOffset(Offset);

    // Swap the immediate of the access for the %lo(symbol + offset) operand.
    Tail.RemoveOperand(2);
    Tail.addOperand(LoSym);

    // The access now uses the LUI result as its base. The LUI result was
    // only used by LoADDI, so MRI->replaceRegWith() is not needed.
    Tail.getOperand(1).setReg(HiLUI.getOperand(0).getReg());
    DeadInstrs.insert(&LoADDI);
    return true;
  }

  default:
    break;
  }

  LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
                    << Tail);
  return false;
}
// Pass entry point. Scans every instruction of Fn for a lowered global
// address (LUI %hi + ADDI %lo) and tries to fold the offset applied by its
// single user into the pair. Instructions made redundant are collected while
// scanning and erased once the scan is complete, so the instruction lists
// are not modified while they are being iterated.
//
// Returns true only if the function was actually modified.
bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  DeadInstrs.clear();
  MRI = &Fn.getRegInfo();
  bool MadeChange = false;
  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &HiLUI : MBB) {
      MachineInstr *LoADDI = nullptr;
      if (!detectLuiAddiGlobal(HiLUI, LoADDI))
        continue;
      LLVM_DEBUG(dbgs() << " Found lowered global address with one use: "
                        << *LoADDI->getOperand(2).getGlobal() << "\n");
      // If the use count is only one, merge the offset.
      MadeChange |= detectAndFoldOffset(HiLUI, *LoADDI);
    }
  }

  // Delete dead instructions.
  for (auto *MI : DeadInstrs)
    MI->eraseFromParent();
  // Do not keep pointers to erased instructions around.
  DeadInstrs.clear();
  return MadeChange;
}
/// Returns an instance of the Merge Base Offset Optimization pass.
/// The returned object is allocated with `new`; the function itself does not
/// retain a pointer to it.
FunctionPass *llvm::createRISCVMergeBaseOffsetOptPass() {
return new RISCVMergeBaseOffsetOpt();
}

View File

@ -78,6 +78,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
void addPreEmitPass() override;
void addPreRegAlloc() override;
};
}
@ -97,3 +98,7 @@ bool RISCVPassConfig::addInstSelector() {
}
// Pre-emit hook: adds the pass identified by BranchRelaxationPassID.
void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
// Pre-register-allocation hook: adds the merge base + offset pass.
// The pass is added unconditionally here.
// NOTE(review): skipping at OptNone presumably relies on the pass's own
// skipFunction() check rather than on this hook - confirm.
void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVMergeBaseOffsetOptPass());
}

View File

@ -49,6 +49,23 @@ if.end: ; preds = %if.then, %entry
ret void
}
; This test checks that the offset is reconstructed correctly when
; "addi" of the big offset has a negative immediate.
; without peephole this generates:
; lui a1, %hi(g)
; addi a1, a0, %lo(g)
; lui a0, 18 ---> offset
; addi a0, a0, -160
; add a0, a0, a1 ---> base + offset.
define i8* @big_offset_neg_addi() {
; CHECK-LABEL: big_offset_neg_addi:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(g+73568)
; CHECK-NEXT: addi a0, a0, %lo(g+73568)
; CHECK-NEXT: ret
ret i8* getelementptr inbounds ([1048576 x i8], [1048576 x i8]* @g, i32 0, i32 73568)
}
; This test checks for the case where the offset is only an LUI.
; without peephole this generates:
; lui a0, %hi(g)
@ -84,36 +101,19 @@ entry:
ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1)
}
; TODO: In this case we would get a better sequence if the offset didn't get
; merged back in %if.end and %if.then. The current peephole is not able to
; detect the shared global address node across blocks.
; Without the peephole we can generate:
;# %bb.0: # %entry
; lui a0, %hi(s)
; addi a0, a0, %lo(s)
; lw a1, 164(a0)
; beqz a1, .LBB0_2
;# %bb.1: # %if.end
; addi a0, a0, 168
; ret
;.LBB0_2: # %if.then
; addi a0, a0, 160
; ret
; Function Attrs: norecurse nounwind optsize readonly
define dso_local i32* @control_flow_no_mem(i32 %n) local_unnamed_addr #1 {
; CHECK-LABEL: control_flow_no_mem:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a0, %hi(s)
; CHECK-NEXT: addi a0, a0, %lo(s)
; CHECK-NEXT: lw a0, 164(a0)
; CHECK-NEXT: beqz a0, .LBB5_2
; CHECK-NEXT: lw a1, 164(a0)
; CHECK-NEXT: beqz a1, .LBB6_2
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: lui a0, %hi(s+168)
; CHECK-NEXT: addi a0, a0, %lo(s+168)
; CHECK-NEXT: addi a0, a0, 168
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB5_2: # %if.then
; CHECK-NEXT: lui a0, %hi(s+160)
; CHECK-NEXT: addi a0, a0, %lo(s+160)
; CHECK-NEXT: .LBB6_2: # %if.then
; CHECK-NEXT: addi a0, a0, 160
; CHECK-NEXT: ret
entry:
%0 = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 2), align 4
@ -125,32 +125,21 @@ if.end: ; preds = %if.then, %entry
ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 3)
}
; TODO: The offset shouldn't be separated in this case. We get a shorter
; sequence if it is merged in the LUI %hi and the ADDI %lo; the "ADDI" could be
; folded in the immediate part of "lhu", generating the sequence:
; lui a0, %hi(foo +8)
; lhu a0, %lo(foo+8)(a0)
; instead of:
; lui a0, %hi(foo)
; addi a0, a0, %lo(foo)
; lhu a0, 8(a0)
define dso_local i32 @load_half() nounwind {
; CHECK-LABEL: load_half:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw ra, 12(sp)
; CHECK-NEXT: lui a0, %hi(foo)
; CHECK-NEXT: addi a0, a0, %lo(foo)
; CHECK-NEXT: lhu a0, 8(a0)
; CHECK-NEXT: lui a0, %hi(foo+8)
; CHECK-NEXT: lhu a0, %lo(foo+8)(a0)
; CHECK-NEXT: addi a1, zero, 140
; CHECK-NEXT: bne a0, a1, .LBB6_2
; CHECK-NEXT: bne a0, a1, .LBB7_2
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: mv a0, zero
; CHECK-NEXT: lw ra, 12(sp)
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB6_2: # %if.then
; CHECK-NEXT: .LBB7_2: # %if.then
; CHECK-NEXT: call abort
entry:
%0 = load i16, i16* getelementptr inbounds ([6 x i16], [6 x i16]* @foo, i32 0, i32 4), align 2
@ -166,3 +155,15 @@ if.end:
}
declare void @abort()
define dso_local void @one_store() local_unnamed_addr {
; CHECK-LABEL: one_store:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a0, %hi(s+160)
; CHECK-NEXT: addi a1, zero, 10
; CHECK-NEXT: sw a1, %lo(s+160)(a0)
; CHECK-NEXT: ret
entry:
store i32 10, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4
ret void
}