[mips] Handle branch expansion corner cases

When potential jump instruction and target are in the same segment, use
jump instruction with immediate field.

In cases where offset does not fit immediate value of a bc/j instructions,
offset is stored into register, and then jump register instruction is used.

Differential Revision: https://reviews.llvm.org/D48019

llvm-svn: 339126
This commit is contained in:
Aleksandar Beserminji 2018-08-07 10:45:45 +00:00
parent c2e9926549
commit 949a17c016
5 changed files with 314 additions and 94 deletions

View File

@ -128,6 +128,7 @@ struct MBBInfo {
uint64_t Size = 0;
bool HasLongBranch = false;
MachineInstr *Br = nullptr;
uint64_t Offset = 0;
MBBInfo() = default;
};
@ -154,8 +155,11 @@ private:
void splitMBB(MachineBasicBlock *MBB);
void initMBBInfo();
int64_t computeOffset(const MachineInstr *Br);
uint64_t computeOffsetFromTheBeginning(int MBB);
void replaceBranch(MachineBasicBlock &MBB, Iter Br, const DebugLoc &DL,
MachineBasicBlock *MBBOpnd);
bool buildProperJumpMI(MachineBasicBlock *MBB,
MachineBasicBlock::iterator Pos, DebugLoc DL);
void expandToLongBranch(MBBInfo &Info);
bool handleForbiddenSlot();
bool handlePossibleLongBranch();
@ -167,7 +171,6 @@ private:
SmallVector<MBBInfo, 16> MBBInfos;
bool IsPIC;
MipsABIInfo ABI;
unsigned LongBranchSeqSize;
bool ForceLongBranchFirstPass = false;
};
@ -176,7 +179,7 @@ private:
char MipsBranchExpansion::ID = 0;
INITIALIZE_PASS(MipsBranchExpansion, DEBUG_TYPE,
"Expand out of range branch instructions and prevent forbidden"
"Expand out of range branch instructions and fix forbidden"
" slot hazards",
false, false)
@ -294,14 +297,6 @@ void MipsBranchExpansion::initMBBInfo() {
for (MachineBasicBlock::instr_iterator MI = MBB->instr_begin();
MI != MBB->instr_end(); ++MI)
MBBInfos[I].Size += TII->getInstSizeInBytes(*MI);
// Search for MBB's branch instruction.
ReverseIter End = MBB->rend();
ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End);
if ((Br != End) && !Br->isIndirectBranch() &&
(Br->isConditionalBranch() || (Br->isUnconditionalBranch() && IsPIC)))
MBBInfos[I].Br = &*Br;
}
}
@ -326,6 +321,14 @@ int64_t MipsBranchExpansion::computeOffset(const MachineInstr *Br) {
return -Offset + 4;
}
// Returns the distance in bytes up until MBB
uint64_t MipsBranchExpansion::computeOffsetFromTheBeginning(int MBB) {
uint64_t Offset = 0;
for (int N = 0; N < MBB; ++N)
Offset += MBBInfos[N].Size;
return Offset;
}
// Replace Br with a branch which has the opposite condition code and a
// MachineBasicBlock operand MBBOpnd.
void MipsBranchExpansion::replaceBranch(MachineBasicBlock &MBB, Iter Br,
@ -359,6 +362,35 @@ void MipsBranchExpansion::replaceBranch(MachineBasicBlock &MBB, Iter Br,
Br->eraseFromParent();
}
bool MipsBranchExpansion::buildProperJumpMI(MachineBasicBlock *MBB,
MachineBasicBlock::iterator Pos,
DebugLoc DL) {
bool HasR6 = ABI.IsN64() ? STI->hasMips64r6() : STI->hasMips32r6();
bool AddImm = HasR6 && !STI->useIndirectJumpsHazard();
unsigned JR = ABI.IsN64() ? Mips::JR64 : Mips::JR;
unsigned JIC = ABI.IsN64() ? Mips::JIC64 : Mips::JIC;
unsigned JR_HB = ABI.IsN64() ? Mips::JR_HB64 : Mips::JR_HB;
unsigned JR_HB_R6 = ABI.IsN64() ? Mips::JR_HB64_R6 : Mips::JR_HB_R6;
unsigned JumpOp;
if (STI->useIndirectJumpsHazard())
JumpOp = HasR6 ? JR_HB_R6 : JR_HB;
else
JumpOp = HasR6 ? JIC : JR;
if (JumpOp == Mips::JIC && STI->inMicroMipsMode())
JumpOp = Mips::JIC_MMR6;
unsigned ATReg = ABI.IsN64() ? Mips::AT_64 : Mips::AT;
MachineInstrBuilder Instr =
BuildMI(*MBB, Pos, DL, TII->get(JumpOp)).addReg(ATReg);
if (AddImm)
Instr.addImm(0);
return !AddImm;
}
// Expand branch instructions to long branches.
// TODO: This function has to be fixed for beqz16 and bnez16, because it
// currently assumes that all branches have 16-bit offsets, and will produce
@ -479,33 +511,21 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) {
// In NaCl, modifying the sp is not allowed in branch delay slot.
// For MIPS32R6, we can skip using a delay slot branch.
if (STI->isTargetNaCl() ||
(STI->hasMips32r6() && !STI->useIndirectJumpsHazard()))
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
bool hasDelaySlot = buildProperJumpMI(BalTgtMBB, Pos, DL);
if (STI->isTargetNaCl() || !hasDelaySlot) {
BuildMI(*BalTgtMBB, std::prev(Pos), DL, TII->get(Mips::ADDiu), Mips::SP)
.addReg(Mips::SP)
.addImm(8);
if (STI->hasMips32r6() && !STI->useIndirectJumpsHazard()) {
const unsigned JICOp =
STI->inMicroMipsMode() ? Mips::JIC_MMR6 : Mips::JIC;
BuildMI(*BalTgtMBB, Pos, DL, TII->get(JICOp))
.addReg(Mips::AT)
.addImm(0);
} else {
unsigned JROp =
STI->useIndirectJumpsHazard()
? (STI->hasMips32r6() ? Mips::JR_HB_R6 : Mips::JR_HB)
: Mips::JR;
BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT);
}
if (hasDelaySlot) {
if (STI->isTargetNaCl()) {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP));
} else
} else {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
.addReg(Mips::SP)
.addImm(8);
}
BalTgtMBB->rbegin()->bundleWithPred();
}
} else {
@ -597,46 +617,94 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::SP_64)
.addImm(0);
if (STI->hasMips64r6() && !STI->useIndirectJumpsHazard()) {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
bool hasDelaySlot = buildProperJumpMI(BalTgtMBB, Pos, DL);
// If there is no delay slot, Insert stack adjustment before
if (!hasDelaySlot) {
BuildMI(*BalTgtMBB, std::prev(Pos), DL, TII->get(Mips::DADDiu),
Mips::SP_64)
.addReg(Mips::SP_64)
.addImm(16);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JIC64))
.addReg(Mips::AT_64)
.addImm(0);
} else {
unsigned JROp =
STI->useIndirectJumpsHazard()
? (STI->hasMips32r6() ? Mips::JR_HB64_R6 : Mips::JR_HB64)
: Mips::JR64;
BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT_64);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
.addReg(Mips::SP_64)
.addImm(16);
BalTgtMBB->rbegin()->bundleWithPred();
}
}
assert(LongBrMBB->size() + BalTgtMBB->size() == LongBranchSeqSize);
} else {
// Pre R6: R6:
// $longbr: $longbr:
// j $tgt bc $tgt
// nop $fallthrough
// $fallthrough:
//
} else { // Not PIC
Pos = LongBrMBB->begin();
LongBrMBB->addSuccessor(TgtMBB);
if (STI->hasMips32r6())
// Compute the position of the potentiall jump instruction (basic blocks
// before + 4 for the instruction)
uint64_t JOffset = computeOffsetFromTheBeginning(MBB->getNumber()) +
MBBInfos[MBB->getNumber()].Size + 4;
uint64_t TgtMBBOffset = computeOffsetFromTheBeginning(TgtMBB->getNumber());
// If it's a forward jump, then TgtMBBOffset will be shifted by two
// instructions
if (JOffset < TgtMBBOffset)
TgtMBBOffset += 2 * 4;
// Compare 4 upper bits to check if it's the same segment
bool SameSegmentJump = JOffset >> 28 == TgtMBBOffset >> 28;
if (STI->hasMips32r6() && TII->isBranchOffsetInRange(Mips::BC, I.Offset)) {
// R6:
// $longbr:
// bc $tgt
// $fallthrough:
//
BuildMI(*LongBrMBB, Pos, DL,
TII->get(STI->inMicroMipsMode() ? Mips::BC_MMR6 : Mips::BC))
.addMBB(TgtMBB);
else
} else if (SameSegmentJump) {
// Pre R6:
// $longbr:
// j $tgt
// nop
// $fallthrough:
//
MIBundleBuilder(*LongBrMBB, Pos)
.append(BuildMI(*MFp, DL, TII->get(Mips::J)).addMBB(TgtMBB))
.append(BuildMI(*MFp, DL, TII->get(Mips::NOP)));
assert(LongBrMBB->size() == LongBranchSeqSize);
} else {
// At this point, offset where we need to branch does not fit into
// immediate field of the branch instruction and is not in the same
// segment as jump instruction. Therefore we will break it into couple
// instructions, where we first load the offset into register, and then we
// do branch register.
if (ABI.IsN64()) {
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi))
.addReg(Mips::AT_64)
.addMBB(TgtMBB, MipsII::MO_HIGHEST);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu),
Mips::AT_64)
.addReg(Mips::AT_64)
.addMBB(TgtMBB, MipsII::MO_HIGHER);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
.addReg(Mips::AT_64)
.addImm(16);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu),
Mips::AT_64)
.addReg(Mips::AT_64)
.addMBB(TgtMBB, MipsII::MO_ABS_HI);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
.addReg(Mips::AT_64)
.addImm(16);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu),
Mips::AT_64)
.addReg(Mips::AT_64)
.addMBB(TgtMBB, MipsII::MO_ABS_LO);
} else {
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi))
.addReg(Mips::AT)
.addMBB(TgtMBB, MipsII::MO_ABS_HI);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_ADDiu),
Mips::AT)
.addReg(Mips::AT)
.addMBB(TgtMBB, MipsII::MO_ABS_LO);
}
buildProperJumpMI(LongBrMBB, Pos, DL);
}
}
if (I.Br->isUnconditionalBranch()) {
@ -666,8 +734,6 @@ bool MipsBranchExpansion::handleForbiddenSlot() {
if (!STI->hasMips32r6() || STI->inMicroMipsMode())
return false;
const MipsInstrInfo *TII = STI->getInstrInfo();
bool Changed = false;
for (MachineFunction::iterator FI = MFp->begin(); FI != MFp->end(); ++FI) {
@ -704,66 +770,65 @@ bool MipsBranchExpansion::handleForbiddenSlot() {
}
bool MipsBranchExpansion::handlePossibleLongBranch() {
LongBranchSeqSize = IsPIC ? ((ABI.IsN64() || STI->isTargetNaCl()) ? 10 : 9)
: (STI->hasMips32r6() ? 1 : 2);
if (STI->inMips16Mode() || !STI->enableLongBranchPass())
return false;
if (SkipLongBranch)
return false;
initMBBInfo();
SmallVectorImpl<MBBInfo>::iterator I, E = MBBInfos.end();
bool EverMadeChange = false, MadeChange = true;
while (MadeChange) {
MadeChange = false;
initMBBInfo();
for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) {
MachineBasicBlock *MBB = MFp->getBlockNumbered(I);
// Search for MBB's branch instruction.
ReverseIter End = MBB->rend();
ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End);
if ((Br != End) && Br->isBranch() && !Br->isIndirectBranch() &&
(Br->isConditionalBranch() ||
(Br->isUnconditionalBranch() && IsPIC))) {
int64_t Offset = computeOffset(&*Br);
if (STI->isTargetNaCl()) {
// The offset calculation does not include sandboxing instructions
// that will be added later in the MC layer. Since at this point we
// don't know the exact amount of code that "sandboxing" will add, we
// conservatively estimate that code will not grow more than 100%.
Offset *= 2;
}
if (ForceLongBranchFirstPass ||
!TII->isBranchOffsetInRange(Br->getOpcode(), Offset)) {
MBBInfos[I].Offset = Offset;
MBBInfos[I].Br = &*Br;
}
}
} // End for
ForceLongBranchFirstPass = false;
SmallVectorImpl<MBBInfo>::iterator I, E = MBBInfos.end();
for (I = MBBInfos.begin(); I != E; ++I) {
// Skip if this MBB doesn't have a branch or the branch has already been
// converted to a long branch.
if (!I->Br || I->HasLongBranch)
if (!I->Br)
continue;
int64_t Offset = computeOffset(I->Br);
if (STI->isTargetNaCl()) {
// The offset calculation does not include sandboxing instructions
// that will be added later in the MC layer. Since at this point we
// don't know the exact amount of code that "sandboxing" will add, we
// conservatively estimate that code will not grow more than 100%.
Offset *= 2;
}
// Check if offset fits into the immediate field of the branch.
if (!ForceLongBranchFirstPass &&
TII->isBranchOffsetInRange(I->Br->getOpcode(), Offset))
continue;
I->HasLongBranch = true;
I->Size += LongBranchSeqSize * 4;
expandToLongBranch(*I);
++LongBranches;
EverMadeChange = MadeChange = true;
}
MFp->RenumberBlocks();
}
ForceLongBranchFirstPass = false;
if (!EverMadeChange)
return false;
// Do the expansion.
for (I = MBBInfos.begin(); I != E; ++I)
if (I->HasLongBranch) {
expandToLongBranch(*I);
}
MFp->RenumberBlocks();
return true;
return EverMadeChange;
}
bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) {

View File

@ -280,6 +280,8 @@ bool MipsInstrInfo::isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset)
switch (BranchOpc) {
case Mips::B:
case Mips::BAL:
case Mips::BAL_BR:
case Mips::BAL_BR_MM:
case Mips::BC1F:
case Mips::BC1FL:
case Mips::BC1T:

View File

@ -0,0 +1,37 @@
; RUN: llc -O0 -mtriple=mips-img-linux-gnu -mcpu=mips32r6 < %s -o - | FileCheck %s --check-prefixes=CHECK32R6
; RUN: llc -O0 -mtriple=mips64-img-linux-gnu -mcpu=mips64r6 < %s -o - | FileCheck %s --check-prefixes=CHECK64R6
declare i32 @foo(...)
define i32 @boo1(i32 signext %argc) {
; CHECK-LABEL: test_label_1:
; CHECK32R6: j $BB0_3
; CHECK32R6-NEXT: nop
; CHECK64R6: j .LBB0_5
; CHECK64R6-NEXT: nop
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 %argc, i32* %argc.addr, align 4
call void asm sideeffect "test_label_1:", "~{$1}"()
%0 = load i32, i32* %argc.addr, align 4
%cmp = icmp sgt i32 %0, 1
br i1 %cmp, label %if.then, label %if.end
if.then:
call void asm sideeffect ".space 68435052", "~{$1}"()
%call = call i32 bitcast (i32 (...)* @foo to i32 ()*)()
store i32 %call, i32* %retval, align 4
br label %return
if.end:
store i32 0, i32* %retval, align 4
br label %return
return:
%1 = load i32, i32* %retval, align 4
ret i32 %1
}

View File

@ -0,0 +1,41 @@
; RUN: llc -O0 -mtriple=mips-img-linux-gnu -mcpu=mips32r2 < %s -o - | FileCheck %s --check-prefixes=CHECK32
; RUN: llc -O0 -mtriple=mips-img-linux-gnu -mcpu=mips32r6 < %s -o - | FileCheck %s --check-prefixes=CHECK32
; RUN: llc -O0 -mtriple=mips64-img-linux-gnu -mcpu=mips64r2 < %s -o - | FileCheck %s --check-prefixes=CHECK64R2
; RUN: llc -O0 -mtriple=mips64-img-linux-gnu -mcpu=mips64r6 < %s -o - | FileCheck %s --check-prefixes=CHECK64R6
declare i32 @foo(...)
define i32 @boo2(i32 signext %argc) {
; CHECK-LABEL: test_label_2:
; CHECK32: j $BB0_5
; CHECK32-NEXT: nop
; CHECK64R2: j .LBB0_5
; CHECK64R2-NEXT: nop
; CHECK64R6: j .LBB0_5
; CHECK64R6-NEXT: nop
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 %argc, i32* %argc.addr, align 4
call void asm sideeffect "test_label_2:", "~{$1}"()
%0 = load i32, i32* %argc.addr, align 4
%cmp = icmp sgt i32 %0, 1
br i1 %cmp, label %if.then, label %if.end
if.then:
call void asm sideeffect ".space 268435052", "~{$1}"()
%call = call i32 bitcast (i32 (...)* @foo to i32 ()*)()
store i32 %call, i32* %retval, align 4
br label %return
if.end:
store i32 0, i32* %retval, align 4
br label %return
return:
%1 = load i32, i32* %retval, align 4
ret i32 %1
}

View File

@ -0,0 +1,75 @@
; RUN: llc -O0 -mtriple=mips-img-linux-gnu -mcpu=mips32r2 < %s -o - | FileCheck %s --check-prefixes=CHECK32R2
; RUN: llc -O0 -mtriple=mips-img-linux-gnu -mcpu=mips32r6 < %s -o - | FileCheck %s --check-prefixes=CHECK32R6
; RUN: llc -O0 -mtriple=mips-img-linux-gnu -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard < %s -o - | FileCheck %s --check-prefixes=CHECK32-IJH
; RUN: llc -O0 -mtriple=mips-img-linux-gnu -mcpu=mips32r6 -mattr=+use-indirect-jump-hazard < %s -o - | FileCheck %s --check-prefixes=CHECK32-IJH
; RUN: llc -O0 -mtriple=mips64-img-linux-gnu -mcpu=mips64r2 < %s -o - | FileCheck %s --check-prefixes=CHECK64R2
; RUN: llc -O0 -mtriple=mips64-img-linux-gnu -mcpu=mips64r6 < %s -o - | FileCheck %s --check-prefixes=CHECK64R6
; RUN: llc -O0 -mtriple=mips64-img-linux-gnu -mcpu=mips64r2 -mattr=+use-indirect-jump-hazard < %s -o - | FileCheck %s --check-prefixes=CHECK64-IJH
; RUN: llc -O0 -mtriple=mips64-img-linux-gnu -mcpu=mips64r6 -mattr=+use-indirect-jump-hazard < %s -o - | FileCheck %s --check-prefixes=CHECK64-IJH
declare i32 @foo(...)
define i32 @boo3(i32 signext %argc) {
; CHECK-LABEL: test_label_3:
; CHECK32R2: lui $1, %hi($BB0_4)
; CHECK32R2-NEXT: addiu $1, $1, %lo($BB0_4)
; CHECK32R2-NEXT: jr $1
; CHECK32R6: lui $1, %hi($BB0_4)
; CHECK32R6-NEXT: addiu $1, $1, %lo($BB0_4)
; CHECK32R6-NEXT: jrc $1
; CHECK32-IJH: lui $1, %hi($BB0_4)
; CHECK32-IJH-NEXT: addiu $1, $1, %lo($BB0_4)
; CHECK32-IJH-NEXT: jr.hb $1
; CHECK64R2: lui $1, %highest(.LBB0_4)
; CHECK64R2-NEXT: daddiu $1, $1, %higher(.LBB0_4)
; CHECK64R2-NEXT: dsll $1, $1, 16
; CHECK64R2-NEXT: daddiu $1, $1, %hi(.LBB0_4)
; CHECK64R2-NEXT: dsll $1, $1, 16
; CHECK64R2-NEXT: daddiu $1, $1, %lo(.LBB0_4)
; CHECK64R2-NEXT: jr $1
; CHECK64R6: lui $1, %highest(.LBB0_4)
; CHECK64R6-NEXT: daddiu $1, $1, %higher(.LBB0_4)
; CHECK64R6-NEXT: dsll $1, $1, 16
; CHECK64R6-NEXT: daddiu $1, $1, %hi(.LBB0_4)
; CHECK64R6-NEXT: dsll $1, $1, 16
; CHECK64R6-NEXT: daddiu $1, $1, %lo(.LBB0_4)
; CHECK64R6-NEXT: jrc $1
; CHECK64-IJH: lui $1, %highest(.LBB0_4)
; CHECK64-IJH-NEXT: daddiu $1, $1, %higher(.LBB0_4)
; CHECK64-IJH-NEXT: dsll $1, $1, 16
; CHECK64-IJH-NEXT: daddiu $1, $1, %hi(.LBB0_4)
; CHECK64-IJH-NEXT: dsll $1, $1, 16
; CHECK64-IJH-NEXT: daddiu $1, $1, %lo(.LBB0_4)
; CHECK64-IJH-NEXT: jr.hb $1
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 %argc, i32* %argc.addr, align 4
call void asm sideeffect "test_label_3:", "~{$1}"()
%0 = load i32, i32* %argc.addr, align 4
%cmp = icmp sgt i32 %0, 1
br i1 %cmp, label %if.then, label %if.end
if.then:
call void asm sideeffect ".space 268435452", "~{$1}"()
%call = call i32 bitcast (i32 (...)* @foo to i32 ()*)()
store i32 %call, i32* %retval, align 4
br label %return
if.end:
store i32 0, i32* %retval, align 4
br label %return
return:
%1 = load i32, i32* %retval, align 4
ret i32 %1
}