GlobalISel: combine extracts & sequences created for legalization

Legalization ends up creating many G_SEQUENCE/G_EXTRACT pairs which leads to
inefficient codegen (even for -O0), so add a quick pass over the function to
remove them again.

llvm-svn: 280155
This commit is contained in:
Tim Northover 2016-08-30 20:51:25 +00:00
parent ea8814c4f7
commit aaa2a927bc
6 changed files with 198 additions and 11 deletions

View File

@ -293,6 +293,7 @@ public:
/// Idxs[0] + N)` of \p Src and similarly for subsequent bit-indexes.
///
/// \pre setBasicBlock or setMI must have been called.
/// \pre \p Indices must be in ascending order of bit position.
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildExtract(ArrayRef<LLT> ResTys,
@ -311,7 +312,7 @@ public:
/// destination register.
/// \pre The bits defined by each Op (derived from index and scalar size) must
/// not overlap.
/// \pre Each source operand must have a
/// \pre \p Indices must be in ascending order of bit position.
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildSequence(LLT ResTy, unsigned Res,

View File

@ -26,6 +26,8 @@
namespace llvm {
class MachineRegisterInfo;
class MachineLegalizePass : public MachineFunctionPass {
public:
static char ID;
@ -55,6 +57,9 @@ public:
MachineFunctionProperties::Property::Legalized);
}
bool combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
const TargetInstrInfo &TII);
bool runOnMachineFunction(MachineFunction &MF) override;
};
} // End namespace llvm.

View File

@ -193,6 +193,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(ArrayRef<LLT> ResTys,
assert(ResTys.size() == Results.size() && Results.size() == Indices.size() &&
"inconsistent number of regs");
assert(!Results.empty() && "invalid trivial extract");
assert(std::is_sorted(Indices.begin(), Indices.end()) &&
"extract offsets must be in ascending order");
auto MIB = BuildMI(getMF(), DL, getTII().get(TargetOpcode::G_EXTRACT));
for (unsigned i = 0; i < ResTys.size(); ++i)
@ -222,6 +224,8 @@ MachineIRBuilder::buildSequence(LLT ResTy, unsigned Res,
assert(OpTys.size() == Ops.size() && Ops.size() == Indices.size() &&
"incompatible args");
assert(!Ops.empty() && "invalid trivial sequence");
assert(std::is_sorted(Indices.begin(), Indices.end()) &&
"sequence offsets must be in ascending order");
MachineInstrBuilder MIB =
buildInstr(TargetOpcode::G_SEQUENCE, LLT::scalar(ResTy.getSizeInBits()));

View File

@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#define DEBUG_TYPE "legalize-mir"
@ -46,6 +47,70 @@ void MachineLegalizePass::getAnalysisUsage(AnalysisUsage &AU) const {
void MachineLegalizePass::init(MachineFunction &MF) {
}
/// Try to forward the inputs of a G_SEQUENCE directly to the users of a
/// G_EXTRACT that reads the G_SEQUENCE's result.
///
/// For every value defined by \p MI, look for a G_SEQUENCE input that was
/// inserted at exactly the same bit offset and with exactly the same type.
/// When one is found, every use of the extracted register is rewritten to use
/// the register that was fed into the G_SEQUENCE. If all defs of \p MI could
/// be forwarded this way, \p MI is erased, and the G_SEQUENCE is erased too
/// once its result has no remaining uses.
///
/// \param MI  Candidate instruction; anything other than G_EXTRACT is ignored.
/// \param MRI Used to find the instruction defining the extract's source and
///            to walk and rewrite uses.
/// \param TII Currently unused.
///
/// \return true if any operand was rewritten or any instruction was erased.
bool MachineLegalizePass::combineExtracts(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          const TargetInstrInfo &TII) {
  bool Changed = false;
  if (MI.getOpcode() != TargetOpcode::G_EXTRACT)
    return Changed;

  // G_EXTRACT operands: NumDefs results, then the source register, then one
  // bit offset per result.
  unsigned NumDefs = (MI.getNumOperands() - 1) / 2;
  unsigned SrcReg = MI.getOperand(NumDefs).getReg();
  MachineInstr &SeqI = *MRI.def_instr_begin(SrcReg);
  if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE)
    return Changed;

  // G_SEQUENCE operands: the result, then (register, bit offset) pairs.
  unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2;
  bool AllDefsReplaced = true;

  // Try to match each register extracted with a corresponding insertion formed
  // by the G_SEQUENCE. SeqIdx is never reset, so this is a single forward scan
  // that expects the offsets of both instructions to be in ascending order.
  for (unsigned Idx = 0, SeqIdx = 0; Idx < NumDefs; ++Idx) {
    MachineOperand &ExtractMO = MI.getOperand(Idx);
    assert(ExtractMO.isReg() && ExtractMO.isDef() &&
           "unexpected extract operand");

    unsigned ExtractReg = ExtractMO.getReg();
    unsigned ExtractPos = MI.getOperand(NumDefs + Idx + 1).getImm();

    // Skip sequence inputs that start before the position being extracted.
    while (SeqIdx < NumSeqSrcs &&
           SeqI.getOperand(2 * SeqIdx + 2).getImm() < ExtractPos)
      ++SeqIdx;

    // The extract can only be forwarded if an input starts at exactly the
    // extracted offset and has exactly the extracted type.
    if (SeqIdx == NumSeqSrcs ||
        SeqI.getOperand(2 * SeqIdx + 2).getImm() != ExtractPos ||
        SeqI.getType(SeqIdx + 1) != MI.getType(Idx)) {
      AllDefsReplaced = false;
      continue;
    }

    unsigned OrigReg = SeqI.getOperand(2 * SeqIdx + 1).getReg();
    assert(!TargetRegisterInfo::isPhysicalRegister(OrigReg) &&
           "unexpected physical register in G_SEQUENCE");

    // Finally we can replace the uses. setReg() moves the operand from
    // ExtractReg's use list onto OrigReg's, so the iterator has to be advanced
    // *before* the operand is rewritten; otherwise the walk would leave
    // ExtractReg's list after the first use and the remaining uses would keep
    // referring to a register whose definition may be erased below.
    for (auto UI = MRI.use_begin(ExtractReg), UE = MRI.use_end(); UI != UE;) {
      MachineOperand &Use = *UI;
      ++UI;
      Use.setReg(OrigReg);
      Changed = true;
    }
  }

  if (AllDefsReplaced) {
    // If SeqI was the next instruction in the BB and we removed it, we'd break
    // the outer iteration.
    assert(std::next(MachineBasicBlock::iterator(MI)) != SeqI &&
           "G_SEQUENCE does not dominate G_EXTRACT");

    MI.eraseFromParent();

    // Other extracts may still read the sequence; only drop it once dead.
    if (MRI.use_empty(SrcReg))
      SeqI.eraseFromParent();
    Changed = true;
  }

  return Changed;
}
bool MachineLegalizePass::runOnMachineFunction(MachineFunction &MF) {
// If the ISel pipeline failed, do not bother running that pass.
if (MF.getProperties().hasProperty(
@ -94,5 +159,19 @@ bool MachineLegalizePass::runOnMachineFunction(MachineFunction &MF) {
Changed |= Res == MachineLegalizeHelper::Legalized;
}
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
for (auto &MBB : MF) {
for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
// Get the next Instruction before we try to combine, because there's a
// good chance MI will be deleted.
NextMI = std::next(MI);
Changed |= combineExtracts(*MI, MRI, TII);
}
}
return Changed;
}

View File

@ -33,13 +33,16 @@ body: |
bb.0.entry:
liveins: %x0, %x1, %x2, %x3
; CHECK-LABEL: name: test_scalar_add_big
; CHECK-DAG: [[LHS_LO:%.*]](64), [[LHS_HI:%.*]](64) = G_EXTRACT { s64, s64, s128 } %4, 0, 64
; CHECK-DAG: [[RHS_LO:%.*]](64), [[RHS_HI:%.*]](64) = G_EXTRACT { s64, s64, s128 } %5, 0, 64
; CHECK-NOT: G_EXTRACT
; CHECK-NOT: G_SEQUENCE
; CHECK-DAG: [[CARRY0_32:%.*]](32) = G_CONSTANT s32 0
; CHECK-DAG: [[CARRY0:%[0-9]+]](1) = G_TRUNC { s1, s32 } [[CARRY0_32]]
; CHECK: [[RES_LO:%.*]](64), [[CARRY:%.*]](1) = G_UADDE s64 [[LHS_LO]], [[RHS_LO]], [[CARRY0]]
; CHECK: [[RES_HI:%.*]](64), {{%.*}}(1) = G_UADDE s64 [[LHS_HI]], [[RHS_HI]], [[CARRY]]
; CHECK: %6(128) = G_SEQUENCE { s128, s64, s64 } [[RES_LO]], 0, [[RES_HI]], 64
; CHECK: [[RES_LO:%.*]](64), [[CARRY:%.*]](1) = G_UADDE s64 %0, %2, [[CARRY0]]
; CHECK: [[RES_HI:%.*]](64), {{%.*}}(1) = G_UADDE s64 %1, %3, [[CARRY]]
; CHECK-NOT: G_EXTRACT
; CHECK-NOT: G_SEQUENCE
; CHECK: %x0 = COPY [[RES_LO]]
; CHECK: %x1 = COPY [[RES_HI]]
%0(64) = COPY %x0
%1(64) = COPY %x1
@ -93,11 +96,14 @@ body: |
bb.0.entry:
liveins: %q0, %q1, %q2, %q3
; CHECK-LABEL: name: test_vector_add
; CHECK-DAG: [[LHS_LO:%.*]](128), [[LHS_HI:%.*]](128) = G_EXTRACT { s128, s128, s256 } %4, 0, 128
; CHECK-DAG: [[RHS_LO:%.*]](128), [[RHS_HI:%.*]](128) = G_EXTRACT { s128, s128, s256 } %5, 0, 128
; CHECK: [[RES_LO:%.*]](128) = G_ADD <2 x s64> [[LHS_LO]], [[RHS_LO]]
; CHECK: [[RES_HI:%.*]](128) = G_ADD <2 x s64> [[LHS_HI]], [[RHS_HI]]
; CHECK: %6(256) = G_SEQUENCE { s256, s128, s128 } [[RES_LO]], 0, [[RES_HI]], 128
; CHECK-NOT: G_EXTRACT
; CHECK-NOT: G_SEQUENCE
; CHECK: [[RES_LO:%.*]](128) = G_ADD <2 x s64> %0, %2
; CHECK: [[RES_HI:%.*]](128) = G_ADD <2 x s64> %1, %3
; CHECK-NOT: G_EXTRACT
; CHECK-NOT: G_SEQUENCE
; CHECK: %q0 = COPY [[RES_LO]]
; CHECK: %q1 = COPY [[RES_HI]]
%0(128) = COPY %q0
%1(128) = COPY %q1

View File

@ -0,0 +1,92 @@
# RUN: llc -O0 -run-pass=legalize-mir -global-isel %s -o - 2>&1 | FileCheck %s
--- |
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-apple-ios"
define void @test_combines() {
entry:
ret void
}
...
---
name: test_combines
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
- { id: 3, class: _ }
- { id: 4, class: _ }
- { id: 5, class: _ }
- { id: 6, class: _ }
- { id: 7, class: _ }
- { id: 8, class: _ }
- { id: 9, class: _ }
- { id: 10, class: _ }
- { id: 11, class: _ }
- { id: 12, class: _ }
- { id: 13, class: _ }
- { id: 14, class: _ }
- { id: 15, class: _ }
- { id: 16, class: _ }
- { id: 17, class: _ }
- { id: 18, class: _ }
- { id: 19, class: _ }
- { id: 20, class: _ }
- { id: 21, class: _ }
- { id: 22, class: _ }
- { id: 23, class: _ }
- { id: 24, class: _ }
body: |
bb.0.entry:
liveins: %w0, %w1, %x2, %x3
%0(32) = COPY %w0
%1(32) = COPY %w1
%2(8) = G_TRUNC { s8, s32 } %0
; Only one of these extracts can be eliminated, the offsets don't match
; properly in the other cases.
; CHECK-LABEL: name: test_combines
; CHECK: %3(32) = G_SEQUENCE { s32, s8 } %2, 1
; CHECK: %4(8) = G_EXTRACT { s8, s32 } %3, 0
; CHECK-NOT: G_EXTRACT
; CHECK: %6(8) = G_EXTRACT { s8, s32 } %3, 2
; CHECK: %7(32) = G_ZEXT { s32, s8 } %2
%3(32) = G_SEQUENCE { s32, s8 } %2, 1
%4(8) = G_EXTRACT { s8, s32 } %3, 0
%5(8) = G_EXTRACT { s8, s32 } %3, 1
%6(8) = G_EXTRACT { s8, s32 } %3, 2
%7(32) = G_ZEXT { s32, s8 } %5
; Similarly, here the types don't match.
; CHECK: %10(32) = G_SEQUENCE { s32, s16, s16 } %8, 0, %9, 16
; CHECK: %11(1) = G_EXTRACT { s1, s32 } %10, 0
; CHECK: %12(32) = G_EXTRACT { s32, s32 } %10, 0
%8(16) = G_TRUNC { s16, s32 } %0
%9(16) = G_ADD s16 %8, %8
%10(32) = G_SEQUENCE { s32, s16, s16 } %8, 0, %9, 16
%11(1) = G_EXTRACT { s1, s32 } %10, 0
%12(32) = G_EXTRACT { s32, s32 } %10, 0
; CHECK-NOT: G_EXTRACT
; CHECK: %15(16) = G_ADD s16 %8, %9
%13(16), %14(16) = G_EXTRACT { s16, s16, s32 } %10, 0, 16
%15(16) = G_ADD s16 %13, %14
; CHECK: %18(64) = G_EXTRACT { <2 x s32>, s128 } %17, 0
; CHECK: %19(64) = G_ADD <2 x s32> %18, %18
%16(64) = COPY %x0
%17(128) = G_SEQUENCE { s128, s64, s64 } %16, 0, %16, 64
%18(64) = G_EXTRACT { <2 x s32>, s128 } %17, 0
%19(64) = G_ADD <2 x s32> %18, %18
; CHECK-NOT: G_SEQUENCE
; CHECK-NOT: G_EXTRACT
; CHECK: %24(32) = G_ADD s32 %0, %20
%20(32) = G_ADD s32 %0, %0
%21(64) = G_SEQUENCE { s64, s32, s32 } %0, 0, %20, 32
%22(32) = G_EXTRACT { s32, s64 } %21, 0
%23(32) = G_EXTRACT { s32, s64 } %21, 32
%24(32) = G_ADD s32 %22, %23
...