[Hexagon] Bitwise operations for insert/extract word not simplified

Change the bit simplifier to generate REG_SEQUENCE instructions in
addition to COPY, which will handle cases of word insert/extract.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@276787 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Krzysztof Parzyszek 2016-07-26 18:30:11 +00:00
parent 2b8ecef8b7
commit 17a42256ee
4 changed files with 132 additions and 29 deletions

View File

@ -1249,6 +1249,8 @@ bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
const RegisterSet&) {
if (!BT.reached(&B))
return false;
bool Changed = false;
for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
@ -1295,7 +1297,15 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
.addReg(RS.Reg, 0, RS.Sub);
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
BT.put(BitTracker::RegisterRef(NewR), SC);
// Do not update the bit tracker. This pass can create copies between
// registers that don't have the exact same values. Updating the
// tracker here may be tricky. E.g.
// vreg1 = inst vreg2 ; vreg1 != vreg2, but used bits are equal
//
// vreg3 = copy vreg2 ; <- inserted
// ... = vreg3 ; <- replaced from vreg2
// Indirectly, we can create a "copy" between vreg1 and vreg2 even
// though their exact values do not match.
Changed = true;
break;
}
@ -1317,8 +1327,8 @@ namespace {
MachineRegisterInfo &mri)
: Transformation(true), HII(hii), MRI(mri), BT(bt) {}
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
static bool isTfrConst(const MachineInstr &MI);
private:
bool isTfrConst(const MachineInstr &MI) const;
bool isConst(unsigned R, int64_t &V) const;
unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C,
MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL);
@ -1346,7 +1356,7 @@ bool ConstGeneration::isConst(unsigned R, int64_t &C) const {
return true;
}
bool ConstGeneration::isTfrConst(const MachineInstr &MI) const {
bool ConstGeneration::isTfrConst(const MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
switch (Opc) {
case Hexagon::A2_combineii:
@ -1413,6 +1423,8 @@ unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
if (!BT.reached(&B))
return false;
bool Changed = false;
RegisterSet Defs;
@ -1426,14 +1438,16 @@ bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
unsigned DR = Defs.find_first();
if (!TargetRegisterInfo::isVirtualRegister(DR))
continue;
int64_t C;
if (isConst(DR, C)) {
uint64_t U;
const BitTracker::RegisterCell &DRC = BT.lookup(DR);
if (HBS::getConst(DRC, 0, DRC.width(), U)) {
int64_t C = U;
DebugLoc DL = I->getDebugLoc();
auto At = I->isPHI() ? B.getFirstNonPHI() : I;
unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
if (ImmReg) {
HBS::replaceReg(DR, ImmReg, MRI);
BT.put(ImmReg, BT.lookup(DR));
BT.put(ImmReg, DRC);
Changed = true;
}
}
@ -1467,6 +1481,7 @@ namespace {
const HexagonInstrInfo &HII;
MachineRegisterInfo &MRI;
BitTracker &BT;
RegisterSet Forbidden;
};
class CopyPropagation : public Transformation {
@ -1491,17 +1506,20 @@ bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
if (!BT.has(Inp.Reg))
return false;
const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
auto *FRC = HBS::getFinalVRegClass(Inp, MRI);
unsigned B, W;
if (!HBS::getSubregMask(Inp, B, W, MRI))
return false;
for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI))
if (!BT.has(R) || Forbidden[R])
continue;
const BitTracker::RegisterCell &RC = BT.lookup(R);
unsigned RW = RC.width();
if (W == RW) {
if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R))
if (FRC != MRI.getRegClass(R))
continue;
if (!HBS::isTransparentCopy(R, Inp, MRI))
continue;
if (!HBS::isEqual(InpRC, B, RC, 0, W))
continue;
@ -1524,7 +1542,8 @@ bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
else
continue;
Out.Reg = R;
return true;
if (HBS::isTransparentCopy(Out, Inp, MRI))
return true;
}
return false;
}
@ -1532,6 +1551,8 @@ bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
bool CopyGeneration::processBlock(MachineBasicBlock &B,
const RegisterSet &AVs) {
if (!BT.reached(&B))
return false;
RegisterSet AVB(AVs);
bool Changed = false;
RegisterSet Defs;
@ -1543,20 +1564,44 @@ bool CopyGeneration::processBlock(MachineBasicBlock &B,
HBS::getInstrDefs(*I, Defs);
unsigned Opc = I->getOpcode();
if (CopyPropagation::isCopyReg(Opc))
if (CopyPropagation::isCopyReg(Opc) || ConstGeneration::isTfrConst(*I))
continue;
DebugLoc DL = I->getDebugLoc();
auto At = I->isPHI() ? B.getFirstNonPHI() : I;
for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
BitTracker::RegisterRef MR;
if (!findMatch(R, MR, AVB))
auto *FRC = HBS::getFinalVRegClass(R, MRI);
if (findMatch(R, MR, AVB)) {
unsigned NewR = MRI.createVirtualRegister(FRC);
BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
.addReg(MR.Reg, 0, MR.Sub);
BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
HBS::replaceReg(R, NewR, MRI);
Forbidden.insert(R);
continue;
DebugLoc DL = I->getDebugLoc();
auto *FRC = HBS::getFinalVRegClass(MR, MRI);
unsigned NewR = MRI.createVirtualRegister(FRC);
auto At = I->isPHI() ? B.getFirstNonPHI() : I;
BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
.addReg(MR.Reg, 0, MR.Sub);
BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
}
if (FRC == &Hexagon::DoubleRegsRegClass) {
// Try to generate REG_SEQUENCE.
BitTracker::RegisterRef TL = { R, Hexagon::subreg_loreg };
BitTracker::RegisterRef TH = { R, Hexagon::subreg_hireg };
BitTracker::RegisterRef ML, MH;
if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) {
auto *FRC = HBS::getFinalVRegClass(R, MRI);
unsigned NewR = MRI.createVirtualRegister(FRC);
BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)
.addReg(ML.Reg, 0, ML.Sub)
.addImm(Hexagon::subreg_loreg)
.addReg(MH.Reg, 0, MH.Sub)
.addImm(Hexagon::subreg_hireg);
BT.put(BitTracker::RegisterRef(NewR), BT.get(R));
HBS::replaceReg(R, NewR, MRI);
Forbidden.insert(R);
}
}
}
}
@ -2121,6 +2166,8 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
bool BitSimplification::processBlock(MachineBasicBlock &B,
const RegisterSet &AVs) {
if (!BT.reached(&B))
return false;
bool Changed = false;
RegisterSet AVB = AVs;
RegisterSet Defs;
@ -2203,7 +2250,11 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
RegisterSet ARE; // Available registers for RIE.
RedundantInstrElimination RIE(BT, HII, MRI);
Changed |= visitBlock(Entry, RIE, ARE);
bool Ried = visitBlock(Entry, RIE, ARE);
if (Ried) {
Changed = true;
BT.run();
}
RegisterSet ACG; // Available registers for CG.
CopyGeneration CopyG(BT, HII, MRI);

View File

@ -138,8 +138,21 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
if (NumDefs == 0)
return false;
if (MI.mayLoad())
return evaluateLoad(MI, Inputs, Outputs);
using namespace Hexagon;
unsigned Opc = MI.getOpcode();
if (MI.mayLoad()) {
switch (Opc) {
// These instructions may be marked as mayLoad, but they are generating
// immediate values, so skip them.
case CONST32:
case CONST32_Int_Real:
case CONST64_Int_Real:
break;
default:
return evaluateLoad(MI, Inputs, Outputs);
}
}
// Check COPY instructions that copy formal parameters into virtual
// registers. Such parameters can be sign- or zero-extended at the
@ -174,8 +187,6 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
}
RegisterRefs Reg(MI);
unsigned Opc = MI.getOpcode();
using namespace Hexagon;
#define op(i) MI.getOperand(i)
#define rc(i) RegisterCell::ref(getCell(Reg[i], Inputs))
#define im(i) MI.getOperand(i).getImm()
@ -246,9 +257,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
case A2_tfrsi:
case A2_tfrpi:
case CONST32:
case CONST32_Float_Real:
case CONST32_Int_Real:
case CONST64_Float_Real:
case CONST64_Int_Real:
return rr0(eIMM(im(1), W0), Outputs);
case TFR_PdFalse:

View File

@ -0,0 +1,43 @@
; RUN: llc -march=hexagon -disable-hsdr < %s | FileCheck %s
; Check that we don't generate any bitwise operations.
; CHECK-NOT: = or(
; CHECK-NOT: = and(
target triple = "hexagon"
; Test input: a 64-bit value is repeatedly assembled from a freshly loaded
; 32-bit low word and a carried high word, passed to the
; llvm.hexagon.A2.addp intrinsic (same value for both operands), and the
; upper 32 bits of the final intrinsic result are returned.
; The high-word/low-word merging is expressed in the IR with zext/or/and/lshr;
; the directives at the top of this file expect the generated code to contain
; no "or" or "and" instructions.
define i32 @fred(i32* nocapture readonly %p, i32 %n) #0 {
entry:
; Load the first word and enter the loop only when %n > 1 (unsigned).
%t.sroa.0.048 = load i32, i32* %p, align 4
%cmp49 = icmp ugt i32 %n, 1
br i1 %cmp49, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
; Loop-carried state: the low word (most recently loaded i32), the high-word
; accumulator (i64, starts at 0), and the index (starts at 1).
%t.sroa.0.052 = phi i32 [ %t.sroa.0.0, %for.body ], [ %t.sroa.0.048, %entry ]
%t.sroa.11.051 = phi i64 [ %t.sroa.11.0.extract.shift, %for.body ], [ 0, %entry ]
%i.050 = phi i32 [ %inc, %for.body ], [ 1, %entry ]
; Word insert: the zero-extended low word has no bits set in the upper half,
; and the accumulator (0 or a value masked below) has none in the lower half,
; so the "or" simply combines two disjoint 32-bit halves.
%t.sroa.0.0.insert.ext = zext i32 %t.sroa.0.052 to i64
%t.sroa.0.0.insert.insert = or i64 %t.sroa.0.0.insert.ext, %t.sroa.11.051
%0 = tail call i64 @llvm.hexagon.A2.addp(i64 %t.sroa.0.0.insert.insert, i64 %t.sroa.0.0.insert.insert)
; -4294967296 is 0xFFFFFFFF00000000: keep only the upper 32 bits of the result.
%t.sroa.11.0.extract.shift = and i64 %0, -4294967296
; Load p[i] as the next low word, advance the index, and exit once it equals %n.
%arrayidx4 = getelementptr inbounds i32, i32* %p, i32 %i.050
%inc = add nuw i32 %i.050, 1
%t.sroa.0.0 = load i32, i32* %arrayidx4, align 4
%exitcond = icmp eq i32 %inc, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
; Final values of the low word and high-word accumulator, coming either
; straight from the entry block or from the last loop iteration.
%t.sroa.0.0.lcssa = phi i32 [ %t.sroa.0.048, %entry ], [ %t.sroa.0.0, %for.body ]
%t.sroa.11.0.lcssa = phi i64 [ 0, %entry ], [ %t.sroa.11.0.extract.shift, %for.body ]
; Same word-insert pattern as in the loop body, followed by one more
; intrinsic call.
%t.sroa.0.0.insert.ext17 = zext i32 %t.sroa.0.0.lcssa to i64
%t.sroa.0.0.insert.insert19 = or i64 %t.sroa.0.0.insert.ext17, %t.sroa.11.0.lcssa
%1 = tail call i64 @llvm.hexagon.A2.addp(i64 %t.sroa.0.0.insert.insert19, i64 %t.sroa.0.0.insert.insert19)
; Word extract: shift the upper 32 bits down and truncate to i32 for the
; return value.
%t.sroa.11.0.extract.shift41 = lshr i64 %1, 32
%t.sroa.11.0.extract.trunc42 = trunc i64 %t.sroa.11.0.extract.shift41 to i32
ret i32 %t.sroa.11.0.extract.trunc42
}
declare i64 @llvm.hexagon.A2.addp(i64, i64) #1
attributes #0 = { norecurse nounwind readonly }
attributes #1 = { nounwind readnone }

View File

@ -1,6 +1,6 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hsdr < %s | FileCheck %s
; CHECK: r{{[0-9]}}:{{[0-9]}} = combine({{r[0-9]|#0}}, r{{[0-9]}})
; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32)
; RUN: llc -march=hexagon -disable-hsdr < %s | FileCheck %s
; CHECK-DAG: r0 = memw
; CHECK-DAG: r1 = memw
%struct.small = type { i32, i32 }
@ -8,7 +8,7 @@
define void @foo() nounwind {
entry:
%0 = load i64, i64* bitcast (%struct.small* @s1 to i64*), align 1
%0 = load i64, i64* bitcast (%struct.small* @s1 to i64*), align 4
call void @bar(i64 %0)
ret void
}