[AArch64] Lower sdiv x, pow2 using add + select + shift.

The target-independent DAGcombiner will generate:
asr w1, X, #31 w1 = splat sign bit.
add X, X, w1, lsr #28 X = X + 0 or pow2-1
asr w0, X, asr #4 w0 = X/pow2

However, the add + shifts is expensive, so generate:
add w0, X, 15 w0 = X + pow2-1
cmp X, wzr X - 0
csel X, w0, X, lt X = (X < 0) ? X + pow2-1 : X;
asr w0, X, asr 4 w0 = X/pow2

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213758 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chad Rosier 2014-07-23 14:57:52 +00:00
parent 3922da8ae8
commit 67c325e9f0
5 changed files with 140 additions and 3 deletions

View File

@ -2545,6 +2545,11 @@ public:
SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
bool IsAfterLegalization,
std::vector<SDNode *> *Created) const;
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
std::vector<SDNode *> *Created) const {
return SDValue();
}
//===--------------------------------------------------------------------===//
// Legalization utility functions

View File

@ -304,6 +304,7 @@ namespace {
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
@ -2033,6 +2034,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (TLI.isPow2DivCheap())
return SDValue();
// Target-specific implementation of sdiv x, pow2.
SDValue Res = BuildSDIVPow2(N);
if (Res.getNode())
return Res;
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
// Splat the sign bit into the register
@ -11482,9 +11488,9 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}
/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant, return
/// a DAG expression to select that will generate the same value by multiplying
/// by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
@ -11504,6 +11510,26 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
return S;
}
/// BuildSDIVPow2 - Given an ISD::SDIV node expressing a divide by constant
/// power of 2, return a DAG expression to select that will generate the same
/// value by right shifting.
SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
if (!C)
return SDValue();
// Avoid division by zero.
if (!C->getAPIntValue())
return SDValue();
std::vector<SDNode *> Built;
SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
for (SDNode *N : Built)
AddToWorklist(N);
return S;
}
/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:

View File

@ -6382,6 +6382,48 @@ static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
return performIntegerAbsCombine(N, DAG);
}
SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
std::vector<SDNode *> *Created) const {
// fold (sdiv X, pow2)
EVT VT = N->getValueType(0);
if ((VT != MVT::i32 && VT != MVT::i64) ||
!(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
return SDValue();
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
unsigned Lg2 = Divisor.countTrailingZeros();
SDValue Zero = DAG.getConstant(0, VT);
SDValue Pow2MinusOne = DAG.getConstant((1 << Lg2) - 1, VT);
// Add (N0 < 0) ? Pow2 - 1 : 0;
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
if (Created) {
Created->push_back(Cmp.getNode());
Created->push_back(Add.getNode());
Created->push_back(CSel.getNode());
}
// Divide by pow2.
SDValue SRA =
DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, MVT::i64));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (Divisor.isNonNegative())
return SRA;
if (Created)
Created->push_back(SRA.getNode());
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), SRA);
}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {

View File

@ -424,6 +424,9 @@ private:
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const;
ConstraintType
getConstraintType(const std::string &Constraint) const override;
unsigned getRegisterByName(const char* RegName, EVT VT) const override;

View File

@ -0,0 +1,61 @@
; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
define i32 @test1(i32 %x) {
; CHECK-LABEL: test1
; CHECK: add w8, w0, #7
; CHECK: cmp w0, #0
; CHECK: csel w8, w8, w0, lt
; CHECK: asr w0, w8, #3
%div = sdiv i32 %x, 8
ret i32 %div
}
define i32 @test2(i32 %x) {
; CHECK-LABEL: test2
; CHECK: add w8, w0, #7
; CHECK: cmp w0, #0
; CHECK: csel w8, w8, w0, lt
; CHECK: neg w0, w8, asr #3
%div = sdiv i32 %x, -8
ret i32 %div
}
define i32 @test3(i32 %x) {
; CHECK-LABEL: test3
; CHECK: add w8, w0, #31
; CHECK: cmp w0, #0
; CHECK: csel w8, w8, w0, lt
; CHECK: asr w0, w8, #5
%div = sdiv i32 %x, 32
ret i32 %div
}
define i64 @test4(i64 %x) {
; CHECK-LABEL: test4
; CHECK: add x8, x0, #7
; CHECK: cmp x0, #0
; CHECK: csel x8, x8, x0, lt
; CHECK: asr x0, x8, #3
%div = sdiv i64 %x, 8
ret i64 %div
}
define i64 @test5(i64 %x) {
; CHECK-LABEL: test5
; CHECK: add x8, x0, #7
; CHECK: cmp x0, #0
; CHECK: csel x8, x8, x0, lt
; CHECK: neg x0, x8, asr #3
%div = sdiv i64 %x, -8
ret i64 %div
}
define i64 @test6(i64 %x) {
; CHECK-LABEL: test6
; CHECK: add x8, x0, #63
; CHECK: cmp x0, #0
; CHECK: csel x8, x8, x0, lt
; CHECK: asr x0, x8, #6
%div = sdiv i64 %x, 64
ret i64 %div
}