mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-19 09:57:42 +00:00
[AArch64] Lower sdiv x, pow2 using add + select + shift.
The target-independent DAGcombiner will generate:

    asr w1, X, #31            w1 = splat sign bit.
    add X, X, w1, lsr #28     X = X + 0 or pow2-1
    asr w0, X, asr #4         w0 = X/pow2

However, the add + shifts is expensive, so generate:

    add w0, X, 15             w0 = X + pow2-1
    cmp X, wzr                X - 0
    csel X, w0, X, lt         X = (X < 0) ? X + pow2-1 : X;
    asr w0, X, asr 4          w0 = X/pow2

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213758 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3922da8ae8
commit
67c325e9f0
@ -2545,6 +2545,11 @@ public:
|
||||
SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
|
||||
bool IsAfterLegalization,
|
||||
std::vector<SDNode *> *Created) const;
|
||||
/// Target hook for lowering a signed division by a power-of-two constant.
///
/// \param N        the ISD::SDIV node being combined.
/// \param Divisor  the constant divisor taken from the node's second operand.
/// \param DAG      the SelectionDAG in which replacement nodes are built.
/// \param Created  optional out-list; an override may append the nodes it
///                 creates so the caller can revisit them.
/// \returns the replacement value, or an empty SDValue when the target has no
///          custom lowering. This default implementation always returns the
///          empty value.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                              SelectionDAG &DAG,
                              std::vector<SDNode *> *Created) const {
  // No target-specific expansion by default.
  SDValue NoCustomLowering;
  return NoCustomLowering;
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Legalization utility functions
|
||||
|
@ -304,6 +304,7 @@ namespace {
|
||||
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
|
||||
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
|
||||
SDValue BuildSDIV(SDNode *N);
|
||||
SDValue BuildSDIVPow2(SDNode *N);
|
||||
SDValue BuildUDIV(SDNode *N);
|
||||
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
|
||||
bool DemandHighBits = true);
|
||||
@ -2033,6 +2034,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
|
||||
if (TLI.isPow2DivCheap())
|
||||
return SDValue();
|
||||
|
||||
// Target-specific implementation of sdiv x, pow2.
|
||||
SDValue Res = BuildSDIVPow2(N);
|
||||
if (Res.getNode())
|
||||
return Res;
|
||||
|
||||
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
|
||||
|
||||
// Splat the sign bit into the register
|
||||
@ -11482,9 +11488,9 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
|
||||
return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
|
||||
}
|
||||
|
||||
/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
|
||||
/// return a DAG expression to select that will generate the same value by
|
||||
/// multiplying by a magic number. See:
|
||||
/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant, return
|
||||
/// a DAG expression to select that will generate the same value by multiplying
|
||||
/// by a magic number. See:
|
||||
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
|
||||
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
|
||||
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
|
||||
@ -11504,6 +11510,26 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
|
||||
return S;
|
||||
}
|
||||
|
||||
/// BuildSDIVPow2 - Given an ISD::SDIV node expressing a divide by constant
|
||||
/// power of 2, return a DAG expression to select that will generate the same
|
||||
/// value by right shifting.
|
||||
SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
|
||||
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
|
||||
if (!C)
|
||||
return SDValue();
|
||||
|
||||
// Avoid division by zero.
|
||||
if (!C->getAPIntValue())
|
||||
return SDValue();
|
||||
|
||||
std::vector<SDNode *> Built;
|
||||
SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
|
||||
|
||||
for (SDNode *N : Built)
|
||||
AddToWorklist(N);
|
||||
return S;
|
||||
}
|
||||
|
||||
/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
|
||||
/// return a DAG expression to select that will generate the same value by
|
||||
/// multiplying by a magic number. See:
|
||||
|
@ -6382,6 +6382,48 @@ static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
|
||||
return performIntegerAbsCombine(N, DAG);
|
||||
}
|
||||
|
||||
/// Lower (sdiv X, +/-2^k) for i32 and i64 without a divide instruction.
///
/// The emitted sequence is
///   Add  = X + (2^k - 1)
///   CSel = (X < 0) ? Add : X      // bias negative dividends only
///   SRA  = CSel >> k              // arithmetic shift
/// followed by (0 - SRA) when the divisor is negative.
///
/// \param N        the ISD::SDIV node; operand 0 is the dividend.
/// \param Divisor  the constant divisor.
/// \param DAG      the SelectionDAG in which the new nodes are created.
/// \param Created  if non-null, receives the intermediate nodes built here.
/// \returns the replacement value, or an empty SDValue when the result type
///          is neither i32 nor i64, or neither Divisor nor -Divisor is a power
///          of two.
SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                     SelectionDAG &DAG,
                                     std::vector<SDNode *> *Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  unsigned Lg2 = Divisor.countTrailingZeros();
  SDValue Zero = DAG.getConstant(0, VT);
  // Build the bias in 64 bits. Lg2 can be as large as 63 for i64 divisors
  // (and 31 for the i32 minimum value); shifting a plain `int` 1 by that much
  // is undefined behaviour and would yield a wrong bias for divisors >= 2^31.
  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, VT);

  // Add (N0 < 0) ? Pow2 - 1 : 0;
  SDValue CCVal;
  SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
  SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);

  if (Created) {
    Created->push_back(Cmp.getNode());
    Created->push_back(Add.getNode());
    Created->push_back(CSel.getNode());
  }

  // Divide by pow2.
  SDValue SRA =
      DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, MVT::i64));

  // If we're dividing by a positive value, we're done. Otherwise, we must
  // negate the result.
  if (Divisor.isNonNegative())
    return SRA;

  if (Created)
    Created->push_back(SRA.getNode());
  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), SRA);
}
|
||||
|
||||
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const AArch64Subtarget *Subtarget) {
|
||||
|
@ -424,6 +424,9 @@ private:
|
||||
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
|
||||
std::vector<SDNode *> *Created) const;
|
||||
|
||||
ConstraintType
|
||||
getConstraintType(const std::string &Constraint) const override;
|
||||
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
|
||||
|
61
test/CodeGen/AArch64/sdivpow2.ll
Normal file
61
test/CodeGen/AArch64/sdivpow2.ll
Normal file
@ -0,0 +1,61 @@
|
||||
; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
|
||||
|
||||
; i32 signed divide by +8: bias of 7, shift by 3, no negate.
define i32 @test1(i32 %x) {
; CHECK-LABEL: test1
; CHECK: add w8, w0, #7
; CHECK: cmp w0, #0
; CHECK: csel w8, w8, w0, lt
; CHECK: asr w0, w8, #3
  %quot = sdiv i32 %x, 8
  ret i32 %quot
}
|
||||
|
||||
; i32 signed divide by -8: same bias as +8, shift folded into the negate.
define i32 @test2(i32 %x) {
; CHECK-LABEL: test2
; CHECK: add w8, w0, #7
; CHECK: cmp w0, #0
; CHECK: csel w8, w8, w0, lt
; CHECK: neg w0, w8, asr #3
  %quot = sdiv i32 %x, -8
  ret i32 %quot
}
|
||||
|
||||
; i32 signed divide by +32: bias of 31, shift by 5.
define i32 @test3(i32 %x) {
; CHECK-LABEL: test3
; CHECK: add w8, w0, #31
; CHECK: cmp w0, #0
; CHECK: csel w8, w8, w0, lt
; CHECK: asr w0, w8, #5
  %quot = sdiv i32 %x, 32
  ret i32 %quot
}
|
||||
|
||||
; i64 signed divide by +8: bias of 7, shift by 3, on x registers.
define i64 @test4(i64 %x) {
; CHECK-LABEL: test4
; CHECK: add x8, x0, #7
; CHECK: cmp x0, #0
; CHECK: csel x8, x8, x0, lt
; CHECK: asr x0, x8, #3
  %quot = sdiv i64 %x, 8
  ret i64 %quot
}
|
||||
|
||||
; i64 signed divide by -8: same bias as +8, shift folded into the negate.
define i64 @test5(i64 %x) {
; CHECK-LABEL: test5
; CHECK: add x8, x0, #7
; CHECK: cmp x0, #0
; CHECK: csel x8, x8, x0, lt
; CHECK: neg x0, x8, asr #3
  %quot = sdiv i64 %x, -8
  ret i64 %quot
}
|
||||
|
||||
; i64 signed divide by +64: bias of 63, shift by 6.
define i64 @test6(i64 %x) {
; CHECK-LABEL: test6
; CHECK: add x8, x0, #63
; CHECK: cmp x0, #0
; CHECK: csel x8, x8, x0, lt
; CHECK: asr x0, x8, #6
  %quot = sdiv i64 %x, 64
  ret i64 %quot
}
|
Loading…
Reference in New Issue
Block a user