mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-27 13:42:24 +00:00
[FastISel][AArch64] Custom lower sdiv by power-of-2.
Emit an optimized instruction sequence for sdiv by power-of-2 depending on the exact flag. This fixes rdar://problem/18224511. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217986 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
69e88e8b40
commit
7516444a26
@ -133,6 +133,7 @@ private:
|
||||
bool selectShift(const Instruction *I);
|
||||
bool selectBitCast(const Instruction *I);
|
||||
bool selectFRem(const Instruction *I);
|
||||
bool selectSDiv(const Instruction *I);
|
||||
|
||||
// Utility helper routines.
|
||||
bool isTypeLegal(Type *Ty, MVT &VT);
|
||||
@ -3980,6 +3981,75 @@ bool AArch64FastISel::selectFRem(const Instruction *I) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AArch64FastISel::selectSDiv(const Instruction *I) {
|
||||
MVT VT;
|
||||
if (!isTypeLegal(I->getType(), VT))
|
||||
return false;
|
||||
|
||||
if (!isa<ConstantInt>(I->getOperand(1)))
|
||||
return selectBinaryOp(I, ISD::SDIV);
|
||||
|
||||
const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
|
||||
if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
|
||||
!(C.isPowerOf2() || (-C).isPowerOf2()))
|
||||
return selectBinaryOp(I, ISD::SDIV);
|
||||
|
||||
unsigned Lg2 = C.countTrailingZeros();
|
||||
unsigned Src0Reg = getRegForValue(I->getOperand(0));
|
||||
if (!Src0Reg)
|
||||
return false;
|
||||
bool Src0IsKill = hasTrivialKill(I->getOperand(0));
|
||||
|
||||
if (cast<BinaryOperator>(I)->isExact()) {
|
||||
unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
|
||||
if (!ResultReg)
|
||||
return false;
|
||||
updateValueMap(I, ResultReg);
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned Pow2MinusOne = (1 << Lg2) - 1;
|
||||
unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg,
|
||||
/*IsKill=*/false, Pow2MinusOne);
|
||||
if (!AddReg)
|
||||
return false;
|
||||
|
||||
// (Src0 < 0) ? Pow2 - 1 : 0;
|
||||
if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
|
||||
return false;
|
||||
|
||||
unsigned SelectOpc;
|
||||
const TargetRegisterClass *RC;
|
||||
if (VT == MVT::i64) {
|
||||
SelectOpc = AArch64::CSELXr;
|
||||
RC = &AArch64::GPR64RegClass;
|
||||
} else {
|
||||
SelectOpc = AArch64::CSELWr;
|
||||
RC = &AArch64::GPR32RegClass;
|
||||
}
|
||||
unsigned SelectReg =
|
||||
fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
|
||||
Src0IsKill, AArch64CC::LT);
|
||||
if (!SelectReg)
|
||||
return false;
|
||||
|
||||
// Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
|
||||
// negate the result.
|
||||
unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
|
||||
unsigned ResultReg;
|
||||
if (C.isNegative())
|
||||
ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
|
||||
SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
|
||||
else
|
||||
ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
|
||||
|
||||
if (!ResultReg)
|
||||
return false;
|
||||
|
||||
updateValueMap(I, ResultReg);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
|
||||
switch (I->getOpcode()) {
|
||||
default:
|
||||
@ -3989,6 +4059,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
|
||||
return selectAddSub(I);
|
||||
case Instruction::Mul:
|
||||
return selectMul(I);
|
||||
case Instruction::SDiv:
|
||||
return selectSDiv(I);
|
||||
case Instruction::SRem:
|
||||
if (!selectBinaryOp(I, ISD::SREM))
|
||||
return selectRem(I, ISD::SREM);
|
||||
|
56
test/CodeGen/AArch64/fast-isel-sdiv.ll
Normal file
56
test/CodeGen/AArch64/fast-isel-sdiv.ll
Normal file
@ -0,0 +1,56 @@
|
||||
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; Exact signed division of an i32 by 8: the expected output is a single
; arithmetic shift right of w0 by 3, with no rounding fix-up.
define i32 @sdiv_i32_exact(i32 %a) {
|
||||
; CHECK-LABEL: sdiv_i32_exact
|
||||
; CHECK: asr {{w[0-9]+}}, w0, #3
|
||||
%1 = sdiv exact i32 %a, 8
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Non-exact signed division of an i32 by 8: the expected sequence adds 7 to
; w0, selects the biased value only when w0 is negative (lt after comparing
; with 0), and then shifts the selected value right arithmetically by 3.
define i32 @sdiv_i32_pos(i32 %a) {
|
||||
; CHECK-LABEL: sdiv_i32_pos
|
||||
; CHECK: add [[REG1:w[0-9]+]], w0, #7
|
||||
; CHECK-NEXT: cmp w0, #0
|
||||
; CHECK-NEXT: csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
|
||||
; CHECK-NEXT: asr {{w[0-9]+}}, [[REG2]], #3
|
||||
%1 = sdiv i32 %a, 8
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Non-exact signed division of an i32 by -8: same add/cmp/csel prefix as the
; positive-divisor case, but the final instruction is a neg whose operand is
; shifted right arithmetically by 3, so the quotient is negated.
define i32 @sdiv_i32_neg(i32 %a) {
|
||||
; CHECK-LABEL: sdiv_i32_neg
|
||||
; CHECK: add [[REG1:w[0-9]+]], w0, #7
|
||||
; CHECK-NEXT: cmp w0, #0
|
||||
; CHECK-NEXT: csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
|
||||
; CHECK-NEXT: neg {{w[0-9]+}}, [[REG2]], asr #3
|
||||
%1 = sdiv i32 %a, -8
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Exact signed division of an i64 by 16: the expected output is a single
; arithmetic shift right of x0 by 4, with no rounding fix-up.
define i64 @sdiv_i64_exact(i64 %a) {
|
||||
; CHECK-LABEL: sdiv_i64_exact
|
||||
; CHECK: asr {{x[0-9]+}}, x0, #4
|
||||
%1 = sdiv exact i64 %a, 16
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; Non-exact signed division of an i64 by 16: the expected sequence adds 15 to
; x0, selects the biased value only when x0 is negative (lt after comparing
; with 0), and then shifts the selected value right arithmetically by 4.
define i64 @sdiv_i64_pos(i64 %a) {
|
||||
; CHECK-LABEL: sdiv_i64_pos
|
||||
; CHECK: add [[REG1:x[0-9]+]], x0, #15
|
||||
; CHECK-NEXT: cmp x0, #0
|
||||
; CHECK-NEXT: csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
|
||||
; CHECK-NEXT: asr {{x[0-9]+}}, [[REG2]], #4
|
||||
%1 = sdiv i64 %a, 16
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; Non-exact signed division of an i64 by -16: same add/cmp/csel prefix as the
; positive-divisor case, but the final instruction is a neg whose operand is
; shifted right arithmetically by 4, so the quotient is negated.
define i64 @sdiv_i64_neg(i64 %a) {
|
||||
; CHECK-LABEL: sdiv_i64_neg
|
||||
; CHECK: add [[REG1:x[0-9]+]], x0, #15
|
||||
; CHECK-NEXT: cmp x0, #0
|
||||
; CHECK-NEXT: csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
|
||||
; CHECK-NEXT: neg {{x[0-9]+}}, [[REG2]], asr #4
|
||||
%1 = sdiv i64 %a, -16
|
||||
ret i64 %1
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user