mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-24 13:06:56 +00:00
Bypass Slow Divides
* Only apply the divide bypass optimization when not optimizing for size. * Fixed a bug caused by the zero constant always being created with type Int32; the constant is now generated using the dividend's type instead. * For Atom x86-64, apply the divide bypass to use 16-bit divides instead of 64-bit divides when the operand values are small enough. * Added lit tests for the 64-bit divide bypass. Patch by Tyler Nowicki! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176442 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ff4faabd2d
commit
9a2cfffdb6
@ -181,9 +181,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setSchedulingPreference(Sched::RegPressure);
|
||||
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
|
||||
|
||||
// Bypass i32 with i8 on Atom when compiling with O2
|
||||
if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default)
|
||||
// Bypass expensive divides on Atom when compiling with O2
|
||||
if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
|
||||
addBypassSlowDiv(32, 8);
|
||||
if (Subtarget->is64Bit())
|
||||
addBypassSlowDiv(64, 16);
|
||||
}
|
||||
|
||||
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
|
||||
// Setup Windows compiler runtime calls.
|
||||
|
@ -154,7 +154,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
|
||||
|
||||
/// This optimization identifies DIV instructions that can be
|
||||
/// profitably bypassed and carried out with a shorter, faster divide.
|
||||
if (TLI && TLI->isSlowDivBypassed()) {
|
||||
if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
|
||||
const DenseMap<unsigned int, unsigned int> &BypassWidths =
|
||||
TLI->getBypassSlowDivWidths();
|
||||
for (Function::iterator I = F.begin(); I != F.end(); I++)
|
||||
|
@ -163,7 +163,7 @@ static bool insertFastDiv(Function &F,
|
||||
Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
|
||||
|
||||
// Compare operand values and branch
|
||||
Value *ZeroV = MainBuilder.getInt32(0);
|
||||
Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
|
||||
Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
|
||||
MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
|
||||
|
||||
@ -244,7 +244,7 @@ bool llvm::bypassSlowDivision(Function &F,
|
||||
|
||||
// Get bitwidth of div/rem instruction
|
||||
IntegerType *T = cast<IntegerType>(J->getType());
|
||||
int bitwidth = T->getBitWidth();
|
||||
unsigned int bitwidth = T->getBitWidth();
|
||||
|
||||
// Continue if bitwidth is not bypassed
|
||||
DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
|
||||
|
46
test/CodeGen/X86/atom-bypass-slow-division-64.ll
Normal file
46
test/CodeGen/X86/atom-bypass-slow-division-64.ll
Normal file
@ -0,0 +1,46 @@
|
||||
; RUN: llc < %s -mcpu=atom -mtriple=i686-linux -march=x86-64 | FileCheck %s
|
||||
|
||||
; Additional tests for 64-bit divide bypass
|
||||
|
||||
define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
|
||||
; CHECK: Test_get_quotient:
|
||||
; CHECK: orq %rsi, %rcx
|
||||
; CHECK-NEXT: testq $-65536, %rcx
|
||||
; CHECK-NEXT: je
|
||||
; CHECK: idivq
|
||||
; CHECK: ret
|
||||
; CHECK: divw
|
||||
; CHECK: ret
|
||||
%result = sdiv i64 %a, %b
|
||||
ret i64 %result
|
||||
}
|
||||
|
||||
define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
|
||||
; CHECK: Test_get_remainder:
|
||||
; CHECK: orq %rsi, %rcx
|
||||
; CHECK-NEXT: testq $-65536, %rcx
|
||||
; CHECK-NEXT: je
|
||||
; CHECK: idivq
|
||||
; CHECK: ret
|
||||
; CHECK: divw
|
||||
; CHECK: ret
|
||||
%result = srem i64 %a, %b
|
||||
ret i64 %result
|
||||
}
|
||||
|
||||
define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind {
|
||||
; CHECK: Test_get_quotient_and_remainder:
|
||||
; CHECK: orq %rsi, %rcx
|
||||
; CHECK-NEXT: testq $-65536, %rcx
|
||||
; CHECK-NEXT: je
|
||||
; CHECK: idivq
|
||||
; CHECK: divw
|
||||
; CHECK: addq
|
||||
; CHECK: ret
|
||||
; CHECK-NOT: idivq
|
||||
; CHECK-NOT: divw
|
||||
%resultdiv = sdiv i64 %a, %b
|
||||
%resultrem = srem i64 %a, %b
|
||||
%result = add i64 %resultdiv, %resultrem
|
||||
ret i64 %result
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
|
||||
; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
|
||||
|
||||
define i32 @Test_get_quotient(i32 %a, i32 %b) nounwind {
|
||||
; CHECK: Test_get_quotient:
|
||||
|
Loading…
Reference in New Issue
Block a user