mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-13 23:18:58 +00:00
Scaling up values in ARMBaseInstrInfo::isProfitableToIfCvt() before they are scaled by a probability to avoid a precision issue.
In ARMBaseInstrInfo::isProfitableToIfCvt(), there is a simple cost model in which the number of cycles is scaled by a probability to estimate the cost. However, when the number of cycles is small (which is usually the case), there is a precision issue after the computation. To avoid this issue, this patch scales those cycles by 1024 (chosen to make the multiplication a little faster) before they are scaled by the probability. Other variables are also scaled up for the final comparison. Differential Revision: http://reviews.llvm.org/D12742 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248018 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8aa8c4d086
commit
759d988de3
@ -1670,11 +1670,14 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
|
||||
}
|
||||
|
||||
// Attempt to estimate the relative costs of predication versus branching.
|
||||
unsigned UnpredCost = Probability.scale(NumCycles);
|
||||
UnpredCost += 1; // The branch itself
|
||||
UnpredCost += Subtarget.getMispredictionPenalty() / 10;
|
||||
// Here we scale up each component of UnpredCost to avoid precision issue when
|
||||
// scaling NumCycles by Probability.
|
||||
const unsigned ScalingUpFactor = 1024;
|
||||
unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor);
|
||||
UnpredCost += ScalingUpFactor; // The branch itself
|
||||
UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
|
||||
|
||||
return (NumCycles + ExtraPredCycles) <= UnpredCost;
|
||||
return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
|
||||
}
|
||||
|
||||
bool ARMBaseInstrInfo::
|
||||
@ -1687,13 +1690,17 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
|
||||
return false;
|
||||
|
||||
// Attempt to estimate the relative costs of predication versus branching.
|
||||
unsigned TUnpredCost = Probability.scale(TCycles);
|
||||
unsigned FUnpredCost = Probability.getCompl().scale(FCycles);
|
||||
// Here we scale up each component of UnpredCost to avoid precision issue when
|
||||
// scaling TCycles/FCycles by Probability.
|
||||
const unsigned ScalingUpFactor = 1024;
|
||||
unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
|
||||
unsigned FUnpredCost =
|
||||
Probability.getCompl().scale(FCycles * ScalingUpFactor);
|
||||
unsigned UnpredCost = TUnpredCost + FUnpredCost;
|
||||
UnpredCost += 1; // The branch itself
|
||||
UnpredCost += Subtarget.getMispredictionPenalty() / 10;
|
||||
UnpredCost += 1 * ScalingUpFactor; // The branch itself
|
||||
UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
|
||||
|
||||
return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
|
||||
return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -1,8 +1,13 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: llc < %s -mtriple=thumbv7-apple-ios -stats 2>&1 | not grep "Number of pipeline stalls"
|
||||
; RUN: llc < %s -mtriple=thumbv7-apple-ios -disable-ifcvt-diamond -stats 2>&1 | FileCheck %s
|
||||
; Evaluate the two vld1.8 instructions in separate MBB's,
|
||||
; instead of stalling on one and conditionally overwriting its result.
|
||||
;
|
||||
; Update: After if-conversion the two vld1.8 instructions are in the same MBB
|
||||
; again. So we disable this if-conversion to eliminate its influence on this
|
||||
; test.
|
||||
|
||||
; CHECK-NOT: Number of pipeline stalls
|
||||
define <16 x i8> @multiselect(i32 %avail, i8* %foo, i8* %bar) {
|
||||
entry:
|
||||
%vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %foo, i32 1)
|
||||
|
@ -1,10 +1,8 @@
|
||||
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
|
||||
|
||||
; Do not if-convert when branches go to the different loops.
|
||||
; CHECK-LABEL: t:
|
||||
; CHECK-NOT: subgt
|
||||
; CHECK-NOT: suble
|
||||
; Don't use
|
||||
; CHECK: subgt
|
||||
; CHECK: suble
|
||||
define i32 @t(i32 %a, i32 %b) {
|
||||
entry:
|
||||
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
|
||||
|
@ -77,8 +77,8 @@ declare void @terminatev()
|
||||
; CHECK: blx __Znwm
|
||||
; CHECK: {{.*}}@ %entry.do.body.i.i.i_crit_edge
|
||||
; CHECK: str r0, [sp, [[OFFSET:#[0-9]+]]]
|
||||
; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]]
|
||||
; CHECK: {{.*}}@ %do.body.i.i.i
|
||||
; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]]
|
||||
; CHECK: cbz [[R0]]
|
||||
|
||||
%"class.std::__1::basic_string" = type { %"class.std::__1::__compressed_pair" }
|
||||
|
@ -59,9 +59,6 @@ for.body.1: ; preds = %for.body
|
||||
br i1 %cmp.1, label %for.body.2, label %for.end
|
||||
|
||||
for.body.2: ; preds = %for.body.1
|
||||
; CHECK: %for.body.2
|
||||
; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
|
||||
; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
|
||||
%arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1
|
||||
%4 = load i8, i8* %arrayidx.2, align 1
|
||||
%conv6.2 = zext i8 %4 to i32
|
||||
|
@ -24,11 +24,10 @@ cond_next:
|
||||
|
||||
define i32 @t2(i32 %a, i32 %b) nounwind {
|
||||
entry:
|
||||
; Do not if-convert when branches go to the different loops.
|
||||
; CHECK-LABEL: t2:
|
||||
; CHECK-NOT: ite gt
|
||||
; CHECK-NOT: subgt
|
||||
; CHECK-NOT: suble
|
||||
; CHECK: ite gt
|
||||
; CHECK: subgt
|
||||
; CHECK: suble
|
||||
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
|
||||
br i1 %tmp1434, label %bb17, label %bb.outer
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user