mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-03 02:08:54 +00:00
[X86] Use ADD/SUB instead of INC/DEC for Haswell and Broadwell CPUs
Differential Revision: http://reviews.llvm.org/D5934
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222141 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ae3738f4a7
commit
19e8fe05dc
@ -280,7 +280,7 @@ def : ProcessorModel<"core-avx2", HaswellModel,
|
||||
FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
|
||||
FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
|
||||
FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
|
||||
FeatureHLE]>;
|
||||
FeatureHLE, FeatureSlowIncDec]>;
|
||||
|
||||
// Broadwell
|
||||
def : ProcessorModel<"broadwell", HaswellModel,
|
||||
@ -288,7 +288,8 @@ def : ProcessorModel<"broadwell", HaswellModel,
|
||||
FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
|
||||
FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
|
||||
FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
|
||||
FeatureHLE, FeatureADX, FeatureRDSEED, FeatureSMAP]>;
|
||||
FeatureHLE, FeatureADX, FeatureRDSEED, FeatureSMAP,
|
||||
FeatureSlowIncDec]>;
|
||||
// KNL
|
||||
// FIXME: define KNL model
|
||||
def : ProcessorModel<"knl", HaswellModel,
|
||||
|
@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.10.0"
|
||||
; CHECK: [[BODYLBL:LBB.+]]:
|
||||
; CHECK: vfmaddsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
|
||||
; CHECK: [[INCLBL:LBB.+]]:
|
||||
; CHECK: incl [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: cmpl {{%.+}}, [[INDREG]]
|
||||
; CHECK: jl [[BODYLBL]]
|
||||
define <4 x double> @fmaddsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
||||
@ -35,7 +35,7 @@ for.end:
|
||||
; CHECK: [[BODYLBL:LBB.+]]:
|
||||
; CHECK: vfmsubadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
|
||||
; CHECK: [[INCLBL:LBB.+]]:
|
||||
; CHECK: incl [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: cmpl {{%.+}}, [[INDREG]]
|
||||
; CHECK: jl [[BODYLBL]]
|
||||
define <4 x double> @fmsubaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
||||
@ -64,7 +64,7 @@ for.end:
|
||||
; CHECK: [[BODYLBL:LBB.+]]:
|
||||
; CHECK: vfmadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
|
||||
; CHECK: [[INCLBL:LBB.+]]:
|
||||
; CHECK: incl [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: cmpl {{%.+}}, [[INDREG]]
|
||||
; CHECK: jl [[BODYLBL]]
|
||||
define <4 x double> @fmaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
||||
@ -93,7 +93,7 @@ for.end:
|
||||
; CHECK: [[BODYLBL:LBB.+]]:
|
||||
; CHECK: vfmsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
|
||||
; CHECK: [[INCLBL:LBB.+]]:
|
||||
; CHECK: incl [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: cmpl {{%.+}}, [[INDREG]]
|
||||
; CHECK: jl [[BODYLBL]]
|
||||
define <4 x double> @fmsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
||||
@ -128,7 +128,7 @@ declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4
|
||||
; CHECK: [[BODYLBL:LBB.+]]:
|
||||
; CHECK: vfmaddsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
|
||||
; CHECK: [[INCLBL:LBB.+]]:
|
||||
; CHECK: incl [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: cmpl {{%.+}}, [[INDREG]]
|
||||
; CHECK: jl [[BODYLBL]]
|
||||
define <8 x float> @fmaddsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
||||
@ -157,7 +157,7 @@ for.end:
|
||||
; CHECK: [[BODYLBL:LBB.+]]:
|
||||
; CHECK: vfmsubadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
|
||||
; CHECK: [[INCLBL:LBB.+]]:
|
||||
; CHECK: incl [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: cmpl {{%.+}}, [[INDREG]]
|
||||
; CHECK: jl [[BODYLBL]]
|
||||
define <8 x float> @fmsubaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
||||
@ -186,7 +186,7 @@ for.end:
|
||||
; CHECK: [[BODYLBL:LBB.+]]:
|
||||
; CHECK: vfmadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
|
||||
; CHECK: [[INCLBL:LBB.+]]:
|
||||
; CHECK: incl [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: cmpl {{%.+}}, [[INDREG]]
|
||||
; CHECK: jl [[BODYLBL]]
|
||||
define <8 x float> @fmaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
||||
@ -215,7 +215,7 @@ for.end:
|
||||
; CHECK: [[BODYLBL:LBB.+]]:
|
||||
; CHECK: vfmsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
|
||||
; CHECK: [[INCLBL:LBB.+]]:
|
||||
; CHECK: incl [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
|
||||
; CHECK: cmpl {{%.+}}, [[INDREG]]
|
||||
; CHECK: jl [[BODYLBL]]
|
||||
define <8 x float> @fmsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
||||
|
80
test/CodeGen/X86/slow-incdec.ll
Normal file
80
test/CodeGen/X86/slow-incdec.ll
Normal file
@ -0,0 +1,80 @@
|
||||
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=-slow-incdec < %s | FileCheck -check-prefix=INCDEC %s
|
||||
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+slow-incdec < %s | FileCheck -check-prefix=ADD %s
|
||||
|
||||
; check -mattr=-slow-incdec
|
||||
; INCDEC-NOT: addl $-1
|
||||
; INCDEC: dec
|
||||
; INCDEC-NOT: addl $1
|
||||
; INCDEC: inc
|
||||
|
||||
; check -mattr=+slow-incdec
|
||||
; ADD: addl $-1
|
||||
; ADD-NOT: dec
|
||||
; ADD: addl $1
|
||||
; ADD-NOT: inc
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
; slow_1: count-down loop used as input for the INCDEC/ADD checks at the top
; of this test file.
;   %a - base pointer of an i32 array (only read)
;   %s - starting index
; Walks the index from %s downwards in steps of 1. Returns the first index
; whose element is 0; returns 0 if %s is 0 or if the index reaches 0 before a
; zero element is found (the element at index 0 is never loaded).
define i32 @slow_1(i32* nocapture readonly %a, i32 %s) #0 {
|
||||
entry:
|
||||
; Nothing to scan when the starting index is 0: go straight to the return.
%cmp5 = icmp eq i32 %s, 0
|
||||
br i1 %cmp5, label %for.end, label %for.body.preheader
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.cond: ; preds = %for.body
|
||||
; Loop latch: stop once the decremented index has reached 0.
%cmp = icmp eq i32 %dec, 0
|
||||
br i1 %cmp, label %for.end.loopexit, label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.cond
|
||||
; Current index: %s on the first iteration, the decremented value afterwards.
%i.06 = phi i32 [ %dec, %for.cond ], [ %s, %for.body.preheader ]
|
||||
%arrayidx = getelementptr inbounds i32* %a, i32 %i.06
|
||||
%0 = load i32* %arrayidx, align 4, !tbaa !1
|
||||
; Early exit when the loaded element is 0.
%cmp1 = icmp eq i32 %0, 0
|
||||
; Index decrement. This is the operation the checks at the top of the file
; look at: "dec" is expected with -slow-incdec, "addl $-1" with +slow-incdec.
|
||||
%dec = add nsw i32 %i.06, -1
|
||||
br i1 %cmp1, label %for.end.loopexit, label %for.cond
|
||||
|
||||
for.end.loopexit: ; preds = %for.cond, %for.body
|
||||
; 0 when the index ran down to 0, otherwise the index of the zero element.
%i.0.lcssa.ph = phi i32 [ 0, %for.cond ], [ %i.06, %for.body ]
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
; Merge the "%s was 0" path (result 0) with the loop result.
%i.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.lcssa.ph, %for.end.loopexit ]
|
||||
ret i32 %i.0.lcssa
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
; slow_2: count-up loop used as input for the INCDEC/ADD checks at the top of
; this test file.
;   %a - base pointer of an i32 array (only read)
;   %s - starting index
; Walks the index from %s upwards in steps of 1. Returns the first index whose
; element is 0; returns 0 if %s is 0 or if the incremented index becomes 0
; before a zero element is found.
define i32 @slow_2(i32* nocapture readonly %a, i32 %s) #0 {
|
||||
entry:
|
||||
; Nothing to scan when the starting index is 0: go straight to the return.
%cmp5 = icmp eq i32 %s, 0
|
||||
br i1 %cmp5, label %for.end, label %for.body.preheader
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.cond: ; preds = %for.body
|
||||
; Loop latch: stop once the incremented index equals 0.
%cmp = icmp eq i32 %inc, 0
|
||||
br i1 %cmp, label %for.end.loopexit, label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.cond
|
||||
; Current index: %s on the first iteration, the incremented value afterwards.
%i.06 = phi i32 [ %inc, %for.cond ], [ %s, %for.body.preheader ]
|
||||
%arrayidx = getelementptr inbounds i32* %a, i32 %i.06
|
||||
%0 = load i32* %arrayidx, align 4, !tbaa !1
|
||||
; Early exit when the loaded element is 0.
%cmp1 = icmp eq i32 %0, 0
|
||||
; Index increment. This is the operation the checks at the top of the file
; look at: "inc" is expected with -slow-incdec, "addl $1" with +slow-incdec.
%inc = add nsw i32 %i.06, 1
|
||||
br i1 %cmp1, label %for.end.loopexit, label %for.cond
|
||||
|
||||
for.end.loopexit: ; preds = %for.cond, %for.body
|
||||
; 0 when the index counted up to 0, otherwise the index of the zero element.
%i.0.lcssa.ph = phi i32 [ 0, %for.cond ], [ %i.06, %for.body ]
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
; Merge the "%s was 0" path (result 0) with the loop result.
%i.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.lcssa.ph, %for.end.loopexit ]
|
||||
ret i32 %i.0.lcssa
|
||||
}
|
||||
|
||||
!1 = metadata !{metadata !2, metadata !2, i64 0}
|
||||
!2 = metadata !{metadata !"int", metadata !3, i64 0}
|
||||
!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
|
||||
!4 = metadata !{metadata !"Simple C/C++ TBAA"}
|
Loading…
Reference in New Issue
Block a user