[X86] Use ADD/SUB instead of INC/DEC for Haswell and Broadwell CPUs

Differential Revision: http://reviews.llvm.org/D5934



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222141 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Alexey Volkov 2014-11-17 16:17:51 +00:00
parent ae3738f4a7
commit 19e8fe05dc
3 changed files with 91 additions and 10 deletions

View File

@ -280,7 +280,7 @@ def : ProcessorModel<"core-avx2", HaswellModel,
FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
FeatureHLE]>;
FeatureHLE, FeatureSlowIncDec]>;
// Broadwell
def : ProcessorModel<"broadwell", HaswellModel,
@ -288,7 +288,8 @@ def : ProcessorModel<"broadwell", HaswellModel,
FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
FeatureHLE, FeatureADX, FeatureRDSEED, FeatureSMAP]>;
FeatureHLE, FeatureADX, FeatureRDSEED, FeatureSMAP,
FeatureSlowIncDec]>;
// KNL
// FIXME: define KNL model
def : ProcessorModel<"knl", HaswellModel,

View File

@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.10.0"
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmaddsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: incl [[INDREG:%[a-z0-9]+]]
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmaddsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
@ -35,7 +35,7 @@ for.end:
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsubadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: incl [[INDREG:%[a-z0-9]+]]
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmsubaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
@ -64,7 +64,7 @@ for.end:
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: incl [[INDREG:%[a-z0-9]+]]
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
@ -93,7 +93,7 @@ for.end:
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: incl [[INDREG:%[a-z0-9]+]]
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
@ -128,7 +128,7 @@ declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmaddsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: incl [[INDREG:%[a-z0-9]+]]
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmaddsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
@ -157,7 +157,7 @@ for.end:
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsubadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: incl [[INDREG:%[a-z0-9]+]]
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmsubaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
@ -186,7 +186,7 @@ for.end:
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: incl [[INDREG:%[a-z0-9]+]]
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
@ -215,7 +215,7 @@ for.end:
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: incl [[INDREG:%[a-z0-9]+]]
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {

View File

@ -0,0 +1,80 @@
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=-slow-incdec < %s | FileCheck -check-prefix=INCDEC %s
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+slow-incdec < %s | FileCheck -check-prefix=ADD %s
; With -mattr=-slow-incdec, expect the loop counters to be updated with dec and then inc rather than addl $-1 / addl $1.
; INCDEC-NOT: addl $-1
; INCDEC: dec
; INCDEC-NOT: addl $1
; INCDEC: inc
; With -mattr=+slow-incdec, expect addl $-1 and then addl $1 in place of dec / inc.
; ADD: addl $-1
; ADD-NOT: dec
; ADD: addl $1
; ADD-NOT: inc
; Function Attrs: nounwind readonly
; slow_1: count-down loop whose induction step is an add of -1.
; Starting at index %s, each iteration loads %a[i] and steps i down by one.
; Returns the index i at which a zero element was loaded, or 0 when %s is 0
; or when the stepped counter reaches 0 first.
; The -1 step is the operation the checks at the top of this file expect to
; be emitted as dec or as addl $-1, depending on the slow-incdec attribute.
define i32 @slow_1(i32* nocapture readonly %a, i32 %s) #0 {
entry:
; Skip the loop entirely (result 0) when the starting index is 0.
%cmp5 = icmp eq i32 %s, 0
br i1 %cmp5, label %for.end, label %for.body.preheader
for.body.preheader: ; preds = %entry
br label %for.body
for.cond: ; preds = %for.body
; Leave the loop with result 0 once the decremented counter hits 0.
%cmp = icmp eq i32 %dec, 0
br i1 %cmp, label %for.end.loopexit, label %for.body
for.body: ; preds = %for.body.preheader, %for.cond
; %i.06 is the current index: %s on entry, %dec on every later iteration.
%i.06 = phi i32 [ %dec, %for.cond ], [ %s, %for.body.preheader ]
%arrayidx = getelementptr inbounds i32* %a, i32 %i.06
%0 = load i32* %arrayidx, align 4, !tbaa !1
%cmp1 = icmp eq i32 %0, 0
; Step the counter down by one; the result is tested in %for.cond and fed
; back into the phi above.
%dec = add nsw i32 %i.06, -1
; A zero element ends the loop with the current (pre-decrement) index.
br i1 %cmp1, label %for.end.loopexit, label %for.cond
for.end.loopexit: ; preds = %for.cond, %for.body
; 0 when the counter ran out, otherwise the index of the zero element.
%i.0.lcssa.ph = phi i32 [ 0, %for.cond ], [ %i.06, %for.body ]
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
; Merge the early-out result (0) with the loop's exit value.
%i.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.lcssa.ph, %for.end.loopexit ]
ret i32 %i.0.lcssa
}
; Function Attrs: nounwind readonly
; slow_2: same loop shape as a count-down search, but the induction step is
; an add of +1. Starting at index %s, each iteration loads %a[i] and steps i
; up by one. Returns the index i at which a zero element was loaded, or 0
; when %s is 0 or when the stepped counter becomes 0 first.
; The +1 step is the operation the checks at the top of this file expect to
; be emitted as inc or as addl $1, depending on the slow-incdec attribute.
define i32 @slow_2(i32* nocapture readonly %a, i32 %s) #0 {
entry:
; Skip the loop entirely (result 0) when the starting index is 0.
%cmp5 = icmp eq i32 %s, 0
br i1 %cmp5, label %for.end, label %for.body.preheader
for.body.preheader: ; preds = %entry
br label %for.body
for.cond: ; preds = %for.body
; Leave the loop with result 0 once the incremented counter equals 0.
%cmp = icmp eq i32 %inc, 0
br i1 %cmp, label %for.end.loopexit, label %for.body
for.body: ; preds = %for.body.preheader, %for.cond
; %i.06 is the current index: %s on entry, %inc on every later iteration.
%i.06 = phi i32 [ %inc, %for.cond ], [ %s, %for.body.preheader ]
%arrayidx = getelementptr inbounds i32* %a, i32 %i.06
%0 = load i32* %arrayidx, align 4, !tbaa !1
%cmp1 = icmp eq i32 %0, 0
; Step the counter up by one; the result is tested in %for.cond and fed
; back into the phi above.
%inc = add nsw i32 %i.06, 1
; A zero element ends the loop with the current (pre-increment) index.
br i1 %cmp1, label %for.end.loopexit, label %for.cond
for.end.loopexit: ; preds = %for.cond, %for.body
; 0 when the counter reached 0, otherwise the index of the zero element.
%i.0.lcssa.ph = phi i32 [ 0, %for.cond ], [ %i.06, %for.body ]
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
; Merge the early-out result (0) with the loop's exit value.
%i.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.lcssa.ph, %for.end.loopexit ]
ret i32 %i.0.lcssa
}
!1 = metadata !{metadata !2, metadata !2, i64 0}
!2 = metadata !{metadata !"int", metadata !3, i64 0}
!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
!4 = metadata !{metadata !"Simple C/C++ TBAA"}