mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-28 00:50:37 +00:00
[X86][InstCombine] Add support for scalar FMA intrinsics to SimplifyDemandedVectorElts.
This teaches SimplifyDemandedVectorElts that a scalar FMA intrinsic call can be replaced by its first operand if the lowest element of the result isn't used. It also teaches it that if the upper elements of the first operand aren't demanded, then they can be simplified.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289377 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
70493cff6f
commit
98435b8bdf
@ -981,6 +981,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
|
||||
|
||||
bool MadeChange = false;
|
||||
APInt UndefElts2(VWidth, 0);
|
||||
APInt UndefElts3(VWidth, 0);
|
||||
Value *TmpV;
|
||||
switch (I->getOpcode()) {
|
||||
default: break;
|
||||
@ -1298,6 +1299,34 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
|
||||
UndefElts &= UndefElts2;
|
||||
break;
|
||||
|
||||
case Intrinsic::x86_fma_vfmadd_ss:
|
||||
case Intrinsic::x86_fma_vfmsub_ss:
|
||||
case Intrinsic::x86_fma_vfnmadd_ss:
|
||||
case Intrinsic::x86_fma_vfnmsub_ss:
|
||||
case Intrinsic::x86_fma_vfmadd_sd:
|
||||
case Intrinsic::x86_fma_vfmsub_sd:
|
||||
case Intrinsic::x86_fma_vfnmadd_sd:
|
||||
case Intrinsic::x86_fma_vfnmsub_sd:
|
||||
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
|
||||
UndefElts, Depth + 1);
|
||||
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
|
||||
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
|
||||
UndefElts2, Depth + 1);
|
||||
if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
|
||||
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
|
||||
UndefElts3, Depth + 1);
|
||||
if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
|
||||
|
||||
// If lowest element of a scalar op isn't used then use Arg0.
|
||||
if (DemandedElts.getLoBits(1) != 1)
|
||||
return II->getArgOperand(0);
|
||||
|
||||
// Output elements are undefined if all three are undefined. Consider
|
||||
// things like undef&0. The result is known zero, not undef.
|
||||
UndefElts &= UndefElts2;
|
||||
UndefElts &= UndefElts3;
|
||||
break;
|
||||
|
||||
// SSE4A instructions leave the upper 64-bits of the 128-bit result
|
||||
// in an undefined state.
|
||||
case Intrinsic::x86_sse4a_extrq:
|
||||
|
@ -19,6 +19,32 @@ define <4 x float> @test_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; Only element 0 of the vfmadd.ss result is extracted, so the constants inserted
; into elements 1-3 of the first operand are never observed. The CHECK lines
; expect the insertelement chain to be dropped and %a passed to the call directly.
define float @test_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfmadd_ss_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret float [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 0
|
||||
ret float %5
|
||||
}
|
||||
|
||||
; Only element 1 of the vfmadd.ss result is extracted, so the lowest element is
; not demanded. The CHECK line expects the call to disappear and the function to
; return the constant 1.0 that was inserted into element 1 of the first operand.
define float @test_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfmadd_ss_1(
|
||||
; CHECK-NEXT: ret float 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 1
|
||||
ret float %5
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <2 x double> @test_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
@ -32,6 +58,28 @@ define <2 x double> @test_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x doubl
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Only element 0 of the vfmadd.sd result is extracted, so the constant inserted
; into element 1 of the first operand is never observed. The CHECK lines expect
; the insertelement to be dropped and %a passed to the call directly.
define double @test_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfmadd_sd_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret double [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 0
|
||||
ret double %3
|
||||
}
|
||||
|
||||
; Only element 1 of the vfmadd.sd result is extracted, so the lowest element is
; not demanded. The CHECK line expects the call to disappear and the function to
; return the constant 1.0 that was inserted into element 1 of the first operand.
define double @test_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfmadd_sd_1(
|
||||
; CHECK-NEXT: ret double 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 1
|
||||
ret double %3
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <4 x float> @test_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
@ -49,6 +97,32 @@ define <4 x float> @test_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; Only element 0 of the vfmsub.ss result is extracted, so the constants inserted
; into elements 1-3 of the first operand are never observed. The CHECK lines
; expect the insertelement chain to be dropped and %a passed to the call directly.
define float @test_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfmsub_ss_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret float [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 0
|
||||
ret float %5
|
||||
}
|
||||
|
||||
; Only element 1 of the vfmsub.ss result is extracted, so the lowest element is
; not demanded. The CHECK line expects the call to disappear and the function to
; return the constant 1.0 that was inserted into element 1 of the first operand.
define float @test_vfmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfmsub_ss_1(
|
||||
; CHECK-NEXT: ret float 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 1
|
||||
ret float %5
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <2 x double> @test_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
@ -62,6 +136,28 @@ define <2 x double> @test_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x doubl
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Only element 0 of the vfmsub.sd result is extracted, so the constant inserted
; into element 1 of the first operand is never observed. The CHECK lines expect
; the insertelement to be dropped and %a passed to the call directly.
define double @test_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfmsub_sd_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret double [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 0
|
||||
ret double %3
|
||||
}
|
||||
|
||||
; Only element 1 of the vfmsub.sd result is extracted, so the lowest element is
; not demanded. The CHECK line expects the call to disappear and the function to
; return the constant 1.0 that was inserted into element 1 of the first operand.
define double @test_vfmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfmsub_sd_1(
|
||||
; CHECK-NEXT: ret double 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 1
|
||||
ret double %3
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <4 x float> @test_vfnmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
@ -79,6 +175,32 @@ define <4 x float> @test_vfnmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; Only element 0 of the vfnmadd.ss result is extracted, so the constants inserted
; into elements 1-3 of the first operand are never observed. The CHECK lines
; expect the insertelement chain to be dropped and %a passed to the call directly.
define float @test_vfnmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfnmadd_ss_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret float [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 0
|
||||
ret float %5
|
||||
}
|
||||
|
||||
; Only element 1 of the vfnmadd.ss result is extracted, so the lowest element is
; not demanded. The CHECK line expects the call to disappear and the function to
; return the constant 1.0 that was inserted into element 1 of the first operand.
define float @test_vfnmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfnmadd_ss_1(
|
||||
; CHECK-NEXT: ret float 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 1
|
||||
ret float %5
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <2 x double> @test_vfnmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
@ -92,6 +214,28 @@ define <2 x double> @test_vfnmadd_sd(<2 x double> %a, <2 x double> %b, <2 x doub
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Only element 0 of the vfnmadd.sd result is extracted, so the constant inserted
; into element 1 of the first operand is never observed. The CHECK lines expect
; the insertelement to be dropped and %a passed to the call directly.
define double @test_vfnmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfnmadd_sd_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret double [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 0
|
||||
ret double %3
|
||||
}
|
||||
|
||||
; Only element 1 of the vfnmadd.sd result is extracted, so the lowest element is
; not demanded. The CHECK line expects the call to disappear and the function to
; return the constant 1.0 that was inserted into element 1 of the first operand.
define double @test_vfnmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfnmadd_sd_1(
|
||||
; CHECK-NEXT: ret double 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 1
|
||||
ret double %3
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <4 x float> @test_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
@ -109,6 +253,32 @@ define <4 x float> @test_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; Only element 0 of the vfnmsub.ss result is extracted, so the constants inserted
; into elements 1-3 of the first operand are never observed. The CHECK lines
; expect the insertelement chain to be dropped and %a passed to the call directly.
define float @test_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfnmsub_ss_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret float [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 0
|
||||
ret float %5
|
||||
}
|
||||
|
||||
; Only element 1 of the vfnmsub.ss result is extracted, so the lowest element is
; not demanded. The CHECK line expects the call to disappear and the function to
; return the constant 1.0 that was inserted into element 1 of the first operand.
define float @test_vfnmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfnmsub_ss_1(
|
||||
; CHECK-NEXT: ret float 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 1
|
||||
ret float %5
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <2 x double> @test_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
@ -121,3 +291,25 @@ define <2 x double> @test_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x doub
|
||||
%res = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Only element 0 of the vfnmsub.sd result is extracted, so the constant inserted
; into element 1 of the first operand is never observed. The CHECK lines expect
; the insertelement to be dropped and %a passed to the call directly.
define double @test_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfnmsub_sd_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret double [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 0
|
||||
ret double %3
|
||||
}
|
||||
|
||||
; Only element 1 of the vfnmsub.sd result is extracted, so the lowest element is
; not demanded. The CHECK line expects the call to disappear and the function to
; return the constant 1.0 that was inserted into element 1 of the first operand.
define double @test_vfnmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfnmsub_sd_1(
|
||||
; CHECK-NEXT: ret double 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 1
|
||||
ret double %3
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user