mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-11 13:37:07 +00:00
[X86] Remove the last of the 'x86.fma.' intrinsics and autoupgrade them to 'llvm.fma'. Add upgrade tests for all.
Still need to remove the AVX512 masked versions. llvm-svn: 336383
This commit is contained in:
parent
a5d33b74f3
commit
1b56628f9e
@ -1912,23 +1912,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_fma_vfmaddsub_ps : // FIXME: remove this intrinsic.
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmaddsub_pd : // FIXME: remove this intrinsic.
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmaddsub_ps_256 : // FIXME: remove this intrinsic.
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmaddsub_pd_256 : // FIXME: remove this intrinsic.
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmadd_pd_128 : // FIXME: remove this intrinsic.
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
|
@ -76,6 +76,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
||||
Name=="ssse3.pabs.d.128" || // Added in 6.0
|
||||
Name.startswith("fma.vfmadd.") || // Added in 7.0
|
||||
Name.startswith("fma.vfmsub.") || // Added in 7.0
|
||||
Name.startswith("fma.vfmaddsub.") || // Added in 7.0
|
||||
Name.startswith("fma.vfmsubadd.") || // Added in 7.0
|
||||
Name.startswith("fma.vfnmadd.") || // Added in 7.0
|
||||
Name.startswith("fma.vfnmsub.") || // Added in 7.0
|
||||
@ -2778,25 +2779,30 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
if (IsScalar)
|
||||
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
|
||||
(uint64_t)0);
|
||||
} else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
|
||||
// Handle FSUBADD.
|
||||
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
|
||||
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
|
||||
Intrinsic::ID IID;
|
||||
if (VecWidth == 128 && EltWidth == 32)
|
||||
IID = Intrinsic::x86_fma_vfmaddsub_ps;
|
||||
else if (VecWidth == 128 && EltWidth == 64)
|
||||
IID = Intrinsic::x86_fma_vfmaddsub_pd;
|
||||
else if (VecWidth == 256 && EltWidth == 32)
|
||||
IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
|
||||
else if (VecWidth == 256 && EltWidth == 64)
|
||||
IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
|
||||
else
|
||||
llvm_unreachable("Unexpected intrinsic");
|
||||
Value *Arg2 = Builder.CreateFNeg(CI->getArgOperand(2));
|
||||
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), Arg2 };
|
||||
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
|
||||
Ops);
|
||||
} else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
|
||||
Name.startswith("fma.vfmsubadd.p"))) {
|
||||
bool IsSubAdd = Name[7] == 's';
|
||||
int NumElts = CI->getType()->getVectorNumElements();
|
||||
|
||||
|
||||
|
||||
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
|
||||
CI->getArgOperand(2) };
|
||||
|
||||
Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
|
||||
Ops[0]->getType());
|
||||
Value *Odd = Builder.CreateCall(FMA, Ops);
|
||||
Ops[2] = Builder.CreateFNeg(Ops[2]);
|
||||
Value *Even = Builder.CreateCall(FMA, Ops);
|
||||
|
||||
if (IsSubAdd)
|
||||
std::swap(Even, Odd);
|
||||
|
||||
SmallVector<uint32_t, 32> Idxs(NumElts);
|
||||
for (int i = 0; i != NumElts; ++i)
|
||||
Idxs[i] = i + (i % 2) * NumElts;
|
||||
|
||||
Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
|
||||
} else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
|
||||
Name.startswith("avx512.maskz.pternlog."))) {
|
||||
bool ZeroMask = Name[11] == 'z';
|
||||
|
@ -1249,10 +1249,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_vpshrd_w_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
|
||||
X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
|
||||
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmaddsub_ps_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma4_vfmadd_sd, INTR_TYPE_3OP, X86ISD::FMADD4S, 0),
|
||||
X86_INTRINSIC_DATA(fma4_vfmadd_ss, INTR_TYPE_3OP, X86ISD::FMADD4S, 0),
|
||||
X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
|
||||
|
1036
test/CodeGen/X86/fma-intrinsics-x86-upgrade.ll
Normal file
1036
test/CodeGen/X86/fma-intrinsics-x86-upgrade.ll
Normal file
File diff suppressed because it is too large
Load Diff
@ -25,8 +25,12 @@ define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x02]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = extractelement <4 x float> %a0, i64 0
|
||||
%2 = extractelement <4 x float> %a1, i64 0
|
||||
%3 = extractelement <4 x float> %a2, i64 0
|
||||
%4 = call float @llvm.fma.f32(float %1, float %2, float %3)
|
||||
%5 = insertelement <4 x float> %a0, float %4, i64 0
|
||||
ret <4 x float> %5
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -52,10 +56,13 @@ define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x01]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = extractelement <4 x float> %a1, i64 0
|
||||
%2 = extractelement <4 x float> %a0, i64 0
|
||||
%3 = extractelement <4 x float> %a2, i64 0
|
||||
%4 = call float @llvm.fma.f32(float %1, float %2, float %3)
|
||||
%5 = insertelement <4 x float> %a1, float %4, i64 0
|
||||
ret <4 x float> %5
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_sd:
|
||||
@ -78,8 +85,12 @@ define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x02]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = extractelement <2 x double> %a0, i64 0
|
||||
%2 = extractelement <2 x double> %a1, i64 0
|
||||
%3 = extractelement <2 x double> %a2, i64 0
|
||||
%4 = call double @llvm.fma.f64(double %1, double %2, double %3)
|
||||
%5 = insertelement <2 x double> %a0, double %4, i64 0
|
||||
ret <2 x double> %5
|
||||
}
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
@ -105,10 +116,13 @@ define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x01]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = extractelement <2 x double> %a1, i64 0
|
||||
%2 = extractelement <2 x double> %a0, i64 0
|
||||
%3 = extractelement <2 x double> %a2, i64 0
|
||||
%4 = call double @llvm.fma.f64(double %1, double %2, double %3)
|
||||
%5 = insertelement <2 x double> %a1, double %4, i64 0
|
||||
ret <2 x double> %5
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps:
|
||||
@ -130,10 +144,9 @@ define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa8,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %1
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd:
|
||||
@ -155,10 +168,9 @@ define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa8,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %1
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps_256:
|
||||
@ -180,10 +192,9 @@ define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa8,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) + mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %1
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd_256:
|
||||
@ -205,10 +216,9 @@ define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa8,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) + mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %1
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFMSUB
|
||||
define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -232,8 +242,13 @@ define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x02]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = extractelement <4 x float> %a0, i64 0
|
||||
%2 = extractelement <4 x float> %a1, i64 0
|
||||
%3 = extractelement <4 x float> %a2, i64 0
|
||||
%4 = fsub float -0.000000e+00, %3
|
||||
%5 = call float @llvm.fma.f32(float %1, float %2, float %4)
|
||||
%6 = insertelement <4 x float> %a0, float %5, i64 0
|
||||
ret <4 x float> %6
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -259,10 +274,14 @@ define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x01]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = extractelement <4 x float> %a1, i64 0
|
||||
%2 = extractelement <4 x float> %a0, i64 0
|
||||
%3 = extractelement <4 x float> %a2, i64 0
|
||||
%4 = fsub float -0.000000e+00, %3
|
||||
%5 = call float @llvm.fma.f32(float %1, float %2, float %4)
|
||||
%6 = insertelement <4 x float> %a1, float %5, i64 0
|
||||
ret <4 x float> %6
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_sd:
|
||||
@ -285,8 +304,13 @@ define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x02]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = extractelement <2 x double> %a0, i64 0
|
||||
%2 = extractelement <2 x double> %a1, i64 0
|
||||
%3 = extractelement <2 x double> %a2, i64 0
|
||||
%4 = fsub double -0.000000e+00, %3
|
||||
%5 = call double @llvm.fma.f64(double %1, double %2, double %4)
|
||||
%6 = insertelement <2 x double> %a0, double %5, i64 0
|
||||
ret <2 x double> %6
|
||||
}
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
@ -312,10 +336,14 @@ define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x01]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = extractelement <2 x double> %a1, i64 0
|
||||
%2 = extractelement <2 x double> %a0, i64 0
|
||||
%3 = extractelement <2 x double> %a2, i64 0
|
||||
%4 = fsub double -0.000000e+00, %3
|
||||
%5 = call double @llvm.fma.f64(double %1, double %2, double %4)
|
||||
%6 = insertelement <2 x double> %a1, double %5, i64 0
|
||||
ret <2 x double> %6
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps:
|
||||
@ -337,10 +365,10 @@ define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaa,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
|
||||
ret <4 x float> %2
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd:
|
||||
@ -362,10 +390,10 @@ define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaa,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
|
||||
ret <2 x double> %2
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps_256:
|
||||
@ -387,10 +415,10 @@ define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xaa,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) - mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
|
||||
ret <8 x float> %2
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd_256:
|
||||
@ -412,10 +440,10 @@ define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xaa,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) - mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
|
||||
ret <4 x double> %2
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFNMADD
|
||||
define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -439,8 +467,13 @@ define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x02]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = extractelement <4 x float> %a0, i64 0
|
||||
%2 = extractelement <4 x float> %a1, i64 0
|
||||
%3 = extractelement <4 x float> %a2, i64 0
|
||||
%4 = fsub float -0.000000e+00, %2
|
||||
%5 = call float @llvm.fma.f32(float %1, float %4, float %3)
|
||||
%6 = insertelement <4 x float> %a0, float %5, i64 0
|
||||
ret <4 x float> %6
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -466,10 +499,14 @@ define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x01]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = extractelement <4 x float> %a1, i64 0
|
||||
%2 = extractelement <4 x float> %a0, i64 0
|
||||
%3 = extractelement <4 x float> %a2, i64 0
|
||||
%4 = fsub float -0.000000e+00, %2
|
||||
%5 = call float @llvm.fma.f32(float %1, float %4, float %3)
|
||||
%6 = insertelement <4 x float> %a1, float %5, i64 0
|
||||
ret <4 x float> %6
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_sd:
|
||||
@ -492,8 +529,13 @@ define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x02]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = extractelement <2 x double> %a0, i64 0
|
||||
%2 = extractelement <2 x double> %a1, i64 0
|
||||
%3 = extractelement <2 x double> %a2, i64 0
|
||||
%4 = fsub double -0.000000e+00, %2
|
||||
%5 = call double @llvm.fma.f64(double %1, double %4, double %3)
|
||||
%6 = insertelement <2 x double> %a0, double %5, i64 0
|
||||
ret <2 x double> %6
|
||||
}
|
||||
|
||||
define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
@ -519,10 +561,14 @@ define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double>
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x01]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = extractelement <2 x double> %a1, i64 0
|
||||
%2 = extractelement <2 x double> %a0, i64 0
|
||||
%3 = extractelement <2 x double> %a2, i64 0
|
||||
%4 = fsub double -0.000000e+00, %2
|
||||
%5 = call double @llvm.fma.f64(double %1, double %4, double %3)
|
||||
%6 = insertelement <2 x double> %a1, double %5, i64 0
|
||||
ret <2 x double> %6
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps:
|
||||
@ -544,10 +590,10 @@ define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xac,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
|
||||
%2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %2
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd:
|
||||
@ -569,10 +615,10 @@ define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xac,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
|
||||
%2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %2
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps_256:
|
||||
@ -594,10 +640,10 @@ define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xac,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) + mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
|
||||
%2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %2
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd_256:
|
||||
@ -619,10 +665,10 @@ define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double>
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xac,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) + mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
|
||||
%2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %2
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFNMSUB
|
||||
define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -646,8 +692,14 @@ define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x02]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = extractelement <4 x float> %a0, i64 0
|
||||
%2 = extractelement <4 x float> %a1, i64 0
|
||||
%3 = extractelement <4 x float> %a2, i64 0
|
||||
%4 = fsub float -0.000000e+00, %2
|
||||
%5 = fsub float -0.000000e+00, %3
|
||||
%6 = call float @llvm.fma.f32(float %1, float %4, float %5)
|
||||
%7 = insertelement <4 x float> %a0, float %6, i64 0
|
||||
ret <4 x float> %7
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -673,10 +725,15 @@ define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x01]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = extractelement <4 x float> %a1, i64 0
|
||||
%2 = extractelement <4 x float> %a0, i64 0
|
||||
%3 = extractelement <4 x float> %a2, i64 0
|
||||
%4 = fsub float -0.000000e+00, %2
|
||||
%5 = fsub float -0.000000e+00, %3
|
||||
%6 = call float @llvm.fma.f32(float %1, float %4, float %5)
|
||||
%7 = insertelement <4 x float> %a1, float %6, i64 0
|
||||
ret <4 x float> %7
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_sd:
|
||||
@ -699,8 +756,14 @@ define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x02]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = extractelement <2 x double> %a0, i64 0
|
||||
%2 = extractelement <2 x double> %a1, i64 0
|
||||
%3 = extractelement <2 x double> %a2, i64 0
|
||||
%4 = fsub double -0.000000e+00, %2
|
||||
%5 = fsub double -0.000000e+00, %3
|
||||
%6 = call double @llvm.fma.f64(double %1, double %4, double %5)
|
||||
%7 = insertelement <2 x double> %a0, double %6, i64 0
|
||||
ret <2 x double> %7
|
||||
}
|
||||
|
||||
define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
@ -726,10 +789,15 @@ define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double>
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x01]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = extractelement <2 x double> %a1, i64 0
|
||||
%2 = extractelement <2 x double> %a0, i64 0
|
||||
%3 = extractelement <2 x double> %a2, i64 0
|
||||
%4 = fsub double -0.000000e+00, %2
|
||||
%5 = fsub double -0.000000e+00, %3
|
||||
%6 = call double @llvm.fma.f64(double %1, double %4, double %5)
|
||||
%7 = insertelement <2 x double> %a1, double %6, i64 0
|
||||
ret <2 x double> %7
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps:
|
||||
@ -751,10 +819,11 @@ define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xae,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
|
||||
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %2)
|
||||
ret <4 x float> %3
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd:
|
||||
@ -776,10 +845,11 @@ define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xae,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
|
||||
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %2)
|
||||
ret <2 x double> %3
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps_256:
|
||||
@ -801,10 +871,11 @@ define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xae,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
|
||||
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %2)
|
||||
ret <8 x float> %3
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd_256:
|
||||
@ -826,10 +897,11 @@ define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double>
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xae,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
|
||||
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %2)
|
||||
ret <4 x double> %3
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFMADDSUB
|
||||
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -852,10 +924,12 @@ define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa6,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
|
||||
%4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
ret <4 x float> %4
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd:
|
||||
@ -877,10 +951,12 @@ define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a
|
||||
; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa6,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
|
||||
%4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x double> %4
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps_256:
|
||||
@ -902,10 +978,12 @@ define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %
|
||||
; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa6,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
|
||||
%4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
|
||||
ret <8 x float> %4
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd_256:
|
||||
@ -927,10 +1005,12 @@ define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double
|
||||
; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa6,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
|
||||
%4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
ret <4 x double> %4
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFMSUBADD
|
||||
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -953,10 +1033,12 @@ define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1,
|
||||
; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa7,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
|
||||
%4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
ret <4 x float> %4
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd:
|
||||
@ -978,10 +1060,12 @@ define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a
|
||||
; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa7,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
|
||||
%4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x double> %4
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps_256:
|
||||
@ -1003,10 +1087,12 @@ define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %
|
||||
; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa7,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
|
||||
%4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
|
||||
ret <8 x float> %4
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd_256:
|
||||
@ -1028,9 +1114,18 @@ define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double
|
||||
; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa7,0x00]
|
||||
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem
|
||||
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
|
||||
%4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
ret <4 x double> %4
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
declare float @llvm.fma.f32(float, float, float)
|
||||
declare double @llvm.fma.f64(double, double, double)
|
||||
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
|
||||
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
|
||||
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
250
test/CodeGen/X86/fma4-intrinsics-x86-upgrade.ll
Normal file
250
test/CodeGen/X86/fma4-intrinsics-x86-upgrade.ll
Normal file
@ -0,0 +1,250 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,-fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,+fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
|
||||
|
||||
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFMSUB
|
||||
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFNMADD
|
||||
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFNMSUB
|
||||
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFMADDSUB
|
||||
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFMSUBADD
|
||||
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
attributes #0 = { nounwind }
|
@ -46,40 +46,36 @@ define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %1
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %1
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %1
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %1
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFMSUB
|
||||
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -87,40 +83,40 @@ define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
|
||||
ret <4 x float> %2
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
|
||||
ret <2 x double> %2
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
|
||||
ret <8 x float> %2
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
|
||||
ret <4 x double> %2
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFNMADD
|
||||
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -128,40 +124,40 @@ define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
|
||||
%2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %2
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
|
||||
%2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %2
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
|
||||
%2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %2
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
|
||||
%2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %2
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFNMSUB
|
||||
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -169,40 +165,44 @@ define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
|
||||
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %2)
|
||||
ret <4 x float> %3
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
|
||||
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %2)
|
||||
ret <2 x double> %3
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
|
||||
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %2)
|
||||
ret <8 x float> %3
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
|
||||
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %2)
|
||||
ret <4 x double> %3
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFMADDSUB
|
||||
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -210,40 +210,48 @@ define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1,
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
|
||||
%4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
ret <4 x float> %4
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
|
||||
%4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x double> %4
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
|
||||
%4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
|
||||
ret <8 x float> %4
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
|
||||
%4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
ret <4 x double> %4
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
; VFMSUBADD
|
||||
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
|
||||
@ -251,39 +259,52 @@ define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1,
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
|
||||
%4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
ret <4 x float> %4
|
||||
}
|
||||
declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
|
||||
%4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x double> %4
|
||||
}
|
||||
declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
|
||||
%4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
|
||||
ret <8 x float> %4
|
||||
}
|
||||
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
|
||||
|
||||
define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
|
||||
%4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
ret <4 x double> %4
|
||||
}
|
||||
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
|
||||
|
||||
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #2
|
||||
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #2
|
||||
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #2
|
||||
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #2
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
Loading…
Reference in New Issue
Block a user