[X86] Don't lower FABS/FNEG masking directly to a ConstantPool load. Just create a ConstantFPSDNode and let that be lowered.

This allows broadcast loads to be used when available.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279958 91177308-0d34-0410-b5e6-96231b3b80d8
Craig Topper 2016-08-29 04:49:31 +00:00
parent 79711e4f46
commit 51b695a52e
8 changed files with 192 additions and 82 deletions
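
For reference, a condensed sketch of the new lowering path, using the locals
that already exist in LowerFABSorFNEG (EltVT, EltBits, IsFABS, IsF128,
LogicVT, dl, DAG); the full context is in the hunk below:

  // For FABS the mask clears only the sign bit (0x7f...f); for FNEG it is
  // the sign bit itself (0x80...0).
  APInt MaskElt =
      IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits);
  // Pick the float semantics matching the element type and hand the mask
  // bits to getConstantFP, which splats them to the vector logic type.
  const fltSemantics &Sem =
      EltVT == MVT::f64 ? APFloat::IEEEdouble
                        : (IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle);
  SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);

Because the mask is now a splat FP constant rather than a pre-built constant
pool vector, the normal constant lowering is free to materialize it as a
vbroadcastss/vbroadcastsd or a {1toN} broadcast memory operand where the
subtarget supports it, as the test updates below show.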


@@ -14639,18 +14639,13 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
}
unsigned EltBits = EltVT.getSizeInBits();
LLVMContext *Context = DAG.getContext();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
APInt MaskElt =
IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits);
Constant *C = ConstantInt::get(*Context, MaskElt);
C = ConstantVector::getSplat(NumElts, C);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(
LogicVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
const fltSemantics &Sem =
EltVT == MVT::f64 ? APFloat::IEEEdouble :
(IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle);
SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);


@@ -93,7 +93,7 @@ define <8 x i32> @VMOVZQI2PQI([0 x float]* nocapture %aFOO) nounwind {
define <16 x float> @fneg(<16 x float> %a) nounwind {
; CHECK-LABEL: fneg:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vxorps %ymm2, %ymm1, %ymm1
; CHECK-NEXT: retq


@@ -945,27 +945,30 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
define <16 x float> @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512F-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512VL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512BW-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_fxor:
; SKX: ## BB#0:
; SKX-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: retq
%res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
@@ -973,10 +976,34 @@ define <16 x float> @test_fxor(<16 x float> %a) {
}
define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; CHECK-LABEL: test_fxor_8f32:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
; AVX512F-LABEL: test_fxor_8f32:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; AVX512F-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_fxor_8f32:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; AVX512VL-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_fxor_8f32:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; AVX512BW-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor_8f32:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; AVX512DQ-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_fxor_8f32:
; SKX: ## BB#0:
; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; SKX-NEXT: retq
%res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
ret <8 x float>%res
}
@@ -984,27 +1011,30 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) {
define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v8f64:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: fabs_v8f64:
; SKX: ## BB#0:
; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
{
%t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
@@ -1015,27 +1045,30 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
define <16 x float> @fabs_v16f32(<16 x float> %p)
; AVX512F-LABEL: fabs_v16f32:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v16f32:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v16f32:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v16f32:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: fabs_v16f32:
; SKX: ## BB#0:
; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: retq
{
%t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)


@@ -7,7 +7,7 @@
define <16 x float> @test1(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test1:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm2, %zmm2
; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm2, %zmm2
; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
@@ -25,7 +25,7 @@ define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
%0 = tail call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
@@ -37,7 +37,7 @@ define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test3:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
%0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
@@ -49,7 +49,7 @@ define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test4:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
%0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
@@ -60,7 +60,7 @@ entry:
define <16 x float> @test5(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test5:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm2, %zmm2
; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm2, %zmm2
; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
@@ -73,7 +73,7 @@ define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test6:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vfnmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
%0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 2) #2
@@ -86,7 +86,7 @@ define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test7:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
%0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
@@ -97,7 +97,7 @@ entry:
define <8 x float> @test8(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test8:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm2, %ymm2
; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT: retq
entry:


@@ -1163,11 +1163,23 @@ define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
}
define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 {
; ALL-LABEL: test_v4f64_fneg_fmul_no_nsz:
; ALL: # BB#0:
; ALL-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; ALL-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0
; ALL-NEXT: retq
; FMA-LABEL: test_v4f64_fneg_fmul_no_nsz:
; FMA: # BB#0:
; FMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; FMA-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f64_fneg_fmul_no_nsz:
; FMA4: # BB#0:
; FMA4-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f64_fneg_fmul_no_nsz:
; AVX512: # BB#0:
; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vxorpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
; AVX512-NEXT: retq
%m = fmul <4 x double> %x, %y
%n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
ret <4 x double> %n


@@ -787,7 +787,7 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %
; FMA: # BB#0:
; FMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; FMA-NEXT: vmulpd %ymm2, %ymm0, %ymm0
; FMA-NEXT: vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; FMA-NEXT: vmovapd {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; FMA-NEXT: vxorpd %ymm2, %ymm0, %ymm0
; FMA-NEXT: vxorpd %ymm2, %ymm1, %ymm1
; FMA-NEXT: retq
@@ -796,7 +796,7 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %
; FMA4: # BB#0:
; FMA4-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vmulpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; FMA4-NEXT: vmovapd {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; FMA4-NEXT: vxorpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vxorpd %ymm2, %ymm1, %ymm1
; FMA4-NEXT: retq
@@ -804,7 +804,7 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %
; AVX512-LABEL: test_v8f64_fneg_fmul_no_nsz:
; AVX512: # BB#0:
; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vxorpd {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-NEXT: retq
%m = fmul <8 x double> %x, %y
%n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m


@@ -15,7 +15,7 @@ define void @foo(%struct.anon* byval %p) nounwind {
; CHECK-LABEL: foo:
; CHECK: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: movaps {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-NEXT: movaps {{.*#+}} xmm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; CHECK-NEXT: xorps %xmm2, %xmm0
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: xorps %xmm2, %xmm1


@@ -22,45 +22,111 @@ define <2 x double> @fabs_v2f64(<2 x double> %p) {
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
define <4 x float> @fabs_v4f32(<4 x float> %p) {
; X32-LABEL: fabs_v4f32:
; X32: # BB#0:
; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: retl
; X32_AVX-LABEL: fabs_v4f32:
; X32_AVX: # BB#0:
; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32_AVX-NEXT: retl
;
; X64-LABEL: fabs_v4f32:
; X64: # BB#0:
; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: retq
; X32_AVX512VL-LABEL: fabs_v4f32:
; X32_AVX512VL: # BB#0:
; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1
; X32_AVX512VL-NEXT: vandps %xmm1, %xmm0, %xmm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v4f32:
; X32_AVX512VLDQ: # BB#0:
; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
; X32_AVX512VLDQ-NEXT: retl
;
; X64_AVX-LABEL: fabs_v4f32:
; X64_AVX: # BB#0:
; X64_AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX-NEXT: retq
;
; X64_AVX512VL-LABEL: fabs_v4f32:
; X64_AVX512VL: # BB#0:
; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; X64_AVX512VL-NEXT: vandps %xmm1, %xmm0, %xmm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v4f32:
; X64_AVX512VLDQ: # BB#0:
; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; X64_AVX512VLDQ-NEXT: retq
%t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
ret <4 x float> %t
}
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
define <4 x double> @fabs_v4f64(<4 x double> %p) {
; X32-LABEL: fabs_v4f64:
; X32: # BB#0:
; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: retl
; X32_AVX-LABEL: fabs_v4f64:
; X32_AVX: # BB#0:
; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32_AVX-NEXT: retl
;
; X64-LABEL: fabs_v4f64:
; X64: # BB#0:
; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
; X32_AVX512VL-LABEL: fabs_v4f64:
; X32_AVX512VL: # BB#0:
; X32_AVX512VL-NEXT: vbroadcastsd {{\.LCPI.*}}, %ymm1
; X32_AVX512VL-NEXT: vandpd %ymm1, %ymm0, %ymm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v4f64:
; X32_AVX512VLDQ: # BB#0:
; X32_AVX512VLDQ-NEXT: vandpd {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
; X32_AVX512VLDQ-NEXT: retl
;
; X64_AVX-LABEL: fabs_v4f64:
; X64_AVX: # BB#0:
; X64_AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; X64_AVX-NEXT: retq
;
; X64_AVX512VL-LABEL: fabs_v4f64:
; X64_AVX512VL: # BB#0:
; X64_AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
; X64_AVX512VL-NEXT: vandpd %ymm1, %ymm0, %ymm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v4f64:
; X64_AVX512VLDQ: # BB#0:
; X64_AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
; X64_AVX512VLDQ-NEXT: retq
%t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
ret <4 x double> %t
}
declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
define <8 x float> @fabs_v8f32(<8 x float> %p) {
; X32-LABEL: fabs_v8f32:
; X32: # BB#0:
; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: retl
; X32_AVX-LABEL: fabs_v8f32:
; X32_AVX: # BB#0:
; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32_AVX-NEXT: retl
;
; X64-LABEL: fabs_v8f32:
; X64: # BB#0:
; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
; X32_AVX512VL-LABEL: fabs_v8f32:
; X32_AVX512VL: # BB#0:
; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %ymm1
; X32_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v8f32:
; X32_AVX512VLDQ: # BB#0:
; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
; X32_AVX512VLDQ-NEXT: retl
;
; X64_AVX-LABEL: fabs_v8f32:
; X64_AVX: # BB#0:
; X64_AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; X64_AVX-NEXT: retq
;
; X64_AVX512VL-LABEL: fabs_v8f32:
; X64_AVX512VL: # BB#0:
; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; X64_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v8f32:
; X64_AVX512VLDQ: # BB#0:
; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64_AVX512VLDQ-NEXT: retq
%t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
ret <8 x float> %t
}
@@ -69,36 +135,38 @@ declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
define <8 x double> @fabs_v8f64(<8 x double> %p) {
; X32_AVX-LABEL: fabs_v8f64:
; X32_AVX: # BB#0:
; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [nan,nan,nan,nan]
; X32_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X32_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X32_AVX-NEXT: retl
;
; X32_AVX512VL-LABEL: fabs_v8f64:
; X32_AVX512VL: # BB#0:
; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}, %zmm0, %zmm0
; X32_AVX512VL-NEXT: vbroadcastsd {{\.LCPI.*}}, %zmm1
; X32_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v8f64:
; X32_AVX512VLDQ: # BB#0:
; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}, %zmm0, %zmm0
; X32_AVX512VLDQ-NEXT: vandpd {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
; X32_AVX512VLDQ-NEXT: retl
;
; X64_AVX-LABEL: fabs_v8f64:
; X64_AVX: # BB#0:
; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [nan,nan,nan,nan]
; X64_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X64_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X64_AVX-NEXT: retq
;
; X64_AVX512VL-LABEL: fabs_v8f64:
; X64_AVX512VL: # BB#0:
; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; X64_AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
; X64_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v8f64:
; X64_AVX512VLDQ: # BB#0:
; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; X64_AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; X64_AVX512VLDQ-NEXT: retq
%t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
ret <8 x double> %t
@@ -108,36 +176,38 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
define <16 x float> @fabs_v16f32(<16 x float> %p) {
; X32_AVX-LABEL: fabs_v16f32:
; X32_AVX: # BB#0:
; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [nan,nan,nan,nan,nan,nan,nan,nan]
; X32_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X32_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X32_AVX-NEXT: retl
;
; X32_AVX512VL-LABEL: fabs_v16f32:
; X32_AVX512VL: # BB#0:
; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}, %zmm0, %zmm0
; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %zmm1
; X32_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v16f32:
; X32_AVX512VLDQ: # BB#0:
; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}, %zmm0, %zmm0
; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
; X32_AVX512VLDQ-NEXT: retl
;
; X64_AVX-LABEL: fabs_v16f32:
; X64_AVX: # BB#0:
; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [nan,nan,nan,nan,nan,nan,nan,nan]
; X64_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X64_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X64_AVX-NEXT: retq
;
; X64_AVX512VL-LABEL: fabs_v16f32:
; X64_AVX512VL: # BB#0:
; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; X64_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v16f32:
; X64_AVX512VLDQ: # BB#0:
; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; X64_AVX512VLDQ-NEXT: retq
%t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
ret <16 x float> %t