From 51b695a52e3e96e7c3faa8398949e8b1df74989d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 29 Aug 2016 04:49:31 +0000 Subject: [PATCH] [X86] Don't lower FABS/FNEG masking directly to a ConstantPool load. Just create a ConstantFPSDNode and let that be lowered. This allows broadcast loads to be used when available. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279958 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 13 +-- test/CodeGen/X86/avx-basic.ll | 2 +- test/CodeGen/X86/avx512-arith.ll | 71 +++++++++---- test/CodeGen/X86/fma-fneg-combine.ll | 16 +-- test/CodeGen/X86/fma_patterns.ll | 22 +++- test/CodeGen/X86/fma_patterns_wide.ll | 6 +- test/CodeGen/X86/pr2656.ll | 2 +- test/CodeGen/X86/vec_fabs.ll | 142 +++++++++++++++++++------- 8 files changed, 192 insertions(+), 82 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 381b81f9945..ca9855e6ec9 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14639,18 +14639,13 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { } unsigned EltBits = EltVT.getSizeInBits(); - LLVMContext *Context = DAG.getContext(); // For FABS, mask is 0x7f...; for FNEG, mask is 0x80... APInt MaskElt = IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits); - Constant *C = ConstantInt::get(*Context, MaskElt); - C = ConstantVector::getSplat(NumElts, C); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); - unsigned Alignment = cast(CPIdx)->getAlignment(); - SDValue Mask = DAG.getLoad( - LogicVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); + const fltSemantics &Sem = + EltVT == MVT::f64 ? APFloat::IEEEdouble : + (IsF128 ?
APFloat::IEEEquad : APFloat::IEEEsingle); + SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT); SDValue Op0 = Op.getOperand(0); bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS); diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll index b05dc71c175..e6cc95fcdb2 100644 --- a/test/CodeGen/X86/avx-basic.ll +++ b/test/CodeGen/X86/avx-basic.ll @@ -93,7 +93,7 @@ define <8 x i32> @VMOVZQI2PQI([0 x float]* nocapture %aFOO) nounwind { define <16 x float> @fneg(<16 x float> %a) nounwind { ; CHECK-LABEL: fneg: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648] +; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00] ; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: vxorps %ymm2, %ymm1, %ymm1 ; CHECK-NEXT: retq diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll index 783983344cf..e24a13e32b1 100644 --- a/test/CodeGen/X86/avx512-arith.ll +++ b/test/CodeGen/X86/avx512-arith.ll @@ -945,27 +945,30 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, define <16 x float> @test_fxor(<16 x float> %a) { ; AVX512F-LABEL: test_fxor: ; AVX512F: ## BB#0: -; AVX512F-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %zmm1 +; AVX512F-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: test_fxor: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1 +; AVX512VL-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: test_fxor: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %zmm1 +; AVX512BW-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test_fxor: ; 
AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0 +; AVX512DQ-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512DQ-NEXT: retq ; ; SKX-LABEL: test_fxor: ; SKX: ## BB#0: -; SKX-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; SKX-NEXT: retq %res = fsub <16 x float> , %a @@ -973,10 +976,34 @@ define <16 x float> @test_fxor(<16 x float> %a) { } define <8 x float> @test_fxor_8f32(<8 x float> %a) { -; CHECK-LABEL: test_fxor_8f32: -; CHECK: ## BB#0: -; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0 -; CHECK-NEXT: retq +; AVX512F-LABEL: test_fxor_8f32: +; AVX512F: ## BB#0: +; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 +; AVX512F-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: test_fxor_8f32: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 +; AVX512VL-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: test_fxor_8f32: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 +; AVX512BW-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: test_fxor_8f32: +; AVX512DQ: ## BB#0: +; AVX512DQ-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 +; AVX512DQ-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: retq +; +; SKX-LABEL: test_fxor_8f32: +; SKX: ## BB#0: +; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 +; SKX-NEXT: retq %res = fsub <8 x float> , %a ret <8 x float>%res } @@ -984,27 +1011,30 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) { define <8 x double> @fabs_v8f64(<8 x double> %p) ; AVX512F-LABEL: fabs_v8f64: ; AVX512F: ## BB#0: -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512F-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1 +; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: fabs_v8f64: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1 +; AVX512VL-NEXT: vpandq %zmm1, 
%zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: fabs_v8f64: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1 +; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: fabs_v8f64: ; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; AVX512DQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512DQ-NEXT: retq ; ; SKX-LABEL: fabs_v8f64: ; SKX: ## BB#0: -; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; SKX-NEXT: retq { %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p) @@ -1015,27 +1045,30 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p) define <16 x float> @fabs_v16f32(<16 x float> %p) ; AVX512F-LABEL: fabs_v16f32: ; AVX512F: ## BB#0: -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %zmm1 +; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: fabs_v16f32: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1 +; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: fabs_v16f32: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %zmm1 +; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: fabs_v16f32: ; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; AVX512DQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512DQ-NEXT: retq ; ; SKX-LABEL: fabs_v16f32: ; SKX: ## BB#0: -; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; SKX-NEXT: retq { %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p) diff --git a/test/CodeGen/X86/fma-fneg-combine.ll b/test/CodeGen/X86/fma-fneg-combine.ll index 766bc01cb72..9d14da9610a 100644 --- 
a/test/CodeGen/X86/fma-fneg-combine.ll +++ b/test/CodeGen/X86/fma-fneg-combine.ll @@ -7,7 +7,7 @@ define <16 x float> @test1(<16 x float> %a, <16 x float> %b, <16 x float> %c) { ; CHECK-LABEL: test1: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm2, %zmm2 +; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm2, %zmm2 ; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0 ; CHECK-NEXT: retq entry: @@ -25,7 +25,7 @@ define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) { ; CHECK-LABEL: test2: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0 -; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; CHECK-NEXT: retq entry: %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2 @@ -37,7 +37,7 @@ define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c) { ; CHECK-LABEL: test3: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0 -; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; CHECK-NEXT: retq entry: %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2 @@ -49,7 +49,7 @@ define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) { ; CHECK-LABEL: test4: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0 -; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; CHECK-NEXT: retq entry: %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2 @@ -60,7 +60,7 @@ entry: define <16 x float> @test5(<16 x float> %a, <16 x float> %b, <16 x float> %c) { ; CHECK-LABEL: test5: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm2, %zmm2 +; CHECK-NEXT: vxorps 
{{.*}}(%rip){1to16}, %zmm2, %zmm2 ; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ; CHECK-NEXT: retq entry: @@ -73,7 +73,7 @@ define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) { ; CHECK-LABEL: test6: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: vfnmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 -; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; CHECK-NEXT: retq entry: %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 2) #2 @@ -86,7 +86,7 @@ define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) { ; CHECK-LABEL: test7: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 -; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0 +; CHECK-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 ; CHECK-NEXT: retq entry: %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2 @@ -97,7 +97,7 @@ entry: define <8 x float> @test8(<8 x float> %a, <8 x float> %b, <8 x float> %c) { ; CHECK-LABEL: test8: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm2, %ymm2 +; CHECK-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm2, %ymm2 ; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 ; CHECK-NEXT: retq entry: diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll index f5df00fd98d..e0efe4ae1b7 100644 --- a/test/CodeGen/X86/fma_patterns.ll +++ b/test/CodeGen/X86/fma_patterns.ll @@ -1163,11 +1163,23 @@ define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 { } define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 { -; ALL-LABEL: test_v4f64_fneg_fmul_no_nsz: -; ALL: # BB#0: -; ALL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 -; ALL-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0 -; ALL-NEXT: retq +; FMA-LABEL: test_v4f64_fneg_fmul_no_nsz: +; FMA: # BB#0: +; FMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0 +; 
FMA-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f64_fneg_fmul_no_nsz: +; FMA4: # BB#0: +; FMA4-NEXT: vmulpd %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f64_fneg_fmul_no_nsz: +; AVX512: # BB#0: +; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vxorpd {{.*}}(%rip){1to4}, %ymm0, %ymm0 +; AVX512-NEXT: retq %m = fmul <4 x double> %x, %y %n = fsub <4 x double> , %m ret <4 x double> %n diff --git a/test/CodeGen/X86/fma_patterns_wide.ll b/test/CodeGen/X86/fma_patterns_wide.ll index d41977cf937..042f7af67e6 100644 --- a/test/CodeGen/X86/fma_patterns_wide.ll +++ b/test/CodeGen/X86/fma_patterns_wide.ll @@ -787,7 +787,7 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> % ; FMA: # BB#0: ; FMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1 ; FMA-NEXT: vmulpd %ymm2, %ymm0, %ymm0 -; FMA-NEXT: vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; FMA-NEXT: vmovapd {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00] ; FMA-NEXT: vxorpd %ymm2, %ymm0, %ymm0 ; FMA-NEXT: vxorpd %ymm2, %ymm1, %ymm1 ; FMA-NEXT: retq @@ -796,7 +796,7 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> % ; FMA4: # BB#0: ; FMA4-NEXT: vmulpd %ymm3, %ymm1, %ymm1 ; FMA4-NEXT: vmulpd %ymm2, %ymm0, %ymm0 -; FMA4-NEXT: vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; FMA4-NEXT: vmovapd {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00] ; FMA4-NEXT: vxorpd %ymm2, %ymm0, %ymm0 ; FMA4-NEXT: vxorpd %ymm2, %ymm1, %ymm1 ; FMA4-NEXT: retq @@ -804,7 +804,7 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> % ; AVX512-LABEL: test_v8f64_fneg_fmul_no_nsz: ; AVX512: # BB#0: ; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vxorpd {{.*}}(%rip), %zmm0, %zmm0 +; 
AVX512-NEXT: vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512-NEXT: retq %m = fmul <8 x double> %x, %y %n = fsub <8 x double> , %m diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll index 095ab831d48..0e0536a6869 100644 --- a/test/CodeGen/X86/pr2656.ll +++ b/test/CodeGen/X86/pr2656.ll @@ -15,7 +15,7 @@ define void @foo(%struct.anon* byval %p) nounwind { ; CHECK-LABEL: foo: ; CHECK: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: movaps {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; CHECK-NEXT: movaps {{.*#+}} xmm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00] ; CHECK-NEXT: xorps %xmm2, %xmm0 ; CHECK-NEXT: cvtss2sd %xmm0, %xmm0 ; CHECK-NEXT: xorps %xmm2, %xmm1 diff --git a/test/CodeGen/X86/vec_fabs.ll b/test/CodeGen/X86/vec_fabs.ll index a25f8769fd6..9edb6ffffb1 100644 --- a/test/CodeGen/X86/vec_fabs.ll +++ b/test/CodeGen/X86/vec_fabs.ll @@ -22,45 +22,111 @@ define <2 x double> @fabs_v2f64(<2 x double> %p) { declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p) define <4 x float> @fabs_v4f32(<4 x float> %p) { -; X32-LABEL: fabs_v4f32: -; X32: # BB#0: -; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 -; X32-NEXT: retl +; X32_AVX-LABEL: fabs_v4f32: +; X32_AVX: # BB#0: +; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 +; X32_AVX-NEXT: retl ; -; X64-LABEL: fabs_v4f32: -; X64: # BB#0: -; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; X64-NEXT: retq +; X32_AVX512VL-LABEL: fabs_v4f32: +; X32_AVX512VL: # BB#0: +; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1 +; X32_AVX512VL-NEXT: vandps %xmm1, %xmm0, %xmm0 +; X32_AVX512VL-NEXT: retl +; +; X32_AVX512VLDQ-LABEL: fabs_v4f32: +; X32_AVX512VLDQ: # BB#0: +; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0 +; X32_AVX512VLDQ-NEXT: retl +; +; X64_AVX-LABEL: fabs_v4f32: +; X64_AVX: # BB#0: +; X64_AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; X64_AVX-NEXT: retq +; +; X64_AVX512VL-LABEL: fabs_v4f32: 
+; X64_AVX512VL: # BB#0: +; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 +; X64_AVX512VL-NEXT: vandps %xmm1, %xmm0, %xmm0 +; X64_AVX512VL-NEXT: retq +; +; X64_AVX512VLDQ-LABEL: fabs_v4f32: +; X64_AVX512VLDQ: # BB#0: +; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; X64_AVX512VLDQ-NEXT: retq %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p) ret <4 x float> %t } declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p) define <4 x double> @fabs_v4f64(<4 x double> %p) { -; X32-LABEL: fabs_v4f64: -; X32: # BB#0: -; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0 -; X32-NEXT: retl +; X32_AVX-LABEL: fabs_v4f64: +; X32_AVX: # BB#0: +; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0 +; X32_AVX-NEXT: retl ; -; X64-LABEL: fabs_v4f64: -; X64: # BB#0: -; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 -; X64-NEXT: retq +; X32_AVX512VL-LABEL: fabs_v4f64: +; X32_AVX512VL: # BB#0: +; X32_AVX512VL-NEXT: vbroadcastsd {{\.LCPI.*}}, %ymm1 +; X32_AVX512VL-NEXT: vandpd %ymm1, %ymm0, %ymm0 +; X32_AVX512VL-NEXT: retl +; +; X32_AVX512VLDQ-LABEL: fabs_v4f64: +; X32_AVX512VLDQ: # BB#0: +; X32_AVX512VLDQ-NEXT: vandpd {{\.LCPI.*}}{1to4}, %ymm0, %ymm0 +; X32_AVX512VLDQ-NEXT: retl +; +; X64_AVX-LABEL: fabs_v4f64: +; X64_AVX: # BB#0: +; X64_AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; X64_AVX-NEXT: retq +; +; X64_AVX512VL-LABEL: fabs_v4f64: +; X64_AVX512VL: # BB#0: +; X64_AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1 +; X64_AVX512VL-NEXT: vandpd %ymm1, %ymm0, %ymm0 +; X64_AVX512VL-NEXT: retq +; +; X64_AVX512VLDQ-LABEL: fabs_v4f64: +; X64_AVX512VLDQ: # BB#0: +; X64_AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to4}, %ymm0, %ymm0 +; X64_AVX512VLDQ-NEXT: retq %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p) ret <4 x double> %t } declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p) define <8 x float> @fabs_v8f32(<8 x float> %p) { -; X32-LABEL: fabs_v8f32: -; X32: # BB#0: -; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0 -; X32-NEXT: retl +; X32_AVX-LABEL: fabs_v8f32: +; X32_AVX: # 
BB#0: +; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0 +; X32_AVX-NEXT: retl ; -; X64-LABEL: fabs_v8f32: -; X64: # BB#0: -; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 -; X64-NEXT: retq +; X32_AVX512VL-LABEL: fabs_v8f32: +; X32_AVX512VL: # BB#0: +; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %ymm1 +; X32_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0 +; X32_AVX512VL-NEXT: retl +; +; X32_AVX512VLDQ-LABEL: fabs_v8f32: +; X32_AVX512VLDQ: # BB#0: +; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 +; X32_AVX512VLDQ-NEXT: retl +; +; X64_AVX-LABEL: fabs_v8f32: +; X64_AVX: # BB#0: +; X64_AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; X64_AVX-NEXT: retq +; +; X64_AVX512VL-LABEL: fabs_v8f32: +; X64_AVX512VL: # BB#0: +; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 +; X64_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0 +; X64_AVX512VL-NEXT: retq +; +; X64_AVX512VLDQ-LABEL: fabs_v8f32: +; X64_AVX512VLDQ: # BB#0: +; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to8}, %ymm0, %ymm0 +; X64_AVX512VLDQ-NEXT: retq %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p) ret <8 x float> %t } @@ -69,36 +135,38 @@ declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p) define <8 x double> @fabs_v8f64(<8 x double> %p) { ; X32_AVX-LABEL: fabs_v8f64: ; X32_AVX: # BB#0: -; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807] +; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [nan,nan,nan,nan] ; X32_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 ; X32_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 ; X32_AVX-NEXT: retl ; ; X32_AVX512VL-LABEL: fabs_v8f64: ; X32_AVX512VL: # BB#0: -; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}, %zmm0, %zmm0 +; X32_AVX512VL-NEXT: vbroadcastsd {{\.LCPI.*}}, %zmm1 +; X32_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; X32_AVX512VL-NEXT: retl ; ; X32_AVX512VLDQ-LABEL: fabs_v8f64: ; X32_AVX512VLDQ: # BB#0: -; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}, %zmm0, %zmm0 +; X32_AVX512VLDQ-NEXT: vandpd {{\.LCPI.*}}{1to8}, %zmm0, %zmm0 ; 
X32_AVX512VLDQ-NEXT: retl ; ; X64_AVX-LABEL: fabs_v8f64: ; X64_AVX: # BB#0: -; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807] +; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [nan,nan,nan,nan] ; X64_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 ; X64_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 ; X64_AVX-NEXT: retq ; ; X64_AVX512VL-LABEL: fabs_v8f64: ; X64_AVX512VL: # BB#0: -; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; X64_AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1 +; X64_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; X64_AVX512VL-NEXT: retq ; ; X64_AVX512VLDQ-LABEL: fabs_v8f64: ; X64_AVX512VLDQ: # BB#0: -; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; X64_AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; X64_AVX512VLDQ-NEXT: retq %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p) ret <8 x double> %t @@ -108,36 +176,38 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p) define <16 x float> @fabs_v16f32(<16 x float> %p) { ; X32_AVX-LABEL: fabs_v16f32: ; X32_AVX: # BB#0: -; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647] +; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [nan,nan,nan,nan,nan,nan,nan,nan] ; X32_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 ; X32_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 ; X32_AVX-NEXT: retl ; ; X32_AVX512VL-LABEL: fabs_v16f32: ; X32_AVX512VL: # BB#0: -; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}, %zmm0, %zmm0 +; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %zmm1 +; X32_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; X32_AVX512VL-NEXT: retl ; ; X32_AVX512VLDQ-LABEL: fabs_v16f32: ; X32_AVX512VLDQ: # BB#0: -; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}, %zmm0, %zmm0 +; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}{1to16}, %zmm0, %zmm0 ; X32_AVX512VLDQ-NEXT: retl ; ; X64_AVX-LABEL: fabs_v16f32: ; X64_AVX: # BB#0: -; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = 
[2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647] +; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [nan,nan,nan,nan,nan,nan,nan,nan] ; X64_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 ; X64_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 ; X64_AVX-NEXT: retq ; ; X64_AVX512VL-LABEL: fabs_v16f32: ; X64_AVX512VL: # BB#0: -; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1 +; X64_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; X64_AVX512VL-NEXT: retq ; ; X64_AVX512VLDQ-LABEL: fabs_v16f32: ; X64_AVX512VLDQ: # BB#0: -; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; X64_AVX512VLDQ-NEXT: retq %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p) ret <16 x float> %t