mirror of
https://github.com/RPCS3/llvm.git
[X86] Don't lower FABS/FNEG masking directly to a ConstantPool load. Just create a ConstantFPSDNode and let that be lowered.
This allows broadcast loads to be used when available. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279958 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent 79711e4f46
commit 51b695a52e
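To illustrate the intent of the change with a minimal example (the function name @fneg_example and the comments are illustrative, not part of this commit's tests): an FNEG written as an fsub from -0.0 lowers through LowerFABSorFNEG to an XOR with a sign-bit mask, and now that the mask is built as a splat ConstantFPSDNode rather than a pre-materialized constant-pool vector, AVX-512 subtargets can fold it as a broadcast.

; Hypothetical reduced example, in the spirit of the updated tests below.
define <8 x float> @fneg_example(<8 x float> %a) {
  %r = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %a
  ret <8 x float> %r
}
; On an SKX-class subtarget the sign-bit mask can now be folded as an
; embedded broadcast, e.g.:
;   vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; while other AVX-512 configurations emit a vbroadcastss of the scalar
; constant instead of loading a full-width vector from the constant pool.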
@@ -14639,18 +14639,13 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
}

unsigned EltBits = EltVT.getSizeInBits();
LLVMContext *Context = DAG.getContext();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
APInt MaskElt =
IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits);
Constant *C = ConstantInt::get(*Context, MaskElt);
C = ConstantVector::getSplat(NumElts, C);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(
LogicVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
const fltSemantics &Sem =
EltVT == MVT::f64 ? APFloat::IEEEdouble :
(IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle);
SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);

SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
@@ -93,7 +93,7 @@ define <8 x i32> @VMOVZQI2PQI([0 x float]* nocapture %aFOO) nounwind {
define <16 x float> @fneg(<16 x float> %a) nounwind {
; CHECK-LABEL: fneg:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vxorps %ymm2, %ymm1, %ymm1
; CHECK-NEXT: retq
@@ -945,27 +945,30 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
define <16 x float> @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512F-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512VL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512BW-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_fxor:
; SKX: ## BB#0:
; SKX-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: retq

%res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
@@ -973,10 +976,34 @@ define <16 x float> @test_fxor(<16 x float> %a) {
}

define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; CHECK-LABEL: test_fxor_8f32:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
; AVX512F-LABEL: test_fxor_8f32:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; AVX512F-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_fxor_8f32:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; AVX512VL-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_fxor_8f32:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; AVX512BW-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor_8f32:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; AVX512DQ-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_fxor_8f32:
; SKX: ## BB#0:
; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; SKX-NEXT: retq
%res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
ret <8 x float>%res
}
@@ -984,27 +1011,30 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) {
define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v8f64:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: fabs_v8f64:
; SKX: ## BB#0:
; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
{
%t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
@@ -1015,27 +1045,30 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
define <16 x float> @fabs_v16f32(<16 x float> %p)
; AVX512F-LABEL: fabs_v16f32:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v16f32:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v16f32:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v16f32:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: fabs_v16f32:
; SKX: ## BB#0:
; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: retq
{
%t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
@@ -7,7 +7,7 @@
define <16 x float> @test1(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test1:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm2, %zmm2
; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm2, %zmm2
; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
@@ -25,7 +25,7 @@ define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
%0 = tail call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
@@ -37,7 +37,7 @@ define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test3:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
%0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
@@ -49,7 +49,7 @@ define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test4:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
%0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
@@ -60,7 +60,7 @@ entry:
define <16 x float> @test5(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test5:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm2, %zmm2
; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm2, %zmm2
; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
@@ -73,7 +73,7 @@ define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test6:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vfnmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
%0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 2) #2
@@ -86,7 +86,7 @@ define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test7:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
%0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
@@ -97,7 +97,7 @@ entry:
define <8 x float> @test8(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test8:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm2, %ymm2
; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT: retq
entry:
@@ -1163,11 +1163,23 @@ define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
}

define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 {
; ALL-LABEL: test_v4f64_fneg_fmul_no_nsz:
; ALL: # BB#0:
; ALL-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; ALL-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0
; ALL-NEXT: retq
; FMA-LABEL: test_v4f64_fneg_fmul_no_nsz:
; FMA: # BB#0:
; FMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; FMA-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f64_fneg_fmul_no_nsz:
; FMA4: # BB#0:
; FMA4-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f64_fneg_fmul_no_nsz:
; AVX512: # BB#0:
; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vxorpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
; AVX512-NEXT: retq
%m = fmul <4 x double> %x, %y
%n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
ret <4 x double> %n
@@ -787,7 +787,7 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %
; FMA: # BB#0:
; FMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; FMA-NEXT: vmulpd %ymm2, %ymm0, %ymm0
; FMA-NEXT: vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; FMA-NEXT: vmovapd {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; FMA-NEXT: vxorpd %ymm2, %ymm0, %ymm0
; FMA-NEXT: vxorpd %ymm2, %ymm1, %ymm1
; FMA-NEXT: retq
@@ -796,7 +796,7 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %
; FMA4: # BB#0:
; FMA4-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vmulpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; FMA4-NEXT: vmovapd {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; FMA4-NEXT: vxorpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vxorpd %ymm2, %ymm1, %ymm1
; FMA4-NEXT: retq
@@ -804,7 +804,7 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %
; AVX512-LABEL: test_v8f64_fneg_fmul_no_nsz:
; AVX512: # BB#0:
; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vxorpd {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-NEXT: retq
%m = fmul <8 x double> %x, %y
%n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
@@ -15,7 +15,7 @@ define void @foo(%struct.anon* byval %p) nounwind {
; CHECK-LABEL: foo:
; CHECK: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: movaps {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-NEXT: movaps {{.*#+}} xmm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; CHECK-NEXT: xorps %xmm2, %xmm0
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: xorps %xmm2, %xmm1
@@ -22,45 +22,111 @@ define <2 x double> @fabs_v2f64(<2 x double> %p) {
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)

define <4 x float> @fabs_v4f32(<4 x float> %p) {
; X32-LABEL: fabs_v4f32:
; X32: # BB#0:
; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: retl
; X32_AVX-LABEL: fabs_v4f32:
; X32_AVX: # BB#0:
; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32_AVX-NEXT: retl
;
; X64-LABEL: fabs_v4f32:
; X64: # BB#0:
; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: retq
; X32_AVX512VL-LABEL: fabs_v4f32:
; X32_AVX512VL: # BB#0:
; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1
; X32_AVX512VL-NEXT: vandps %xmm1, %xmm0, %xmm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v4f32:
; X32_AVX512VLDQ: # BB#0:
; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
; X32_AVX512VLDQ-NEXT: retl
;
; X64_AVX-LABEL: fabs_v4f32:
; X64_AVX: # BB#0:
; X64_AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX-NEXT: retq
;
; X64_AVX512VL-LABEL: fabs_v4f32:
; X64_AVX512VL: # BB#0:
; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; X64_AVX512VL-NEXT: vandps %xmm1, %xmm0, %xmm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v4f32:
; X64_AVX512VLDQ: # BB#0:
; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; X64_AVX512VLDQ-NEXT: retq
%t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
ret <4 x float> %t
}
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)

define <4 x double> @fabs_v4f64(<4 x double> %p) {
; X32-LABEL: fabs_v4f64:
; X32: # BB#0:
; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: retl
; X32_AVX-LABEL: fabs_v4f64:
; X32_AVX: # BB#0:
; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32_AVX-NEXT: retl
;
; X64-LABEL: fabs_v4f64:
; X64: # BB#0:
; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
; X32_AVX512VL-LABEL: fabs_v4f64:
; X32_AVX512VL: # BB#0:
; X32_AVX512VL-NEXT: vbroadcastsd {{\.LCPI.*}}, %ymm1
; X32_AVX512VL-NEXT: vandpd %ymm1, %ymm0, %ymm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v4f64:
; X32_AVX512VLDQ: # BB#0:
; X32_AVX512VLDQ-NEXT: vandpd {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
; X32_AVX512VLDQ-NEXT: retl
;
; X64_AVX-LABEL: fabs_v4f64:
; X64_AVX: # BB#0:
; X64_AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; X64_AVX-NEXT: retq
;
; X64_AVX512VL-LABEL: fabs_v4f64:
; X64_AVX512VL: # BB#0:
; X64_AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
; X64_AVX512VL-NEXT: vandpd %ymm1, %ymm0, %ymm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v4f64:
; X64_AVX512VLDQ: # BB#0:
; X64_AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
; X64_AVX512VLDQ-NEXT: retq
%t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
ret <4 x double> %t
}
declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)

define <8 x float> @fabs_v8f32(<8 x float> %p) {
; X32-LABEL: fabs_v8f32:
; X32: # BB#0:
; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: retl
; X32_AVX-LABEL: fabs_v8f32:
; X32_AVX: # BB#0:
; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32_AVX-NEXT: retl
;
; X64-LABEL: fabs_v8f32:
; X64: # BB#0:
; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
; X32_AVX512VL-LABEL: fabs_v8f32:
; X32_AVX512VL: # BB#0:
; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %ymm1
; X32_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v8f32:
; X32_AVX512VLDQ: # BB#0:
; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
; X32_AVX512VLDQ-NEXT: retl
;
; X64_AVX-LABEL: fabs_v8f32:
; X64_AVX: # BB#0:
; X64_AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; X64_AVX-NEXT: retq
;
; X64_AVX512VL-LABEL: fabs_v8f32:
; X64_AVX512VL: # BB#0:
; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; X64_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v8f32:
; X64_AVX512VLDQ: # BB#0:
; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64_AVX512VLDQ-NEXT: retq
%t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
ret <8 x float> %t
}
@@ -69,36 +135,38 @@ declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
define <8 x double> @fabs_v8f64(<8 x double> %p) {
; X32_AVX-LABEL: fabs_v8f64:
; X32_AVX: # BB#0:
; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [nan,nan,nan,nan]
; X32_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X32_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X32_AVX-NEXT: retl
;
; X32_AVX512VL-LABEL: fabs_v8f64:
; X32_AVX512VL: # BB#0:
; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}, %zmm0, %zmm0
; X32_AVX512VL-NEXT: vbroadcastsd {{\.LCPI.*}}, %zmm1
; X32_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v8f64:
; X32_AVX512VLDQ: # BB#0:
; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}, %zmm0, %zmm0
; X32_AVX512VLDQ-NEXT: vandpd {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
; X32_AVX512VLDQ-NEXT: retl
;
; X64_AVX-LABEL: fabs_v8f64:
; X64_AVX: # BB#0:
; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [nan,nan,nan,nan]
; X64_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X64_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X64_AVX-NEXT: retq
;
; X64_AVX512VL-LABEL: fabs_v8f64:
; X64_AVX512VL: # BB#0:
; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; X64_AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
; X64_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v8f64:
; X64_AVX512VLDQ: # BB#0:
; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; X64_AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; X64_AVX512VLDQ-NEXT: retq
%t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
ret <8 x double> %t
@@ -108,36 +176,38 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
define <16 x float> @fabs_v16f32(<16 x float> %p) {
; X32_AVX-LABEL: fabs_v16f32:
; X32_AVX: # BB#0:
; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [nan,nan,nan,nan,nan,nan,nan,nan]
; X32_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X32_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X32_AVX-NEXT: retl
;
; X32_AVX512VL-LABEL: fabs_v16f32:
; X32_AVX512VL: # BB#0:
; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}, %zmm0, %zmm0
; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %zmm1
; X32_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v16f32:
; X32_AVX512VLDQ: # BB#0:
; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}, %zmm0, %zmm0
; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
; X32_AVX512VLDQ-NEXT: retl
;
; X64_AVX-LABEL: fabs_v16f32:
; X64_AVX: # BB#0:
; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [nan,nan,nan,nan,nan,nan,nan,nan]
; X64_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X64_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X64_AVX-NEXT: retq
;
; X64_AVX512VL-LABEL: fabs_v16f32:
; X64_AVX512VL: # BB#0:
; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; X64_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v16f32:
; X64_AVX512VLDQ: # BB#0:
; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; X64_AVX512VLDQ-NEXT: retq
%t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
ret <16 x float> %t