mirror of
https://github.com/RPCS3/llvm.git
synced 2025-04-11 18:42:01 +00:00
AVX-512: Fixed a bug in FP logic operation lowering
FP logic instructions for 512-bit vectors are supported only with the DQ extension on AVX-512 targets. Without DQ, integer operations are used instead. Added tests. I also enabled FABS in this patch in order to check ANDPS. The operations are FOR, FXOR, FAND, FANDN. The instructions that are supported for 512-bit vectors under DQ are: VORPS/PD, VXORPS/PD, VANDPS/PD, VANDNPS/PD. Differential Revision: http://reviews.llvm.org/D15110 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254913 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
fad998fc36
commit
3b45f263c3
@ -1340,6 +1340,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
|
||||
setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
|
||||
setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
|
||||
setOperationAction(ISD::FABS, MVT::v16f32, Custom);
|
||||
|
||||
setOperationAction(ISD::FADD, MVT::v8f64, Legal);
|
||||
setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
|
||||
@ -1347,6 +1348,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
|
||||
setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
|
||||
setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
|
||||
setOperationAction(ISD::FABS, MVT::v8f64, Custom);
|
||||
setOperationAction(ISD::FMA, MVT::v8f64, Legal);
|
||||
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
|
||||
|
||||
@ -26339,6 +26341,31 @@ static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Rewrite an X86 FP logic node (X86ISD::FOR, FXOR, FAND or FANDN) as the
/// matching integer-domain operation when the result type is a 512-bit vector
/// and the subtarget does not have DQI.
///
/// \param N          the FP logic node; operands 0 and 1 are its two inputs.
/// \param DAG        the DAG used to create the replacement nodes.
/// \param Subtarget  queried for DQI availability.
/// \returns the result bitcast back to the node's original value type, or an
///          empty SDValue when the type is not a 512-bit vector or DQI is
///          available (no rewrite is performed in that case).
static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
EVT VT = N->getValueType(0);
|
||||
if (VT.is512BitVector() && !Subtarget->hasDQI()) {
|
||||
// VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extension.
|
||||
// These logic operations may be executed in the integer domain.
|
||||
SDLoc dl(N);
|
||||
// Build an integer vector type with the same element width and the same
// number of elements as VT, so the bitcasts below do not change the size.
MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
|
||||
MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
|
||||
|
||||
// Reinterpret both FP operands as integer vectors.
SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0));
|
||||
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1));
|
||||
unsigned IntOpcode = 0;
|
||||
// Pick the integer opcode that corresponds to the FP logic opcode. Any
// opcode other than the four listed here is treated as unreachable.
switch (N->getOpcode()) {
|
||||
default: llvm_unreachable("Unexpected FP logic op");
|
||||
case X86ISD::FOR: IntOpcode = ISD::OR; break;
|
||||
case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
|
||||
case X86ISD::FAND: IntOpcode = ISD::AND; break;
|
||||
// FANDN maps to the target-specific X86ISD::ANDNP node rather than to a
// generic ISD opcode.
case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
|
||||
}
|
||||
SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
|
||||
// Cast the integer result back to the original FP vector type.
return DAG.getNode(ISD::BITCAST, dl, VT, IntOp);
|
||||
}
|
||||
// Not a 512-bit vector, or DQI is available: leave the node unchanged.
return SDValue();
|
||||
}
|
||||
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
|
||||
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
@ -26354,19 +26381,7 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
|
||||
if (C->getValueAPF().isPosZero())
|
||||
return N->getOperand(0);
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
if (VT.is512BitVector() && !Subtarget->hasDQI()) {
|
||||
SDLoc dl(N);
|
||||
MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
|
||||
MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
|
||||
|
||||
SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0));
|
||||
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1));
|
||||
unsigned IntOpcode = (N->getOpcode() == X86ISD::FOR) ? ISD::OR : ISD::XOR;
|
||||
SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, IntOp);
|
||||
}
|
||||
return SDValue();
|
||||
return lowerX86FPLogicOp(N, DAG, Subtarget);
|
||||
}
|
||||
|
||||
/// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
|
||||
@ -26391,7 +26406,8 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
}
|
||||
|
||||
/// Do target-specific dag combines on X86ISD::FAND nodes.
|
||||
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
// FAND(0.0, x) -> 0.0
|
||||
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
|
||||
if (C->getValueAPF().isPosZero())
|
||||
@ -26402,11 +26418,12 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
if (C->getValueAPF().isPosZero())
|
||||
return N->getOperand(1);
|
||||
|
||||
return SDValue();
|
||||
return lowerX86FPLogicOp(N, DAG, Subtarget);
|
||||
}
|
||||
|
||||
/// Do target-specific dag combines on X86ISD::FANDN nodes
|
||||
static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
// FANDN(0.0, x) -> x
|
||||
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
|
||||
if (C->getValueAPF().isPosZero())
|
||||
@ -26417,7 +26434,7 @@ static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
if (C->getValueAPF().isPosZero())
|
||||
return N->getOperand(1);
|
||||
|
||||
return SDValue();
|
||||
return lowerX86FPLogicOp(N, DAG, Subtarget);
|
||||
}
|
||||
|
||||
static SDValue PerformBTCombine(SDNode *N,
|
||||
@ -27233,8 +27250,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget);
|
||||
case X86ISD::FMIN:
|
||||
case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
|
||||
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
|
||||
case X86ISD::FANDN: return PerformFANDNCombine(N, DAG);
|
||||
case X86ISD::FAND: return PerformFANDCombine(N, DAG, Subtarget);
|
||||
case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget);
|
||||
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
|
||||
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
|
||||
case ISD::ANY_EXTEND:
|
||||
|
@ -770,6 +770,7 @@ def HasVLX : Predicate<"Subtarget->hasVLX()">,
|
||||
AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">;
|
||||
def NoVLX : Predicate<"!Subtarget->hasVLX()">;
|
||||
def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
|
||||
def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
|
||||
|
||||
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
|
||||
def HasAES : Predicate<"Subtarget->hasAES()">;
|
||||
|
@ -2906,7 +2906,7 @@ let isCodeGenOnly = 1 in {
|
||||
// Multiclass for vectors using the X86 logical operation aliases for FP.
|
||||
multiclass sse12_fp_packed_vector_logical_alias<
|
||||
bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> {
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
|
||||
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
|
||||
VR128, v4f32, f128mem, loadv4f32, SSEPackedSingle, itins, 0>,
|
||||
PS, VEX_4V;
|
||||
|
@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s
|
||||
|
||||
define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
|
||||
; CHECK-LABEL: andpd256:
|
||||
|
@ -1,3 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
|
||||
@ -823,3 +824,73 @@ define <16 x float> @test_fxor(<16 x float> %a) {
|
||||
ret <16 x float>%res
|
||||
}
|
||||
|
||||
; Negation of an <8 x float> value, written as an fsub from a splat of -0.0.
; The expected output is a single vxorps on ymm0 with a RIP-relative memory
; operand, followed by the return.
define <8 x float> @test_fxor_8f32(<8 x float> %a) {
|
||||
; CHECK-LABEL: test_fxor_8f32:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
|
||||
ret <8 x float>%res
|
||||
}
|
||||
|
||||
; llvm.fabs on <8 x double>. The AVX512F, AVX512VL and AVX512BW prefixes
; expect the integer-domain vpandq on zmm0 with a RIP-relative mask operand;
; the AVX512DQ and SKX prefixes expect the FP-domain vandpd instead.
define <8 x double> @fabs_v8f64(<8 x double> %p)
|
||||
; AVX512F-LABEL: fabs_v8f64:
|
||||
; AVX512F: ## BB#0:
|
||||
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fabs_v8f64:
|
||||
; AVX512VL: ## BB#0:
|
||||
; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: fabs_v8f64:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fabs_v8f64:
|
||||
; AVX512DQ: ## BB#0:
|
||||
; AVX512DQ-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: fabs_v8f64:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
{
|
||||
%t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
|
||||
ret <8 x double> %t
|
||||
}
|
||||
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
|
||||
|
||||
; llvm.fabs on <16 x float>. The AVX512F, AVX512VL and AVX512BW prefixes
; expect the integer-domain vpandd on zmm0 with a RIP-relative mask operand;
; the AVX512DQ and SKX prefixes expect the FP-domain vandps instead.
define <16 x float> @fabs_v16f32(<16 x float> %p)
|
||||
; AVX512F-LABEL: fabs_v16f32:
|
||||
; AVX512F: ## BB#0:
|
||||
; AVX512F-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fabs_v16f32:
|
||||
; AVX512VL: ## BB#0:
|
||||
; AVX512VL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: fabs_v16f32:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fabs_v16f32:
|
||||
; AVX512DQ: ## BB#0:
|
||||
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: fabs_v16f32:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
{
|
||||
%t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
|
||||
ret <16 x float> %t
|
||||
}
|
||||
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s
|
||||
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s
|
||||
|
||||
define <2 x double> @fabs_v2f64(<2 x double> %p)
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user