[X86] VRNDSCALE* folding from masked and scalar ffloor and fceil patterns
This patch handles back-end folding of the generic patterns created by lowering the X86 rounding intrinsics to native IR, in cases where the instruction isn't a straightforward packed-value rounding operation but a masked or a scalar one.

Differential Revision: https://reviews.llvm.org/D45203

llvm-svn: 335037
This commit is contained in:
parent 8024a33ae9
commit 23c5041b95
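For orientation, a minimal sketch of the kind of source that produces these patterns, assuming the clang headers of this period lower _mm512_mask_floor_ps to a generic llvm.floor call plus a select, as the commit message describes, so the back end sees generic ffloor/vselect nodes rather than a target-specific intrinsic:

// Illustrative sketch (assumption: clang lowers this intrinsic to generic
// floor-plus-select IR). Build with something like: clang -O2 -mavx512f
#include <immintrin.h>

// Merge-masked packed floor. After lowering, the DAG contains
// (vselect k, (ffloor a), passthru), which the new TableGen patterns
// below select directly to a masked VRNDSCALEPS.
__m512 masked_floor_ps(__m512 passthru, __mmask16 k, __m512 a) {
  return _mm512_mask_floor_ps(passthru, k, a);
}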
lib/Target/X86/X86ISelLowering.cpp

@@ -39121,9 +39121,31 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
   // TODO: SimplifyDemandedBits instead?
   if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse())
     if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
-      if (C->getAPIntValue().isOneValue())
-        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1,
-                           Src.getOperand(0));
+      if (C->getAPIntValue().isOneValue()) {
+        SDValue Mask = Src.getOperand(0);
+        if (Mask.getOpcode() == ISD::TRUNCATE &&
+            Mask.getOperand(0).getValueType() != MVT::i16)
+          Mask = Mask.getOperand(0);
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1, Mask);
+      }
+
+  // The result of AND may also be truncated. This occurs in code for lowered
+  // masked scalar intrinsics.
+  if (VT == MVT::v1i1 && Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse() &&
+      Src.getOperand(0).getOpcode() == ISD::AND &&
+      Src.getOperand(0).hasOneUse())
+    if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(0).getOperand(1)))
+      if (C->getAPIntValue().isOneValue()) {
+        SDValue Mask = Src.getOperand(0).getOperand(0);
+        if (Mask.getOpcode() == ISD::TRUNCATE &&
+            Mask.getOperand(0).getValueType() != MVT::i16)
+          Mask = Mask.getOperand(0);
+        // Check if the initial value is an i16. scalar_to_vector fails to
+        // select for that type, so the combine should be aborted.
+        if (Mask.getValueType() == MVT::i16)
+          return SDValue();
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1, Mask);
+      }

   return SDValue();
 }
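In DAG terms, the pre-existing combine rewrote (v1i1 (scalar_to_vector (and Mask, 1))) into (v1i1 (scalar_to_vector Mask)). The change above extends this to look through a TRUNCATE on either side of the AND, so shapes produced by lowered masked scalar intrinsics, such as (v1i1 (scalar_to_vector (trunc (and Mask, 1)))), now fold the same way. The one exception is a mask whose type ends up as i16, for which scalar_to_vector fails to select, so the combine returns SDValue() and bails out.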
lib/Target/X86/X86InstrAVX512.td

@@ -8781,16 +8781,50 @@ multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move
     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
                (extractelt _.VT:$dst, (iPTR 0))))),
-              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
+              (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
               _.VT:$dst, OutMask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;

    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
-              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
+              (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
               OutMask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
  }
}

+defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
+                                (v1i1 (scalar_to_vector GR32:$mask)),
+                                v4f32x_info, fp32imm0, 0x01,
+                                (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
+defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
+                                (v1i1 (scalar_to_vector GR8:$mask)),
+                                v4f32x_info, fp32imm0, 0x01,
+                                (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
+defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
+                                (v1i1 (scalar_to_vector GR32:$mask)),
+                                v4f32x_info, fp32imm0, 0x02,
+                                (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
+defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
+                                (v1i1 (scalar_to_vector GR8:$mask)),
+                                v4f32x_info, fp32imm0, 0x02,
+                                (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
+defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
+                                (v1i1 (scalar_to_vector GR32:$mask)),
+                                v2f64x_info, fp64imm0, 0x01,
+                                (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
+defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
+                                (v1i1 (scalar_to_vector GR8:$mask)),
+                                v2f64x_info, fp64imm0, 0x01,
+                                (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
+defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
+                                (v1i1 (scalar_to_vector GR32:$mask)),
+                                v2f64x_info, fp64imm0, 0x02,
+                                (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
+defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
+                                (v1i1 (scalar_to_vector GR8:$mask)),
+                                v2f64x_info, fp64imm0, 0x02,
+                                (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
+
+
 //-------------------------------------------------
 // Integer truncate and extend operations
 //-------------------------------------------------
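These instantiations bind ffloor to rounding immediate 0x01 (round toward negative infinity) and fceil to 0x02 (round toward positive infinity), for both the float (RNDSCALESS) and double (RNDSCALESD) scalar forms, and for masks that arrive as either GR32 or GR8 values before being copied into the VK1WM mask register class.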
@@ -9936,10 +9970,18 @@ defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
 let Predicates = [HasAVX512] in {
 def : Pat<(v16f32 (ffloor VR512:$src)),
           (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
+def : Pat<(v16f32 (vselect VK16WM:$mask, (ffloor VR512:$src), VR512:$dst)),
+          (VRNDSCALEPSZrrik VR512:$dst, VK16WM:$mask, VR512:$src, (i32 0x9))>;
+def : Pat<(v16f32 (vselect VK16WM:$mask, (ffloor VR512:$src), v16f32_info.ImmAllZerosV)),
+          (VRNDSCALEPSZrrikz VK16WM:$mask, VR512:$src, (i32 0x9))>;
 def : Pat<(v16f32 (fnearbyint VR512:$src)),
           (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
 def : Pat<(v16f32 (fceil VR512:$src)),
           (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
+def : Pat<(v16f32 (vselect VK16WM:$mask, (fceil VR512:$src), VR512:$dst)),
+          (VRNDSCALEPSZrrik VR512:$dst, VK16WM:$mask, VR512:$src, (i32 0xA))>;
+def : Pat<(v16f32 (vselect VK16WM:$mask, (fceil VR512:$src), v16f32_info.ImmAllZerosV)),
+          (VRNDSCALEPSZrrikz VK16WM:$mask, VR512:$src, (i32 0xA))>;
 def : Pat<(v16f32 (frint VR512:$src)),
           (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
 def : Pat<(v16f32 (ftrunc VR512:$src)),
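For reference, in the VRNDSCALE/ROUND immediate the low two bits select the rounding mode (0x1 rounds toward negative infinity for ffloor, 0x2 toward positive infinity for fceil, 0x3 truncates), bit 0x4 defers to the MXCSR rounding mode, and bit 0x8 suppresses precision exceptions; hence 0x9 for ffloor, 0xA for fceil, 0xB for ftrunc, 0xC for fnearbyint, and plain 0x4 for frint in these patterns. The new vselect patterns add the merge-masked (rrik, taking $dst as the passthrough value) and zero-masked (rrikz) forms alongside the existing unmasked ones.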
@@ -9958,10 +10000,18 @@ def : Pat<(v16f32 (ftrunc (loadv16f32 addr:$src))),

 def : Pat<(v8f64 (ffloor VR512:$src)),
           (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
+def : Pat<(v8f64 (vselect VK8WM:$mask, (ffloor VR512:$src), VR512:$dst)),
+          (VRNDSCALEPDZrrik VR512:$dst, VK8WM:$mask, VR512:$src, (i32 0x9))>;
+def : Pat<(v8f64 (vselect VK8WM:$mask, (ffloor VR512:$src), v8f64_info.ImmAllZerosV)),
+          (VRNDSCALEPDZrrikz VK8WM:$mask, VR512:$src, (i32 0x9))>;
 def : Pat<(v8f64 (fnearbyint VR512:$src)),
           (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
 def : Pat<(v8f64 (fceil VR512:$src)),
           (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
+def : Pat<(v8f64 (vselect VK8WM:$mask, (fceil VR512:$src), VR512:$dst)),
+          (VRNDSCALEPDZrrik VR512:$dst, VK8WM:$mask, VR512:$src, (i32 0xA))>;
+def : Pat<(v8f64 (vselect VK8WM:$mask, (fceil VR512:$src), v8f64_info.ImmAllZerosV)),
+          (VRNDSCALEPDZrrikz VK8WM:$mask, VR512:$src, (i32 0xA))>;
 def : Pat<(v8f64 (frint VR512:$src)),
           (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
 def : Pat<(v8f64 (ftrunc VR512:$src)),

@@ -9982,10 +10032,18 @@ def : Pat<(v8f64 (ftrunc (loadv8f64 addr:$src))),
 let Predicates = [HasVLX] in {
 def : Pat<(v4f32 (ffloor VR128X:$src)),
           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x9))>;
+def : Pat<(v4f32 (vselect VK4WM:$mask, (ffloor VR128X:$src), VR128X:$dst)),
+          (VRNDSCALEPSZ128rrik VR128X:$dst, VK4WM:$mask, VR128X:$src, (i32 0x9))>;
+def : Pat<(v4f32 (vselect VK4WM:$mask, (ffloor VR128X:$src), v4f32x_info.ImmAllZerosV)),
+          (VRNDSCALEPSZ128rrikz VK4WM:$mask, VR128X:$src, (i32 0x9))>;
 def : Pat<(v4f32 (fnearbyint VR128X:$src)),
           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xC))>;
 def : Pat<(v4f32 (fceil VR128X:$src)),
           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xA))>;
+def : Pat<(v4f32 (vselect VK4WM:$mask, (fceil VR128X:$src), VR128X:$dst)),
+          (VRNDSCALEPSZ128rrik VR128X:$dst, VK4WM:$mask, VR128X:$src, (i32 0xA))>;
+def : Pat<(v4f32 (vselect VK4WM:$mask, (fceil VR128X:$src), v4f32x_info.ImmAllZerosV)),
+          (VRNDSCALEPSZ128rrikz VK4WM:$mask, VR128X:$src, (i32 0xA))>;
 def : Pat<(v4f32 (frint VR128X:$src)),
           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x4))>;
 def : Pat<(v4f32 (ftrunc VR128X:$src)),

@@ -10004,10 +10062,18 @@ def : Pat<(v4f32 (ftrunc (loadv4f32 addr:$src))),

 def : Pat<(v2f64 (ffloor VR128X:$src)),
           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>;
+def : Pat<(v2f64 (vselect VK2WM:$mask, (ffloor VR128X:$src), VR128X:$dst)),
+          (VRNDSCALEPDZ128rrik VR128X:$dst, VK2WM:$mask, VR128X:$src, (i32 0x9))>;
+def : Pat<(v2f64 (vselect VK2WM:$mask, (ffloor VR128X:$src), v2f64x_info.ImmAllZerosV)),
+          (VRNDSCALEPDZ128rrikz VK2WM:$mask, VR128X:$src, (i32 0x9))>;
 def : Pat<(v2f64 (fnearbyint VR128X:$src)),
           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>;
 def : Pat<(v2f64 (fceil VR128X:$src)),
           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xA))>;
+def : Pat<(v2f64 (vselect VK2WM:$mask, (fceil VR128X:$src), VR128X:$dst)),
+          (VRNDSCALEPDZ128rrik VR128X:$dst, VK2WM:$mask, VR128X:$src, (i32 0xA))>;
+def : Pat<(v2f64 (vselect VK2WM:$mask, (fceil VR128X:$src), v2f64x_info.ImmAllZerosV)),
+          (VRNDSCALEPDZ128rrikz VK2WM:$mask, VR128X:$src, (i32 0xA))>;
 def : Pat<(v2f64 (frint VR128X:$src)),
           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x4))>;
 def : Pat<(v2f64 (ftrunc VR128X:$src)),

@@ -10026,10 +10092,18 @@ def : Pat<(v2f64 (ftrunc (loadv2f64 addr:$src))),

 def : Pat<(v8f32 (ffloor VR256X:$src)),
           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>;
+def : Pat<(v8f32 (vselect VK8WM:$mask, (ffloor VR256X:$src), VR256X:$dst)),
+          (VRNDSCALEPSZ256rrik VR256X:$dst, VK8WM:$mask, VR256X:$src, (i32 0x9))>;
+def : Pat<(v8f32 (vselect VK8WM:$mask, (ffloor VR256X:$src), v8f32x_info.ImmAllZerosV)),
+          (VRNDSCALEPSZ256rrikz VK8WM:$mask, VR256X:$src, (i32 0x9))>;
 def : Pat<(v8f32 (fnearbyint VR256X:$src)),
           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>;
 def : Pat<(v8f32 (fceil VR256X:$src)),
           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xA))>;
+def : Pat<(v8f32 (vselect VK8WM:$mask, (fceil VR256X:$src), VR256X:$dst)),
+          (VRNDSCALEPSZ256rrik VR256X:$dst, VK8WM:$mask, VR256X:$src, (i32 0xA))>;
+def : Pat<(v8f32 (vselect VK8WM:$mask, (fceil VR256X:$src), v8f32x_info.ImmAllZerosV)),
+          (VRNDSCALEPSZ256rrikz VK8WM:$mask, VR256X:$src, (i32 0xA))>;
 def : Pat<(v8f32 (frint VR256X:$src)),
           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x4))>;
 def : Pat<(v8f32 (ftrunc VR256X:$src)),

@@ -10048,10 +10122,18 @@ def : Pat<(v8f32 (ftrunc (loadv8f32 addr:$src))),

 def : Pat<(v4f64 (ffloor VR256X:$src)),
           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>;
+def : Pat<(v4f64 (vselect VK4WM:$mask, (ffloor VR256X:$src), VR256X:$dst)),
+          (VRNDSCALEPDZ256rrik VR256X:$dst, VK4WM:$mask, VR256X:$src, (i32 0x9))>;
+def : Pat<(v4f64 (vselect VK4WM:$mask, (ffloor VR256X:$src), v4f64x_info.ImmAllZerosV)),
+          (VRNDSCALEPDZ256rrikz VK4WM:$mask, VR256X:$src, (i32 0x9))>;
 def : Pat<(v4f64 (fnearbyint VR256X:$src)),
           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>;
 def : Pat<(v4f64 (fceil VR256X:$src)),
           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xA))>;
+def : Pat<(v4f64 (vselect VK4WM:$mask, (fceil VR256X:$src), VR256X:$dst)),
+          (VRNDSCALEPDZ256rrik VR256X:$dst, VK4WM:$mask, VR256X:$src, (i32 0xA))>;
+def : Pat<(v4f64 (vselect VK4WM:$mask, (fceil VR256X:$src), v4f64x_info.ImmAllZerosV)),
+          (VRNDSCALEPDZ256rrikz VK4WM:$mask, VR256X:$src, (i32 0xA))>;
 def : Pat<(v4f64 (frint VR256X:$src)),
           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x4))>;
 def : Pat<(v4f64 (ftrunc VR256X:$src)),
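The HasVLX block above repeats the same unmasked, merge-masked, and zero-masked floor and ceil patterns for the 128-bit and 256-bit register classes (VR128X and VR256X, with VK2WM, VK4WM, and VK8WM masks), selecting the Z128 and Z256 variants of VRNDSCALEPS and VRNDSCALEPD.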
lib/Target/X86/X86InstrSSE.td

@@ -5944,6 +5944,15 @@ let Predicates = [UseSSE41] in {
             (ROUNDPDm addr:$src, (i32 0xB))>;
 }

+defm : scalar_unary_math_imm_patterns<ffloor, "ROUNDSS", X86Movss,
+                                      v4f32, 0x01, UseSSE41>;
+defm : scalar_unary_math_imm_patterns<fceil, "ROUNDSS", X86Movss,
+                                      v4f32, 0x02, UseSSE41>;
+defm : scalar_unary_math_imm_patterns<ffloor, "ROUNDSD", X86Movsd,
+                                      v2f64, 0x01, UseSSE41>;
+defm : scalar_unary_math_imm_patterns<fceil, "ROUNDSD", X86Movsd,
+                                      v2f64, 0x02, UseSSE41>;
+
 //===----------------------------------------------------------------------===//
 // SSE4.1 - Packed Bit Test
 //===----------------------------------------------------------------------===//
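A minimal sketch of the scalar SSE4.1 case covered by the scalar_unary_math_imm_patterns instantiations above; illustrative only, assuming clang lowers _mm_floor_ss and _mm_ceil_sd to a generic ffloor/fceil of the extracted low element plus a move back into the low lane:

// Illustrative sketch. Build with something like: clang -O2 -msse4.1
#include <smmintrin.h>

// Should select ROUNDSS with immediate 0x01 via the patterns above.
__m128 floor_ss(__m128 dst, __m128 src) { return _mm_floor_ss(dst, src); }

// Should select ROUNDSD with immediate 0x02.
__m128d ceil_sd(__m128d dst, __m128d src) { return _mm_ceil_sd(dst, src); }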
(The diff for one further file is suppressed because it is too large.)