mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-04 02:40:34 +00:00
[X86][XOP] Add support for combining target shuffles to VPERMIL2PD/VPERMIL2PS
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@278120 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5a9fa77faf
commit
04876e5fe5
@ -3844,6 +3844,7 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
|
||||
default: return false;
|
||||
case X86ISD::PSHUFB:
|
||||
case X86ISD::VPERMILPV:
|
||||
case X86ISD::VPERMIL2:
|
||||
case X86ISD::VPPERM:
|
||||
return true;
|
||||
}
|
||||
@ -25288,6 +25289,49 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
||||
return true;
|
||||
}
|
||||
|
||||
// With XOP, binary shuffles of 128/256-bit floating point vectors can combine
|
||||
// to VPERMIL2PD/VPERMIL2PS.
|
||||
if ((Depth >= 3 || HasVariableMask) && Subtarget.hasXOP() &&
|
||||
(MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v4f32 ||
|
||||
MaskVT == MVT::v8f32)) {
|
||||
// VPERMIL2 Operation.
|
||||
// Bits[3] - Match Bit.
|
||||
// Bits[2:1] - (Per Lane) PD Shuffle Mask.
|
||||
// Bits[2:0] - (Per Lane) PS Shuffle Mask.
|
||||
unsigned NumLanes = MaskVT.getSizeInBits() / 128;
|
||||
unsigned NumEltsPerLane = NumMaskElts / NumLanes;
|
||||
SmallVector<SDValue, 8> VPerm2Idx;
|
||||
MVT MaskIdxSVT = MVT::getIntegerVT(MaskVT.getScalarSizeInBits());
|
||||
MVT MaskIdxVT = MVT::getVectorVT(MaskIdxSVT, NumMaskElts);
|
||||
unsigned M2ZImm = 0;
|
||||
for (int M : Mask) {
|
||||
if (M == SM_SentinelUndef) {
|
||||
VPerm2Idx.push_back(DAG.getUNDEF(MaskIdxSVT));
|
||||
continue;
|
||||
}
|
||||
if (M == SM_SentinelZero) {
|
||||
M2ZImm = 2;
|
||||
VPerm2Idx.push_back(DAG.getConstant(8, DL, MaskIdxSVT));
|
||||
continue;
|
||||
}
|
||||
int Index = (M % NumEltsPerLane) + ((M / NumMaskElts) * NumEltsPerLane);
|
||||
Index = (MaskVT.getScalarSizeInBits() == 64 ? Index << 1 : Index);
|
||||
VPerm2Idx.push_back(DAG.getConstant(Index, DL, MaskIdxSVT));
|
||||
}
|
||||
V1 = DAG.getBitcast(MaskVT, V1);
|
||||
DCI.AddToWorklist(V1.getNode());
|
||||
V2 = DAG.getBitcast(MaskVT, V2);
|
||||
DCI.AddToWorklist(V2.getNode());
|
||||
SDValue VPerm2MaskOp = DAG.getBuildVector(MaskIdxVT, DL, VPerm2Idx);
|
||||
DCI.AddToWorklist(VPerm2MaskOp.getNode());
|
||||
Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
|
||||
DAG.getConstant(M2ZImm, DL, MVT::i8));
|
||||
DCI.AddToWorklist(Res.getNode());
|
||||
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
|
||||
/*AddTo*/ true);
|
||||
return true;
|
||||
}
|
||||
|
||||
// If we have 3 or more shuffle instructions or a chain involving a variable
|
||||
// mask, we can replace them with a single PSHUFB instruction profitably.
|
||||
// Intel's manuals suggest only using PSHUFB if doing so replaces 5
|
||||
|
@ -30,6 +30,16 @@ define <4 x double> @combine_vpermil2pd256_identity(<4 x double> %a0, <4 x doubl
|
||||
ret <4 x double> %res1
|
||||
}
|
||||
|
||||
; Shuffle-combine test: a two-input shufflevector of %a0/%a1 is followed by a
; second shufflevector that takes element 1 from a zeroinitializer operand.
; The CHECK lines expect the pair to be emitted as one vpermil2pd whose result
; is ymm0[0], zero, ymm1[3], ymm0[3].
define <4 x double> @combine_vpermil2pd256_0z73(<4 x double> %a0, <4 x double> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2pd256_0z73:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm0[0],zero,ymm1[3],ymm0[3]
|
||||
; CHECK-NEXT: retq
|
||||
%res0 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 undef, i32 7, i32 3>
|
||||
%res1 = shufflevector <4 x double> %res0, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
|
||||
ret <4 x double> %res1
|
||||
}
|
||||
|
||||
define <4 x float> @combine_vpermil2ps_identity(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2ps_identity:
|
||||
; CHECK: # BB#0:
|
||||
@ -40,6 +50,16 @@ define <4 x float> @combine_vpermil2ps_identity(<4 x float> %a0, <4 x float> %a1
|
||||
ret <4 x float> %res1
|
||||
}
|
||||
|
||||
; Shuffle-combine test: a call to the 128-bit XOP vpermil2ps intrinsic with a
; constant selector is followed by a shufflevector that takes element 1 from a
; zeroinitializer operand. The CHECK lines expect a single vpermil2ps whose
; result is xmm0[1], zero, xmm1[3], xmm1[0].
define <4 x float> @combine_vpermil2ps_1z74(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2ps_1z74:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[1],zero,xmm1[3,0]
|
||||
; CHECK-NEXT: retq
|
||||
%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 1, i32 1, i32 7, i32 4>, i8 0)
|
||||
%res1 = shufflevector <4 x float> %res0, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
|
||||
ret <4 x float> %res1
|
||||
}
|
||||
|
||||
define <8 x float> @combine_vpermil2ps256_identity(<8 x float> %a0, <8 x float> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2ps256_identity:
|
||||
; CHECK: # BB#0:
|
||||
@ -50,6 +70,16 @@ define <8 x float> @combine_vpermil2ps256_identity(<8 x float> %a0, <8 x float>
|
||||
ret <8 x float> %res1
|
||||
}
|
||||
|
||||
; Shuffle-combine test: a call to the 256-bit XOP vpermil2ps intrinsic with a
; constant selector is followed by a shufflevector that replaces elements 2 and
; 7 with elements of a zeroinitializer operand. The CHECK lines expect a single
; vpermil2ps with zeros in those two positions.
; NOTE(review): the 'A' in the test name does not obviously match the
; ymm1[6] shown in that slot of the CHECK line - confirm the naming scheme.
define <8 x float> @combine_vpermil2ps256_08z945Az(<8 x float> %a0, <8 x float> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2ps256_08z945Az:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[0],zero,ymm1[1],ymm0[4,5],ymm1[6],zero
|
||||
; CHECK-NEXT: retq
|
||||
%res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 0, i32 1, i32 6, i32 7>, i8 0)
|
||||
%res1 = shufflevector <8 x float> %res0, <8 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 4, i32 5, i32 6, i32 8>
|
||||
ret <8 x float> %res1
|
||||
}
|
||||
|
||||
define <8 x float> @combine_vpermil2ps256_zero(<8 x float> %a0, <8 x float> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2ps256_zero:
|
||||
; CHECK: # BB#0:
|
||||
|
Loading…
Reference in New Issue
Block a user