mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-03 17:24:24 +00:00
Separate the check for blend shuffle_vector masks
Summary: Separate the check for blend shuffle_vector masks into isBlendMask. This function will also be used to check if a vector shuffle is legal. No change in functionality was intended, but we ended up improving codegen on two tests, which were being (more) optimized only if the resulting shuffle was legal. Reviewers: nadav, delena, andreadb Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D3964 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209923 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6126c55782
commit
94141a42ed
@ -6429,38 +6429,30 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
|
||||
return LowerAVXCONCAT_VECTORS(Op, DAG);
|
||||
}
|
||||
|
||||
// Try to lower a shuffle node into a simple blend instruction.
|
||||
static SDValue
|
||||
LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
|
||||
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
|
||||
SDValue V1 = SVOp->getOperand(0);
|
||||
SDValue V2 = SVOp->getOperand(1);
|
||||
SDLoc dl(SVOp);
|
||||
MVT VT = SVOp->getSimpleValueType(0);
|
||||
static bool isBlendMask(ArrayRef<int> MaskVals, MVT VT, bool hasSSE41,
|
||||
bool hasInt256, unsigned *MaskOut = nullptr) {
|
||||
MVT EltVT = VT.getVectorElementType();
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
|
||||
// There is no blend with immediate in AVX-512.
|
||||
if (VT.is512BitVector())
|
||||
return SDValue();
|
||||
return false;
|
||||
|
||||
if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
|
||||
return SDValue();
|
||||
if (!Subtarget->hasInt256() && VT == MVT::v16i16)
|
||||
return SDValue();
|
||||
if (!hasSSE41 || EltVT == MVT::i8)
|
||||
return false;
|
||||
if (!hasInt256 && VT == MVT::v16i16)
|
||||
return false;
|
||||
|
||||
// Check the mask for BLEND and build the value.
|
||||
unsigned MaskValue = 0;
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
// There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
|
||||
unsigned NumLanes = (NumElems-1)/8 + 1;
|
||||
unsigned NumLanes = (NumElems - 1) / 8 + 1;
|
||||
unsigned NumElemsInLane = NumElems / NumLanes;
|
||||
|
||||
// Blend for v16i16 should be symmetric for both lanes.
|
||||
for (unsigned i = 0; i < NumElemsInLane; ++i) {
|
||||
|
||||
int SndLaneEltIdx = (NumLanes == 2) ?
|
||||
SVOp->getMaskElt(i + NumElemsInLane) : -1;
|
||||
int EltIdx = SVOp->getMaskElt(i);
|
||||
int SndLaneEltIdx = (NumLanes == 2) ? MaskVals[i + NumElemsInLane] : -1;
|
||||
int EltIdx = MaskVals[i];
|
||||
|
||||
if ((EltIdx < 0 || EltIdx == (int)i) &&
|
||||
(SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
|
||||
@ -6469,11 +6461,34 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
|
||||
if (((unsigned)EltIdx == (i + NumElems)) &&
|
||||
(SndLaneEltIdx < 0 ||
|
||||
(unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
|
||||
MaskValue |= (1<<i);
|
||||
MaskValue |= (1 << i);
|
||||
else
|
||||
return SDValue();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (MaskOut)
|
||||
*MaskOut = MaskValue;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Try to lower a shuffle node into a simple blend instruction.
|
||||
// This function assumes isBlendMask returns true for this
|
||||
// ShuffleVectorSDNode
|
||||
static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
|
||||
unsigned MaskValue,
|
||||
const X86Subtarget *Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
MVT VT = SVOp->getSimpleValueType(0);
|
||||
MVT EltVT = VT.getVectorElementType();
|
||||
assert(isBlendMask(SVOp->getMask(), VT, Subtarget->hasSSE41(),
|
||||
Subtarget->hasInt256() && "Trying to lower a "
|
||||
"VECTOR_SHUFFLE to a Blend but "
|
||||
"with the wrong mask"));
|
||||
SDValue V1 = SVOp->getOperand(0);
|
||||
SDValue V2 = SVOp->getOperand(1);
|
||||
SDLoc dl(SVOp);
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
|
||||
// Convert i32 vectors to floating point if it is not AVX2.
|
||||
// AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
|
||||
MVT BlendVT = VT;
|
||||
@ -7910,9 +7925,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
|
||||
V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
|
||||
|
||||
SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(SVOp, Subtarget, DAG);
|
||||
if (BlendOp.getNode())
|
||||
return BlendOp;
|
||||
unsigned MaskValue;
|
||||
if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
|
||||
&MaskValue))
|
||||
return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);
|
||||
|
||||
if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT))
|
||||
return getINSERTPS(SVOp, dl, DAG);
|
||||
@ -15173,7 +15189,8 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
|
||||
isUNPCKLMask(M, SVT, Subtarget->hasInt256()) ||
|
||||
isUNPCKHMask(M, SVT, Subtarget->hasInt256()) ||
|
||||
isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
|
||||
isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()));
|
||||
isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
|
||||
isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()));
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -25,7 +25,7 @@ define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
|
||||
}
|
||||
; CHECK-LABEL: test2
|
||||
; CHECK-NOT: xorps
|
||||
; CHECK: shufps
|
||||
; CHECK: movsd
|
||||
; CHECK: ret
|
||||
|
||||
|
||||
@ -111,7 +111,7 @@ define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
|
||||
}
|
||||
; CHECK-LABEL: test9
|
||||
; CHECK-NOT: xorps
|
||||
; CHECK: shufps
|
||||
; CHECK: movsd
|
||||
; CHECK: ret
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user