mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-01 07:30:31 +00:00
[X86][SSE} Add INSERTPS as a target shuffle
Follow up to D15378, added INSERTPS to the list of decodable target shuffles and enabled XFormVExtractWithShuffleIntoLoad to handle target shuffles with SentinelZero and tested this with INSERTPS. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257046 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
00e4aed86a
commit
ce13714bfc
@ -3907,6 +3907,7 @@ static bool isTargetShuffle(unsigned Opcode) {
|
||||
case X86ISD::PSHUFHW:
|
||||
case X86ISD::PSHUFLW:
|
||||
case X86ISD::SHUFP:
|
||||
case X86ISD::INSERTPS:
|
||||
case X86ISD::PALIGNR:
|
||||
case X86ISD::MOVLHPS:
|
||||
case X86ISD::MOVLHPD:
|
||||
@ -4760,6 +4761,11 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
|
||||
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
|
||||
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
|
||||
break;
|
||||
case X86ISD::INSERTPS:
|
||||
ImmN = N->getOperand(N->getNumOperands()-1);
|
||||
DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
|
||||
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
|
||||
break;
|
||||
case X86ISD::UNPCKH:
|
||||
DecodeUNPCKHMask(VT, Mask);
|
||||
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
|
||||
@ -23860,6 +23866,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
SDValue InVec = N->getOperand(0);
|
||||
SDValue EltNo = N->getOperand(1);
|
||||
EVT EltVT = N->getValueType(0);
|
||||
|
||||
if (!isa<ConstantSDNode>(EltNo))
|
||||
return SDValue();
|
||||
@ -23888,14 +23895,22 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
SmallVector<int, 16> ShuffleMask;
|
||||
bool UnaryShuffle;
|
||||
if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(),
|
||||
false, ShuffleMask, UnaryShuffle))
|
||||
if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), true,
|
||||
ShuffleMask, UnaryShuffle))
|
||||
return SDValue();
|
||||
|
||||
// Select the input vector, guarding against out of range extract vector.
|
||||
unsigned NumElems = CurrentVT.getVectorNumElements();
|
||||
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
|
||||
int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt];
|
||||
|
||||
if (Idx == SM_SentinelZero)
|
||||
return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT)
|
||||
: DAG.getConstantFP(+0.0, SDLoc(N), EltVT);
|
||||
if (Idx == SM_SentinelUndef)
|
||||
return DAG.getUNDEF(EltVT);
|
||||
|
||||
assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
|
||||
SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
|
||||
: InVec.getOperand(1);
|
||||
|
||||
@ -23920,7 +23935,6 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
||||
if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
|
||||
return SDValue();
|
||||
|
||||
EVT EltVT = N->getValueType(0);
|
||||
// If there's a bitcast before the shuffle, check if the load type and
|
||||
// alignment is valid.
|
||||
unsigned Align = LN0->getAlignment();
|
||||
|
@ -109,3 +109,36 @@ define <4 x float> @shuffle_v4f32_0z6z(<4 x float> %A, <4 x float> %B) {
|
||||
%vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
|
||||
ret <4 x float> %vecinit4
|
||||
}
|
||||
|
||||
define float @extract_zero_insertps_z0z7(<4 x float> %a0, <4 x float> %a1) {
|
||||
; SSE-LABEL: extract_zero_insertps_z0z7:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: extract_zero_insertps_z0z7:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 21)
|
||||
%ext = extractelement <4 x float> %res, i32 0
|
||||
ret float %ext
|
||||
}
|
||||
|
||||
define float @extract_lane_insertps_5123(<4 x float> %a0, <4 x float> *%p1) {
|
||||
; SSE-LABEL: extract_lane_insertps_5123:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: extract_lane_insertps_5123:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
%a1 = load <4 x float>, <4 x float> *%p1
|
||||
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 128)
|
||||
%ext = extractelement <4 x float> %res, i32 0
|
||||
ret float %ext
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
|
||||
|
Loading…
Reference in New Issue
Block a user