mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-19 11:41:53 +00:00
[X86] Custom legalize v2i32 gathers via widening rather than promoting.
The default legalization for v2i32 is promotion to v2i64. This results in a gather that reads 64-bit elements rather than 32. If one of the elements is near a page boundary this can cause an illegal access that can fault. We also miscalculate the scale for the gather which is an even worse problem, but we probably could have found a separate way to fix that. llvm-svn: 319521
This commit is contained in:
parent
8c871357e5
commit
9e447ff84f
@ -1129,6 +1129,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
if (HasInt256) {
|
||||
// Custom legalize 2x32 to get a little better code.
|
||||
setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
|
||||
|
||||
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
|
||||
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
|
||||
@ -24288,6 +24289,10 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
|
||||
|
||||
// If the index is v2i32, we're being called by type legalization.
|
||||
if (IndexVT == MVT::v2i32)
|
||||
return SDValue();
|
||||
|
||||
if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
|
||||
!Index.getSimpleValueType().is512BitVector()) {
|
||||
// AVX512F supports only 512-bit vectors. Or data or index should
|
||||
@ -24332,39 +24337,6 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
|
||||
SDValue RetOps[] = {Extract, NewGather.getValue(2)};
|
||||
return DAG.getMergeValues(RetOps, dl);
|
||||
}
|
||||
if (N->getMemoryVT() == MVT::v2i32) {
|
||||
// There is a special case when the return type v2i32 is illegal and
|
||||
// the type legalizer extended it to v2i64. Without this conversion we end up
|
||||
// with VPGATHERQQ (reading q-words from the memory) instead of VPGATHERQD.
|
||||
// In order to avoid this situation, we'll build an X86 specific Gather node
|
||||
// with index v2i64 and value type v4i32.
|
||||
assert(VT == MVT::v2i64 && Src0.getValueType() == MVT::v2i64 &&
|
||||
"Unexpected type in masked gather");
|
||||
Src0 =
|
||||
DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Src0),
|
||||
DAG.getUNDEF(MVT::v4i32), { 0, 2, -1, -1 });
|
||||
// The mask should match the destination type. Extending mask with zeroes
|
||||
// is not necessary since instruction itself reads only two values from
|
||||
// memory.
|
||||
SDVTList VTList;
|
||||
if (Subtarget.hasVLX()) {
|
||||
Mask = ExtendToType(Mask, MVT::v4i1, DAG, false);
|
||||
VTList = DAG.getVTList(MVT::v4i32, MVT::v2i1, MVT::Other);
|
||||
} else {
|
||||
Mask =
|
||||
DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Mask),
|
||||
DAG.getUNDEF(MVT::v4i32), {0, 2, -1, -1});
|
||||
VTList = DAG.getVTList(MVT::v4i32, MVT::v4i32, MVT::Other);
|
||||
}
|
||||
SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
|
||||
SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
|
||||
VTList, Ops, dl, N->getMemoryVT(), N->getMemOperand());
|
||||
|
||||
SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, MVT::v2i64,
|
||||
NewGather.getValue(0), DAG);
|
||||
SDValue RetOps[] = { Sext, NewGather.getValue(2) };
|
||||
return DAG.getMergeValues(RetOps, dl);
|
||||
}
|
||||
|
||||
SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
|
||||
SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
|
||||
@ -24900,6 +24872,58 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
||||
Results.push_back(Res.getValue(2));
|
||||
return;
|
||||
}
|
||||
if (VT == MVT::v2i32) {
|
||||
auto *Gather = cast<MaskedGatherSDNode>(N);
|
||||
SDValue Index = Gather->getIndex();
|
||||
SDValue Mask = Gather->getMask();
|
||||
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
|
||||
SDValue Src0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32,
|
||||
Gather->getValue(),
|
||||
DAG.getUNDEF(MVT::v2i32));
|
||||
// If the index is v2i64 we can use it directly.
|
||||
if (Index.getValueType() == MVT::v2i64 &&
|
||||
(Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
|
||||
if (!Subtarget.hasVLX()) {
|
||||
// We need to widen the mask, but the instruction will only use 2
|
||||
// of its elements. So we can use undef.
|
||||
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
|
||||
DAG.getUNDEF(MVT::v2i1));
|
||||
Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
|
||||
}
|
||||
SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(),
|
||||
Index };
|
||||
SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
|
||||
DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl,
|
||||
Gather->getMemoryVT(), Gather->getMemOperand());
|
||||
SDValue Chain = Res.getValue(2);
|
||||
if (!ExperimentalVectorWideningLegalization)
|
||||
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
Results.push_back(Res);
|
||||
Results.push_back(Chain);
|
||||
return;
|
||||
}
|
||||
EVT IndexVT = Index.getValueType();
|
||||
EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(),
|
||||
IndexVT.getScalarType(), 4);
|
||||
// Otherwise we need to custom widen everything to avoid promotion.
|
||||
Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
|
||||
DAG.getUNDEF(IndexVT));
|
||||
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
|
||||
DAG.getConstant(0, dl, MVT::v2i1));
|
||||
SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(),
|
||||
Index };
|
||||
SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other),
|
||||
Gather->getMemoryVT(), dl, Ops,
|
||||
Gather->getMemOperand());
|
||||
SDValue Chain = Res.getValue(1);
|
||||
if (!ExperimentalVectorWideningLegalization)
|
||||
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
Results.push_back(Res);
|
||||
Results.push_back(Chain);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -9,23 +9,23 @@ declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ptrs, i32 %align, <2 x i
|
||||
define <2 x i32> @masked_gather_v2i32(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
|
||||
; X86-LABEL: masked_gather_v2i32:
|
||||
; X86: # BB#0: # %entry
|
||||
; X86-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: vpmovsxdq (%eax), %xmm2
|
||||
; X86-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; X86-NEXT: vpgatherqd %xmm0, (,%xmm2), %xmm1
|
||||
; X86-NEXT: vpmovsxdq %xmm1, %xmm0
|
||||
; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
||||
; X86-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; X86-NEXT: vpgatherdd %xmm0, (,%xmm2), %xmm1
|
||||
; X86-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: masked_gather_v2i32:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; X64-NEXT: vmovdqa (%rdi), %xmm2
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; X64-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; X64-NEXT: vpgatherqd %xmm0, (,%xmm2), %xmm1
|
||||
; X64-NEXT: vpmovsxdq %xmm1, %xmm0
|
||||
; X64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; NOGATHER-LABEL: masked_gather_v2i32:
|
||||
@ -59,25 +59,23 @@ entry:
|
||||
define <4 x i32> @masked_gather_v2i32_concat(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
|
||||
; X86-LABEL: masked_gather_v2i32_concat:
|
||||
; X86: # BB#0: # %entry
|
||||
; X86-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: vpmovsxdq (%eax), %xmm2
|
||||
; X86-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; X86-NEXT: vpgatherqd %xmm0, (,%xmm2), %xmm1
|
||||
; X86-NEXT: vpmovsxdq %xmm1, %xmm0
|
||||
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
||||
; X86-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; X86-NEXT: vpgatherdd %xmm0, (,%xmm2), %xmm1
|
||||
; X86-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: masked_gather_v2i32_concat:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; X64-NEXT: vmovdqa (%rdi), %xmm2
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; X64-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; X64-NEXT: vpgatherqd %xmm0, (,%xmm2), %xmm1
|
||||
; X64-NEXT: vpmovsxdq %xmm1, %xmm0
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; X64-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; NOGATHER-LABEL: masked_gather_v2i32_concat:
|
||||
|
@ -1275,52 +1275,52 @@ declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <
|
||||
define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) {
|
||||
; KNL_64-LABEL: test23:
|
||||
; KNL_64: # BB#0:
|
||||
; KNL_64-NEXT: # kill: %xmm2<def> %xmm2<kill> %zmm2<def>
|
||||
; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
|
||||
; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
|
||||
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
|
||||
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
|
||||
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
|
||||
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
|
||||
; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
|
||||
; KNL_64-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
|
||||
; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
|
||||
; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
|
||||
; KNL_64-NEXT: vmovaps %xmm1, %xmm1
|
||||
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1
|
||||
; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1
|
||||
; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1}
|
||||
; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
|
||||
; KNL_64-NEXT: vzeroupper
|
||||
; KNL_64-NEXT: retq
|
||||
;
|
||||
; KNL_32-LABEL: test23:
|
||||
; KNL_32: # BB#0:
|
||||
; KNL_32-NEXT: # kill: %xmm2<def> %xmm2<kill> %zmm2<def>
|
||||
; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
|
||||
; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
|
||||
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
|
||||
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
|
||||
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
|
||||
; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
|
||||
; KNL_32-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
|
||||
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
|
||||
; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
|
||||
; KNL_32-NEXT: vmovaps %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1
|
||||
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
|
||||
; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1}
|
||||
; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
|
||||
; KNL_32-NEXT: vzeroupper
|
||||
; KNL_32-NEXT: retl
|
||||
;
|
||||
; SKX-LABEL: test23:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
|
||||
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
|
||||
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
|
||||
; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1}
|
||||
; SKX-NEXT: vpmovsxdq %xmm1, %xmm0
|
||||
; SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1}
|
||||
; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; SKX_32-LABEL: test23:
|
||||
; SKX_32: # BB#0:
|
||||
; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
|
||||
; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
|
||||
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
|
||||
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
|
||||
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
|
||||
; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1}
|
||||
; SKX_32-NEXT: vpmovsxdq %xmm1, %xmm0
|
||||
; SKX_32-NEXT: vpgatherdd (%eax,%xmm0,4), %xmm1 {%k1}
|
||||
; SKX_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
|
||||
; SKX_32-NEXT: retl
|
||||
%sext_ind = sext <2 x i32> %ind to <2 x i64>
|
||||
%gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
|
||||
@ -1331,26 +1331,28 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
|
||||
define <2 x i32> @test23b(i32* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32> %src0) {
|
||||
; KNL_64-LABEL: test23b:
|
||||
; KNL_64: # BB#0:
|
||||
; KNL_64-NEXT: # kill: %xmm2<def> %xmm2<kill> %zmm2<def>
|
||||
; KNL_64-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
|
||||
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
|
||||
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
|
||||
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
|
||||
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
|
||||
; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
|
||||
; KNL_64-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
|
||||
; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
|
||||
; KNL_64-NEXT: vmovaps %xmm1, %xmm1
|
||||
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1
|
||||
; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1
|
||||
; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1}
|
||||
; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
|
||||
; KNL_64-NEXT: vzeroupper
|
||||
; KNL_64-NEXT: retq
|
||||
;
|
||||
; KNL_32-LABEL: test23b:
|
||||
; KNL_32: # BB#0:
|
||||
; KNL_32-NEXT: # kill: %xmm2<def> %xmm2<kill> %zmm2<def>
|
||||
; KNL_32-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
|
||||
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
|
||||
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
|
||||
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
|
||||
; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
|
||||
; KNL_32-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
|
||||
; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
|
||||
; KNL_32-NEXT: vmovaps %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1
|
||||
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
|
||||
; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1}
|
||||
; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
|
||||
; KNL_32-NEXT: vzeroupper
|
||||
; KNL_32-NEXT: retl
|
||||
;
|
||||
@ -1360,7 +1362,7 @@ define <2 x i32> @test23b(i32* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32>
|
||||
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
|
||||
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
|
||||
; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1}
|
||||
; SKX-NEXT: vpmovsxdq %xmm1, %xmm0
|
||||
; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; SKX_32-LABEL: test23b:
|
||||
@ -1370,7 +1372,7 @@ define <2 x i32> @test23b(i32* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32>
|
||||
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
|
||||
; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1}
|
||||
; SKX_32-NEXT: vpmovsxdq %xmm1, %xmm0
|
||||
; SKX_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
|
||||
; SKX_32-NEXT: retl
|
||||
%gep.random = getelementptr i32, i32* %base, <2 x i64> %ind
|
||||
%res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
|
||||
@ -1380,45 +1382,44 @@ define <2 x i32> @test23b(i32* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32>
|
||||
define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
|
||||
; KNL_64-LABEL: test24:
|
||||
; KNL_64: # BB#0:
|
||||
; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
|
||||
; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm1
|
||||
; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
|
||||
; KNL_64-NEXT: movb $3, %al
|
||||
; KNL_64-NEXT: kmovw %eax, %k1
|
||||
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm1,8), %zmm0 {%k1}
|
||||
; KNL_64-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
|
||||
; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k1}
|
||||
; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
|
||||
; KNL_64-NEXT: vzeroupper
|
||||
; KNL_64-NEXT: retq
|
||||
;
|
||||
; KNL_32-LABEL: test24:
|
||||
; KNL_32: # BB#0:
|
||||
; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
|
||||
; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm1
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL_32-NEXT: vmovdqa {{.*#+}} xmm0 = [1,0,1,0]
|
||||
; KNL_32-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; KNL_32-NEXT: vptestmq %zmm0, %zmm0, %k1
|
||||
; KNL_32-NEXT: vpgatherqq (%eax,%zmm1,8), %zmm0 {%k1}
|
||||
; KNL_32-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
|
||||
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
|
||||
; KNL_32-NEXT: movb $3, %cl
|
||||
; KNL_32-NEXT: kmovw %ecx, %k1
|
||||
; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1}
|
||||
; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
|
||||
; KNL_32-NEXT: vzeroupper
|
||||
; KNL_32-NEXT: retl
|
||||
;
|
||||
; SKX-LABEL: test24:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
|
||||
; SKX-NEXT: kxnorw %k0, %k0, %k1
|
||||
; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1}
|
||||
; SKX-NEXT: vpmovsxdq %xmm1, %xmm0
|
||||
; SKX-NEXT: movb $3, %al
|
||||
; SKX-NEXT: kmovw %eax, %k1
|
||||
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1}
|
||||
; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; SKX_32-LABEL: test24:
|
||||
; SKX_32: # BB#0:
|
||||
; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
|
||||
; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
|
||||
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
|
||||
; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1}
|
||||
; SKX_32-NEXT: vpmovsxdq %xmm1, %xmm0
|
||||
; SKX_32-NEXT: movb $3, %cl
|
||||
; SKX_32-NEXT: kmovw %ecx, %k1
|
||||
; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; SKX_32-NEXT: vpgatherdd (%eax,%xmm0,4), %xmm1 {%k1}
|
||||
; SKX_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
|
||||
; SKX_32-NEXT: retl
|
||||
%sext_ind = sext <2 x i32> %ind to <2 x i64>
|
||||
%gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
|
||||
|
Loading…
x
Reference in New Issue
Block a user