mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-04-07 07:42:11 +00:00
[SDAG] allow more cast folding for vector sext-of-setcc
This is a follow-up to D103280 that eases the use restrictions, so we can handle the motivating case from: https://llvm.org/PR50055 The loop code is adapted from similar use checks in ExtendUsesToFormExtLoad() and SliceUpLoad(). I did not see an easier way to filter out non-chain uses of load values. Differential Revision: https://reviews.llvm.org/D103462
This commit is contained in:
parent
8349ac9aa9
commit
ede3982792
@ -10826,13 +10826,29 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
|
|||||||
|
|
||||||
// Match a simple, non-extended load that can be converted to a
|
// Match a simple, non-extended load that can be converted to a
|
||||||
// legal zext-load.
|
// legal zext-load.
|
||||||
// TODO: Handle more than one use if the other uses are free to zext.
|
|
||||||
// TODO: Allow widening of an existing zext-load?
|
// TODO: Allow widening of an existing zext-load?
|
||||||
return ISD::isNON_EXTLoad(V.getNode()) &&
|
if (!(ISD::isNON_EXTLoad(V.getNode()) &&
|
||||||
ISD::isUNINDEXEDLoad(V.getNode()) &&
|
ISD::isUNINDEXEDLoad(V.getNode()) &&
|
||||||
cast<LoadSDNode>(V)->isSimple() &&
|
cast<LoadSDNode>(V)->isSimple() &&
|
||||||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, V.getValueType()) &&
|
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, V.getValueType())))
|
||||||
V.hasOneUse();
|
return false;
|
||||||
|
|
||||||
|
// Non-chain users of this value must either be the setcc in this
|
||||||
|
// sequence or zexts that can be folded into the new zext-load.
|
||||||
|
for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
|
||||||
|
UI != UE; ++UI) {
|
||||||
|
// Skip uses of the chain and the setcc.
|
||||||
|
SDNode *User = *UI;
|
||||||
|
if (UI.getUse().getResNo() != 0 || User == N0.getNode())
|
||||||
|
continue;
|
||||||
|
// Extra users must have exactly the same cast we are about to create.
|
||||||
|
// TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
|
||||||
|
// is enhanced similarly.
|
||||||
|
if (User->getOpcode() != ISD::ZERO_EXTEND ||
|
||||||
|
User->getValueType(0) != VT)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
if (IsFreeToZext(N00) && IsFreeToZext(N01)) {
|
if (IsFreeToZext(N00) && IsFreeToZext(N01)) {
|
||||||
|
@ -438,7 +438,7 @@ define <8 x i32> @cmp_ne_zextload_from_legal_op(<8 x i16>* %x, <8 x i16>* %y) {
|
|||||||
ret <8 x i32> %sext
|
ret <8 x i32> %sext
|
||||||
}
|
}
|
||||||
|
|
||||||
; negative test - extra use (TODO)
|
; Both uses of the load can be absorbed by the zext-load, so we eliminate the explicit casts.
|
||||||
|
|
||||||
define <8 x i32> @PR50055(<8 x i8>* %src, <8 x i32>* %dst) nounwind {
|
define <8 x i32> @PR50055(<8 x i8>* %src, <8 x i32>* %dst) nounwind {
|
||||||
; SSE-LABEL: PR50055:
|
; SSE-LABEL: PR50055:
|
||||||
@ -462,28 +462,13 @@ define <8 x i32> @PR50055(<8 x i8>* %src, <8 x i32>* %dst) nounwind {
|
|||||||
; SSE-NEXT: movdqa %xmm3, (%rsi)
|
; SSE-NEXT: movdqa %xmm3, (%rsi)
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: PR50055:
|
; AVX-LABEL: PR50055:
|
||||||
; AVX2: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
; AVX-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
|
||||||
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
|
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
; AVX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm1
|
||||||
; AVX2-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
|
; AVX-NEXT: vmovdqa %ymm1, (%rsi)
|
||||||
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
; AVX-NEXT: retq
|
||||||
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
||||||
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
|
|
||||||
; AVX2-NEXT: vmovdqa %ymm1, (%rsi)
|
|
||||||
; AVX2-NEXT: retq
|
|
||||||
;
|
|
||||||
; AVX512-LABEL: PR50055:
|
|
||||||
; AVX512: # %bb.0:
|
|
||||||
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
|
||||||
; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
|
|
||||||
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
||||||
; AVX512-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
|
|
||||||
; AVX512-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
|
|
||||||
; AVX512-NEXT: vpmovsxbd %xmm1, %ymm1
|
|
||||||
; AVX512-NEXT: vmovdqa %ymm1, (%rsi)
|
|
||||||
; AVX512-NEXT: retq
|
|
||||||
%load = load <8 x i8>, <8 x i8>* %src
|
%load = load <8 x i8>, <8 x i8>* %src
|
||||||
%zext = zext <8 x i8> %load to <8 x i32>
|
%zext = zext <8 x i8> %load to <8 x i32>
|
||||||
%icmp = icmp ne <8 x i8> %load, zeroinitializer
|
%icmp = icmp ne <8 x i8> %load, zeroinitializer
|
||||||
@ -492,8 +477,10 @@ define <8 x i32> @PR50055(<8 x i8>* %src, <8 x i32>* %dst) nounwind {
|
|||||||
ret <8 x i32> %zext
|
ret <8 x i32> %zext
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i16> @multi_use_narrower_sizes(<8 x i8>* %src, <8 x i32>* %dst) nounwind {
|
; negative test - extra uses must be absorbable by a zext-load.
|
||||||
; SSE-LABEL: multi_use_narrower_sizes:
|
|
||||||
|
define <8 x i16> @multi_use_narrower_size(<8 x i8>* %src, <8 x i32>* %dst) nounwind {
|
||||||
|
; SSE-LABEL: multi_use_narrower_size:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
|
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
|
||||||
; SSE-NEXT: pxor %xmm2, %xmm2
|
; SSE-NEXT: pxor %xmm2, %xmm2
|
||||||
@ -509,7 +496,7 @@ define <8 x i16> @multi_use_narrower_sizes(<8 x i8>* %src, <8 x i32>* %dst) noun
|
|||||||
; SSE-NEXT: movdqa %xmm2, (%rsi)
|
; SSE-NEXT: movdqa %xmm2, (%rsi)
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: multi_use_narrower_sizes:
|
; AVX-LABEL: multi_use_narrower_size:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||||
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||||
@ -527,6 +514,8 @@ define <8 x i16> @multi_use_narrower_sizes(<8 x i8>* %src, <8 x i32>* %dst) noun
|
|||||||
ret <8 x i16> %zext
|
ret <8 x i16> %zext
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; negative test - extra uses must be absorbable by a zext-load.
|
||||||
|
|
||||||
define <8 x i32> @multi_use_wider_size(<8 x i8>* %src, <8 x i16>* %dst) nounwind {
|
define <8 x i32> @multi_use_wider_size(<8 x i8>* %src, <8 x i16>* %dst) nounwind {
|
||||||
; SSE-LABEL: multi_use_wider_size:
|
; SSE-LABEL: multi_use_wider_size:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user