mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-26 23:21:11 +00:00
[X86] combineBitcastvxi1 - don't prematurely create PACKSS nodes.
Similar to Issue #63710 - by truncating the v8i16 result with a PACKSS node before type legalization, we fail to make use of various folds that rely on TRUNCATE nodes. This required tweaks to LowerTruncateVecPackWithSignBits to recognise when the truncation source has been widened and to more closely match combineVectorSignBitsTruncation with respect to truncating with PACKSS/PACKUS on AVX512 targets. This is one of the last stages before we can finally get rid of combineVectorSignBitsTruncation.
This commit is contained in:
parent
c6c5aad6a2
commit
65c9153cf0
@ -22945,6 +22945,26 @@ static SDValue LowerTruncateVecPackWithSignBits(MVT DstVT, SDValue In,
|
||||
(DstSVT == MVT::i8 || DstSVT == MVT::i16 || DstSVT == MVT::i32)))
|
||||
return SDValue();
|
||||
|
||||
// Don't lower with PACK nodes on AVX512 targets if we'd need more than one.
|
||||
if (Subtarget.hasAVX512() &&
|
||||
SrcSVT.getSizeInBits() > (DstSVT.getSizeInBits() * 2))
|
||||
return SDValue();
|
||||
|
||||
// If the upper half of the source is undef, then attempt to split and
|
||||
// only truncate the lower half.
|
||||
if (DstVT.getSizeInBits() >= 128) {
|
||||
SmallVector<SDValue> LowerOps;
|
||||
if (isUpperSubvectorUndef(In, LowerOps, DAG)) {
|
||||
MVT DstHalfVT = DstVT.getHalfNumVectorElementsVT();
|
||||
MVT SrcHalfVT = SrcVT.getHalfNumVectorElementsVT();
|
||||
SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcHalfVT, LowerOps);
|
||||
if (SDValue Res = LowerTruncateVecPackWithSignBits(DstHalfVT, Lo, DL,
|
||||
Subtarget, DAG))
|
||||
return widenSubVector(Res, false, Subtarget, DAG, DL,
|
||||
DstVT.getSizeInBits());
|
||||
}
|
||||
}
|
||||
|
||||
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
|
||||
unsigned NumPackedSignBits = std::min<unsigned>(DstSVT.getSizeInBits(), 16);
|
||||
unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;
|
||||
@ -45059,9 +45079,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
|
||||
if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) {
|
||||
V = getPMOVMSKB(DL, V, DAG, Subtarget);
|
||||
} else {
|
||||
if (SExtVT == MVT::v8i16)
|
||||
V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V,
|
||||
DAG.getUNDEF(MVT::v8i16));
|
||||
if (SExtVT == MVT::v8i16) {
|
||||
V = widenSubVector(V, false, Subtarget, DAG, DL, 256);
|
||||
V = DAG.getNode(ISD::TRUNCATE, DL, MVT::v16i8, V);
|
||||
}
|
||||
V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
|
||||
}
|
||||
|
||||
|
@ -1193,9 +1193,8 @@ define i8 @icmp0_v8i1(<8 x i8>) nounwind {
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: psllw $15, %xmm0
|
||||
; SSE2-NEXT: psraw $15, %xmm0
|
||||
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE2-NEXT: testl %eax, %eax
|
||||
; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
|
||||
; SSE2-NEXT: sete %al
|
||||
; SSE2-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
@ -1203,9 +1202,8 @@ define i8 @icmp0_v8i1(<8 x i8>) nounwind {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: psllw $15, %xmm0
|
||||
; SSE41-NEXT: psraw $15, %xmm0
|
||||
; SSE41-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE41-NEXT: testl %eax, %eax
|
||||
; SSE41-NEXT: testl $43690, %eax # imm = 0xAAAA
|
||||
; SSE41-NEXT: sete %al
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
|
Loading…
Reference in New Issue
Block a user