mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-22 23:39:24 +00:00
[X86][AVX] combineHorizontalPredicateResult - split any/allof v16i16/v32i8 reduction on AVX1
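On AVX1 targets (no AVX2), split the 256-bit v16i16/v32i8 match vector into its lo/hi 128-bit halves and combine them with OR (any-of) or AND (all-of) first, then call PMOVMSKB once on the 128-bit result.
llvm-svn: 357611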
This commit is contained in:
parent
0b28b8b09b
commit
15919ad306
@ -34316,6 +34316,14 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
|
||||
if (DAG.ComputeNumSignBits(Match) != BitWidth)
|
||||
return SDValue();
|
||||
|
||||
SDLoc DL(Extract);
|
||||
if (MatchSizeInBits == 256 && BitWidth < 32 && !Subtarget.hasInt256()) {
|
||||
SDValue Lo, Hi;
|
||||
std::tie(Lo, Hi) = DAG.SplitVector(Match, DL);
|
||||
Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi);
|
||||
MatchSizeInBits = Match.getValueSizeInBits();
|
||||
}
|
||||
|
||||
// For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB.
|
||||
MVT MaskSrcVT;
|
||||
if (64 == BitWidth || 32 == BitWidth)
|
||||
@ -34324,7 +34332,6 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
|
||||
else
|
||||
MaskSrcVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);
|
||||
|
||||
SDLoc DL(Extract);
|
||||
SDValue CmpC;
|
||||
ISD::CondCode CondCode;
|
||||
if (BinOp == ISD::OR) {
|
||||
|
@ -673,12 +673,10 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX1-NEXT: vpmovmskb %xmm2, %ecx
|
||||
; AVX1-NEXT: shll $16, %ecx
|
||||
; AVX1-NEXT: orl %eax, %ecx
|
||||
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
||||
; AVX1-NEXT: xorl %eax, %eax
|
||||
; AVX1-NEXT: cmpl $-1, %ecx
|
||||
; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
|
||||
; AVX1-NEXT: sete %al
|
||||
; AVX1-NEXT: negl %eax
|
||||
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
@ -867,11 +865,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX1-NEXT: vpmovmskb %xmm2, %ecx
|
||||
; AVX1-NEXT: shll $16, %ecx
|
||||
; AVX1-NEXT: orl %eax, %ecx
|
||||
; AVX1-NEXT: cmpl $-1, %ecx
|
||||
; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
|
||||
; AVX1-NEXT: sete %al
|
||||
; AVX1-NEXT: negb %al
|
||||
; AVX1-NEXT: vzeroupper
|
||||
@ -1555,11 +1551,9 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX1-NEXT: vpmovmskb %xmm2, %ecx
|
||||
; AVX1-NEXT: shll $16, %ecx
|
||||
; AVX1-NEXT: orl %eax, %ecx
|
||||
; AVX1-NEXT: cmpl $-1, %ecx
|
||||
; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
|
||||
; AVX1-NEXT: sete %al
|
||||
; AVX1-NEXT: negb %al
|
||||
; AVX1-NEXT: vzeroupper
|
||||
|
@ -611,13 +611,10 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
||||
; AVX1-NEXT: vpmovmskb %xmm2, %edx
|
||||
; AVX1-NEXT: shll $16, %edx
|
||||
; AVX1-NEXT: xorl %eax, %eax
|
||||
; AVX1-NEXT: orl %ecx, %edx
|
||||
; AVX1-NEXT: setne %al
|
||||
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX1-NEXT: negl %eax
|
||||
; AVX1-NEXT: sbbl %eax, %eax
|
||||
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
@ -790,12 +787,10 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX1-NEXT: vpmovmskb %xmm2, %ecx
|
||||
; AVX1-NEXT: shll $16, %ecx
|
||||
; AVX1-NEXT: orl %eax, %ecx
|
||||
; AVX1-NEXT: setne %al
|
||||
; AVX1-NEXT: negb %al
|
||||
; AVX1-NEXT: negl %eax
|
||||
; AVX1-NEXT: sbbb %al, %al
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
@ -1488,12 +1483,10 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX1-NEXT: vpmovmskb %xmm2, %ecx
|
||||
; AVX1-NEXT: shll $16, %ecx
|
||||
; AVX1-NEXT: orl %eax, %ecx
|
||||
; AVX1-NEXT: setne %al
|
||||
; AVX1-NEXT: negb %al
|
||||
; AVX1-NEXT: negl %eax
|
||||
; AVX1-NEXT: sbbb %al, %al
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
|
Loading…
Reference in New Issue
Block a user