[SelectionDAG] Use known ones to provide a better bound for the known zeros for CTTZ/CTLZ operations.

This is the SelectionDAG version of D32521. If we know where at least one 1 is located in the input to these intrinsics, we can place an upper bound on the number of bits needed to represent the count and thus increase the number of known zeros in the output.

I think we can also refine this further for CTTZ_ZERO_UNDEF/CTLZ_ZERO_UNDEF by assuming that the answer will never be BitWidth. I've left this out for now because it caused other test failures across multiple targets, usually because of turning ADD into OR based on this new information.

I'll fix CTPOP in a future patch.

Differential Revision: https://reviews.llvm.org/D32692

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@301806 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2017-05-01 16:08:06 +00:00
parent fc533c3fb3
commit 4e5c5db786
2 changed files with 18 additions and 12 deletions

View File

@ -2353,9 +2353,23 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ_ZERO_UNDEF: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleTZ = Known2.One.countTrailingZeros();
unsigned LowBits = Log2_32(PossibleTZ) + 1;
Known.Zero.setBitsFrom(LowBits);
break;
}
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ_ZERO_UNDEF: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleLZ = Known2.One.countLeadingZeros();
unsigned LowBits = Log2_32(PossibleLZ) + 1;
Known.Zero.setBitsFrom(LowBits);
break;
}
case ISD::CTPOP: {
Known.Zero.setBitsFrom(Log2_32(BitWidth)+1);
break;

View File

@ -786,7 +786,6 @@ define i8 @cttz_i8_knownbits(i8 %x) {
; X32-NEXT: orb $2, %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: bsfl %eax, %eax
; X32-NEXT: andb $1, %al
; X32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-NEXT: retl
;
@ -795,7 +794,6 @@ define i8 @cttz_i8_knownbits(i8 %x) {
; X64-NEXT: orb $2, %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsfl %eax, %eax
; X64-NEXT: andb $1, %al
; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-NEXT: retq
;
@ -805,7 +803,6 @@ define i8 @cttz_i8_knownbits(i8 %x) {
; X32-CLZ-NEXT: orb $2, %al
; X32-CLZ-NEXT: movzbl %al, %eax
; X32-CLZ-NEXT: tzcntl %eax, %eax
; X32-CLZ-NEXT: andb $1, %al
; X32-CLZ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-CLZ-NEXT: retl
;
@ -814,7 +811,6 @@ define i8 @cttz_i8_knownbits(i8 %x) {
; X64-CLZ-NEXT: orb $2, %dil
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: tzcntl %eax, %eax
; X64-CLZ-NEXT: andb $1, %al
; X64-CLZ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-CLZ-NEXT: retq
%x2 = or i8 %x, 2
@ -830,8 +826,7 @@ define i8 @ctlz_i8_knownbits(i8 %x) {
; X32-NEXT: orb $64, %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: notl %eax
; X32-NEXT: andb $1, %al
; X32-NEXT: xorl $7, %eax
; X32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-NEXT: retl
;
@ -840,8 +835,7 @@ define i8 @ctlz_i8_knownbits(i8 %x) {
; X64-NEXT: orb $64, %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsrl %eax, %eax
; X64-NEXT: notl %eax
; X64-NEXT: andb $1, %al
; X64-NEXT: xorl $7, %eax
; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-NEXT: retq
;
@ -852,7 +846,6 @@ define i8 @ctlz_i8_knownbits(i8 %x) {
; X32-CLZ-NEXT: movzbl %al, %eax
; X32-CLZ-NEXT: lzcntl %eax, %eax
; X32-CLZ-NEXT: addl $-24, %eax
; X32-CLZ-NEXT: andb $1, %al
; X32-CLZ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-CLZ-NEXT: retl
;
@ -862,7 +855,6 @@ define i8 @ctlz_i8_knownbits(i8 %x) {
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: lzcntl %eax, %eax
; X64-CLZ-NEXT: addl $-24, %eax
; X64-CLZ-NEXT: andb $1, %al
; X64-CLZ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-CLZ-NEXT: retq