mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-03-02 17:29:03 +00:00
[X86] Add an override of targetShrinkDemandedConstant to limit the damage that shrinkdemandedbits can do to zext_in_reg operations
Summary: This patch adds an implementation of targetShrinkDemandedConstant that tries to keep shrinkdemandedbits from removing bits that would otherwise have been recognized as a movzx. We still need a follow-up patch to stop moving ands across srl if the and could be represented as a movzx before the shift but not after. I think this should help with some of the cases that D42088 ended up removing during isel. Reviewers: spatel, RKSimon. Reviewed By: spatel. Subscribers: llvm-commits. Differential Revision: https://reviews.llvm.org/D42265. llvm-svn: 323048
This commit is contained in:
parent
6320b01ed3
commit
293879ad3d
@ -27884,6 +27884,65 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
||||
// X86 Optimization Hooks
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
bool
|
||||
X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
|
||||
const APInt &Demanded,
|
||||
TargetLoweringOpt &TLO) const {
|
||||
// Only optimize Ands to prevent shrinking a constant that could be
|
||||
// matched by movzx.
|
||||
if (Op.getOpcode() != ISD::AND)
|
||||
return false;
|
||||
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
// Ignore vectors.
|
||||
if (VT.isVector())
|
||||
return false;
|
||||
|
||||
unsigned Size = VT.getSizeInBits();
|
||||
|
||||
// Make sure the RHS really is a constant.
|
||||
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
|
||||
if (!C)
|
||||
return false;
|
||||
|
||||
const APInt &Mask = C->getAPIntValue();
|
||||
|
||||
// Clear all non-demanded bits initially.
|
||||
APInt ShrunkMask = Mask & Demanded;
|
||||
|
||||
// Find the width of the shrunk mask.
|
||||
unsigned Width = ShrunkMask.getActiveBits();
|
||||
|
||||
// If the mask is all 0s there's nothing to do here.
|
||||
if (Width == 0)
|
||||
return false;
|
||||
|
||||
// Find the next power of 2 width, rounding up to a byte.
|
||||
Width = PowerOf2Ceil(std::max(Width, 8U));
|
||||
// Truncate the width to size to handle illegal types.
|
||||
Width = std::min(Width, Size);
|
||||
|
||||
// Calculate a possible zero extend mask for this constant.
|
||||
APInt ZeroExtendMask = APInt::getLowBitsSet(Size, Width);
|
||||
|
||||
// If we aren't changing the mask, just return true to keep it and prevent
|
||||
// the caller from optimizing.
|
||||
if (ZeroExtendMask == Mask)
|
||||
return true;
|
||||
|
||||
// Make sure the bits in the ZeroExtendMask are also set in the original mask.
|
||||
// TODO: We should be able to set bits that aren't demanded too.
|
||||
if (!ZeroExtendMask.isSubsetOf(Mask))
|
||||
return false;
|
||||
|
||||
// Replace the constant with the zero extend mask.
|
||||
SDLoc DL(Op);
|
||||
SDValue NewC = TLO.DAG.getConstant(ZeroExtendMask, DL, VT);
|
||||
SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
|
||||
return TLO.CombineTo(Op, NewOp);
|
||||
}
|
||||
|
||||
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
|
||||
KnownBits &Known,
|
||||
const APInt &DemandedElts,
|
||||
|
@ -835,6 +835,9 @@ namespace llvm {
|
||||
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
|
||||
EVT VT) const override;
|
||||
|
||||
/// Target override of the demanded-constant shrinking hook. Takes the node
/// \p Op, the bits of its result that are demanded (\p Demanded), and the
/// TargetLoweringOpt \p TLO.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
|
||||
TargetLoweringOpt &TLO) const override;
|
||||
|
||||
/// Determine which of the bits specified in Mask are known to be either
|
||||
/// zero or one and return them in the KnownZero/KnownOne bitsets.
|
||||
void computeKnownBitsForTargetNode(const SDValue Op,
|
||||
|
@ -1514,6 +1514,10 @@ def : Pat<(i8 (trunc GR16:$src)),
|
||||
(EXTRACT_SUBREG GR16:$src, sub_8bit)>,
|
||||
Requires<[In64BitMode]>;
|
||||
|
||||
// Matches an i32 immediate whose value lies in the inclusive range
// [0xff00, 0xffff].
def immff00_ffff : ImmLeaf<i32, [{
|
||||
return Imm >= 0xff00 && Imm <= 0xffff;
|
||||
}]>;
|
||||
|
||||
// h-register tricks
|
||||
def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
|
||||
(EXTRACT_SUBREG GR16:$src, sub_8bit_hi)>,
|
||||
@ -1534,7 +1538,7 @@ def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
|
||||
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>;
|
||||
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
|
||||
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
|
||||
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
|
||||
def : Pat<(srl (and_su GR32:$src, immff00_ffff), (i8 8)),
|
||||
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
|
||||
|
||||
// h-register tricks.
|
||||
|
@ -14,16 +14,18 @@ define i32 @test1(i32 %x) nounwind ssp {
|
||||
ret i32 %t1
|
||||
}
|
||||
|
||||
; This test no longer requires or to be converted to 3 addr form because we are
|
||||
; able to use a zero extend instead of an 'and' which gives the register
|
||||
; allocator freedom.
|
||||
define i64 @test2(i8 %A, i8 %B) nounwind {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: # kill: def %esi killed %esi def %rsi
|
||||
; CHECK-NEXT: # kill: def %edi killed %edi def %rdi
|
||||
; CHECK-NEXT: shll $4, %edi
|
||||
; CHECK-NEXT: andl $48, %edi
|
||||
; CHECK-NEXT: andl $240, %esi
|
||||
; CHECK-NEXT: shrq $4, %rsi
|
||||
; CHECK-NEXT: leaq (%rsi,%rdi), %rax
|
||||
; CHECK-NEXT: movzbl %sil, %eax
|
||||
; CHECK-NEXT: shrq $4, %rax
|
||||
; CHECK-NEXT: orq %rdi, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%C = zext i8 %A to i64
|
||||
%D = shl i64 %C, 4
|
||||
|
@ -71,7 +71,6 @@ define i16 @cnt16(i16 %x) nounwind readnone {
|
||||
; X32-NEXT: andl $13107, %eax # imm = 0x3333
|
||||
; X32-NEXT: addl %ecx, %eax
|
||||
; X32-NEXT: movl %eax, %ecx
|
||||
; X32-NEXT: andl $-16, %ecx
|
||||
; X32-NEXT: shrl $4, %ecx
|
||||
; X32-NEXT: addl %eax, %ecx
|
||||
; X32-NEXT: andl $3855, %ecx # imm = 0xF0F
|
||||
@ -94,7 +93,6 @@ define i16 @cnt16(i16 %x) nounwind readnone {
|
||||
; X64-NEXT: andl $13107, %edi # imm = 0x3333
|
||||
; X64-NEXT: addl %eax, %edi
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: andl $-16, %eax
|
||||
; X64-NEXT: shrl $4, %eax
|
||||
; X64-NEXT: addl %edi, %eax
|
||||
; X64-NEXT: andl $3855, %eax # imm = 0xF0F
|
||||
|
@ -12,19 +12,18 @@ define void @func(<4 x float> %vx) {
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: pextrq $1, %xmm0, %rdx
|
||||
; CHECK-NEXT: movq %rdx, %rcx
|
||||
; CHECK-NEXT: shrq $32, %rcx
|
||||
; CHECK-NEXT: movq %xmm0, %rax
|
||||
; CHECK-NEXT: movq %rax, %r9
|
||||
; CHECK-NEXT: pextrq $1, %xmm0, %rax
|
||||
; CHECK-NEXT: movzwl %ax, %ecx
|
||||
; CHECK-NEXT: shrq $32, %rax
|
||||
; CHECK-NEXT: movq %xmm0, %rdx
|
||||
; CHECK-NEXT: movzwl %dx, %r8d
|
||||
; CHECK-NEXT: movq %rdx, %r9
|
||||
; CHECK-NEXT: shrq $32, %r9
|
||||
; CHECK-NEXT: andl $2032, %eax # imm = 0x7F0
|
||||
; CHECK-NEXT: leaq stuff(%rax), %rdi
|
||||
; CHECK-NEXT: leaq stuff(%r8), %rdi
|
||||
; CHECK-NEXT: leaq stuff(%r9), %rsi
|
||||
; CHECK-NEXT: andl $2032, %edx # imm = 0x7F0
|
||||
; CHECK-NEXT: leaq stuff(%rdx), %rdx
|
||||
; CHECK-NEXT: leaq stuff(%rcx), %rcx
|
||||
; CHECK-NEXT: leaq stuff+8(%rax), %r8
|
||||
; CHECK-NEXT: leaq stuff(%rcx), %rdx
|
||||
; CHECK-NEXT: leaq stuff(%rax), %rcx
|
||||
; CHECK-NEXT: leaq stuff+8(%r8), %r8
|
||||
; CHECK-NEXT: leaq stuff+8(%r9), %r9
|
||||
; CHECK-NEXT: callq toto
|
||||
; CHECK-NEXT: popq %rax
|
||||
|
@ -5,25 +5,22 @@
|
||||
; demanded bits shortcomings.
|
||||
|
||||
; The backend will insert a zext to promote the shift to i32.
|
||||
; TODO: we should be able to use movzx here.
|
||||
define i16 @test1(i16 %x) {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: andl $65534, %edi # imm = 0xFFFE
|
||||
; CHECK-NEXT: shrl %edi
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: movzwl %di, %eax
|
||||
; CHECK-NEXT: shrl %eax
|
||||
; CHECK-NEXT: # kill: def %ax killed %ax killed %eax
|
||||
; CHECK-NEXT: retq
|
||||
%y = lshr i16 %x, 1
|
||||
ret i16 %y
|
||||
}
|
||||
|
||||
; TODO: we should be able to use movzx here.
|
||||
define i32 @test2(i32 %x) {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: andl $65534, %edi # imm = 0xFFFE
|
||||
; CHECK-NEXT: shrl %edi
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: movzwl %di, %eax
|
||||
; CHECK-NEXT: shrl %eax
|
||||
; CHECK-NEXT: retq
|
||||
%y = and i32 %x, 65535
|
||||
%z = lshr i32 %y, 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user