[DAG] Don't permit EXTLOAD when combining FSHL/FSHR consecutive loads (PR45265)

Technically we can permit EXTLOAD of the LHS operand, but only if all the extended bits are shifted out. Until we have test coverage for that case, I'm just disabling this to fix PR45265.
This commit is contained in:
Simon Pilgrim 2020-03-21 10:33:53 +00:00
parent c6d799156a
commit c5fd9e3888
2 changed files with 67 additions and 1 deletions
llvm
lib/CodeGen/SelectionDAG
test/CodeGen/X86

@@ -8325,13 +8325,15 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
// fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
// TODO - bigendian support once we have test coverage.
// TODO - can we merge this with CombineConseutiveLoads/MatchLoadCombine?
// TODO - permit LHS EXTLOAD if extensions are shifted out.
if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
!DAG.getDataLayout().isBigEndian()) {
auto *LHS = dyn_cast<LoadSDNode>(N0);
auto *RHS = dyn_cast<LoadSDNode>(N1);
if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
LHS->getAddressSpace() == RHS->getAddressSpace() &&
(LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS)) {
(LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
ISD::isNON_EXTLoad(LHS)) {
if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
SDLoc DL(RHS);
uint64_t PtrOff =

@@ -918,3 +918,67 @@ define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounw
ret <4 x i32> %f
}
%struct.S = type { [11 x i8], i8 }
; Regression test for PR45265: an i88 load + ashr 40 + trunc lowers to a
; funnel shift across two loads, where the high 3-byte part is assembled
; from a zero-extended i16 load plus a SIGN-extended i8 load (see the
; movsbl/movsbq in the checks below).  The DAG combine that folds
; consecutive fshl/fshr loads into a single load must therefore reject
; the extending LHS load here -- its extended bits are not all shifted
; out, so folding would miscompile.  Checks were presumably generated by
; update_llc_test_checks.py -- regenerate rather than hand-edit.
define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
; X32-SSE2-LABEL: PR45265:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    pushl %edi
; X32-SSE2-NEXT:    pushl %esi
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT:    leal (%eax,%eax,2), %edx
; X32-SSE2-NEXT:    movzwl 8(%ecx,%edx,4), %esi
; X32-SSE2-NEXT:    movsbl 10(%ecx,%edx,4), %edi
; X32-SSE2-NEXT:    shll $16, %edi
; X32-SSE2-NEXT:    orl %edi, %esi
; X32-SSE2-NEXT:    movl 4(%ecx,%edx,4), %ecx
; X32-SSE2-NEXT:    shrdl $8, %esi, %ecx
; X32-SSE2-NEXT:    xorl %eax, %ecx
; X32-SSE2-NEXT:    sarl $31, %eax
; X32-SSE2-NEXT:    sarl $31, %edi
; X32-SSE2-NEXT:    shldl $24, %esi, %edi
; X32-SSE2-NEXT:    xorl %eax, %edi
; X32-SSE2-NEXT:    orl %edi, %ecx
; X32-SSE2-NEXT:    jne .LBB44_1
; X32-SSE2-NEXT:  # %bb.2:
; X32-SSE2-NEXT:    popl %esi
; X32-SSE2-NEXT:    popl %edi
; X32-SSE2-NEXT:    jmp _Z3foov # TAILCALL
; X32-SSE2-NEXT:  .LBB44_1:
; X32-SSE2-NEXT:    popl %esi
; X32-SSE2-NEXT:    popl %edi
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: PR45265:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movslq %edi, %rax
; X64-AVX2-NEXT:    leaq (%rax,%rax,2), %rcx
; X64-AVX2-NEXT:    movsbq 10(%rsi,%rcx,4), %rdx
; X64-AVX2-NEXT:    shlq $16, %rdx
; X64-AVX2-NEXT:    movzwl 8(%rsi,%rcx,4), %edi
; X64-AVX2-NEXT:    orq %rdx, %rdi
; X64-AVX2-NEXT:    movq (%rsi,%rcx,4), %rcx
; X64-AVX2-NEXT:    shrdq $40, %rdi, %rcx
; X64-AVX2-NEXT:    cmpq %rax, %rcx
; X64-AVX2-NEXT:    jne .LBB44_1
; X64-AVX2-NEXT:  # %bb.2:
; X64-AVX2-NEXT:    jmp _Z3foov # TAILCALL
; X64-AVX2-NEXT:  .LBB44_1:
; X64-AVX2-NEXT:    retq
; Index the 12-byte %struct.S array and reinterpret the first 11 bytes
; as a single unaligned i88 load.
%3 = sext i32 %0 to i64
%4 = getelementptr inbounds %struct.S, %struct.S* %1, i64 %3
%5 = bitcast %struct.S* %4 to i88*
%6 = load i88, i88* %5, align 1
; Arithmetic shift by 40 then trunc to i64: keeps bits [40,104) of the
; sign-extended i88, i.e. the upper bytes supplied by the extending load.
%7 = ashr i88 %6, 40
%8 = trunc i88 %7 to i64
%9 = icmp eq i64 %8, %3
br i1 %9, label %10, label %11
10:
tail call void @_Z3foov()
br label %11
11:
ret void
}
declare dso_local void @_Z3foov()