diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7a68d18ef9b5..a6537a0bf97e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8325,13 +8325,15 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
     // TODO - bigendian support once we have test coverage.
     // TODO - can we merge this with CombineConseutiveLoads/MatchLoadCombine?
+    // TODO - permit LHS EXTLOAD if the extended bits are all shifted out.
     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
         !DAG.getDataLayout().isBigEndian()) {
       auto *LHS = dyn_cast<LoadSDNode>(N0);
       auto *RHS = dyn_cast<LoadSDNode>(N1);
       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
           LHS->getAddressSpace() == RHS->getAddressSpace() &&
-          (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS)) {
+          (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
+          ISD::isNON_EXTLoad(LHS)) {
         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
           SDLoc DL(RHS);
           uint64_t PtrOff =
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index 517880fb88e5..f78fe2c00eb3 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
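@@ -918,3 +918,69 @@ define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounw
   ret <4 x i32> %f
 }
 
+; PR45265 - don't fold a funnel shift of consecutive loads into a single load
+; when the high operand is an extending load (the sign-extended top i88 byte).
+%struct.S = type { [11 x i8], i8 }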
+define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
+; X32-SSE2-LABEL: PR45265:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    pushl %edi
+; X32-SSE2-NEXT:    pushl %esi
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE2-NEXT:    leal (%eax,%eax,2), %edx
+; X32-SSE2-NEXT:    movzwl 8(%ecx,%edx,4), %esi
+; X32-SSE2-NEXT:    movsbl 10(%ecx,%edx,4), %edi
+; X32-SSE2-NEXT:    shll $16, %edi
+; X32-SSE2-NEXT:    orl %edi, %esi
+; X32-SSE2-NEXT:    movl 4(%ecx,%edx,4), %ecx
+; X32-SSE2-NEXT:    shrdl $8, %esi, %ecx
+; X32-SSE2-NEXT:    xorl %eax, %ecx
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    shldl $24, %esi, %edi
+; X32-SSE2-NEXT:    xorl %eax, %edi
+; X32-SSE2-NEXT:    orl %edi, %ecx
+; X32-SSE2-NEXT:    jne .LBB44_1
+; X32-SSE2-NEXT:  # %bb.2:
+; X32-SSE2-NEXT:    popl %esi
+; X32-SSE2-NEXT:    popl %edi
+; X32-SSE2-NEXT:    jmp _Z3foov # TAILCALL
+; X32-SSE2-NEXT:  .LBB44_1:
+; X32-SSE2-NEXT:    popl %esi
+; X32-SSE2-NEXT:    popl %edi
+; X32-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: PR45265:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movslq %edi, %rax
+; X64-AVX2-NEXT:    leaq (%rax,%rax,2), %rcx
+; X64-AVX2-NEXT:    movsbq 10(%rsi,%rcx,4), %rdx
+; X64-AVX2-NEXT:    shlq $16, %rdx
+; X64-AVX2-NEXT:    movzwl 8(%rsi,%rcx,4), %edi
+; X64-AVX2-NEXT:    orq %rdx, %rdi
+; X64-AVX2-NEXT:    movq (%rsi,%rcx,4), %rcx
+; X64-AVX2-NEXT:    shrdq $40, %rdi, %rcx
+; X64-AVX2-NEXT:    cmpq %rax, %rcx
+; X64-AVX2-NEXT:    jne .LBB44_1
+; X64-AVX2-NEXT:  # %bb.2:
+; X64-AVX2-NEXT:    jmp _Z3foov # TAILCALL
+; X64-AVX2-NEXT:  .LBB44_1:
+; X64-AVX2-NEXT:    retq
+  %3 = sext i32 %0 to i64
+  %4 = getelementptr inbounds %struct.S, %struct.S* %1, i64 %3
+  %5 = bitcast %struct.S* %4 to i88*
+  %6 = load i88, i88* %5, align 1
+  %7 = ashr i88 %6, 40
+  %8 = trunc i88 %7 to i64
+  %9 = icmp eq i64 %8, %3
+  br i1 %9, label %10, label %11
+
+10:
+  tail call void @_Z3foov()
+  br label %11
+
+11:
+  ret void
+}
+declare dso_local void @_Z3foov()