Fix a bug in ReduceLoadWidth that wasn't handling extending loads properly.
We miscompiled the testcase into:

_test:                                  ## @test
	movl	$128, (%rdi)
	movzbl	1(%rdi), %eax
	ret

Now we get a proper:

_test:                                  ## @test
	movl	$128, (%rdi)
	movsbl	(%rdi), %eax
	movzbl	%ah, %eax
	ret

This fixes PR8757.
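
For context, here is a minimal standalone C++ sketch (not part of the commit) of
the arithmetic the testcase performs. It assumes a little-endian target, matching
the test's x86_64 triple, and that signed right shift behaves arithmetically as
it does with mainstream compilers; it shows why a bare zero-extending load of the
byte at offset 1 is a miscompile:

// Sketch (not part of the commit): what the PR8757 testcase computes.
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t reference(int32_t *P) {
  *P = 128;                                // store i32 128, i32* %P
  int16_t t = (int16_t)*P;                 // trunc i32 -> i16   (0x0080)
  t = (int16_t)(t << 8);                   // shl  i16, 8        (0x8000)
  t = (int16_t)(t >> 8);                   // ashr i16, 8        (0xFF80)
  return (uint16_t)t >> 8;                 // lshr i16, 8; zext  (255)
}

static uint32_t miscompiled(int32_t *P) {
  *P = 128;
  unsigned char b;                         // what "movzbl 1(%rdi), %eax" did:
  std::memcpy(&b, (const char *)P + 1, 1); // zero-extend the byte at offset 1,
  return b;                                // which is 0 for little-endian 128
}

int main() {
  int32_t x;
  std::printf("correct: %u  miscompiled: %u\n", reference(&x), miscompiled(&x));
  // prints "correct: 255  miscompiled: 0"
}

The fixed code sign-extends the low byte first (movsbl) and only then extracts
the second byte of that 16-bit value (movzbl %ah), yielding 255 for the stored
128; the old code read the in-memory byte at offset 1, which is 0.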



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122392 91177308-0d34-0410-b5e6-96231b3b80d8
Chris Lattner 2010-12-22 08:02:57 +00:00
parent 7a2a7faf9c
commit cbf68dfbc0
2 changed files with 25 additions and 4 deletions

View File

@@ -4241,12 +4241,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
           return SDValue();
       }
 
+      // At this point, we must have a load or else we can't do the transform.
+      if (!isa<LoadSDNode>(N0)) return SDValue();
+
       // If the shift amount is larger than the input type then we're not
       // accessing any of the loaded bytes. If the load was a zextload/extload
       // then the result of the shift+trunc is zero/undef (handled elsewhere).
       // If the load was a sextload then the result is a splat of the sign bit
       // of the extended byte. This is not worth optimizing for.
-      if (ShAmt >= VT.getSizeInBits())
+      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
         return SDValue();
     }
   }
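
To illustrate the guard with assumed numbers (not taken from the commit): when
the shifted value comes from an extending load, the node's value type can be
wider than the memory type, so comparing ShAmt against the value type let the
combine form a narrow load of bytes that exist only as extension bits. A rough
standalone sketch:

// Rough sketch (not LLVM code): a sextload of i8 widened to i16, shifted right
// by 8. Only 8 bits actually come from memory.
#include <cstdio>

int main() {
  unsigned ShAmt      = 8;   // shift amount reaching ReduceLoadWidth
  unsigned ValueBits  = 16;  // width of the extended value (old comparison)
  unsigned MemoryBits = 8;   // bits the load really reads  (new comparison)

  bool oldBailsOut = ShAmt >= ValueBits;   // false: combine formed a bogus
                                           // narrow load at offset ShAmt/8
  bool newBailsOut = ShAmt >= MemoryBits;  // true: transform correctly gives up

  std::printf("old guard bails: %d, new guard bails: %d\n",
              oldBailsOut, newBailsOut);
}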

View File

@@ -6,7 +6,7 @@ target triple = "x86_64-pc-linux-gnu"
 ; DAGCombiner should fold this code in finite time.
 ; rdar://8606584
-define void @D() nounwind readnone {
+define void @test1() nounwind readnone {
 bb.nph:
   br label %while.cond
@@ -33,10 +33,10 @@ while.end:          ; preds = %while.cond
 ; DAGCombiner shouldn't fold the sdiv (ashr) away.
 ; rdar://8636812
-; CHECK: main:
+; CHECK: test2:
 ; CHECK: sarl
-define i32 @main() nounwind {
+define i32 @test2() nounwind {
 entry:
   %i = alloca i32, align 4
   %j = alloca i8, align 1
@@ -63,3 +63,21 @@ if.end:          ; preds = %entry
 declare void @abort() noreturn
 declare void @exit(i32) noreturn
+
+; DAG Combiner can't fold this into a load of the 1'th byte.
+; PR8757
+define i32 @test3(i32 *%P) nounwind ssp {
+  volatile store i32 128, i32* %P
+  %tmp4.pre = load i32* %P
+  %phitmp = trunc i32 %tmp4.pre to i16
+  %phitmp13 = shl i16 %phitmp, 8
+  %phitmp14 = ashr i16 %phitmp13, 8
+  %phitmp15 = lshr i16 %phitmp14, 8
+  %phitmp16 = zext i16 %phitmp15 to i32
+  ret i32 %phitmp16
+; CHECK: movl $128, (%rdi)
+; CHECK-NEXT: movsbl (%rdi), %eax
+; CHECK-NEXT: movzbl %ah, %eax
+; CHECK-NEXT: ret
+}