Fix a bug in ReduceLoadWidth that wasn't handling extending loads properly.
We miscompiled the testcase into:

_test:                                  ## @test
	movl	$128, (%rdi)
	movzbl	1(%rdi), %eax
	ret

Now we get a proper:

_test:                                  ## @test
	movl	$128, (%rdi)
	movsbl	(%rdi), %eax
	movzbl	%ah, %eax
	ret

This fixes PR8757.
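
For context, here is a minimal standalone C++ sketch (not part of the commit) of
the arithmetic the testcase performs. It assumes a little-endian target, matching
the test's x86_64 triple, and that signed right shift behaves arithmetically as
it does with mainstream compilers; it shows why a bare zero-extending load of the
byte at offset 1 is a miscompile:

// Sketch (not part of the commit): what the PR8757 testcase computes.
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t reference(int32_t *P) {
  *P = 128;                                // store i32 128, i32* %P
  int16_t t = (int16_t)*P;                 // trunc i32 -> i16   (0x0080)
  t = (int16_t)(t << 8);                   // shl  i16, 8        (0x8000)
  t = (int16_t)(t >> 8);                   // ashr i16, 8        (0xFF80)
  return (uint16_t)t >> 8;                 // lshr i16, 8; zext  (255)
}

static uint32_t miscompiled(int32_t *P) {
  *P = 128;
  unsigned char b;                         // what "movzbl 1(%rdi), %eax" did:
  std::memcpy(&b, (const char *)P + 1, 1); // zero-extend the byte at offset 1,
  return b;                                // which is 0 for little-endian 128
}

int main() {
  int32_t x;
  std::printf("correct: %u  miscompiled: %u\n", reference(&x), miscompiled(&x));
  // prints "correct: 255  miscompiled: 0"
}

The fixed code sign-extends the low byte first (movsbl) and only then extracts
the second byte of that 16-bit value (movzbl %ah), yielding 255 for the stored
128; the old code read the in-memory byte at offset 1, which is 0.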



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122392 91177308-0d34-0410-b5e6-96231b3b80d8
Chris Lattner 2010-12-22 08:02:57 +00:00
parent 7a2a7faf9c
commit cbf68dfbc0
2 changed files with 25 additions and 4 deletions

View File

@@ -4241,12 +4241,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
           return SDValue();
       }
 
+      // At this point, we must have a load or else we can't do the transform.
+      if (!isa<LoadSDNode>(N0)) return SDValue();
+
       // If the shift amount is larger than the input type then we're not
       // accessing any of the loaded bytes. If the load was a zextload/extload
       // then the result of the shift+trunc is zero/undef (handled elsewhere).
       // If the load was a sextload then the result is a splat of the sign bit
       // of the extended byte. This is not worth optimizing for.
-      if (ShAmt >= VT.getSizeInBits())
+      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
         return SDValue();
     }
   }
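
To illustrate the guard with assumed numbers (not taken from the commit): when
the shifted value comes from an extending load, the node's value type can be
wider than the memory type, so comparing ShAmt against the value type let the
combine form a narrow load of bytes that exist only as extension bits. A rough
standalone sketch:

// Rough sketch (not LLVM code): a sextload of i8 widened to i16, shifted right
// by 8. Only 8 bits actually come from memory.
#include <cstdio>

int main() {
  unsigned ShAmt      = 8;   // shift amount reaching ReduceLoadWidth
  unsigned ValueBits  = 16;  // width of the extended value (old comparison)
  unsigned MemoryBits = 8;   // bits the load really reads  (new comparison)

  bool oldBailsOut = ShAmt >= ValueBits;   // false: combine formed a bogus
                                           // narrow load at offset ShAmt/8
  bool newBailsOut = ShAmt >= MemoryBits;  // true: transform correctly gives up

  std::printf("old guard bails: %d, new guard bails: %d\n",
              oldBailsOut, newBailsOut);
}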

View File

@@ -6,7 +6,7 @@ target triple = "x86_64-pc-linux-gnu"
 ; DAGCombiner should fold this code in finite time.
 ; rdar://8606584
-define void @D() nounwind readnone {
+define void @test1() nounwind readnone {
 bb.nph:
   br label %while.cond
@@ -33,10 +33,10 @@ while.end:          ; preds = %while.cond
 ; DAGCombiner shouldn't fold the sdiv (ashr) away.
 ; rdar://8636812
-; CHECK: main:
+; CHECK: test2:
 ; CHECK: sarl
-define i32 @main() nounwind {
+define i32 @test2() nounwind {
 entry:
   %i = alloca i32, align 4
   %j = alloca i8, align 1
@@ -63,3 +63,21 @@ if.end:          ; preds = %entry
 declare void @abort() noreturn
 declare void @exit(i32) noreturn
+
+; DAG Combiner can't fold this into a load of the 1'th byte.
+; PR8757
+define i32 @test3(i32 *%P) nounwind ssp {
+  volatile store i32 128, i32* %P
+  %tmp4.pre = load i32* %P
+  %phitmp = trunc i32 %tmp4.pre to i16
+  %phitmp13 = shl i16 %phitmp, 8
+  %phitmp14 = ashr i16 %phitmp13, 8
+  %phitmp15 = lshr i16 %phitmp14, 8
+  %phitmp16 = zext i16 %phitmp15 to i32
+  ret i32 %phitmp16
+; CHECK: movl $128, (%rdi)
+; CHECK-NEXT: movsbl (%rdi), %eax
+; CHECK-NEXT: movzbl %ah, %eax
+; CHECK-NEXT: ret
+}