llvm-mirror/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
Chris Lattner 235913be77 Simplify and generalize the SROA "convert to scalar" transformation to
be able to handle *ANY* alloca that is poked by loads and stores of 
bitcasts and GEPs with constant offsets.  Before the code had a number
of annoying limitations and caused it to miss cases such as storing into
holes in structs and complex casts (as in bitfield-sroa) where we had
unions of bitfields etc.  This also handles a number of important cases
that are exposed due to the ABI lowering stuff we do to pass stuff by
value.

One case that is pretty great is that we compile 
2006-11-07-InvalidArrayPromote.ll into:

define i32 @func(<4 x float> %v0, <4 x float> %v1) nounwind {
	%tmp10 = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %v1)
	%tmp105 = bitcast <4 x i32> %tmp10 to i128
	%tmp1056 = zext i128 %tmp105 to i256	
	%tmp.upgrd.43 = lshr i256 %tmp1056, 96
	%tmp.upgrd.44 = trunc i256 %tmp.upgrd.43 to i32	
	ret i32 %tmp.upgrd.44
}

which turns into:

_func:
	subl	$28, %esp
	cvttps2dq	%xmm1, %xmm0
	movaps	%xmm0, (%esp)
	movl	12(%esp), %eax
	addl	$28, %esp
	ret

Which is pretty good code all things considering :).

One effect of this is that SROA will start generating arbitrary bitwidth 
integers that are a multiple of 8 bits.  In the case above, we got a 
256 bit integer, but the codegen guys assure me that it can handle the 
simple and/or/shift/zext stuff that we're doing on these operations.

This addresses rdar://6532315

llvm-svn: 63469
2009-01-31 02:28:54 +00:00

20 lines
1.0 KiB
LLVM

; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
define i32 @func(<4 x float> %v0, <4 x float> %v1) nounwind {
%vsiidx = alloca [2 x <4 x i32>], align 16 ; <[2 x <4 x i32>]*> [#uses=3]
%tmp = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v0 ) ; <<4 x i32>> [#uses=2]
%tmp.upgrd.1 = bitcast <4 x i32> %tmp to <2 x i64> ; <<2 x i64>> [#uses=0]
%tmp.upgrd.2 = getelementptr [2 x <4 x i32>]* %vsiidx, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
store <4 x i32> %tmp, <4 x i32>* %tmp.upgrd.2
%tmp10 = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v1 ) ; <<4 x i32>> [#uses=2]
%tmp10.upgrd.3 = bitcast <4 x i32> %tmp10 to <2 x i64> ; <<2 x i64>> [#uses=0]
%tmp14 = getelementptr [2 x <4 x i32>]* %vsiidx, i32 0, i32 1 ; <<4 x i32>*> [#uses=1]
store <4 x i32> %tmp10, <4 x i32>* %tmp14
%tmp15 = getelementptr [2 x <4 x i32>]* %vsiidx, i32 0, i32 0, i32 4 ; <i32*> [#uses=1]
%tmp.upgrd.4 = load i32* %tmp15 ; <i32> [#uses=1]
ret i32 %tmp.upgrd.4
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)