mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-11 05:35:11 +00:00
2e0d5f8432
be able to handle *ANY* alloca that is poked by loads and stores of bitcasts and GEPs with constant offsets. Before the code had a number of annoying limitations and caused it to miss cases such as storing into holes in structs and complex casts (as in bitfield-sroa) where we had unions of bitfields etc. This also handles a number of important cases that are exposed due to the ABI lowering stuff we do to pass stuff by value. One case that is pretty great is that we compile 2006-11-07-InvalidArrayPromote.ll into: define i32 @func(<4 x float> %v0, <4 x float> %v1) nounwind { %tmp10 = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %v1) %tmp105 = bitcast <4 x i32> %tmp10 to i128 %tmp1056 = zext i128 %tmp105 to i256 %tmp.upgrd.43 = lshr i256 %tmp1056, 96 %tmp.upgrd.44 = trunc i256 %tmp.upgrd.43 to i32 ret i32 %tmp.upgrd.44 } which turns into: _func: subl $28, %esp cvttps2dq %xmm1, %xmm0 movaps %xmm0, (%esp) movl 12(%esp), %eax addl $28, %esp ret Which is pretty good code all things considering :). One effect of this is that SROA will start generating arbitrary bitwidth integers that are a multiple of 8 bits. In the case above, we got a 256 bit integer, but the codegen guys assure me that it can handle the simple and/or/shift/zext stuff that we're doing on these operations. This addresses rdar://6532315 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@63469 91177308-0d34-0410-b5e6-96231b3b80d8
20 lines
1.0 KiB
LLVM
20 lines
1.0 KiB
LLVM
; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
|
|
|
|
define i32 @func(<4 x float> %v0, <4 x float> %v1) nounwind {
|
|
%vsiidx = alloca [2 x <4 x i32>], align 16 ; <[2 x <4 x i32>]*> [#uses=3]
|
|
%tmp = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v0 ) ; <<4 x i32>> [#uses=2]
|
|
%tmp.upgrd.1 = bitcast <4 x i32> %tmp to <2 x i64> ; <<2 x i64>> [#uses=0]
|
|
%tmp.upgrd.2 = getelementptr [2 x <4 x i32>]* %vsiidx, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
|
|
store <4 x i32> %tmp, <4 x i32>* %tmp.upgrd.2
|
|
%tmp10 = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v1 ) ; <<4 x i32>> [#uses=2]
|
|
%tmp10.upgrd.3 = bitcast <4 x i32> %tmp10 to <2 x i64> ; <<2 x i64>> [#uses=0]
|
|
%tmp14 = getelementptr [2 x <4 x i32>]* %vsiidx, i32 0, i32 1 ; <<4 x i32>*> [#uses=1]
|
|
store <4 x i32> %tmp10, <4 x i32>* %tmp14
|
|
%tmp15 = getelementptr [2 x <4 x i32>]* %vsiidx, i32 0, i32 0, i32 4 ; <i32*> [#uses=1]
|
|
%tmp.upgrd.4 = load i32* %tmp15 ; <i32> [#uses=1]
|
|
ret i32 %tmp.upgrd.4
|
|
}
|
|
|
|
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
|
|
|