Do some simple copy propagation through integer loads and stores when
promoting vector types. This helps a lot with inlined functions when using
the ARM soft float ABI. Fixes <rdar://problem/9184212>.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128453 91177308-0d34-0410-b5e6-96231b3b80d8
commit 9827b78b51
parent ad7d8a598b
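To make the change concrete, here is a hedged sketch (not taken from the
commit; the function @roundtrip and all value names are invented) of the
pattern that inlining produces under the ARM soft float ABI: a vector copied
through a full-width integer load/store pair, which previously demoted the
alloca to one big integer instead of a vector:

  ; Hypothetical reduction of the inlined soft-float pattern.
  define <4 x float> @roundtrip(<4 x float> %v) {
    %a = alloca <4 x float>, align 16
    store <4 x float> %v, <4 x float>* %a
    %p = bitcast <4 x float>* %a to i128*
    %i = load i128* %p                     ; full-width integer load
    %b = alloca i128, align 16
    store i128 %i, i128* %b                ; full-width integer store
    %q = bitcast i128* %b to <4 x float>*
    %w = load <4 x float>* %q
    ret <4 x float> %w
  }
  ; With this patch, the i128 load/store are treated as bitcasts of the
  ; promoted <4 x float> values, so both allocas can be eliminated.

The diff below first threads an IsLoadOrStore flag into MergeInType, then
uses it to ignore full-width accesses, and finally reorders
ConvertScalar_ExtractValue so same-size scalar reads also become bitcasts.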
lib/Transforms/Scalar/ScalarReplAggregates.cpp

@@ -252,7 +252,7 @@ public:
 
 private:
   bool CanConvertToScalar(Value *V, uint64_t Offset);
-  void MergeInType(const Type *In, uint64_t Offset);
+  void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore);
   bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
   void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
 
@@ -315,7 +315,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
 /// large) integer type with extract and insert operations where the loads
 /// and stores would mutate the memory. We mark this by setting VectorTy
 /// to VoidTy.
-void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
+void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
+                                      bool IsLoadOrStore) {
   // If we already decided to turn this into a blob of integer memory, there is
   // nothing to be done.
   if (VectorTy && VectorTy->isVoidTy())
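The doc comment above refers to the integer fallback: when an access cannot
be mapped onto any vector element, MergeInType sets VectorTy to VoidTy and
the alloca is later rewritten as one large integer. A minimal, assumed
example of such an access (the names %raw, %p, %h are invented; %raw points
at a 16-byte alloca):

  %p = getelementptr i8* %raw, i32 1   ; byte offset 1 into the alloca
  %h = bitcast i8* %p to i16*
  %v = load i16* %h                    ; EltSize == 2, Offset == 1
  ; Offset % EltSize != 0, so no element type is compatible and the
  ; alloca falls back to the "blob of integer memory" case above.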
@@ -331,10 +332,14 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
   } else if (In->isFloatTy() || In->isDoubleTy() ||
              (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
               isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+    // Full width accesses can be ignored, because they can always be turned
+    // into bitcasts.
+    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+    if (IsLoadOrStore && EltSize == AllocaSize)
+      return;
     // If we're accessing something that could be an element of a vector, see
     // if the implied vector agrees with what we already have and if Offset is
     // compatible with it.
-    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
     if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
         (VectorTy == 0 ||
          cast<VectorType>(VectorTy)->getElementType()
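This early return is the heart of the copy propagation: a load or store of
the entire alloca as an integer no longer blocks vector promotion, because
it can be rewritten as a single bitcast of the promoted value. The test6
change at the bottom of this diff checks exactly that; a hand-written
before/after sketch with assumed names %A and %V:

  ; Before promotion: an 8-byte alloca %A holding a <2 x float>, read
  ; back through a full-width integer load.
  %P = bitcast <2 x float>* %A to i64*
  %tmp = load i64* %P
  ; After %A is promoted to the SSA value %V, the full-width access
  ; becomes a bitcast instead of an integer extraction:
  %tmp = bitcast <2 x float> %V to i64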
@@ -442,7 +447,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
       if (LI->getType()->isX86_MMXTy())
         return false;
       HadNonMemTransferAccess = true;
-      MergeInType(LI->getType(), Offset);
+      MergeInType(LI->getType(), Offset, true);
       continue;
     }
 
@@ -453,7 +458,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
       if (SI->getOperand(0)->getType()->isX86_MMXTy())
         return false;
       HadNonMemTransferAccess = true;
-      MergeInType(SI->getOperand(0)->getType(), Offset);
+      MergeInType(SI->getOperand(0)->getType(), Offset, true);
       continue;
     }
 
@@ -691,11 +696,11 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
   // If the result alloca is a vector type, this is either an element
   // access or a bitcast to another vector type of the same size.
   if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
-    if (ToType->isVectorTy()) {
-      unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
-      if (ToTypeSize == AllocaSize)
-        return Builder.CreateBitCast(FromVal, ToType, "tmp");
+    unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+    if (ToTypeSize == AllocaSize)
+      return Builder.CreateBitCast(FromVal, ToType, "tmp");
 
+    if (ToType->isVectorTy()) {
       assert(isPowerOf2_64(AllocaSize / ToTypeSize) &&
              "Partial vector access of an alloca must have a power-of-2 size "
              "ratio.");
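Hoisting the same-size bitcast out of the isVectorTy() test means a
full-width scalar ToType now takes the cheap path on the read side as well.
A sketch of the resulting rewrite (assumed names; %FromVal stands for the
promoted vector value):

  ; ToType is i128 and TD.getTypeAllocSize(i128) == AllocaSize == 16:
  %i = bitcast <4 x float> %FromVal to i128
  ; instead of the shift-and-truncate integer extraction used for
  ; partial accesses.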
test/Transforms/ScalarRepl/inline-vector.ll (new file, 53 lines)

@@ -0,0 +1,53 @@
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
+; RUN: opt < %s -scalarrepl-ssa -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10.0.0"
+
+%struct.Vector4 = type { float, float, float, float }
+@f.vector = internal constant %struct.Vector4 { float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 }, align 16
+
+; CHECK: define void @f
+; CHECK-NOT: alloca
+; CHECK: phi <4 x float>
+
+define void @f() nounwind ssp {
+entry:
+  %i = alloca i32, align 4
+  %vector = alloca %struct.Vector4, align 16
+  %agg.tmp = alloca %struct.Vector4, align 16
+  %tmp = bitcast %struct.Vector4* %vector to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.Vector4* @f.vector to i8*), i32 16, i32 16, i1 false)
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %storemerge = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  store i32 %storemerge, i32* %i, align 4
+  %cmp = icmp slt i32 %storemerge, 1000000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp2 = bitcast %struct.Vector4* %agg.tmp to i8*
+  %tmp3 = bitcast %struct.Vector4* %vector to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
+  %0 = bitcast %struct.Vector4* %agg.tmp to [2 x i64]*
+  %1 = load [2 x i64]* %0, align 16
+  %tmp2.i = extractvalue [2 x i64] %1, 0
+  %tmp3.i = zext i64 %tmp2.i to i128
+  %tmp10.i = bitcast i128 %tmp3.i to <4 x float>
+  %sub.i.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp10.i
+  %2 = bitcast %struct.Vector4* %vector to <4 x float>*
+  store <4 x float> %sub.i.i, <4 x float>* %2, align 16
+  %tmp4 = load i32* %i, align 4
+  %inc = add nsw i32 %tmp4, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %x = getelementptr inbounds %struct.Vector4* %vector, i32 0, i32 0
+  %tmp5 = load float* %x, align 16
+  %conv = fpext float %tmp5 to double
+  %call = call i32 (...)* @printf(double %conv) nounwind
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+declare i32 @printf(...)
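The CHECK lines assert that both Vector4 allocas disappear and that the
loop-carried value becomes a vector phi. A rough, assumed sketch of the
rewritten loop header (invented register names; the test only checks for
the 'phi <4 x float>' opcode and type):

  for.cond:                               ; preds = %for.body, %entry
    %vec = phi <4 x float> [ %init, %entry ], [ %sub.i.i, %for.body ]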
test/Transforms/ScalarRepl/vector_promote.ll

@@ -94,7 +94,7 @@ define i64 @test6(<2 x float> %X) {
   %tmp = load i64* %P
   ret i64 %tmp
 ; CHECK: @test6
-; CHECK: bitcast <2 x float> %X to <1 x i64>
+; CHECK: bitcast <2 x float> %X to i64
 ; CHECK: ret i64
 }
 