diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 46aaee4e793..0610844fac4 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -88,8 +88,19 @@ namespace {
     /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
     bool isMemCpyDst : 1;
 
+    /// hasSubelementAccess - This is true if a subelement of the alloca is
+    /// ever accessed, or false if the alloca is only accessed with mem
+    /// intrinsics or load/store that only access the entire alloca at once.
+    bool hasSubelementAccess : 1;
+
+    /// hasALoadOrStore - This is true if there are any loads or stores to it.
+    /// The alloca may just be accessed with memcpy, for example, which would
+    /// not set this.
+    bool hasALoadOrStore : 1;
+
     AllocaInfo()
-      : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {}
+      : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
+        hasSubelementAccess(false), hasALoadOrStore(false) {}
   };
 
   unsigned SRThreshold;
@@ -1103,6 +1114,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
         const Type *LIType = LI->getType();
         isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType), LIType,
                         false, Info);
+        Info.hasALoadOrStore = true;
       } else
         MarkUnsafe(Info);
     } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
@@ -1111,6 +1123,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
         const Type *SIType = SI->getOperand(0)->getType();
         isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType), SIType,
                         true, Info);
+        Info.hasALoadOrStore = true;
       } else
         MarkUnsafe(Info);
     } else {
@@ -1217,13 +1230,17 @@ void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
     // This is also safe for references using a type that is compatible with
     // the type of the alloca, so that loads/stores can be rewritten using
     // insertvalue/extractvalue.
-    if (isCompatibleAggregate(MemOpType, AI->getAllocatedType()))
+    if (isCompatibleAggregate(MemOpType, AI->getAllocatedType())) {
+      Info.hasSubelementAccess = true;
       return;
+    }
   }
   // Check if the offset/size correspond to a component within the alloca type.
   const Type *T = AI->getAllocatedType();
-  if (TypeHasComponent(T, Offset, MemSize))
+  if (TypeHasComponent(T, Offset, MemSize)) {
+    Info.hasSubelementAccess = true;
     return;
+  }
 
   return MarkUnsafe(Info);
 }
@@ -1851,6 +1868,19 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
       HasPadding(AI->getAllocatedType(), *TD))
     return false;
 
+  // If the alloca never has an access to just *part* of it, but is accessed
+  // with loads and stores, then we should use ConvertToScalarInfo to promote
+  // the alloca instead of promoting each piece at a time and inserting fission
+  // and fusion code.
+  if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
+    // If the struct/array just has one element, use basic SRoA.
+    if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+      if (ST->getNumElements() > 1) return false;
+    } else {
+      if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
+        return false;
+    }
+  }
 
   return true;
 }
 
diff --git a/test/Transforms/ScalarRepl/basictest.ll b/test/Transforms/ScalarRepl/basictest.ll
index a26b62d0ad7..9676873c30c 100644
--- a/test/Transforms/ScalarRepl/basictest.ll
+++ b/test/Transforms/ScalarRepl/basictest.ll
@@ -1,11 +1,30 @@
-; RUN: opt < %s -scalarrepl -mem2reg -S | not grep alloca
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
-define i32 @test() {
+define i32 @test1() {
   %X = alloca { i32, float }                          ; <{ i32, float }*> [#uses=1]
   %Y = getelementptr { i32, float }* %X, i64 0, i32 0 ; <i32*> [#uses=2]
   store i32 0, i32* %Y
   %Z = load i32* %Y                                   ; <i32> [#uses=1]
   ret i32 %Z
+; CHECK: @test1
+; CHECK-NOT: alloca
+; CHECK: ret i32 0
+}
+
+; PR8980
+define i64 @test2(i64 %X) {
+  %A = alloca [8 x i8]
+  %B = bitcast [8 x i8]* %A to i64*
+
+  store i64 %X, i64* %B
+  br label %L2
+
+L2:
+  %Z = load i64* %B          ; <i64> [#uses=1]
+  ret i64 %Z
+; CHECK: @test2
+; CHECK-NOT: alloca
+; CHECK: ret i64 %X
 }
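
The PR8980 case above (test2) shows a multi-element alloca that the new check now hands off to ConvertToScalarInfo. For contrast, here is a minimal IR sketch of the single-element branch of that check ("if the struct/array just has one element, use basic SRoA"); the function and value names are invented for illustration and are not part of the patch:

; Only whole-alloca loads/stores here, so hasSubelementAccess stays false and
; hasALoadOrStore is set; the struct has a single element, so the new check in
; isSafeAllocaToScalarRepl falls through and ordinary per-element replacement
; is still expected to run.
define i64 @single_elt_sketch(i64 %X) {
  %A = alloca { i64 }
  %B = bitcast { i64 }* %A to i64*
  store i64 %X, i64* %B
  %Z = load i64* %B
  ret i64 %Z
}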