Teach SROA rewriteVectorizedStoreInst to handle cases when the loaded value is narrower than the stored value. rdar://12713675

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168227 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2012-11-17 00:05:06 +00:00
parent 784c5bb8b5
commit fd22019ec3
2 changed files with 67 additions and 33 deletions

View File

@@ -2559,18 +2559,55 @@ private:
return NewPtr == &NewAI && !LI.isVolatile();
}
// Rewrite the slice of an alloca-wide integer store that overlaps this
// partition: extract the Size bytes starting at BeginOffset from the stored
// value and store them into the new (narrower) alloca.
//
// \param ValueTy  type of the original stored value; asserts below require it
//                 to be a byte-multiple integer exactly as wide as the old
//                 alloca (only alloca-wide stores are split this way).
// \param Size     number of bytes of the stored value covered by this
//                 partition (EndOffset - BeginOffset at the call sites).
// \returns true when the narrow slice could be stored directly over the whole
//          new alloca (converted to NewAllocaTy), false when it was stored
//          through an offset-adjusted pointer instead.
bool rewriteWideStoreInst(IRBuilder<> &IRB, StoreInst &SI, Type *ValueTy,
unsigned Size) {
assert(!SI.isVolatile());
assert(ValueTy->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(ValueTy->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(ValueTy) &&
"Non-byte-multiple bit width");
assert(ValueTy->getIntegerBitWidth() ==
TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
"Only alloca-wide stores can be split and recomposed");
// Pull the Size*8-bit sub-integer at BeginOffset out of the stored value.
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy,
BeginOffset, getName(".extract"));
StoreInst *NewSI;
// If this slice starts exactly at the new alloca and the narrow integer can
// be value-converted to the new alloca's type, store straight over NewAI;
// otherwise store through a pointer adjusted to the partition's offset.
bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
canConvertValue(TD, NarrowTy, NewAllocaTy);
if (IsConvertable)
NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy),
&NewAI, NewAI.getAlignment());
else
NewSI = IRB.CreateAlignedStore(
V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
getPartitionTypeAlign(NarrowTy));
// NewSI is only read by the DEBUG print below; the cast keeps release
// builds (where DEBUG() compiles away) free of unused-variable warnings.
(void)NewSI;
// Queue the original store for deletion exactly once, even if several
// partitions of the split store are rewritten.
if (Pass.DeadSplitInsts.insert(&SI))
Pass.DeadInsts.push_back(&SI);
DEBUG(dbgs() << " to: " << *NewSI << "\n");
return IsConvertable;
}
// Rewrite a store into a partition that has been promoted to a vector type:
// either insert the stored scalar into one element of the vector, or convert
// the whole stored value to the vector type and store it outright. The
// change in this hunk routes stores whose value is *narrower* than the
// stored-to range through the new rewriteWideStoreInst helper (rdar://12713675).
//
// NOTE(review): this span is a diff hunk with its +/- markers stripped, so
// pre- and post-patch variants of some lines appear back-to-back (the
// V->getType() lines vs. the ValueTy lines below); it is not compilable
// as-is and the code text has been left byte-identical.
bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, StoreInst &SI,
Value *OldOp) {
Value *V = SI.getValueOperand();
if (V->getType() == ElementTy ||
Type *ValueTy = V->getType();
if (ValueTy == ElementTy ||
BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
// Element-sized (or partition-partial) store: load the current vector,
// insert the converted scalar at the right index, and store it back.
if (V->getType() != ElementTy)
if (ValueTy != ElementTy)
V = convertValue(TD, IRB, V, ElementTy);
LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".load"));
V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
getName(".insert"));
} else if (V->getType() != VecTy) {
} else if (ValueTy != VecTy) {
uint64_t Size = EndOffset - BeginOffset;
// New in this patch: if the stored range is narrower than the value
// being stored, split off just that slice instead of converting.
if (Size < TD.getTypeStoreSize(ValueTy))
return rewriteWideStoreInst(IRB, SI, ValueTy, Size);
V = convertValue(TD, IRB, V, VecTy);
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
@@ -2613,36 +2650,8 @@ private:
// NOTE(review): fragment of a larger store-rewriting method whose signature
// lies outside this hunk; +/- diff markers were stripped. The inline
// narrow-store body below (the asserts through `return IsConvertable;`) is
// the *removed* side of the diff — the patch factors it out verbatim into
// rewriteWideStoreInst — and the two-line call after the closing brace is
// the *added* replacement. Code text left byte-identical.
Type *ValueTy = SI.getValueOperand()->getType();
uint64_t Size = EndOffset - BeginOffset;
if (Size < TD.getTypeStoreSize(ValueTy)) {
assert(!SI.isVolatile());
assert(ValueTy->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(ValueTy->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(ValueTy) &&
"Non-byte-multiple bit width");
assert(ValueTy->getIntegerBitWidth() ==
TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
"Only alloca-wide stores can be split and recomposed");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy,
BeginOffset, getName(".extract"));
StoreInst *NewSI;
bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
canConvertValue(TD, NarrowTy, NewAllocaTy);
if (IsConvertable)
NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy),
&NewAI, NewAI.getAlignment());
else
NewSI = IRB.CreateAlignedStore(
V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
getPartitionTypeAlign(NarrowTy));
(void)NewSI;
if (Pass.DeadSplitInsts.insert(&SI))
Pass.DeadInsts.push_back(&SI);
DEBUG(dbgs() << " to: " << *NewSI << "\n");
return IsConvertable;
}
// Post-patch replacement for the block above: delegate to the shared helper.
if (Size < TD.getTypeStoreSize(ValueTy))
return rewriteWideStoreInst(IRB, SI, ValueTy, Size);
if (IntTy && ValueTy->isIntegerTy())
return rewriteIntegerStore(IRB, SI);

View File

@@ -0,0 +1,25 @@
; RUN: opt < %s -sroa -S | FileCheck %s
; rdar://12713675
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Regression test for rdar://12713675: the i64 store is alloca-wide, but the
; <2 x i16> load reads only the low 32 bits of %tmp. SROA should now split
; the wide store (via rewriteWideStoreInst / rewriteVectorizedStoreInst)
; into a trunc of %x plus a bitcast, eliminating the alloca, the store, and
; the load entirely — the CHECK-NOT lines pin that down.
define <2 x i16> @test1(i64 %x) nounwind ssp {
; CHECK: @test1
entry:
%tmp = alloca i64, align 8
; Conditional branch keeps bb1/bb2 both live so the alloca has a
; partial-use pattern rather than being trivially dead.
br i1 undef, label %bb1, label %bb2
; CHECK-NOT: alloca
bb1:
store i64 %x, i64* %tmp, align 8
; CHECK-NOT: store
%0 = bitcast i64* %tmp to <2 x i16>*
%1 = load <2 x i16>* %0, align 8
; CHECK-NOT: load
; CHECK: trunc i64 %x to i32
; CHECK: bitcast i32
ret <2 x i16> %1
bb2:
ret <2 x i16> < i16 0, i16 0 >
}