When we see an unaligned load from an insufficiently aligned global or
alloca, increase the alignment of the load, turning it into an aligned load.
This allows us to compile:

    #include <xmmintrin.h>
    __m128i foo(__m128i x) {
      static const unsigned int c_0[4] = { 0, 0, 0, 0 };
      __m128i v_Zero = _mm_loadu_si128((__m128i*)c_0);
      x = _mm_unpacklo_epi8(x, v_Zero);
      return x;
    }

into:

    _foo:
            punpcklbw _c_0.5944, %xmm0
            ret
            .data
            .lcomm _c_0.5944,16,4   # c_0.5944

instead of:

    _foo:
            movdqu _c_0.5944, %xmm1
            punpcklbw %xmm1, %xmm0
            ret
            .data
            .lcomm _c_0.5944,16,2   # c_0.5944

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40971 91177308-0d34-0410-b5e6-96231b3b80d8
Commit f2369f2042 (parent a333b41af9)
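For intuition before the diff, here is a minimal standalone C++ sketch of the
rule this patch adds; Object and getOrEnforceAlign are illustrative stand-ins
of my own, not LLVM API:

    // Minimal model of the new behavior (illustrative, not LLVM API).
    #include <cstdio>

    struct Object {
      unsigned Align;     // current alignment in bytes, a power of two
      bool IsDefinition;  // we may only raise objects we emit ourselves
    };

    // Return the alignment we can rely on, first raising the object's
    // alignment to PrefAlign when that is requested and we own the definition.
    unsigned getOrEnforceAlign(Object &O, unsigned PrefAlign) {
      if (PrefAlign > O.Align && O.IsDefinition)
        O.Align = PrefAlign;  // bump the object instead of keeping the slow load
      return O.Align;
    }

    int main() {
      Object C0{4, true};  // like c_0 above: a 4-byte-aligned static array
      if (getOrEnforceAlign(C0, 16) >= 16)
        std::puts("16 bytes: the unaligned load can become an aligned one");
      return 0;
    }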
lib/Transforms/Scalar/InstructionCombining.cpp

@@ -7492,13 +7492,23 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   return 0;
 }
 
-/// GetKnownAlignment - If the specified pointer has an alignment that we can
-/// determine, return it, otherwise return 0.
-static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
+/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+static unsigned GetOrEnforceKnownAlignment(Value *V, TargetData *TD,
+                                           unsigned PrefAlign = 0) {
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
     unsigned Align = GV->getAlignment();
     if (Align == 0 && TD)
       Align = TD->getPrefTypeAlignment(GV->getType()->getElementType());
+
+    // If there is a large requested alignment and we can, bump up the alignment
+    // of the global.
+    if (PrefAlign > Align && GV->hasInitializer()) {
+      GV->setAlignment(PrefAlign);
+      Align = PrefAlign;
+    }
     return Align;
   } else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
     unsigned Align = AI->getAlignment();
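(Only globals with an initializer, i.e. definitions in this module, get
bumped: for an external declaration the object's real alignment is fixed by
whichever module defines it, so the most we can do is report what we know.)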
@@ -7516,18 +7526,20 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
           (unsigned)TD->getABITypeAlignment(Type::Int64Ty));
       }
     }
+
+    // If there is a requested alignment and if this is an alloca, round up.  We
+    // don't do this for malloc, because some systems can't respect the request.
+    if (PrefAlign > Align && isa<AllocaInst>(AI)) {
+      AI->setAlignment(PrefAlign);
+      Align = PrefAlign;
+    }
     return Align;
   } else if (isa<BitCastInst>(V) ||
              (isa<ConstantExpr>(V) &&
               cast<ConstantExpr>(V)->getOpcode() == Instruction::BitCast)) {
-    User *CI = cast<User>(V);
-    if (isa<PointerType>(CI->getOperand(0)->getType()))
-      return GetKnownAlignment(CI->getOperand(0), TD);
-    return 0;
+    return GetOrEnforceKnownAlignment(cast<User>(V)->getOperand(0),
+                                      TD, PrefAlign);
   } else if (User *GEPI = dyn_castGetElementPtr(V)) {
-    unsigned BaseAlignment = GetKnownAlignment(GEPI->getOperand(0), TD);
-    if (BaseAlignment == 0) return 0;
-
     // If all indexes are zero, it is just the alignment of the base pointer.
     bool AllZeroOperands = true;
     for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i)
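Note that the bitcast case now forwards PrefAlign, so a requested alignment
propagates through pointer casts all the way to the underlying global or
alloca; the old code could only query. The isa<PointerType> guard is dropped,
presumably because a bitcast that produces a pointer must consume one.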
@@ -7536,9 +7548,15 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
         AllZeroOperands = false;
         break;
       }
-    if (AllZeroOperands)
-      return BaseAlignment;
+
+    if (AllZeroOperands) {
+      // Treat this like a bitcast.
+      return GetOrEnforceKnownAlignment(GEPI->getOperand(0), TD, PrefAlign);
+    }
+
+    unsigned BaseAlignment = GetOrEnforceKnownAlignment(GEPI->getOperand(0),TD);
+    if (BaseAlignment == 0) return 0;
 
     // Otherwise, if the base alignment is >= the alignment we expect for the
     // base pointer type, then we know that the resultant pointer is aligned at
     // least as much as its type requires.
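The GEP handling now splits into two cases: an all-zero-index GEP yields the
same address as its base, so PrefAlign is forwarded exactly as for a bitcast,
while for nonzero indices the function only queries (PrefAlign is not passed),
since raising the base object's alignment does not by itself make an interior
pointer more aligned. A small illustration of that last point, assuming a
power-of-two BaseAlign; the helper is mine, not the patch's:

    // What alignment is provable for base+Offset if the base object is
    // BaseAlign-aligned? (Illustrative helper, not from the patch.)
    #include <cstdio>

    unsigned derivedAlign(unsigned BaseAlign, unsigned Offset) {
      if (Offset == 0) return BaseAlign;        // same address as the base
      unsigned OffsetAlign = Offset & -Offset;  // largest power of 2 dividing Offset
      return OffsetAlign < BaseAlign ? OffsetAlign : BaseAlign;
    }

    int main() {
      std::printf("%u\n", derivedAlign(16, 0)); // 16: bumping the base helps
      std::printf("%u\n", derivedAlign(16, 4)); // 4: interior pointer stays 4-aligned
      return 0;
    }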
@@ -7608,15 +7626,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     // If we can determine a pointer alignment that is bigger than currently
     // set, update the alignment.
     if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
-      unsigned Alignment1 = GetKnownAlignment(MI->getOperand(1), TD);
-      unsigned Alignment2 = GetKnownAlignment(MI->getOperand(2), TD);
+      unsigned Alignment1 = GetOrEnforceKnownAlignment(MI->getOperand(1), TD);
+      unsigned Alignment2 = GetOrEnforceKnownAlignment(MI->getOperand(2), TD);
       unsigned Align = std::min(Alignment1, Alignment2);
       if (MI->getAlignment()->getZExtValue() < Align) {
         MI->setAlignment(ConstantInt::get(Type::Int32Ty, Align));
         Changed = true;
       }
     } else if (isa<MemSetInst>(MI)) {
-      unsigned Alignment = GetKnownAlignment(MI->getDest(), TD);
+      unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest(), TD);
       if (MI->getAlignment()->getZExtValue() < Alignment) {
         MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment));
         Changed = true;
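At these memintrinsic sites no PrefAlign is passed (it defaults to 0), so they
keep the old query-only behavior; the enforcement is reserved for call sites
that need one specific alignment, like the 16-byte vector cases below.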
@@ -7634,7 +7652,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::x86_sse2_loadu_dq:
     // Turn PPC lvx     -> load if the pointer is known aligned.
     // Turn X86 loadups -> load if the pointer is known aligned.
-    if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
+    if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
       Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1),
                                     PointerType::get(II->getType()), CI);
       return new LoadInst(Ptr);
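Unlike the memintrinsic sites, this one passes PrefAlign = 16, so the call
first tries to raise the underlying object to 16 bytes and only then performs
the >= 16 test; that is precisely what turns the movdqu from the commit
message into an aligned access (compare the sketch near the top).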
@@ -7643,7 +7661,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::ppc_altivec_stvx:
   case Intrinsic::ppc_altivec_stvxl:
     // Turn stvx -> store if the pointer is known aligned.
-    if (GetKnownAlignment(II->getOperand(2), TD) >= 16) {
+    if (GetOrEnforceKnownAlignment(II->getOperand(2), TD, 16) >= 16) {
       const Type *OpPtrTy = PointerType::get(II->getOperand(1)->getType());
       Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(2),
                                     OpPtrTy, CI);
@@ -7655,7 +7673,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::x86_sse2_storeu_dq:
   case Intrinsic::x86_sse2_storel_dq:
     // Turn X86 storeu -> store if the pointer is known aligned.
-    if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
+    if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
       const Type *OpPtrTy = PointerType::get(II->getOperand(2)->getType());
       Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1),
                                     OpPtrTy, CI);
@@ -8768,7 +8786,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   Value *Op = LI.getOperand(0);
 
   // Attempt to improve the alignment.
-  unsigned KnownAlign = GetKnownAlignment(Op, TD);
+  unsigned KnownAlign = GetOrEnforceKnownAlignment(Op, TD);
   if (KnownAlign > LI.getAlignment())
     LI.setAlignment(KnownAlign);
 
@@ -8968,7 +8986,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   }
 
   // Attempt to improve the alignment.
-  unsigned KnownAlign = GetKnownAlignment(Ptr, TD);
+  unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr, TD);
   if (KnownAlign > SI.getAlignment())
     SI.setAlignment(KnownAlign);
 
test/Transforms/InstCombine/align-inc.ll (new file, 14 lines)
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {GLOBAL.*align 16}
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {tmp = load}
+
+@GLOBAL = internal constant [4 x i32] zeroinitializer
+
+declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*)
+
+
+define <16 x i8> @foo(<2 x i64> %x) {
+entry:
+	%tmp = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* bitcast ([4 x i32]* @GLOBAL to i8*) )
+	ret <16 x i8> %tmp
+}
+
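The two RUN lines check both halves of the transformation: the first verifies
that @GLOBAL's alignment was bumped to 16, the second that the
llvm.x86.sse2.loadu.dq call was folded to a plain (now aligned) load.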