When combining consecutive stores allow loads in between the stores, if the loads do not alias.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168832 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2012-11-29 00:00:08 +00:00
parent bf2ad736c7
commit 90e11dc8ad
2 changed files with 113 additions and 3 deletions

View File

@ -291,6 +291,10 @@ namespace {
unsigned SrcValueAlign2,
const MDNode *TBAAInfo2) const;
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1);
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool FindAliasInfo(SDNode *N,
@ -7552,7 +7556,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (BasePtr.first.getOpcode() == ISD::UNDEF)
return false;
// Save the LoadSDNodes that we find in the chain.
// We need to make sure that these nodes do not interfere with
// any of the store nodes.
SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
// Save the StoreSDNodes that we find in the chain.
SmallVector<MemOpLink, 8> StoreNodes;
// Walk up the chain and look for nodes with offsets from the same
// base pointer. Stop when reaching an instruction with a different kind
// or instruction which has a different base pointer.
@ -7596,8 +7607,26 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// We found a potential memory operand to merge.
StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++));
// Move up the chain to the next memory operation.
Index = dyn_cast<StoreSDNode>(Index->getChain().getNode());
// Find the next memory operand in the chain. If the next operand in the
// chain is a store then move up and continue the scan with the next
// memory operand. If the next operand is a load save it and use alias
// information to check if it interferes with anything.
SDNode *NextInChain = Index->getChain().getNode();
while (1) {
if (isa<StoreSDNode>(NextInChain)) {
// We found a store node. Use it for the next iteration.
Index = cast<StoreSDNode>(NextInChain);
break;
} else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
// Save the load node for later. Continue the scan.
AliasLoadNodes.push_back(Ldn);
NextInChain = Ldn->getChain().getNode();
continue;
} else {
Index = NULL;
break;
}
}
}
// Check if there is anything to merge.
@ -7612,11 +7641,23 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// store memory address.
unsigned LastConsecutiveStore = 0;
int64_t StartAddress = StoreNodes[0].OffsetFromBase;
for (unsigned i=1; i<StoreNodes.size(); ++i) {
for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
bool Alias = false;
// Check if this store interferes with any of the loads that we found.
for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
Alias = true;
break;
}
// We found a load that alias with this store. Stop the sequence.
if (Alias)
break;
// Mark this node as useful.
LastConsecutiveStore = i;
}
@ -9680,6 +9721,23 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
return true;
}
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
SDValue Ptr0, Ptr1;
int64_t Size0, Size1;
const Value *SrcValue0, *SrcValue1;
int SrcValueOffset0, SrcValueOffset1;
unsigned SrcValueAlign0, SrcValueAlign1;
const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0,
SrcValueAlign0, SrcTBAAInfo0);
FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1,
SrcValueAlign1, SrcTBAAInfo1);
return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0,
SrcValueAlign0, SrcTBAAInfo0,
Ptr1, Size1, SrcValue1, SrcValueOffset1,
SrcValueAlign1, SrcTBAAInfo1);
}
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,

View File

@ -0,0 +1,52 @@
; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win64 | FileCheck %s
; CHECK: merge_stores_can
; CHECK: callq foo
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: movups %xmm0
; CHECK: callq foo
; CHECK: ret
declare i32 @foo([10 x i32]* )
define i32 @merge_stores_can() nounwind ssp {
%object1 = alloca [10 x i32]
%ret0 = call i32 @foo([10 x i32]* %object1) nounwind
%O1_1 = getelementptr [10 x i32]* %object1, i64 0, i32 1
%O1_2 = getelementptr [10 x i32]* %object1, i64 0, i32 2
%O1_3 = getelementptr [10 x i32]* %object1, i64 0, i32 3
%O1_4 = getelementptr [10 x i32]* %object1, i64 0, i32 4
%ld_ptr = getelementptr [10 x i32]* %object1, i64 0, i32 9
store i32 0, i32* %O1_1
store i32 0, i32* %O1_2
%ret = load i32* %ld_ptr ; <--- does not alias.
store i32 0, i32* %O1_3
store i32 0, i32* %O1_4
%ret1 = call i32 @foo([10 x i32]* %object1) nounwind
ret i32 %ret
}
; CHECK: merge_stores_cant
; CHECK-NOT: xorps %xmm0, %xmm0
; CHECK-NOT: movups %xmm0
; CHECK: ret
define i32 @merge_stores_cant([10 x i32]* %in0, [10 x i32]* %in1) nounwind ssp {
%O1_1 = getelementptr [10 x i32]* %in1, i64 0, i32 1
%O1_2 = getelementptr [10 x i32]* %in1, i64 0, i32 2
%O1_3 = getelementptr [10 x i32]* %in1, i64 0, i32 3
%O1_4 = getelementptr [10 x i32]* %in1, i64 0, i32 4
%ld_ptr = getelementptr [10 x i32]* %in0, i64 0, i32 2
store i32 0, i32* %O1_1
store i32 0, i32* %O1_2
%ret = load i32* %ld_ptr ; <--- may alias
store i32 0, i32* %O1_3
store i32 0, i32* %O1_4
ret i32 %ret
}