[EarlyCSE] Fix handling of target memory intrinsics for CSE'ing loads.

Summary:
Some target intrinsics can access multiple elements, using the pointer as a
base address (e.g. AArch64 ld4). When trying to CSE such instructions,
it must be checked the available value comes from a compatible instruction
because the pointer is not enough to discriminate whether the value is
correct.

Reviewers: ssijaric

Subscribers: mcrosier, llvm-commits, aemerson

Differential Revision: http://reviews.llvm.org/D13475

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249523 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Arnaud A. de Grandmaison 2015-10-07 07:41:29 +00:00
parent 6d30abd62e
commit c87636435d
2 changed files with 41 additions and 14 deletions

View File

@ -290,12 +290,19 @@ public:
/// current generation count. The current generation count is incremented
/// after every possibly writing memory operation, which ensures that we only
/// CSE loads with other loads that have no intervening store.
typedef RecyclingAllocator<
BumpPtrAllocator,
ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>>
struct LoadValue {
Value *data;
unsigned generation;
int matchingId;
LoadValue() : data(nullptr), generation(0), matchingId(-1) {}
LoadValue(Value *data, unsigned generation, unsigned matchingId)
: data(data), generation(generation), matchingId(matchingId) {}
};
typedef RecyclingAllocator<BumpPtrAllocator,
ScopedHashTableVal<Value *, LoadValue>>
LoadMapAllocator;
typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>,
DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
LoadMapAllocator> LoadHTType;
LoadHTType AvailableLoads;
/// \brief A scoped hash table of the current values of read-only call
@ -560,13 +567,13 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If we have an available version of this load, and if it is the right
// generation, replace this instruction.
std::pair<Value *, unsigned> InVal =
AvailableLoads.lookup(MemInst.getPtr());
if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
Value *Op = getOrCreateResult(InVal.first, Inst->getType());
LoadValue InVal = AvailableLoads.lookup(MemInst.getPtr());
if (InVal.data != nullptr && InVal.generation == CurrentGeneration &&
InVal.matchingId == MemInst.getMatchingId()) {
Value *Op = getOrCreateResult(InVal.data, Inst->getType());
if (Op != nullptr) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
<< " to: " << *InVal.first << '\n');
<< " to: " << *InVal.data << '\n');
if (!Inst->use_empty())
Inst->replaceAllUsesWith(Op);
Inst->eraseFromParent();
@ -577,8 +584,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
}
// Otherwise, remember that we have this instruction.
AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
Inst, CurrentGeneration));
AvailableLoads.insert(
MemInst.getPtr(),
LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
LastStore = nullptr;
continue;
}
@ -652,8 +660,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// version of the pointer. It is safe to forward from volatile stores
// to non-volatile loads, so we don't have to check for volatility of
// the store.
AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
Inst, CurrentGeneration));
AvailableLoads.insert(
MemInst.getPtr(),
LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
// Remember that this was the last store we saw for DSE.
if (!MemInst.isVolatile())

View File

@ -0,0 +1,18 @@
; RUN: opt -S -early-cse < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>*)
; Although the store and the ld4 are using the same pointer, the
; data can not be reused because ld4 accesses multiple elements.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @foo() {
entry:
store <4 x i16> undef, <4 x i16>* undef, align 8
%0 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* undef)
ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %0
; CHECK-LABEL: @foo(
; CHECK: store
; CHECK-NEXT: call
; CHECK-NEXT: ret
}