[LSV] Don't move stores across may-load instrs, and loosen restrictions on moving loads.

Summary:
Previously we wouldn't move loads/stores across instructions that had
side-effects, where that was defined as may-write or may-throw.  But
this is not sufficiently restrictive: Stores can't safely be moved
across instructions that may load.

This patch also adds a DEBUG check that the instructions in our chain
are either all loads or all stores (never a mix of the two).

Reviewers: asbirlea

Subscribers: llvm-commits, jholewinski, arsenm, mzolotukhin

Differential Revision: https://reviews.llvm.org/D22547

llvm-svn: 276171
This commit is contained in:
Justin Lebar 2016-07-20 20:07:37 +00:00
parent 2f14d630e7
commit d791dcafe4
2 changed files with 213 additions and 36 deletions

View File

@ -429,6 +429,18 @@ ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) {
SmallVector<std::pair<Value *, unsigned>, 16> MemoryInstrs;
SmallVector<std::pair<Value *, unsigned>, 16> ChainInstrs;
bool IsLoadChain = isa<LoadInst>(Chain[0]);
DEBUG({
for (Value *V : Chain) {
if (IsLoadChain)
assert(isa<LoadInst>(V) &&
"All elements of Chain must be loads, or all must be stores.");
else
assert(isa<StoreInst>(V) &&
"All elements of Chain must be loads, or all must be stores.");
}
});
unsigned InstrIdx = 0;
for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
++InstrIdx;
@ -437,8 +449,12 @@ ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) {
MemoryInstrs.push_back({&I, InstrIdx});
else
ChainInstrs.push_back({&I, InstrIdx});
} else if (I.mayHaveSideEffects()) {
DEBUG(dbgs() << "LSV: Found side-effecting operation: " << I << '\n');
} else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n');
break;
} else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
<< '\n');
break;
}
}

View File

@ -1,48 +1,209 @@
; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s
; If we have a chain of loads or stores with a side-effecting operation in the
; middle, we should still be able to merge the loads/stores that appear
; before/after the side-effecting op. We just can't merge *across* the
; side-effecting op.
; Check that the load/store vectorizer is willing to move loads/stores across
; intervening instructions only if it's safe.
;
; - Loads can be moved across instructions that don't write or throw.
; - Stores can only be moved across instructions which don't read, write, or
; throw.
declare void @fn() #0
declare void @fn()
declare void @fn_nounwind() #0
declare void @fn_nounwind_writeonly() #1
declare void @fn_nounwind_readonly() #2
declare void @fn_writeonly() #3
declare void @fn_readonly() #4
declare void @fn_readnone() #5
; CHECK-LABEL: @merge_stores
; CHECK: store <2 x i32> <i32 100, i32 101>
; CHECK-LABEL: @load_fn
; CHECK: load
; CHECK: call void @fn()
; CHECK: store <2 x i32> <i32 102, i32 103>
define void @merge_stores(i32* %out) #0 {
%out.gep.1 = getelementptr i32, i32* %out, i32 1
%out.gep.2 = getelementptr i32, i32* %out, i32 2
%out.gep.3 = getelementptr i32, i32* %out, i32 3
; CHECK: load
define void @load_fn(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
store i32 101, i32* %out.gep.1
store i32 100, i32* %out
%v0 = load i32, i32* %p
call void @fn()
store i32 102, i32* %out.gep.2
store i32 103, i32* %out.gep.3
%v1 = load i32, i32* %p.1
ret void
}
; CHECK-LABEL: @merge_loads
; CHECK: load <2 x i32>
; CHECK: call void @fn()
; CHECK: load <2 x i32>
define i32 @merge_loads(i32* %in) #0 {
%in.gep.1 = getelementptr i32, i32* %in, i32 1
%in.gep.2 = getelementptr i32, i32* %in, i32 2
%in.gep.3 = getelementptr i32, i32* %in, i32 3
; CHECK-LABEL: @load_fn_nounwind
; CHECK: load
; CHECK: call void @fn_nounwind()
; CHECK: load
define void @load_fn_nounwind(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
%v1 = load i32, i32* %in
%v2 = load i32, i32* %in.gep.1
call void @fn()
%v3 = load i32, i32* %in.gep.2
%v4 = load i32, i32* %in.gep.3
%sum1 = add i32 %v1, %v2
%sum2 = add i32 %sum1, %v3
%sum3 = add i32 %sum2, %v4
ret i32 %v4
%v0 = load i32, i32* %p
call void @fn_nounwind() #0
%v1 = load i32, i32* %p.1
ret void
}
; Two i32 loads from adjacent addresses (%p and %p plus one element), separated
; by a call that carries attribute group #1 (nounwind writeonly). The call may
; write memory, so the directives below require a load on each side of the
; call, i.e. the two loads are not merged across it.
; CHECK-LABEL: @load_fn_nounwind_writeonly
; CHECK: load
; CHECK: call void @fn_nounwind_writeonly()
; CHECK: load
define void @load_fn_nounwind_writeonly(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
%v0 = load i32, i32* %p
call void @fn_nounwind_writeonly() #1
%v1 = load i32, i32* %p.1
ret void
}
; Two i32 loads from adjacent addresses, separated by a call that carries
; attribute group #2 (nounwind readonly). The call neither writes memory nor
; unwinds, so the directives below expect a single <2 x i32> load; the
; unordered form accepts the vector load and the call in either order.
; CHECK-LABEL: @load_fn_nounwind_readonly
; CHECK-DAG: load <2 x i32>
; CHECK-DAG: call void @fn_nounwind_readonly()
define void @load_fn_nounwind_readonly(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
%v0 = load i32, i32* %p
call void @fn_nounwind_readonly() #2
%v1 = load i32, i32* %p.1
ret void
}
; Two i32 loads from adjacent addresses, separated by a call that carries
; attribute group #4 (readonly, but not nounwind). Because the call is not
; marked nounwind it may throw, so the directives below require a load on each
; side of the call, i.e. the two loads are not merged across it.
; CHECK-LABEL: @load_fn_readonly
; CHECK: load
; CHECK: call void @fn_readonly
; CHECK: load
define void @load_fn_readonly(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
%v0 = load i32, i32* %p
call void @fn_readonly() #4
%v1 = load i32, i32* %p.1
ret void
}
; Two i32 loads from adjacent addresses, separated by a call that carries
; attribute group #3 (writeonly, but not nounwind). The call may write memory
; and is not marked nounwind, so the directives below require a load on each
; side of the call, i.e. the two loads are not merged across it.
; CHECK-LABEL: @load_fn_writeonly
; CHECK: load
; CHECK: call void @fn_writeonly()
; CHECK: load
define void @load_fn_writeonly(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
%v0 = load i32, i32* %p
call void @fn_writeonly() #3
%v1 = load i32, i32* %p.1
ret void
}
; Two i32 loads from adjacent addresses, separated by a call that carries
; attribute group #5 (nounwind readnone). The call does not access memory and
; does not unwind, so the directives below expect a single <2 x i32> load; the
; unordered form accepts the vector load and the call in either order.
; CHECK-LABEL: @load_fn_readnone
; CHECK-DAG: load <2 x i32>
; CHECK-DAG: call void @fn_readnone()
define void @load_fn_readnone(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
%v0 = load i32, i32* %p
call void @fn_readnone() #5
%v1 = load i32, i32* %p.1
ret void
}
; ------------------------------------------------
; Same tests, but now for stores instead of loads.
; ------------------------------------------------
; Two i32 stores of 0 to adjacent addresses (%p and %p plus one element),
; separated by a call to @fn whose call site carries no attribute group. The
; directives below require a store on each side of the call, i.e. the two
; stores are not merged across it.
; CHECK-LABEL: @store_fn
; CHECK: store
; CHECK: call void @fn()
; CHECK: store
define void @store_fn(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
store i32 0, i32* %p
call void @fn()
store i32 0, i32* %p.1
ret void
}
; Two i32 stores of 0 to adjacent addresses, separated by a call that carries
; attribute group #0 (nounwind only). Nothing on the call site rules out memory
; reads or writes, so the directives below require a store on each side of the
; call, i.e. the two stores are not merged across it.
; CHECK-LABEL: @store_fn_nounwind
; CHECK: store
; CHECK: call void @fn_nounwind()
; CHECK: store
define void @store_fn_nounwind(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
store i32 0, i32* %p
call void @fn_nounwind() #0
store i32 0, i32* %p.1
ret void
}
; Two i32 stores of 0 to adjacent addresses, separated by a call that carries
; attribute group #1 (nounwind writeonly). The call may write memory, so the
; directives below require a store on each side of the call, i.e. the two
; stores are not merged across it.
; CHECK-LABEL: @store_fn_nounwind_writeonly
; CHECK: store
; CHECK: call void @fn_nounwind_writeonly()
; CHECK: store
define void @store_fn_nounwind_writeonly(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
store i32 0, i32* %p
call void @fn_nounwind_writeonly() #1
store i32 0, i32* %p.1
ret void
}
; Two i32 stores of 0 to adjacent addresses, separated by a call that carries
; attribute group #2 (nounwind readonly). The call may read memory, and a store
; must not be moved across an instruction that may read, so the directives
; below require a store on each side of the call (contrast with the load
; variant of this test, where the same call does not block merging).
; CHECK-LABEL: @store_fn_nounwind_readonly
; CHECK: store
; CHECK: call void @fn_nounwind_readonly()
; CHECK: store
define void @store_fn_nounwind_readonly(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
store i32 0, i32* %p
call void @fn_nounwind_readonly() #2
store i32 0, i32* %p.1
ret void
}
; Two i32 stores of 0 to adjacent addresses, separated by a call that carries
; attribute group #4 (readonly, but not nounwind). The call may read memory and
; is not marked nounwind, so the directives below require a store on each side
; of the call, i.e. the two stores are not merged across it.
; CHECK-LABEL: @store_fn_readonly
; CHECK: store
; CHECK: call void @fn_readonly
; CHECK: store
define void @store_fn_readonly(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
store i32 0, i32* %p
call void @fn_readonly() #4
store i32 0, i32* %p.1
ret void
}
; Two i32 stores of 0 to adjacent addresses, separated by a call that carries
; attribute group #3 (writeonly, but not nounwind). The call may write memory
; and is not marked nounwind, so the directives below require a store on each
; side of the call, i.e. the two stores are not merged across it.
; CHECK-LABEL: @store_fn_writeonly
; CHECK: store
; CHECK: call void @fn_writeonly()
; CHECK: store
define void @store_fn_writeonly(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
store i32 0, i32* %p
call void @fn_writeonly() #3
store i32 0, i32* %p.1
ret void
}
; This is the only store idiom we can vectorize.
; Two i32 stores of 0 to adjacent addresses, separated by a call that carries
; attribute group #5 (nounwind readnone). The call neither accesses memory nor
; unwinds, so the directives below expect a single <2 x i32> store; the
; unordered form accepts the vector store and the call in either order.
; CHECK-LABEL: @store_fn_readnone
; CHECK-DAG: store <2 x i32>
; CHECK-DAG: call void @fn_readnone()
define void @store_fn_readnone(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
store i32 0, i32* %p
call void @fn_readnone() #5
store i32 0, i32* %p.1
ret void
}
; Attribute groups referenced by the declarations, function definitions and
; call sites in this file. Each group is one combination of the unwind
; (nounwind) and memory-access (readonly / writeonly / readnone) properties
; exercised by the tests.
attributes #0 = { nounwind }
attributes #1 = { nounwind writeonly }
attributes #2 = { nounwind readonly }
attributes #3 = { writeonly }
attributes #4 = { readonly }
; readnone implies nounwind, so no need to test separately
attributes #5 = { nounwind readnone }