[LSV] Don't move stores across may-load instrs, and loosen restrictions on moving loads.
Summary:
Previously we wouldn't move loads/stores across instructions that had
side-effects, where that was defined as may-write or may-throw. But this is
not sufficiently restrictive: Stores can't safely be moved across
instructions that may load.

This patch also adds a DEBUG check that all instructions in our chain are
either loads or stores.

Reviewers: asbirlea

Subscribers: llvm-commits, jholewinski, arsenm, mzolotukhin

Differential Revision: https://reviews.llvm.org/D22547

llvm-svn: 276171
parent 2f14d630e7
commit d791dcafe4
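The rule the summary states reduces to one barrier predicate per chain kind. The sketch below is illustrative only (the free-standing helper and its name are not part of this patch); it assumes LLVM's Instruction query API, which is what the change itself uses:

#include "llvm/IR/Instruction.h"

using namespace llvm;

// Illustrative helper, not from the patch: returns true if the scan for a
// vectorizable prefix must stop at instruction I rather than reorder the
// chain across it.
static bool isVectorizationBarrier(const Instruction &I, bool IsLoadChain) {
  if (IsLoadChain)
    // A chain of loads only has to stay on the same side of instructions
    // that may write memory or may throw.
    return I.mayWriteToMemory() || I.mayThrow();
  // A chain of stores must additionally not move across anything that may
  // *read* memory: an aliasing read could otherwise observe the stores in
  // the wrong order.
  return I.mayReadOrWriteMemory() || I.mayThrow();
}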
@@ -429,6 +429,18 @@ ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) {
   SmallVector<std::pair<Value *, unsigned>, 16> MemoryInstrs;
   SmallVector<std::pair<Value *, unsigned>, 16> ChainInstrs;

+  bool IsLoadChain = isa<LoadInst>(Chain[0]);
+  DEBUG({
+    for (Value *V : Chain) {
+      if (IsLoadChain)
+        assert(isa<LoadInst>(V) &&
+               "All elements of Chain must be loads, or all must be stores.");
+      else
+        assert(isa<StoreInst>(V) &&
+               "All elements of Chain must be loads, or all must be stores.");
+    }
+  });
+
   unsigned InstrIdx = 0;
   for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
     ++InstrIdx;
@@ -437,8 +449,12 @@ ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) {
         MemoryInstrs.push_back({&I, InstrIdx});
       else
         ChainInstrs.push_back({&I, InstrIdx});
-    } else if (I.mayHaveSideEffects()) {
-      DEBUG(dbgs() << "LSV: Found side-effecting operation: " << I << '\n');
+    } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
+      DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n');
+      break;
+    } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
+      DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
+                   << '\n');
       break;
     }
   }
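To see why the store case needs the stricter !IsLoadChain test added above, consider this hypothetical C++ fragment (it is not taken from the patch or its tests):

// Hypothetical example: two adjacent stores separated by a call that may read.
void example(int *p, void (*reader)()) {
  p[0] = 0;
  reader();  // may load p[1]; it must still see the old value of p[1] here
  p[1] = 0;  // hoisting this store above reader() to form one vector store
             // would let the call observe the new value
}

Only when the intervening instruction is known not to read memory or throw can the two stores be merged, which is what the readnone/nounwind cases in the updated test below expect.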
@@ -1,48 +1,209 @@
 ; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s

-; If we have a chain of loads or stores with a side-effecting operation in the
-; middle, we should still be able to merge the loads/stores that appear
-; before/after the side-effecting op. We just can't merge *across* the
-; side-effecting op.
+; Check that the load/store vectorizer is willing to move loads/stores across
+; intervening instructions only if it's safe.
+;
+;  - Loads can be moved across instructions that don't write or throw.
+;  - Stores can only be moved across instructions which don't read, write, or
+;    throw.

-declare void @fn() #0
+declare void @fn()
+declare void @fn_nounwind() #0
+declare void @fn_nounwind_writeonly() #1
+declare void @fn_nounwind_readonly() #2
+declare void @fn_writeonly() #3
+declare void @fn_readonly() #4
+declare void @fn_readnone() #5

-; CHECK-LABEL: @merge_stores
-; CHECK: store <2 x i32> <i32 100, i32 101>
+; CHECK-LABEL: @load_fn
+; CHECK: load
 ; CHECK: call void @fn()
-; CHECK: store <2 x i32> <i32 102, i32 103>
-define void @merge_stores(i32* %out) #0 {
-  %out.gep.1 = getelementptr i32, i32* %out, i32 1
-  %out.gep.2 = getelementptr i32, i32* %out, i32 2
-  %out.gep.3 = getelementptr i32, i32* %out, i32 3
+; CHECK: load
+define void @load_fn(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1

-  store i32 101, i32* %out.gep.1
-  store i32 100, i32* %out
+  %v0 = load i32, i32* %p
   call void @fn()
-  store i32 102, i32* %out.gep.2
-  store i32 103, i32* %out.gep.3
+  %v1 = load i32, i32* %p.1
   ret void
 }

-; CHECK-LABEL: @merge_loads
-; CHECK: load <2 x i32>
-; CHECK: call void @fn()
-; CHECK: load <2 x i32>
-define i32 @merge_loads(i32* %in) #0 {
-  %in.gep.1 = getelementptr i32, i32* %in, i32 1
-  %in.gep.2 = getelementptr i32, i32* %in, i32 2
-  %in.gep.3 = getelementptr i32, i32* %in, i32 3
+; CHECK-LABEL: @load_fn_nounwind
+; CHECK: load
+; CHECK: call void @fn_nounwind()
+; CHECK: load
+define void @load_fn_nounwind(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1

-  %v1 = load i32, i32* %in
-  %v2 = load i32, i32* %in.gep.1
-  call void @fn()
-  %v3 = load i32, i32* %in.gep.2
-  %v4 = load i32, i32* %in.gep.3
-
-  %sum1 = add i32 %v1, %v2
-  %sum2 = add i32 %sum1, %v3
-  %sum3 = add i32 %sum2, %v4
-  ret i32 %v4
+  %v0 = load i32, i32* %p
+  call void @fn_nounwind() #0
+  %v1 = load i32, i32* %p.1
+  ret void
 }

+; CHECK-LABEL: @load_fn_nounwind_writeonly
+; CHECK: load
+; CHECK: call void @fn_nounwind_writeonly()
+; CHECK: load
+define void @load_fn_nounwind_writeonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  %v0 = load i32, i32* %p
+  call void @fn_nounwind_writeonly() #1
+  %v1 = load i32, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @load_fn_nounwind_readonly
+; CHECK-DAG: load <2 x i32>
+; CHECK-DAG: call void @fn_nounwind_readonly()
+define void @load_fn_nounwind_readonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  %v0 = load i32, i32* %p
+  call void @fn_nounwind_readonly() #2
+  %v1 = load i32, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @load_fn_readonly
+; CHECK: load
+; CHECK: call void @fn_readonly
+; CHECK: load
+define void @load_fn_readonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  %v0 = load i32, i32* %p
+  call void @fn_readonly() #4
+  %v1 = load i32, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @load_fn_writeonly
+; CHECK: load
+; CHECK: call void @fn_writeonly()
+; CHECK: load
+define void @load_fn_writeonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  %v0 = load i32, i32* %p
+  call void @fn_writeonly() #3
+  %v1 = load i32, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @load_fn_readnone
+; CHECK-DAG: load <2 x i32>
+; CHECK-DAG: call void @fn_readnone()
+define void @load_fn_readnone(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  %v0 = load i32, i32* %p
+  call void @fn_readnone() #5
+  %v1 = load i32, i32* %p.1
+  ret void
+}
+
+; ------------------------------------------------
+; Same tests, but now for stores instead of loads.
+; ------------------------------------------------
+
+; CHECK-LABEL: @store_fn
+; CHECK: store
+; CHECK: call void @fn()
+; CHECK: store
+define void @store_fn(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  store i32 0, i32* %p
+  call void @fn()
+  store i32 0, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @store_fn_nounwind
+; CHECK: store
+; CHECK: call void @fn_nounwind()
+; CHECK: store
+define void @store_fn_nounwind(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  store i32 0, i32* %p
+  call void @fn_nounwind() #0
+  store i32 0, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @store_fn_nounwind_writeonly
+; CHECK: store
+; CHECK: call void @fn_nounwind_writeonly()
+; CHECK: store
+define void @store_fn_nounwind_writeonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  store i32 0, i32* %p
+  call void @fn_nounwind_writeonly() #1
+  store i32 0, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @store_fn_nounwind_readonly
+; CHECK: store
+; CHECK: call void @fn_nounwind_readonly()
+; CHECK: store
+define void @store_fn_nounwind_readonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  store i32 0, i32* %p
+  call void @fn_nounwind_readonly() #2
+  store i32 0, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @store_fn_readonly
+; CHECK: store
+; CHECK: call void @fn_readonly
+; CHECK: store
+define void @store_fn_readonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  store i32 0, i32* %p
+  call void @fn_readonly() #4
+  store i32 0, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @store_fn_writeonly
+; CHECK: store
+; CHECK: call void @fn_writeonly()
+; CHECK: store
+define void @store_fn_writeonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  store i32 0, i32* %p
+  call void @fn_writeonly() #3
+  store i32 0, i32* %p.1
+  ret void
+}
+
+; This is the only store idiom we can vectorize.
+; CHECK-LABEL: @store_fn_readnone
+; CHECK-DAG: store <2 x i32>
+; CHECK-DAG: call void @fn_readnone()
+define void @store_fn_readnone(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  store i32 0, i32* %p
+  call void @fn_readnone() #5
+  store i32 0, i32* %p.1
+  ret void
+}
+

 attributes #0 = { nounwind }
+attributes #1 = { nounwind writeonly }
+attributes #2 = { nounwind readonly }
+attributes #3 = { writeonly }
+attributes #4 = { readonly }
+; readnone implies nounwind, so no need to test separately
+attributes #5 = { nounwind readnone }