mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 12:50:30 +00:00
[AliasAnalysis] Treat invariant.start as read-memory
Summary: We teach alias analysis that invariant.start is readonly. This helps with GVN and memcpy optimizations that currently treat invariant.start as a clobber. We need to treat this as readonly, so that DSE does not incorrectly remove stores prior to the invariant.start. Reviewers: sanjoy, reames, majnemer, dberlin Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D23214 llvm-svn: 278138
This commit is contained in:
parent
366c1adb94
commit
db9853f118
@ -781,6 +781,32 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
|
||||
if (isIntrinsicCall(CS, Intrinsic::experimental_guard))
|
||||
return MRI_Ref;
|
||||
|
||||
// Like assumes, invariant.start intrinsics were also marked as arbitrarily
|
||||
// writing so that proper control dependencies are maintained but they never
|
||||
// mod any particular memory location visible to the IR.
|
||||
// *Unlike* assumes (which are now modeled as NoModRef), invariant.start
|
||||
// intrinsic is now modeled as reading memory. This prevents hoisting the
|
||||
// invariant.start intrinsic over stores. Consider:
|
||||
// *ptr = 40;
|
||||
// *ptr = 50;
|
||||
// invariant_start(ptr)
|
||||
// int val = *ptr;
|
||||
// print(val);
|
||||
//
|
||||
// This cannot be transformed to:
|
||||
//
|
||||
// *ptr = 40;
|
||||
// invariant_start(ptr)
|
||||
// *ptr = 50;
|
||||
// int val = *ptr;
|
||||
// print(val);
|
||||
//
|
||||
// The transformation will cause the second store to be ignored (based on
|
||||
// rules of invariant.start) and print 40, while the first program always
|
||||
// prints 50.
|
||||
if (isIntrinsicCall(CS, Intrinsic::invariant_start))
|
||||
return MRI_Ref;
|
||||
|
||||
// The AAResultBase base class has some smarts, let's use them.
|
||||
return AAResultBase::getModRefInfo(CS, Loc);
|
||||
}
|
||||
|
34
test/Transforms/DeadStoreElimination/invariant.start.ll
Normal file
34
test/Transforms/DeadStoreElimination/invariant.start.ll
Normal file
@ -0,0 +1,34 @@
|
||||
; Test to make sure llvm.invariant.start calls are not treated as clobbers.
|
||||
; RUN: opt < %s -basicaa -dse -S | FileCheck %s
|
||||
|
||||
declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
|
||||
|
||||
; We cannot remove the store 1 to %p.
|
||||
; FIXME: By the semantics of invariant.start, the store 3 to p is unreachable.
|
||||
define void @test(i8 *%p) {
|
||||
store i8 1, i8* %p, align 4
|
||||
%i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %p)
|
||||
store i8 3, i8* %p, align 4
|
||||
ret void
|
||||
; CHECK-LABEL: @test(
|
||||
; CHECK-NEXT: store i8 1, i8* %p, align 4
|
||||
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %p)
|
||||
; CHECK-NEXT: store i8 3, i8* %p, align 4
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
; FIXME: We should be able to remove the first store to p, even though p and q
|
||||
; may alias.
|
||||
define void @test2(i8* %p, i8* %q) {
|
||||
store i8 1, i8* %p, align 4
|
||||
store i8 2, i8* %q, align 4
|
||||
%i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %q)
|
||||
store i8 3, i8* %p, align 4
|
||||
ret void
|
||||
; CHECK-LABEL: @test2(
|
||||
; CHECK-NEXT: store i8 1, i8* %p, align 4
|
||||
; CHECK-NEXT: store i8 2, i8* %q, align 4
|
||||
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %q)
|
||||
; CHECK-NEXT: store i8 3, i8* %p, align 4
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
59
test/Transforms/GVN/invariant.start.ll
Normal file
59
test/Transforms/GVN/invariant.start.ll
Normal file
@ -0,0 +1,59 @@
|
||||
; Test to make sure llvm.invariant.start calls are not treated as clobbers.
|
||||
; RUN: opt < %s -gvn -S | FileCheck %s
|
||||
|
||||
|
||||
declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
|
||||
declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind
|
||||
|
||||
; We forward store to the load across the invariant.start intrinsic
|
||||
define i8 @forward_store() {
|
||||
; CHECK-LABEL: @forward_store
|
||||
; CHECK: call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a)
|
||||
; CHECK-NOT: load
|
||||
; CHECK: ret i8 0
|
||||
%a = alloca i8
|
||||
store i8 0, i8* %a
|
||||
%i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a)
|
||||
%r = load i8, i8* %a
|
||||
ret i8 %r
|
||||
}
|
||||
|
||||
declare i8 @dummy(i8* nocapture) nounwind readonly
|
||||
|
||||
; We forward store to the load in the non-local analysis case,
|
||||
; i.e. invariant.start is in another basic block.
|
||||
define i8 @forward_store_nonlocal(i1 %cond) {
|
||||
; CHECK-LABEL: forward_store_nonlocal
|
||||
; CHECK: call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a)
|
||||
; CHECK: ret i8 0
|
||||
; CHECK: ret i8 %val
|
||||
%a = alloca i8
|
||||
store i8 0, i8* %a
|
||||
%i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a)
|
||||
br i1 %cond, label %loadblock, label %exit
|
||||
|
||||
loadblock:
|
||||
%r = load i8, i8* %a
|
||||
ret i8 %r
|
||||
|
||||
exit:
|
||||
%val = call i8 @dummy(i8* %a)
|
||||
ret i8 %val
|
||||
}
|
||||
|
||||
; We should not value forward %foo to the invariant.end corresponding to %bar.
|
||||
define i8 @forward_store1() {
|
||||
; CHECK-LABEL: forward_store1
|
||||
; CHECK: %foo = call {}* @llvm.invariant.start.p0i8
|
||||
; CHECK-NOT: load
|
||||
; CHECK: %bar = call {}* @llvm.invariant.start.p0i8
|
||||
; CHECK: call void @llvm.invariant.end.p0i8({}* %bar, i64 1, i8* %a)
|
||||
; CHECK: ret i8 0
|
||||
%a = alloca i8
|
||||
store i8 0, i8* %a
|
||||
%foo = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a)
|
||||
%r = load i8, i8* %a
|
||||
%bar = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a)
|
||||
call void @llvm.invariant.end.p0i8({}* %bar, i64 1, i8* %a)
|
||||
ret i8 %r
|
||||
}
|
49
test/Transforms/MemCpyOpt/invariant.start.ll
Normal file
49
test/Transforms/MemCpyOpt/invariant.start.ll
Normal file
@ -0,0 +1,49 @@
|
||||
; MemCpy optimizations should take place even in presence of invariant.start
|
||||
; RUN: opt < %s -basicaa -memcpyopt -dse -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||
|
||||
target triple = "i686-apple-darwin9"
|
||||
|
||||
%0 = type { x86_fp80, x86_fp80 }
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
|
||||
i1) nounwind
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly,
|
||||
i64, i32, i1)
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
|
||||
|
||||
declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
|
||||
|
||||
; FIXME: The invariant.start does not modify %P.
|
||||
; The intermediate alloca and one of the memcpy's should be eliminated, the
|
||||
; other should be transformed to a memmove.
|
||||
define void @test1(i8* %P, i8* %Q) nounwind {
|
||||
%memtmp = alloca %0, align 16
|
||||
%R = bitcast %0* %memtmp to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false)
|
||||
%i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %P)
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false)
|
||||
ret void
|
||||
; CHECK-LABEL: @test1(
|
||||
; CHECK-NEXT: %memtmp = alloca %0, align 16
|
||||
; CHECK-NEXT: %R = bitcast %0* %memtmp to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false)
|
||||
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %P)
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false)
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
|
||||
; The invariant.start intrinsic does not inhibit transforming the memcpy to a
|
||||
; memset.
|
||||
define void @test2(i8* %dst1, i8* %dst2, i8 %c) {
|
||||
; CHECK-LABEL: define void @test2(
|
||||
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
|
||||
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %dst1)
|
||||
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 8, i1 false)
|
||||
; CHECK-NEXT: ret void
|
||||
call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
|
||||
%i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %dst1)
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 8, i1 false)
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user