mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-13 00:11:56 +00:00
Remove a pointless restriction from memcpyopt. It was
refusing to optimize a pair of memcpy's like this: "copy A <- B; copy C <- A" when it couldn't prove that noalias(B,C). We can still eliminate the intermediate copy in that case by producing a memmove instead of a memcpy. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119694 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7cac8e1691
commit
5a7aeaa019
@ -688,11 +688,14 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
|
|||||||
if (DepSize < MSize)
|
if (DepSize < MSize)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Finally, we have to make sure that the dest of the second does not
|
Intrinsic::ID ResultFn = Intrinsic::memcpy;
|
||||||
// alias the source of the first.
|
|
||||||
|
// If the dest of the second might alias the source of the first, then the
|
||||||
|
// source and dest might overlap. We still want to eliminate the intermediate
|
||||||
|
// value, but we have to generate a memmove instead of memcpy.
|
||||||
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
|
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
|
||||||
if (!AA.isNoAlias(M->getRawDest(), MSize, MDep->getRawSource(), DepSize))
|
if (!AA.isNoAlias(M->getRawDest(), MSize, MDep->getRawSource(), DepSize))
|
||||||
return false;
|
ResultFn = Intrinsic::memmove;
|
||||||
|
|
||||||
// If all checks passed, then we can transform these memcpy's
|
// If all checks passed, then we can transform these memcpy's
|
||||||
const Type *ArgTys[3] = {
|
const Type *ArgTys[3] = {
|
||||||
@ -702,7 +705,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
|
|||||||
};
|
};
|
||||||
Function *MemCpyFun =
|
Function *MemCpyFun =
|
||||||
Intrinsic::getDeclaration(M->getParent()->getParent()->getParent(),
|
Intrinsic::getDeclaration(M->getParent()->getParent()->getParent(),
|
||||||
M->getIntrinsicID(), ArgTys, 3);
|
ResultFn, ArgTys, 3);
|
||||||
|
|
||||||
// Make sure to use the lesser of the alignment of the source and the dest
|
// Make sure to use the lesser of the alignment of the source and the dest
|
||||||
// since we're changing where we're reading from, but don't want to increase
|
// since we're changing where we're reading from, but don't want to increase
|
||||||
|
@ -1,17 +0,0 @@
|
|||||||
; RUN: opt < %s -memcpyopt -S | grep {call.*memcpy.*agg.result}
|
|
||||||
|
|
||||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
|
||||||
target triple = "i386-apple-darwin8"
|
|
||||||
@x = external global { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1]
|
|
||||||
|
|
||||||
define void @foo({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
|
|
||||||
entry:
|
|
||||||
%x.0 = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1]
|
|
||||||
%x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8* ; <i8*> [#uses=2]
|
|
||||||
call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 )
|
|
||||||
%agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; <i8*> [#uses=1]
|
|
||||||
call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 )
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
|
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: opt < %s -basicaa -memcpyopt -dse -S | grep {call.*memcpy} | count 1
|
; RUN: opt < %s -basicaa -memcpyopt -dse -S | FileCheck %s
|
||||||
|
|
||||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||||
target triple = "i686-apple-darwin9"
|
target triple = "i686-apple-darwin9"
|
||||||
@ -20,7 +20,7 @@ entry:
|
|||||||
|
|
||||||
; CHECK: @test1
|
; CHECK: @test1
|
||||||
; CHECK: call void @ccoshl
|
; CHECK: call void @ccoshl
|
||||||
; CHECK: call @llvm.memcpy
|
; CHECK: call void @llvm.memcpy
|
||||||
; CHECK-NOT: llvm.memcpy
|
; CHECK-NOT: llvm.memcpy
|
||||||
; CHECK: ret void
|
; CHECK: ret void
|
||||||
ret void
|
ret void
|
||||||
@ -29,3 +29,36 @@ entry:
|
|||||||
declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind
|
declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind
|
||||||
|
|
||||||
declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
|
declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
|
||||||
|
|
||||||
|
|
||||||
|
; The intermediate alloca and one of the memcpy's should be eliminated, the
|
||||||
|
; other should be replaced with a memmove.
|
||||||
|
define void @test2(i8* %P, i8* %Q) nounwind {
|
||||||
|
%memtmp = alloca { x86_fp80, x86_fp80 }, align 16
|
||||||
|
%R = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*
|
||||||
|
call void @llvm.memcpy.i32( i8* %R, i8* %P, i32 32, i32 16 )
|
||||||
|
call void @llvm.memcpy.i32( i8* %Q, i8* %R, i32 32, i32 16 )
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; CHECK: @test2
|
||||||
|
; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@x = external global { x86_fp80, x86_fp80 }
|
||||||
|
|
||||||
|
define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
|
||||||
|
%x.0 = alloca { x86_fp80, x86_fp80 }
|
||||||
|
%x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8*
|
||||||
|
call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 )
|
||||||
|
%agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*
|
||||||
|
call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 )
|
||||||
|
ret void
|
||||||
|
; CHECK: @test3
|
||||||
|
; CHECK-NEXT: %agg.result2 = bitcast
|
||||||
|
; CHECK-NEXT: call void @llvm.memcpy
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user