[MemCpyOpt] Optimize double-storing by memset+memcpy.
A common idiom in some code is to do the following:

  memset(dst, 0, dst_size);
  memcpy(dst, src, src_size);

Some of the memset is redundant; instead, we can do:

  memcpy(dst, src, src_size);
  memset(dst + src_size, 0,
         dst_size <= src_size ? 0 : dst_size - src_size);

Original patch by: Joel Jones

Differential Revision: http://reviews.llvm.org/D498

llvm-svn: 235232
commit 027739a2e4 (parent ad3b0a3474)
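For illustration, the source-level shape of the transformation described in the commit message, as a minimal C++ sketch; the function names before/after and the parameter names are illustrative only, not part of the patch:

  #include <cstddef>
  #include <cstring>

  // Before: memset initializes all of dst, then memcpy immediately overwrites
  // the first src_size bytes, so that part of the memset is wasted work.
  void before(char *dst, std::size_t dst_size,
              const char *src, std::size_t src_size) {
    std::memset(dst, 0, dst_size);
    std::memcpy(dst, src, src_size);
  }

  // After: copy first, then zero only the tail the memcpy did not cover.
  // The comparison guards the unsigned subtraction when dst_size < src_size.
  void after(char *dst, std::size_t dst_size,
             const char *src, std::size_t src_size) {
    std::memcpy(dst, src, src_size);
    std::memset(dst + src_size, 0,
                dst_size <= src_size ? 0 : dst_size - src_size);
  }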
@@ -346,6 +346,7 @@ namespace {
                               uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
     bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
                                        uint64_t MSize);
+    bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
     bool processByValArgument(CallSite CS, unsigned ArgNo);
     Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
                                       Value *ByteVal);
@@ -839,6 +840,53 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
   return true;
 }

+/// We've found that the (upward scanning) memory dependence of \p MemCpy is
+/// \p MemSet.  Try to simplify \p MemSet to only set the trailing bytes that
+/// weren't copied over by \p MemCpy.
+///
+/// In other words, transform:
+/// \code
+///   memset(dst, c, dst_size);
+///   memcpy(dst, src, src_size);
+/// \endcode
+/// into:
+/// \code
+///   memcpy(dst, src, src_size);
+///   memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
+/// \endcode
+bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
+                                              MemSetInst *MemSet) {
+  // We can only transform memset/memcpy with the same destination.
+  if (MemSet->getDest() != MemCpy->getDest())
+    return false;
+
+  Value *Dest = MemSet->getDest();
+  Value *DestSize = MemSet->getLength();
+  Value *SrcSize = MemCpy->getLength();
+
+  // By default, create an unaligned memset.
+  unsigned Align = 1;
+  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
+  // of the sum.
+  const unsigned DestAlign =
+      std::max(MemSet->getAlignment(), MemCpy->getAlignment());
+  if (DestAlign > 1)
+    if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
+      Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
+
+  IRBuilder<> Builder(MemCpy->getNextNode());
+
+  Value *MemsetLen =
+      Builder.CreateSelect(Builder.CreateICmpULE(DestSize, SrcSize),
+                           ConstantInt::getNullValue(DestSize->getType()),
+                           Builder.CreateSub(DestSize, SrcSize));
+  Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1),
+                       MemsetLen, Align);
+
+  MD->removeInstruction(MemSet);
+  MemSet->eraseFromParent();
+  return true;
+}
+
 /// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
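As a plain C++ sketch (outside LLVM, with illustrative names such as computeTrailingMemset) of what the new function computes, where DestAlign stands for the common destination alignment taken from the original memset/memcpy:

  #include <cstdint>

  struct TrailingMemset {
    uint64_t Offset; // the tail memset starts at dst + Offset
    uint64_t Length; // number of trailing bytes still to be set
    unsigned Align;  // alignment usable for the tail memset
  };

  // Largest power of two dividing both A and B (the idea behind LLVM's MinAlign).
  static uint64_t minAlignOf(uint64_t A, uint64_t B) {
    return (A | B) & (~(A | B) + 1);
  }

  TrailingMemset computeTrailingMemset(uint64_t DstSize, uint64_t SrcSize,
                                       unsigned DestAlign) {
    TrailingMemset T;
    T.Offset = SrcSize;
    // Same guard as the CreateICmpULE/CreateSelect above: if the memcpy already
    // covers the whole memset region, nothing is left to set, and the
    // subtraction would otherwise wrap.
    T.Length = DstSize <= SrcSize ? 0 : DstSize - SrcSize;
    // dst is DestAlign-aligned, so dst + SrcSize is aligned to the largest
    // power of two dividing both DestAlign and SrcSize; with a non-constant
    // SrcSize the patch conservatively falls back to alignment 1.
    T.Align = DestAlign > 1
                  ? static_cast<unsigned>(minAlignOf(SrcSize, DestAlign))
                  : 1;
    return T;
  }

Because the trailing length is built from a select over the size values, the rewrite does not require the sizes to be compile-time constants, which is why the next hunk performs this check in processMemCpy before the constant-CopySize requirement.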
@@ -869,6 +917,17 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
         return true;
       }

+  AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
+  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
+                                                         M, M->getParent());
+
+  // Try to turn a partially redundant memset + memcpy into
+  // memcpy + smaller memset.  We don't need the memcpy size for this.
+  if (SrcDepInfo.isClobber())
+    if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
+      if (processMemSetMemCpyDependence(M, MDep))
+        return true;
+
   // The optimizations after this point require the memcpy size.
   ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
   if (!CopySize) return false;
@@ -892,9 +951,6 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
     }
   }

-  AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
-  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
-                                                         M, M->getParent());
   if (SrcDepInfo.isClobber()) {
     if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
       return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll (new file, 54 lines)

@@ -0,0 +1,54 @@
+; RUN: opt -memcpyopt -S %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: define void @test
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
+; CHECK-DAG: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size
+; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size
+; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size
+; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 0, i64 [[SIZE]], i32 1, i1 false)
+; CHECK-NEXT: ret void
+define void @test(i8* %src, i64 %src_size, i8* %dst, i64 %dst_size) {
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_align_same
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 8, i1 false)
+define void @test_align_same(i8* %src, i8* %dst, i64 %dst_size) {
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 1, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_align_min
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 4, i1 false)
+define void @test_align_min(i8* %src, i8* %dst, i64 %dst_size) {
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 36, i32 1, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_align_memcpy
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 8, i1 false)
+define void @test_align_memcpy(i8* %src, i8* %dst, i64 %dst_size) {
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 8, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_different_dst
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false)
+; CHECK-NEXT: ret void
+define void @test_different_dst(i8* %dst2, i8* %src, i64 %src_size, i8* %dst, i64 %dst_size) {
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
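The alignment CHECKs follow from MinAlign of the destination alignment and the constant memcpy size: in @test_align_same and @test_align_memcpy one of the two calls carries destination alignment 8 and the copied size is 80 (a multiple of 8), so the tail memset keeps alignment 8; in @test_align_min the size is 36, and the largest power of two dividing both 36 and 8 is 4, hence alignment 4. Assuming a built opt and FileCheck on PATH, the test can be exercised the same way its RUN line does:

  opt -memcpyopt -S test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll \
    | FileCheck test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll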