[MemCpyOpt] Optimize double-storing by memset+memcpy.

A common idiom in some code is to do the following:

  memset(dst, 0, dst_size);
  memcpy(dst, src, src_size);

Some of the memset is redundant; instead, we can do:

  memcpy(dst, src, src_size);
  memset(dst + src_size, 0,
         dst_size <= src_size ? 0 : dst_size - src_size);

Original patch by: Joel Jones
Differential Revision: http://reviews.llvm.org/D498

llvm-svn: 235232
This commit is contained in:
Ahmed Bougacha 2015-04-17 22:20:57 +00:00
parent ad3b0a3474
commit 027739a2e4
2 changed files with 113 additions and 3 deletions

View File

@ -346,6 +346,7 @@ namespace {
uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
uint64_t MSize);
bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
bool processByValArgument(CallSite CS, unsigned ArgNo);
Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
Value *ByteVal);
@ -839,6 +840,53 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
return true;
}
/// We've found that the (upward scanning) memory dependence of \p MemCpy is
/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
/// weren't copied over by \p MemCpy.
///
/// In other words, transform:
/// \code
/// memset(dst, c, dst_size);
/// memcpy(dst, src, src_size);
/// \endcode
/// into:
/// \code
/// memcpy(dst, src, src_size);
/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
/// \endcode
bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
MemSetInst *MemSet) {
// We can only transform memset/memcpy with the same destination.
if (MemSet->getDest() != MemCpy->getDest())
return false;
Value *Dest = MemSet->getDest();
Value *DestSize = MemSet->getLength();
Value *SrcSize = MemCpy->getLength();
// By default, create an unaligned memset.
unsigned Align = 1;
// If Dest is aligned, and SrcSize is constant, use the minimum alignment
// of the sum.
const unsigned DestAlign =
std::max(MemSet->getAlignment(), MemCpy->getAlignment());
if (DestAlign > 1)
if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
IRBuilder<> Builder(MemCpy->getNextNode());
Value *MemsetLen =
Builder.CreateSelect(Builder.CreateICmpULE(DestSize, SrcSize),
ConstantInt::getNullValue(DestSize->getType()),
Builder.CreateSub(DestSize, SrcSize));
Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1),
MemsetLen, Align);
MD->removeInstruction(MemSet);
MemSet->eraseFromParent();
return true;
}
/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
@ -869,6 +917,17 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
return true;
}
AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
M, M->getParent());
// Try to turn a partially redundant memset + memcpy into
// memcpy + smaller memset. We don't need the memcpy size for this.
if (SrcDepInfo.isClobber())
if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
if (processMemSetMemCpyDependence(M, MDep))
return true;
// The optimizations after this point require the memcpy size.
ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
if (!CopySize) return false;
@ -892,9 +951,6 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
}
}
AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
M, M->getParent());
if (SrcDepInfo.isClobber()) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());

View File

@ -0,0 +1,54 @@
; RUN: opt -memcpyopt -S %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-LABEL: define void @test
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
; CHECK-DAG: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size
; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size
; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size
; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 0, i64 [[SIZE]], i32 1, i1 false)
; CHECK-NEXT: ret void
define void @test(i8* %src, i64 %src_size, i8* %dst, i64 %dst_size) {
call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
ret void
}
; CHECK-LABEL: define void @test_align_same
; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 8, i1 false)
define void @test_align_same(i8* %src, i8* %dst, i64 %dst_size) {
call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 8, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 1, i1 false)
ret void
}
; CHECK-LABEL: define void @test_align_min
; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 4, i1 false)
define void @test_align_min(i8* %src, i8* %dst, i64 %dst_size) {
call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 8, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 36, i32 1, i1 false)
ret void
}
; CHECK-LABEL: define void @test_align_memcpy
; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 8, i1 false)
define void @test_align_memcpy(i8* %src, i8* %dst, i64 %dst_size) {
call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 8, i1 false)
ret void
}
; CHECK-LABEL: define void @test_different_dst
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false)
; CHECK-NEXT: ret void
define void @test_different_dst(i8* %dst2, i8* %src, i64 %src_size, i8* %dst, i64 %dst_size) {
call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false)
ret void
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)