[intrinsics] Add @llvm.memcpy.inline intrinsics

Summary:
This is a follow-up on D61634. It adds an LLVM IR intrinsic to allow a better implementation of memcpy from C++.
A follow-up CL will add the intrinsics in Clang.

Reviewers: courbet, theraven, t.p.northover, jdoerfert, tejohnson

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71710
This commit is contained in:
Guillaume Chatelet 2019-12-19 17:33:36 +01:00
parent 2533bc2361
commit 879c825cb8
10 changed files with 215 additions and 3 deletions

View File

@ -11719,6 +11719,65 @@ the argument.
If "len" is 0, the pointers may be NULL or dangling. However, they must still
be appropriately aligned.
.. _int_memcpy_inline:
'``llvm.memcpy.inline``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic. You can use ``llvm.memcpy.inline`` on any
integer bit width and for different address spaces. Not all targets
support all bit widths however.
::
declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
i32 <len>, i1 <isvolatile>)
declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
i64 <len>, i1 <isvolatile>)
Overview:
"""""""""
The '``llvm.memcpy.inline.*``' intrinsics copy a block of memory from the
source location to the destination location and guarantee that no external
functions are called.
Note that, unlike the standard libc function, the ``llvm.memcpy.inline.*``
intrinsics do not return a value, take an extra ``isvolatile``
argument, and the pointers can be in specified address spaces.
Arguments:
""""""""""
The first argument is a pointer to the destination, the second is a
pointer to the source. The third argument is a constant integer argument
specifying the number of bytes to copy, and the fourth is a
boolean indicating a volatile access.
The :ref:`align <attr_align>` parameter attribute can be provided
for the first and second arguments.
If the ``isvolatile`` parameter is ``true``, the ``llvm.memcpy.inline`` call is
a :ref:`volatile operation <volatile>`. The detailed access behavior is not
very cleanly specified and it is unwise to depend on it.
Semantics:
""""""""""
The '``llvm.memcpy.inline.*``' intrinsics copy a block of memory from the
source location to the destination location, which are not allowed to
overlap. They copy "len" bytes of memory over. If a pointer argument is known
to be aligned to some boundary, this can be specified as an attribute on
that argument.
If "len" is 0, the pointers may be NULL or dangling. However, they must still
be appropriately aligned.
The generated code is guaranteed not to call any external functions.
.. _int_memmove:
'``llvm.memmove``' Intrinsic

View File

@ -582,6 +582,7 @@ namespace llvm {
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
case Intrinsic::memcpy_inline:
return true;
default: return false;
}
@ -608,8 +609,14 @@ namespace llvm {
public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::memcpy ||
I->getIntrinsicID() == Intrinsic::memmove;
switch (I->getIntrinsicID()) {
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memcpy_inline:
return true;
default:
return false;
}
}
static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
@ -640,6 +647,21 @@ namespace llvm {
}
};
/// Typed view over a call to the llvm.memcpy.inline intrinsic.
class MemCpyInlineInst : public MemTransferInst {
public:
  /// Returns the byte count of the copy as a ConstantInt. This shadows the
  /// base-class accessor and narrows its result with cast<ConstantInt>.
  ConstantInt *getLength() const {
    Value *Len = MemTransferInst::getLength();
    return cast<ConstantInt>(Len);
  }

  // Type inquiry hooks used by isa, cast, and dyn_cast.

  /// An intrinsic call belongs to this class iff its ID is memcpy_inline.
  static bool classof(const IntrinsicInst *I) {
    return I->getIntrinsicID() == Intrinsic::memcpy_inline;
  }

  /// A generic value belongs to this class iff it is an intrinsic call that
  /// passes the IntrinsicInst overload above.
  static bool classof(const Value *V) {
    if (!isa<IntrinsicInst>(V))
      return false;
    return classof(cast<IntrinsicInst>(V));
  }
};
// The common base class for any memset/memmove/memcpy intrinsics;
// whether they be atomic or non-atomic.
// i.e. llvm.element.unordered.atomic.memset/memcpy/memmove
@ -656,6 +678,7 @@ namespace llvm {
static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::memcpy:
case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
case Intrinsic::memset:
case Intrinsic::memcpy_element_unordered_atomic:
@ -698,6 +721,7 @@ namespace llvm {
static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::memcpy:
case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
case Intrinsic::memcpy_element_unordered_atomic:
case Intrinsic::memmove_element_unordered_atomic:
@ -719,6 +743,7 @@ namespace llvm {
static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::memcpy:
case Intrinsic::memcpy_inline:
case Intrinsic::memcpy_element_unordered_atomic:
return true;
default:

View File

@ -511,6 +511,20 @@ def int_memcpy : Intrinsic<[],
llvm_i1_ty],
[IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>,
NoAlias<0>, NoAlias<1>, WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>;
// Memcpy semantics that are guaranteed to be inlined.
// In particular this means that the generated code is not allowed to call any
// external function.
// Operands, in order: destination pointer, source pointer, size, isvolatile.
// Both the size (operand 2) and the isvolatile flag (operand 3) are marked
// ImmArg, i.e. they must be immediate constants at every call site.
def int_memcpy_inline
: Intrinsic<[],
[ llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty ],
[ IntrArgMemOnly, IntrWillReturn,
NoCapture<0>, NoCapture<1>,
NoAlias<0>, NoAlias<1>,
WriteOnly<0>, ReadOnly<1>,
ImmArg<2>, ImmArg<3> ]>;
def int_memmove : Intrinsic<[],
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
llvm_i1_ty],

View File

@ -345,6 +345,22 @@ void Lint::visitCallSite(CallSite CS) {
"Undefined behavior: memcpy source and destination overlap", &I);
break;
}
case Intrinsic::memcpy_inline: {
MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I);
const uint64_t Size = MCII->getLength()->getValue().getLimitedValue();
visitMemoryReference(I, MCII->getDest(), Size, MCII->getDestAlignment(),
nullptr, MemRef::Write);
visitMemoryReference(I, MCII->getSource(), Size,
MCII->getSourceAlignment(), nullptr, MemRef::Read);
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
// isn't expressive enough for what we really want to do. Known partial
// overlap is not distinguished from the case where nothing is known.
const LocationSize LS = LocationSize::precise(Size);
Assert(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != MustAlias,
"Undefined behavior: memcpy source and destination overlap", &I);
break;
}
case Intrinsic::memmove: {
MemMoveInst *MMI = cast<MemMoveInst>(&I);
// TODO: If the size is known, use it.

View File

@ -5840,12 +5840,33 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol,
false, isTC,
/* AlwaysInline */ false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memcpy_inline: {
const auto &MCI = cast<MemCpyInlineInst>(I);
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
// @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
Align DstAlign = MCI.getDestAlign().valueOrOne();
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
Align Alignment = commonAlignment(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue MC = DAG.getMemcpy(
getRoot(), sdl, Dst, Src, Size, Alignment.value(), isVol,
/* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memset: {
const auto &MSI = cast<MemSetInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));

View File

@ -4347,6 +4347,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
visitDbgLabelIntrinsic("label", cast<DbgLabelInst>(Call));
break;
case Intrinsic::memcpy:
case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
case Intrinsic::memset: {
const auto *MI = cast<MemIntrinsic>(&Call);

View File

@ -0,0 +1,39 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck -check-prefix=X64 %s
; NOTE: This is expected to fail on targets that do not support memcpy.
; RUN: llc < %s -mtriple=r600-unknown-linux-gnu 2> %t.err || true
; RUN: FileCheck --input-file %t.err -check-prefix=R600 %s
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
define void @test1(i8* %a, i8* %b) nounwind {
; X64-LABEL: test1:
; X64: # %bb.0:
; X64-NEXT: movq (%rsi), %rax
; X64-NEXT: movq %rax, (%rdi)
; X64-NEXT: retq
; R600: LLVM ERROR
tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 8, i1 0 )
ret void
}
define void @regular_memcpy_calls_external_function(i8* %a, i8* %b) nounwind {
; X64-LABEL: regular_memcpy_calls_external_function:
; X64: # %bb.0:
; X64-NEXT: movl $128, %edx
; X64-NEXT: jmp memcpy # TAILCALL
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 0 )
ret void
}
define void @inlined_copy_doesnt_call_external_function(i8* %a, i8* %b) nounwind {
; X64-LABEL: inlined_copy_doesnt_call_external_function:
; X64: # %bb.0:
; X64-NEXT: movl $128, %ecx
; X64-NEXT: rep;movsb (%rsi), %es:(%rdi)
; X64-NEXT: retq
tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 0 )
ret void
}

View File

@ -4,6 +4,7 @@ target datalayout = "e-p:64:64:64"
declare fastcc void @bar()
declare void @llvm.stackrestore(i8*)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
declare void @has_sret(i8* sret %p)
declare void @has_noaliases(i32* noalias %p, i32* %q)
declare void @one_arg(i32)
@ -80,6 +81,8 @@ define i32 @foo() noreturn {
; CHECK: Write to read-only memory
call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG2 to i8*), i64 1, i1 0)
; CHECK: Write to read-only memory
call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG2 to i8*), i64 1, i1 0)
; CHECK: Unusual: noalias argument aliases another argument
call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG to i8*), i64 1, i1 0)
@ -189,3 +192,11 @@ entry:
; CHECK: Undefined behavior: indirectbr with no destinations
indirectbr i8* null, []
}
define i32 @memcpy_inline_same_address() noreturn {
%buf = alloca i64, align 1
%ptr = bitcast i64* %buf to i8*
; CHECK: Unusual: noalias argument aliases another argument
call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %ptr, i8* %ptr, i64 1, i1 false)
unreachable
}

View File

@ -27,6 +27,23 @@ define void @memcpy(i8* %dest, i8* %src, i1 %is.volatile) {
ret void
}
declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
define void @memcpy_inline_is_volatile(i8* %dest, i8* %src, i1 %is.volatile) {
; CHECK: immarg operand has non-immediate parameter
; CHECK-NEXT: i1 %is.volatile
; CHECK-NEXT: call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
ret void
}
define void @memcpy_inline_variable_size(i8* %dest, i8* %src, i32 %size) {
; CHECK: immarg operand has non-immediate parameter
; CHECK-NEXT: i32 %size
; CHECK-NEXT: call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %size, i1 true)
call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %size, i1 true)
ret void
}
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
define void @memmove(i8* %dest, i8* %src, i1 %is.volatile) {
; CHECK: immarg operand has non-immediate parameter

View File

@ -0,0 +1,9 @@
; RUN: not opt -verify < %s 2>&1 | FileCheck %s
; CHECK: alignment is not a power of two
define void @foo(i8* %P, i8* %Q) {
call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* align 3 %P, i8* %Q, i32 4, i1 false)
ret void
}
declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind