mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-15 20:51:35 +00:00
[intrinsics] Add @llvm.memcpy.inline intrinsics
Summary: This is a follow up on D61634. It adds an LLVM IR intrinsic to allow better implementation of memcpy from C++. A follow up CL will add the intrinsics in Clang. Reviewers: courbet, theraven, t.p.northover, jdoerfert, tejohnson Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71710
This commit is contained in:
parent
2533bc2361
commit
879c825cb8
@ -11719,6 +11719,65 @@ the argument.
|
||||
If "len" is 0, the pointers may be NULL or dangling. However, they must still
|
||||
be appropriately aligned.
|
||||
|
||||
.. _int_memcpy_inline:
|
||||
|
||||
'``llvm.memcpy.inline``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
|
||||
This is an overloaded intrinsic. You can use ``llvm.memcpy.inline`` on any
|
||||
integer bit width and for different address spaces. Not all targets
|
||||
support all bit widths however.
|
||||
|
||||
::
|
||||
|
||||
declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
|
||||
i32 <len>, i1 <isvolatile>)
|
||||
declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
|
||||
i64 <len>, i1 <isvolatile>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.memcpy.inline.*``' intrinsics copy a block of memory from the
|
||||
source location to the destination location and guarantee that no external
|
||||
functions are called.
|
||||
|
||||
Note that, unlike the standard libc function, the ``llvm.memcpy.inline.*``
|
||||
intrinsics do not return a value, take an extra isvolatile
|
||||
argument, and the pointers can be in specified address spaces.
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The first argument is a pointer to the destination, the second is a
|
||||
pointer to the source. The third argument is a constant integer argument
|
||||
specifying the number of bytes to copy, and the fourth is a
|
||||
boolean indicating a volatile access.
|
||||
|
||||
The :ref:`align <attr_align>` parameter attribute can be provided
|
||||
for the first and second arguments.
|
||||
|
||||
If the ``isvolatile`` parameter is ``true``, the ``llvm.memcpy.inline`` call is
|
||||
a :ref:`volatile operation <volatile>`. The detailed access behavior is not
|
||||
very cleanly specified and it is unwise to depend on it.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.memcpy.inline.*``' intrinsics copy a block of memory from the
|
||||
source location to the destination location, which are not allowed to
|
||||
overlap. It copies "len" bytes of memory over. If the argument is known
|
||||
to be aligned to some boundary, this can be specified as an attribute on
|
||||
the argument.
|
||||
|
||||
If "len" is 0, the pointers may be NULL or dangling. However, they must still
|
||||
be appropriately aligned.
|
||||
|
||||
The generated code is guaranteed not to call any external functions.
|
||||
|
||||
.. _int_memmove:
|
||||
|
||||
'``llvm.memmove``' Intrinsic
|
||||
|
@ -582,6 +582,7 @@ namespace llvm {
|
||||
case Intrinsic::memcpy:
|
||||
case Intrinsic::memmove:
|
||||
case Intrinsic::memset:
|
||||
case Intrinsic::memcpy_inline:
|
||||
return true;
|
||||
default: return false;
|
||||
}
|
||||
@ -608,8 +609,14 @@ namespace llvm {
|
||||
public:
|
||||
// Methods for support type inquiry through isa, cast, and dyn_cast:
|
||||
static bool classof(const IntrinsicInst *I) {
|
||||
return I->getIntrinsicID() == Intrinsic::memcpy ||
|
||||
I->getIntrinsicID() == Intrinsic::memmove;
|
||||
switch (I->getIntrinsicID()) {
|
||||
case Intrinsic::memcpy:
|
||||
case Intrinsic::memmove:
|
||||
case Intrinsic::memcpy_inline:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
static bool classof(const Value *V) {
|
||||
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
|
||||
@ -640,6 +647,21 @@ namespace llvm {
|
||||
}
|
||||
};
|
||||
|
||||
/// This class wraps the llvm.memcpy.inline intrinsic.
|
||||
class MemCpyInlineInst : public MemTransferInst {
|
||||
public:
|
||||
ConstantInt *getLength() const {
|
||||
return cast<ConstantInt>(MemTransferInst::getLength());
|
||||
}
|
||||
// Methods for support type inquiry through isa, cast, and dyn_cast:
|
||||
static bool classof(const IntrinsicInst *I) {
|
||||
return I->getIntrinsicID() == Intrinsic::memcpy_inline;
|
||||
}
|
||||
static bool classof(const Value *V) {
|
||||
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
|
||||
}
|
||||
};
|
||||
|
||||
// The common base class for any memset/memmove/memcpy intrinsics;
|
||||
// whether they be atomic or non-atomic.
|
||||
// i.e. llvm.element.unordered.atomic.memset/memcpy/memmove
|
||||
@ -656,6 +678,7 @@ namespace llvm {
|
||||
static bool classof(const IntrinsicInst *I) {
|
||||
switch (I->getIntrinsicID()) {
|
||||
case Intrinsic::memcpy:
|
||||
case Intrinsic::memcpy_inline:
|
||||
case Intrinsic::memmove:
|
||||
case Intrinsic::memset:
|
||||
case Intrinsic::memcpy_element_unordered_atomic:
|
||||
@ -698,6 +721,7 @@ namespace llvm {
|
||||
static bool classof(const IntrinsicInst *I) {
|
||||
switch (I->getIntrinsicID()) {
|
||||
case Intrinsic::memcpy:
|
||||
case Intrinsic::memcpy_inline:
|
||||
case Intrinsic::memmove:
|
||||
case Intrinsic::memcpy_element_unordered_atomic:
|
||||
case Intrinsic::memmove_element_unordered_atomic:
|
||||
@ -719,6 +743,7 @@ namespace llvm {
|
||||
static bool classof(const IntrinsicInst *I) {
|
||||
switch (I->getIntrinsicID()) {
|
||||
case Intrinsic::memcpy:
|
||||
case Intrinsic::memcpy_inline:
|
||||
case Intrinsic::memcpy_element_unordered_atomic:
|
||||
return true;
|
||||
default:
|
||||
|
@ -511,6 +511,20 @@ def int_memcpy : Intrinsic<[],
|
||||
llvm_i1_ty],
|
||||
[IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>,
|
||||
NoAlias<0>, NoAlias<1>, WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>;
|
||||
|
||||
// Memcpy semantic that is guaranteed to be inlined.
|
||||
// In particular this means that the generated code is not allowed to call any
|
||||
// external function.
|
||||
// The third argument (specifying the size) must be a constant.
|
||||
def int_memcpy_inline
|
||||
: Intrinsic<[],
|
||||
[ llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty ],
|
||||
[ IntrArgMemOnly, IntrWillReturn,
|
||||
NoCapture<0>, NoCapture<1>,
|
||||
NoAlias<0>, NoAlias<1>,
|
||||
WriteOnly<0>, ReadOnly<1>,
|
||||
ImmArg<2>, ImmArg<3> ]>;
|
||||
|
||||
def int_memmove : Intrinsic<[],
|
||||
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
|
||||
llvm_i1_ty],
|
||||
|
@ -345,6 +345,22 @@ void Lint::visitCallSite(CallSite CS) {
|
||||
"Undefined behavior: memcpy source and destination overlap", &I);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::memcpy_inline: {
|
||||
MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I);
|
||||
const uint64_t Size = MCII->getLength()->getValue().getLimitedValue();
|
||||
visitMemoryReference(I, MCII->getDest(), Size, MCII->getDestAlignment(),
|
||||
nullptr, MemRef::Write);
|
||||
visitMemoryReference(I, MCII->getSource(), Size,
|
||||
MCII->getSourceAlignment(), nullptr, MemRef::Read);
|
||||
|
||||
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
|
||||
// isn't expressive enough for what we really want to do. Known partial
|
||||
// overlap is not distinguished from the case where nothing is known.
|
||||
const LocationSize LS = LocationSize::precise(Size);
|
||||
Assert(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != MustAlias,
|
||||
"Undefined behavior: memcpy source and destination overlap", &I);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::memmove: {
|
||||
MemMoveInst *MMI = cast<MemMoveInst>(&I);
|
||||
// TODO: If the size is known, use it.
|
||||
|
@ -5840,12 +5840,33 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
|
||||
// node.
|
||||
SDValue Root = isVol ? getRoot() : getMemoryRoot();
|
||||
SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol,
|
||||
false, isTC,
|
||||
/* AlwaysInline */ false, isTC,
|
||||
MachinePointerInfo(I.getArgOperand(0)),
|
||||
MachinePointerInfo(I.getArgOperand(1)));
|
||||
updateDAGForMaybeTailCall(MC);
|
||||
return;
|
||||
}
|
||||
case Intrinsic::memcpy_inline: {
|
||||
const auto &MCI = cast<MemCpyInlineInst>(I);
|
||||
SDValue Dst = getValue(I.getArgOperand(0));
|
||||
SDValue Src = getValue(I.getArgOperand(1));
|
||||
SDValue Size = getValue(I.getArgOperand(2));
|
||||
assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
|
||||
// @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
|
||||
Align DstAlign = MCI.getDestAlign().valueOrOne();
|
||||
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
|
||||
Align Alignment = commonAlignment(DstAlign, SrcAlign);
|
||||
bool isVol = MCI.isVolatile();
|
||||
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
|
||||
// FIXME: Support passing different dest/src alignments to the memcpy DAG
|
||||
// node.
|
||||
SDValue MC = DAG.getMemcpy(
|
||||
getRoot(), sdl, Dst, Src, Size, Alignment.value(), isVol,
|
||||
/* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)),
|
||||
MachinePointerInfo(I.getArgOperand(1)));
|
||||
updateDAGForMaybeTailCall(MC);
|
||||
return;
|
||||
}
|
||||
case Intrinsic::memset: {
|
||||
const auto &MSI = cast<MemSetInst>(I);
|
||||
SDValue Op1 = getValue(I.getArgOperand(0));
|
||||
|
@ -4347,6 +4347,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
|
||||
visitDbgLabelIntrinsic("label", cast<DbgLabelInst>(Call));
|
||||
break;
|
||||
case Intrinsic::memcpy:
|
||||
case Intrinsic::memcpy_inline:
|
||||
case Intrinsic::memmove:
|
||||
case Intrinsic::memset: {
|
||||
const auto *MI = cast<MemIntrinsic>(&Call);
|
||||
|
39
llvm/test/CodeGen/X86/memcpy-inline.ll
Normal file
39
llvm/test/CodeGen/X86/memcpy-inline.ll
Normal file
@ -0,0 +1,39 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck -check-prefix=X64 %s
|
||||
|
||||
; NOTE: This is expected to fail on targets that do not support memcpy.
|
||||
; RUN: llc < %s -mtriple=r600-unknown-linux-gnu 2> %t.err || true
|
||||
; RUN: FileCheck --input-file %t.err -check-prefix=R600 %s
|
||||
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
|
||||
declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
|
||||
|
||||
define void @test1(i8* %a, i8* %b) nounwind {
|
||||
; X64-LABEL: test1:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq (%rsi), %rax
|
||||
; X64-NEXT: movq %rax, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
; R600: LLVM ERROR
|
||||
tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 8, i1 0 )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @regular_memcpy_calls_external_function(i8* %a, i8* %b) nounwind {
|
||||
; X64-LABEL: regular_memcpy_calls_external_function:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl $128, %edx
|
||||
; X64-NEXT: jmp memcpy # TAILCALL
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 0 )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @inlined_copy_doesnt_call_external_function(i8* %a, i8* %b) nounwind {
|
||||
; X64-LABEL: inlined_copy_doesnt_call_external_function:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl $128, %ecx
|
||||
; X64-NEXT: rep;movsb (%rsi), %es:(%rdi)
|
||||
; X64-NEXT: retq
|
||||
tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 0 )
|
||||
ret void
|
||||
}
|
@ -4,6 +4,7 @@ target datalayout = "e-p:64:64:64"
|
||||
declare fastcc void @bar()
|
||||
declare void @llvm.stackrestore(i8*)
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
|
||||
declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
|
||||
declare void @has_sret(i8* sret %p)
|
||||
declare void @has_noaliases(i32* noalias %p, i32* %q)
|
||||
declare void @one_arg(i32)
|
||||
@ -80,6 +81,8 @@ define i32 @foo() noreturn {
|
||||
|
||||
; CHECK: Write to read-only memory
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG2 to i8*), i64 1, i1 0)
|
||||
; CHECK: Write to read-only memory
|
||||
call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG2 to i8*), i64 1, i1 0)
|
||||
; CHECK: Unusual: noalias argument aliases another argument
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG to i8*), i64 1, i1 0)
|
||||
|
||||
@ -189,3 +192,11 @@ entry:
|
||||
; CHECK: Undefined behavior: indirectbr with no destinations
|
||||
indirectbr i8* null, []
|
||||
}
|
||||
|
||||
define i32 @memcpy_inline_same_address() noreturn {
|
||||
%buf = alloca i64, align 1
|
||||
%ptr = bitcast i64* %buf to i8*
|
||||
; CHECK: Unusual: noalias argument aliases another argument
|
||||
call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %ptr, i8* %ptr, i64 1, i1 false)
|
||||
unreachable
|
||||
}
|
||||
|
@ -27,6 +27,23 @@ define void @memcpy(i8* %dest, i8* %src, i1 %is.volatile) {
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
|
||||
define void @memcpy_inline_is_volatile(i8* %dest, i8* %src, i1 %is.volatile) {
|
||||
; CHECK: immarg operand has non-immediate parameter
|
||||
; CHECK-NEXT: i1 %is.volatile
|
||||
; CHECK-NEXT: call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
|
||||
call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_inline_variable_size(i8* %dest, i8* %src, i32 %size) {
|
||||
; CHECK: immarg operand has non-immediate parameter
|
||||
; CHECK-NEXT: i32 %size
|
||||
; CHECK-NEXT: call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %size, i1 true)
|
||||
call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %size, i1 true)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
|
||||
define void @memmove(i8* %dest, i8* %src, i1 %is.volatile) {
|
||||
; CHECK: immarg operand has non-immediate parameter
|
||||
|
9
llvm/test/Verifier/memcpy-inline.ll
Normal file
9
llvm/test/Verifier/memcpy-inline.ll
Normal file
@ -0,0 +1,9 @@
|
||||
; RUN: not opt -verify < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: alignment is not a power of two
|
||||
|
||||
define void @foo(i8* %P, i8* %Q) {
|
||||
call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* align 3 %P, i8* %Q, i32 4, i1 false)
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
|
Loading…
x
Reference in New Issue
Block a user