mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-11 13:37:07 +00:00
[XCore] Target optimized library function __memcpy_4()
Summary: If the src, dst and size of a memcpy are known to be 4 byte aligned we can call __memcpy_4() instead of memcpy(). Reviewers: robertlytton Reviewed By: robertlytton CC: llvm-commits Differential Revision: http://llvm-reviews.chandlerc.com/D2871 llvm-svn: 202395
This commit is contained in:
parent
75c16f2bf4
commit
5ac74685fd
@ -21,3 +21,36 @@ XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM)
|
|||||||
|
|
||||||
XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
|
XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// XCore-specific lowering of memcpy.
///
/// A call to the library routine "__memcpy_4" is emitted when all of the
/// following hold:
///   - the copy is not required to be expanded inline (\p AlwaysInline is
///     false),
///   - the two low bits of \p Align are clear, and
///   - the two low bits of \p Size are known to be zero.
///
/// \p isVolatile, \p DstPtrInfo and \p SrcPtrInfo are not consulted.
///
/// \returns the second element of the pair produced by LowerCallTo when the
/// call is emitted; otherwise a null SDValue so that the target-independent
/// code calls memcpy.
SDValue XCoreSelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // Mask covering the two low bits of the size operand, built at the width
  // of that operand's value type.
  const APInt LowTwoBits(Size.getValueType().getSizeInBits(), 3);

  const bool UseMemcpy4 = !AlwaysInline && (Align & 3) == 0 &&
                          DAG.MaskedValueIsZero(Size, LowTwoBits);
  if (!UseMemcpy4) {
    // Otherwise have the target-independent code call memcpy.
    return SDValue();
  }

  const TargetLowering &Lowering = *DAG.getTarget().getTargetLowering();

  // Every argument shares the same type entry; only the node differs.
  TargetLowering::ArgListEntry Arg;
  Arg.Ty = Lowering.getDataLayout()->getIntPtrType(*DAG.getContext());

  TargetLowering::ArgListTy CallArgs;
  const SDValue Operands[] = { Dst, Src, Size };
  for (unsigned Idx = 0; Idx != 3; ++Idx) {
    Arg.Node = Operands[Idx];
    CallArgs.push_back(Arg);
  }

  SDValue Callee =
      DAG.getExternalSymbol("__memcpy_4", Lowering.getPointerTy());

  // The call returns void and uses the calling convention registered for the
  // regular memcpy libcall.
  TargetLowering::CallLoweringInfo CLI(
      Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false,
      0, Lowering.getLibcallCallingConv(RTLIB::MEMCPY),
      /*isTailCall=*/false, /*doesNotRet=*/false,
      /*isReturnValueUsed=*/false, Callee, CallArgs, DAG, dl);

  return Lowering.LowerCallTo(CLI).second;
}
|
||||||
|
@ -24,6 +24,15 @@ class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
|
|||||||
public:
|
public:
|
||||||
explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
|
explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
|
||||||
~XCoreSelectionDAGInfo();
|
~XCoreSelectionDAGInfo();
|
||||||
|
|
||||||
|
/// Target hook giving XCore a chance to emit its own code for a memcpy.
///
/// \param Dst   destination operand of the copy.
/// \param Src   source operand of the copy.
/// \param Size  number of bytes to copy.
/// \param Align alignment passed in by the caller for the copy.
///
/// The implementation emits a call to "__memcpy_4" when \p AlwaysInline is
/// false, the two low bits of \p Align are clear and the two low bits of
/// \p Size are known to be zero. In every other case it returns a null
/// SDValue so that the target-independent code calls memcpy.
virtual SDValue
EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
                        SDValue Chain,
                        SDValue Dst, SDValue Src,
                        SDValue Size, unsigned Align, bool isVolatile,
                        bool AlwaysInline,
                        MachinePointerInfo DstPtrInfo,
                        MachinePointerInfo SrcPtrInfo) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -20,7 +20,7 @@ entry:
|
|||||||
; CHECK: ldaw r5, sp[1]
|
; CHECK: ldaw r5, sp[1]
|
||||||
; CHECK: ldc r2, 40
|
; CHECK: ldc r2, 40
|
||||||
; CHECK: mov r0, r5
|
; CHECK: mov r0, r5
|
||||||
; CHECK: bl memcpy
|
; CHECK: bl __memcpy_4
|
||||||
; CHECK: mov r0, r5
|
; CHECK: mov r0, r5
|
||||||
; CHECK: bl f1
|
; CHECK: bl f1
|
||||||
; CHECK: mov r0, r4
|
; CHECK: mov r0, r4
|
||||||
|
32
test/CodeGen/XCore/memcpy.ll
Normal file
32
test/CodeGen/XCore/memcpy.ll
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
; RUN: llc < %s -march=xcore | FileCheck %s
|
||||||
|
|
||||||
|
; Positive case: the copy is declared 4-byte aligned and its length (%n << 2)
; is a known multiple of 4, so the call must go to __memcpy_4.
define void @f1(i8* %dst, i8* %src, i32 %n) nounwind {
; CHECK-LABEL: f1:
; CHECK: bl __memcpy_4
entry:
  %len = shl i32 %n, 2
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 4, i1 false)
  ret void
}
|
||||||
|
|
||||||
|
; Negative case: the length %n is arbitrary, so it is not known to be a
; multiple of 4 and the plain memcpy call must be kept.
define void @f2(i8* %dst, i8* %src, i32 %n) nounwind {
; CHECK-LABEL: f2:
; CHECK: bl memcpy
entry:
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %n, i32 4, i1 false)
  ret void
}
|
||||||
|
|
||||||
|
; Negative case: the length (%n << 2) is a multiple of 4, but the copy is only
; declared 2-byte aligned, so the plain memcpy call must be kept.
define void @f3(i8* %dst, i8* %src, i32 %n) nounwind {
; CHECK-LABEL: f3:
; CHECK: bl memcpy
entry:
  %len = shl i32 %n, 2
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 2, i1 false)
  ret void
}
|
||||||
|
|
||||||
|
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
|
Loading…
Reference in New Issue
Block a user