Add support in fast-isel for selecting memset/memcpy/memmove intrinsics.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144426 91177308-0d34-0410-b5e6-96231b3b80d8
Parent: 6d267449ac
Commit: 11add26ec2
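Illustration (not part of the original commit message): the new SelectIntrinsicCall hook only accepts memory intrinsics that are non-volatile, have an i32 length, and whose pointer operands live in address spaces no larger than 255; anything else is rejected so the usual fallback still applies. Accepted calls are routed through SelectCall with an external-symbol callee ("memset", "memcpy", or "memmove"), and argument lowering skips the intrinsic's last two operands (alignment and the volatile flag), which the libcall does not take. Below is a minimal sketch of the kind of IR this handles, in the era's five-operand intrinsic form; the globals @dst/@src and the function @copy16 are illustrative names, not taken from the commit:

; A non-volatile memcpy with an i32 length in address space 0. ARM fast-isel
; now selects this as a direct call to memcpy, passing only dest, src, and
; length; the alignment and volatile operands are dropped.
@dst = common global [16 x i8] zeroinitializer, align 1
@src = common global [16 x i8] zeroinitializer, align 1

define void @copy16() nounwind {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([16 x i8]* @dst, i32 0, i32 0),
                                       i8* getelementptr inbounds ([16 x i8]* @src, i32 0, i32 0),
                                       i32 16, i32 1, i1 false)
  ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind

Under -O0 -fast-isel-abort (as in the new test below), such a call now selects in fast-isel instead of falling back to SelectionDAG.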
lib/Target/ARM/ARMFastISel.cpp

@@ -164,7 +164,8 @@ class ARMFastISel : public FastISel {
     bool SelectFPToSI(const Instruction *I);
     bool SelectSDiv(const Instruction *I);
     bool SelectSRem(const Instruction *I);
-    bool SelectCall(const Instruction *I);
+    bool SelectCall(const Instruction *I, const char *IntrMemName);
+    bool SelectIntrinsicCall(const IntrinsicInst &I);
     bool SelectSelect(const Instruction *I);
     bool SelectRet(const Instruction *I);
     bool SelectTrunc(const Instruction *I);
@@ -1997,12 +1998,13 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
   return true;
 }

-bool ARMFastISel::SelectCall(const Instruction *I) {
+bool ARMFastISel::SelectCall(const Instruction *I,
+                             const char *IntrMemName = 0) {
   const CallInst *CI = cast<CallInst>(I);
   const Value *Callee = CI->getCalledValue();

-  // Can't handle inline asm or worry about intrinsics yet.
-  if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
+  // Can't handle inline asm.
+  if (isa<InlineAsm>(Callee)) return false;

   // Only handle global variable Callees.
   const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
@@ -2044,8 +2046,12 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
   ArgFlags.reserve(CS.arg_size());
   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
        i != e; ++i) {
-    unsigned Arg = getRegForValue(*i);
+    // If we're lowering a memory intrinsic instead of a regular call, skip the
+    // last two arguments, which shouldn't be passed to the underlying function.
+    if (IntrMemName && e-i <= 2)
+      break;
+
+    unsigned Arg = getRegForValue(*i);
     if (Arg == 0)
       return false;
     ISD::ArgFlagsTy Flags;
@@ -2090,14 +2096,16 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
   if(isThumb2)
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                         TII.get(CallOpc)))
-                         .addGlobalAddress(GV, 0, 0);
+                         TII.get(CallOpc)));
   else
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                         TII.get(CallOpc))
-                         .addGlobalAddress(GV, 0, 0));
+                         TII.get(CallOpc)));
+
+  if (!IntrMemName)
+    MIB.addGlobalAddress(GV, 0, 0);
+  else
+    MIB.addExternalSymbol(IntrMemName, 0);

   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
     MIB.addReg(RegArgs[i]);
@@ -2112,6 +2120,46 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
   return true;
 }

+bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
+  // FIXME: Handle more intrinsics.
+  switch (I.getIntrinsicID()) {
+  default: return false;
+  case Intrinsic::memcpy:
+  case Intrinsic::memmove: {
+    // FIXME: Small memcpy/memmove's are common enough that we want to do them
+    // without a call if possible.
+    const MemTransferInst &MTI = cast<MemTransferInst>(I);
+    // Don't handle volatile.
+    if (MTI.isVolatile())
+      return false;
+
+    if (!MTI.getLength()->getType()->isIntegerTy(32))
+      return false;
+
+    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
+      return false;
+
+    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
+    return SelectCall(&I, IntrMemName);
+  }
+  case Intrinsic::memset: {
+    const MemSetInst &MSI = cast<MemSetInst>(I);
+    // Don't handle volatile.
+    if (MSI.isVolatile())
+      return false;
+
+    if (!MSI.getLength()->getType()->isIntegerTy(32))
+      return false;
+
+    if (MSI.getDestAddressSpace() > 255)
+      return false;
+
+    return SelectCall(&I, "memset");
+  }
+  }
+  return false;
+}
+
 bool ARMFastISel::SelectTrunc(const Instruction *I) {
   // The high bits for a type smaller than the register size are assumed to be
   // undefined.
@@ -2235,6 +2283,8 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
     case Instruction::SRem:
       return SelectSRem(I);
     case Instruction::Call:
+      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+        return SelectIntrinsicCall(*II);
       return SelectCall(I);
     case Instruction::Select:
       return SelectSelect(I);
test/CodeGen/ARM/fast-isel-intrinsic.ll (new file, 78 lines)

@@ -0,0 +1,78 @@
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB

@message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
@temp = common global [60 x i8] zeroinitializer, align 1

define void @t1() nounwind ssp {
; ARM: t1
; ARM: ldr r0, LCPI0_0
; ARM: add r0, r0, #5
; ARM: movw r1, #64
; ARM: movw r2, #10
; ARM: uxtb r1, r1
; ARM: bl #14
; THUMB: t1
; THUMB: ldr.n r0, LCPI0_0
; THUMB: adds r0, #5
; THUMB: movs r1, #64
; THUMB: movt r1, #0
; THUMB: movs r2, #10
; THUMB: movt r2, #0
; THUMB: uxtb r1, r1
; THUMB: bl _memset
  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false)
  ret void
}

declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind

define void @t2() nounwind ssp {
; ARM: t2
; ARM: ldr r0, LCPI1_0
; ARM: ldr r0, [r0]
; ARM: add r1, r0, #4
; ARM: add r0, r0, #16
; ARM: movw r2, #10
; ARM: str r0, [sp] @ 4-byte Spill
; ARM: mov r0, r1
; ARM: ldr r1, [sp] @ 4-byte Reload
; ARM: bl #14
; THUMB: t2
; THUMB: ldr.n r0, LCPI1_0
; THUMB: ldr r0, [r0]
; THUMB: adds r1, r0, #4
; THUMB: adds r0, #16
; THUMB: movs r2, #10
; THUMB: movt r2, #0
; THUMB: mov r0, r1
; THUMB: bl _memcpy
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
  ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind

define void @t3() nounwind ssp {
; ARM: t3
; ARM: ldr r0, LCPI2_0
; ARM: ldr r0, [r0]
; ARM: add r1, r0, #4
; ARM: add r0, r0, #16
; ARM: movw r2, #10
; ARM: mov r0, r1
; ARM: bl #14
; THUMB: t3
; THUMB: ldr.n r0, LCPI2_0
; THUMB: ldr r0, [r0]
; THUMB: adds r1, r0, #4
; THUMB: adds r0, #16
; THUMB: movs r2, #10
; THUMB: movt r2, #0
; THUMB: mov r0, r1
; THUMB: bl _memmove
  call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
  ret void
}

declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind