Add support in fast-isel for selecting memset/memcpy/memmove intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144426 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chad Rosier 2011-11-11 23:31:03 +00:00
parent 6d267449ac
commit 11add26ec2
2 changed files with 138 additions and 10 deletions

View File

@ -164,7 +164,8 @@ class ARMFastISel : public FastISel {
bool SelectFPToSI(const Instruction *I);
bool SelectSDiv(const Instruction *I);
bool SelectSRem(const Instruction *I);
bool SelectCall(const Instruction *I);
bool SelectCall(const Instruction *I, const char *IntrMemName);
bool SelectIntrinsicCall(const IntrinsicInst &I);
bool SelectSelect(const Instruction *I);
bool SelectRet(const Instruction *I);
bool SelectTrunc(const Instruction *I);
@ -1997,12 +1998,13 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
return true;
}
bool ARMFastISel::SelectCall(const Instruction *I) {
bool ARMFastISel::SelectCall(const Instruction *I,
const char *IntrMemName = 0) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
// Can't handle inline asm or worry about intrinsics yet.
if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
// Can't handle inline asm.
if (isa<InlineAsm>(Callee)) return false;
// Only handle global variable Callees.
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
@ -2044,8 +2046,12 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
ArgFlags.reserve(CS.arg_size());
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
unsigned Arg = getRegForValue(*i);
// If we're lowering a memory intrinsic instead of a regular call, skip the
// last two arguments, which shouldn't be passed to the underlying function.
if (IntrMemName && e-i <= 2)
break;
unsigned Arg = getRegForValue(*i);
if (Arg == 0)
return false;
ISD::ArgFlagsTy Flags;
@ -2090,14 +2096,16 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
if(isThumb2)
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc)))
.addGlobalAddress(GV, 0, 0);
TII.get(CallOpc)));
else
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc))
.addGlobalAddress(GV, 0, 0));
TII.get(CallOpc)));
if (!IntrMemName)
MIB.addGlobalAddress(GV, 0, 0);
else
MIB.addExternalSymbol(IntrMemName, 0);
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
@ -2112,6 +2120,46 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
return true;
}
bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default: return false;
case Intrinsic::memcpy:
case Intrinsic::memmove: {
// FIXME: Small memcpy/memmove's are common enough that we want to do them
// without a call if possible.
const MemTransferInst &MTI = cast<MemTransferInst>(I);
// Don't handle volatile.
if (MTI.isVolatile())
return false;
if (!MTI.getLength()->getType()->isIntegerTy(32))
return false;
if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
return false;
const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
return SelectCall(&I, IntrMemName);
}
case Intrinsic::memset: {
const MemSetInst &MSI = cast<MemSetInst>(I);
// Don't handle volatile.
if (MSI.isVolatile())
return false;
if (!MSI.getLength()->getType()->isIntegerTy(32))
return false;
if (MSI.getDestAddressSpace() > 255)
return false;
return SelectCall(&I, "memset");
}
}
return false;
}
bool ARMFastISel::SelectTrunc(const Instruction *I) {
// The high bits for a type smaller than the register size are assumed to be
// undefined.
@ -2235,6 +2283,8 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
case Instruction::SRem:
return SelectSRem(I);
case Instruction::Call:
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
return SelectIntrinsicCall(*II);
return SelectCall(I);
case Instruction::Select:
return SelectSelect(I);

View File

@ -0,0 +1,78 @@
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
@message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
@temp = common global [60 x i8] zeroinitializer, align 1
define void @t1() nounwind ssp {
; ARM: t1
; ARM: ldr r0, LCPI0_0
; ARM: add r0, r0, #5
; ARM: movw r1, #64
; ARM: movw r2, #10
; ARM: uxtb r1, r1
; ARM: bl #14
; THUMB: t1
; THUMB: ldr.n r0, LCPI0_0
; THUMB: adds r0, #5
; THUMB: movs r1, #64
; THUMB: movt r1, #0
; THUMB: movs r2, #10
; THUMB: movt r2, #0
; THUMB: uxtb r1, r1
; THUMB: bl _memset
call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false)
ret void
}
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
define void @t2() nounwind ssp {
; ARM: t2
; ARM: ldr r0, LCPI1_0
; ARM: ldr r0, [r0]
; ARM: add r1, r0, #4
; ARM: add r0, r0, #16
; ARM: movw r2, #10
; ARM: str r0, [sp] @ 4-byte Spill
; ARM: mov r0, r1
; ARM: ldr r1, [sp] @ 4-byte Reload
; ARM: bl #14
; THUMB: t2
; THUMB: ldr.n r0, LCPI1_0
; THUMB: ldr r0, [r0]
; THUMB: adds r1, r0, #4
; THUMB: adds r0, #16
; THUMB: movs r2, #10
; THUMB: movt r2, #0
; THUMB: mov r0, r1
; THUMB: bl _memcpy
call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
ret void
}
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
define void @t3() nounwind ssp {
; ARM: t3
; ARM: ldr r0, LCPI2_0
; ARM: ldr r0, [r0]
; ARM: add r1, r0, #4
; ARM: add r0, r0, #16
; ARM: movw r2, #10
; ARM: mov r0, r1
; ARM: bl #14
; THUMB: t3
; THUMB: ldr.n r0, LCPI2_0
; THUMB: ldr r0, [r0]
; THUMB: adds r1, r0, #4
; THUMB: adds r0, #16
; THUMB: movs r2, #10
; THUMB: movt r2, #0
; THUMB: mov r0, r1
; THUMB: bl _memmove
call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
ret void
}
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind