Add support in fast-isel for selecting memset/memcpy/memmove intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144426 91177308-0d34-0410-b5e6-96231b3b80d8
2025-02-05 11:57:07 +00:00 · 2011-11-11 23:31:03 +00:00 · 2011-11-11 23:31:03 +00:00 · 11add26ec2
commit 11add26ec2
parent 6d267449ac
2 changed files with 138 additions and 10 deletions
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@ -164,7 +164,8 @@ class ARMFastISel : public FastISel {
    bool SelectFPToSI(const Instruction *I);
    bool SelectSDiv(const Instruction *I);
    bool SelectSRem(const Instruction *I);
-    bool SelectCall(const Instruction *I);
+    bool SelectCall(const Instruction *I, const char *IntrMemName);
+    bool SelectIntrinsicCall(const IntrinsicInst &I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
@ -1997,12 +1998,13 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  return true;
 }

-bool ARMFastISel::SelectCall(const Instruction *I) {
+bool ARMFastISel::SelectCall(const Instruction *I,
+                             const char *IntrMemName = 0) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

-  // Can't handle inline asm or worry about intrinsics yet.
-  if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
+  // Can't handle inline asm.
+  if (isa<InlineAsm>(Callee)) return false;

  // Only handle global variable Callees.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
@ -2044,8 +2046,12 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
  ArgFlags.reserve(CS.arg_size());
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
-    unsigned Arg = getRegForValue(*i);
+    // If we're lowering a memory intrinsic instead of a regular call, skip the
+    // last two arguments, which shouldn't be passed to the underlying function.
+    if (IntrMemName && e-i <= 2)
+      break;

+    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;
    ISD::ArgFlagsTy Flags;
@ -2090,14 +2096,16 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
  if(isThumb2)
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                         TII.get(CallOpc)))
-          .addGlobalAddress(GV, 0, 0);
+                                 TII.get(CallOpc)));
  else
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                         TII.get(CallOpc))
-          .addGlobalAddress(GV, 0, 0));
-
+                                 TII.get(CallOpc)));
+  if (!IntrMemName)
+    MIB.addGlobalAddress(GV, 0, 0);
+  else 
+    MIB.addExternalSymbol(IntrMemName, 0);
+  
  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);
@ -2112,6 +2120,46 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
  return true;
 }

+bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
+  // FIXME: Handle more intrinsics.
+  switch (I.getIntrinsicID()) {
+  default: return false;
+  case Intrinsic::memcpy:
+  case Intrinsic::memmove: {
+    // FIXME: Small memcpy/memmove's are common enough that we want to do them
+    // without a call if possible.
+    const MemTransferInst &MTI = cast<MemTransferInst>(I);
+    // Don't handle volatile.
+    if (MTI.isVolatile())
+      return false;
+    
+    if (!MTI.getLength()->getType()->isIntegerTy(32))
+      return false;
+    
+    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
+      return false;
+
+    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
+    return SelectCall(&I, IntrMemName);
+  }
+  case Intrinsic::memset: {
+    const MemSetInst &MSI = cast<MemSetInst>(I);
+    // Don't handle volatile.
+    if (MSI.isVolatile())
+      return false;
+    
+    if (!MSI.getLength()->getType()->isIntegerTy(32))
+      return false;
+    
+    if (MSI.getDestAddressSpace() > 255)
+      return false;
+    
+    return SelectCall(&I, "memset");
+  }
+  }
+  return false;    
+}
+
 bool ARMFastISel::SelectTrunc(const Instruction *I) {
  // The high bits for a type smaller than the register size are assumed to be 
  // undefined.
@ -2235,6 +2283,8 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
    case Instruction::SRem:
      return SelectSRem(I);
    case Instruction::Call:
+      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+        return SelectIntrinsicCall(*II);
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@ -0,0 +1,78 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+
+@message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
+@temp = common global [60 x i8] zeroinitializer, align 1
+
+define void @t1() nounwind ssp {
+; ARM: t1
+; ARM: ldr r0, LCPI0_0
+; ARM: add r0, r0, #5
+; ARM: movw r1, #64
+; ARM: movw r2, #10
+; ARM: uxtb r1, r1
+; ARM: bl #14
+; THUMB: t1
+; THUMB: ldr.n r0, LCPI0_0
+; THUMB: adds r0, #5
+; THUMB: movs r1, #64
+; THUMB: movt r1, #0
+; THUMB: movs r2, #10
+; THUMB: movt r2, #0
+; THUMB: uxtb r1, r1
+; THUMB: bl _memset
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @t2() nounwind ssp {
+; ARM: t2
+; ARM: ldr r0, LCPI1_0
+; ARM: ldr r0, [r0]
+; ARM: add r1, r0, #4
+; ARM: add r0, r0, #16
+; ARM: movw r2, #10
+; ARM: str r0, [sp]                @ 4-byte Spill
+; ARM: mov r0, r1
+; ARM: ldr r1, [sp]                @ 4-byte Reload
+; ARM: bl #14
+; THUMB: t2
+; THUMB: ldr.n r0, LCPI1_0
+; THUMB: ldr r0, [r0]
+; THUMB: adds r1, r0, #4
+; THUMB: adds r0, #16
+; THUMB: movs r2, #10
+; THUMB: movt r2, #0
+; THUMB: mov r0, r1
+; THUMB: bl _memcpy
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define void @t3() nounwind ssp {
+; ARM: t3
+; ARM: ldr r0, LCPI2_0
+; ARM: ldr r0, [r0]
+; ARM: add r1, r0, #4
+; ARM: add r0, r0, #16
+; ARM: movw r2, #10
+; ARM: mov r0, r1
+; ARM: bl #14
+; THUMB: t3
+; THUMB: ldr.n r0, LCPI2_0
+; THUMB: ldr r0, [r0]
+; THUMB: adds r1, r0, #4
+; THUMB: adds r0, #16
+; THUMB: movs r2, #10
+; THUMB: movt r2, #0
+; THUMB: mov r0, r1
+; THUMB: bl _memmove
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind