ARM EABI divmod support

This patch enables calls to __aeabi_idivmod in EABI mode (selected by
the ARM triple "none-eabi"), using the remainder value that the call
returns in a register (R1). Note that Darwin and GNUEABI triples will
continue to lower in the GNU style, that is, using the stack for the
remainder.

Still to do: fix the 64-bit SREM/UREM lowering so that it also uses the divmod calls.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186390 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Renato Golin 2013-07-16 09:32:17 +00:00
parent 9273151c3b
commit 103ba845f0
4 changed files with 289 additions and 2 deletions

View File

@ -693,10 +693,36 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
}
// FIXME: Also set divmod for SREM on EABI
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
// Register based DivRem for AEABI (RTABI 4.2)
if (Subtarget->isTargetAEABI()) {
setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod");
setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
} else {
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
}
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
@ -5863,6 +5889,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
case ISD::SDIVREM:
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
}
}
@ -10677,6 +10705,54 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
/// Lower an ISD::SDIVREM / ISD::UDIVREM node to one runtime-library call that
/// yields the quotient and the remainder together.
///
/// The libcall is selected from the node's first result type and from whether
/// the opcode is the signed variant.  Every operand of the node is forwarded
/// as a call argument, and the call's return type is a two-element struct of
/// the result type.  Only the first element of the pair produced by
/// LowerCallTo is handed back to the caller.
///
/// Asserts that the subtarget is AEABI and that the opcode is one of the two
/// DIVREM opcodes.
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
  const unsigned NodeOpc = Op->getOpcode();
  assert((NodeOpc == ISD::SDIVREM || NodeOpc == ISD::UDIVREM) &&
         "Invalid opcode for Div/Rem lowering");
  const bool Signed = (NodeOpc == ISD::SDIVREM);

  EVT ResVT = Op->getValueType(0);
  Type *ResTy = ResVT.getTypeForEVT(*DAG.getContext());

  // Choose the signed or unsigned divrem libcall matching the result width.
  RTLIB::Libcall DivRemLC;
  switch (ResVT.getSimpleVT().SimpleTy) {
  case MVT::i8:
    DivRemLC = Signed ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
    break;
  case MVT::i16:
    DivRemLC = Signed ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
    break;
  case MVT::i32:
    DivRemLC = Signed ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
    break;
  case MVT::i64:
    DivRemLC = Signed ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64;
    break;
  default:
    llvm_unreachable("Unexpected request for libcall!");
  }

  SDValue Chain = DAG.getEntryNode();

  // Forward each operand of the node as an argument.  The extension flags
  // depend only on signedness, so they are set once on the reused entry.
  TargetLowering::ArgListTy CallArgs;
  TargetLowering::ArgListEntry Arg;
  Arg.isSExt = Signed;
  Arg.isZExt = !Signed;
  for (unsigned Idx = 0, NumOps = Op->getNumOperands(); Idx != NumOps; ++Idx) {
    SDValue Operand = Op->getOperand(Idx);
    Arg.Node = Operand;
    Arg.Ty = Operand.getValueType().getTypeForEVT(*DAG.getContext());
    CallArgs.push_back(Arg);
  }

  SDValue Target = DAG.getExternalSymbol(getLibcallName(DivRemLC),
                                         getPointerTy());

  // The call returns a { ResTy, ResTy } struct.
  Type *PairTy = static_cast<Type *>(StructType::get(ResTy, ResTy, NULL));

  SDLoc Loc(Op);
  // NOTE(review): the unlabeled `false, true, 0` arguments are positional
  // CallLoweringInfo parameters whose names are not visible from here --
  // confirm them against the constructor declaration.
  TargetLowering::CallLoweringInfo CLI(
      Chain, PairTy, Signed, !Signed, false, true, 0,
      getLibcallCallingConv(DivRemLC), /*isTailCall=*/false,
      /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Target, CallArgs,
      DAG, Loc);

  return LowerCallTo(CLI).first;
}
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.

View File

@ -457,6 +457,7 @@ namespace llvm {
const ARMSubtarget *ST) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const;
/// Custom lowering for ISD::SDIVREM / ISD::UDIVREM: emits a single libcall
/// whose struct return type holds two values of the node's result type.
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be

View File

@ -280,6 +280,14 @@ public:
// OS predicates: each compares the OS component of TargetTriple.
bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NaCl; }
bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
// Every target that is not Darwin is reported as ELF.
bool isTargetELF() const { return !isTargetDarwin(); }
// ARM EABI is the bare-metal EABI described in the ARM ABI documents and
// is selected with -target arm-none-eabi. This is NOT GNUEABI.
// FIXME: Add a flag for bare-metal for that target and set Triple::EABI
// even for GNUEABI, so that we can make the distinction here and still
// conform to the EABI in GNU (and Android) mode. This requires changes in
// Clang, too.
// Returns true only when the triple's environment is exactly Triple::EABI.
bool isTargetAEABI() const {
return TargetTriple.getEnvironment() == Triple::EABI;
}
// ABI predicates: each compares TargetABI against one ARM_ABI_* value.
bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }

View File

@ -0,0 +1,202 @@
; RUN: llc -mtriple armv7-none-eabi %s -o - | FileCheck %s --check-prefix=EABI
; RUN: llc -mtriple armv7-linux-gnueabi %s -o - | FileCheck %s --check-prefix=GNU
; RUN: llc -mtriple armv7-apple-darwin %s -o - | FileCheck %s --check-prefix=DARWIN
; Signed div and rem of the same pair of sign-extended i16 operands, plus a
; second rem with the operands swapped. The EABI run expects the combined
; __aeabi_idivmod call with the quotient read from r0 and the remainder from
; r1; the GNU and Darwin runs expect a divide call followed by __modsi3.
define signext i16 @f16(i16 signext %a, i16 signext %b) {
; EABI: f16:
; GNU: f16:
; DARWIN: f16:
entry:
%conv = sext i16 %a to i32
%conv1 = sext i16 %b to i32
%div = sdiv i32 %conv, %conv1
%rem = srem i32 %conv, %conv1
; EABI: __aeabi_idivmod
; EABI: mov [[div:r[0-9]+]], r0
; EABI: mov [[rem:r[0-9]+]], r1
; GNU: __aeabi_idiv
; GNU: mov [[sum:r[0-9]+]], r0
; GNU: __modsi3
; GNU: add [[sum]]{{.*}}r0
; DARWIN: ___divsi3
; DARWIN: mov [[sum:r[0-9]+]], r0
; DARWIN: __modsi3
; DARWIN: add [[sum]]{{.*}}r0
%rem8 = srem i32 %conv1, %conv
; EABI: __aeabi_idivmod
; GNU: __modsi3
; DARWIN: __modsi3
%add = add nsw i32 %rem, %div
%add13 = add nsw i32 %add, %rem8
%conv14 = trunc i32 %add13 to i16
; EABI: add r0{{.*}}r1
; EABI: sxth r0, r0
; GNU: add r0{{.*}}[[sum]]
; GNU: sxth r0, r0
; DARWIN: add r0{{.*}}[[sum]]
; DARWIN: sxth r0, r0
ret i16 %conv14
}
; i32 variant of the signed test: div and rem of (%a, %b) plus a rem of
; (%b, %a). The EABI run expects __aeabi_idivmod for both groups, with the
; results of the first call copied out of r0 and r1; the GNU and Darwin runs
; expect a divide call and two __modsi3 calls.
define i32 @f32(i32 %a, i32 %b) {
; EABI: f32:
; GNU: f32:
; DARWIN: f32:
entry:
%div = sdiv i32 %a, %b
%rem = srem i32 %a, %b
; EABI: __aeabi_idivmod
; EABI: mov [[div:r[0-9]+]], r0
; EABI: mov [[rem:r[0-9]+]], r1
; GNU: __aeabi_idiv
; GNU: mov [[sum:r[0-9]+]], r0
; GNU: __modsi3
; GNU: add [[sum]]{{.*}}r0
; DARWIN: ___divsi3
; DARWIN: mov [[sum:r[0-9]+]], r0
; DARWIN: __modsi3
; DARWIN: add [[sum]]{{.*}}r0
%rem1 = srem i32 %b, %a
; EABI: __aeabi_idivmod
; GNU: __modsi3
; DARWIN: __modsi3
%add = add nsw i32 %rem, %div
%add2 = add nsw i32 %add, %rem1
; EABI: add r0{{.*}}r1
; GNU: add r0{{.*}}[[sum]]
; DARWIN: add r0{{.*}}[[sum]]
ret i32 %add2
}
; Unsigned counterpart of the i32 test: udiv and urem of (%a, %b) plus a urem
; of (%b, %a). The EABI run expects __aeabi_uidivmod; the GNU and Darwin runs
; expect an unsigned divide call followed by __umodsi3.
define i32 @uf(i32 %a, i32 %b) {
; EABI: uf:
; GNU: uf:
; DARWIN: uf:
entry:
%div = udiv i32 %a, %b
%rem = urem i32 %a, %b
; EABI: __aeabi_uidivmod
; GNU: __aeabi_uidiv
; GNU: mov [[sum:r[0-9]+]], r0
; GNU: __umodsi3
; GNU: add [[sum]]{{.*}}r0
; DARWIN: ___udivsi3
; DARWIN: mov [[sum:r[0-9]+]], r0
; DARWIN: __umodsi3
; DARWIN: add [[sum]]{{.*}}r0
%rem1 = urem i32 %b, %a
; EABI: __aeabi_uidivmod
; GNU: __umodsi3
; DARWIN: __umodsi3
%add = add nuw i32 %rem, %div
%add2 = add nuw i32 %add, %rem1
; EABI: add r0{{.*}}r1
; GNU: add r0{{.*}}[[sum]]
; DARWIN: add r0{{.*}}[[sum]]
ret i32 %add2
}
; FIXME: AEABI is not lowering long u/srem into u/ldivmod
; 64-bit signed div and rem of the same operands. The EABI run only checks
; that __aeabi_ldivmod is called; the GNU and Darwin runs additionally check
; that the two halves of the quotient are saved from r0/r1 and then added
; with an adds/adc pair.
define i64 @longf(i64 %a, i64 %b) {
; EABI: longf:
; GNU: longf:
; DARWIN: longf:
entry:
%div = sdiv i64 %a, %b
%rem = srem i64 %a, %b
; EABI: __aeabi_ldivmod
; GNU: __aeabi_ldivmod
; GNU: mov [[div1:r[0-9]+]], r0
; GNU: mov [[div2:r[0-9]+]], r1
; DARWIN: ___divdi3
; DARWIN: mov [[div1:r[0-9]+]], r0
; DARWIN: mov [[div2:r[0-9]+]], r1
; DARWIN: __moddi3
%add = add nsw i64 %rem, %div
; GNU: adds r0{{.*}}[[div1]]
; GNU: adc r1{{.*}}[[div2]]
; DARWIN: adds r0{{.*}}[[div1]]
; DARWIN: adc r1{{.*}}[[div2]]
ret i64 %add
}
; Minimal div + rem of the same operands. The EABI run expects a single
; __aeabi_idivmod call whose r0 and r1 results are added directly; the GNU
; and Darwin runs expect a divide call, a saved quotient, and __modsi3.
define i32 @g1(i32 %a, i32 %b) {
; EABI: g1:
; GNU: g1:
; DARWIN: g1:
entry:
%div = sdiv i32 %a, %b
%rem = srem i32 %a, %b
; EABI: __aeabi_idivmod
; GNU: __aeabi_idiv
; GNU: mov [[sum:r[0-9]+]], r0
; GNU: __modsi3
; DARWIN: ___divsi3
; DARWIN: mov [[sum:r[0-9]+]], r0
; DARWIN: __modsi3
%add = add nsw i32 %rem, %div
; EABI: add r0{{.*}}r1
; GNU: add r0{{.*}}[[sum]]
; DARWIN: add r0{{.*}}[[sum]]
ret i32 %add
}
; On both Darwin and Gnu, this is just a call to __modsi3
; A lone srem. The EABI run expects __aeabi_idivmod followed by a move of r1
; into r0 to produce the return value.
define i32 @g2(i32 %a, i32 %b) {
; EABI: g2:
; GNU: g2:
; DARWIN: g2:
entry:
%rem = srem i32 %a, %b
; EABI: __aeabi_idivmod
; GNU: __modsi3
; DARWIN: __modsi3
ret i32 %rem
; EABI: mov r0, r1
}
; Two chained srem operations: the second uses the first remainder as its
; divisor. The EABI run expects the first remainder to be saved out of r1
; across the second __aeabi_idivmod call and then added to the new r1; the
; GNU and Darwin runs expect two __modsi3 calls with the first result saved
; from r0.
define i32 @g3(i32 %a, i32 %b) {
; EABI: g3:
; GNU: g3:
; DARWIN: g3:
entry:
%rem = srem i32 %a, %b
; EABI: __aeabi_idivmod
; EABI: mov [[mod:r[0-9]+]], r1
; GNU: __modsi3
; GNU: mov [[sum:r[0-9]+]], r0
; DARWIN: __modsi3
; DARWIN: mov [[sum:r[0-9]+]], r0
%rem1 = srem i32 %b, %rem
; EABI: __aeabi_idivmod
; EABI: add r0, r1, [[mod]]
; GNU: __modsi3
; DARWIN: __modsi3
%add = add nsw i32 %rem1, %rem
; GNU: add r0{{.*}}[[sum]]
; DARWIN: add r0{{.*}}[[sum]]
ret i32 %add
}
; An sdiv whose quotient feeds a following srem as the divisor. The EABI run
; expects two __aeabi_idivmod calls, with the quotient saved out of r0 after
; the first and added to r1 after the second. The GNU and Darwin runs expect
; a divide call, the quotient saved from r0, then __modsi3.
define i32 @g4(i32 %a, i32 %b) {
; EABI: g4:
; GNU: g4:
; DARWIN: g4:
entry:
%div = sdiv i32 %a, %b
; EABI: __aeabi_idivmod
; EABI: mov [[div:r[0-9]+]], r0
; GNU: __aeabi_idiv
; GNU: mov [[sum:r[0-9]+]], r0
; DARWIN: ___divsi3
; DARWIN: mov [[sum:r[0-9]+]], r0
%rem = srem i32 %b, %div
; EABI: __aeabi_idivmod
; GNU: __modsi3
; DARWIN: __modsi3
%add = add nsw i32 %rem, %div
; EABI: add r0, r1, [[div]]
; GNU: add r0{{.*}}[[sum]]
; DARWIN: add r0{{.*}}[[sum]]
ret i32 %add
}