mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-12 06:06:32 +00:00
[ARM] Enable DP copy, load and store instructions for FPv4-SP
The FPv4-SP floating-point unit is generally referred to as single-precision only, but it does have double-precision registers and load, store and GPR<->DPR move instructions which operate on them. This patch enables the use of these registers, the main advantage of which is that we now comply with the AAPCS-VFP calling convention. This partially reverts r209650, which added some AAPCS-VFP support, but did not handle return values or alignment of double arguments in registers. This patch also adds tests for Thumb2 code generation for floating-point instructions and intrinsics, which previously only existed for ARM. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216172 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
fc4bdcdc87
commit
760a46522a
@ -3517,6 +3517,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
RTLIB::FMA_F80, RTLIB::FMA_F128,
|
||||
RTLIB::FMA_PPCF128));
|
||||
break;
|
||||
case ISD::FADD:
|
||||
Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
|
||||
RTLIB::ADD_F80, RTLIB::ADD_F128,
|
||||
RTLIB::ADD_PPCF128));
|
||||
break;
|
||||
case ISD::FMUL:
|
||||
Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
|
||||
RTLIB::MUL_F80, RTLIB::MUL_F128,
|
||||
RTLIB::MUL_PPCF128));
|
||||
break;
|
||||
case ISD::FP16_TO_FP: {
|
||||
if (Node->getValueType(0) == MVT::f32) {
|
||||
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
|
||||
@ -3549,12 +3559,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
}
|
||||
case ISD::FSUB: {
|
||||
EVT VT = Node->getValueType(0);
|
||||
assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
|
||||
TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
|
||||
"Don't know how to expand this FP subtraction!");
|
||||
Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
|
||||
Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
|
||||
Results.push_back(Tmp1);
|
||||
if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
|
||||
TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
|
||||
Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
|
||||
Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
|
||||
Results.push_back(Tmp1);
|
||||
} else {
|
||||
Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
|
||||
RTLIB::SUB_F80, RTLIB::SUB_F128,
|
||||
RTLIB::SUB_PPCF128));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ISD::SUB: {
|
||||
|
@ -7265,8 +7265,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
|
||||
}
|
||||
if (Args[i].isNest)
|
||||
Flags.setNest();
|
||||
if (NeedsRegBlock)
|
||||
if (NeedsRegBlock) {
|
||||
Flags.setInConsecutiveRegs();
|
||||
if (Value == NumValues - 1)
|
||||
Flags.setInConsecutiveRegsLast();
|
||||
}
|
||||
Flags.setOrigAlign(OriginalAlignment);
|
||||
|
||||
MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
|
||||
@ -7312,10 +7315,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
|
||||
else if (j != 0)
|
||||
MyFlags.Flags.setOrigAlign(1);
|
||||
|
||||
// Only mark the end at the last register of the last value.
|
||||
if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1)
|
||||
MyFlags.Flags.setInConsecutiveRegsLast();
|
||||
|
||||
CLI.Outs.push_back(MyFlags);
|
||||
CLI.OutVals.push_back(Parts[j]);
|
||||
}
|
||||
@ -7530,8 +7529,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
|
||||
}
|
||||
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
|
||||
Flags.setNest();
|
||||
if (NeedsRegBlock)
|
||||
if (NeedsRegBlock) {
|
||||
Flags.setInConsecutiveRegs();
|
||||
if (Value == NumValues - 1)
|
||||
Flags.setInConsecutiveRegsLast();
|
||||
}
|
||||
Flags.setOrigAlign(OriginalAlignment);
|
||||
|
||||
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
|
||||
@ -7544,11 +7546,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
|
||||
// if it isn't first piece, alignment must be 1
|
||||
else if (i > 0)
|
||||
MyFlags.Flags.setOrigAlign(1);
|
||||
|
||||
// Only mark the end at the last register of the last value.
|
||||
if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1)
|
||||
MyFlags.Flags.setInConsecutiveRegsLast();
|
||||
|
||||
Ins.push_back(MyFlags);
|
||||
}
|
||||
PartBase += VT.getStoreSize();
|
||||
|
@ -721,7 +721,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
Opc = ARM::VMOVRS;
|
||||
else if (SPRDest && GPRSrc)
|
||||
Opc = ARM::VMOVSR;
|
||||
else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
|
||||
else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
|
||||
Opc = ARM::VMOVD;
|
||||
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
|
||||
Opc = ARM::VORRq;
|
||||
@ -781,6 +781,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
BeginIdx = ARM::dsub_0;
|
||||
SubRegs = 4;
|
||||
Spacing = 2;
|
||||
} else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
|
||||
Opc = ARM::VMOVS;
|
||||
BeginIdx = ARM::ssub_0;
|
||||
SubRegs = 2;
|
||||
}
|
||||
|
||||
assert(Opc && "Impossible reg-to-reg copy");
|
||||
@ -1231,7 +1235,8 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
|
||||
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
|
||||
// widened to VMOVD. We prefer the VMOVD when possible because it may be
|
||||
// changed into a VORR that can go down the NEON pipeline.
|
||||
if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
|
||||
if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() ||
|
||||
Subtarget.isFPOnlySP())
|
||||
return false;
|
||||
|
||||
// Look for a copy between even S-registers. That is where we keep floats
|
||||
|
@ -177,8 +177,9 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
||||
CCValAssign::LocInfo &LocInfo,
|
||||
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
|
||||
SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
|
||||
|
||||
// AAPCS HFAs must have 1-4 elements, all of the same type
|
||||
assert(PendingHAMembers.size() < 8);
|
||||
assert(PendingHAMembers.size() < 4);
|
||||
if (PendingHAMembers.size() > 0)
|
||||
assert(PendingHAMembers[0].getLocVT() == LocVT);
|
||||
|
||||
@ -188,7 +189,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
||||
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
|
||||
|
||||
if (ArgFlags.isInConsecutiveRegsLast()) {
|
||||
assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 &&
|
||||
assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
|
||||
"Homogeneous aggregates must have between 1 and 4 members");
|
||||
|
||||
// Try to allocate a contiguous block of registers, each of the correct
|
||||
@ -196,7 +197,6 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
||||
const uint16_t *RegList;
|
||||
unsigned NumRegs;
|
||||
switch (LocVT.SimpleTy) {
|
||||
case MVT::i32:
|
||||
case MVT::f32:
|
||||
RegList = SRegList;
|
||||
NumRegs = 16;
|
||||
@ -235,20 +235,11 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
||||
State.AllocateReg(SRegList[regNo]);
|
||||
|
||||
unsigned Size = LocVT.getSizeInBits() / 8;
|
||||
unsigned Align = Size;
|
||||
|
||||
if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) {
|
||||
// Vectors are always aligned to 8 bytes. If we've seen an i32 here
|
||||
// it's because it's been split from a larger type, also with align 8.
|
||||
Align = 8;
|
||||
}
|
||||
unsigned Align = std::min(Size, 8U);
|
||||
|
||||
for (auto It : PendingHAMembers) {
|
||||
It.convertToMem(State.AllocateStack(Size, Align));
|
||||
State.addLoc(It);
|
||||
|
||||
// Only the first member needs to be aligned.
|
||||
Align = 1;
|
||||
}
|
||||
|
||||
// All pending members have now been allocated
|
||||
|
@ -445,8 +445,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
||||
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
|
||||
!Subtarget->isThumb1Only()) {
|
||||
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
|
||||
if (!Subtarget->isFPOnlySP())
|
||||
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
|
||||
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
|
||||
}
|
||||
|
||||
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
|
||||
@ -628,6 +627,39 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
||||
if (!Subtarget->isThumb1Only())
|
||||
setTargetDAGCombine(ISD::ADDC);
|
||||
|
||||
if (Subtarget->isFPOnlySP()) {
|
||||
// When targeting a floating-point unit with only single-precision
|
||||
// operations, f64 is legal for the few double-precision instructions which
|
||||
// are present. However, no double-precision operations other than moves,
|
||||
// loads and stores are provided by the hardware.
|
||||
setOperationAction(ISD::FADD, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FSUB, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMUL, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMA, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FDIV, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FREM, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FNEG, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FABS, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FSIN, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FCOS, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FPOWI, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FPOW, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FLOG, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FLOG2, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FLOG10, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FEXP, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FEXP2, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FCEIL, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FRINT, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
|
||||
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
|
||||
}
|
||||
|
||||
computeRegisterProperties();
|
||||
|
||||
@ -3276,6 +3308,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
|
||||
SDValue
|
||||
ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
|
||||
SDLoc dl) const {
|
||||
assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
|
||||
SDValue Cmp;
|
||||
if (!isFloatingPointZero(RHS))
|
||||
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
|
||||
@ -3391,9 +3424,8 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse,
|
||||
ARMcc, CCR, OverflowCmp);
|
||||
|
||||
return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR,
|
||||
OverflowCmp, DAG);
|
||||
}
|
||||
|
||||
// Convert:
|
||||
@ -3427,7 +3459,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue CCR = Cond.getOperand(3);
|
||||
SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
|
||||
assert(True.getValueType() == VT);
|
||||
return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
|
||||
return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3497,6 +3529,32 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
|
||||
}
|
||||
}
|
||||
|
||||
SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
|
||||
SDValue TrueVal, SDValue ARMcc, SDValue CCR,
|
||||
SDValue Cmp, SelectionDAG &DAG) const {
|
||||
if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
|
||||
FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
|
||||
DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
|
||||
TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
|
||||
DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
|
||||
|
||||
SDValue TrueLow = TrueVal.getValue(0);
|
||||
SDValue TrueHigh = TrueVal.getValue(1);
|
||||
SDValue FalseLow = FalseVal.getValue(0);
|
||||
SDValue FalseHigh = FalseVal.getValue(1);
|
||||
|
||||
SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
|
||||
ARMcc, CCR, Cmp);
|
||||
SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
|
||||
ARMcc, CCR, duplicateCmp(Cmp, DAG));
|
||||
|
||||
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
|
||||
} else {
|
||||
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
|
||||
Cmp);
|
||||
}
|
||||
}
|
||||
|
||||
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
|
||||
EVT VT = Op.getValueType();
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
@ -3506,6 +3564,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue FalseVal = Op.getOperand(3);
|
||||
SDLoc dl(Op);
|
||||
|
||||
if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
|
||||
DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
|
||||
dl);
|
||||
|
||||
// If softenSetCCOperands only returned one value, we should compare it to
|
||||
// zero.
|
||||
if (!RHS.getNode()) {
|
||||
RHS = DAG.getConstant(0, LHS.getValueType());
|
||||
CC = ISD::SETNE;
|
||||
}
|
||||
}
|
||||
|
||||
if (LHS.getValueType() == MVT::i32) {
|
||||
// Try to generate VSEL on ARMv8.
|
||||
// The VSEL instruction can't use all the usual ARM condition
|
||||
@ -3530,8 +3600,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue ARMcc;
|
||||
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
|
||||
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
|
||||
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
|
||||
Cmp);
|
||||
return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
|
||||
}
|
||||
|
||||
ARMCC::CondCodes CondCode, CondCode2;
|
||||
@ -3570,14 +3639,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
|
||||
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
|
||||
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
|
||||
SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
|
||||
ARMcc, CCR, Cmp);
|
||||
SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
|
||||
if (CondCode2 != ARMCC::AL) {
|
||||
SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
|
||||
// FIXME: Needs another CMP because flag can have but one use.
|
||||
SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
|
||||
Result = DAG.getNode(ARMISD::CMOV, dl, VT,
|
||||
Result, TrueVal, ARMcc2, CCR, Cmp2);
|
||||
Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
|
||||
}
|
||||
return Result;
|
||||
}
|
||||
@ -3710,6 +3777,18 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue Dest = Op.getOperand(4);
|
||||
SDLoc dl(Op);
|
||||
|
||||
if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
|
||||
DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
|
||||
dl);
|
||||
|
||||
// If softenSetCCOperands only returned one value, we should compare it to
|
||||
// zero.
|
||||
if (!RHS.getNode()) {
|
||||
RHS = DAG.getConstant(0, LHS.getValueType());
|
||||
CC = ISD::SETNE;
|
||||
}
|
||||
}
|
||||
|
||||
if (LHS.getValueType() == MVT::i32) {
|
||||
SDValue ARMcc;
|
||||
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
|
||||
@ -3802,11 +3881,23 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
|
||||
}
|
||||
|
||||
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
|
||||
SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
|
||||
EVT VT = Op.getValueType();
|
||||
if (VT.isVector())
|
||||
return LowerVectorFP_TO_INT(Op, DAG);
|
||||
|
||||
if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
|
||||
RTLIB::Libcall LC;
|
||||
if (Op.getOpcode() == ISD::FP_TO_SINT)
|
||||
LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
|
||||
Op.getValueType());
|
||||
else
|
||||
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
|
||||
Op.getValueType());
|
||||
return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
|
||||
/*isSigned*/ false, SDLoc(Op)).first;
|
||||
}
|
||||
|
||||
SDLoc dl(Op);
|
||||
unsigned Opc;
|
||||
|
||||
@ -3856,11 +3947,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
|
||||
return DAG.getNode(Opc, dl, VT, Op);
|
||||
}
|
||||
|
||||
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
|
||||
SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
|
||||
EVT VT = Op.getValueType();
|
||||
if (VT.isVector())
|
||||
return LowerVectorINT_TO_FP(Op, DAG);
|
||||
|
||||
if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
|
||||
RTLIB::Libcall LC;
|
||||
if (Op.getOpcode() == ISD::SINT_TO_FP)
|
||||
LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
|
||||
Op.getValueType());
|
||||
else
|
||||
LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
|
||||
Op.getValueType());
|
||||
return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
|
||||
/*isSigned*/ false, SDLoc(Op)).first;
|
||||
}
|
||||
|
||||
SDLoc dl(Op);
|
||||
unsigned Opc;
|
||||
|
||||
@ -4369,7 +4472,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
|
||||
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
|
||||
SDLoc dl(Op);
|
||||
|
||||
if (Op.getOperand(1).getValueType().isFloatingPoint()) {
|
||||
if (Op1.getValueType().isFloatingPoint()) {
|
||||
switch (SetCCOpcode) {
|
||||
default: llvm_unreachable("Illegal FP comparison");
|
||||
case ISD::SETUNE:
|
||||
@ -4633,6 +4736,11 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
|
||||
bool IsDouble = Op.getValueType() == MVT::f64;
|
||||
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
|
||||
|
||||
// Use the default (constant pool) lowering for double constants when we have
|
||||
// an SP-only FPU
|
||||
if (IsDouble && Subtarget->isFPOnlySP())
|
||||
return SDValue();
|
||||
|
||||
// Try splatting with a VMOV.f32...
|
||||
APFloat FPVal = CFP->getValueAPF();
|
||||
int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
|
||||
@ -6336,6 +6444,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
|
||||
return LowerDYNAMIC_STACKALLOC(Op, DAG);
|
||||
llvm_unreachable("Don't know how to custom lower this!");
|
||||
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
|
||||
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
|
||||
}
|
||||
}
|
||||
|
||||
@ -8479,10 +8589,11 @@ static SDValue PerformBFICombine(SDNode *N,
|
||||
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
|
||||
/// ARMISD::VMOVRRD.
|
||||
static SDValue PerformVMOVRRDCombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const ARMSubtarget *Subtarget) {
|
||||
// vmovrrd(vmovdrr x, y) -> x,y
|
||||
SDValue InDouble = N->getOperand(0);
|
||||
if (InDouble.getOpcode() == ARMISD::VMOVDRR)
|
||||
if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
|
||||
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
|
||||
|
||||
// vmovrrd(load f64) -> (load i32), (load i32)
|
||||
@ -8695,7 +8806,8 @@ static bool hasNormalLoadOperand(SDNode *N) {
|
||||
/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
|
||||
/// ISD::BUILD_VECTOR.
|
||||
static SDValue PerformBUILD_VECTORCombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI){
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const ARMSubtarget *Subtarget) {
|
||||
// build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
|
||||
// VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
|
||||
// into a pair of GPRs, which is fine when the value is used as a scalar,
|
||||
@ -9710,10 +9822,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
|
||||
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
|
||||
case ARMISD::BFI: return PerformBFICombine(N, DCI);
|
||||
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
|
||||
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
|
||||
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
|
||||
case ISD::STORE: return PerformSTORECombine(N, DCI);
|
||||
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
|
||||
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
|
||||
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
|
||||
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
|
||||
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
|
||||
@ -10703,6 +10815,31 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
|
||||
return DAG.getMergeValues(Ops, DL);
|
||||
}
|
||||
|
||||
SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
|
||||
assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
|
||||
"Unexpected type for custom-lowering FP_EXTEND");
|
||||
|
||||
RTLIB::Libcall LC;
|
||||
LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
|
||||
|
||||
SDValue SrcVal = Op.getOperand(0);
|
||||
return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
|
||||
/*isSigned*/ false, SDLoc(Op)).first;
|
||||
}
|
||||
|
||||
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
|
||||
assert(Op.getOperand(0).getValueType() == MVT::f64 &&
|
||||
Subtarget->isFPOnlySP() &&
|
||||
"Unexpected type for custom-lowering FP_ROUND");
|
||||
|
||||
RTLIB::Libcall LC;
|
||||
LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
|
||||
|
||||
SDValue SrcVal = Op.getOperand(0);
|
||||
return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
|
||||
/*isSigned*/ false, SDLoc(Op)).first;
|
||||
}
|
||||
|
||||
bool
|
||||
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
|
||||
// The ARM target isn't yet aware of offsets.
|
||||
@ -10730,7 +10867,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
|
||||
return false;
|
||||
if (VT == MVT::f32)
|
||||
return ARM_AM::getFP32Imm(Imm) != -1;
|
||||
if (VT == MVT::f64)
|
||||
if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
|
||||
return ARM_AM::getFP64Imm(Imm) != -1;
|
||||
return false;
|
||||
}
|
||||
|
@ -476,6 +476,10 @@ namespace llvm {
|
||||
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
|
||||
|
||||
@ -565,6 +569,9 @@ namespace llvm {
|
||||
|
||||
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
|
||||
|
||||
SDValue getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
|
||||
SDValue ARMcc, SDValue CCR, SDValue Cmp,
|
||||
SelectionDAG &DAG) const;
|
||||
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
|
||||
SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const;
|
||||
SDValue getVFPCmp(SDValue LHS, SDValue RHS,
|
||||
|
@ -515,6 +515,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
|
||||
let Inst{5} = Sm{0};
|
||||
let Inst{15-12} = Dd{3-0};
|
||||
let Inst{22} = Dd{4};
|
||||
|
||||
let Predicates = [HasVFP2, HasDPVFP];
|
||||
}
|
||||
|
||||
// Special case encoding: bits 11-8 is 0b1011.
|
||||
|
@ -54,12 +54,11 @@ define arm_aapcs_vfpcc void @test_1double({ double } %a) {
|
||||
; CHECK: bl test_1double
|
||||
|
||||
; CHECK-M4F-LABEL: test_1double:
|
||||
; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
|
||||
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
|
||||
; CHECK-M4F: movt [[ONEHI]], #16368
|
||||
; CHECK-M4F-DAG: vmov s0, [[ONELO]]
|
||||
; CHECK-M4F-DAG: vmov s1, [[ONEHI]]
|
||||
; CHECK-M4F: vldr d0, [[CP_LABEL:.*]]
|
||||
; CHECK-M4F: bl test_1double
|
||||
; CHECK-M4F: [[CP_LABEL]]
|
||||
; CHECK-M4F-NEXT: .long 0
|
||||
; CHECK-M4F-NEXT: .long 1072693248
|
||||
|
||||
call arm_aapcs_vfpcc void @test_1double({ double } { double 1.0 })
|
||||
ret void
|
||||
@ -76,11 +75,10 @@ define arm_aapcs_vfpcc void @test_1double_nosplit([4 x float], [4 x double], [3
|
||||
; CHECK: bl test_1double_nosplit
|
||||
|
||||
; CHECK-M4F-LABEL: test_1double_nosplit:
|
||||
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
|
||||
; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
|
||||
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
|
||||
; CHECK-M4F: movt [[ONEHI]], #16368
|
||||
; CHECK-M4F-DAG: str [[ONELO]], [sp]
|
||||
; CHECK-M4F-DAG: str [[ONEHI]], [sp, #4]
|
||||
; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp]
|
||||
; CHECK-M4F: bl test_1double_nosplit
|
||||
call arm_aapcs_vfpcc void @test_1double_nosplit([4 x float] undef, [4 x double] undef, [3 x float] undef, double 1.0)
|
||||
ret void
|
||||
@ -98,11 +96,10 @@ define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double],
|
||||
; CHECK-DAG: strd [[ONELO]], [[ONEHI]], [sp, #8]
|
||||
|
||||
; CHECK-M4F-LABEL: test_1double_misaligned:
|
||||
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
|
||||
; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
|
||||
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
|
||||
; CHECK-M4F: movt [[ONEHI]], #16368
|
||||
; CHECK-M4F-DAG: str [[ONELO]], [sp, #8]
|
||||
; CHECK-M4F-DAG: str [[ONEHI]], [sp, #12]
|
||||
; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp, #8]
|
||||
; CHECK-M4F: bl test_1double_misaligned
|
||||
|
||||
ret void
|
||||
|
@ -20,5 +20,5 @@ define double @double_op(double %lhs, double %rhs) {
|
||||
; CHECK-M3: bl ___adddf3
|
||||
|
||||
; CHECK-M4-LABEL: double_op:
|
||||
; CHECK-M4: bl ___adddf3
|
||||
; CHECK-M4: {{(blx|b.w)}} ___adddf3
|
||||
}
|
||||
|
50
test/CodeGen/Thumb2/aapcs.ll
Normal file
50
test/CodeGen/Thumb2/aapcs.ll
Normal file
@ -0,0 +1,50 @@
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m4 -mattr=-vfp2 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 -mattr=+vfp4,+fp-only-sp | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 -mattr=+vfp3 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
|
||||
|
||||
define float @float_in_reg(float %a, float %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: float_in_reg:
|
||||
; SOFT: mov r0, r1
|
||||
; HARD: vmov.f32 s0, s1
|
||||
; CHECK-NEXT: bx lr
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define double @double_in_reg(double %a, double %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: double_in_reg:
|
||||
; SOFT: mov r0, r2
|
||||
; SOFT: mov r1, r3
|
||||
; SP: vmov.f32 s0, s2
|
||||
; SP: vmov.f32 s1, s3
|
||||
; DP: vmov.f64 d0, d1
|
||||
; CHECK-NEXT: bx lr
|
||||
ret double %b
|
||||
}
|
||||
|
||||
define float @float_on_stack(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, float %i) {
|
||||
; CHECK-LABEL: float_on_stack:
|
||||
; SOFT: ldr r0, [sp, #48]
|
||||
; HARD: vldr s0, [sp]
|
||||
; CHECK-NEXT: bx lr
|
||||
ret float %i
|
||||
}
|
||||
|
||||
define double @double_on_stack(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) {
|
||||
; CHECK-LABEL: double_on_stack:
|
||||
; SOFT: ldr r0, [sp, #48]
|
||||
; SOFT: ldr r1, [sp, #52]
|
||||
; HARD: vldr d0, [sp]
|
||||
; CHECK-NEXT: bx lr
|
||||
ret double %i
|
||||
}
|
||||
|
||||
define double @double_not_split(double %a, double %b, double %c, double %d, double %e, double %f, double %g, float %h, double %i) {
|
||||
; CHECK-LABEL: double_not_split:
|
||||
; SOFT: ldr r0, [sp, #48]
|
||||
; SOFT: ldr r1, [sp, #52]
|
||||
; HARD: vldr d0, [sp]
|
||||
; CHECK-NEXT: bx lr
|
||||
ret double %i
|
||||
}
|
@ -18,7 +18,7 @@ entry:
|
||||
; CHECK-LABEL: bar:
|
||||
%0 = fmul double %a, %b
|
||||
; CORTEXM3: bl ___muldf3
|
||||
; CORTEXM4: bl ___muldf3
|
||||
; CORTEXM4: {{bl|b.w}} ___muldf3
|
||||
; CORTEXA8: vmul.f64 d
|
||||
ret double %0
|
||||
}
|
||||
|
300
test/CodeGen/Thumb2/float-cmp.ll
Normal file
300
test/CodeGen/Thumb2/float-cmp.ll
Normal file
@ -0,0 +1,300 @@
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
|
||||
|
||||
|
||||
|
||||
define i1 @cmp_f_false(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_false:
|
||||
; NONE: movs r0, #0
|
||||
; HARD: movs r0, #0
|
||||
%1 = fcmp false float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_f_oeq(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_oeq:
|
||||
; NONE: bl __aeabi_fcmpeq
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: moveq r0, #1
|
||||
%1 = fcmp oeq float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_f_ogt(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_ogt:
|
||||
; NONE: bl __aeabi_fcmpgt
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: movgt r0, #1
|
||||
%1 = fcmp ogt float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_f_oge(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_oge:
|
||||
; NONE: bl __aeabi_fcmpge
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: movge r0, #1
|
||||
%1 = fcmp oge float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_f_olt(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_olt:
|
||||
; NONE: bl __aeabi_fcmplt
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: movmi r0, #1
|
||||
%1 = fcmp olt float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_f_ole(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_ole:
|
||||
; NONE: bl __aeabi_fcmple
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: movls r0, #1
|
||||
%1 = fcmp ole float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_f_one(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_one:
|
||||
; NONE: bl __aeabi_fcmpgt
|
||||
; NONE: bl __aeabi_fcmplt
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: movmi r0, #1
|
||||
; HARD: movgt r0, #1
|
||||
%1 = fcmp one float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
; fcmp ord on float: true when neither operand is a NaN.
;   NONE prefix (cortex-m3 run) - a call to __aeabi_fcmpun is expected
;   HARD prefix (cortex-m4 and cortex-a8 runs) - vcmpe.f32 followed by a
;   conditional move of #1 into r0 on the VC condition
define i1 @cmp_f_ord(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_ord:
|
||||
; NONE: bl __aeabi_fcmpun
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: movvc r0, #1
|
||||
%1 = fcmp ord float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
; fcmp ueq on float: true when the operands are equal or unordered.
;   NONE prefix (cortex-m3 run) - two library calls are expected,
;   __aeabi_fcmpeq and then __aeabi_fcmpun
;   HARD prefix (cortex-m4 and cortex-a8 runs) - one vcmpe.f32 followed by
;   two conditional moves of #1 into r0, on EQ and then on VS
define i1 @cmp_f_ueq(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_ueq:
|
||||
; NONE: bl __aeabi_fcmpeq
|
||||
; NONE: bl __aeabi_fcmpun
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: moveq r0, #1
|
||||
; HARD: movvs r0, #1
|
||||
%1 = fcmp ueq float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_f_ugt(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_ugt:
|
||||
; NONE: bl __aeabi_fcmpgt
|
||||
; NONE: bl __aeabi_fcmpun
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: movhi r0, #1
|
||||
%1 = fcmp ugt float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_f_uge(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_uge:
|
||||
; NONE: bl __aeabi_fcmpge
|
||||
; NONE: bl __aeabi_fcmpun
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: movpl r0, #1
|
||||
%1 = fcmp uge float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_f_ult(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_ult:
|
||||
; NONE: bl __aeabi_fcmplt
|
||||
; NONE: bl __aeabi_fcmpun
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: movlt r0, #1
|
||||
%1 = fcmp ult float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_f_ule(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_ule:
|
||||
; NONE: bl __aeabi_fcmple
|
||||
; NONE: bl __aeabi_fcmpun
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: movle r0, #1
|
||||
%1 = fcmp ule float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_f_une(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_une:
|
||||
; NONE: bl __aeabi_fcmpeq
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: movne r0, #1
|
||||
%1 = fcmp une float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_f_uno(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_uno:
|
||||
; NONE: bl __aeabi_fcmpun
|
||||
; HARD: vcmpe.f32
|
||||
; HARD: movvs r0, #1
|
||||
%1 = fcmp uno float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_f_true(float %a, float %b) {
|
||||
; CHECK-LABEL: cmp_f_true:
|
||||
; NONE: movs r0, #1
|
||||
; HARD: movs r0, #1
|
||||
%1 = fcmp true float %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
|
||||
define i1 @cmp_d_false(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_false:
|
||||
; NONE: movs r0, #0
|
||||
; HARD: movs r0, #0
|
||||
%1 = fcmp false double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_d_oeq(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_oeq:
|
||||
; NONE: bl __aeabi_dcmpeq
|
||||
; SP: bl __aeabi_dcmpeq
|
||||
; DP: vcmpe.f64
|
||||
; DP: moveq r0, #1
|
||||
%1 = fcmp oeq double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_d_ogt(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_ogt:
|
||||
; NONE: bl __aeabi_dcmpgt
|
||||
; SP: bl __aeabi_dcmpgt
|
||||
; DP: vcmpe.f64
|
||||
; DP: movgt r0, #1
|
||||
%1 = fcmp ogt double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_d_oge(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_oge:
|
||||
; NONE: bl __aeabi_dcmpge
|
||||
; SP: bl __aeabi_dcmpge
|
||||
; DP: vcmpe.f64
|
||||
; DP: movge r0, #1
|
||||
%1 = fcmp oge double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_d_olt(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_olt:
|
||||
; NONE: bl __aeabi_dcmplt
|
||||
; SP: bl __aeabi_dcmplt
|
||||
; DP: vcmpe.f64
|
||||
; DP: movmi r0, #1
|
||||
%1 = fcmp olt double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_d_ole(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_ole:
|
||||
; NONE: bl __aeabi_dcmple
|
||||
; SP: bl __aeabi_dcmple
|
||||
; DP: vcmpe.f64
|
||||
; DP: movls r0, #1
|
||||
%1 = fcmp ole double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_d_one(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_one:
|
||||
; NONE: bl __aeabi_dcmpgt
|
||||
; NONE: bl __aeabi_dcmplt
|
||||
; SP: bl __aeabi_dcmpgt
|
||||
; SP: bl __aeabi_dcmplt
|
||||
; DP: vcmpe.f64
|
||||
; DP: movmi r0, #1
|
||||
; DP: movgt r0, #1
|
||||
%1 = fcmp one double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_d_ord(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_ord:
|
||||
; NONE: bl __aeabi_dcmpun
|
||||
; SP: bl __aeabi_dcmpun
|
||||
; DP: vcmpe.f64
|
||||
; DP: movvc r0, #1
|
||||
%1 = fcmp ord double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_d_ugt(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_ugt:
|
||||
; NONE: bl __aeabi_dcmpgt
|
||||
; NONE: bl __aeabi_dcmpun
|
||||
; SP: bl __aeabi_dcmpgt
|
||||
; SP: bl __aeabi_dcmpun
|
||||
; DP: vcmpe.f64
|
||||
; DP: movhi r0, #1
|
||||
%1 = fcmp ugt double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
|
||||
define i1 @cmp_d_ult(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_ult:
|
||||
; NONE: bl __aeabi_dcmplt
|
||||
; NONE: bl __aeabi_dcmpun
|
||||
; SP: bl __aeabi_dcmplt
|
||||
; SP: bl __aeabi_dcmpun
|
||||
; DP: vcmpe.f64
|
||||
; DP: movlt r0, #1
|
||||
%1 = fcmp ult double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
|
||||
|
||||
define i1 @cmp_d_uno(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_uno:
|
||||
; NONE: bl __aeabi_dcmpun
|
||||
; SP: bl __aeabi_dcmpun
|
||||
; DP: vcmpe.f64
|
||||
; DP: movvs r0, #1
|
||||
%1 = fcmp uno double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_d_true(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_true:
|
||||
; NONE: movs r0, #1
|
||||
; HARD: movs r0, #1
|
||||
%1 = fcmp true double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
define i1 @cmp_d_ueq(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_ueq:
|
||||
; NONE: bl __aeabi_dcmpeq
|
||||
; NONE: bl __aeabi_dcmpun
|
||||
; SP: bl __aeabi_dcmpeq
|
||||
; SP: bl __aeabi_dcmpun
|
||||
; DP: vcmpe.f64
|
||||
; DP: moveq r0, #1
|
||||
; DP: movvs r0, #1
|
||||
%1 = fcmp ueq double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
|
||||
define i1 @cmp_d_uge(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_uge:
|
||||
; NONE: bl __aeabi_dcmpge
|
||||
; NONE: bl __aeabi_dcmpun
|
||||
; SP: bl __aeabi_dcmpge
|
||||
; SP: bl __aeabi_dcmpun
|
||||
; DP: vcmpe.f64
|
||||
; DP: movpl r0, #1
|
||||
%1 = fcmp uge double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
|
||||
define i1 @cmp_d_ule(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_ule:
|
||||
; NONE: bl __aeabi_dcmple
|
||||
; NONE: bl __aeabi_dcmpun
|
||||
; SP: bl __aeabi_dcmple
|
||||
; SP: bl __aeabi_dcmpun
|
||||
; DP: vcmpe.f64
|
||||
; DP: movle r0, #1
|
||||
%1 = fcmp ule double %a, %b
|
||||
ret i1 %1
|
||||
}
|
||||
|
||||
define i1 @cmp_d_une(double %a, double %b) {
|
||||
; CHECK-LABEL: cmp_d_une:
|
||||
; NONE: bl __aeabi_dcmpeq
|
||||
; SP: bl __aeabi_dcmpeq
|
||||
; DP: vcmpe.f64
|
||||
; DP: movne r0, #1
|
||||
%1 = fcmp une double %a, %b
|
||||
ret i1 %1
|
||||
}
|
214
test/CodeGen/Thumb2/float-intrinsics-double.ll
Normal file
214
test/CodeGen/Thumb2/float-intrinsics-double.ll
Normal file
@ -0,0 +1,214 @@
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=SP
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
|
||||
|
||||
declare double @llvm.sqrt.f64(double %Val)
|
||||
define double @sqrt_d(double %a) {
|
||||
; CHECK-LABEL: sqrt_d:
|
||||
; SOFT: {{(bl|b)}} sqrt
|
||||
; HARD: vsqrt.f64 d0, d0
|
||||
%1 = call double @llvm.sqrt.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.powi.f64(double %Val, i32 %power)
|
||||
define double @powi_d(double %a, i32 %b) {
|
||||
; CHECK-LABEL: powi_d:
|
||||
; SOFT: {{(bl|b)}} __powidf2
|
||||
; HARD: b __powidf2
|
||||
%1 = call double @llvm.powi.f64(double %a, i32 %b)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.sin.f64(double %Val)
|
||||
define double @sin_d(double %a) {
|
||||
; CHECK-LABEL: sin_d:
|
||||
; SOFT: {{(bl|b)}} sin
|
||||
; HARD: b sin
|
||||
%1 = call double @llvm.sin.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.cos.f64(double %Val)
|
||||
define double @cos_d(double %a) {
|
||||
; CHECK-LABEL: cos_d:
|
||||
; SOFT: {{(bl|b)}} cos
|
||||
; HARD: b cos
|
||||
%1 = call double @llvm.cos.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.pow.f64(double %Val, double %power)
|
||||
define double @pow_d(double %a, double %b) {
|
||||
; CHECK-LABEL: pow_d:
|
||||
; SOFT: {{(bl|b)}} pow
|
||||
; HARD: b pow
|
||||
%1 = call double @llvm.pow.f64(double %a, double %b)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.exp.f64(double %Val)
|
||||
define double @exp_d(double %a) {
|
||||
; CHECK-LABEL: exp_d:
|
||||
; SOFT: {{(bl|b)}} exp
|
||||
; HARD: b exp
|
||||
%1 = call double @llvm.exp.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.exp2.f64(double %Val)
|
||||
define double @exp2_d(double %a) {
|
||||
; CHECK-LABEL: exp2_d:
|
||||
; SOFT: {{(bl|b)}} exp2
|
||||
; HARD: b exp2
|
||||
%1 = call double @llvm.exp2.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.log.f64(double %Val)
|
||||
define double @log_d(double %a) {
|
||||
; CHECK-LABEL: log_d:
|
||||
; SOFT: {{(bl|b)}} log
|
||||
; HARD: b log
|
||||
%1 = call double @llvm.log.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.log10.f64(double %Val)
|
||||
define double @log10_d(double %a) {
|
||||
; CHECK-LABEL: log10_d:
|
||||
; SOFT: {{(bl|b)}} log10
|
||||
; HARD: b log10
|
||||
%1 = call double @llvm.log10.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.log2.f64(double %Val)
|
||||
define double @log2_d(double %a) {
|
||||
; CHECK-LABEL: log2_d:
|
||||
; SOFT: {{(bl|b)}} log2
|
||||
; HARD: b log2
|
||||
%1 = call double @llvm.log2.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.fma.f64(double %a, double %b, double %c)
|
||||
define double @fma_d(double %a, double %b, double %c) {
|
||||
; CHECK-LABEL: fma_d:
|
||||
; SOFT: {{(bl|b)}} fma
|
||||
; HARD: vfma.f64
|
||||
%1 = call double @llvm.fma.f64(double %a, double %b, double %c)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
; FIXME: the FPv4-SP version is less efficient than the no-FPU version
|
||||
declare double @llvm.fabs.f64(double %Val)
|
||||
; llvm.fabs on double. The three runs expect three different lowerings:
;   NONE prefix (cortex-m3)  - a single bic that clears bit 31 of r1
;                              (the mask #-2147483648 is 0x80000000)
;   SP prefix (cortex-m4)    - a sequence of library calls: __aeabi_dcmpgt,
;                              __aeabi_dcmpun and __aeabi_dsub
;   DP prefix (cortex-a7)    - a single vabs.f64 on d0
; The SP expectation pins the current, longer sequence; it documents present
; behaviour rather than the ideal code.
define double @abs_d(double %a) {
|
||||
; CHECK-LABEL: abs_d:
|
||||
; NONE: bic r1, r1, #-2147483648
|
||||
; SP: bl __aeabi_dcmpgt
|
||||
; SP: bl __aeabi_dcmpun
|
||||
; SP: bl __aeabi_dsub
|
||||
; DP: vabs.f64 d0, d0
|
||||
%1 = call double @llvm.fabs.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.copysign.f64(double %Mag, double %Sgn)
|
||||
define double @copysign_d(double %a, double %b) {
|
||||
; CHECK-LABEL: copysign_d:
|
||||
; SOFT: lsrs [[REG:r[0-9]+]], r3, #31
|
||||
; SOFT: bfi r1, [[REG]], #31, #1
|
||||
; HARD: vmov.i32 [[REG:d[0-9]+]], #0x80000000
|
||||
; HARD: vshl.i64 [[REG]], [[REG]], #32
|
||||
; HARD: vbsl [[REG]], d
|
||||
%1 = call double @llvm.copysign.f64(double %a, double %b)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.floor.f64(double %Val)
|
||||
define double @floor_d(double %a) {
|
||||
; CHECK-LABEL: floor_d:
|
||||
; SOFT: {{(bl|b)}} floor
|
||||
; HARD: b floor
|
||||
%1 = call double @llvm.floor.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.ceil.f64(double %Val)
|
||||
define double @ceil_d(double %a) {
|
||||
; CHECK-LABEL: ceil_d:
|
||||
; SOFT: {{(bl|b)}} ceil
|
||||
; HARD: b ceil
|
||||
%1 = call double @llvm.ceil.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.trunc.f64(double %Val)
|
||||
define double @trunc_d(double %a) {
|
||||
; CHECK-LABEL: trunc_d:
|
||||
; SOFT: {{(bl|b)}} trunc
|
||||
; HARD: b trunc
|
||||
%1 = call double @llvm.trunc.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.rint.f64(double %Val)
|
||||
define double @rint_d(double %a) {
|
||||
; CHECK-LABEL: rint_d:
|
||||
; SOFT: {{(bl|b)}} rint
|
||||
; HARD: b rint
|
||||
%1 = call double @llvm.rint.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.nearbyint.f64(double %Val)
|
||||
define double @nearbyint_d(double %a) {
|
||||
; CHECK-LABEL: nearbyint_d:
|
||||
; SOFT: {{(bl|b)}} nearbyint
|
||||
; HARD: b nearbyint
|
||||
%1 = call double @llvm.nearbyint.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.round.f64(double %Val)
|
||||
define double @round_d(double %a) {
|
||||
; CHECK-LABEL: round_d:
|
||||
; SOFT: {{(bl|b)}} round
|
||||
; HARD: b round
|
||||
%1 = call double @llvm.round.f64(double %a)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.fmuladd.f64(double %a, double %b, double %c)
|
||||
define double @fmuladd_d(double %a, double %b, double %c) {
|
||||
; CHECK-LABEL: fmuladd_d:
|
||||
; SOFT: bl __aeabi_dmul
|
||||
; SOFT: bl __aeabi_dadd
|
||||
; HARD: vmul.f64
|
||||
; HARD: vadd.f64
|
||||
%1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare i16 @llvm.convert.to.fp16.f64(double %a)
|
||||
define i16 @d_to_h(double %a) {
|
||||
; CHECK-LABEL: d_to_h:
|
||||
; SOFT: bl __aeabi_d2h
|
||||
; HARD: bl __aeabi_d2h
|
||||
%1 = call i16 @llvm.convert.to.fp16.f64(double %a)
|
||||
ret i16 %1
|
||||
}
|
||||
|
||||
declare double @llvm.convert.from.fp16.f64(i16 %a)
|
||||
; Half-to-double conversion via llvm.convert.from.fp16.f64. Every run expects
; a two-step conversion (half to float, then float to double); what differs
; is which steps are instructions and which are library calls:
;   NONE prefix (cortex-m3) - __gnu_h2f_ieee, then __aeabi_f2d
;   SP prefix (cortex-m4)   - vcvtb.f32.f16, then a call to __aeabi_f2d
;   DP prefix (cortex-a7)   - vcvtb.f32.f16, then vcvt.f64.f32
define double @h_to_d(i16 %a) {
|
||||
; CHECK-LABEL: h_to_d:
|
||||
; NONE: bl __gnu_h2f_ieee
|
||||
; NONE: bl __aeabi_f2d
|
||||
; SP: vcvtb.f32.f16
|
||||
; SP: bl __aeabi_f2d
|
||||
; DP: vcvtb.f32.f16
|
||||
; DP: vcvt.f64.f32
|
||||
%1 = call double @llvm.convert.from.fp16.f64(i16 %a)
|
||||
ret double %1
|
||||
}
|
210
test/CodeGen/Thumb2/float-intrinsics-float.ll
Normal file
210
test/CodeGen/Thumb2/float-intrinsics-float.ll
Normal file
@ -0,0 +1,210 @@
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
|
||||
|
||||
declare float @llvm.sqrt.f32(float %Val)
|
||||
define float @sqrt_f(float %a) {
|
||||
; CHECK-LABEL: sqrt_f:
|
||||
; SOFT: bl sqrtf
|
||||
; HARD: vsqrt.f32 s0, s0
|
||||
%1 = call float @llvm.sqrt.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.powi.f32(float %Val, i32 %power)
|
||||
define float @powi_f(float %a, i32 %b) {
|
||||
; CHECK-LABEL: powi_f:
|
||||
; SOFT: bl __powisf2
|
||||
; HARD: b __powisf2
|
||||
%1 = call float @llvm.powi.f32(float %a, i32 %b)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.sin.f32(float %Val)
|
||||
define float @sin_f(float %a) {
|
||||
; CHECK-LABEL: sin_f:
|
||||
; SOFT: bl sinf
|
||||
; HARD: b sinf
|
||||
%1 = call float @llvm.sin.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.cos.f32(float %Val)
|
||||
define float @cos_f(float %a) {
|
||||
; CHECK-LABEL: cos_f:
|
||||
; SOFT: bl cosf
|
||||
; HARD: b cosf
|
||||
%1 = call float @llvm.cos.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.pow.f32(float %Val, float %power)
|
||||
define float @pow_f(float %a, float %b) {
|
||||
; CHECK-LABEL: pow_f:
|
||||
; SOFT: bl powf
|
||||
; HARD: b powf
|
||||
%1 = call float @llvm.pow.f32(float %a, float %b)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.exp.f32(float %Val)
|
||||
define float @exp_f(float %a) {
|
||||
; CHECK-LABEL: exp_f:
|
||||
; SOFT: bl expf
|
||||
; HARD: b expf
|
||||
%1 = call float @llvm.exp.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.exp2.f32(float %Val)
|
||||
define float @exp2_f(float %a) {
|
||||
; CHECK-LABEL: exp2_f:
|
||||
; SOFT: bl exp2f
|
||||
; HARD: b exp2f
|
||||
%1 = call float @llvm.exp2.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.log.f32(float %Val)
|
||||
define float @log_f(float %a) {
|
||||
; CHECK-LABEL: log_f:
|
||||
; SOFT: bl logf
|
||||
; HARD: b logf
|
||||
%1 = call float @llvm.log.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.log10.f32(float %Val)
|
||||
define float @log10_f(float %a) {
|
||||
; CHECK-LABEL: log10_f:
|
||||
; SOFT: bl log10f
|
||||
; HARD: b log10f
|
||||
%1 = call float @llvm.log10.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.log2.f32(float %Val)
|
||||
define float @log2_f(float %a) {
|
||||
; CHECK-LABEL: log2_f:
|
||||
; SOFT: bl log2f
|
||||
; HARD: b log2f
|
||||
%1 = call float @llvm.log2.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.fma.f32(float %a, float %b, float %c)
|
||||
define float @fma_f(float %a, float %b, float %c) {
|
||||
; CHECK-LABEL: fma_f:
|
||||
; SOFT: bl fmaf
|
||||
; HARD: vfma.f32
|
||||
%1 = call float @llvm.fma.f32(float %a, float %b, float %c)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.fabs.f32(float %Val)
|
||||
define float @abs_f(float %a) {
|
||||
; CHECK-LABEL: abs_f:
|
||||
; SOFT: bic r0, r0, #-2147483648
|
||||
; HARD: vabs.f32
|
||||
%1 = call float @llvm.fabs.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.copysign.f32(float %Mag, float %Sgn)
|
||||
define float @copysign_f(float %a, float %b) {
|
||||
; CHECK-LABEL: copysign_f:
|
||||
; NONE: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31
|
||||
; NONE: bfi r{{[0-9]+}}, [[REG]], #31, #1
|
||||
; SP: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31
|
||||
; SP: bfi r{{[0-9]+}}, [[REG]], #31, #1
|
||||
; DP: vmov.i32 [[REG:d[0-9]+]], #0x80000000
|
||||
; DP: vbsl [[REG]], d
|
||||
%1 = call float @llvm.copysign.f32(float %a, float %b)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.floor.f32(float %Val)
|
||||
define float @floor_f(float %a) {
|
||||
; CHECK-LABEL: floor_f:
|
||||
; SOFT: bl floorf
|
||||
; HARD: b floorf
|
||||
%1 = call float @llvm.floor.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.ceil.f32(float %Val)
|
||||
define float @ceil_f(float %a) {
|
||||
; CHECK-LABEL: ceil_f:
|
||||
; SOFT: bl ceilf
|
||||
; HARD: b ceilf
|
||||
%1 = call float @llvm.ceil.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.trunc.f32(float %Val)
|
||||
define float @trunc_f(float %a) {
|
||||
; CHECK-LABEL: trunc_f:
|
||||
; SOFT: bl truncf
|
||||
; HARD: b truncf
|
||||
%1 = call float @llvm.trunc.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.rint.f32(float %Val)
|
||||
define float @rint_f(float %a) {
|
||||
; CHECK-LABEL: rint_f:
|
||||
; SOFT: bl rintf
|
||||
; HARD: b rintf
|
||||
%1 = call float @llvm.rint.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.nearbyint.f32(float %Val)
|
||||
define float @nearbyint_f(float %a) {
|
||||
; CHECK-LABEL: nearbyint_f:
|
||||
; SOFT: bl nearbyintf
|
||||
; HARD: b nearbyintf
|
||||
%1 = call float @llvm.nearbyint.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare float @llvm.round.f32(float %Val)
|
||||
define float @round_f(float %a) {
|
||||
; CHECK-LABEL: round_f:
|
||||
; SOFT: bl roundf
|
||||
; HARD: b roundf
|
||||
%1 = call float @llvm.round.f32(float %a)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
; FIXME: why does cortex-m4 use vmla, while cortex-a7 uses vmul+vadd?
|
||||
; (these should be equivalent, even the rounding is the same)
|
||||
declare float @llvm.fmuladd.f32(float %a, float %b, float %c)
|
||||
; llvm.fmuladd on float. Expected lowering per check prefix:
;   SOFT prefix (cortex-m3 run) - separate library calls, __aeabi_fmul
;                                 followed by __aeabi_fadd
;   SP prefix (cortex-m4 run)   - a single vmla.f32
;   DP prefix (cortex-a7 run)   - vmul.f32 followed by vadd.f32
; The SP and DP expectations intentionally differ; they record what each
; CPU currently generates.
define float @fmuladd_f(float %a, float %b, float %c) {
|
||||
; CHECK-LABEL: fmuladd_f:
|
||||
; SOFT: bl __aeabi_fmul
|
||||
; SOFT: bl __aeabi_fadd
|
||||
; SP: vmla.f32
|
||||
; DP: vmul.f32
|
||||
; DP: vadd.f32
|
||||
%1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
declare i16 @llvm.convert.to.fp16.f32(float %a)
|
||||
define i16 @f_to_h(float %a) {
|
||||
; CHECK-LABEL: f_to_h:
|
||||
; SOFT: bl __gnu_f2h_ieee
|
||||
; HARD: vcvtb.f16.f32
|
||||
%1 = call i16 @llvm.convert.to.fp16.f32(float %a)
|
||||
ret i16 %1
|
||||
}
|
||||
|
||||
declare float @llvm.convert.from.fp16.f32(i16 %a)
|
||||
define float @h_to_f(i16 %a) {
|
||||
; CHECK-LABEL: h_to_f:
|
||||
; SOFT: bl __gnu_h2f_ieee
|
||||
; HARD: vcvtb.f32.f16
|
||||
%1 = call float @llvm.convert.from.fp16.f32(i16 %a)
|
||||
ret float %1
|
||||
}
|
290
test/CodeGen/Thumb2/float-ops.ll
Normal file
290
test/CodeGen/Thumb2/float-ops.ll
Normal file
@ -0,0 +1,290 @@
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
|
||||
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
|
||||
|
||||
define float @add_f(float %a, float %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: add_f:
|
||||
; NONE: bl __aeabi_fadd
|
||||
; HARD: vadd.f32 s0, s0, s1
|
||||
%0 = fadd float %a, %b
|
||||
ret float %0
|
||||
}
|
||||
|
||||
define double @add_d(double %a, double %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: add_d:
|
||||
; NONE: bl __aeabi_dadd
|
||||
; SP: bl __aeabi_dadd
|
||||
; DP: vadd.f64 d0, d0, d1
|
||||
%0 = fadd double %a, %b
|
||||
ret double %0
|
||||
}
|
||||
|
||||
define float @sub_f(float %a, float %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: sub_f:
|
||||
; NONE: bl __aeabi_fsub
|
||||
; HARD: vsub.f32 s
|
||||
%0 = fsub float %a, %b
|
||||
ret float %0
|
||||
}
|
||||
|
||||
define double @sub_d(double %a, double %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: sub_d:
|
||||
; NONE: bl __aeabi_dsub
|
||||
; SP: bl __aeabi_dsub
|
||||
; DP: vsub.f64 d0, d0, d1
|
||||
%0 = fsub double %a, %b
|
||||
ret double %0
|
||||
}
|
||||
|
||||
define float @mul_f(float %a, float %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: mul_f:
|
||||
; NONE: bl __aeabi_fmul
|
||||
; HARD: vmul.f32 s
|
||||
%0 = fmul float %a, %b
|
||||
ret float %0
|
||||
}
|
||||
|
||||
define double @mul_d(double %a, double %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: mul_d:
|
||||
; NONE: bl __aeabi_dmul
|
||||
; SP: bl __aeabi_dmul
|
||||
; DP: vmul.f64 d0, d0, d1
|
||||
%0 = fmul double %a, %b
|
||||
ret double %0
|
||||
}
|
||||
|
||||
define float @div_f(float %a, float %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: div_f:
|
||||
; NONE: bl __aeabi_fdiv
|
||||
; HARD: vdiv.f32 s
|
||||
%0 = fdiv float %a, %b
|
||||
ret float %0
|
||||
}
|
||||
|
||||
define double @div_d(double %a, double %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: div_d:
|
||||
; NONE: bl __aeabi_ddiv
|
||||
; SP: bl __aeabi_ddiv
|
||||
; DP: vdiv.f64 d0, d0, d1
|
||||
%0 = fdiv double %a, %b
|
||||
ret double %0
|
||||
}
|
||||
|
||||
define float @rem_f(float %a, float %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: rem_f:
|
||||
; NONE: bl fmodf
|
||||
; HARD: b fmodf
|
||||
%0 = frem float %a, %b
|
||||
ret float %0
|
||||
}
|
||||
|
||||
define double @rem_d(double %a, double %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: rem_d:
|
||||
; NONE: bl fmod
|
||||
; HARD: b fmod
|
||||
%0 = frem double %a, %b
|
||||
ret double %0
|
||||
}
|
||||
|
||||
define float @load_f(float* %a) {
|
||||
entry:
|
||||
; CHECK-LABEL: load_f:
|
||||
; NONE: ldr r0, [r0]
|
||||
; HARD: vldr s0, [r0]
|
||||
%0 = load float* %a, align 4
|
||||
ret float %0
|
||||
}
|
||||
|
||||
define double @load_d(double* %a) {
|
||||
entry:
|
||||
; CHECK-LABEL: load_d:
|
||||
; NONE: ldm.w r0, {r0, r1}
|
||||
; HARD: vldr d0, [r0]
|
||||
%0 = load double* %a, align 8
|
||||
ret double %0
|
||||
}
|
||||
|
||||
define void @store_f(float* %a, float %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: store_f:
|
||||
; NONE: str r1, [r0]
|
||||
; HARD: vstr s0, [r0]
|
||||
store float %b, float* %a, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_d(double* %a, double %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: store_d:
|
||||
; NONE: mov r1, r3
|
||||
; NONE: str r2, [r0]
|
||||
; NONE: str r1, [r0, #4]
|
||||
; HARD: vstr d0, [r0]
|
||||
store double %b, double* %a, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
define double @f_to_d(float %a) {
|
||||
; CHECK-LABEL: f_to_d:
|
||||
; NONE: bl __aeabi_f2d
|
||||
; SP: bl __aeabi_f2d
|
||||
; DP: vcvt.f64.f32 d0, s0
|
||||
%1 = fpext float %a to double
|
||||
ret double %1
|
||||
}
|
||||
|
||||
define float @d_to_f(double %a) {
|
||||
; CHECK-LABEL: d_to_f:
|
||||
; NONE: bl __aeabi_d2f
|
||||
; SP: bl __aeabi_d2f
|
||||
; DP: vcvt.f32.f64 s0, d0
|
||||
%1 = fptrunc double %a to float
|
||||
ret float %1
|
||||
}
|
||||
|
||||
define i32 @f_to_si(float %a) {
|
||||
; CHECK-LABEL: f_to_si:
|
||||
; NONE: bl __aeabi_f2iz
|
||||
; HARD: vcvt.s32.f32 s0, s0
|
||||
; HARD: vmov r0, s0
|
||||
%1 = fptosi float %a to i32
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Signed double-to-i32 conversion (fptosi). Expected code per check prefix:
;   NONE prefix (cortex-m3) - a call to __aeabi_d2iz
;   SP prefix (cortex-m4)   - the argument is first moved out of d0 into
;                             r0/r1, then __aeabi_d2iz is called
;   DP prefix (cortex-a8)   - vcvt.s32.f64 into s0, then the result is moved
;                             from s0 to r0
define i32 @d_to_si(double %a) {
|
||||
; CHECK-LABEL: d_to_si:
|
||||
; NONE: bl __aeabi_d2iz
|
||||
; SP: vmov r0, r1, d0
|
||||
; SP: bl __aeabi_d2iz
|
||||
; DP: vcvt.s32.f64 s0, d0
|
||||
; DP: vmov r0, s0
|
||||
%1 = fptosi double %a to i32
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @f_to_ui(float %a) {
|
||||
; CHECK-LABEL: f_to_ui:
|
||||
; NONE: bl __aeabi_f2uiz
|
||||
; HARD: vcvt.u32.f32 s0, s0
|
||||
; HARD: vmov r0, s0
|
||||
%1 = fptoui float %a to i32
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i32 @d_to_ui(double %a) {
|
||||
; CHECK-LABEL: d_to_ui:
|
||||
; NONE: bl __aeabi_d2uiz
|
||||
; SP: vmov r0, r1, d0
|
||||
; SP: bl __aeabi_d2uiz
|
||||
; DP: vcvt.u32.f64 s0, d0
|
||||
; DP: vmov r0, s0
|
||||
%1 = fptoui double %a to i32
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define float @si_to_f(i32 %a) {
|
||||
; CHECK-LABEL: si_to_f:
|
||||
; NONE: bl __aeabi_i2f
|
||||
; HARD: vcvt.f32.s32 s0, s0
|
||||
%1 = sitofp i32 %a to float
|
||||
ret float %1
|
||||
}
|
||||
|
||||
define double @si_to_d(i32 %a) {
|
||||
; CHECK-LABEL: si_to_d:
|
||||
; NONE: bl __aeabi_i2d
|
||||
; SP: bl __aeabi_i2d
|
||||
; DP: vcvt.f64.s32 d0, s0
|
||||
%1 = sitofp i32 %a to double
|
||||
ret double %1
|
||||
}
|
||||
|
||||
define float @ui_to_f(i32 %a) {
|
||||
; CHECK-LABEL: ui_to_f:
|
||||
; NONE: bl __aeabi_ui2f
|
||||
; HARD: vcvt.f32.u32 s0, s0
|
||||
%1 = uitofp i32 %a to float
|
||||
ret float %1
|
||||
}
|
||||
|
||||
define double @ui_to_d(i32 %a) {
|
||||
; CHECK-LABEL: ui_to_d:
|
||||
; NONE: bl __aeabi_ui2d
|
||||
; SP: bl __aeabi_ui2d
|
||||
; DP: vcvt.f64.u32 d0, s0
|
||||
%1 = uitofp i32 %a to double
|
||||
ret double %1
|
||||
}
|
||||
|
||||
define float @bitcast_i_to_f(i32 %a) {
|
||||
; CHECK-LABEL: bitcast_i_to_f:
|
||||
; NONE-NOT: mov
|
||||
; HARD: vmov s0, r0
|
||||
%1 = bitcast i32 %a to float
|
||||
ret float %1
|
||||
}
|
||||
|
||||
define double @bitcast_i_to_d(i64 %a) {
|
||||
; CHECK-LABEL: bitcast_i_to_d:
|
||||
; NONE-NOT: mov
|
||||
; HARD: vmov d0, r0, r1
|
||||
%1 = bitcast i64 %a to double
|
||||
ret double %1
|
||||
}
|
||||
|
||||
define i32 @bitcast_f_to_i(float %a) {
|
||||
; CHECK-LABEL: bitcast_f_to_i:
|
||||
; NONE-NOT: mov
|
||||
; HARD: vmov r0, s0
|
||||
%1 = bitcast float %a to i32
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i64 @bitcast_d_to_i(double %a) {
|
||||
; CHECK-LABEL: bitcast_d_to_i:
|
||||
; NONE-NOT: mov
|
||||
; HARD: vmov r0, r1, d0
|
||||
%1 = bitcast double %a to i64
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define float @select_f(float %a, float %b, i1 %c) {
|
||||
; CHECK-LABEL: select_f:
|
||||
; NONE: tst.w r2, #1
|
||||
; NONE: moveq r0, r1
|
||||
; HARD: tst.w r0, #1
|
||||
; HARD: vmovne.f32 s1, s0
|
||||
; HARD: vmov.f32 s0, s1
|
||||
%1 = select i1 %c, float %a, float %b
|
||||
ret float %1
|
||||
}
|
||||
|
||||
; Select between two doubles on an i1 condition. Expected code per prefix:
;   NONE prefix (cortex-m3) - the condition is loaded from [sp] into a
;       scratch register (captured as REG), masked with #1, and r0/r1 are
;       conditionally overwritten with r2/r3 on EQ
;   SP prefix (cortex-m4)   - the condition in r0 is masked with #1; both
;       doubles are moved out of d0/d1 into core register pairs (captured as
;       ALO/AHI and BLO/BHI); an `itt ne` block conditionally copies the %a
;       pair over the %b pair; the result is moved back into d0
;   DP prefix (cortex-a8)   - tst on r0, a conditional vmov.f64 of d0 into
;       d1, then d1 is moved into d0
; The DAG-suffixed directives let the two extracting vmovs, and likewise the
; two movne instructions, appear in either order in the output.
define double @select_d(double %a, double %b, i1 %c) {
|
||||
; CHECK-LABEL: select_d:
|
||||
; NONE: ldr.w [[REG:r[0-9]+]], [sp]
|
||||
; NONE: ands [[REG]], [[REG]], #1
|
||||
; NONE: moveq r0, r2
|
||||
; NONE: moveq r1, r3
|
||||
; SP: ands r0, r0, #1
|
||||
; SP-DAG: vmov [[ALO:r[0-9]+]], [[AHI:r[0-9]+]], d0
|
||||
; SP-DAG: vmov [[BLO:r[0-9]+]], [[BHI:r[0-9]+]], d1
|
||||
; SP: itt ne
|
||||
; SP-DAG: movne [[BLO]], [[ALO]]
|
||||
; SP-DAG: movne [[BHI]], [[AHI]]
|
||||
; SP: vmov d0, [[BLO]], [[BHI]]
|
||||
; DP: tst.w r0, #1
|
||||
; DP: vmovne.f64 d1, d0
|
||||
; DP: vmov.f64 d0, d1
|
||||
%1 = select i1 %c, double %a, double %b
|
||||
ret double %1
|
||||
}
|
Loading…
Reference in New Issue
Block a user