diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 20ec3ac5e62..e3c034c8a15 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -397,6 +397,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); + if (N->getValueType(0) == MVT::f16) { + // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a + // storage-only type get a chance to select things. + return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op); + } + RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; @@ -632,6 +638,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; @@ -661,6 +668,22 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { GetSoftenedFloat(N->getOperand(0))); } +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { + // If we get here, the result must be legal but the source illegal. + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (SVT == MVT::f16) + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op); + + RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); + + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; +} + + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { // We actually deal with the partially-softened FP_TO_FP16 node too, which // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 63c74a05232..117ff31e2e8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -425,6 +425,7 @@ private: bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 27048f9a10d..f6f937a7dd5 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -396,8 +396,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addRegisterClass(MVT::f32, &ARM::SPRRegClass); if (!Subtarget->isFPOnlySP()) addRegisterClass(MVT::f64, &ARM::DPRRegClass); - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); } for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -582,8 +580,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) computeRegisterProperties(); - // ARM does not have f32 extending load. + // ARM does not have floating-point extending loads. setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + + // ... or truncating stores + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); // ARM does not have i1 sign extending load. setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); diff --git a/test/CodeGen/ARM/half.ll b/test/CodeGen/ARM/half.ll index 72da1a514c6..10cebb38c56 100644 --- a/test/CodeGen/ARM/half.ll +++ b/test/CodeGen/ARM/half.ll @@ -1,4 +1,6 @@ -; RUN: llc < %s -mtriple=thumbv7s-apple-ios7.0 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-OLD +; RUN: llc < %s -mtriple=thumbv7s-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-F16 +; RUN: llc < %s -mtriple=thumbv8-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V8 define void @test_load_store(half* %in, half* %out) { ; CHECK-LABEL: test_load_store: @@ -24,3 +26,49 @@ define void @test_bitcast_to_half(half* %addr, i16 %in) { store half %val_fp, half* %addr ret void } + +define float @test_extend32(half* %addr) { +; CHECK-LABEL: test_extend32: + +; CHECK-OLD: b.w ___gnu_h2f_ieee +; CHECK-F16: vcvtb.f32.f16 +; CHECK-V8: vcvtb.f32.f16 + %val16 = load half* %addr + %val32 = fpext half %val16 to float + ret float %val32 +} + +define double @test_extend64(half* %addr) { +; CHECK-LABEL: test_extend64: + +; CHECK-OLD: blx ___gnu_h2f_ieee +; CHECK-OLD: vcvt.f64.f32 +; CHECK-F16: vcvtb.f32.f16 +; CHECK-F16: vcvt.f64.f32 +; CHECK-V8: vcvtb.f64.f16 + %val16 = load half* %addr + %val32 = fpext half %val16 to double + ret double %val32 +} + +define void @test_trunc32(float %in, half* %addr) { +; CHECK-LABEL: test_trunc32: + +; CHECK-OLD: blx ___gnu_f2h_ieee +; CHECK-F16: vcvtb.f16.f32 +; CHECK-V8: vcvtb.f16.f32 + %val16 = fptrunc float %in to half + store half %val16, half* %addr + ret void +} + +define void @test_trunc64(double %in, half* %addr) { +; CHECK-LABEL: test_trunc64: + +; CHECK-OLD: blx ___truncdfhf2 +; CHECK-F16: blx ___truncdfhf2 +; CHECK-V8: vcvtb.f16.f64 + %val16 = fptrunc double %in to half + store half %val16, half* %addr + ret void +}