From 9d7038c437bb9e2c4dd148088172fab23cac295d Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Wed, 1 Oct 2014 13:13:18 +0000 Subject: [PATCH] [ARM] Allow selecting VRINT[APMXZR] and VCVT[BT] instructions for FPv5 Currently, we only codegen the VRINT[APMXZR] and VCVT[BT] instructions when targeting ARMv8, but they are actually present on any target with FP-ARMv8. Note that FP-ARMv8 is called FPv5 when it is part of an M-profile core, but they have the same instructions so we model them both as FPARMv8 in the ARM backend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218763 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 29 +++++++----- test/CodeGen/ARM/arm32-rounding.ll | 22 +++++++--- .../CodeGen/Thumb2/float-intrinsics-double.ll | 44 ++++++++++++------- test/CodeGen/Thumb2/float-intrinsics-float.ll | 39 +++++++++------- 4 files changed, 82 insertions(+), 52 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 425732cb7d2..aee8a40066b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -881,8 +881,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } - // v8 adds f64 <-> f16 conversion. Before that it should be expanded. - if (!Subtarget->hasV8Ops()) { + // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. + if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); } @@ -906,16 +906,21 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } - // ARMv8 implements a lot of rounding-like FP operations. 
- if (Subtarget->hasV8Ops()) { - static MVT RoundingTypes[] = {MVT::f32, MVT::f64}; - for (const auto Ty : RoundingTypes) { - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); + // FP-ARMv8 implements a lot of rounding-like FP operations. + if (Subtarget->hasFPARMv8()) { + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FROUND, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + if (!Subtarget->isFPOnlySP()) { + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FROUND, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); } } // We have target-specific dag combine patterns for the following nodes: diff --git a/test/CodeGen/ARM/arm32-rounding.ll b/test/CodeGen/ARM/arm32-rounding.ll index 00642b23fee..f247648d814 100644 --- a/test/CodeGen/ARM/arm32-rounding.ll +++ b/test/CodeGen/ARM/arm32-rounding.ll @@ -1,4 +1,6 @@ -; RUN: llc < %s -mtriple=armv8-linux-gnueabihf -mattr=+fp-armv8 | FileCheck %s +; RUN: llc < %s -mtriple=armv8-linux-gnueabihf -mattr=+fp-armv8 | FileCheck --check-prefix=CHECK --check-prefix=DP %s +; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabihf -mattr=+fp-armv8,+d16,+fp-only-sp | FileCheck --check-prefix=SP %s +; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabihf -mattr=+fp-armv8,+d16 | FileCheck --check-prefix=DP %s ; CHECK-LABEL: test1 ; CHECK: vrintm.f32 @@ -9,7 +11,8 @@ entry: } ; CHECK-LABEL: test2 -; CHECK: vrintm.f64 +; SP: 
b floor +; DP: vrintm.f64 define double @test2(double %a) { entry: %call = call double @floor(double %a) nounwind readnone @@ -25,7 +28,8 @@ entry: } ; CHECK-LABEL: test4 -; CHECK: vrintp.f64 +; SP: b ceil +; DP: vrintp.f64 define double @test4(double %a) { entry: %call = call double @ceil(double %a) nounwind readnone @@ -41,7 +45,8 @@ entry: } ; CHECK-LABEL: test6 -; CHECK: vrinta.f64 +; SP: b round +; DP: vrinta.f64 define double @test6(double %a) { entry: %call = call double @round(double %a) nounwind readnone @@ -57,7 +62,8 @@ entry: } ; CHECK-LABEL: test8 -; CHECK: vrintz.f64 +; SP: b trunc +; DP: vrintz.f64 define double @test8(double %a) { entry: %call = call double @trunc(double %a) nounwind readnone @@ -73,7 +79,8 @@ entry: } ; CHECK-LABEL: test10 -; CHECK: vrintr.f64 +; SP: b nearbyint +; DP: vrintr.f64 define double @test10(double %a) { entry: %call = call double @nearbyint(double %a) nounwind readnone @@ -89,7 +96,8 @@ entry: } ; CHECK-LABEL: test12 -; CHECK: vrintx.f64 +; SP: b rint +; DP: vrintx.f64 define double @test12(double %a) { entry: %call = call double @rint(double %a) nounwind readnone diff --git a/test/CodeGen/Thumb2/float-intrinsics-double.ll b/test/CodeGen/Thumb2/float-intrinsics-double.ll index d6e0c8b9770..01a23bd0fe6 100644 --- a/test/CodeGen/Thumb2/float-intrinsics-double.ll +++ b/test/CodeGen/Thumb2/float-intrinsics-double.ll @@ -1,7 +1,9 @@ -; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=SP -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON +; RUN: llc < %s 
-mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=SP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP -check-prefix=FP-ARMv8 +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 -mattr=+fp-only-sp | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=SP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=VFP4 +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a57 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=FP-ARMv8 declare double @llvm.sqrt.f64(double %Val) define double @sqrt_d(double %a) { @@ -133,7 +135,8 @@ declare double @llvm.floor.f64(double %Val) define double @floor_d(double %a) { ; CHECK-LABEL: floor_d: ; SOFT: {{(bl|b)}} floor -; HARD: b floor +; VFP4: b floor +; FP-ARMv8: vrintm.f64 %1 = call double @llvm.floor.f64(double %a) ret double %1 } @@ -142,7 +145,8 @@ declare double @llvm.ceil.f64(double %Val) define double @ceil_d(double %a) { ; CHECK-LABEL: ceil_d: ; SOFT: {{(bl|b)}} ceil -; HARD: b ceil +; VFP4: b ceil +; FP-ARMv8: vrintp.f64 %1 = call double @llvm.ceil.f64(double %a) ret double %1 } @@ -151,7 +155,8 @@ declare double @llvm.trunc.f64(double %Val) define double @trunc_d(double %a) { ; CHECK-LABEL: trunc_d: ; SOFT: {{(bl|b)}} trunc -; HARD: b trunc +; VFP4: b trunc +; FP-ARMv8: vrintz.f64 %1 = call double @llvm.trunc.f64(double %a) ret double %1 } @@ -160,7 +165,8 @@ declare double @llvm.rint.f64(double %Val) define double @rint_d(double %a) { ; CHECK-LABEL: rint_d: ; SOFT: {{(bl|b)}} rint -; HARD: b rint +; VFP4: b rint +; FP-ARMv8: vrintx.f64 %1 = 
call double @llvm.rint.f64(double %a) ret double %1 } @@ -169,7 +175,8 @@ declare double @llvm.nearbyint.f64(double %Val) define double @nearbyint_d(double %a) { ; CHECK-LABEL: nearbyint_d: ; SOFT: {{(bl|b)}} nearbyint -; HARD: b nearbyint +; VFP4: b nearbyint +; FP-ARMv8: vrintr.f64 %1 = call double @llvm.nearbyint.f64(double %a) ret double %1 } @@ -178,7 +185,8 @@ declare double @llvm.round.f64(double %Val) define double @round_d(double %a) { ; CHECK-LABEL: round_d: ; SOFT: {{(bl|b)}} round -; HARD: b round +; VFP4: b round +; FP-ARMv8: vrinta.f64 %1 = call double @llvm.round.f64(double %a) ret double %1 } @@ -188,9 +196,9 @@ define double @fmuladd_d(double %a, double %b, double %c) { ; CHECK-LABEL: fmuladd_d: ; SOFT: bl __aeabi_dmul ; SOFT: bl __aeabi_dadd -; NEON: vmul.f64 -; NEON: vadd.f64 -; VFP: vmla.f64 +; VFP4: vmul.f64 +; VFP4: vadd.f64 +; FP-ARMv8: vmla.f64 %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c) ret double %1 } @@ -199,7 +207,8 @@ declare i16 @llvm.convert.to.fp16.f64(double %a) define i16 @d_to_h(double %a) { ; CHECK-LABEL: d_to_h: ; SOFT: bl __aeabi_d2h -; HARD: bl __aeabi_d2h +; VFP4: bl __aeabi_d2h +; FP-ARMv8: vcvt{{[bt]}}.f16.f64 %1 = call i16 @llvm.convert.to.fp16.f64(double %a) ret i16 %1 } @@ -209,10 +218,11 @@ define double @h_to_d(i16 %a) { ; CHECK-LABEL: h_to_d: ; NONE: bl __gnu_h2f_ieee ; NONE: bl __aeabi_f2d -; SP: vcvtb.f32.f16 +; SP: vcvt{{[bt]}}.f32.f16 ; SP: bl __aeabi_f2d -; DP: vcvtb.f32.f16 -; DP: vcvt.f64.f32 +; VFP4: vcvt{{[bt]}}.f32.f16 +; VFP4: vcvt.f64.f32 +; FP-ARMv8: vcvt{{[bt]}}.f64.f16 %1 = call double @llvm.convert.from.fp16.f64(i16 %a) ret double %1 } diff --git a/test/CodeGen/Thumb2/float-intrinsics-float.ll b/test/CodeGen/Thumb2/float-intrinsics-float.ll index f811b69bc9e..ec1bcd3708a 100644 --- a/test/CodeGen/Thumb2/float-intrinsics-float.ll +++ b/test/CodeGen/Thumb2/float-intrinsics-float.ll @@ -1,7 +1,9 @@ -; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s 
-check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON +; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=VMLA +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP -check-prefix=FP-ARMv8 -check-prefix=VMLA +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 -mattr=+fp-only-sp | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=FP-ARMv8 -check-prefix=VMLA +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=VFP4 -check-prefix=NO-VMLA +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a57 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=FP-ARMv8 -check-prefix=VMLA declare float @llvm.sqrt.f32(float %Val) define float @sqrt_f(float %a) { @@ -130,7 +132,8 @@ declare float @llvm.floor.f32(float %Val) define float @floor_f(float %a) { ; CHECK-LABEL: floor_f: ; SOFT: bl floorf -; HARD: b floorf +; VFP4: b floorf +; FP-ARMv8: vrintm.f32 %1 = call float @llvm.floor.f32(float %a) ret float %1 } @@ -139,7 +142,8 @@ declare float @llvm.ceil.f32(float %Val) define float @ceil_f(float %a) { ; CHECK-LABEL: ceil_f: 
; SOFT: bl ceilf -; HARD: b ceilf +; VFP4: b ceilf +; FP-ARMv8: vrintp.f32 %1 = call float @llvm.ceil.f32(float %a) ret float %1 } @@ -148,7 +152,8 @@ declare float @llvm.trunc.f32(float %Val) define float @trunc_f(float %a) { ; CHECK-LABEL: trunc_f: ; SOFT: bl truncf -; HARD: b truncf +; VFP4: b truncf +; FP-ARMv8: vrintz.f32 %1 = call float @llvm.trunc.f32(float %a) ret float %1 } @@ -157,7 +162,8 @@ declare float @llvm.rint.f32(float %Val) define float @rint_f(float %a) { ; CHECK-LABEL: rint_f: ; SOFT: bl rintf -; HARD: b rintf +; VFP4: b rintf +; FP-ARMv8: vrintx.f32 %1 = call float @llvm.rint.f32(float %a) ret float %1 } @@ -166,7 +172,8 @@ declare float @llvm.nearbyint.f32(float %Val) define float @nearbyint_f(float %a) { ; CHECK-LABEL: nearbyint_f: ; SOFT: bl nearbyintf -; HARD: b nearbyintf +; VFP4: b nearbyintf +; FP-ARMv8: vrintr.f32 %1 = call float @llvm.nearbyint.f32(float %a) ret float %1 } @@ -175,7 +182,8 @@ declare float @llvm.round.f32(float %Val) define float @round_f(float %a) { ; CHECK-LABEL: round_f: ; SOFT: bl roundf -; HARD: b roundf +; VFP4: b roundf +; FP-ARMv8: vrinta.f32 %1 = call float @llvm.round.f32(float %a) ret float %1 } @@ -187,10 +195,9 @@ define float @fmuladd_f(float %a, float %b, float %c) { ; CHECK-LABEL: fmuladd_f: ; SOFT: bl __aeabi_fmul ; SOFT: bl __aeabi_fadd -; SP: vmla.f32 -; VFP: vmla.f32 -; NEON: vmul.f32 -; NEON: vadd.f32 +; VMLA: vmla.f32 +; NO-VMLA: vmul.f32 +; NO-VMLA: vadd.f32 %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ret float %1 } @@ -199,7 +206,7 @@ declare i16 @llvm.convert.to.fp16.f32(float %a) define i16 @f_to_h(float %a) { ; CHECK-LABEL: f_to_h: ; SOFT: bl __gnu_f2h_ieee -; HARD: vcvtb.f16.f32 +; HARD: vcvt{{[bt]}}.f16.f32 %1 = call i16 @llvm.convert.to.fp16.f32(float %a) ret i16 %1 } @@ -208,7 +215,7 @@ declare float @llvm.convert.from.fp16.f32(i16 %a) define float @h_to_f(i16 %a) { ; CHECK-LABEL: h_to_f: ; SOFT: bl __gnu_h2f_ieee -; HARD: vcvtb.f32.f16 +; HARD: vcvt{{[bt]}}.f32.f16 
%1 = call float @llvm.convert.from.fp16.f32(i16 %a) ret float %1 }