From ff18b9ff38e4044ea4acb8c8f1751653c1ac7697 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Wed, 1 Oct 2014 09:02:17 +0000 Subject: [PATCH] [ARM] Add support for Cortex-M7, FPv5-SP and FPv5-DP (LLVM) The Cortex-M7 has 3 options for its FPU: none, FPv5-SP-D16 and FPv5-DP-D16. FPv5 has the same instructions as FP-ARMv8, so it can be modelled using the same target feature, and all double-precision operations are already disabled by the fp-only-sp target features. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218747 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.td | 6 +++++ lib/Target/ARM/ARMAsmPrinter.cpp | 4 +++- lib/Target/ARM/ARMFPUName.def | 1 + lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 ++ .../ARM/MCTargetDesc/ARMELFStreamer.cpp | 8 +++++++ test/CodeGen/ARM/build-attributes.ll | 23 +++++++++++++++++++ test/CodeGen/Thumb2/cortex-fp.ll | 3 +++ test/CodeGen/Thumb2/float-cmp.ll | 1 + .../CodeGen/Thumb2/float-intrinsics-double.ll | 16 ++++++++----- test/CodeGen/Thumb2/float-intrinsics-float.ll | 14 +++++++---- test/CodeGen/Thumb2/float-ops.ll | 15 +++++++----- 11 files changed, 75 insertions(+), 18 deletions(-) diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 7916ccc180c..26bbc164f02 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -392,6 +392,12 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureT2XtPk, FeatureVFP4, FeatureVFPOnlySP, FeatureD16, FeatureMClass]>; +def : ProcNoItin<"cortex-m7", [HasV7Ops, + FeatureThumb2, FeatureNoARM, FeatureDB, + FeatureHWDiv, FeatureDSPThumb2, + FeatureT2XtPk, FeatureFPARMv8, + FeatureD16, FeatureMClass]>; + // Swift uArch Processors. 
def : ProcessorModel<"swift", SwiftModel, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 41f4d821134..5ae8b2f11ae 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -667,7 +667,9 @@ void ARMAsmPrinter::emitAttributes() { ARMBuildAttrs::AllowNeonARMv8); } else { if (Subtarget->hasFPARMv8()) - ATS.emitFPU(ARM::FP_ARMV8); + // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one + // FPU, but there are two different names for it depending on the CPU. + ATS.emitFPU(Subtarget->hasD16() ? ARM::FPV5_D16 : ARM::FP_ARMV8); else if (Subtarget->hasVFP4()) ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4); else if (Subtarget->hasVFP3()) diff --git a/lib/Target/ARM/ARMFPUName.def b/lib/Target/ARM/ARMFPUName.def index 1fef3b3bc5e..34ce85d280e 100644 --- a/lib/Target/ARM/ARMFPUName.def +++ b/lib/Target/ARM/ARMFPUName.def @@ -23,6 +23,7 @@ ARM_FPU_NAME("vfpv3", VFPV3) ARM_FPU_NAME("vfpv3-d16", VFPV3_D16) ARM_FPU_NAME("vfpv4", VFPV4) ARM_FPU_NAME("vfpv4-d16", VFPV4_D16) +ARM_FPU_NAME("fpv5-d16", FPV5_D16) ARM_FPU_NAME("fp-armv8", FP_ARMV8) ARM_FPU_NAME("neon", NEON) ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4) diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index d19c2435d9f..37aad28dd3c 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -8844,6 +8844,8 @@ static const struct { {ARM::VFPV3_D16, ARM::FeatureVFP3 | ARM::FeatureD16, ARM::FeatureNEON}, {ARM::VFPV4, ARM::FeatureVFP4, ARM::FeatureNEON}, {ARM::VFPV4_D16, ARM::FeatureVFP4 | ARM::FeatureD16, ARM::FeatureNEON}, + {ARM::FPV5_D16, ARM::FeatureFPARMv8 | ARM::FeatureD16, + ARM::FeatureNEON | ARM::FeatureCrypto}, {ARM::FP_ARMV8, ARM::FeatureFPARMv8, ARM::FeatureNEON | ARM::FeatureCrypto}, {ARM::NEON, ARM::FeatureNEON, 0}, diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp 
index 7b5d8b01dfe..ab48e148cf9 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -848,6 +848,14 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { /* OverwriteExisting= */ false); break; + // FPV5_D16 is identical to FP_ARMV8 except for the number of D registers, so + // uses the FP_ARMV8_D16 build attribute. + case ARM::FPV5_D16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPARMv8B, + /* OverwriteExisting= */ false); + break; + case ARM::NEON: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv3A, diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll index a4b77c80c10..4243fa4efbd 100644 --- a/test/CodeGen/ARM/build-attributes.ll +++ b/test/CodeGen/ARM/build-attributes.ll @@ -26,6 +26,9 @@ ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 | FileCheck %s --check-prefix=CORTEX-M3 ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-M4-SOFT ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-M4-HARD +; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-M7 --check-prefix=CORTEX-M7-SOFT +; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=+fp-only-sp | FileCheck %s --check-prefix=CORTEX-M7 --check-prefix=CORTEX-M7-SINGLE +; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 | FileCheck %s --check-prefix=CORTEX-M7 --check-prefix=CORTEX-M7-DOUBLE ; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 | FileCheck %s --check-prefix=CORTEX-A53 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=CORTEX-A57 @@ -410,6 +413,26 @@ ; CORTEX-M4-HARD-NOT: 
.eabi_attribute 44 ; CORTEX-M4-HARD-NOT: .eabi_attribute 68 +; CORTEX-M7: .cpu cortex-m7 +; CORTEX-M7: .eabi_attribute 6, 13 +; CORTEX-M7: .eabi_attribute 7, 77 +; CORTEX-M7: .eabi_attribute 8, 0 +; CORTEX-M7: .eabi_attribute 9, 2 +; CORTEX-M7-SOFT-NOT: .fpu +; CORTEX-M7-SINGLE: .fpu fpv5-d16 +; CORTEX-M7-DOUBLE: .fpu fpv5-d16 +; CORTEX-M7: .eabi_attribute 17, 1 +; CORTEX-M7: .eabi_attribute 20, 1 +; CORTEX-M7: .eabi_attribute 21, 1 +; CORTEX-M7: .eabi_attribute 23, 3 +; CORTEX-M7: .eabi_attribute 24, 1 +; CORTEX-M7: .eabi_attribute 25, 1 +; CORTEX-M7-SOFT-NOT: .eabi_attribute 27 +; CORTEX-M7-SINGLE: .eabi_attribute 27, 1 +; CORTEX-M7-DOUBLE-NOT: .eabi_attribute 27 +; CORTEX-M7: .eabi_attribute 36, 1 +; CORTEX-M7: .eabi_attribute 14, 0 + ; CORTEX-R5: .cpu cortex-r5 ; CORTEX-R5: .eabi_attribute 6, 10 ; CORTEX-R5: .eabi_attribute 7, 82 diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll index b4227615af4..5548492ed09 100644 --- a/test/CodeGen/Thumb2/cortex-fp.ll +++ b/test/CodeGen/Thumb2/cortex-fp.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=CORTEXM3 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=CORTEXM4 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=CORTEXM7 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=CORTEXA8 @@ -8,6 +9,7 @@ entry: ; CHECK-LABEL: foo: ; CORTEXM3: bl ___mulsf3 ; CORTEXM4: vmul.f32 s +; CORTEXM7: vmul.f32 s ; CORTEXA8: vmul.f32 d %0 = fmul float %a, %b ret float %0 @@ -19,6 +21,7 @@ entry: %0 = fmul double %a, %b ; CORTEXM3: bl ___muldf3 ; CORTEXM4: {{bl|b.w}} ___muldf3 +; CORTEXM7: vmul.f64 d ; CORTEXA8: vmul.f64 d ret double %0 } diff --git a/test/CodeGen/Thumb2/float-cmp.ll 
b/test/CodeGen/Thumb2/float-cmp.ll index a28114918ed..88d6c3b0adb 100644 --- a/test/CodeGen/Thumb2/float-cmp.ll +++ b/test/CodeGen/Thumb2/float-cmp.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP diff --git a/test/CodeGen/Thumb2/float-intrinsics-double.ll b/test/CodeGen/Thumb2/float-intrinsics-double.ll index b34a69cdb50..d6e0c8b9770 100644 --- a/test/CodeGen/Thumb2/float-intrinsics-double.ll +++ b/test/CodeGen/Thumb2/float-intrinsics-double.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=SP -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON declare double @llvm.sqrt.f64(double %Val) define double @sqrt_d(double %a) { @@ -119,9 +120,11 @@ define double @copysign_d(double %a, double %b) { ; CHECK-LABEL: copysign_d: ; SOFT: lsrs [[REG:r[0-9]+]], r3, #31 ; SOFT: bfi r1, [[REG]], #31, #1 -; HARD: vmov.i32 [[REG:d[0-9]+]], #0x80000000 -; HARD: vshl.i64 [[REG]], [[REG]], #32 -; HARD: vbsl 
[[REG]], d +; VFP: lsrs [[REG:r[0-9]+]], r3, #31 +; VFP: bfi r1, [[REG]], #31, #1 +; NEON: vmov.i32 [[REG:d[0-9]+]], #0x80000000 +; NEON: vshl.i64 [[REG]], [[REG]], #32 +; NEON: vbsl [[REG]], d %1 = call double @llvm.copysign.f64(double %a, double %b) ret double %1 } @@ -185,8 +188,9 @@ define double @fmuladd_d(double %a, double %b, double %c) { ; CHECK-LABEL: fmuladd_d: ; SOFT: bl __aeabi_dmul ; SOFT: bl __aeabi_dadd -; HARD: vmul.f64 -; HARD: vadd.f64 +; NEON: vmul.f64 +; NEON: vadd.f64 +; VFP: vmla.f64 %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c) ret double %1 } diff --git a/test/CodeGen/Thumb2/float-intrinsics-float.ll b/test/CodeGen/Thumb2/float-intrinsics-float.ll index b29ab35c8f6..f811b69bc9e 100644 --- a/test/CodeGen/Thumb2/float-intrinsics-float.ll +++ b/test/CodeGen/Thumb2/float-intrinsics-float.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON declare float @llvm.sqrt.f32(float %Val) define float @sqrt_f(float %a) { @@ -117,8 +118,10 @@ define float @copysign_f(float %a, float %b) { ; NONE: bfi r{{[0-9]+}}, [[REG]], #31, #1 ; SP: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31 ; SP: bfi r{{[0-9]+}}, [[REG]], #31, #1 -; DP: vmov.i32 [[REG:d[0-9]+]], #0x80000000 -; DP: vbsl [[REG]], d +; VFP: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31 +; VFP: bfi r{{[0-9]+}}, [[REG]], #31, #1 +; NEON: 
vmov.i32 [[REG:d[0-9]+]], #0x80000000 +; NEON: vbsl [[REG]], d %1 = call float @llvm.copysign.f32(float %a, float %b) ret float %1 } @@ -185,8 +188,9 @@ define float @fmuladd_f(float %a, float %b, float %c) { ; SOFT: bl __aeabi_fmul ; SOFT: bl __aeabi_fadd ; SP: vmla.f32 -; DP: vmul.f32 -; DP: vadd.f32 +; VFP: vmla.f32 +; NEON: vmul.f32 +; NEON: vadd.f32 %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ret float %1 } diff --git a/test/CodeGen/Thumb2/float-ops.ll b/test/CodeGen/Thumb2/float-ops.ll index c5741cdacb2..d383065cd53 100644 --- a/test/CodeGen/Thumb2/float-ops.ll +++ b/test/CodeGen/Thumb2/float-ops.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=VFP4-ALL +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=FP-ARMv8 +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP4-ALL -check-prefix=VFP4-DP define float @add_f(float %a, float %b) { entry: @@ -263,8 +264,9 @@ define float @select_f(float %a, float %b, i1 %c) { ; NONE: tst.w r2, #1 ; NONE: moveq r0, r1 ; HARD: tst.w r0, #1 -; HARD: vmovne.f32 s1, s0 -; HARD: vmov.f32 s0, s1 +; VFP4-ALL: vmovne.f32 s1, s0 +; VFP4-ALL: vmov.f32 s0, s1 +; FP-ARMv8: vseleq.f32 s0, s1, s0 %1 = select i1 %c, float %a, float %b ret float %1 } @@ -283,8 +285,9 @@ define double @select_d(double %a, double %b, i1 %c) { ; SP-DAG: movne 
[[BHI]], [[AHI]] ; SP: vmov d0, [[BLO]], [[BHI]] ; DP: tst.w r0, #1 -; DP: vmovne.f64 d1, d0 -; DP: vmov.f64 d0, d1 +; VFP4-DP: vmovne.f64 d1, d0 +; VFP4-DP: vmov.f64 d0, d1 +; FP-ARMv8: vseleq.f64 d0, d1, d0 %1 = select i1 %c, double %a, double %b ret double %1 }