mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-22 11:42:42 +00:00
[ARM]: Extend -mfpu options for half-precision and vfpv3xd
Some of the permissible ARM -mfpu options, which are supported in GCC, are currently not present in llvm/clang. This patch adds the options: 'neon-fp16', 'vfpv3-fp16', 'vfpv3-d16-fp16', 'vfpv3xd' and 'vfpv3xd-fp16'. These are related to half-precision floating-point and single precision. Reviewers: rengolin, ranjeet.singh Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D10645 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240930 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
048a1eb977
commit
263dd533ee
@ -36,7 +36,11 @@ namespace ARM {
|
||||
FK_VFP,
|
||||
FK_VFPV2,
|
||||
FK_VFPV3,
|
||||
FK_VFPV3_FP16,
|
||||
FK_VFPV3_D16,
|
||||
FK_VFPV3_D16_FP16,
|
||||
FK_VFPV3XD,
|
||||
FK_VFPV3XD_FP16,
|
||||
FK_VFPV4,
|
||||
FK_VFPV4_D16,
|
||||
FK_FPV4_SP_D16,
|
||||
@ -44,6 +48,7 @@ namespace ARM {
|
||||
FK_FPV5_SP_D16,
|
||||
FK_FP_ARMV8,
|
||||
FK_NEON,
|
||||
FK_NEON_FP16,
|
||||
FK_NEON_VFPV4,
|
||||
FK_NEON_FP_ARMV8,
|
||||
FK_CRYPTO_NEON_FP_ARMV8,
|
||||
@ -51,6 +56,16 @@ namespace ARM {
|
||||
FK_LAST
|
||||
};
|
||||
|
||||
// FPU Version: the floating-point architecture level attached to each FPU
// name. getFPUFeatures switches on this value to pick the subtarget features.
|
||||
enum FPUVersion {
|
||||
FV_NONE = 0, ///< No FPU: vfp2, vfp3, fp16, vfp4 and fp-armv8 all disabled
|
||||
FV_VFPV2, ///< Enables vfp2; vfp3, fp16, vfp4 and fp-armv8 disabled
|
||||
FV_VFPV3, ///< Enables vfp3 with fp16 explicitly disabled
|
||||
FV_VFPV3_FP16, ///< Enables vfp3 together with fp16 (half-precision)
|
||||
FV_VFPV4, ///< Enables vfp4; fp-armv8 disabled
|
||||
FV_VFPV5 ///< Enables fp-armv8
|
||||
};
|
||||
|
||||
// An FPU name implies one of three levels of Neon support:
|
||||
enum NeonSupportLevel {
|
||||
NS_None = 0, ///< No Neon
|
||||
|
@ -25,31 +25,37 @@ namespace {
|
||||
// List of canonical FPU names (use getFPUSynonym) and which architectural
|
||||
// features they correspond to (use getFPUFeatures).
|
||||
// FIXME: TableGen this.
|
||||
// The entries must appear in the order listed in ARM::FPUKind for correct indexing
|
||||
struct {
|
||||
const char * Name;
|
||||
ARM::FPUKind ID;
|
||||
unsigned FPUVersion; ///< Corresponds directly to the FP arch version number.
|
||||
ARM::FPUVersion FPUVersion;
|
||||
ARM::NeonSupportLevel NeonSupport;
|
||||
ARM::FPURestriction Restriction;
|
||||
} FPUNames[] = {
|
||||
{ "invalid", ARM::FK_INVALID, 0, ARM::NS_None, ARM::FR_None},
|
||||
{ "none", ARM::FK_NONE, 0, ARM::NS_None, ARM::FR_None},
|
||||
{ "vfp", ARM::FK_VFP, 2, ARM::NS_None, ARM::FR_None},
|
||||
{ "vfpv2", ARM::FK_VFPV2, 2, ARM::NS_None, ARM::FR_None},
|
||||
{ "vfpv3", ARM::FK_VFPV3, 3, ARM::NS_None, ARM::FR_None},
|
||||
{ "vfpv3-d16", ARM::FK_VFPV3_D16, 3, ARM::NS_None, ARM::FR_D16},
|
||||
{ "vfpv4", ARM::FK_VFPV4, 4, ARM::NS_None, ARM::FR_None},
|
||||
{ "vfpv4-d16", ARM::FK_VFPV4_D16, 4, ARM::NS_None, ARM::FR_D16},
|
||||
{ "fpv4-sp-d16", ARM::FK_FPV4_SP_D16, 4, ARM::NS_None, ARM::FR_SP_D16},
|
||||
{ "fpv5-d16", ARM::FK_FPV5_D16, 5, ARM::NS_None, ARM::FR_D16},
|
||||
{ "fpv5-sp-d16", ARM::FK_FPV5_SP_D16, 5, ARM::NS_None, ARM::FR_SP_D16},
|
||||
{ "fp-armv8", ARM::FK_FP_ARMV8, 5, ARM::NS_None, ARM::FR_None},
|
||||
{ "neon", ARM::FK_NEON, 3, ARM::NS_Neon, ARM::FR_None},
|
||||
{ "neon-vfpv4", ARM::FK_NEON_VFPV4, 4, ARM::NS_Neon, ARM::FR_None},
|
||||
{ "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8, 5, ARM::NS_Neon, ARM::FR_None},
|
||||
{ "invalid", ARM::FK_INVALID, ARM::FV_NONE, ARM::NS_None, ARM::FR_None},
|
||||
{ "none", ARM::FK_NONE, ARM::FV_NONE, ARM::NS_None, ARM::FR_None},
|
||||
{ "vfp", ARM::FK_VFP, ARM::FV_VFPV2, ARM::NS_None, ARM::FR_None},
|
||||
{ "vfpv2", ARM::FK_VFPV2, ARM::FV_VFPV2, ARM::NS_None, ARM::FR_None},
|
||||
{ "vfpv3", ARM::FK_VFPV3, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_None},
|
||||
{ "vfpv3-fp16", ARM::FK_VFPV3_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_None},
|
||||
{ "vfpv3-d16", ARM::FK_VFPV3_D16, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_D16},
|
||||
{ "vfpv3-d16-fp16", ARM::FK_VFPV3_D16_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_D16},
|
||||
{ "vfpv3xd", ARM::FK_VFPV3XD, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_SP_D16},
|
||||
{ "vfpv3xd-fp16", ARM::FK_VFPV3XD_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_SP_D16},
|
||||
{ "vfpv4", ARM::FK_VFPV4, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_None},
|
||||
{ "vfpv4-d16", ARM::FK_VFPV4_D16, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_D16},
|
||||
{ "fpv4-sp-d16", ARM::FK_FPV4_SP_D16, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_SP_D16},
|
||||
{ "fpv5-d16", ARM::FK_FPV5_D16, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_D16},
|
||||
{ "fpv5-sp-d16", ARM::FK_FPV5_SP_D16, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_SP_D16},
|
||||
{ "fp-armv8", ARM::FK_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_None},
|
||||
{ "neon", ARM::FK_NEON, ARM::FV_VFPV3, ARM::NS_Neon, ARM::FR_None},
|
||||
{ "neon-fp16", ARM::FK_NEON_FP16, ARM::FV_VFPV3_FP16, ARM::NS_Neon, ARM::FR_None},
|
||||
{ "neon-vfpv4", ARM::FK_NEON_VFPV4, ARM::FV_VFPV4, ARM::NS_Neon, ARM::FR_None},
|
||||
{ "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_Neon, ARM::FR_None},
|
||||
{ "crypto-neon-fp-armv8",
|
||||
ARM::FK_CRYPTO_NEON_FP_ARMV8, 5, ARM::NS_Crypto, ARM::FR_None},
|
||||
{ "softvfp", ARM::FK_SOFTVFP, 0, ARM::NS_None, ARM::FR_None},
|
||||
ARM::FK_CRYPTO_NEON_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_Crypto, ARM::FR_None},
|
||||
{ "softvfp", ARM::FK_SOFTVFP, ARM::FV_NONE, ARM::NS_None, ARM::FR_None},
|
||||
};
|
||||
|
||||
// List of canonical arch names (use getArchSynonym).
|
||||
@ -279,33 +285,41 @@ bool ARMTargetParser::getFPUFeatures(unsigned FPUKind,
|
||||
// higher. We also have to make sure to disable fp16 when vfp4 is disabled,
|
||||
// as +vfp4 implies +fp16 but -vfp4 does not imply -fp16.
|
||||
switch (FPUNames[FPUKind].FPUVersion) {
|
||||
case 5:
|
||||
case ARM::FV_VFPV5:
|
||||
Features.push_back("+fp-armv8");
|
||||
break;
|
||||
case 4:
|
||||
case ARM::FV_VFPV4:
|
||||
Features.push_back("+vfp4");
|
||||
Features.push_back("-fp-armv8");
|
||||
break;
|
||||
case 3:
|
||||
case ARM::FV_VFPV3_FP16:
|
||||
Features.push_back("+vfp3");
|
||||
Features.push_back("+fp16");
|
||||
Features.push_back("-vfp4");
|
||||
Features.push_back("-fp-armv8");
|
||||
break;
|
||||
case ARM::FV_VFPV3:
|
||||
Features.push_back("+vfp3");
|
||||
Features.push_back("-fp16");
|
||||
Features.push_back("-vfp4");
|
||||
Features.push_back("-fp-armv8");
|
||||
break;
|
||||
case 2:
|
||||
case ARM::FV_VFPV2:
|
||||
Features.push_back("+vfp2");
|
||||
Features.push_back("-vfp3");
|
||||
Features.push_back("-fp16");
|
||||
Features.push_back("-vfp4");
|
||||
Features.push_back("-fp-armv8");
|
||||
break;
|
||||
case 0:
|
||||
case ARM::FV_NONE:
|
||||
Features.push_back("-vfp2");
|
||||
Features.push_back("-vfp3");
|
||||
Features.push_back("-fp16");
|
||||
Features.push_back("-vfp4");
|
||||
Features.push_back("-fp-armv8");
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
// crypto includes neon, so we handle this similarly to FPU version.
|
||||
|
@ -630,7 +630,7 @@ void ARMAsmPrinter::emitAttributes() {
|
||||
} else if (STI.hasVFP4())
|
||||
ATS.emitFPU(ARM::FK_NEON_VFPV4);
|
||||
else
|
||||
ATS.emitFPU(ARM::FK_NEON);
|
||||
ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON);
|
||||
// Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
|
||||
if (STI.hasV8Ops())
|
||||
ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
|
||||
@ -648,7 +648,13 @@ void ARMAsmPrinter::emitAttributes() {
|
||||
? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16)
|
||||
: ARM::FK_VFPV4);
|
||||
else if (STI.hasVFP3())
|
||||
ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV3_D16 : ARM::FK_VFPV3);
|
||||
ATS.emitFPU(STI.hasD16()
|
||||
// +d16
|
||||
? (STI.isFPOnlySP()
|
||||
? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD)
|
||||
: (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16))
|
||||
// -d16
|
||||
: (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3));
|
||||
else if (STI.hasVFP2())
|
||||
ATS.emitFPU(ARM::FK_VFPV2);
|
||||
}
|
||||
|
@ -797,12 +797,44 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
|
||||
/* OverwriteExisting= */ false);
|
||||
break;
|
||||
|
||||
case ARM::FK_VFPV3_FP16:
|
||||
setAttributeItem(ARMBuildAttrs::FP_arch,
|
||||
ARMBuildAttrs::AllowFPv3A,
|
||||
/* OverwriteExisting= */ false);
|
||||
setAttributeItem(ARMBuildAttrs::FP_HP_extension,
|
||||
ARMBuildAttrs::AllowHPFP,
|
||||
/* OverwriteExisting= */ false);
|
||||
break;
|
||||
|
||||
case ARM::FK_VFPV3_D16:
|
||||
setAttributeItem(ARMBuildAttrs::FP_arch,
|
||||
ARMBuildAttrs::AllowFPv3B,
|
||||
/* OverwriteExisting= */ false);
|
||||
break;
|
||||
|
||||
case ARM::FK_VFPV3_D16_FP16:
|
||||
setAttributeItem(ARMBuildAttrs::FP_arch,
|
||||
ARMBuildAttrs::AllowFPv3B,
|
||||
/* OverwriteExisting= */ false);
|
||||
setAttributeItem(ARMBuildAttrs::FP_HP_extension,
|
||||
ARMBuildAttrs::AllowHPFP,
|
||||
/* OverwriteExisting= */ false);
|
||||
break;
|
||||
|
||||
case ARM::FK_VFPV3XD:
|
||||
setAttributeItem(ARMBuildAttrs::FP_arch,
|
||||
ARMBuildAttrs::AllowFPv3B,
|
||||
/* OverwriteExisting= */ false);
|
||||
break;
|
||||
case ARM::FK_VFPV3XD_FP16:
|
||||
setAttributeItem(ARMBuildAttrs::FP_arch,
|
||||
ARMBuildAttrs::AllowFPv3B,
|
||||
/* OverwriteExisting= */ false);
|
||||
setAttributeItem(ARMBuildAttrs::FP_HP_extension,
|
||||
ARMBuildAttrs::AllowHPFP,
|
||||
/* OverwriteExisting= */ false);
|
||||
break;
|
||||
|
||||
case ARM::FK_VFPV4:
|
||||
setAttributeItem(ARMBuildAttrs::FP_arch,
|
||||
ARMBuildAttrs::AllowFPv4A,
|
||||
@ -842,6 +874,18 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
|
||||
/* OverwriteExisting= */ false);
|
||||
break;
|
||||
|
||||
case ARM::FK_NEON_FP16:
|
||||
setAttributeItem(ARMBuildAttrs::FP_arch,
|
||||
ARMBuildAttrs::AllowFPv3A,
|
||||
/* OverwriteExisting= */ false);
|
||||
setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
|
||||
ARMBuildAttrs::AllowNeon,
|
||||
/* OverwriteExisting= */ false);
|
||||
setAttributeItem(ARMBuildAttrs::FP_HP_extension,
|
||||
ARMBuildAttrs::AllowHPFP,
|
||||
/* OverwriteExisting= */ false);
|
||||
break;
|
||||
|
||||
case ARM::FK_NEON_VFPV4:
|
||||
setAttributeItem(ARMBuildAttrs::FP_arch,
|
||||
ARMBuildAttrs::AllowFPv4A,
|
||||
|
@ -51,6 +51,13 @@
|
||||
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-FAST
|
||||
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A17-NOFPU
|
||||
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-NOFPU-FAST
|
||||
|
||||
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3-FP16
|
||||
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+d16,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3-D16-FP16
|
||||
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp-only-sp,+d16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD
|
||||
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp-only-sp,+d16,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD-FP16
|
||||
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=+neon,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-NEON-FP16
|
||||
|
||||
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
|
||||
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0
|
||||
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0-FAST
|
||||
@ -1091,7 +1098,7 @@
|
||||
; CORTEX-R7: .eabi_attribute 7, 82
|
||||
; CORTEX-R7: .eabi_attribute 8, 1
|
||||
; CORTEX-R7: .eabi_attribute 9, 2
|
||||
; CORTEX-R7: .fpu vfpv3-d16
|
||||
; CORTEX-R7: .fpu vfpv3xd
|
||||
; CORTEX-R7-NOT: .eabi_attribute 19
|
||||
;; We default to IEEE 754 compliance
|
||||
; CORTEX-R7: .eabi_attribute 20, 1
|
||||
@ -1205,6 +1212,12 @@
|
||||
; CORTEX-A72-FAST-NOT: .eabi_attribute 22
|
||||
; CORTEX-A72-FAST: .eabi_attribute 23, 1
|
||||
|
||||
; GENERIC-FPU-VFPV3-FP16: .fpu vfpv3-fp16
|
||||
; GENERIC-FPU-VFPV3-D16-FP16: .fpu vfpv3-d16-fp16
|
||||
; GENERIC-FPU-VFPV3XD: .fpu vfpv3xd
|
||||
; GENERIC-FPU-VFPV3XD-FP16: .fpu vfpv3xd-fp16
|
||||
; GENERIC-FPU-NEON-FP16: .fpu neon-fp16
|
||||
|
||||
; GENERIC-ARMV8_1-A: .eabi_attribute 6, 14
|
||||
; GENERIC-ARMV8_1-A: .eabi_attribute 7, 65
|
||||
; GENERIC-ARMV8_1-A: .eabi_attribute 8, 1
|
||||
|
@ -10,7 +10,11 @@
|
||||
.fpu vfp
|
||||
.fpu vfpv2
|
||||
.fpu vfpv3
|
||||
.fpu vfpv3-fp16
|
||||
.fpu vfpv3-d16
|
||||
.fpu vfpv3-d16-fp16
|
||||
.fpu vfpv3xd
|
||||
.fpu vfpv3xd-fp16
|
||||
.fpu vfpv4
|
||||
.fpu vfpv4-d16
|
||||
.fpu fpv4-sp-d16
|
||||
@ -18,6 +22,7 @@
|
||||
.fpu fpv5-sp-d16
|
||||
.fpu fp-armv8
|
||||
.fpu neon
|
||||
.fpu neon-fp16
|
||||
.fpu neon-vfpv4
|
||||
.fpu neon-fp-armv8
|
||||
.fpu crypto-neon-fp-armv8
|
||||
|
Loading…
x
Reference in New Issue
Block a user