mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-27 15:02:16 +00:00
Avoid NEON SP-FP unless unsafe-math or Darwin
NEON is not IEEE 754 compliant, so we should avoid lowering single-precision floating-point operations to NEON unless unsafe-math is turned on. The equivalent VFP instructions are IEEE 754 compliant, but on some cores they are much slower, so some architectures/OSs (such as Swift and Darwin) might still request NEON lowering to be on by default. llvm-svn: 177651
This commit is contained in:
parent
e20714f292
commit
0854fd9bef
@ -143,14 +143,12 @@ include "ARMSchedule.td"
|
||||
// ARM processor families.
|
||||
def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
|
||||
"Cortex-A5 ARM processors",
|
||||
[FeatureSlowFPBrcc, FeatureNEONForFP,
|
||||
FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
|
||||
FeatureT2XtPk]>;
|
||||
[FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
|
||||
FeatureVMLxForwarding, FeatureT2XtPk]>;
|
||||
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
|
||||
"Cortex-A8 ARM processors",
|
||||
[FeatureSlowFPBrcc, FeatureNEONForFP,
|
||||
FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
|
||||
FeatureT2XtPk]>;
|
||||
[FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
|
||||
FeatureVMLxForwarding, FeatureT2XtPk]>;
|
||||
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
|
||||
"Cortex-A9 ARM processors",
|
||||
[FeatureVMLxForwarding,
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
|
||||
#define GET_SUBTARGETINFO_TARGET_DESC
|
||||
#define GET_SUBTARGETINFO_CTOR
|
||||
@ -42,12 +43,13 @@ StrictAlign("arm-strict-align", cl::Hidden,
|
||||
cl::desc("Disallow all unaligned memory accesses"));
|
||||
|
||||
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
|
||||
const std::string &FS)
|
||||
const std::string &FS, const TargetOptions &Options)
|
||||
: ARMGenSubtargetInfo(TT, CPU, FS)
|
||||
, ARMProcFamily(Others)
|
||||
, stackAlignment(4)
|
||||
, CPUString(CPU)
|
||||
, TargetTriple(TT)
|
||||
, Options(Options)
|
||||
, TargetABI(ARM_ABI_APCS) {
|
||||
initializeEnvironment();
|
||||
resetSubtargetFeatures(CPU, FS);
|
||||
@ -92,6 +94,7 @@ void ARMSubtarget::initializeEnvironment() {
|
||||
AllowsUnalignedMem = false;
|
||||
Thumb2DSP = false;
|
||||
UseNaClTrap = false;
|
||||
UnsafeFPMath = false;
|
||||
}
|
||||
|
||||
void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
|
||||
@ -162,6 +165,12 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
|
||||
// configuration.
|
||||
if (!StrictAlign && hasV6Ops() && isTargetDarwin())
|
||||
AllowsUnalignedMem = true;
|
||||
|
||||
// NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
|
||||
uint64_t Bits = getFeatureBits();
|
||||
if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
|
||||
(Options.UnsafeFPMath || isTargetDarwin()))
|
||||
UseNEONForSinglePrecisionFP = true;
|
||||
}
|
||||
|
||||
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
|
||||
|
@ -26,6 +26,7 @@
|
||||
namespace llvm {
|
||||
class GlobalValue;
|
||||
class StringRef;
|
||||
class TargetOptions;
|
||||
|
||||
class ARMSubtarget : public ARMGenSubtargetInfo {
|
||||
protected:
|
||||
@ -159,6 +160,9 @@ protected:
|
||||
/// NaCl TRAP instruction is generated instead of the regular TRAP.
|
||||
bool UseNaClTrap;
|
||||
|
||||
/// Target machine allowed unsafe FP math (such as use of NEON fp)
|
||||
bool UnsafeFPMath;
|
||||
|
||||
/// stackAlignment - The minimum alignment known to hold of the stack frame on
|
||||
/// entry to the function and which must be maintained by every function.
|
||||
unsigned stackAlignment;
|
||||
@ -175,6 +179,9 @@ protected:
|
||||
/// Selected instruction itineraries (one entry per itinerary class.)
|
||||
InstrItineraryData InstrItins;
|
||||
|
||||
/// Options passed via command line that could influence the target
|
||||
const TargetOptions &Options;
|
||||
|
||||
public:
|
||||
enum {
|
||||
isELF, isDarwin
|
||||
@ -189,7 +196,7 @@ protected:
|
||||
/// of the specified triple.
|
||||
///
|
||||
ARMSubtarget(const std::string &TT, const std::string &CPU,
|
||||
const std::string &FS);
|
||||
const std::string &FS, const TargetOptions &Options);
|
||||
|
||||
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
|
||||
/// that still makes it profitable to inline the call.
|
||||
|
@ -48,7 +48,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL)
|
||||
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
|
||||
Subtarget(TT, CPU, FS),
|
||||
Subtarget(TT, CPU, FS, Options),
|
||||
JITInfo(),
|
||||
InstrItins(Subtarget.getInstrItineraryData()) {
|
||||
// Default to soft float ABI
|
||||
|
@ -1,6 +1,7 @@
|
||||
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
|
||||
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
|
||||
|
||||
define float @test(float %a, float %b) {
|
||||
@ -18,6 +19,8 @@ entry:
|
||||
; NFP0: vadd.f32 s
|
||||
|
||||
; CORTEXA8: test:
|
||||
; CORTEXA8: vadd.f32 d
|
||||
; CORTEXA8: vadd.f32 s
|
||||
; CORTEXA8U: test:
|
||||
; CORTEXA8U: vadd.f32 d
|
||||
; CORTEXA9: test:
|
||||
; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}}
|
||||
; CORTEXA9: vadd.f32 s
|
||||
|
@ -1,6 +1,7 @@
|
||||
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
|
||||
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
|
||||
|
||||
define float @test(float %a, float %b) {
|
||||
@ -18,9 +19,11 @@ entry:
|
||||
; NFP0: vmul.f32 s
|
||||
|
||||
; CORTEXA8: test:
|
||||
; CORTEXA8: vmul.f32 d
|
||||
; CORTEXA8: vmul.f32 s
|
||||
; CORTEXA8U: test:
|
||||
; CORTEXA8U: vmul.f32 d
|
||||
; CORTEXA9: test:
|
||||
; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}}
|
||||
; CORTEXA9: vmul.f32 s
|
||||
|
||||
; VFP2: test2
|
||||
define float @test2(float %a) nounwind {
|
||||
|
@ -1,6 +1,7 @@
|
||||
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
|
||||
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
|
||||
|
||||
define float @test1(float* %a) {
|
||||
@ -22,7 +23,10 @@ entry:
|
||||
; NFP0: vneg.f32 s{{.*}}, s{{.*}}
|
||||
|
||||
; CORTEXA8: test1:
|
||||
; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
|
||||
; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}}
|
||||
|
||||
; CORTEXA8U: test1:
|
||||
; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}}
|
||||
|
||||
; CORTEXA9: test1:
|
||||
; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
|
||||
@ -46,7 +50,10 @@ entry:
|
||||
; NFP0: vneg.f32 s{{.*}}, s{{.*}}
|
||||
|
||||
; CORTEXA8: test2:
|
||||
; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
|
||||
; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}}
|
||||
|
||||
; CORTEXA8U: test2:
|
||||
; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}}
|
||||
|
||||
; CORTEXA9: test2:
|
||||
; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
|
||||
|
@ -1,6 +1,7 @@
|
||||
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
|
||||
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=A8U
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8
|
||||
|
||||
define float @t1(float %acc, float %a, float %b) nounwind {
|
||||
@ -11,9 +12,13 @@ entry:
|
||||
; NEON: t1:
|
||||
; NEON: vnmla.f32
|
||||
|
||||
; A8U: t1:
|
||||
; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
|
||||
; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
|
||||
|
||||
; A8: t1:
|
||||
; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
|
||||
; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
|
||||
; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
|
||||
%0 = fmul float %a, %b
|
||||
%1 = fsub float -0.0, %0
|
||||
%2 = fsub float %1, %acc
|
||||
@ -28,9 +33,13 @@ entry:
|
||||
; NEON: t2:
|
||||
; NEON: vnmla.f32
|
||||
|
||||
; A8U: t2:
|
||||
; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
|
||||
; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
|
||||
|
||||
; A8: t2:
|
||||
; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
|
||||
; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
|
||||
; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
|
||||
%0 = fmul float %a, %b
|
||||
%1 = fmul float -1.0, %0
|
||||
%2 = fsub float %1, %acc
|
||||
@ -45,6 +54,10 @@ entry:
|
||||
; NEON: t3:
|
||||
; NEON: vnmla.f64
|
||||
|
||||
; A8U: t3:
|
||||
; A8U: vnmul.f64 d
|
||||
; A8U: vsub.f64 d
|
||||
|
||||
; A8: t3:
|
||||
; A8: vnmul.f64 d
|
||||
; A8: vsub.f64 d
|
||||
@ -62,6 +75,10 @@ entry:
|
||||
; NEON: t4:
|
||||
; NEON: vnmla.f64
|
||||
|
||||
; A8U: t4:
|
||||
; A8U: vnmul.f64 d
|
||||
; A8U: vsub.f64 d
|
||||
|
||||
; A8: t4:
|
||||
; A8: vnmul.f64 d
|
||||
; A8: vsub.f64 d
|
||||
|
@ -1,6 +1,7 @@
|
||||
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
|
||||
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=VFP2
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NEON
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=VFP2
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2
|
||||
|
||||
define i32 @test1(float %a, float %b) {
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NFP1U
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1
|
||||
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
|
||||
|
||||
@ -9,5 +10,6 @@ entry:
|
||||
}
|
||||
|
||||
; VFP2: vsub.f32 s
|
||||
; NFP1: vsub.f32 d
|
||||
; NFP1U: vsub.f32 d
|
||||
; NFP1: vsub.f32 s
|
||||
; NFP0: vsub.f32 s
|
||||
|
76
test/CodeGen/ARM/neon-spfp.ll
Normal file
76
test/CodeGen/ARM/neon-spfp.ll
Normal file
@ -0,0 +1,76 @@
|
||||
; Each RUN line compiles this file for one triple / CPU / unsafe-math
; combination and verifies the output with the FileCheck directives that carry
; the same prefix. The prefix is spelled out in full (CHECK-<NAME>) so that it
; matches the "CHECK-<NAME>:" directives exactly instead of relying on the
; short name being found as a substring of the directive.
; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 | FileCheck %s -check-prefix=CHECK-LINUXA5
|
||||
; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-LINUXA8
|
||||
; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 | FileCheck %s -check-prefix=CHECK-LINUXA9
|
||||
; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=CHECK-LINUXA15
|
||||
; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=CHECK-LINUXSWIFT
|
||||
|
||||
; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA5
|
||||
; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA8
|
||||
; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA9
|
||||
; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA15
|
||||
; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFESWIFT
|
||||
|
||||
; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=CHECK-DARWINA5
|
||||
; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-DARWINA8
|
||||
; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=CHECK-DARWINA9
|
||||
; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a15 | FileCheck %s -check-prefix=CHECK-DARWINA15
|
||||
; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=swift | FileCheck %s -check-prefix=CHECK-DARWINSWIFT
|
||||
|
||||
; This test makes sure we're not lowering to VMUL.f32 D* (i.e. NEON) for single-precision FP ops, since
|
||||
; NEON is not fully IEEE 754 compliant, unless unsafe-math is selected.
|
||||
|
||||
@.str = private unnamed_addr constant [12 x i8] c"S317\09%.5g \0A\00", align 1
|
||||
|
||||
; CHECK-LINUXA5: main:
|
||||
; CHECK-LINUXA8: main:
|
||||
; CHECK-LINUXA9: main:
|
||||
; CHECK-LINUXA15: main:
|
||||
; CHECK-LINUXSWIFT: main:
|
||||
; CHECK-UNSAFEA5: main:
|
||||
; CHECK-UNSAFEA8: main:
|
||||
; CHECK-UNSAFEA9: main:
|
||||
; CHECK-UNSAFEA15: main:
|
||||
; CHECK-UNSAFESWIFT: main:
|
||||
; CHECK-DARWINA5: main:
|
||||
; CHECK-DARWINA8: main:
|
||||
; CHECK-DARWINA9: main:
|
||||
; CHECK-DARWINA15: main:
|
||||
; CHECK-DARWINSWIFT: main:
|
||||
; Test kernel. Starting from 1.0, a float accumulator is multiplied by a
; constant slightly below one (0x3FEFAE1480000000, roughly 0.99) on every
; iteration; each product is widened to double and passed to printf. The loop
; exits once the counter reaches 16000 and the function returns 0.
; The single-precision fmul in the loop body is the instruction whose lowering
; (S-register vs. D-register vmul.f32) the directives after it inspect.
define i32 @main() {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%q.03 = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
|
||||
; Single-precision multiply under test.
%mul = fmul float %q.03, 0x3FEFAE1480000000
|
||||
; CHECK-LINUXA5: vmul.f32 s{{[0-9]*}}
|
||||
; CHECK-LINUXA8: vmul.f32 s{{[0-9]*}}
|
||||
; CHECK-LINUXA9: vmul.f32 s{{[0-9]*}}
|
||||
; CHECK-LINUXA15: vmul.f32 s{{[0-9]*}}
|
||||
; Swift is *always* unsafe
|
||||
; CHECK-LINUXSWIFT: vmul.f32 d{{[0-9]*}}
|
||||
|
||||
; CHECK-UNSAFEA5: vmul.f32 d{{[0-9]*}}
|
||||
; CHECK-UNSAFEA8: vmul.f32 d{{[0-9]*}}
|
||||
; A9 and A15 don't need this
|
||||
; CHECK-UNSAFEA9: vmul.f32 s{{[0-9]*}}
|
||||
; CHECK-UNSAFEA15: vmul.f32 s{{[0-9]*}}
|
||||
; CHECK-UNSAFESWIFT: vmul.f32 d{{[0-9]*}}
|
||||
|
||||
; CHECK-DARWINA5: vmul.f32 d{{[0-9]*}}
|
||||
; CHECK-DARWINA8: vmul.f32 d{{[0-9]*}}
|
||||
; CHECK-DARWINA9: vmul.f32 s{{[0-9]*}}
|
||||
; CHECK-DARWINA15: vmul.f32 s{{[0-9]*}}
|
||||
; CHECK-DARWINSWIFT: vmul.f32 d{{[0-9]*}}
|
||||
; Widen the product to double before handing it to the variadic printf call.
%conv = fpext float %mul to double
|
||||
%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), double %conv) #1
|
||||
%inc = add nsw i32 %i.04, 1
|
||||
; Leave the loop once the incremented counter equals 16000.
%exitcond = icmp eq i32 %inc, 16000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
declare i32 @printf(i8* nocapture, ...)
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
|
||||
; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
|
||||
|
||||
define float @fmin_ole(float %x) nounwind {
|
||||
;CHECK: fmin_ole:
|
||||
|
Loading…
x
Reference in New Issue
Block a user