AMDGPU/EG: Add a new FeatureFMA and use it to selectively enable the FMA instruction

Only used by pre-GCN targets
v2: fix predicate setting for FMA_Common

Differential Revision: https://reviews.llvm.org/D40692

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319712 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jan Vesely 2017-12-04 23:07:28 +00:00
parent 85c02734d1
commit f68b9beeb9
7 changed files with 31 additions and 4 deletions

View File

@ -19,6 +19,12 @@ def FeatureFP64 : SubtargetFeature<"fp64",
"Enable double precision operations"
>;
// Subtarget feature selected with the "fmaf" feature string; it drives the
// subtarget's FMA flag (value "true" when the feature is present).
// Per the accompanying change description this is only used by pre-GCN targets.
def FeatureFMA : SubtargetFeature<"fmaf",
"FMA",
"true",
"Enable single precision FMA (not as fast as mul+add, but fused)"
>;
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
"FastFMAF32",
"true",

View File

@ -49,6 +49,7 @@ def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">;
def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">;
def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
// Selection predicate that holds only when the current subtarget's hasFMA()
// returns true; used to gate patterns that require the FMA instruction.
def FMA : Predicate<"Subtarget->hasFMA()">;
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;

View File

@ -140,6 +140,7 @@ protected:
// Subtarget properties statically set by tablegen
bool FP64;
bool FMA;
bool IsGCN;
bool GCN3Encoding;
bool CIInsts;
@ -348,6 +349,10 @@ public:
return CaymanISA;
}
// Returns the value of the FMA subtarget property (one of the flags set by
// tablegen-generated feature parsing).
bool hasFMA() const {
return FMA;
}
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}

View File

@ -211,6 +211,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
if (!Subtarget->hasFMA()) {
setOperationAction(ISD::FMA, MVT::f32, Expand);
setOperationAction(ISD::FMA, MVT::f64, Expand);
}
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };

View File

@ -989,7 +989,10 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
class FMA_Common <bits<5> inst> : R600_3OP <
inst, "FMA",
[(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU
>;
>
{
let OtherPredicates = [FMA];
}
class CNDE_Common <bits<5> inst> : R600_3OP <
inst, "CNDE",

View File

@ -53,7 +53,7 @@ def : Processor<"cedar", R600_VLIW5_Itin,
>;
def : Processor<"cypress", R600_VLIW5_Itin,
[FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache]
[FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache, FeatureFMA]
>;
def : Processor<"juniper", R600_VLIW5_Itin,
@ -82,7 +82,7 @@ def : Processor<"caicos", R600_VLIW5_Itin,
>;
def : Processor<"cayman", R600_VLIW4_Itin,
[FeatureNorthernIslands, FeatureCaymanISA]
[FeatureNorthernIslands, FeatureCaymanISA, FeatureFMA]
>;
def : Processor<"turks", R600_VLIW5_Itin,

View File

@ -1,5 +1,12 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cedar -verify-machineinstrs < %s
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=juniper -verify-machineinstrs < %s
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=sumo -verify-machineinstrs < %s
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=barts -verify-machineinstrs < %s
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=caicos -verify-machineinstrs < %s
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=turks -verify-machineinstrs < %s
declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone