mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-08 04:51:23 +00:00
AMDGPU/EG: Add a new FeatureFMA and use it to selectively enable FMA instruction
Only used by pre-GCN targets. v2: fix predicate setting for FMA_Common. Differential Revision: https://reviews.llvm.org/D40692 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319712 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
85c02734d1
commit
f68b9beeb9
@ -19,6 +19,12 @@ def FeatureFP64 : SubtargetFeature<"fp64",
|
||||
"Enable double precision operations"
|
||||
>;
|
||||
|
||||
// Subtarget feature selected by the feature string "fmaf". It is declared with
// attribute "FMA" and value "true", and advertises single-precision fused
// multiply-add; per its description the fused form is not as fast as a
// separate mul+add.
def FeatureFMA : SubtargetFeature<"fmaf",
|
||||
"FMA",
|
||||
"true",
|
||||
"Enable single precision FMA (not as fast as mul+add, but fused)"
|
||||
>;
|
||||
|
||||
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
|
||||
"FastFMAF32",
|
||||
"true",
|
||||
|
@ -49,6 +49,7 @@ def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">;
|
||||
def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">;
|
||||
def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">;
|
||||
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
|
||||
// Predicate that holds when Subtarget->hasFMA() evaluates to true.
def FMA : Predicate<"Subtarget->hasFMA()">;
|
||||
|
||||
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
|
||||
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
|
||||
|
@ -140,6 +140,7 @@ protected:
|
||||
|
||||
// Static subtarget properties set by tablegen
|
||||
bool FP64;
|
||||
bool FMA;
|
||||
bool IsGCN;
|
||||
bool GCN3Encoding;
|
||||
bool CIInsts;
|
||||
@ -348,6 +349,10 @@ public:
|
||||
return CaymanISA;
|
||||
}
|
||||
|
||||
/// Returns the value of the FMA subtarget flag.
bool hasFMA() const {
|
||||
return FMA;
|
||||
}
|
||||
|
||||
/// Returns TrapHandlerAbiHsa when isAmdHsaOS() is true, and
/// TrapHandlerAbiNone otherwise.
TrapHandlerAbi getTrapHandlerAbi() const {
|
||||
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
|
||||
}
|
||||
|
@ -211,6 +211,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
|
||||
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
|
||||
|
||||
if (!Subtarget->hasFMA()) {
|
||||
setOperationAction(ISD::FMA, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FMA, MVT::f64, Expand);
|
||||
}
|
||||
|
||||
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
|
||||
|
||||
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
|
||||
|
@ -989,7 +989,10 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
|
||||
class FMA_Common <bits<5> inst> : R600_3OP <
|
||||
inst, "FMA",
|
||||
[(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU
|
||||
>;
|
||||
>
|
||||
{
|
||||
let OtherPredicates = [FMA];
|
||||
}
|
||||
|
||||
class CNDE_Common <bits<5> inst> : R600_3OP <
|
||||
inst, "CNDE",
|
||||
|
@ -53,7 +53,7 @@ def : Processor<"cedar", R600_VLIW5_Itin,
|
||||
>;
|
||||
|
||||
def : Processor<"cypress", R600_VLIW5_Itin,
|
||||
[FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache]
|
||||
[FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache, FeatureFMA]
|
||||
>;
|
||||
|
||||
def : Processor<"juniper", R600_VLIW5_Itin,
|
||||
@ -82,7 +82,7 @@ def : Processor<"caicos", R600_VLIW5_Itin,
|
||||
>;
|
||||
|
||||
def : Processor<"cayman", R600_VLIW4_Itin,
|
||||
[FeatureNorthernIslands, FeatureCaymanISA]
|
||||
[FeatureNorthernIslands, FeatureCaymanISA, FeatureFMA]
|
||||
>;
|
||||
|
||||
def : Processor<"turks", R600_VLIW5_Itin,
|
||||
|
@ -1,5 +1,12 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; XUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cedar -verify-machineinstrs < %s
|
||||
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=juniper -verify-machineinstrs < %s
|
||||
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s
|
||||
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=sumo -verify-machineinstrs < %s
|
||||
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=barts -verify-machineinstrs < %s
|
||||
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=caicos -verify-machineinstrs < %s
|
||||
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=turks -verify-machineinstrs < %s
|
||||
|
||||
declare float @llvm.fma.f32(float, float, float) nounwind readnone
|
||||
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
|
||||
|
Loading…
Reference in New Issue
Block a user