mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-08 21:10:35 +00:00
AMDGPU/EG: Add a new FeatureFMA and use it to selectively enable FMA instruction
Only used by pre-GCN targets.
v2: fix predicate setting for FMA_Common
Differential Revision: https://reviews.llvm.org/D40692
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319712 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
85c02734d1
commit
f68b9beeb9
@ -19,6 +19,12 @@ def FeatureFP64 : SubtargetFeature<"fp64",
|
|||||||
"Enable double precision operations"
|
"Enable double precision operations"
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
def FeatureFMA : SubtargetFeature<"fmaf",
|
||||||
|
"FMA",
|
||||||
|
"true",
|
||||||
|
"Enable single precision FMA (not as fast as mul+add, but fused)"
|
||||||
|
>;
|
||||||
|
|
||||||
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
|
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
|
||||||
"FastFMAF32",
|
"FastFMAF32",
|
||||||
"true",
|
"true",
|
||||||
|
@ -49,6 +49,7 @@ def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">;
|
|||||||
def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">;
|
def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">;
|
||||||
def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">;
|
def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">;
|
||||||
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
|
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
|
||||||
|
def FMA : Predicate<"Subtarget->hasFMA()">;
|
||||||
|
|
||||||
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
|
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
|
||||||
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
|
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
|
||||||
|
@ -140,6 +140,7 @@ protected:
|
|||||||
|
|
||||||
// Subtarget statically properties set by tablegen
|
// Subtarget statically properties set by tablegen
|
||||||
bool FP64;
|
bool FP64;
|
||||||
|
bool FMA;
|
||||||
bool IsGCN;
|
bool IsGCN;
|
||||||
bool GCN3Encoding;
|
bool GCN3Encoding;
|
||||||
bool CIInsts;
|
bool CIInsts;
|
||||||
@ -348,6 +349,10 @@ public:
|
|||||||
return CaymanISA;
|
return CaymanISA;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool hasFMA() const {
|
||||||
|
return FMA;
|
||||||
|
}
|
||||||
|
|
||||||
TrapHandlerAbi getTrapHandlerAbi() const {
|
TrapHandlerAbi getTrapHandlerAbi() const {
|
||||||
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
|
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
|
||||||
}
|
}
|
||||||
|
@ -211,6 +211,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
|
|||||||
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
|
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
|
||||||
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
|
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
|
||||||
|
|
||||||
|
if (!Subtarget->hasFMA()) {
|
||||||
|
setOperationAction(ISD::FMA, MVT::f32, Expand);
|
||||||
|
setOperationAction(ISD::FMA, MVT::f64, Expand);
|
||||||
|
}
|
||||||
|
|
||||||
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
|
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
|
||||||
|
|
||||||
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
|
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
|
||||||
|
@ -989,7 +989,10 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
|
|||||||
class FMA_Common <bits<5> inst> : R600_3OP <
|
class FMA_Common <bits<5> inst> : R600_3OP <
|
||||||
inst, "FMA",
|
inst, "FMA",
|
||||||
[(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU
|
[(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU
|
||||||
>;
|
>
|
||||||
|
{
|
||||||
|
let OtherPredicates = [FMA];
|
||||||
|
}
|
||||||
|
|
||||||
class CNDE_Common <bits<5> inst> : R600_3OP <
|
class CNDE_Common <bits<5> inst> : R600_3OP <
|
||||||
inst, "CNDE",
|
inst, "CNDE",
|
||||||
|
@ -53,7 +53,7 @@ def : Processor<"cedar", R600_VLIW5_Itin,
|
|||||||
>;
|
>;
|
||||||
|
|
||||||
def : Processor<"cypress", R600_VLIW5_Itin,
|
def : Processor<"cypress", R600_VLIW5_Itin,
|
||||||
[FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache]
|
[FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache, FeatureFMA]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Processor<"juniper", R600_VLIW5_Itin,
|
def : Processor<"juniper", R600_VLIW5_Itin,
|
||||||
@ -82,7 +82,7 @@ def : Processor<"caicos", R600_VLIW5_Itin,
|
|||||||
>;
|
>;
|
||||||
|
|
||||||
def : Processor<"cayman", R600_VLIW4_Itin,
|
def : Processor<"cayman", R600_VLIW4_Itin,
|
||||||
[FeatureNorthernIslands, FeatureCaymanISA]
|
[FeatureNorthernIslands, FeatureCaymanISA, FeatureFMA]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Processor<"turks", R600_VLIW5_Itin,
|
def : Processor<"turks", R600_VLIW5_Itin,
|
||||||
|
@ -1,5 +1,12 @@
|
|||||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||||
; XUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||||
|
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cedar -verify-machineinstrs < %s
|
||||||
|
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=juniper -verify-machineinstrs < %s
|
||||||
|
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s
|
||||||
|
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=sumo -verify-machineinstrs < %s
|
||||||
|
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=barts -verify-machineinstrs < %s
|
||||||
|
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=caicos -verify-machineinstrs < %s
|
||||||
|
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=turks -verify-machineinstrs < %s
|
||||||
|
|
||||||
declare float @llvm.fma.f32(float, float, float) nounwind readnone
|
declare float @llvm.fma.f32(float, float, float) nounwind readnone
|
||||||
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
|
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
|
||||||
|
Loading…
Reference in New Issue
Block a user