mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-26 22:34:39 +00:00
PTX: Re-work target sm/compute selection and add some basic GPU
targets: g80, gt200, gf100(fermi) llvm-svn: 133799
This commit is contained in:
parent
701cdf4f09
commit
5e20d4dbfc
@ -30,31 +30,51 @@ def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false",
|
||||
//===- PTX Version --------------------------------------------------------===//
|
||||
|
||||
def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
|
||||
"Use PTX Language Version 2.0",
|
||||
[]>;
|
||||
"Use PTX Language Version 2.0">;
|
||||
|
||||
def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
|
||||
"Use PTX Language Version 2.1",
|
||||
[FeaturePTX20]>;
|
||||
"Use PTX Language Version 2.1">;
|
||||
|
||||
def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
|
||||
"Use PTX Language Version 2.2",
|
||||
[FeaturePTX21]>;
|
||||
"Use PTX Language Version 2.2">;
|
||||
|
||||
def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3",
|
||||
"Use PTX Language Version 2.3",
|
||||
[FeaturePTX22]>;
|
||||
"Use PTX Language Version 2.3">;
|
||||
|
||||
//===- PTX Shader Model ---------------------------------------------------===//
|
||||
//===- PTX Target ---------------------------------------------------------===//
|
||||
|
||||
def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0",
|
||||
"Enable Shader Model 1.0 compliance">;
|
||||
def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3",
|
||||
"Enable Shader Model 1.3 compliance",
|
||||
[FeatureSM10, FeatureDouble]>;
|
||||
def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0",
|
||||
"Enable Shader Model 2.0 compliance",
|
||||
[FeatureSM13]>;
|
||||
def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0",
|
||||
"Use Shader Model 1.0">;
|
||||
def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1",
|
||||
"Use Shader Model 1.1">;
|
||||
def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
|
||||
"Use Shader Model 1.2">;
|
||||
def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
|
||||
"Use Shader Model 1.3">;
|
||||
def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
|
||||
"Use Shader Model 2.0">;
|
||||
def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
|
||||
"Use Shader Model 2.1">;
|
||||
def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
|
||||
"Use Shader Model 2.2">;
|
||||
def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
|
||||
"Use Shader Model 2.3">;
|
||||
|
||||
def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
|
||||
"PTX_COMPUTE_1_0",
|
||||
"Use Compute Compatibility 1.0">;
|
||||
def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget",
|
||||
"PTX_COMPUTE_1_1",
|
||||
"Use Compute Compatibility 1.1">;
|
||||
def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget",
|
||||
"PTX_COMPUTE_1_2",
|
||||
"Use Compute Compatibility 1.2">;
|
||||
def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
|
||||
"PTX_COMPUTE_1_3",
|
||||
"Use Compute Compatibility 1.3">;
|
||||
def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
|
||||
"PTX_COMPUTE_2_0",
|
||||
"Use Compute Compatibility 2.0">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PTX supported processors
|
||||
@ -65,6 +85,27 @@ class Proc<string Name, list<SubtargetFeature> Features>
|
||||
|
||||
def : Proc<"generic", []>;
|
||||
|
||||
// Processor definitions for compute/shader models
|
||||
def : Proc<"compute_10", [FeatureCOMPUTE10]>;
|
||||
def : Proc<"compute_11", [FeatureCOMPUTE11]>;
|
||||
def : Proc<"compute_12", [FeatureCOMPUTE12]>;
|
||||
def : Proc<"compute_13", [FeatureCOMPUTE13]>;
|
||||
def : Proc<"compute_20", [FeatureCOMPUTE20]>;
|
||||
def : Proc<"sm_10", [FeatureSM10]>;
|
||||
def : Proc<"sm_11", [FeatureSM11]>;
|
||||
def : Proc<"sm_12", [FeatureSM12]>;
|
||||
def : Proc<"sm_13", [FeatureSM13]>;
|
||||
def : Proc<"sm_20", [FeatureSM20]>;
|
||||
def : Proc<"sm_21", [FeatureSM21]>;
|
||||
def : Proc<"sm_22", [FeatureSM22]>;
|
||||
def : Proc<"sm_23", [FeatureSM23]>;
|
||||
|
||||
// Processor definitions for common GPU architectures
|
||||
def : Proc<"g80", [FeatureSM10]>;
|
||||
def : Proc<"gt200", [FeatureSM13]>;
|
||||
def : Proc<"gf100", [FeatureSM20, FeatureDouble]>;
|
||||
def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register File Description
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -462,7 +462,7 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
|
||||
if (i != b) {
|
||||
decl += ", ";
|
||||
}
|
||||
if (isKernel || ST.getShaderModel() >= PTXSubtarget::PTX_SM_2_0) {
|
||||
if (isKernel || ST.useParamSpaceForDeviceArgs()) {
|
||||
decl += ".param .b";
|
||||
decl += utostr(*i);
|
||||
decl += " ";
|
||||
|
@ -213,7 +213,7 @@ SDValue PTXTargetLowering::
|
||||
// We do one of two things here:
|
||||
// IsKernel || SM >= 2.0 -> Use param space for arguments
|
||||
// SM < 2.0 -> Use registers for arguments
|
||||
if (MFI->isKernel() || ST.getShaderModel() >= PTXSubtarget::PTX_SM_2_0) {
|
||||
if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
|
||||
// We just need to emit the proper LOAD_PARAM ISDs
|
||||
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
|
||||
|
||||
|
@ -26,10 +26,10 @@ def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
|
||||
def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
|
||||
|
||||
// Shader Model Support
|
||||
def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">;
|
||||
def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">;
|
||||
def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">;
|
||||
def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">;
|
||||
def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">;
|
||||
def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">;
|
||||
def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">;
|
||||
def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">;
|
||||
|
||||
// PTX Version Support
|
||||
def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
|
||||
@ -613,43 +613,43 @@ def FDIVrr32SM13 : InstPTX<(outs RegF32:$d),
|
||||
(ins RegF32:$a, RegF32:$b),
|
||||
"div.rn.f32\t$d, $a, $b",
|
||||
[(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
|
||||
Requires<[SupportsSM13]>;
|
||||
Requires<[FDivNeedsRoundingMode]>;
|
||||
def FDIVri32SM13 : InstPTX<(outs RegF32:$d),
|
||||
(ins RegF32:$a, f32imm:$b),
|
||||
"div.rn.f32\t$d, $a, $b",
|
||||
[(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
|
||||
Requires<[SupportsSM13]>;
|
||||
Requires<[FDivNeedsRoundingMode]>;
|
||||
def FDIVrr32SM10 : InstPTX<(outs RegF32:$d),
|
||||
(ins RegF32:$a, RegF32:$b),
|
||||
"div.f32\t$d, $a, $b",
|
||||
[(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
|
||||
Requires<[DoesNotSupportSM13]>;
|
||||
Requires<[FDivNoRoundingMode]>;
|
||||
def FDIVri32SM10 : InstPTX<(outs RegF32:$d),
|
||||
(ins RegF32:$a, f32imm:$b),
|
||||
"div.f32\t$d, $a, $b",
|
||||
[(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
|
||||
Requires<[DoesNotSupportSM13]>;
|
||||
Requires<[FDivNoRoundingMode]>;
|
||||
|
||||
def FDIVrr64SM13 : InstPTX<(outs RegF64:$d),
|
||||
(ins RegF64:$a, RegF64:$b),
|
||||
"div.rn.f64\t$d, $a, $b",
|
||||
[(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
|
||||
Requires<[SupportsSM13]>;
|
||||
Requires<[FDivNeedsRoundingMode]>;
|
||||
def FDIVri64SM13 : InstPTX<(outs RegF64:$d),
|
||||
(ins RegF64:$a, f64imm:$b),
|
||||
"div.rn.f64\t$d, $a, $b",
|
||||
[(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
|
||||
Requires<[SupportsSM13]>;
|
||||
Requires<[FDivNeedsRoundingMode]>;
|
||||
def FDIVrr64SM10 : InstPTX<(outs RegF64:$d),
|
||||
(ins RegF64:$a, RegF64:$b),
|
||||
"div.f64\t$d, $a, $b",
|
||||
[(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
|
||||
Requires<[DoesNotSupportSM13]>;
|
||||
Requires<[FDivNoRoundingMode]>;
|
||||
def FDIVri64SM10 : InstPTX<(outs RegF64:$d),
|
||||
(ins RegF64:$a, f64imm:$b),
|
||||
"div.f64\t$d, $a, $b",
|
||||
[(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
|
||||
Requires<[DoesNotSupportSM13]>;
|
||||
Requires<[FDivNoRoundingMode]>;
|
||||
|
||||
|
||||
|
||||
@ -661,8 +661,10 @@ def FDIVri64SM10 : InstPTX<(outs RegF64:$d),
|
||||
// In the short term, mad is supported on all PTX versions and we use a
|
||||
// default rounding mode no matter what shader model or PTX version.
|
||||
// TODO: Allow the rounding mode to be selectable through llc.
|
||||
defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13, SupportsFMA]>;
|
||||
defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13, SupportsFMA]>;
|
||||
defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>,
|
||||
Requires<[FMadNeedsRoundingMode, SupportsFMA]>;
|
||||
defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>,
|
||||
Requires<[FMadNoRoundingMode, SupportsFMA]>;
|
||||
|
||||
///===- Floating-Point Intrinsic Instructions -----------------------------===//
|
||||
|
||||
|
@ -18,21 +18,31 @@ using namespace llvm;
|
||||
|
||||
PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS,
|
||||
bool is64Bit)
|
||||
: PTXShaderModel(PTX_SM_1_0),
|
||||
: PTXTarget(PTX_COMPUTE_1_0),
|
||||
PTXVersion(PTX_VERSION_2_0),
|
||||
SupportsDouble(false),
|
||||
SupportsFMA(true),
|
||||
Is64Bit(is64Bit) {
|
||||
Is64Bit(is64Bit) {
|
||||
std::string TARGET = "generic";
|
||||
ParseSubtargetFeatures(FS, TARGET);
|
||||
}
|
||||
|
||||
std::string PTXSubtarget::getTargetString() const {
|
||||
switch(PTXShaderModel) {
|
||||
default: llvm_unreachable("Unknown shader model");
|
||||
switch(PTXTarget) {
|
||||
default: llvm_unreachable("Unknown PTX target");
|
||||
case PTX_SM_1_0: return "sm_10";
|
||||
case PTX_SM_1_1: return "sm_11";
|
||||
case PTX_SM_1_2: return "sm_12";
|
||||
case PTX_SM_1_3: return "sm_13";
|
||||
case PTX_SM_2_0: return "sm_20";
|
||||
case PTX_SM_2_1: return "sm_21";
|
||||
case PTX_SM_2_2: return "sm_22";
|
||||
case PTX_SM_2_3: return "sm_23";
|
||||
case PTX_COMPUTE_1_0: return "compute_10";
|
||||
case PTX_COMPUTE_1_1: return "compute_11";
|
||||
case PTX_COMPUTE_1_2: return "compute_12";
|
||||
case PTX_COMPUTE_1_3: return "compute_13";
|
||||
case PTX_COMPUTE_2_0: return "compute_20";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -23,10 +23,23 @@ namespace llvm {
|
||||
/**
|
||||
* Enumeration of Shader Models supported by the back-end.
|
||||
*/
|
||||
enum PTXShaderModelEnum {
|
||||
enum PTXTargetEnum {
|
||||
PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */
|
||||
PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */
|
||||
PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */
|
||||
PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */
|
||||
PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */
|
||||
PTX_LAST_COMPUTE,
|
||||
|
||||
PTX_SM_1_0, /*< Shader Model 1.0 */
|
||||
PTX_SM_1_1, /*< Shader Model 1.1 */
|
||||
PTX_SM_1_2, /*< Shader Model 1.2 */
|
||||
PTX_SM_1_3, /*< Shader Model 1.3 */
|
||||
PTX_SM_2_0 /*< Shader Model 2.0 */
|
||||
PTX_SM_2_0, /*< Shader Model 2.0 */
|
||||
PTX_SM_2_1, /*< Shader Model 2.1 */
|
||||
PTX_SM_2_2, /*< Shader Model 2.2 */
|
||||
PTX_SM_2_3, /*< Shader Model 2.3 */
|
||||
PTX_LAST_SM
|
||||
};
|
||||
|
||||
/**
|
||||
@ -44,7 +57,7 @@ namespace llvm {
|
||||
private:
|
||||
|
||||
/// Shader Model supported on the target GPU.
|
||||
PTXShaderModelEnum PTXShaderModel;
|
||||
PTXTargetEnum PTXTarget;
|
||||
|
||||
/// PTX Language Version.
|
||||
PTXVersionEnum PTXVersion;
|
||||
@ -74,18 +87,26 @@ namespace llvm {
|
||||
|
||||
bool supportsFMA() const { return SupportsFMA; }
|
||||
|
||||
bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
|
||||
|
||||
bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
|
||||
|
||||
bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
|
||||
|
||||
bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
|
||||
|
||||
bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; }
|
||||
|
||||
PTXShaderModelEnum getShaderModel() const { return PTXShaderModel; }
|
||||
bool fdivNeedsRoundingMode() const {
|
||||
return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
|
||||
(PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
|
||||
}
|
||||
|
||||
bool fmadNeedsRoundingMode() const {
|
||||
return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
|
||||
(PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
|
||||
}
|
||||
|
||||
bool useParamSpaceForDeviceArgs() const {
|
||||
return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
|
||||
(PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
|
||||
}
|
||||
|
||||
std::string ParseSubtargetFeatures(const std::string &FS,
|
||||
const std::string &CPU);
|
||||
|
Loading…
x
Reference in New Issue
Block a user