mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-19 14:44:39 +00:00
[AMDGPU] Introduce RC flags for vector register classes
Configure and use the TSFlags in TargetRegisterClass to have unique flags for VGPR and AGPR register classes. The vector register class queries like `hasVGPRs` will now become more efficient with just a bitwise operation. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D108815
This commit is contained in:
parent
5eaebd5d64
commit
4dab15288d
@ -14,6 +14,13 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
// This needs to be kept in sync with the field bits in SIRegisterClass.
|
||||
enum SIRCFlags : uint8_t {
|
||||
// For vector registers.
|
||||
HasVGPR = 1 << 0,
|
||||
HasAGPR = 1 << 1
|
||||
}; // enum SIRCFlags
|
||||
|
||||
namespace SIInstrFlags {
|
||||
// This needs to be kept in sync with the field bits in InstSI.
|
||||
enum : uint64_t {
|
||||
|
@ -2166,32 +2166,12 @@ bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
|
||||
return isSGPRClass(RC);
|
||||
}
|
||||
|
||||
// TODO: It might be helpful to have some target specific flags in
|
||||
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
|
||||
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
|
||||
unsigned Size = getRegSizeInBits(*RC);
|
||||
if (Size == 16) {
|
||||
return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr ||
|
||||
getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr;
|
||||
}
|
||||
const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
|
||||
if (!VRC) {
|
||||
assert(Size < 32 && "Invalid register class size");
|
||||
return false;
|
||||
}
|
||||
return getCommonSubClass(VRC, RC) != nullptr;
|
||||
return RC->TSFlags & SIRCFlags::HasVGPR;
|
||||
}
|
||||
|
||||
bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
|
||||
unsigned Size = getRegSizeInBits(*RC);
|
||||
if (Size < 16)
|
||||
return false;
|
||||
const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
|
||||
if (!ARC) {
|
||||
assert(getVGPRClassForBitWidth(Size) && "Invalid register class size");
|
||||
return false;
|
||||
}
|
||||
return getCommonSubClass(ARC, RC) != nullptr;
|
||||
return RC->TSFlags & SIRCFlags::HasAGPR;
|
||||
}
|
||||
|
||||
const TargetRegisterClass *
|
||||
@ -2335,7 +2315,7 @@ bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
|
||||
Register Reg) const {
|
||||
const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
|
||||
// Registers without classes are unaddressable, SGPR-like registers.
|
||||
return RC && hasVGPRs(RC);
|
||||
return RC && isVGPRClass(RC);
|
||||
}
|
||||
|
||||
bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
|
||||
@ -2343,7 +2323,7 @@ bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
|
||||
const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
|
||||
|
||||
// Registers without classes are unaddressable, SGPR-like registers.
|
||||
return RC && hasAGPRs(RC);
|
||||
return RC && isAGPRClass(RC);
|
||||
}
|
||||
|
||||
bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
|
||||
|
@ -168,6 +168,11 @@ public:
|
||||
|
||||
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
|
||||
|
||||
/// \returns true if this class contains only VGPR registers
|
||||
bool isVGPRClass(const TargetRegisterClass *RC) const {
|
||||
return hasVGPRs(RC) && !hasAGPRs(RC);
|
||||
}
|
||||
|
||||
/// \returns true if this class contains only AGPR registers
|
||||
bool isAGPRClass(const TargetRegisterClass *RC) const {
|
||||
return hasAGPRs(RC) && !hasVGPRs(RC);
|
||||
|
@ -130,6 +130,18 @@ class SIRegWithSubRegs <string n, list<Register> subregs, bits<16> regIdx> :
|
||||
RegisterWithSubRegs<n, subregs> {
|
||||
}
|
||||
|
||||
// For register classes that use TSFlags.
|
||||
class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
|
||||
: RegisterClass <n, rTypes, Align, rList> {
|
||||
// For vector register classes.
|
||||
field bit HasVGPR = 0;
|
||||
field bit HasAGPR = 0;
|
||||
|
||||
// These need to be kept in sync with the enum SIRCFlags.
|
||||
let TSFlags{0} = HasVGPR;
|
||||
let TSFlags{1} = HasAGPR;
|
||||
}
|
||||
|
||||
multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1,
|
||||
bit HWEncodingHigh = 0> {
|
||||
// There is no special encoding for 16 bit subregs, these are not real
|
||||
@ -490,14 +502,15 @@ class RegisterTypes<list<ValueType> reg_types> {
|
||||
def Reg16Types : RegisterTypes<[i16, f16]>;
|
||||
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>;
|
||||
|
||||
def VGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
|
||||
let HasVGPR = 1 in {
|
||||
def VGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
|
||||
(add (sequence "VGPR%u_LO16", 0, 255))> {
|
||||
let AllocationPriority = 1;
|
||||
let Size = 16;
|
||||
let GeneratePressureSet = 0;
|
||||
}
|
||||
|
||||
def VGPR_HI16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
|
||||
def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
|
||||
(add (sequence "VGPR%u_HI16", 0, 255))> {
|
||||
let AllocationPriority = 1;
|
||||
let Size = 16;
|
||||
@ -506,12 +519,13 @@ def VGPR_HI16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
|
||||
|
||||
// VGPR 32-bit registers
|
||||
// i16/f16 only on VI+
|
||||
def VGPR_32 : RegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
|
||||
def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
|
||||
(add (sequence "VGPR%u", 0, 255))> {
|
||||
let AllocationPriority = 1;
|
||||
let Size = 32;
|
||||
let Weight = 1;
|
||||
}
|
||||
} // End HasVGPR = 1
|
||||
|
||||
// VGPR 64-bit registers
|
||||
def VGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, VGPR_32, 255, 1, 2, "v">;
|
||||
@ -540,7 +554,8 @@ def VGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, VGPR_32, 255, 1, 16, "v">;
|
||||
// VGPR 1024-bit registers
|
||||
def VGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, VGPR_32, 255, 1, 32, "v">;
|
||||
|
||||
def AGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
|
||||
let HasAGPR = 1 in {
|
||||
def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
|
||||
(add (sequence "AGPR%u_LO16", 0, 255))> {
|
||||
let isAllocatable = 0;
|
||||
let Size = 16;
|
||||
@ -548,12 +563,13 @@ def AGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
|
||||
}
|
||||
|
||||
// AccVGPR 32-bit registers
|
||||
def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
(add (sequence "AGPR%u", 0, 255))> {
|
||||
let AllocationPriority = 1;
|
||||
let Size = 32;
|
||||
let Weight = 1;
|
||||
}
|
||||
} // End HasAGPR = 1
|
||||
|
||||
// AGPR 64-bit registers
|
||||
def AGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, AGPR_32, 255, 1, 2, "a">;
|
||||
@ -748,14 +764,15 @@ defm "" : SRegClass<8, 19, [v8i32, v8f32, v4i64, v4f64], SGPR_256Regs, TTMP_256R
|
||||
defm "" : SRegClass<16, 20, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;
|
||||
defm "" : SRegClass<32, 21, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
|
||||
|
||||
def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
(add VGPR_32, LDS_DIRECT_CLASS)> {
|
||||
let isAllocatable = 0;
|
||||
let HasVGPR = 1;
|
||||
}
|
||||
|
||||
// Register class for all vector registers (VGPRs + Interpolation Registers)
|
||||
class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
|
||||
RegisterClass<"AMDGPU", regTypes, 32, regList> {
|
||||
SIRegisterClass<"AMDGPU", regTypes, 32, regList> {
|
||||
let Size = !mul(numRegs, 32);
|
||||
|
||||
// Requires n v_mov_b32 to copy
|
||||
@ -767,11 +784,13 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
|
||||
// Define a register tuple class, along with one requiring an even
|
||||
// aligned base register.
|
||||
multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
|
||||
// Define the regular class.
|
||||
def "" : VRegClassBase<numRegs, regTypes, regList>;
|
||||
let HasVGPR = 1 in {
|
||||
// Define the regular class.
|
||||
def "" : VRegClassBase<numRegs, regTypes, regList>;
|
||||
|
||||
// Define 2-aligned variant
|
||||
def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)>;
|
||||
// Define 2-aligned variant
|
||||
def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)>;
|
||||
}
|
||||
}
|
||||
|
||||
defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
|
||||
@ -787,7 +806,7 @@ defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>;
|
||||
defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
|
||||
|
||||
multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
|
||||
let CopyCost = !add(numRegs, numRegs, 1) in {
|
||||
let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {
|
||||
// Define the regular class.
|
||||
def "" : VRegClassBase<numRegs, regTypes, regList>;
|
||||
|
||||
@ -823,44 +842,53 @@ let GeneratePressureSet = 0 in {
|
||||
// on an empty register set, but also sorts register classes based on
|
||||
// the number of registers in them. Add only one register so this is
|
||||
// sorted to the end and not preferred over VGPR_32.
|
||||
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> {
|
||||
def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> {
|
||||
let Size = 1;
|
||||
let HasVGPR = 1;
|
||||
}
|
||||
|
||||
def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
(add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
|
||||
let isAllocatable = 0;
|
||||
let HasVGPR = 1;
|
||||
}
|
||||
|
||||
def VS_64 : RegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> {
|
||||
def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> {
|
||||
let isAllocatable = 0;
|
||||
let HasVGPR = 1;
|
||||
}
|
||||
|
||||
def AV_32 : RegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
|
||||
def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
|
||||
(add AGPR_32, VGPR_32)> {
|
||||
let isAllocatable = 0;
|
||||
let HasVGPR = 1;
|
||||
let HasAGPR = 1;
|
||||
}
|
||||
|
||||
def AV_64 : RegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
|
||||
def AV_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
|
||||
(add AReg_64, VReg_64)> {
|
||||
let isAllocatable = 0;
|
||||
let HasVGPR = 1;
|
||||
let HasAGPR = 1;
|
||||
}
|
||||
} // End GeneratePressureSet = 0
|
||||
|
||||
def AV_96 : RegisterClass<"AMDGPU", VReg_96.RegTypes, 32,
|
||||
let HasVGPR = 1, HasAGPR = 1 in {
|
||||
def AV_96 : SIRegisterClass<"AMDGPU", VReg_96.RegTypes, 32,
|
||||
(add AReg_96, VReg_96)> {
|
||||
let isAllocatable = 0;
|
||||
}
|
||||
|
||||
def AV_128 : RegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
|
||||
def AV_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
|
||||
(add AReg_128, VReg_128)> {
|
||||
let isAllocatable = 0;
|
||||
}
|
||||
|
||||
def AV_160 : RegisterClass<"AMDGPU", VReg_160.RegTypes, 32,
|
||||
def AV_160 : SIRegisterClass<"AMDGPU", VReg_160.RegTypes, 32,
|
||||
(add AReg_160, VReg_160)> {
|
||||
let isAllocatable = 0;
|
||||
}
|
||||
} // End HasVGPR = 1, HasAGPR = 1
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register operands
|
||||
|
Loading…
x
Reference in New Issue
Block a user