mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-14 12:12:07 +00:00
AMDGPU/GlobalISel: Add AGPR bank and RegBankSelect mfma intrinsics
Differential Revision: https://reviews.llvm.org/D70871
This commit is contained in:
parent
259ca0418e
commit
cfbbdc83b4
@ -32,7 +32,13 @@ enum PartialMappingIdx {
|
||||
PM_VGPR512 = 22,
|
||||
PM_VGPR1024 = 23,
|
||||
PM_SGPR96 = 24,
|
||||
PM_VGPR96 = 25
|
||||
PM_VGPR96 = 25,
|
||||
PM_AGPR96 = 26,
|
||||
PM_AGPR32 = 32,
|
||||
PM_AGPR64 = 33,
|
||||
PM_AGPR128 = 34,
|
||||
PM_AGPR512 = 36,
|
||||
PM_AGPR1024 = 37
|
||||
};
|
||||
|
||||
const RegisterBankInfo::PartialMapping PartMappings[] {
|
||||
@ -58,7 +64,14 @@ const RegisterBankInfo::PartialMapping PartMappings[] {
|
||||
{0, 512, VGPRRegBank},
|
||||
{0, 1024, VGPRRegBank},
|
||||
{0, 96, SGPRRegBank},
|
||||
{0, 96, VGPRRegBank}
|
||||
{0, 96, VGPRRegBank},
|
||||
{0, 96, AGPRRegBank},
|
||||
|
||||
{0, 32, AGPRRegBank}, // AGPR begin
|
||||
{0, 64, AGPRRegBank},
|
||||
{0, 128, AGPRRegBank},
|
||||
{0, 512, AGPRRegBank},
|
||||
{0, 1024, AGPRRegBank}
|
||||
};
|
||||
|
||||
const RegisterBankInfo::ValueMapping ValMappings[] {
|
||||
@ -94,7 +107,21 @@ const RegisterBankInfo::ValueMapping ValMappings[] {
|
||||
{&PartMappings[16], 1}, // 512
|
||||
{&PartMappings[17], 1}, // 1024
|
||||
{&PartMappings[18], 1},
|
||||
{&PartMappings[19], 1}
|
||||
{&PartMappings[19], 1},
|
||||
{&PartMappings[20], 1},
|
||||
|
||||
// AGPRs
|
||||
{nullptr, 0},
|
||||
{nullptr, 0},
|
||||
{nullptr, 0},
|
||||
{nullptr, 0},
|
||||
{nullptr, 0},
|
||||
{&PartMappings[21], 1}, // 32
|
||||
{&PartMappings[22], 1}, // 64
|
||||
{&PartMappings[23], 1}, // 128
|
||||
{nullptr, 0},
|
||||
{&PartMappings[24], 1}, // 512
|
||||
{&PartMappings[25], 1} // 1024
|
||||
};
|
||||
|
||||
const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {
|
||||
@ -122,7 +149,8 @@ const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] {
|
||||
enum ValueMappingIdx {
|
||||
SCCStartIdx = 0,
|
||||
SGPRStartIdx = 2,
|
||||
VGPRStartIdx = 13
|
||||
VGPRStartIdx = 13,
|
||||
AGPRStartIdx = 27
|
||||
};
|
||||
|
||||
const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
|
||||
@ -139,12 +167,32 @@ const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
|
||||
Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR1 : PM_VGPR1;
|
||||
break;
|
||||
case 96:
|
||||
assert(BankID != AMDGPU::VCCRegBankID);
|
||||
Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR96 : PM_VGPR96;
|
||||
switch (BankID) {
|
||||
case AMDGPU::VGPRRegBankID:
|
||||
Idx = PM_VGPR96;
|
||||
break;
|
||||
case AMDGPU::SGPRRegBankID:
|
||||
Idx = PM_SGPR96;
|
||||
break;
|
||||
case AMDGPU::AGPRRegBankID:
|
||||
Idx = PM_AGPR96;
|
||||
break;
|
||||
default: llvm_unreachable("Invalid register bank");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(BankID != AMDGPU::VCCRegBankID);
|
||||
Idx = BankID == AMDGPU::VGPRRegBankID ? VGPRStartIdx : SGPRStartIdx;
|
||||
switch (BankID) {
|
||||
case AMDGPU::VGPRRegBankID:
|
||||
Idx = VGPRStartIdx;
|
||||
break;
|
||||
case AMDGPU::SGPRRegBankID:
|
||||
Idx = SGPRStartIdx;
|
||||
break;
|
||||
case AMDGPU::AGPRRegBankID:
|
||||
Idx = AGPRStartIdx;
|
||||
break;
|
||||
default: llvm_unreachable("Invalid register bank");
|
||||
}
|
||||
Idx += Log2_32_Ceil(Size);
|
||||
break;
|
||||
}
|
||||
|
@ -106,6 +106,14 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST)
|
||||
(void)RBVGPR;
|
||||
assert(&RBVGPR == &AMDGPU::VGPRRegBank);
|
||||
|
||||
const RegisterBank &RBAGPR = getRegBank(AMDGPU::AGPRRegBankID);
|
||||
(void)RBAGPR;
|
||||
assert(&RBAGPR == &AMDGPU::AGPRRegBank);
|
||||
}
|
||||
|
||||
static bool isVectorRegisterBank(const RegisterBank &Bank) {
|
||||
unsigned BankID = Bank.getID();
|
||||
return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
|
||||
}
|
||||
|
||||
unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
|
||||
@ -113,7 +121,7 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
|
||||
unsigned Size) const {
|
||||
// TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
|
||||
if (Dst.getID() == AMDGPU::SGPRRegBankID &&
|
||||
Src.getID() == AMDGPU::VGPRRegBankID) {
|
||||
isVectorRegisterBank(Src)) {
|
||||
return std::numeric_limits<unsigned>::max();
|
||||
}
|
||||
|
||||
@ -127,8 +135,8 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
|
||||
if (Size == 1 &&
|
||||
(Dst.getID() == AMDGPU::SCCRegBankID ||
|
||||
Dst.getID() == AMDGPU::SGPRRegBankID) &&
|
||||
(Src.getID() == AMDGPU::SGPRRegBankID ||
|
||||
Src.getID() == AMDGPU::VGPRRegBankID ||
|
||||
(isVectorRegisterBank(Src) ||
|
||||
Src.getID() == AMDGPU::SGPRRegBankID ||
|
||||
Src.getID() == AMDGPU::VCCRegBankID))
|
||||
return std::numeric_limits<unsigned>::max();
|
||||
|
||||
@ -136,6 +144,11 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
|
||||
Src.getID() == AMDGPU::VCCRegBankID)
|
||||
return std::numeric_limits<unsigned>::max();
|
||||
|
||||
// There is no direct copy between AGPRs.
|
||||
if (Dst.getID() == AMDGPU::AGPRRegBankID &&
|
||||
Src.getID() == AMDGPU::AGPRRegBankID)
|
||||
return 4;
|
||||
|
||||
return RegisterBankInfo::copyCost(Dst, Src, Size);
|
||||
}
|
||||
|
||||
@ -169,7 +182,12 @@ const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
|
||||
if (&RC == &AMDGPU::SReg_1RegClass)
|
||||
return AMDGPU::VCCRegBank;
|
||||
|
||||
return TRI->isSGPRClass(&RC) ? AMDGPU::SGPRRegBank : AMDGPU::VGPRRegBank;
|
||||
if (TRI->isSGPRClass(&RC))
|
||||
return AMDGPU::SGPRRegBank;
|
||||
if (TRI->isAGPRClass(&RC))
|
||||
return AMDGPU::AGPRRegBank;
|
||||
|
||||
return AMDGPU::VGPRRegBank;
|
||||
}
|
||||
|
||||
template <unsigned NumOps>
|
||||
@ -1908,7 +1926,7 @@ bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
|
||||
continue;
|
||||
Register Reg = MI.getOperand(i).getReg();
|
||||
if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
|
||||
if (Bank->getID() == AMDGPU::VGPRRegBankID)
|
||||
if (isVectorRegisterBank(*Bank))
|
||||
return false;
|
||||
|
||||
assert(Bank->getID() == AMDGPU::SGPRRegBankID ||
|
||||
@ -2072,7 +2090,6 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg,
|
||||
const MachineRegisterInfo &MRI,
|
||||
const TargetRegisterInfo &TRI,
|
||||
unsigned Default) const {
|
||||
|
||||
const RegisterBank *Bank = getRegBank(Reg, MRI, TRI);
|
||||
return Bank ? Bank->getID() : Default;
|
||||
}
|
||||
@ -2102,6 +2119,14 @@ AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
|
||||
return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
|
||||
}
|
||||
|
||||
const RegisterBankInfo::ValueMapping *
|
||||
AMDGPURegisterBankInfo::getAGPROpMapping(Register Reg,
|
||||
const MachineRegisterInfo &MRI,
|
||||
const TargetRegisterInfo &TRI) const {
|
||||
unsigned Size = getSizeInBits(Reg, MRI, TRI);
|
||||
return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID, Size);
|
||||
}
|
||||
|
||||
///
|
||||
/// This function must return a legal mapping, because
|
||||
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
|
||||
@ -2725,6 +2750,38 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
|
||||
case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
|
||||
case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
|
||||
case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
|
||||
case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
|
||||
case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
|
||||
case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
|
||||
case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
|
||||
case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
|
||||
case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
|
||||
case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
|
||||
case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
|
||||
case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
|
||||
case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
|
||||
case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
|
||||
case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
|
||||
case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
|
||||
case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
|
||||
case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
|
||||
case Intrinsic::amdgcn_mfma_f32_32x32x4bf16: {
|
||||
// Default for MAI intrinsics.
|
||||
// srcC can also be an immediate which can be folded later.
|
||||
// FIXME: Should we eventually add an alternative mapping with AGPR src
|
||||
// for srcA/srcB?
|
||||
//
|
||||
// vdst, srcA, srcB, srcC
|
||||
OpdsMapping[0] = getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
|
||||
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
|
||||
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
|
||||
OpdsMapping[4] = getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -103,6 +103,11 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
|
||||
const MachineRegisterInfo &MRI,
|
||||
const TargetRegisterInfo &TRI) const;
|
||||
|
||||
// Return a value mapping for an operand that is required to be a AGPR.
|
||||
const ValueMapping *getAGPROpMapping(Register Reg,
|
||||
const MachineRegisterInfo &MRI,
|
||||
const TargetRegisterInfo &TRI) const;
|
||||
|
||||
/// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p
|
||||
/// Regs. This appropriately sets the regbank of the new registers.
|
||||
void split64BitValueForMapping(MachineIRBuilder &B,
|
||||
@ -131,6 +136,7 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
|
||||
const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
|
||||
|
||||
bool isSALUMapping(const MachineInstr &MI) const;
|
||||
|
||||
const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
|
||||
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
|
||||
const InstructionMapping &getDefaultMappingAllVGPR(
|
||||
|
@ -18,3 +18,7 @@ def SCCRegBank : RegisterBank <"SCC", [SReg_32, SCC_CLASS]>;
|
||||
|
||||
// It is helpful to distinguish conditions from ordinary SGPRs.
|
||||
def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;
|
||||
|
||||
def AGPRRegBank : RegisterBank <"AGPR",
|
||||
[AGPR_32, AReg_64, AReg_128, AReg_512, AReg_1024]
|
||||
>;
|
||||
|
@ -144,6 +144,11 @@ public:
|
||||
return isSGPRClass(RC);
|
||||
}
|
||||
|
||||
/// \returns true if this class contains only AGPR registers
|
||||
bool isAGPRClass(const TargetRegisterClass *RC) const {
|
||||
return hasAGPRs(RC) && !hasVGPRs(RC);
|
||||
}
|
||||
|
||||
/// \returns true if this class contains VGPR registers.
|
||||
bool hasVGPRs(const TargetRegisterClass *RC) const;
|
||||
|
||||
|
@ -0,0 +1,943 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: mfma_f32_32x32x1f32_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_32x32x1f32_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_32x32x1f32_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_32x32x1f32_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_16x16x1f32_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_16x16x1f32_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_16x16x1f32_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_16x16x1f32_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_4x4x1f32_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_4x4x1f32_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_4x4x1f32_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_4x4x1f32_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_32x32x2f32_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_32x32x2f32_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_32x32x2f32_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_32x32x2f32_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_16x16x4f32_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_16x16x4f32_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_16x16x4f32_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_16x16x4f32_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_32x32x4f16_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_32x32x4f16_vva
|
||||
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_32x32x4f16_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_32x32x4f16_sss
|
||||
; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_16x16x4f16_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_16x16x4f16_vva
|
||||
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_16x16x4f16_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_16x16x4f16_sss
|
||||
; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_4x4x4f16_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_4x4x4f16_vva
|
||||
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_4x4x4f16_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_4x4x4f16_sss
|
||||
; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_32x32x8f16_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_32x32x8f16_vva
|
||||
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_32x32x8f16_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_32x32x8f16_sss
|
||||
; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_16x16x16f16_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_16x16x16f16_vva
|
||||
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_16x16x16f16_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_16x16x16f16_sss
|
||||
; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_i32_32x32x4i8_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
|
||||
; CHECK-LABEL: name: mfma_i32_32x32x4i8_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr2
|
||||
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_i32_32x32x4i8_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: mfma_i32_32x32x4i8_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_i32_16x16x4i8_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_i32_16x16x4i8_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr2
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_i32_16x16x4i8_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_i32_16x16x4i8_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_i32_4x4x4i8_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_i32_4x4x4i8_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr2
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_i32_4x4x4i8_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_i32_4x4x4i8_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_i32_32x32x8i8_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_i32_32x32x8i8_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr2
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_i32_32x32x8i8_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_i32_32x32x8i8_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_i32_16x16x16i8_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_i32_16x16x16i8_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr2
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_i32_16x16x16i8_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_i32_16x16x16i8_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_32x32x2bf16_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_32x32x2bf16_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = COPY $vgpr2
|
||||
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_32x32x2bf16_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_32x32x2bf16_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $sgpr32
|
||||
%1:_(<2 x s16>) = COPY $sgpr33
|
||||
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_16x16x2bf16_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_16x16x2bf16_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = COPY $vgpr2
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_16x16x2bf16_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_16x16x2bf16_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $sgpr32
|
||||
%1:_(<2 x s16>) = COPY $sgpr33
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_4x4x2bf16_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_4x4x2bf16_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = COPY $vgpr2
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_4x4x2bf16_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_4x4x2bf16_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $sgpr32
|
||||
%1:_(<2 x s16>) = COPY $sgpr33
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_32x32x4bf16_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_32x32x4bf16_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = COPY $vgpr2
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_32x32x4bf16_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_32x32x4bf16_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $sgpr32
|
||||
%1:_(<2 x s16>) = COPY $sgpr33
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_16x16x8bf16_vva
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_16x16x8bf16_vva
|
||||
; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = COPY $vgpr2
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: mfma_f32_16x16x8bf16_sss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: mfma_f32_16x16x8bf16_sss
|
||||
; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $sgpr32
|
||||
%1:_(<2 x s16>) = COPY $sgpr33
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
Loading…
x
Reference in New Issue
Block a user