mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-15 17:07:52 +00:00
[AMDGPU] Assembler: support SDWA and DPP for VOP2b instructions
Reviewers: nhaustov, artem.tamazov, vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye
Differential Revision: https://reviews.llvm.org/D28051
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290599 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9b804c5b0a
commit
79df598ffb
@ -3309,7 +3309,11 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
|
||||
for (unsigned E = Operands.size(); I != E; ++I) {
|
||||
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
|
||||
// Add the register arguments
|
||||
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
|
||||
if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
|
||||
// VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
|
||||
// Skip it.
|
||||
continue;
|
||||
} if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
|
||||
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
|
||||
} else if (Op.isDPPCtrl()) {
|
||||
Op.addImmOperands(Inst, 1);
|
||||
@ -3428,10 +3432,12 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
|
||||
for (unsigned E = Operands.size(); I != E; ++I) {
|
||||
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
|
||||
// Add the register arguments
|
||||
if (BasicInstType == SIInstrFlags::VOPC &&
|
||||
if ((BasicInstType == SIInstrFlags::VOPC ||
|
||||
BasicInstType == SIInstrFlags::VOP2)&&
|
||||
Op.isReg() &&
|
||||
Op.Reg.RegNo == AMDGPU::VCC) {
|
||||
// VOPC sdwa use "vcc" token as dst. Skip it.
|
||||
// VOPC and VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
|
||||
// Skip it.
|
||||
continue;
|
||||
} else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
|
||||
Op.addRegOrImmWithInputModsOperands(Inst, 2);
|
||||
|
@ -857,11 +857,11 @@ class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
|
||||
src0_sel:$src0_sel),
|
||||
!if(!eq(NumSrcArgs, 2),
|
||||
!if(!eq(DstVT.Size, 1),
|
||||
// VOPC_SDWA with float modifiers
|
||||
// VOPC_SDWA with modifiers
|
||||
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
||||
Src1Mod:$src1_modifiers, Src1RC:$src1,
|
||||
clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
|
||||
// VOP2_SDWA or VOPC_SDWA with float modifiers
|
||||
// VOP2_SDWA or VOPC_SDWA with modifiers
|
||||
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
||||
Src1Mod:$src1_modifiers, Src1RC:$src1,
|
||||
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||
|
@ -134,6 +134,9 @@ multiclass VOP2bInst <string opName,
|
||||
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
|
||||
def _e32 : VOP2_Pseudo <opName, P>,
|
||||
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
|
||||
|
||||
def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
|
||||
Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
|
||||
}
|
||||
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
|
||||
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||
@ -214,6 +217,8 @@ def VOP_MAC_F32 : VOP_MAC <f32> {
|
||||
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
|
||||
let Asm32 = "$vdst, vcc, $src0, $src1";
|
||||
let Asm64 = "$vdst, $sdst, $src0, $src1";
|
||||
let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
|
||||
let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
|
||||
let Outs32 = (outs DstRC:$vdst);
|
||||
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
|
||||
}
|
||||
@ -229,12 +234,25 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
|
||||
let Src0RC32 = VCSrc_b32;
|
||||
let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
|
||||
let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
|
||||
let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
|
||||
let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
|
||||
let Outs32 = (outs DstRC:$vdst);
|
||||
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
|
||||
|
||||
// Suppress src2 implied by type since the 32-bit encoding uses an
|
||||
// implicit VCC use.
|
||||
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
|
||||
|
||||
let InsSDWA = (ins Src0Mod:$src0_modifiers, Src0SDWA:$src0,
|
||||
Src1Mod:$src1_modifiers, Src1SDWA:$src1,
|
||||
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||
src0_sel:$src0_sel, src1_sel:$src1_sel);
|
||||
|
||||
let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0,
|
||||
Src1Mod:$src1_modifiers, Src1DPP:$src1,
|
||||
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
|
||||
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
|
||||
let HasExt = 1;
|
||||
}
|
||||
|
||||
// Read in from vcc or arbitrary SGPR
|
||||
@ -617,7 +635,7 @@ multiclass VOP2_Real_e64_vi <bits<10> op> {
|
||||
VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
|
||||
}
|
||||
|
||||
multiclass VOP2be_Real_e32e64_vi <bits<6> op> : VOP2_Real_e32_vi<op> {
|
||||
multiclass Base_VOP2be_Real_e32e64_vi <bits<6> op> : VOP2_Real_e32_vi<op> {
|
||||
def _e64_vi :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
|
||||
VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
|
||||
@ -635,6 +653,13 @@ multiclass VOP2_SDWA_Real <bits<6> op> {
|
||||
VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
|
||||
}
|
||||
|
||||
multiclass VOP2be_Real_e32e64_vi <bits<6> op> :
|
||||
Base_VOP2be_Real_e32e64_vi<op>, VOP2_SDWA_Real<op> {
|
||||
// For now left dpp only for asm/dasm
|
||||
// TODO: add corresponding pseudo
|
||||
def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
|
||||
}
|
||||
|
||||
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
|
||||
Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op> {
|
||||
// For now left dpp only for asm/dasm
|
||||
|
@ -503,3 +503,27 @@ v_min_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
// NOSICI: error:
|
||||
// VI: v_ldexp_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x66,0x02,0x01,0x09,0xa1]
|
||||
v_ldexp_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_add_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
|
||||
v_add_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_sub_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
|
||||
v_sub_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_subrev_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
|
||||
v_subrev_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_addc_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
|
||||
v_addc_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_subb_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
|
||||
v_subb_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_subbrev_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
|
||||
v_subbrev_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
@ -5,7 +5,6 @@
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI
|
||||
|
||||
// ToDo: VOP2b (see vop_dpp.s)
|
||||
// ToDo: intrinsics
|
||||
|
||||
//---------------------------------------------------------------------------//
|
||||
@ -512,6 +511,30 @@ v_min_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_se
|
||||
// VI: v_ldexp_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x66,0x02,0x06,0x05,0x02]
|
||||
v_ldexp_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_add_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02]
|
||||
v_add_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_sub_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02]
|
||||
v_sub_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_subrev_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02]
|
||||
v_subrev_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02]
|
||||
v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02]
|
||||
v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// VI: v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
|
||||
v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Check VOPC opcodes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
Loading…
x
Reference in New Issue
Block a user