[AMDGPU][MC][GFX9] Added 16-bit renamed and "_legacy" VALU opcodes

See Bug 33629: https://bugs.llvm.org//show_bug.cgi?id=33629

Reviewers: vpykhtin, SamWot, arsenm

Differential Revision: https://reviews.llvm.org/D36322

llvm-svn: 310497
This commit is contained in:
Dmitry Preobrazhensky 2017-08-09 17:10:47 +00:00
parent 5a8769dcd9
commit 559a664ace
10 changed files with 492 additions and 16 deletions

View File

@ -70,7 +70,8 @@ enum SIEncodingFamily {
SI = 0,
VI = 1,
SDWA = 2,
SDWA9 = 3
SDWA9 = 3,
GFX9 = 4
};
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
@ -110,6 +111,10 @@ int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
: SIEncodingFamily::SDWA;
if ((get(Opcode).TSFlags & SIInstrFlags::F16_ZFILL) != 0 &&
ST.getGeneration() >= AMDGPUSubtarget::GFX9)
Gen = SIEncodingFamily::GFX9;
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
// -1 means that Opcode is already a native instruction.

View File

@ -208,6 +208,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res) break;
Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
if (Res) break;
Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
} while (false);
if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||

View File

@ -69,7 +69,8 @@ enum : uint64_t {
VOPAsmPrefer32Bit = UINT64_C(1) << 41,
HasFPClamp = UINT64_C(1) << 42,
VOP3_OPSEL = UINT64_C(1) << 43,
maybeAtomic = UINT64_C(1) << 44
maybeAtomic = UINT64_C(1) << 44,
F16_ZFILL = UINT64_C(1) << 45
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.

View File

@ -90,6 +90,10 @@ class InstSI <dag outs, dag ins, string asm = "",
// Is it possible for this instruction to be atomic?
field bit maybeAtomic = 0;
// This bit indicates that this is a 16-bit instruction which zero-fills
// unused bits in dst. Note that new GFX9 opcodes preserve unused bits.
field bit F16_ZFILL = 0;
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@ -137,6 +141,7 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{43} = VOP3_OPSEL;
let TSFlags{44} = maybeAtomic;
let TSFlags{45} = F16_ZFILL;
let SchedRW = [Write32Bit];

View File

@ -11,6 +11,9 @@ def isCI : Predicate<"Subtarget->getGeneration() "
def isCIOnly : Predicate<"Subtarget->getGeneration() =="
"SISubtarget::SEA_ISLANDS">,
AssemblerPredicate <"FeatureSeaIslands">;
def isVIOnly : Predicate<"Subtarget->getGeneration() =="
"SISubtarget::VOLCANIC_ISLANDS">,
AssemblerPredicate <"FeatureVolcanicIslands">;
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
@ -22,6 +25,7 @@ def SIEncodingFamily {
int VI = 1;
int SDWA = 2;
int SDWA9 = 3;
int GFX9 = 4;
}
//===----------------------------------------------------------------------===//
@ -1762,7 +1766,8 @@ def getMCOpcodeGen : InstrMapping {
let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
[!cast<string>(SIEncodingFamily.VI)],
[!cast<string>(SIEncodingFamily.SDWA)],
[!cast<string>(SIEncodingFamily.SDWA9)]];
[!cast<string>(SIEncodingFamily.SDWA9)],
[!cast<string>(SIEncodingFamily.GFX9)]];
}
// Get equivalent SOPK instruction.

View File

@ -372,21 +372,33 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
let SubtargetPredicate = Has16BitInsts in {
def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
let F16_ZFILL = 1 in {
def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
}
let SubtargetPredicate = isGFX9 in {
def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16>>;
}
let isCommutable = 1 in {
def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>;
let F16_ZFILL = 1 in {
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>;
}
let SubtargetPredicate = isGFX9 in {
def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16>>;
def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16>>;
def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16>>;
def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16>>;
} // End SubtargetPredicate = isGFX9
def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>>;
def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>;
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>>;
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
} // End isCommutable = 1
} // End SubtargetPredicate = Has16BitInsts
@ -587,6 +599,27 @@ multiclass VOP3Interp_Real_vi<bits<10> op> {
} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
let AssemblerPredicates = [isVIOnly], DecoderNamespace = "VI" in {
multiclass VOP3_F16_Real_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
} // End AssemblerPredicates = [isVIOnly], DecoderNamespace = "VI"
let AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9" in {
multiclass VOP3_F16_Real_gfx9<bits<10> op, string OpName, string AsmName> {
def _vi : VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>,
VOP3e_vi <op, !cast<VOP3_Pseudo>(OpName).Pfl> {
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName);
let AsmString = AsmName # ps.AsmOperands;
}
}
} // End AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9"
defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
defm V_MAD_I64_I32 : VOP3be_Real_vi <0x1E9>;
@ -631,14 +664,25 @@ defm V_QSAD_PK_U16_U8 : VOP3_Real_vi <0x1e5>;
defm V_MQSAD_PK_U16_U8 : VOP3_Real_vi <0x1e6>;
defm V_MQSAD_U32_U8 : VOP3_Real_vi <0x1e7>;
defm V_MAD_F16 : VOP3_Real_vi <0x1ea>;
defm V_MAD_U16 : VOP3_Real_vi <0x1eb>;
defm V_MAD_I16 : VOP3_Real_vi <0x1ec>;
defm V_PERM_B32 : VOP3_Real_vi <0x1ed>;
defm V_FMA_F16 : VOP3_Real_vi <0x1ee>;
defm V_DIV_FIXUP_F16 : VOP3_Real_vi <0x1ef>;
defm V_MAD_F16 : VOP3_F16_Real_vi <0x1ea>;
defm V_MAD_U16 : VOP3_F16_Real_vi <0x1eb>;
defm V_MAD_I16 : VOP3_F16_Real_vi <0x1ec>;
defm V_FMA_F16 : VOP3_F16_Real_vi <0x1ee>;
defm V_DIV_FIXUP_F16 : VOP3_F16_Real_vi <0x1ef>;
defm V_MAD_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ea, "V_MAD_F16", "v_mad_legacy_f16">;
defm V_MAD_LEGACY_U16 : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">;
defm V_MAD_LEGACY_I16 : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">;
defm V_FMA_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ee, "V_FMA_F16", "v_fma_legacy_f16">;
defm V_DIV_FIXUP_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ef, "V_DIV_FIXUP_F16", "v_div_fixup_legacy_f16">;
defm V_MAD_F16_gfx9 : VOP3_F16_Real_gfx9 <0x203, "V_MAD_F16_gfx9", "v_mad_f16">;
defm V_MAD_U16_gfx9 : VOP3_F16_Real_gfx9 <0x204, "V_MAD_U16_gfx9", "v_mad_u16">;
defm V_MAD_I16_gfx9 : VOP3_F16_Real_gfx9 <0x205, "V_MAD_I16_gfx9", "v_mad_i16">;
defm V_FMA_F16_gfx9 : VOP3_F16_Real_gfx9 <0x206, "V_FMA_F16_gfx9", "v_fma_f16">;
defm V_DIV_FIXUP_F16_gfx9 : VOP3_F16_Real_gfx9 <0x207, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_vi <0x270>;
defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_vi <0x271>;

View File

@ -190,3 +190,135 @@ v_sub_i16 v5, v1, v2 op_sel:[1,1,1]
v_sub_i16 v5, v1, v2 clamp
// GFX9: v_sub_i16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x9f,0xd2,0x01,0x05,0x02,0x00]
v_fma_f16_e64 v5, v1, v2, v3
// GFX9: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x04]
v_fma_f16 v5, v1, -v2, v3
// GFX9: v_fma_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x44]
v_fma_f16 v5, v1, v2, |v3|
// GFX9: v_fma_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x06,0xd2,0x01,0x05,0x0e,0x04]
v_fma_f16 v5, v1, v2, v3 clamp
// GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04]
v_fma_legacy_f16_e64 v5, v1, v2, v3
// GFX9: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04]
v_fma_legacy_f16 v5, -v1, v2, v3
// GFX9: v_fma_legacy_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x24]
v_fma_legacy_f16 v5, v1, |v2|, v3
// GFX9: v_fma_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xee,0xd1,0x01,0x05,0x0e,0x04]
v_fma_legacy_f16 v5, v1, v2, v3 clamp
// GFX9: v_fma_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04]
v_div_fixup_f16_e64 v5, 0.5, v2, v3
// GFX9: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04]
v_div_fixup_f16 v5, v1, 0.5, v3
// GFX9: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04]
v_div_fixup_f16 v5, v1, v2, 0.5
// GFX9: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03]
v_div_fixup_f16 v5, -v1, v2, v3
// GFX9: v_div_fixup_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0x24]
v_div_fixup_f16 v5, |v1|, v2, v3
// GFX9: v_div_fixup_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0x07,0xd2,0x01,0x05,0x0e,0x04]
v_div_fixup_f16 v5, v1, v2, v3 clamp
// GFX9: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04]
v_div_fixup_legacy_f16_e64 v5, 0.5, v2, v3
// GFX9: v_div_fixup_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04]
v_div_fixup_legacy_f16 v5, v1, 0.5, v3
// GFX9: v_div_fixup_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04]
v_div_fixup_legacy_f16 v5, v1, v2, 0.5
// GFX9: v_div_fixup_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03]
v_div_fixup_legacy_f16 v5, -v1, v2, v3
// GFX9: v_div_fixup_legacy_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x24]
v_div_fixup_legacy_f16 v5, v1, |v2|, v3
// GFX9: v_div_fixup_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xef,0xd1,0x01,0x05,0x0e,0x04]
v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp
// GFX9: v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04]
v_mad_f16_e64 v5, 0.5, v2, v3
// GFX9: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04]
v_mad_f16 v5, v1, 0.5, v3
// GFX9: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04]
v_mad_f16 v5, v1, v2, 0.5
// GFX9: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03]
v_mad_f16 v5, v1, v2, -v3
// GFX9: v_mad_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0x84]
v_mad_f16 v5, v1, v2, |v3|
// GFX9: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x03,0xd2,0x01,0x05,0x0e,0x04]
v_mad_f16 v5, v1, v2, v3 clamp
// GFX9: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04]
v_mad_i16_e64 v5, 0, v2, v3
// GFX9: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04]
v_mad_i16 v5, v1, -1, v3
// GFX9: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04]
v_mad_i16 v5, v1, v2, -4.0
// GFX9: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03]
v_mad_legacy_f16_e64 v5, 0.5, v2, v3
// GFX9: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04]
v_mad_legacy_f16 v5, v1, 0.5, v3
// GFX9: v_mad_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04]
v_mad_legacy_f16 v5, v1, v2, 0.5
// GFX9: v_mad_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03]
v_mad_legacy_f16 v5, v1, -v2, v3
// GFX9: v_mad_legacy_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x44]
v_mad_legacy_f16 v5, v1, |v2|, v3
// GFX9: v_mad_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xea,0xd1,0x01,0x05,0x0e,0x04]
v_mad_legacy_f16 v5, v1, v2, v3 clamp
// GFX9: v_mad_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04]
v_mad_legacy_i16_e64 v5, 0, v2, v3
// GFX9: v_mad_legacy_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04]
v_mad_legacy_i16 v5, v1, -1, v3
// GFX9: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04]
v_mad_legacy_i16 v5, v1, v2, -4.0
// GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03]
v_mad_legacy_u16_e64 v5, 0, v2, v3
// GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04]
v_mad_legacy_u16 v5, v1, -1, v3
// GFX9: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04]
v_mad_legacy_u16 v5, v1, v2, -4.0
// GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03]
v_mad_u16_e64 v5, 0, v2, v3
// GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04]
v_mad_u16 v5, v1, -1, v3
// GFX9: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04]
v_mad_u16 v5, v1, v2, -4.0
// GFX9: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03]

View File

@ -436,6 +436,88 @@ v_cubeid_f32 v0, |-1|, |-1.0|, |1.0|
// SICI: v_cubeid_f32 v0, |-1|, |-1.0|, |1.0| ; encoding: [0x00,0x07,0x88,0xd2,0xc1,0xe6,0xc9,0x03]
// VI: v_cubeid_f32 v0, |-1|, |-1.0|, |1.0| ; encoding: [0x00,0x07,0xc4,0xd1,0xc1,0xe6,0xc9,0x03]
///===---------------------------------------------------------------------===//
// VOP3 Legacy
///===---------------------------------------------------------------------===//
v_fma_f16_e64 v5, v1, v2, v3
// VI: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04]
v_fma_f16 v5, v1, v2, 0.5
// VI: v_fma_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0xc2,0x03]
v_fma_f16 v5, -v1, -v2, -v3
// VI: v_fma_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0xe4]
v_fma_f16 v5, |v1|, |v2|, |v3|
// VI: v_fma_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xee,0xd1,0x01,0x05,0x0e,0x04]
v_fma_f16 v5, v1, v2, v3 clamp
// VI: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04]
v_div_fixup_f16_e64 v5, v1, v2, v3
// VI: v_div_fixup_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x04]
v_div_fixup_f16 v5, 0.5, v2, v3
// VI: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04]
v_div_fixup_f16 v5, v1, 0.5, v3
// VI: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04]
v_div_fixup_f16 v5, v1, v2, 0.5
// VI: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03]
v_div_fixup_f16 v5, v1, v2, -4.0
// VI: v_div_fixup_f16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xde,0x03]
v_div_fixup_f16 v5, -v1, v2, v3
// VI: v_div_fixup_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x24]
v_div_fixup_f16 v5, v1, |v2|, v3
// VI: v_div_fixup_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xef,0xd1,0x01,0x05,0x0e,0x04]
v_div_fixup_f16 v5, v1, v2, v3 clamp
// VI: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04]
v_mad_f16_e64 v5, v1, v2, v3
// VI: v_mad_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x04]
v_mad_f16 v5, 0.5, v2, v3
// VI: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04]
v_mad_f16 v5, v1, 0.5, v3
// VI: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04]
v_mad_f16 v5, v1, v2, 0.5
// VI: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03]
v_mad_f16 v5, v1, -v2, v3
// VI: v_mad_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x44]
v_mad_f16 v5, v1, v2, |v3|
// VI: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0xea,0xd1,0x01,0x05,0x0e,0x04]
v_mad_f16 v5, v1, v2, v3 clamp
// VI: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04]
v_mad_i16_e64 v5, -1, v2, v3
// VI: v_mad_i16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0xc1,0x04,0x0e,0x04]
v_mad_i16 v5, v1, -4.0, v3
// VI: v_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xef,0x0d,0x04]
v_mad_i16 v5, v1, v2, 0
// VI: v_mad_i16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0x02,0x02]
v_mad_u16_e64 v5, -1, v2, v3
// VI: v_mad_u16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0xc1,0x04,0x0e,0x04]
v_mad_u16 v5, v1, 0, v3
// VI: v_mad_u16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x01,0x0d,0x04]
v_mad_u16 v5, v1, v2, -4.0
// VI: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03]
//
// v_interp*
//

View File

@ -0,0 +1,133 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx901 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX9
# GFX9: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x04]
0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x04
# GFX9: v_fma_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x24]
0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x24
# GFX9: v_fma_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0x06,0xd2,0x01,0x05,0x0e,0x04]
0x05,0x02,0x06,0xd2,0x01,0x05,0x0e,0x04
# GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04]
0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04
# CHECK: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04
# CHECK: v_fma_legacy_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x84]
0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x84
# CHECK: v_fma_legacy_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0xee,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x01,0xee,0xd1,0x01,0x05,0x0e,0x04
# CHECK: v_fma_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04
# CHECK: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04]
0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04
# CHECK: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04]
0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04
# CHECK: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03]
0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03
# CHECK: v_div_fixup_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0xe4]
0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0xe4
# CHECK: v_div_fixup_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0x07,0xd2,0x01,0x05,0x0e,0x04]
0x05,0x07,0x07,0xd2,0x01,0x05,0x0e,0x04
# CHECK: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04]
0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04
# CHECK: v_div_fixup_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04]
0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04
# CHECK: v_div_fixup_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04]
0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04
# CHECK: v_div_fixup_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03]
0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03
# CHECK: v_div_fixup_legacy_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4]
0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4
# CHECK: v_div_fixup_legacy_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04
# CHECK: v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04
# CHECK: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04]
0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04
# CHECK: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04]
0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04
# CHECK: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03]
0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03
# CHECK: v_mad_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0xe4]
0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0xe4
# CHECK: v_mad_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0x03,0xd2,0x01,0x05,0x0e,0x04]
0x05,0x07,0x03,0xd2,0x01,0x05,0x0e,0x04
# CHECK: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04]
0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04
# CHECK: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04]
0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04
# CHECK: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04]
0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04
# CHECK: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03]
0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03
# CHECK: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04]
0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04
# CHECK: v_mad_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04]
0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04
# CHECK: v_mad_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03]
0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03
# CHECK: v_mad_legacy_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4]
0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4
# CHECK: v_mad_legacy_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04
# CHECK: v_mad_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04
# CHECK: v_mad_legacy_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04]
0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04
# CHECK: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04]
0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04
# CHECK: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03]
0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03
# CHECK: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04]
0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04
# CHECK: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04]
0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04
# CHECK: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03]
0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03
# CHECK: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04]
0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04
# CHECK: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04]
0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04
# CHECK: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03]
0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03

View File

@ -240,6 +240,72 @@
# VI: v_ceil_f32_e64 v0, neg(-1.0) ; encoding: [0x00,0x00,0x5d,0xd1,0xf3,0x00,0x00,0x20]
0x00,0x00,0x5d,0xd1,0xf3,0x00,0x00,0x20
# VI: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04
# VI: v_fma_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0xf0,0x04,0x0e,0x04]
0x05,0x00,0xee,0xd1,0xf0,0x04,0x0e,0x04
# VI: v_fma_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0xee,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x04,0xee,0xd1,0x01,0x05,0x0e,0x04
# VI: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04
# CHECK: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04]
0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04
# CHECK: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04]
0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04
# CHECK: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03]
0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03
# CHECK: v_div_fixup_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4]
0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4
# CHECK: v_div_fixup_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04
# CHECK: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04
# CHECK: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04]
0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04
# CHECK: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04]
0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04
# CHECK: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03]
0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03
# CHECK: v_mad_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4]
0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4
# CHECK: v_mad_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04
# CHECK: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04
# CHECK: v_mad_i16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0xf0,0x04,0x0e,0x04]
0x05,0x00,0xec,0xd1,0xf0,0x04,0x0e,0x04
# CHECK: v_mad_i16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xe1,0x0d,0x04]
0x05,0x00,0xec,0xd1,0x01,0xe1,0x0d,0x04
# CHECK: v_mad_i16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xc2,0x03]
0x05,0x00,0xec,0xd1,0x01,0x05,0xc2,0x03
# CHECK: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04]
0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04
# CHECK: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04]
0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04
# CHECK: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03]
0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03
# VI: v_interp_mov_f32_e64 v5, p10, attr0.x ; encoding: [0x05,0x00,0x72,0xd2,0x00,0x00,0x00,0x00]
0x05,0x00,0x72,0xd2,0x00,0x00,0x00,0x00