[AMDGPU] Assembler: Custom converters for SDWA instructions. Support for _dpp and _sdwa suffixes in mnemonics.

Summary:
Added custom converters for SDWA instructions to support optional operands and modifiers.
Added support for the _dpp and _sdwa mnemonic suffixes, which force the DPP or SDWA encoding of an instruction.
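
For example, both of the following lines now assemble to the SDWA encoding (both are taken from the assembler tests added below, which accept them only for VI; the _dpp suffix behaves the same way for instructions that have a DPP encoding):

    v_mov_b32 v0, v0 dst_sel:BYTE_0 src0_sel:DWORD   // SDWA encoding selected because SDWA operands are present
    v_mov_b32_sdwa v0, v0                            // _sdwa suffix forces the SDWA encoding; omitted operands take their defaults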

Reviewers: tstellarAMD, vpykhtin, artem.tamazov

Subscribers: arsenm, kzhuravl

Differential Revision: http://reviews.llvm.org/D20625

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271655 91177308-0d34-0410-b5e6-96231b3b80d8
Sam Kolton 2016-06-03 10:27:37 +00:00
parent 2306053194
commit 9b4f7ea1b7
3 changed files with 177 additions and 65 deletions

Changed file 1 of 3: the AMDGPU assembler parser

@@ -82,7 +82,9 @@ public:
ImmTyDppRowMask,
ImmTyDppBankMask,
ImmTyDppBoundCtrl,
ImmTySdwaSel,
ImmTySdwaDstSel,
ImmTySdwaSrc0Sel,
ImmTySdwaSrc1Sel,
ImmTySdwaDstUnused,
ImmTyDMask,
ImmTyUNorm,
@@ -274,8 +276,16 @@ public:
return isImmTy(ImmTyDppBoundCtrl);
}
bool isSDWASel() const {
return isImmTy(ImmTySdwaSel);
bool isSDWADstSel() const {
return isImmTy(ImmTySdwaDstSel);
}
bool isSDWASrc0Sel() const {
return isImmTy(ImmTySdwaSrc0Sel);
}
bool isSDWASrc1Sel() const {
return isImmTy(ImmTySdwaSrc1Sel);
}
bool isSDWADstUnused() const {
@@ -385,7 +395,9 @@ public:
case ImmTyDppRowMask: OS << "DppRowMask"; break;
case ImmTyDppBankMask: OS << "DppBankMask"; break;
case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
case ImmTySdwaSel: OS << "SdwaSel"; break;
case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
case ImmTyDMask: OS << "DMask"; break;
case ImmTyUNorm: OS << "UNorm"; break;
@@ -479,6 +491,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
unsigned ForcedEncodingSize;
bool ForcedDPP;
bool ForcedSDWA;
bool isSI() const {
return AMDGPU::isSI(getSTI());
@@ -531,7 +545,9 @@ public:
const MCInstrInfo &MII,
const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser),
ForcedEncodingSize(0) {
ForcedEncodingSize(0),
ForcedDPP(false),
ForcedSDWA(false) {
MCAsmParserExtension::Initialize(Parser);
if (getSTI().getFeatureBits().none()) {
@@ -546,18 +562,15 @@ public:
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<AMDGPUTargetStreamer &>(TS);
}
void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
unsigned getForcedEncodingSize() const {
return ForcedEncodingSize;
}
void setForcedEncodingSize(unsigned Size) {
ForcedEncodingSize = Size;
}
bool isForcedVOP3() const {
return ForcedEncodingSize == 64;
}
unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
bool isForcedDPP() const { return ForcedDPP; }
bool isForcedSDWA() const { return ForcedSDWA; }
std::unique_ptr<AMDGPUOperand> parseRegister();
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
@@ -570,6 +583,7 @@ public:
bool MatchingInlineAsm) override;
bool ParseDirective(AsmToken DirectiveID) override;
OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
StringRef parseMnemonicSuffix(StringRef Name);
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -580,7 +594,7 @@ public:
bool (*ConvertResult)(int64_t&) = 0);
OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands,
enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
OperandMatchResultTy parseStringWithPrefix(const char *Prefix, StringRef &Value);
OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, StringRef &Value);
OperandMatchResultTy parseImm(OperandVector &Operands);
OperandMatchResultTy parseRegOrImm(OperandVector &Operands);
@@ -643,10 +657,20 @@ public:
AMDGPUOperand::Ptr defaultBoundCtrl() const;
void cvtDPP(MCInst &Inst, const OperandVector &Operands);
OperandMatchResultTy parseSDWASel(OperandVector &Operands);
OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
AMDGPUOperand::ImmTy Type);
OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
AMDGPUOperand::Ptr defaultSDWASel() const;
AMDGPUOperand::Ptr defaultSDWASel(AMDGPUOperand::ImmTy Type) const;
AMDGPUOperand::Ptr defaultSDWADstSel() const;
AMDGPUOperand::Ptr defaultSDWASrc0Sel() const;
AMDGPUOperand::Ptr defaultSDWASrc1Sel() const;
AMDGPUOperand::Ptr defaultSDWADstUnused() const;
void cvtSdwaVop1_mod(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVop1_nomod(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVop2_mod(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVop2_nomod(MCInst &Inst, const OperandVector &Operands);
void cvtSDWA(MCInst &Inst, const OperandVector &Operands, bool HasMods,
bool IsVOP1);
};
struct OptionalOperand {
@@ -988,7 +1012,9 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
(getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
(getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
(isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
(isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
return Match_InvalidOperand;
if ((TSFlags & SIInstrFlags::VOP3) &&
@@ -1323,25 +1349,35 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return MatchOperand_NoMatch;
}
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
// Clear any forced encodings from the previous instruction.
setForcedEncodingSize(0);
setForcedDPP(false);
setForcedSDWA(false);
if (Name.endswith("_e64")) {
setForcedEncodingSize(64);
return Name.substr(0, Name.size() - 4);
} else if (Name.endswith("_e32")) {
setForcedEncodingSize(32);
return Name.substr(0, Name.size() - 4);
} else if (Name.endswith("_dpp")) {
setForcedDPP(true);
return Name.substr(0, Name.size() - 4);
} else if (Name.endswith("_sdwa")) {
setForcedSDWA(true);
return Name.substr(0, Name.size() - 5);
}
return Name;
}
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
// Clear any forced encodings from the previous instruction.
setForcedEncodingSize(0);
if (Name.endswith("_e64"))
setForcedEncodingSize(64);
else if (Name.endswith("_e32"))
setForcedEncodingSize(32);
// Add the instruction mnemonic
Name = parseMnemonicSuffix(Name);
Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
if (Name.endswith("_e64")) { Name = Name.substr(0, Name.size() - 4); }
if (Name.endswith("_e32")) { Name = Name.substr(0, Name.size() - 4); }
while (!getLexer().is(AsmToken::EndOfStatement)) {
AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
@@ -1466,7 +1502,7 @@ void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands,
}
AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(const char *Prefix, StringRef &Value) {
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
if (getLexer().isNot(AsmToken::Identifier)) {
return MatchOperand_NoMatch;
}
@@ -2196,7 +2232,9 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
{"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
{"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
{"sdwa_sel", AMDGPUOperand::ImmTySdwaSel, false, nullptr},
{"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
{"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
{"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
{"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
};
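
With sdwa_sel split into separate table entries, each selector is parsed and tracked under its own name. A fully spelled-out SDWA instruction from the existing tests uses all four named operands:

    v_min_u32 v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD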
@@ -2208,8 +2246,10 @@ AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(Oper
res = parseNamedBit(Op.Name, Operands, Op.Type);
} else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
res = parseOModOperand(Operands);
} else if (Op.Type == AMDGPUOperand::ImmTySdwaSel) {
res = parseSDWASel(Operands);
} else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
res = parseSDWASel(Operands, Op.Name, Op.Type);
} else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
res = parseSDWADstUnused(Operands);
} else {
@@ -2457,7 +2497,6 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
}
}
// ToDo: fix default values for row_mask and bank_mask
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
@@ -2468,24 +2507,15 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
//===----------------------------------------------------------------------===//
AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands) {
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
AMDGPUOperand::ImmTy Type) {
SMLoc S = Parser.getTok().getLoc();
StringRef Value;
AMDGPUAsmParser::OperandMatchResultTy res;
res = parseStringWithPrefix("dst_sel", Value);
if (res == MatchOperand_ParseFail) {
return MatchOperand_ParseFail;
} else if (res == MatchOperand_NoMatch) {
res = parseStringWithPrefix("src0_sel", Value);
if (res == MatchOperand_ParseFail) {
return MatchOperand_ParseFail;
} else if (res == MatchOperand_NoMatch) {
res = parseStringWithPrefix("src1_sel", Value);
if (res != MatchOperand_Success) {
return res;
}
}
res = parseStringWithPrefix(Prefix, Value);
if (res != MatchOperand_Success) {
return res;
}
int64_t Int;
@@ -2504,8 +2534,7 @@ AMDGPUAsmParser::parseSDWASel(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
Operands.push_back(AMDGPUOperand::CreateImm(Int, S,
AMDGPUOperand::ImmTySdwaSel));
Operands.push_back(AMDGPUOperand::CreateImm(Int, S, Type));
return MatchOperand_Success;
}
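
All three selector operands accept the same value names and map to the same small immediates (per the SDWA field encoding: BYTE_0..BYTE_3 = 0..3, WORD_0/WORD_1 = 4/5, DWORD = 6, DWORD also being the default value 6 supplied below), so one parser routine parameterized by prefix and immediate type covers dst_sel, src0_sel and src1_sel. The existing VI check line in the test file shows the mapping: the trailing bytes 0x06, 0x00, 0x06 of the encoding carry dst_sel=DWORD, src0_sel=BYTE_0 and src1_sel=DWORD (together with the dst_unused and modifier bits, which are zero here):

    // VI: v_min_u32_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x1c,0x01,0x06,0x00,0x06]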
@@ -2537,14 +2566,85 @@ AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
return MatchOperand_Success;
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSDWASel() const {
return AMDGPUOperand::CreateImm(6, SMLoc(), AMDGPUOperand::ImmTySdwaSel);
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSDWASel(AMDGPUOperand::ImmTy Type) const {
return AMDGPUOperand::CreateImm(6, SMLoc(), Type);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSDWADstSel() const {
return defaultSDWASel(AMDGPUOperand::ImmTySdwaDstSel);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSDWASrc0Sel() const {
return defaultSDWASel(AMDGPUOperand::ImmTySdwaSrc0Sel);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSDWASrc1Sel() const {
return defaultSDWASel(AMDGPUOperand::ImmTySdwaSrc1Sel);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSDWADstUnused() const {
return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTySdwaDstUnused);
}
void AMDGPUAsmParser::cvtSdwaVop1_mod(MCInst &Inst, const OperandVector &Operands) {
cvtSDWA(Inst, Operands, true, true);
}
void AMDGPUAsmParser::cvtSdwaVop1_nomod(MCInst &Inst, const OperandVector &Operands) {
cvtSDWA(Inst, Operands, false, true);
}
void AMDGPUAsmParser::cvtSdwaVop2_mod(MCInst &Inst, const OperandVector &Operands) {
cvtSDWA(Inst, Operands, true, false);
}
void AMDGPUAsmParser::cvtSdwaVop2_nomod(MCInst &Inst, const OperandVector &Operands) {
cvtSDWA(Inst, Operands, false, false);
}
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
bool HasMods, bool IsVOP1) {
OptionalImmIndexMap OptionalIdx;
unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
if (!HasMods && Op.isReg()) {
Op.addRegOperands(Inst, 1);
} else if (HasMods && Op.isRegOrImmWithInputMods()) {
Op.addRegOrImmWithInputModsOperands(Inst, 2);
} else if (Op.isImm()) {
// Handle optional arguments
OptionalIdx[Op.getImmTy()] = I;
} else {
llvm_unreachable("Invalid operand type");
}
}
if (HasMods) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
}
if (Inst.getOpcode() == AMDGPU::V_NOP_sdwa) {
// V_NOP_sdwa has no optional sdwa arguments
return;
}
if (IsVOP1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
} else { // VOP2
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
}
}
/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {

Changed file 2 of 3: the AMDGPU instruction TableGen definitions

@@ -489,11 +489,9 @@ class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
let ParserMethod = !if(Optional, "parseOptionalOperand", "parse"#CName);
let RenderMethod = "addImmOperands";
let IsOptional = Optional;
let DefaultMethod = "default"#CName;
let DefaultMethod = !if(Optional, "default"#CName, ?);
}
def sdwa_sel : NamedMatchClass<"SDWASel">;
class NamedOperandBit<string Name, AsmOperandClass MatchClass> : Operand<i1> {
let PrintMethod = "print"#Name;
let ParserMatchClass = MatchClass;
@@ -547,9 +545,9 @@ def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
def dst_sel : NamedOperandU32<"SDWADstSel", sdwa_sel>;
def src0_sel : NamedOperandU32<"SDWASrc0Sel", sdwa_sel>;
def src1_sel : NamedOperandU32<"SDWASrc1Sel", sdwa_sel>;
def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;
def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
@@ -1337,7 +1335,7 @@ class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT
" $src1_modifiers,"));
string args = !if(!eq(HasModifiers, 0),
getAsm32<0, NumSrcArgs, DstVT>.ret,
", "#src0#src1#", $clamp");
", "#src0#src1#"$clamp");
string sdwa = !if(!eq(NumSrcArgs, 0),
"",
!if(!eq(NumSrcArgs, 1),
@@ -1679,7 +1677,7 @@ class SDWADisableFields <VOPProfile p> {
!if(!eq(p.NumSrcArgs, 1), 0,
!if(p.HasModifiers, ?, 0)));
bits<3> dst_sel = !if(p.HasDst, ?, 6);
bits<2> dst_unused = !if(p.HasDst, ?, 0);
bits<2> dst_unused = !if(p.HasDst, ?, 2);
bits<1> clamp = !if(p.HasModifiers, ?, 0);
}
@@ -1687,6 +1685,7 @@ class VOP1_SDWA <vop1 op, string opName, VOPProfile p> :
VOP1_SDWAe <op.VI>,
VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
SDWADisableFields <p> {
let AsmMatchConverter = !if(!eq(p.HasModifiers,1), "cvtSdwaVop1_mod", "cvtSdwaVop1_nomod");
let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
let DecoderNamespace = "SDWA";
let DisableDecoder = DisableVIDecoder;
@@ -1760,6 +1759,7 @@ class VOP2_SDWA <vop2 op, string opName, VOPProfile p> :
VOP2_SDWAe <op.VI>,
VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
SDWADisableFields <p> {
let AsmMatchConverter = !if(!eq(p.HasModifiers,1), "cvtSdwaVop2_mod", "cvtSdwaVop2_nomod");
let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
let DecoderNamespace = "SDWA";
let DisableDecoder = DisableVIDecoder;

Changed file 3 of 3: the SDWA assembler tests

@@ -38,3 +38,15 @@ v_min_u32 v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 sr
// NOSICI: error:
// VI: v_min_u32_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x1c,0x01,0x06,0x00,0x06]
v_min_u32 v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// NOSICI: error:
// VI: v_mov_b32_sdwa v0, v0 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x00,0x7e,0x00,0x10,0x06,0x06]
v_mov_b32 v0, v0 dst_sel:BYTE_0 src0_sel:DWORD
// NOSICI: error:
// VI: v_mov_b32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x00,0x7e,0x00,0x16,0x06,0x06]
v_mov_b32 v0, v0 src0_sel:DWORD
// NOSICI: error:
// VI: v_mov_b32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x00,0x7e,0x00,0x16,0x06,0x06]
v_mov_b32_sdwa v0, v0