[AMDGPU][MC] Fix for Bug 28207 + LIT tests

Enabled clamp and omod for v_cvt_* opcodes which have src0 of an integer type

Reviewers: vpykhtin, arsenm

Differential Revision: https://reviews.llvm.org/D31327

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298852 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dmitry Preobrazhensky 2017-03-27 15:57:17 +00:00
parent c7506ed124
commit cb5431a931
6 changed files with 226 additions and 18 deletions

View File

@ -162,6 +162,9 @@ private:
SDValue &Clamp,
SDValue &Omod) const;
// Selects a VOP3 source that has no source-modifier operand; the definition
// passes In through as Src and sets Clamp and Omod to zero constants.
bool SelectVOP3OMods(SDValue In, SDValue &Src,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp) const;
@ -1669,6 +1672,18 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
/// Complex-pattern selector for a source operand that has no source-modifier
/// operand, on instructions that still carry clamp and omod operands.
///
/// \param In     the value being matched.
/// \param Src    receives \p In unchanged.
/// \param Clamp  receives an i32 target constant with value 0.
/// \param Omod   receives an i32 target constant with value 0.
/// \returns true unconditionally; the match never fails.
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  const SDLoc Loc(In);

  // FIXME: Handle Clamp and Omod
  // Until then both output modifiers are always emitted as "off" (0).
  const auto MakeZeroImm = [&] {
    return CurDAG->getTargetConstant(0, Loc, MVT::i32);
  };

  Src = In;
  Clamp = MakeZeroImm();
  Omod = MakeZeroImm();
  return true;
}
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = 0;

View File

@ -1018,11 +1018,13 @@ public:
void cvtId(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands);
// Converter that uses cvtVOP3OMod for opcodes flagged VOP3 and cvtId otherwise.
void cvtVOP3_omod(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3Impl(MCInst &Inst,
const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
// VOP3 converter that adds defs and sources as parsed, then appends the
// optional clamp and omod immediates.
void cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
void cvtMIMG(MCInst &Inst, const OperandVector &Operands);
@ -3678,6 +3680,15 @@ void AMDGPUAsmParser::cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands)
}
}
/// Asm-match converter that picks the conversion routine from the opcode's
/// encoding flags.
///
/// Opcodes whose TSFlags include SIInstrFlags::VOP3 are converted with
/// cvtVOP3OMod; every other opcode is converted with cvtId.
///
/// \param Inst      instruction being built; its opcode is already set.
/// \param Operands  parsed operands to convert into \p Inst.
void AMDGPUAsmParser::cvtVOP3_omod(MCInst &Inst, const OperandVector &Operands) {
  const uint64_t Flags = MII.get(Inst.getOpcode()).TSFlags;
  const bool IsVOP3 = (Flags & SIInstrFlags::VOP3) != 0;

  if (!IsVOP3) {
    cvtId(Inst, Operands);
    return;
  }

  cvtVOP3OMod(Inst, Operands);
}
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
// 1. This operand is input modifiers
return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
@ -3737,6 +3748,28 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
}
}
/// Asm-match converter for VOP3 instructions that take clamp/omod but whose
/// sources are added without separate modifier operands.
///
/// Layout produced in \p Inst: one register operand per def, then every
/// non-modifier operand in parse order, then clamp, then omod. Clamp and omod
/// are appended through addOptionalImmOperand whether or not they were
/// written in the assembly.
///
/// \param Inst      instruction being built; its opcode is already set.
/// \param Operands  parsed operands; index 0 is skipped (presumably the
///                  mnemonic token - TODO confirm).
void AMDGPUAsmParser::cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  const unsigned NumDefs = MII.get(Inst.getOpcode()).getNumDefs();
  const unsigned NumOperands = Operands.size();

  // Destination registers come first, starting at parsed operand 1.
  unsigned Idx = 1;
  for (unsigned Def = 0; Def < NumDefs; ++Def, ++Idx) {
    auto &DefOp = static_cast<AMDGPUOperand &>(*Operands[Idx]);
    DefOp.addRegOperands(Inst, 1);
  }

  // Remaining operands: remember where each modifier was parsed so it can be
  // emitted in canonical order below; add everything else immediately.
  while (Idx != NumOperands) {
    auto &Parsed = static_cast<AMDGPUOperand &>(*Operands[Idx]);
    if (Parsed.isMod())
      OptionalIdx[Parsed.getImmTy()] = Idx;
    else
      Parsed.addRegOrImmOperands(Inst, 1);
    ++Idx;
  }

  // Emit the output modifiers in fixed order: clamp, then omod.
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyClampSI);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOModSI);
}
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
OptionalImmIndexMap OptIdx;

View File

@ -659,6 +659,8 @@ def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
// Untyped complex pattern with three operands, matched by SelectVOP3OMods.
def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;

View File

@ -85,10 +85,17 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
}
// Builds the selection pattern list `ret` for a VOP1 instruction from `node`
// and profile `P`:
//   - P.HasModifiers: src0 is matched through VOP3Mods0
//     (src0, src0_modifiers, clamp, omod);
//   - otherwise, P.HasOMod: src0 is matched through VOP3OMods
//     (src0, clamp, omod);
//   - otherwise: src0 is matched directly.
// NOTE(review): `ret` appears twice below. The first, two-way form looks like
// the pre-change text of this diff hunk and the second, three-way form its
// replacement - confirm against the real file.
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
[(set P.DstVT:$vdst, (node P.Src0VT:$src0))]);
list<dag> ret =
!if(P.HasModifiers,
[(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
i32:$src0_modifiers,
i1:$clamp, i32:$omod))))],
!if(P.HasOMod,
[(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
i1:$clamp, i32:$omod))))],
[(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
)
);
}
multiclass VOP1Inst <string opName, VOPProfile P,
@ -98,6 +105,23 @@ multiclass VOP1Inst <string opName, VOPProfile P,
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
}
// Profile for single-source instructions that accept the clamp and omod
// output modifiers but take no input (source) modifiers.
//   dstVt - destination value type
//   srcVt - src0 value type; the remaining two source slots are untyped
class VOPProfileI2F<ValueType dstVt, ValueType srcVt>
    : VOPProfile<[dstVt, srcVt, untyped, untyped]> {
  // Modifier flags: no source modifiers, clamp and omod enabled.
  let HasModifiers = 0;
  let HasClamp = 1;
  let HasOMod = 1;

  // Ins64 / Asm64: src0 followed directly by the optional clamp and omod.
  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let Asm64 = "$vdst, $src0$clamp$omod";
}
// VOPProfileI2F instantiations, named VOP1_<dst>_<src>: f64<-i32, f32<-i32
// and f16<-i16.
def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//
@ -144,24 +168,24 @@ def V_READFIRSTLANE_B32 :
// VOP1 conversion instructions that all share SchedRW = [WriteQuarterRate32].
// NOTE(review): the conversions with an integer source are listed twice, once
// with a VOP_* profile and once with the matching VOP1_* profile. The VOP_*
// lines look like the pre-change text of this diff hunk and the VOP1_* lines
// their replacements - confirm against the real file.
let SchedRW = [WriteQuarterRate32] in {
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>;
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>;
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>;
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>;
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
} // End SchedRW = [WriteQuarterRate32]
defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
@ -299,8 +323,8 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
let SubtargetPredicate = isVI in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16, sint_to_fp>;
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;

View File

@ -107,8 +107,12 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
let AsmVariantName = AMDGPUAsmVariants.VOP3;
let AsmMatchConverter =
!if(!eq(VOP3Only,1),
!if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
!if(!eq(P.HasModifiers, 1), "cvtVOP3_2_mod", ""));
!if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
!if(!eq(P.HasModifiers, 1),
"cvtVOP3_2_mod",
!if(!eq(P.HasOMod, 1), "cvtVOP3OMod", "")
)
);
VOPProfile Pfl = P;
}

View File

@ -255,4 +255,134 @@ v_cubeid_f32 v0, s0, s0, neg(0x3e22f983)
// CHECK: [0x00,0x00,0xc4,0xd1,0x00,0x00,0xe0,0x83]
v_cubeid_f32 v0, s0, s0, abs(0x3e22f983)
// CHECK: [0x00,0x04,0xc4,0xd1,0x00,0x00,0xe0,0x03]
// CHECK: [0x00,0x04,0xc4,0xd1,0x00,0x00,0xe0,0x03]
//---------------------------------------------------------------------------//
// VOP3 Instructions without Input Modifiers but with Output Modifiers
//---------------------------------------------------------------------------//
v_cvt_f64_i32_e64 v[5:6], s1 clamp
// CHECK: [0x05,0x80,0x44,0xd1,0x01,0x00,0x00,0x00]
v_cvt_f64_i32_e64 v[5:6], s1 mul:2
// CHECK: [0x05,0x00,0x44,0xd1,0x01,0x00,0x00,0x08]
v_cvt_f64_i32_e64 v[5:6], s1 mul:4
// CHECK: [0x05,0x00,0x44,0xd1,0x01,0x00,0x00,0x10]
v_cvt_f64_i32_e64 v[5:6], s1 div:2
// CHECK: [0x05,0x00,0x44,0xd1,0x01,0x00,0x00,0x18]
v_cvt_f64_u32_e64 v[5:6], s1 clamp
// CHECK: [0x05,0x80,0x56,0xd1,0x01,0x00,0x00,0x00]
v_cvt_f64_u32_e64 v[5:6], s1 mul:2
// CHECK: [0x05,0x00,0x56,0xd1,0x01,0x00,0x00,0x08]
v_cvt_f64_u32_e64 v[5:6], s1 mul:4
// CHECK: [0x05,0x00,0x56,0xd1,0x01,0x00,0x00,0x10]
v_cvt_f64_u32_e64 v[5:6], s1 div:2
// CHECK: [0x05,0x00,0x56,0xd1,0x01,0x00,0x00,0x18]
v_cvt_f32_i32_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x45,0xd1,0x01,0x00,0x00,0x00]
v_cvt_f32_i32_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x45,0xd1,0x01,0x00,0x00,0x08]
v_cvt_f32_i32_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x45,0xd1,0x01,0x00,0x00,0x10]
v_cvt_f32_i32_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x45,0xd1,0x01,0x00,0x00,0x18]
v_cvt_f32_u32_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x46,0xd1,0x01,0x00,0x00,0x00]
v_cvt_f32_u32_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x46,0xd1,0x01,0x00,0x00,0x08]
v_cvt_f32_u32_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x46,0xd1,0x01,0x00,0x00,0x10]
v_cvt_f32_u32_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x46,0xd1,0x01,0x00,0x00,0x18]
v_cvt_off_f32_i4_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x4e,0xd1,0x01,0x00,0x00,0x00]
v_cvt_off_f32_i4_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x4e,0xd1,0x01,0x00,0x00,0x08]
v_cvt_off_f32_i4_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x4e,0xd1,0x01,0x00,0x00,0x10]
v_cvt_off_f32_i4_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x4e,0xd1,0x01,0x00,0x00,0x18]
v_cvt_f32_ubyte0_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x51,0xd1,0x01,0x00,0x00,0x00]
v_cvt_f32_ubyte0_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x51,0xd1,0x01,0x00,0x00,0x08]
v_cvt_f32_ubyte0_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x51,0xd1,0x01,0x00,0x00,0x10]
v_cvt_f32_ubyte0_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x51,0xd1,0x01,0x00,0x00,0x18]
v_cvt_f32_ubyte1_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x52,0xd1,0x01,0x00,0x00,0x00]
v_cvt_f32_ubyte1_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x52,0xd1,0x01,0x00,0x00,0x08]
v_cvt_f32_ubyte1_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x52,0xd1,0x01,0x00,0x00,0x10]
v_cvt_f32_ubyte1_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x52,0xd1,0x01,0x00,0x00,0x18]
v_cvt_f32_ubyte2_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x53,0xd1,0x01,0x00,0x00,0x00]
v_cvt_f32_ubyte2_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x53,0xd1,0x01,0x00,0x00,0x08]
v_cvt_f32_ubyte2_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x53,0xd1,0x01,0x00,0x00,0x10]
v_cvt_f32_ubyte2_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x53,0xd1,0x01,0x00,0x00,0x18]
v_cvt_f32_ubyte3_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x54,0xd1,0x01,0x00,0x00,0x00]
v_cvt_f32_ubyte3_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x54,0xd1,0x01,0x00,0x00,0x08]
v_cvt_f32_ubyte3_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x54,0xd1,0x01,0x00,0x00,0x10]
v_cvt_f32_ubyte3_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x54,0xd1,0x01,0x00,0x00,0x18]
// NB: output modifiers are not supported for f16
v_cvt_f16_i16_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x7a,0xd1,0x01,0x00,0x00,0x00]
// NB: output modifiers are not supported for f16
v_cvt_f16_u16_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x79,0xd1,0x01,0x00,0x00,0x00]