[AMDGPU][llvm-mc] v_cndmask_b32: src2 is mandatory; do not enforce VOP2 when src2 == VCC.

Another step for unification llvm assembler/disassembler with sp3.
Besides, CodeGen output is a bit improved, thus changes in CodeGen tests.
Assembler/Disassembler tests updated/added.

Differential Revision: http://reviews.llvm.org/D20796

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271900 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Artem Tamazov 2016-06-06 15:23:43 +00:00
parent 683b746f24
commit 7049ac906c
9 changed files with 103 additions and 19 deletions

View File

@ -1475,6 +1475,19 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
}
// Read in from vcc or arbitrary SGPR
def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
let Src0RC32 = VCSrc_32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
let Asm32 = "$vdst, $src0, $src1, vcc";
let Asm64 = "$vdst, $src0, $src1, $src2";
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst);
// Suppress src2 implied by type since the 32-bit encoding uses an
// implicit VCC use.
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
}
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod";
@ -1516,11 +1529,6 @@ def VOPC_I1_F64_I32 : VOPC_Class_Profile<f64>;
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2);
let Asm64 = "$vdst, $src0, $src1, $src2";
}
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
@ -1863,6 +1871,26 @@ class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
let DisableDecoder = DisableVIDecoder;
}
class VOP3e_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3e <op>,
SIMCInstr<opName#"_e64", SISubtarget.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
}
class VOP3e_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3e_vi <op>,
SIMCInstr <opName#"_e64", SISubtarget.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
}
multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, int NumSrcArgs, bit HasMods = 1, bit VOP3Only = 0> {
@ -1941,6 +1969,19 @@ multiclass VOP3b_2_3_m <vop op, dag outs, dag ins, string asm,
VOP3DisableFields<1, useSrc2Input, HasMods>;
}
// Same as VOP3b_2_3_m but no 2nd destination (sdst), e.g. v_cndmask_b32.
multiclass VOP3e_2_3_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName, string revOp,
bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods, VOP3Only>;
def _si : VOP3e_Real_si <op.SI3, outs, ins, asm, opName, HasMods, VOP3Only>,
VOP3DisableFields<1, useSrc2Input, HasMods>;
def _vi : VOP3e_Real_vi <op.VI3, outs, ins, asm, opName, HasMods, VOP3Only>,
VOP3DisableFields<1, useSrc2Input, HasMods>;
}
multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName,
bit HasMods, bit defExec,
@ -2067,6 +2108,33 @@ multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
opName, revOp, P.HasModifiers>;
}
multiclass VOP2e_Helper <vop2 op, string opName, VOPProfile p,
list<dag> pat32, list<dag> pat64,
string revOp, bit useSGPRInput> {
let SchedRW = [Write32Bit, WriteSALU] in {
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
}
defm _e64 : VOP3e_2_3_m <op, p.Outs64, p.Ins64, opName#p.Asm64, pat64,
opName, revOp, p.HasModifiers, useSGPRInput>;
}
}
multiclass VOP2eInst <vop2 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag,
string revOp = opName> : VOP2e_Helper <
op, opName, P, [],
!if(P.HasModifiers,
[(set P.DstVT:$vdst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
revOp, !eq(P.NumSrcArgs, 3)
>;
multiclass VOP2b_Helper <vop2 op, string opName, VOPProfile p,
list<dag> pat32, list<dag> pat64,
string revOp, bit useSGPRInput> {

View File

@ -1473,15 +1473,9 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
// VOP2 Instructions
//===----------------------------------------------------------------------===//
multiclass V_CNDMASK <vop2 op, string name> {
defm _e32 : VOP2_m <op, name, VOP_CNDMASK, [], name>;
defm _e64 : VOP3_m <
op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
name#!cast<string>(VOP_CNDMASK.Asm64), [], name, 3, 0>;
}
defm V_CNDMASK_B32 : V_CNDMASK<vop2<0x0>, "v_cndmask_b32">;
defm V_CNDMASK_B32 : VOP2eInst <vop2<0x0, 0x0>, "v_cndmask_b32",
VOP2e_I32_I32_I32_I1
>;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",

View File

@ -25,8 +25,7 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
; SI-DAG: cndmask_b32
; SI-DAG: v_cmp_lt_f64
; SI-DAG: v_cmp_lg_f64
; SI-DAG: s_and_b64
; SI: v_cndmask_b32
; SI-DAG: v_cndmask_b32
; SI: v_cndmask_b32
; SI: v_add_f64
; SI: s_endpgm

View File

@ -25,7 +25,7 @@ define void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
; GCN-DAG: v_cmp_lt_u64
; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
; GCN: v_cndmask_b32_e32 [[SIGN_SEL:v[0-9]+]],
; GCN: v_cndmask_b32_e{{32|64}} [[SIGN_SEL:v[0-9]+]],
; GCN: {{buffer|flat}}_store_dword {{.*}}[[SIGN_SEL]]
define void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()

View File

@ -8,6 +8,9 @@
v_mul_i32_i24 v1, v2, 100
// CHECK: error: invalid operand for instruction
v_cndmask_b32 v1, v2, v3
// CHECK: error: too few operands for instruction
//===----------------------------------------------------------------------===//
// _e32 checks
//===----------------------------------------------------------------------===//
@ -20,6 +23,9 @@ v_mul_i32_i24_e32 v1, v2, 100
v_mul_i32_i24_e32 v1, v2, s3
// CHECK: error: invalid operand for instruction
v_cndmask_b32_e32 v1, v2, v3, s[0:1]
// CHECK: error: invalid operand for instruction
//===----------------------------------------------------------------------===//
// _e64 checks
//===----------------------------------------------------------------------===//

View File

@ -98,8 +98,11 @@ v_mul_i32_i24 v1, 3, s3
// Instructions
//===----------------------------------------------------------------------===//
// GCN: v_cndmask_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x00]
v_cndmask_b32 v1, v2, v3
// GCN: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x00]
v_cndmask_b32 v1, v2, v3, vcc
// GCN: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x00]
v_cndmask_b32_e32 v1, v2, v3, vcc
// SICI: v_readlane_b32 s1, v2, s3 ; encoding: [0x02,0x07,0x02,0x02]
// VI: v_readlane_b32 s1, v2, s3 ; encoding: [0x01,0x00,0x89,0xd2,0x02,0x07,0x00,0x00]

View File

@ -202,6 +202,14 @@ v_cndmask_b32 v1, v3, v5, s[4:5]
// SICI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00]
// VI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x00]
v_cndmask_b32_e64 v1, v3, v5, s[4:5]
// SICI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00]
// VI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x00]
v_cndmask_b32_e64 v1, v3, v5, vcc
// SICI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0xaa,0x01]
// VI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0xaa,0x01]
//TODO: readlane, writelane
v_add_f32 v1, v3, s5

View File

@ -1,5 +1,8 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck %s -check-prefix=VI
# VI: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x00]
0x02 0x07 0x02 0x00
# VI: v_readlane_b32 s1, v2, s3 ; encoding: [0x01,0x00,0x89,0xd2,0x02,0x07,0x00,0x00]
0x01 0x00 0x89 0xd2 0x02 0x07 0x00 0x00

View File

@ -111,6 +111,9 @@
# VI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x00]
0x01 0x00 0x00 0xd1 0x03 0x0b 0x12 0x00
# VI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0xaa,0x01]
0x01 0x00 0x00 0xd1 0x03 0x0b 0xaa 0x01
# VI: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x01,0xd1,0x03,0x0b,0x00,0x00]
0x01 0x00 0x01 0xd1 0x03 0x0b 0x00 0x00