R600/SI: Use REG_SEQUENCE instead of INSERT_SUBREGs

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221118 91177308-0d34-0410-b5e6-96231b3b80d8
Matt Arsenault 2014-11-02 23:46:54 +00:00
parent 2220408e1a
commit 37b154c175
4 changed files with 43 additions and 47 deletions
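
Every hunk below applies one rewrite: patterns that assembled a wide register by chaining INSERT_SUBREGs off an IMPLICIT_DEF now emit a single REG_SEQUENCE. As a minimal sketch of the shape, here is the i64 zero-extension pattern from the last file, before and after (nothing in this sketch is new code):

// Before: insert one subregister at a time into an undefined
// 64-bit value; the register class of the result stays implicit.
(INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub0),
  (S_MOV_B32 0), sub1)

// After: one node assembles every subregister and names the
// result's register class explicitly.
(REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 0), sub1)

Besides collapsing three nodes into one, REG_SEQUENCE states the register class up front, which the SI patterns below use to keep scalar results (SReg_64) apart from vector results (VReg_64, VReg_128).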

lib/Target/R600/AMDGPUInstructions.td

@@ -524,8 +524,9 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
 // BFI_INT patterns
-multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> {
+multiclass BFIPatterns <Instruction BFI_INT,
+                        Instruction LoadImm32,
+                        RegisterClass RC64> {
   // Definition from ISA doc:
   // (y & x) | (z & ~x)
   def : Pat <
@@ -547,8 +548,8 @@ multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> {
   def : Pat <
     (f64 (fcopysign f64:$src0, f64:$src1)),
-    (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0),
+    (REG_SEQUENCE RC64,
+      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
       (BFI_INT (LoadImm32 0x7fffffff),
                (i32 (EXTRACT_SUBREG $src0, sub1)),
                (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
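
The new RC64 parameter is what lets this single fcopysign pattern serve both generations: each backend passes its own 64-bit register class when instantiating the multiclass, as the hunks in the two instruction files below show:

defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32, R600_Reg64>; // Evergreen
defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;       // SI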

lib/Target/R600/EvergreenInstructions.td

@@ -302,7 +302,7 @@ def : Pat<(i32 (sext_inreg i32:$src, i8)),
 def : Pat<(i32 (sext_inreg i32:$src, i16)),
   (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
-defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32>;
+defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32, R600_Reg64>;
 def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
   [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],

lib/Target/R600/SIISelLowering.cpp

@@ -1943,7 +1943,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
   if (TII->isMIMG(Node->getMachineOpcode()))
     adjustWritemask(Node, DAG);

-  if (Node->getMachineOpcode() == AMDGPU::INSERT_SUBREG) {
+  if (Node->getMachineOpcode() == AMDGPU::INSERT_SUBREG ||
+      Node->getMachineOpcode() == AMDGPU::REG_SEQUENCE) {
     legalizeTargetIndependentNode(Node, DAG);
     return Node;
   }
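
A brief note on why this is the only C++ change: the .td hunks in this commit replace INSERT_SUBREG chains with REG_SEQUENCE nodes, so the post-isel fixup that already ran for INSERT_SUBREG has to cover REG_SEQUENCE as well. Both are target-independent nodes, and presumably whatever operand legalization legalizeTargetIndependentNode performs for one applies unchanged to the other, hence the widened condition rather than any new logic.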

lib/Target/R600/SIInstructions.td

@@ -2403,11 +2403,12 @@ def : Pat <
 // FIXME: Should use S_OR_B32
 def : Pat <
   (fneg (fabs f64:$src)),
-  (f64 (INSERT_SUBREG
-    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-      (i32 (EXTRACT_SUBREG f64:$src, sub0)), sub0),
+  (REG_SEQUENCE VReg_64,
+    (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+    sub0,
     (V_OR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
-                  (V_MOV_B32_e32 0x80000000)), sub1)) // Set sign bit.
+                  (V_MOV_B32_e32 0x80000000)), // Set sign bit.
+    sub1)
 >;

 def : Pat <
@@ -2422,20 +2423,22 @@ def : Pat <
 def : Pat <
   (fabs f64:$src),
-  (f64 (INSERT_SUBREG
-    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-      (i32 (EXTRACT_SUBREG f64:$src, sub0)), sub0),
+  (REG_SEQUENCE VReg_64,
+    (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+    sub0,
     (V_AND_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
-                   (V_MOV_B32_e32 0x7fffffff)), sub1)) // Set sign bit.
+                   (V_MOV_B32_e32 0x7fffffff)), // Set sign bit.
+    sub1)
 >;

 def : Pat <
   (fneg f64:$src),
-  (f64 (INSERT_SUBREG
-    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-      (i32 (EXTRACT_SUBREG f64:$src, sub0)), sub0),
+  (REG_SEQUENCE VReg_64,
+    (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+    sub0,
     (V_XOR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
-                   (V_MOV_B32_e32 0x80000000)), sub1))
+                   (V_MOV_B32_e32 0x80000000)),
+    sub1)
 >;
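
A note on the masks in these three patterns: the f64 lives in two 32-bit subregisters and its sign is bit 63, i.e. bit 31 of the sub1 half, so sub0 passes through untouched. OR with 0x80000000 forces the sign bit on (fneg (fabs)), AND with 0x7fffffff clears it (fabs), and XOR with 0x80000000 flips it (fneg); the recurring "// Set sign bit." comment is copied across all three and is only literally accurate for the OR case.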
 /********** ================== **********/
@@ -2505,27 +2508,23 @@ def : Pat<
 def : Pat <
   (int_AMDGPU_cube v4f32:$src),
-  (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+  (REG_SEQUENCE VReg_128,
     (V_CUBETC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
                   0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
                   0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
-                  0 /* clamp */, 0 /* omod */),
-                  sub0),
+                  0 /* clamp */, 0 /* omod */), sub0,
     (V_CUBESC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
                   0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub1),
                   0 /* src2_modifiers */,(EXTRACT_SUBREG $src, sub2),
-                  0 /* clamp */, 0 /* omod */),
-                  sub1),
+                  0 /* clamp */, 0 /* omod */), sub1,
     (V_CUBEMA_F32 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub0),
                   0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub1),
                   0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub2),
-                  0 /* clamp */, 0 /* omod */),
-                  sub2),
+                  0 /* clamp */, 0 /* omod */), sub2,
     (V_CUBEID_F32 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub0),
                   0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub1),
                   0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub2),
-                  0 /* clamp */, 0 /* omod */),
-                  sub3)
+                  0 /* clamp */, 0 /* omod */), sub3)
 >;

 def : Pat <
@@ -2581,7 +2580,7 @@ def : Pat <
 def : Vop3ModPat<V_MAD_F32, VOP_F32_F32_F32_F32, AMDGPUmad>;

-defm : BFIPatterns <V_BFI_B32, S_MOV_B32>;
+defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
 def : ROTRPattern <V_ALIGNBIT_B32>;

 /********** ======================= **********/
@@ -2968,37 +2967,35 @@ def : Pat<(i32 (sext_inreg i32:$src, i1)),
 // Handle sext_inreg in i64
 def : Pat <
   (i64 (sext_inreg i64:$src, i1)),
-  (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
-    (S_BFE_I32 (EXTRACT_SUBREG i64:$src, sub0), 65536), sub0), // 0 | 1 << 16
+  (REG_SEQUENCE SReg_64,
+    (S_BFE_I32 (EXTRACT_SUBREG i64:$src, sub0), 65536), sub0, // 0 | 1 << 16
     (S_MOV_B32 -1), sub1)
 >;
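
The 65536 here is the packed bitfield descriptor that the "0 | 1 << 16" comment spells out: the scalar BFE instructions take the field offset in the low bits of the second source and the field width in bits 22:16, so 1 << 16 selects a one-bit field at offset 0, which S_BFE_I32 then sign-extends across the low word.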

 def : Pat <
   (i64 (sext_inreg i64:$src, i8)),
-  (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
-    (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
+  (REG_SEQUENCE SReg_64,
+    (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0,
     (S_MOV_B32 -1), sub1)
 >;

 def : Pat <
   (i64 (sext_inreg i64:$src, i16)),
-  (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
-    (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
+  (REG_SEQUENCE SReg_64,
+    (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0,
     (S_MOV_B32 -1), sub1)
 >;

 class ZExt_i64_i32_Pat <SDNode ext> : Pat <
   (i64 (ext i32:$src)),
-  (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub0),
-    (S_MOV_B32 0), sub1)
+  (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 0), sub1)
 >;

 class ZExt_i64_i1_Pat <SDNode ext> : Pat <
   (i64 (ext i1:$src)),
-  (INSERT_SUBREG
-    (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
-      (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0),
-    (S_MOV_B32 0), sub1)
+  (REG_SEQUENCE VReg_64,
+    (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
+    (S_MOV_B32 0), sub1)
 >;
@@ -3009,17 +3006,14 @@ def : ZExt_i64_i1_Pat<anyext>;
 def : Pat <
   (i64 (sext i32:$src)),
-  (INSERT_SUBREG
-    (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub0),
-    (S_ASHR_I32 $src, 31), sub1)
+  (REG_SEQUENCE SReg_64, $src, sub0,
+    (S_ASHR_I32 $src, 31), sub1)
 >;

 def : Pat <
   (i64 (sext i1:$src)),
-  (INSERT_SUBREG
-    (INSERT_SUBREG
-      (i64 (IMPLICIT_DEF)),
-      (V_CNDMASK_B32_e64 0, -1, $src), sub0),
+  (REG_SEQUENCE VReg_64,
+    (V_CNDMASK_B32_e64 0, -1, $src), sub0,
     (V_CNDMASK_B32_e64 0, -1, $src), sub1)
 >;
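
The choice of instructions in these last two patterns tracks the register classes: an i32 in an SGPR gets its high word from S_ASHR_I32 $src, 31, which replicates the sign bit 32 times, while an i1 on SI is a condition mask, so V_CNDMASK_B32_e64 0, -1, $src selects 0 or -1 per lane, and writing the same select into both halves produces the all-zeros or all-ones i64.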