diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 674f5b70836..7eb6ab319bb 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -1490,19 +1490,24 @@ multiclass VOP3b_2_3_m pattern, string opName, - bit HasMods, bit defExec, string revOp> { + bit HasMods, bit defExec, + string revOp, list sched> { def "" : VOP3_Pseudo , - VOP2_REV; + VOP2_REV { + let SchedRW = sched; + } def _si : VOP3_Real_si , VOP3DisableFields<1, 0, HasMods> { let Defs = !if(defExec, [EXEC], []); + let SchedRW = sched; } def _vi : VOP3_Real_vi , VOP3DisableFields<1, 0, HasMods> { let Defs = !if(defExec, [EXEC], []); + let SchedRW = sched; } } @@ -1690,39 +1695,40 @@ class VOPC_Pseudo pattern, string opName> : multiclass VOPC_m pattern, string opName, bit DefExec, VOPProfile p, + list sched, string revOpName = "", string asm = opName#"_e32 "#op_asm, string alias_asm = opName#" "#op_asm> { - def "" : VOPC_Pseudo ; - - let AssemblerPredicates = [isSICI] in { - - def _si : VOPC, - SIMCInstr { - let Defs = !if(DefExec, [VCC, EXEC], [VCC]); - let hasSideEffects = DefExec; + def "" : VOPC_Pseudo { + let SchedRW = sched; } - def : SIInstAlias < - alias_asm, - (!cast(NAME#"_e32_si") p.Src0RC32:$src0, p.Src1RC32:$src1) - >; + let AssemblerPredicates = [isSICI] in { + def _si : VOPC, + SIMCInstr { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let hasSideEffects = DefExec; + let SchedRW = sched; + } + + def : SIInstAlias < + alias_asm, + (!cast(NAME#"_e32_si") p.Src0RC32:$src0, p.Src1RC32:$src1) + >; } // End AssemblerPredicates = [isSICI] - let AssemblerPredicates = [isVI] in { + def _vi : VOPC, + SIMCInstr { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let hasSideEffects = DefExec; + let SchedRW = sched; + } - def _vi : VOPC, - SIMCInstr { - let Defs = !if(DefExec, [VCC, EXEC], [VCC]); - let hasSideEffects = DefExec; - } - - def : SIInstAlias < - alias_asm, - (!cast(NAME#"_e32_vi") p.Src0RC32:$src0, p.Src1RC32:$src1) - >; - + def : SIInstAlias < + alias_asm, + (!cast(NAME#"_e32_vi") p.Src0RC32:$src0, p.Src1RC32:$src1) + >; } // End AssemblerPredicates = [isVI] } @@ -1730,11 +1736,13 @@ multiclass VOPC_Helper pat32, dag out64, dag ins64, string asm64, list pat64, bit HasMods, bit DefExec, string revOp, - VOPProfile p> { - defm _e32 : VOPC_m ; + VOPProfile p, + list sched> { + defm _e32 : VOPC_m ; defm _e64 : VOP3_C_m ; + opName, HasMods, DefExec, revOp, + sched>; } // Special case for class instructions which only have modifiers on @@ -1743,18 +1751,21 @@ multiclass VOPC_Class_Helper pat32, dag out64, dag ins64, string asm64, list pat64, bit HasMods, bit DefExec, string revOp, - VOPProfile p> { - defm _e32 : VOPC_m ; + VOPProfile p, + list sched> { + defm _e32 : VOPC_m ; defm _e64 : VOP3_C_m , + opName, HasMods, DefExec, revOp, sched>, VOP3DisableModFields<1, 0, 0>; } multiclass VOPCInst : VOPC_Helper < + bit DefExec = 0, + list sched = [Write32Bit]> : + VOPC_Helper < op, opName, P.Ins32, P.Asm32, [], (outs VOPDstS64:$dst), P.Ins64, P.Asm64, @@ -1765,11 +1776,12 @@ multiclass VOPCInst ; multiclass VOPCClassInst : VOPC_Class_Helper < + bit DefExec = 0, + list sched> : VOPC_Class_Helper < op, opName, P.Ins32, P.Asm32, [], (outs VOPDstS64:$dst), P.Ins64, P.Asm64, @@ -1777,7 +1789,7 @@ multiclass VOPCClassInst ; @@ -1785,31 +1797,32 @@ multiclass VOPC_F32 ; multiclass VOPC_F64 : - VOPCInst ; + VOPCInst ; multiclass VOPC_I32 : VOPCInst ; multiclass VOPC_I64 : - VOPCInst ; + VOPCInst ; multiclass VOPCX sched, string revOp = ""> - : VOPCInst ; + : VOPCInst ; multiclass VOPCX_F32 : - VOPCX ; + VOPCX ; multiclass VOPCX_F64 : - VOPCX ; + VOPCX ; multiclass VOPCX_I32 : - VOPCX ; + VOPCX ; multiclass VOPCX_I64 : - VOPCX ; + VOPCX ; multiclass VOP3_Helper pat, int NumSrcArgs, bit HasMods> : VOP3_m < @@ -1817,16 +1830,16 @@ multiclass VOP3_Helper ; multiclass VOPC_CLASS_F32 : - VOPCClassInst ; + VOPCClassInst ; multiclass VOPCX_CLASS_F32 : - VOPCClassInst ; + VOPCClassInst ; multiclass VOPC_CLASS_F64 : - VOPCClassInst ; + VOPCClassInst ; multiclass VOPCX_CLASS_F64 : - VOPCClassInst ; + VOPCClassInst ; multiclass VOP3Inst : VOP3_Helper < diff --git a/lib/Target/AMDGPU/SISchedule.td b/lib/Target/AMDGPU/SISchedule.td index da7601492f9..cd77e519abb 100644 --- a/lib/Target/AMDGPU/SISchedule.td +++ b/lib/Target/AMDGPU/SISchedule.td @@ -22,12 +22,23 @@ def WriteBarrier : SchedWrite; // Vector ALU instructions def Write32Bit : SchedWrite; def WriteQuarterRate32 : SchedWrite; +def WriteFullOrQuarterRate32 : SchedWrite; def WriteFloatFMA : SchedWrite; -def WriteDouble : SchedWrite; +// Slow quarter rate f64 instruction. +def WriteDouble : SchedWrite; + +// half rate f64 instruction (same as v_add_f64) def WriteDoubleAdd : SchedWrite; +// Half rate 64-bit instructions. +def Write64Bit : SchedWrite; + +// FIXME: Should there be a class for instructions which are VALU +// instructions and have VALU rates, but write to the SALU (i.e. VOPC +// instructions) + def SIFullSpeedModel : SchedMachineModel; def SIQuarterSpeedModel : SchedMachineModel; @@ -54,7 +65,7 @@ class HWVALUWriteRes : // The latency numbers are taken from AMD Accelerated Parallel Processing -// guide. They may not be acurate. +// guide. They may not be accurate. // The latency values are 1 / (operations / cycle) / 4. multiclass SICommonWriteRes { @@ -68,6 +79,7 @@ multiclass SICommonWriteRes { def : HWWriteRes; // XXX: Guessed ??? def : HWVALUWriteRes; + def : HWVALUWriteRes; def : HWVALUWriteRes; } diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll index 805a88b59c7..80eb3b93f8e 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll @@ -271,7 +271,8 @@ define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 { ; SI: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} ; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[MASK]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI-NOT: vcc +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 { @@ -285,7 +286,8 @@ define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 { ; SI-DAG: buffer_load_dwordx2 [[VA:v\[[0-9]+:[0-9]+\]]] ; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} ; SI: v_cmp_class_f64_e32 vcc, [[VA]], [[MASK]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI-NOT: vcc +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 { diff --git a/test/CodeGen/AMDGPU/valu-i1.ll b/test/CodeGen/AMDGPU/valu-i1.ll index 7d0ebd139f5..c27702813a8 100644 --- a/test/CodeGen/AMDGPU/valu-i1.ll +++ b/test/CodeGen/AMDGPU/valu-i1.ll @@ -128,18 +128,18 @@ exit: ; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]] ; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]] ; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]] -; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]] -; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]] +; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]] +; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]] ; SI: s_cbranch_execz BB3_5 ; SI: BB#4: ; SI: buffer_store_dword -; SI: v_cmp_ge_i64_e32 vcc -; SI: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]] +; SI: v_cmp_ge_i64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]] +; SI: s_or_b64 [[COND_STATE]], [[CMP]], [[COND_STATE]] ; SI: BB3_5: -; SI: s_or_b64 exec, exec, [[ORNEG1]] -; SI: s_or_b64 [[COND_STATE]], [[ORNEG1]], [[COND_STATE]] +; SI: s_or_b64 exec, exec, [[ORNEG2]] +; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[COND_STATE]] ; SI: s_andn2_b64 exec, exec, [[COND_STATE]] ; SI: s_cbranch_execnz BB3_3