mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-10 06:25:01 +00:00
AMDGPU/SI: Add support for shrinking v_cndmask_b32_e32 instructions
Reviewers: arsenm Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D11061 llvm-svn: 242146
This commit is contained in:
parent
4fd74dc826
commit
a3220fa789
@ -95,13 +95,18 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
|
||||
// a register allocation hint pre-regalloc and then do the shrinking
|
||||
// post-regalloc.
|
||||
if (Src2) {
|
||||
if (MI.getOpcode() != AMDGPU::V_MAC_F32_e64)
|
||||
return false;
|
||||
switch (MI.getOpcode()) {
|
||||
default: return false;
|
||||
|
||||
const MachineOperand *Src2Mod =
|
||||
TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers);
|
||||
if (!isVGPR(Src2, TRI, MRI) || (Src2Mod && Src2Mod->getImm() != 0))
|
||||
return false;
|
||||
case AMDGPU::V_MAC_F32_e64:
|
||||
if (!isVGPR(Src2, TRI, MRI) ||
|
||||
TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
|
||||
return false;
|
||||
break;
|
||||
|
||||
case AMDGPU::V_CNDMASK_B32_e64:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
|
||||
@ -250,6 +255,22 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
|
||||
// We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
|
||||
// instructions.
|
||||
const MachineOperand *Src2 =
|
||||
TII->getNamedOperand(MI, AMDGPU::OpName::src2);
|
||||
if (!Src2->isReg())
|
||||
continue;
|
||||
unsigned SReg = Src2->getReg();
|
||||
if (TargetRegisterInfo::isVirtualRegister(SReg)) {
|
||||
MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
|
||||
continue;
|
||||
}
|
||||
if (SReg != AMDGPU::VCC)
|
||||
continue;
|
||||
}
|
||||
|
||||
// We can shrink this instruction
|
||||
DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
|
||||
|
||||
|
@ -9,8 +9,8 @@
|
||||
; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
|
||||
; SI: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]
|
||||
; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]]
|
||||
; SI: v_cmp_le_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0.5, |[[SUB]]|
|
||||
; SI: v_cndmask_b32_e64 [[SEL:v[0-9]+]], 0, [[VX]], [[CMP]]
|
||||
; SI: v_cmp_le_f32_e64 vcc, 0.5, |[[SUB]]|
|
||||
; SI: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, [[VX]]
|
||||
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
|
||||
|
@ -6,10 +6,10 @@
|
||||
|
||||
|
||||
; FUNC-LABEL: {{^}}select_v4i8:
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind {
|
||||
%cmp = icmp eq i8 %c, 0
|
||||
%select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b
|
||||
@ -18,10 +18,10 @@ define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b,
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}select_v4i16:
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) nounwind {
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%select = select i1 %cmp, <4 x i16> %a, <4 x i16> %b
|
||||
@ -30,8 +30,8 @@ define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16>
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}select_v2i32:
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: buffer_store_dwordx2
|
||||
define void @select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind {
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
@ -41,10 +41,10 @@ define void @select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32>
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}select_v4i32:
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: buffer_store_dwordx4
|
||||
define void @select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind {
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
@ -54,14 +54,14 @@ define void @select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32>
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}select_v8i32:
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind {
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%select = select i1 %cmp, <8 x i32> %a, <8 x i32> %b
|
||||
@ -88,14 +88,14 @@ define void @select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x f
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}select_v8f32:
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind {
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%select = select i1 %cmp, <8 x float> %a, <8 x float> %b
|
||||
@ -104,10 +104,10 @@ define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x f
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}select_v2f64:
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind {
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%select = select i1 %cmp, <2 x double> %a, <2 x double> %b
|
||||
@ -116,14 +116,14 @@ define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}select_v4f64:
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind {
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%select = select i1 %cmp, <4 x double> %a, <4 x double> %b
|
||||
@ -132,22 +132,22 @@ define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}select_v8f64:
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
; SI: v_cndmask_b32_e32
|
||||
define void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind {
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%select = select i1 %cmp, <8 x double> %a, <8 x double> %b
|
||||
|
@ -55,8 +55,8 @@ define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspa
|
||||
; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0
|
||||
; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]]
|
||||
; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]]
|
||||
; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}}
|
||||
; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}}
|
||||
; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}}
|
||||
; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}}
|
||||
; CHECK: s_endpgm
|
||||
define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
|
||||
%cmp = icmp ugt i32 %cond, 5
|
||||
|
@ -12,11 +12,11 @@ define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
|
||||
|
||||
; FIXME: select on 0, 0
|
||||
; SI-LABEL: {{^}}sint_to_fp_i1_f64:
|
||||
; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
|
||||
; SI: v_cmp_eq_i32_e64 vcc,
|
||||
; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
|
||||
; uses an SGPR for [[CMP]]
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]]
|
||||
; uses an SGPR (implicit vcc).
|
||||
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, vcc
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: s_endpgm
|
||||
define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
|
||||
|
@ -72,11 +72,11 @@ define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i
|
||||
|
||||
; FIXME: select on 0, 0
|
||||
; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
|
||||
; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
|
||||
; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
|
||||
; uses an SGPR for [[CMP]]
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]]
|
||||
; SI: v_cmp_eq_i32_e64 vcc
|
||||
; We can't fold the SGPRs into v_cndmask_b32_e32, because it already
|
||||
; uses an SGPR (implicit vcc).
|
||||
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, vcc
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: s_endpgm
|
||||
define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) {
|
||||
|
@ -1,14 +1,14 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
|
||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=VI --check-prefix=FUNC %s
|
||||
|
||||
;FUNC-LABEL: {{^}}test_select_v2i32:
|
||||
|
||||
;EG: {{^}}test_select_v2i32:
|
||||
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
;SI: {{^}}test_select_v2i32:
|
||||
;SI: v_cndmask_b32_e64
|
||||
;SI: v_cndmask_b32_e64
|
||||
;SI: v_cndmask_b32_e32
|
||||
|
||||
define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
|
||||
entry:
|
||||
@ -20,13 +20,13 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
;EG: {{^}}test_select_v2f32:
|
||||
;FUNC-LABEL: {{^}}test_select_v2f32:
|
||||
|
||||
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
;SI: {{^}}test_select_v2f32:
|
||||
;SI: v_cndmask_b32_e64
|
||||
;SI: v_cndmask_b32_e64
|
||||
;SI: v_cndmask_b32_e32
|
||||
|
||||
define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) {
|
||||
entry:
|
||||
@ -38,17 +38,19 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
;EG: {{^}}test_select_v4i32:
|
||||
;FUNC-LABEL: {{^}}test_select_v4i32:
|
||||
|
||||
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
;SI: {{^}}test_select_v4i32:
|
||||
;SI: v_cndmask_b32_e64
|
||||
;SI: v_cndmask_b32_e64
|
||||
;SI: v_cndmask_b32_e64
|
||||
;SI: v_cndmask_b32_e64
|
||||
; FIXME: The shrinking does not happen on tonga
|
||||
|
||||
;SI: v_cndmask_b32
|
||||
;SI: v_cndmask_b32
|
||||
;SI: v_cndmask_b32
|
||||
;SI: v_cndmask_b32
|
||||
|
||||
define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
|
||||
entry:
|
||||
@ -60,7 +62,7 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
;EG: {{^}}test_select_v4f32:
|
||||
;FUNC-LABEL: {{^}}test_select_v4f32:
|
||||
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
@ -42,8 +42,8 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
|
||||
|
||||
; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}}
|
||||
; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}}
|
||||
; SI: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
|
||||
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[XOR]]
|
||||
; SI: s_xor_b64 [[XOR:vcc]], [[CMP0]], [[CMP1]]
|
||||
; SI: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user