mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-12 22:30:12 +00:00
AMDGPU: Fix an error in the llvm.cttz implementation.
Differential Revision: http://reviews.llvm.org/D39014
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316037 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
37790cc1f7
commit
607acf30af
@@ -2208,9 +2208,8 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons
|
||||
EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
|
||||
*DAG.getContext(), MVT::i32);
|
||||
|
||||
- SDValue ZeroOrOne = isCtlzOpc(Op.getOpcode()) ? Zero : One;
|
||||
SDValue HiOrLo = isCtlzOpc(Op.getOpcode()) ? Hi : Lo;
|
||||
- SDValue Hi0orLo0 = DAG.getSetCC(SL, SetCCVT, HiOrLo, ZeroOrOne, ISD::SETEQ);
|
||||
+ SDValue Hi0orLo0 = DAG.getSetCC(SL, SetCCVT, HiOrLo, Zero, ISD::SETEQ);
|
||||
|
||||
SDValue OprLo = DAG.getNode(ISDOpc, SL, MVT::i32, Lo);
|
||||
SDValue OprHi = DAG.getNode(ISDOpc, SL, MVT::i32, Hi);
|
||||
@@ -2233,7 +2232,7 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons
|
||||
// FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32,
|
||||
// which we probably don't want.
|
||||
SDValue LoOrHi = isCtlzOpc(Op.getOpcode()) ? Lo : Hi;
|
||||
- SDValue Lo0OrHi0 = DAG.getSetCC(SL, SetCCVT, LoOrHi, ZeroOrOne, ISD::SETEQ);
|
||||
+ SDValue Lo0OrHi0 = DAG.getSetCC(SL, SetCCVT, LoOrHi, Zero, ISD::SETEQ);
|
||||
SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0OrHi0, Hi0orLo0);
|
||||
|
||||
// TODO: If i64 setcc is half rate, it can result in 1 fewer instruction
|
||||
|
@@ -157,6 +157,7 @@ define amdgpu_kernel void @v_cttz_zero_undef_i16_with_select(i16 addrspace(1)* n
|
||||
|
||||
; FUNC-LABEL: {{^}}v_cttz_zero_undef_i32_with_select:
|
||||
; SI: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
|
||||
+ ; SI: v_cmp_ne_u32_e32 vcc, 0
|
||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
|
||||
define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %arrayidx, align 1
|
||||
@@ -178,6 +179,8 @@ define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(i32 addrspace(1)* n
|
||||
; SI: v_or_b32_e32 [[VAL2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; SI-DAG: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL1]]
|
||||
; SI-DAG: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL2]]
|
||||
+ ; SI: v_cmp_eq_u32_e32 vcc, 0
|
||||
+ ; SI: v_cmp_ne_u64_e32 vcc, 0
|
||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
|
||||
define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(i64 addrspace(1)* noalias %out, i64 addrspace(1)* nocapture readonly %arrayidx) nounwind {
|
||||
%val = load i64, i64 addrspace(1)* %arrayidx, align 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user