mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-07 12:30:44 +00:00
AMDGPU: Fix ctlz combine for sub 32-bit types
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257353 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f12a12cd25
commit
68f559ea61
@ -2521,6 +2521,27 @@ static bool isCtlzOpc(unsigned Opc) {
|
|||||||
return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
|
return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get FFBH node if the incoming op may have been type legalized from a smaller
|
||||||
|
// type VT.
|
||||||
|
// Need to match pre-legalized type because the generic legalization inserts the
|
||||||
|
// add/sub between the select and compare.
|
||||||
|
static SDValue getFFBH_U32(const TargetLowering &TLI,
|
||||||
|
SelectionDAG &DAG, SDLoc SL, SDValue Op) {
|
||||||
|
EVT VT = Op.getValueType();
|
||||||
|
EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
|
||||||
|
if (LegalVT != MVT::i32)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
if (VT != MVT::i32)
|
||||||
|
Op = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Op);
|
||||||
|
|
||||||
|
SDValue FFBH = DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Op);
|
||||||
|
if (VT != MVT::i32)
|
||||||
|
FFBH = DAG.getNode(ISD::TRUNCATE, SL, VT, FFBH);
|
||||||
|
|
||||||
|
return FFBH;
|
||||||
|
}
|
||||||
|
|
||||||
// The native instructions return -1 on 0 input. Optimize out a select that
|
// The native instructions return -1 on 0 input. Optimize out a select that
|
||||||
// produces -1 on 0.
|
// produces -1 on 0.
|
||||||
//
|
//
|
||||||
@ -2546,7 +2567,7 @@ SDValue AMDGPUTargetLowering::performCtlzCombine(SDLoc SL,
|
|||||||
isCtlzOpc(RHS.getOpcode()) &&
|
isCtlzOpc(RHS.getOpcode()) &&
|
||||||
RHS.getOperand(0) == CmpLHS &&
|
RHS.getOperand(0) == CmpLHS &&
|
||||||
isNegativeOne(LHS)) {
|
isNegativeOne(LHS)) {
|
||||||
return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, CmpLHS);
|
return getFFBH_U32(*this, DAG, SL, CmpLHS);
|
||||||
}
|
}
|
||||||
|
|
||||||
// select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
|
// select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
|
||||||
@ -2554,7 +2575,7 @@ SDValue AMDGPUTargetLowering::performCtlzCombine(SDLoc SL,
|
|||||||
isCtlzOpc(LHS.getOpcode()) &&
|
isCtlzOpc(LHS.getOpcode()) &&
|
||||||
LHS.getOperand(0) == CmpLHS &&
|
LHS.getOperand(0) == CmpLHS &&
|
||||||
isNegativeOne(RHS)) {
|
isNegativeOne(RHS)) {
|
||||||
return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, CmpLHS);
|
return getFFBH_U32(*this, DAG, SL, CmpLHS);
|
||||||
}
|
}
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
@ -2578,10 +2599,7 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
|
|||||||
return CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
|
return CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
|
||||||
|
|
||||||
// There's no reason to not do this if the condition has other uses.
|
// There's no reason to not do this if the condition has other uses.
|
||||||
if (VT == MVT::i32)
|
|
||||||
return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
|
return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
|
||||||
|
|
||||||
return SDValue();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
|
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
|
||||||
|
@ -2,6 +2,10 @@
|
|||||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||||
|
|
||||||
|
declare i7 @llvm.ctlz.i7(i7, i1) nounwind readnone
|
||||||
|
declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
|
||||||
|
declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
|
||||||
|
|
||||||
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
|
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
|
||||||
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
|
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
|
||||||
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
|
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
|
||||||
@ -92,6 +96,20 @@ define void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrsp
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; i8 ctlz called with the i1 flag false and no surrounding select. The checks
; expect v_ffbh_u32 on the unsigned-byte load, a compare of that loaded value
; against 0 selecting 32, an add of 0xffffffe8 (-24 as a 32-bit value), and a
; byte store of the sum. The compare operand is the loaded value captured as
; VAL; no other capture in this test names it.
; FUNC-LABEL: {{^}}v_ctlz_i8:
|
||||||
|
; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
|
||||||
|
; SI-DAG: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
|
||||||
|
; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[VAL]]
|
||||||
|
; SI-DAG: v_cndmask_b32_e64 [[CORRECTED_FFBH:v[0-9]+]], [[FFBH]], 32, vcc
|
||||||
|
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[CORRECTED_FFBH]]
|
||||||
|
; SI: buffer_store_byte [[RESULT]],
|
||||||
|
define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
|
||||||
|
%val = load i8, i8 addrspace(1)* %valptr
|
||||||
|
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
|
||||||
|
store i8 %ctlz, i8 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; FUNC-LABEL: {{^}}s_ctlz_i64:
|
; FUNC-LABEL: {{^}}s_ctlz_i64:
|
||||||
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
|
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
|
||||||
; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
|
; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
|
||||||
@ -209,3 +227,43 @@ define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addr
|
|||||||
store i32 %sel, i32 addrspace(1)* %out
|
store i32 %sel, i32 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; i8 ctlz (i1 flag false) wrapped in a select that yields -1 when the input
; equals 0. The checks expect the stored byte to be the register written by
; v_ffbh_u32 on the unsigned-byte load.
; FUNC-LABEL: {{^}}v_ctlz_i8_sel_eq_neg1:
|
||||||
|
; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
|
||||||
|
; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
|
||||||
|
; SI: buffer_store_byte [[FFBH]],
|
||||||
|
define void @v_ctlz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
|
||||||
|
%val = load i8, i8 addrspace(1)* %valptr
|
||||||
|
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
|
||||||
|
%cmp = icmp eq i8 %val, 0
|
||||||
|
%sel = select i1 %cmp, i8 -1, i8 %ctlz
|
||||||
|
store i8 %sel, i8 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; i16 variant of the select-to-minus-one pattern: ctlz (i1 flag false) is
; replaced by -1 when the input equals 0. The checks expect an unsigned-short
; load, v_ffbh_u32 on it, and a short store of that same result register.
; FUNC-LABEL: {{^}}v_ctlz_i16_sel_eq_neg1:
|
||||||
|
; SI: buffer_load_ushort [[VAL:v[0-9]+]],
|
||||||
|
; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
|
||||||
|
; SI: buffer_store_short [[FFBH]],
|
||||||
|
define void @v_ctlz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) nounwind {
|
||||||
|
%val = load i16, i16 addrspace(1)* %valptr
|
||||||
|
%ctlz = call i16 @llvm.ctlz.i16(i16 %val, i1 false) nounwind readnone
|
||||||
|
%cmp = icmp eq i16 %val, 0
|
||||||
|
%sel = select i1 %cmp, i16 -1, i16 %ctlz
|
||||||
|
store i16 %sel, i16 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Same select-to-minus-one pattern on the non-power-of-two type i7. The checks
; expect v_ffbh_u32 on the unsigned-byte load, then a mask of the result with
; 0x7f (the low seven bits), and a byte store of the masked value.
; FUNC-LABEL: {{^}}v_ctlz_i7_sel_eq_neg1:
|
||||||
|
; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
|
||||||
|
; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
|
||||||
|
; SI: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]]
|
||||||
|
; SI: buffer_store_byte [[TRUNC]],
|
||||||
|
define void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {
|
||||||
|
%val = load i7, i7 addrspace(1)* %valptr
|
||||||
|
%ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone
|
||||||
|
%cmp = icmp eq i7 %val, 0
|
||||||
|
%sel = select i1 %cmp, i7 -1, i7 %ctlz
|
||||||
|
store i7 %sel, i7 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
@ -78,6 +78,18 @@ define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; i8 ctlz called with the i1 flag true and no select around it. The checks
; expect an unsigned-byte load, v_ffbh_u32 on it, an add of 0xffffffe8 (-24 as
; a 32-bit value; presumably rebasing the 32-bit count to the 8-bit width), and
; a byte store of the sum.
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i8:
|
||||||
|
; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
|
||||||
|
; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
|
||||||
|
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[FFBH]]
|
||||||
|
; SI: buffer_store_byte [[RESULT]],
|
||||||
|
define void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
|
||||||
|
%val = load i8, i8 addrspace(1)* %valptr
|
||||||
|
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
|
||||||
|
store i8 %ctlz, i8 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i64:
|
; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i64:
|
||||||
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
|
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
|
||||||
; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
|
; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
|
||||||
@ -160,6 +172,19 @@ define void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; i8 ctlz called with the i1 flag true, wrapped in a select that yields -1
; when the input equals 0. The checks expect the stored byte to be the
; register written by v_ffbh_u32 on the unsigned-byte load.
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i8_sel_eq_neg1:
|
||||||
|
; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
|
||||||
|
; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
|
||||||
|
; SI: buffer_store_byte [[FFBH]],
|
||||||
|
define void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
|
||||||
|
%val = load i8, i8 addrspace(1)* %valptr
|
||||||
|
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
|
||||||
|
%cmp = icmp eq i8 %val, 0
|
||||||
|
%sel = select i1 %cmp, i8 -1, i8 %ctlz
|
||||||
|
store i8 %sel, i8 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1_two_use:
|
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1_two_use:
|
||||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||||
; SI-DAG: v_ffbh_u32_e32 [[RESULT0:v[0-9]+]], [[VAL]]
|
; SI-DAG: v_ffbh_u32_e32 [[RESULT0:v[0-9]+]], [[VAL]]
|
||||||
@ -241,15 +266,3 @@ define void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1)* noalias %ou
|
|||||||
store i32 %sel, i32 addrspace(1)* %out
|
store i32 %sel, i32 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i8:
|
|
||||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
|
||||||
; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
|
|
||||||
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[FFBH]]
|
|
||||||
; SI: buffer_store_dword [[RESULT]],
|
|
||||||
define void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
|
|
||||||
%val = load i8, i8 addrspace(1)* %valptr
|
|
||||||
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
|
|
||||||
store i8 %ctlz, i8 addrspace(1)* %out
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user