mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-26 14:25:18 +00:00
AMDGPU: Reduce 64-bit lshr by constant to 32-bit
64-bit shifts are very slow on some subtargets. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258090 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ac7f00cce5
commit
cc893f0656
@ -377,6 +377,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
|
||||
setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
|
||||
|
||||
setTargetDAGCombine(ISD::SHL);
|
||||
setTargetDAGCombine(ISD::SRL);
|
||||
setTargetDAGCombine(ISD::MUL);
|
||||
setTargetDAGCombine(ISD::SELECT);
|
||||
setTargetDAGCombine(ISD::SELECT_CC);
|
||||
@ -2562,9 +2563,46 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
|
||||
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
|
||||
|
||||
const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
|
||||
|
||||
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Zero, Lo);
|
||||
}
|
||||
|
||||
/// Rewrite a 64-bit logical shift right by a constant of at least 32 as a
/// 32-bit shift of the high half of the source.
///
///   srl i64:x, C   (C >= 32)
///   =>
///   bitcast (build_vector (srl hi_32(x), C - 32), 0) to i64
///
/// \param N   The ISD::SRL node being combined.
/// \param DCI Combiner state; only its SelectionDAG is used here.
/// \returns The replacement i64 value, or an empty SDValue when the node is
///          not an i64 shift, the amount is not a constant, or the amount is
///          below 32.
SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
  // Only 64-bit results are narrowed.
  if (N->getValueType(0) != MVT::i64)
    return SDValue();

  // The shift amount has to be a known constant.
  const ConstantSDNode *AmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!AmtNode)
    return SDValue();

  // For amounts below 32 the low half still contributes to the result, so
  // the shift cannot be expressed on the high half alone.
  unsigned Amt = AmtNode->getZExtValue();
  if (Amt < 32)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  SDValue HiIdx = DAG.getConstant(1, DL, MVT::i32);
  SDValue ZeroHalf = DAG.getConstant(0, DL, MVT::i32);

  // View the 64-bit source as two 32-bit lanes and take lane 1 (hi_32(x)).
  SDValue SrcVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, N->getOperand(0));
  SDValue HiHalf =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, SrcVec, HiIdx);

  // Shift the high half by whatever remains after the implicit 32-bit move.
  SDValue NarrowAmt = DAG.getConstant(Amt - 32, DL, MVT::i32);
  SDValue NarrowSrl = DAG.getNode(ISD::SRL, DL, MVT::i32, HiHalf, NarrowAmt);

  // Lane 0 receives the shifted value and lane 1 is zero.
  SDValue Packed =
      DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i32, NarrowSrl, ZeroHalf);

  return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Packed);
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
EVT VT = N->getValueType(0);
|
||||
@ -2701,6 +2739,12 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
|
||||
return performShlCombine(N, DCI);
|
||||
}
|
||||
case ISD::SRL: {
|
||||
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
|
||||
break;
|
||||
|
||||
return performSrlCombine(N, DCI);
|
||||
}
|
||||
case ISD::MUL:
|
||||
return performMulCombine(N, DCI);
|
||||
case AMDGPUISD::MUL_I24:
|
||||
|
@ -69,6 +69,7 @@ private:
|
||||
|
||||
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue performCtlzCombine(SDLoc SL, SDValue Cond, SDValue LHS, SDValue RHS,
|
||||
DAGCombinerInfo &DCI) const;
|
||||
|
@ -396,8 +396,6 @@ define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x
|
||||
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
|
||||
|
||||
; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
|
||||
; SI: v_lshr_b64 v{{\[[0-9]+:[0-9]+\]}}, [[LOAD]], 32
|
||||
; VI: v_lshrrev_b64 v{{\[[0-9]+:[0-9]+\]}}, 32, [[LOAD]]
|
||||
; GCN: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
|
||||
|
||||
; GCN: v_cvt_f32_f16_e32
|
||||
|
64
test/CodeGen/AMDGPU/shift-i64-opts.ll
Normal file
64
test/CodeGen/AMDGPU/shift-i64-opts.ll
Normal file
@ -0,0 +1,64 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=FAST64 -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=SLOW64 -check-prefix=GCN %s
|
||||
|
||||
|
||||
; lshr (i64 x), c: c >= 32 => reg_sequence lshr (i32 hi_32(x)), (c - 32), 0
|
||||
; GCN-LABEL: {{^}}lshr_i64_35:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; GCN: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 3, [[VAL]]
|
||||
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @lshr_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
  ; Load an i64, shift it right logically by 35, and store the result.
  %x = load i64, i64 addrspace(1)* %in
  %shr = lshr i64 %x, 35
  store i64 %shr, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; GCN-LABEL: {{^}}lshr_i64_63:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; GCN: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 31, [[VAL]]
|
||||
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @lshr_i64_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
  ; Load an i64, shift it right logically by 63, and store the result.
  %x = load i64, i64 addrspace(1)* %in
  %shr = lshr i64 %x, 63
  store i64 %shr, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; GCN-LABEL: {{^}}lshr_i64_33:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; GCN: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 1, [[VAL]]
|
||||
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @lshr_i64_33(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
  ; Load an i64, shift it right logically by 33, and store the result.
  %x = load i64, i64 addrspace(1)* %in
  %shr = lshr i64 %x, 33
  store i64 %shr, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; GCN-LABEL: {{^}}lshr_i64_32:
|
||||
; GCN: buffer_load_dword v[[LO:[0-9]+]]
|
||||
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @lshr_i64_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
  ; Boundary case: a logical shift right by exactly 32.
  %x = load i64, i64 addrspace(1)* %in
  %shr = lshr i64 %x, 32
  store i64 %shr, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; GCN-LABEL: {{^}}lshr_and_i64_35:
|
||||
; XGCN: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; XGCN: v_lshlrev_b32_e32 v[[LO:[0-9]+]], 3, [[VAL]]
|
||||
; XGCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
|
||||
; XGCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @lshr_and_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
  ; The mask keeps only bits 0..30, so the following shift right by 35
  ; moves every remaining set bit out of the value.
  %x = load i64, i64 addrspace(1)* %in
  %masked = and i64 %x, 2147483647 ; 0x7fffffff
  %shr = lshr i64 %masked, 35
  store i64 %shr, i64 addrspace(1)* %out
  ret void
}
|
Loading…
x
Reference in New Issue
Block a user