mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-02 00:16:25 +00:00
AMDGPU: Fold bitcasts of scalar constants to vectors
This cleans up some messes since the individual scalar components can be CSEed.
llvm-svn: 266376
This commit is contained in:
parent
f26cedc3ed
commit
e73cb153a7
@ -399,6 +399,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
|
||||
setTargetDAGCombine(ISD::FADD);
|
||||
setTargetDAGCombine(ISD::FSUB);
|
||||
|
||||
setTargetDAGCombine(ISD::BITCAST);
|
||||
|
||||
setBooleanContents(ZeroOrNegativeOneBooleanContent);
|
||||
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
|
||||
|
||||
@ -2547,6 +2549,38 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
switch(N->getOpcode()) {
|
||||
default:
|
||||
break;
|
||||
case ISD::BITCAST: {
|
||||
EVT DestVT = N->getValueType(0);
|
||||
if (DestVT.getSizeInBits() != 64 && !DestVT.isVector())
|
||||
break;
|
||||
|
||||
// Fold bitcasts of constants.
|
||||
//
|
||||
// v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k)
|
||||
// TODO: Generalize and move to DAGCombiner
|
||||
SDValue Src = N->getOperand(0);
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) {
|
||||
assert(Src.getValueType() == MVT::i64);
|
||||
SDLoc SL(N);
|
||||
uint64_t CVal = C->getZExtValue();
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, SL, DestVT,
|
||||
DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
|
||||
DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
|
||||
}
|
||||
|
||||
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) {
|
||||
const APInt &Val = C->getValueAPF().bitcastToAPInt();
|
||||
SDLoc SL(N);
|
||||
uint64_t CVal = Val.getZExtValue();
|
||||
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
|
||||
DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
|
||||
DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
|
||||
|
||||
return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case ISD::SHL: {
|
||||
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
|
||||
break;
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
|
||||
@ -25,7 +25,7 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
|
||||
; SI-DAG: cndmask_b32
|
||||
; SI-DAG: v_cmp_lt_f64
|
||||
; SI-DAG: v_cmp_lg_f64
|
||||
; SI: s_and_b64
|
||||
; SI-DAG: s_and_b64
|
||||
; SI: v_cndmask_b32
|
||||
; SI: v_cndmask_b32
|
||||
; SI: v_add_f64
|
||||
|
@ -1,8 +1,8 @@
|
||||
;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
|
||||
;RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
|
||||
;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s
|
||||
;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
|
||||
|
||||
;FUNC-LABEL: {{^}}test_sdiv:
|
||||
;FUNC-LABEL: {{^}}s_test_sdiv:
|
||||
;EG: RECIP_UINT
|
||||
;EG: LSHL {{.*}}, 1,
|
||||
;EG: BFE_UINT
|
||||
@ -36,47 +36,47 @@
|
||||
;EG: BFE_UINT
|
||||
;EG: BFE_UINT
|
||||
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: s_endpgm
|
||||
define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN: s_bfe_u32
|
||||
; GCN-NOT: v_mad_f32
|
||||
; SI-NOT: v_lshr_b64
|
||||
; VI-NOT: v_lshrrev_b64
|
||||
; GCN: s_endpgm
|
||||
define void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%result = sdiv i64 %x, %y
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
;FUNC-LABEL: {{^}}test_srem:
|
||||
;FUNC-LABEL: {{^}}s_test_srem:
|
||||
;EG: RECIP_UINT
|
||||
;EG: BFE_UINT
|
||||
;EG: BFE_UINT
|
||||
@ -144,7 +144,7 @@ define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: s_endpgm
|
||||
define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
define void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%result = urem i64 %x, %y
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
ret void
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
@ -10,14 +10,14 @@ define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: select on 0, 0
|
||||
; SI-LABEL: {{^}}sint_to_fp_i1_f64:
|
||||
; SI: v_cmp_eq_i32_e64 vcc,
|
||||
; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
|
||||
; uses an SGPR (implicit vcc).
|
||||
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, vcc
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}}
|
||||
; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
|
||||
|
||||
; SI: s_endpgm
|
||||
define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
|
||||
%cmp = icmp eq i32 %in, 0
|
||||
|
@ -70,14 +70,13 @@ define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: select on 0, 0
|
||||
; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
|
||||
; SI: v_cmp_eq_i32_e64 vcc
|
||||
; We can't fold the SGPRs into v_cndmask_b32_e32, because it already
|
||||
; uses an SGPR (implicit vcc).
|
||||
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, vcc
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}}
|
||||
; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
|
||||
; SI: s_endpgm
|
||||
define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) {
|
||||
%cmp = icmp eq i32 %in, 0
|
||||
|
Loading…
Reference in New Issue
Block a user