mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-14 15:39:06 +00:00
AMDGPU: Eliminate half of i64 or if one operand is zero_extend from i32
This helps clean up some of the mess when expanding unaligned 64-bit loads when they are changed to be promoted to v2i32, and fixes situations where `or x, 0` was emitted after splitting 64-bit ors during moveToVALU. I think this could be a generic combine but I'm not sure. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266104 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
361311da1f
commit
87f61332d1
@ -2332,6 +2332,36 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
|
||||
SDValue LHS = N->getOperand(0);
|
||||
SDValue RHS = N->getOperand(1);
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
if (VT == MVT::i64) {
|
||||
// TODO: This could be a generic combine with a predicate for extracting the
|
||||
// high half of an integer being free.
|
||||
|
||||
// (or i64:x, (zero_extend i32:y)) ->
|
||||
// i64 (bitcast (v2i32 build_vector (or i32:y, lo_32(x)), hi_32(x)))
|
||||
if (LHS.getOpcode() == ISD::ZERO_EXTEND &&
|
||||
RHS.getOpcode() != ISD::ZERO_EXTEND)
|
||||
std::swap(LHS, RHS);
|
||||
|
||||
if (RHS.getOpcode() == ISD::ZERO_EXTEND) {
|
||||
SDValue ExtSrc = RHS.getOperand(0);
|
||||
EVT SrcVT = ExtSrc.getValueType();
|
||||
if (SrcVT == MVT::i32) {
|
||||
SDLoc SL(N);
|
||||
SDValue LowLHS, HiBits;
|
||||
std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG);
|
||||
SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc);
|
||||
|
||||
DCI.AddToWorklist(LowOr.getNode());
|
||||
DCI.AddToWorklist(HiBits.getNode());
|
||||
|
||||
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
|
||||
LowOr, HiBits);
|
||||
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2)
|
||||
if (LHS.getOpcode() == AMDGPUISD::FP_CLASS &&
|
||||
RHS.getOpcode() == AMDGPUISD::FP_CLASS) {
|
||||
|
41
test/CodeGen/AMDGPU/zext-i64-bit-operand.ll
Normal file
41
test/CodeGen/AMDGPU/zext-i64-bit-operand.ll
Normal file
@ -0,0 +1,41 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}zext_or_operand_i64:
|
||||
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
|
||||
; GCN: buffer_load_dword v[[LD32:[0-9]+]]
|
||||
; GCN-NOT: _or_
|
||||
; GCN-NOT: v[[HI]]
|
||||
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
|
||||
; GCN: v_or_b32_e32 v[[LO]], v[[LD32]], v[[LO]]
|
||||
; GCN-NOT: _or_
|
||||
; GCN-NOT: v[[HI]]
|
||||
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
; The zext'd i32 should be OR'd only into the low 32 bits of the 64-bit
; value; the high half must pass through untouched (checked by the GCN
; lines above). Loads are volatile so they are not folded or reordered.
define void @zext_or_operand_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
  %wide = load volatile i64, i64 addrspace(1)* %in0
  %narrow = load volatile i32, i32 addrspace(1)* %in1
  %narrow.ext = zext i32 %narrow to i64
  %result = or i64 %wide, %narrow.ext
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; GCN-LABEL: {{^}}zext_or_operand_commute_i64:
|
||||
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
|
||||
; GCN: buffer_load_dword v[[LD32:[0-9]+]]
|
||||
; GCN-NOT: _or_
|
||||
; GCN-NOT: v[[HI]]
|
||||
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
|
||||
; GCN: v_or_b32_e32 v[[LO]], v[[LD32]], v[[LO]]
|
||||
; GCN-NOT: v[[HI]]
|
||||
; GCN-NOT: _or_
|
||||
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
; Same as @zext_or_operand_i64 but with the or operands commuted: the
; zero-extended value is the first operand. The combine must handle both
; operand orders. Loads are volatile so they are not folded or reordered.
define void @zext_or_operand_commute_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
  %wide = load volatile i64, i64 addrspace(1)* %in0
  %narrow = load volatile i32, i32 addrspace(1)* %in1
  %narrow.ext = zext i32 %narrow to i64
  %result = or i64 %narrow.ext, %wide
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
|
Loading…
Reference in New Issue
Block a user