mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-27 22:55:25 +00:00
AMDGPU: Don't fold subregister extracts into tied operands
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@278676 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
515497059a
commit
8f1b18be38
@ -197,9 +197,21 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
|
||||
const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
|
||||
|
||||
// FIXME: Fold operands with subregs.
|
||||
if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
|
||||
UseOp.isImplicit())) {
|
||||
return;
|
||||
if (UseOp.isReg() && OpToFold.isReg()) {
|
||||
if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
|
||||
return;
|
||||
|
||||
// Don't fold subregister extracts into tied operands; only fold a full
|
||||
// copy since a subregister use tied to a full register def doesn't really
|
||||
// make sense. e.g. don't fold:
|
||||
//
|
||||
// %vreg1 = COPY %vreg0:sub1
|
||||
// %vreg2<tied3> = V_MAC_F32 %vreg3, %vreg4, %vreg1<tied0>
|
||||
//
|
||||
// into
|
||||
// %vreg2<tied3> = V_MAC_F32 %vreg3, %vreg4, %vreg0:sub1<tied0>
|
||||
if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
|
||||
return;
|
||||
}
|
||||
|
||||
bool FoldingImm = OpToFold.isImm();
|
||||
|
@ -109,6 +109,21 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; A subregister use operand should not be tied.
|
||||
; CHECK-LABEL: {{^}}no_fold_tied_subregister:
|
||||
; CHECK: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
|
||||
; CHECK: v_mac_f32_e32 v[[LO]], 0x41200000, v[[HI]]
|
||||
; CHECK: buffer_store_dword v[[LO]]
|
||||
define void @no_fold_tied_subregister() {
|
||||
%tmp1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
|
||||
%tmp2 = extractelement <2 x float> %tmp1, i32 0
|
||||
%tmp3 = extractelement <2 x float> %tmp1, i32 1
|
||||
%tmp4 = fmul float %tmp3, 10.0
|
||||
%tmp5 = fadd float %tmp4, %tmp2
|
||||
store volatile float %tmp5, float addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
Loading…
Reference in New Issue
Block a user