RegisterCoalescer: Only coalesce complete reserved registers.
The coalescer eliminates copies from reserved registers of the form

  %vregX = COPY %rY

where %rY is a reserved register. However, this turns out to be invalid if only some of %rY's subregisters are reserved (see also https://reviews.llvm.org/D26648).

Differential Revision: https://reviews.llvm.org/D26687

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288428 91177308-0d34-0410-b5e6-96231b3b80d8
parent 1750ec944f
commit c139fdb8e4
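To make the failure mode concrete: on AArch64 the register pair %xzr_x0 covers the constant %xzr and the allocatable %x0, so the pair is only partially reserved, and coalescing a copy from it would require liveness updates for the %x0 half that this code cannot perform. What follows is a minimal, self-contained sketch of the whole-register check the patch introduces; it uses the MCRegUnitIterator/MCRegUnitRootIterator APIs visible in the diff below, but the helper name allRegUnitsReserved is hypothetical and for illustration only.

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Hypothetical helper (not part of the patch): a physreg may only be
// treated as fully reserved when every register unit it covers has all
// of its root registers reserved. %xzr_x0 fails this check because the
// %x0 root is allocatable, even though the %xzr root is reserved.
static bool allRegUnitsReserved(unsigned PhysReg,
                                const MachineRegisterInfo &MRI,
                                const TargetRegisterInfo &TRI) {
  // Walk every register unit aliased by PhysReg...
  for (MCRegUnitIterator UI(PhysReg, &TRI); UI.isValid(); ++UI)
    // ...and every root register of that unit; a single unreserved root
    // means the register is only partially reserved.
    for (MCRegUnitRootIterator RI(*UI, &TRI); RI.isValid(); ++RI)
      if (!MRI.isReserved(*RI))
        return false;
  return true;
}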
@@ -1571,11 +1571,17 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
   // Deny any overlapping intervals. This depends on all the reserved
   // register live ranges to look like dead defs.
   if (!MRI->isConstantPhysReg(DstReg)) {
-    for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI)
+    for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
+      // Abort if not all the regunits are reserved.
+      for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
+        if (!MRI->isReserved(*RI))
+          return false;
+      }
       if (RHS.overlaps(LIS->getRegUnit(*UI))) {
         DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
         return false;
       }
+    }
   }
 
   // Skip any value computations, we are not adding new values to the
@@ -1,31 +0,0 @@
-# RUN: llc -mtriple=aarch64-- -run-pass=simple-register-coalescing %s -o - | FileCheck %s
---- |
-  define void @func() { ret void }
-...
----
-# Check that we eliminate copies to/from constant physregs regardless of
-# "interfering" reads/writes.
-# CHECK: name: func
-# CHECK-NOT: COPY
-# CHECK: STRWui %wzr, %x1
-# CHECK-NOT: COPY
-# CHECK: STRXui %xzr, %x1
-# CHECK: %wzr = SUBSWri %w1, 0, 0
-name: func
-registers:
-  - { id: 0, class: gpr32 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: gpr32 }
-body: |
-  bb.0:
-    %0 = COPY %wzr
-    dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv
-    STRWui %0, %x1, 0
-
-    %1 = COPY %xzr
-    dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv
-    STRXui %1, %x1, 0
-
-    %2 = SUBSWri %w1, 0, 0, implicit-def %nzcv
-    %wzr = COPY %2
-...
test/CodeGen/AArch64/regcoal-physreg.mir (new file, 67 lines)
@@ -0,0 +1,67 @@
+# RUN: llc -mtriple=aarch64-apple-ios -run-pass=simple-register-coalescing %s -o - | FileCheck %s
+--- |
+  define void @func() { ret void }
+...
+---
+# Check coalescing of COPYs from reserved physregs.
+# CHECK-LABEL: name: func
+name: func
+registers:
+  - { id: 0, class: gpr32 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: gpr64 }
+  - { id: 3, class: gpr32 }
+  - { id: 4, class: gpr64 }
+  - { id: 5, class: gpr32 }
+  - { id: 6, class: xseqpairsclass }
+body: |
+  bb.0:
+    ; We usually should not coalesce copies from allocatable physregs.
+    ; CHECK: %0 = COPY %w7
+    ; CHECK: STRWui %0, %x1, 0
+    %0 = COPY %w7
+    STRWui %0, %x1, 0
+
+    ; It is fine to coalesce copies from reserved physregs
+    ; CHECK-NOT: COPY
+    ; CHECK: STRXui %fp, %x1, 0
+    %1 = COPY %fp
+    STRXui %1, %x1, 0
+
+    ; It is not fine to coalesce copies from reserved physregs when they are
+    ; clobbered.
+    ; CHECK: %2 = COPY %fp
+    ; CHECK: STRXui %2, %x1, 0
+    %2 = COPY %fp
+    %fp = SUBXri %fp, 4, 0
+    STRXui %2, %x1, 0
+
+    ; It is fine to coalesce copies from constant physregs even when they are
+    ; clobbered.
+    ; CHECK-NOT: COPY
+    ; CHECK: STRWui %wzr, %x1
+    %3 = COPY %wzr
+    dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv
+    STRWui %3, %x1, 0
+
+    ; It is fine to coalesce copies from constant physregs even when they are
+    ; clobbered.
+    ; CHECK-NOT: COPY
+    ; CHECK: STRXui %xzr, %x1
+    %4 = COPY %xzr
+    dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv
+    STRXui %4, %x1, 0
+
+    ; Coalescing COPYs into constant physregs.
+    ; CHECK: %wzr = SUBSWri %w1, 0, 0
+    %5 = SUBSWri %w1, 0, 0, implicit-def %nzcv
+    %wzr = COPY %5
+
+    ; Only coalesce when the source register is reserved as a whole (this is
+    ; a limitation of the current code which cannot update liveness information
+    ; of the non-reserved part).
+    ; CHECK: %6 = COPY %xzr_x0
+    ; CHECK: HINT 0, implicit %6
+    %6 = COPY %xzr_x0
+    HINT 0, implicit %6
+...
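The last case above is the one this patch changes: %xzr_x0 is only partially reserved, so its COPY must survive. Taken together, the test exercises three regimes for a copy from a physreg. The sketch below summarizes how they differ; it is illustrative only (classifyPhysRegCopy is not an LLVM API) and reuses the hypothetical allRegUnitsReserved helper from earlier.

// Illustrative classification of "%vreg = COPY %physreg" for the cases
// in the test above; not real LLVM code.
enum class CopyCoalescing {
  Never,            // allocatable physreg (e.g. %w7): left to the allocator
  IfNoInterference, // reserved physreg (e.g. %fp): ok unless clobbered
  Always            // constant physreg (%wzr/%xzr): intervening defs are moot
};

static CopyCoalescing classifyPhysRegCopy(unsigned SrcReg,
                                          const MachineRegisterInfo &MRI,
                                          const TargetRegisterInfo &TRI) {
  // Constant physregs never change value, so clobbers cannot invalidate
  // the copied value (%3 = COPY %wzr coalesces past dead %wzr defs).
  if (MRI.isConstantPhysReg(SrcReg))
    return CopyCoalescing::Always;
  // A register reserved as a whole may be coalesced when its live range
  // is not clobbered between the copy and the use (%1 = COPY %fp).
  if (allRegUnitsReserved(SrcReg, MRI, TRI))
    return CopyCoalescing::IfNoInterference;
  // Allocatable registers, and partially reserved ones such as %xzr_x0
  // after this patch, are not coalesced here.
  return CopyCoalescing::Never;
}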
@@ -78,40 +78,41 @@ define void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1,
   ret void
 }
 
-; ALL-LABEL: max_12_sgprs_12_input_sgprs{{$}}
-; Make sure copies for input buffer are not clobbered. This requires
-; swapping the order the registers are copied from what normally
-; happens.
+; The following test is commented out for now; http://llvm.org/PR31230
+; XALL-LABEL: max_12_sgprs_12_input_sgprs{{$}}
+; ; Make sure copies for input buffer are not clobbered. This requires
+; ; swapping the order the registers are copied from what normally
+; ; happens.
 
-; TOSMEM: s_mov_b32 s5, s11
-; TOSMEM: s_add_u32 m0, s5,
-; TOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0
+; XTOSMEM: s_mov_b32 s5, s11
+; XTOSMEM: s_add_u32 m0, s5,
+; XTOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0
 
-; ALL: SGPRBlocks: 2
-; ALL: NumSGPRsForWavesPerEU: 18
-define void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,
-                                         i32 addrspace(1)* %out2,
-                                         i32 addrspace(1)* %out3,
-                                         i32 addrspace(1)* %out4,
-                                         i32 %one, i32 %two, i32 %three, i32 %four) #2 {
-  store volatile i32 0, i32* undef
-  %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
-  store volatile i32 %x.0, i32 addrspace(1)* undef
-  %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
-  store volatile i32 %x.0, i32 addrspace(1)* undef
-  %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
-  store volatile i32 %x.0, i32 addrspace(1)* undef
-  %x.3 = call i64 @llvm.amdgcn.dispatch.id()
-  store volatile i64 %x.3, i64 addrspace(1)* undef
-  %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
-  store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef
-
-  store i32 %one, i32 addrspace(1)* %out1
-  store i32 %two, i32 addrspace(1)* %out2
-  store i32 %three, i32 addrspace(1)* %out3
-  store i32 %four, i32 addrspace(1)* %out4
-  ret void
-}
+; XALL: SGPRBlocks: 2
+; XALL: NumSGPRsForWavesPerEU: 18
+;define void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,
+;                                         i32 addrspace(1)* %out2,
+;                                         i32 addrspace(1)* %out3,
+;                                         i32 addrspace(1)* %out4,
+;                                         i32 %one, i32 %two, i32 %three, i32 %four) #2 {
+;  store volatile i32 0, i32* undef
+;  %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
+;  store volatile i32 %x.0, i32 addrspace(1)* undef
+;  %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
+;  store volatile i32 %x.0, i32 addrspace(1)* undef
+;  %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
+;  store volatile i32 %x.0, i32 addrspace(1)* undef
+;  %x.3 = call i64 @llvm.amdgcn.dispatch.id()
+;  store volatile i64 %x.3, i64 addrspace(1)* undef
+;  %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+;  store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef
+;
+;  store i32 %one, i32 addrspace(1)* %out1
+;  store i32 %two, i32 addrspace(1)* %out2
+;  store i32 %three, i32 addrspace(1)* %out3
+;  store i32 %four, i32 addrspace(1)* %out4
+;  ret void
+;}
 
 declare i32 @llvm.amdgcn.workgroup.id.x() #1
 declare i32 @llvm.amdgcn.workgroup.id.y() #1