mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-27 07:45:35 +00:00
StackSlotColoring: Decide colors per stack ID
I thought I fixed this in r308673, but that fix was very broken. The assumption that any frame index can be used in place of another was more widespread than I realized. Even when stack slot sharing was disabled, this was still replacing frame index uses with a different ID with a different stack slot. Really fix this by doing the coloring per-stack ID, so all of the coloring logically done in a separate namespace. This is a lot simpler than trying to figure out how to change the color if the stack ID is different. llvm-svn: 335488
This commit is contained in:
parent
69747dd448
commit
eb4c18e5a7
@ -123,6 +123,9 @@ class MachineFrameInfo {
|
||||
/// necessarily reside in the same contiguous memory block as other stack
|
||||
/// objects. Objects with differing stack IDs should not be merged or
|
||||
/// replaced substituted for each other.
|
||||
//
|
||||
/// It is assumed a target uses consecutive, increasing stack IDs starting
|
||||
/// from 1.
|
||||
uint8_t StackID;
|
||||
|
||||
/// If this stack object is originated from an Alloca instruction
|
||||
|
@ -82,14 +82,14 @@ namespace {
|
||||
// AllColors - If index is set, it's a spill slot, i.e. color.
|
||||
// FIXME: This assumes PEI locate spill slot with smaller indices
|
||||
// closest to stack pointer / frame pointer. Therefore, smaller
|
||||
// index == better color.
|
||||
BitVector AllColors;
|
||||
// index == better color. This is per stack ID.
|
||||
SmallVector<BitVector, 2> AllColors;
|
||||
|
||||
// NextColor - Next "color" that's not yet used.
|
||||
int NextColor = -1;
|
||||
// NextColor - Next "color" that's not yet used. This is per stack ID.
|
||||
SmallVector<int, 2> NextColors = { -1 };
|
||||
|
||||
// UsedColors - "Colors" that have been assigned.
|
||||
BitVector UsedColors;
|
||||
// UsedColors - "Colors" that have been assigned. This is per stack ID
|
||||
SmallVector<BitVector, 2> UsedColors;
|
||||
|
||||
// Assignments - Color to intervals mapping.
|
||||
SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
|
||||
@ -196,10 +196,15 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
|
||||
/// to a sorted (by weight) list.
|
||||
void StackSlotColoring::InitializeSlots() {
|
||||
int LastFI = MFI->getObjectIndexEnd();
|
||||
|
||||
// There is always at least one stack ID.
|
||||
AllColors.resize(1);
|
||||
UsedColors.resize(1);
|
||||
|
||||
OrigAlignments.resize(LastFI);
|
||||
OrigSizes.resize(LastFI);
|
||||
AllColors.resize(LastFI);
|
||||
UsedColors.resize(LastFI);
|
||||
AllColors[0].resize(LastFI);
|
||||
UsedColors[0].resize(LastFI);
|
||||
Assignments.resize(LastFI);
|
||||
|
||||
using Pair = std::iterator_traits<LiveStacks::iterator>::value_type;
|
||||
@ -220,18 +225,31 @@ void StackSlotColoring::InitializeSlots() {
|
||||
int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
|
||||
if (MFI->isDeadObjectIndex(FI))
|
||||
continue;
|
||||
|
||||
SSIntervals.push_back(&li);
|
||||
OrigAlignments[FI] = MFI->getObjectAlignment(FI);
|
||||
OrigSizes[FI] = MFI->getObjectSize(FI);
|
||||
AllColors.set(FI);
|
||||
|
||||
auto StackID = MFI->getStackID(FI);
|
||||
if (StackID != 0) {
|
||||
AllColors.resize(StackID + 1);
|
||||
UsedColors.resize(StackID + 1);
|
||||
AllColors[StackID].resize(LastFI);
|
||||
UsedColors[StackID].resize(LastFI);
|
||||
}
|
||||
|
||||
AllColors[StackID].set(FI);
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << '\n');
|
||||
|
||||
// Sort them by weight.
|
||||
std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
|
||||
|
||||
NextColors.resize(AllColors.size());
|
||||
|
||||
// Get first "color".
|
||||
NextColor = AllColors.find_first();
|
||||
for (unsigned I = 0, E = AllColors.size(); I != E; ++I)
|
||||
NextColors[I] = AllColors[I].find_first();
|
||||
}
|
||||
|
||||
/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
|
||||
@ -252,17 +270,19 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
|
||||
int Color = -1;
|
||||
bool Share = false;
|
||||
int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
|
||||
uint8_t StackID = MFI->getStackID(FI);
|
||||
|
||||
if (!DisableSharing) {
|
||||
|
||||
// Check if it's possible to reuse any of the used colors.
|
||||
Color = UsedColors.find_first();
|
||||
Color = UsedColors[StackID].find_first();
|
||||
while (Color != -1) {
|
||||
if (!OverlapWithAssignments(li, Color)) {
|
||||
Share = true;
|
||||
++NumEliminated;
|
||||
break;
|
||||
}
|
||||
Color = UsedColors.find_next(Color);
|
||||
Color = UsedColors[StackID].find_next(Color);
|
||||
}
|
||||
}
|
||||
|
||||
@ -274,12 +294,14 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
|
||||
// Assign it to the first available color (assumed to be the best) if it's
|
||||
// not possible to share a used color with other objects.
|
||||
if (!Share) {
|
||||
assert(NextColor != -1 && "No more spill slots?");
|
||||
Color = NextColor;
|
||||
UsedColors.set(Color);
|
||||
NextColor = AllColors.find_next(NextColor);
|
||||
assert(NextColors[StackID] != -1 && "No more spill slots?");
|
||||
Color = NextColors[StackID];
|
||||
UsedColors[StackID].set(Color);
|
||||
NextColors[StackID] = AllColors[StackID].find_next(NextColors[StackID]);
|
||||
}
|
||||
|
||||
assert(MFI->getStackID(Color) == MFI->getStackID(FI));
|
||||
|
||||
// Record the assignment.
|
||||
Assignments[Color].push_back(li);
|
||||
LLVM_DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
|
||||
@ -357,11 +379,13 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
|
||||
}
|
||||
|
||||
// Delete unused stack slots.
|
||||
while (NextColor != -1) {
|
||||
LLVM_DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor
|
||||
<< "\n");
|
||||
MFI->RemoveStackObject(NextColor);
|
||||
NextColor = AllColors.find_next(NextColor);
|
||||
for (int StackID = 0, E = AllColors.size(); StackID != E; ++StackID) {
|
||||
int NextColor = NextColors[StackID];
|
||||
while (NextColor != -1) {
|
||||
LLVM_DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
|
||||
MFI->RemoveStackObject(NextColor);
|
||||
NextColor = AllColors[StackID].find_next(NextColor);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -383,6 +407,8 @@ void StackSlotColoring::RewriteInstruction(MachineInstr &MI,
|
||||
int NewFI = SlotMapping[OldFI];
|
||||
if (NewFI == -1 || NewFI == OldFI)
|
||||
continue;
|
||||
|
||||
assert(MFI->getStackID(OldFI) == MFI->getStackID(NewFI));
|
||||
MO.setIndex(NewFI);
|
||||
}
|
||||
|
||||
@ -487,7 +513,9 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
|
||||
InitializeSlots();
|
||||
Changed = ColorSlots(MF);
|
||||
|
||||
NextColor = -1;
|
||||
for (int &Next : NextColors)
|
||||
Next = -1;
|
||||
|
||||
SSIntervals.clear();
|
||||
for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
|
||||
SSRefs[i].clear();
|
||||
|
97
test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
Normal file
97
test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
Normal file
@ -0,0 +1,97 @@
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stress-regalloc=3 -start-before=greedy -stop-after=stack-slot-coloring -o - %s | FileCheck -check-prefixes=SHARE,GCN %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stress-regalloc=3 -start-before=greedy -stop-after=stack-slot-coloring -no-stack-slot-sharing -o - %s | FileCheck -check-prefixes=NOSHARE,GCN %s
|
||||
|
||||
# Make sure that stack slot coloring doesn't try to merge frame
|
||||
# indexes used for SGPR spilling with those that aren't.
|
||||
# Even when stack slot sharing was disabled, it was still moving the
|
||||
# FI ID used for an SGPR spill to a normal frame index.
|
||||
|
||||
--- |
|
||||
|
||||
define void @sgpr_spill_wrong_stack_id(float addrspace(1)* nocapture readnone %arg, float addrspace(1)* noalias %arg1) {
|
||||
bb:
|
||||
%tmp = load i32, i32 addrspace(1)* null, align 4
|
||||
call void @func(i32 undef)
|
||||
call void @func(i32 %tmp)
|
||||
unreachable
|
||||
}
|
||||
|
||||
declare void @func(i32)
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: sgpr_spill_wrong_stack_id
|
||||
# SHARE: stack:
|
||||
# SHARE: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
|
||||
# SHARE: stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
|
||||
# SHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
# SHARE: - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
|
||||
# SHARE: stack-id: 1, callee-saved-register: '', callee-saved-restored: true,
|
||||
# SHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
# SHARE: - { id: 2, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
|
||||
# SHARE: stack-id: 1, callee-saved-register: '', callee-saved-restored: true,
|
||||
# SHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
|
||||
# SHARE: SI_SPILL_S32_SAVE $sgpr5, %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 4 into %stack.2, addrspace 5)
|
||||
# SHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
|
||||
# SHARE: SI_SPILL_S64_SAVE killed renamable $sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 8 into %stack.1, align 4, addrspace 5)
|
||||
# SHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5)
|
||||
# SHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0
|
||||
# SHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.2, addrspace 5)
|
||||
# SHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
|
||||
# SHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5)
|
||||
# SHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0
|
||||
# SHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.2, addrspace 5)
|
||||
|
||||
# NOSHARE: stack:
|
||||
# NOSHARE: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
|
||||
# NOSHARE: stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
|
||||
# NOSHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
# NOSHARE: - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
|
||||
# NOSHARE: stack-id: 1, callee-saved-register: '', callee-saved-restored: true,
|
||||
# NOSHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
# NOSHARE: - { id: 2, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
|
||||
# NOSHARE: stack-id: 1, callee-saved-register: '', callee-saved-restored: true,
|
||||
# NOSHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
# NOSHARE: - { id: 3, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
|
||||
# NOSHARE: stack-id: 1, callee-saved-register: '', callee-saved-restored: true,
|
||||
# NOSHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
|
||||
# NOSHARE: SI_SPILL_S32_SAVE $sgpr5, %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 4 into %stack.2, addrspace 5)
|
||||
# NOSHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
|
||||
# NOSHARE: SI_SPILL_S64_SAVE killed renamable $sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 8 into %stack.1, align 4, addrspace 5)
|
||||
# NOSHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5)
|
||||
# NOSHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0
|
||||
# NOSHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.2, addrspace 5)
|
||||
# NOSHARE: SI_SPILL_S32_SAVE $sgpr5, %stack.3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 4 into %stack.3, addrspace 5)
|
||||
# NOSHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
|
||||
# NOSHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5)
|
||||
# NOSHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0
|
||||
# NOSHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.3, addrspace 5)
|
||||
|
||||
...
|
||||
|
||||
name: sgpr_spill_wrong_stack_id
|
||||
tracksRegLiveness: true
|
||||
frameInfo:
|
||||
adjustsStack: false
|
||||
hasCalls: true
|
||||
body: |
|
||||
bb.0.bb:
|
||||
%8:sreg_32_xm0 = COPY $sgpr5
|
||||
%4:vreg_64 = IMPLICIT_DEF
|
||||
%3:vgpr_32 = FLAT_LOAD_DWORD %4, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
%5:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
|
||||
ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5
|
||||
dead $sgpr30_sgpr31 = SI_CALL %5, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0
|
||||
$sgpr5 = COPY %8
|
||||
%12:sreg_32_xm0 = COPY $sgpr5
|
||||
ADJCALLSTACKDOWN 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5
|
||||
ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5
|
||||
$vgpr0 = COPY %3
|
||||
dead $sgpr30_sgpr31 = SI_CALL %5, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit killed $vgpr0
|
||||
$sgpr5 = COPY %12
|
||||
ADJCALLSTACKDOWN 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5
|
||||
|
||||
...
|
Loading…
x
Reference in New Issue
Block a user