[MachineScheduler] Order FI-based memops based on stack direction

It makes more sense to order FI-based memops in descending order when
the stack grows down. This allows offsets to stay "consecutive" and allows
easier pattern matching.

llvm-svn: 347906
This commit is contained in:
Francis Visoiu Mistrih 2018-11-29 20:03:19 +00:00
parent dfe7e315ea
commit b1262b3845
3 changed files with 29 additions and 12 deletions

View File

@ -41,6 +41,7 @@
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@ -1497,10 +1498,23 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
RHS.SU->NodeNum);
if (BaseOp->isFI())
return std::make_tuple(BaseOp->getIndex(), Offset, SU->NodeNum) <
std::make_tuple(RHS.BaseOp->getIndex(), RHS.Offset,
RHS.SU->NodeNum);
if (BaseOp->isFI()) {
const MachineFunction &MF =
*BaseOp->getParent()->getParent()->getParent();
const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
bool StackGrowsDown = TFI.getStackGrowthDirection() ==
TargetFrameLowering::StackGrowsDown;
// Can't use tuple comparison here since we might need to use a
// different order when the stack grows down.
if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
: BaseOp->getIndex() < RHS.BaseOp->getIndex();
if (Offset != RHS.Offset)
return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
return SU->NodeNum < RHS.SU->NodeNum;
}
llvm_unreachable("MemOpClusterMutation only supports register or frame "
"index bases.");

View File

@ -2355,7 +2355,7 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
assert(ObjectOffset1 >= ObjectOffset2 && "Object offsets are not ordered.");
assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
// Get the byte-offset from the object offset.
if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
return false;
@ -2365,7 +2365,7 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
if (!scaleOffset(Opcode1, ObjectOffset1) ||
!scaleOffset(Opcode2, ObjectOffset2))
return false;
return ObjectOffset2 + 1 == ObjectOffset1;
return ObjectOffset1 + 1 == ObjectOffset2;
}
return FI1 == FI2;
@ -2424,16 +2424,19 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
// The caller should already have ordered First/SecondLdSt by offset.
// Note: except for non-equal frame index bases
assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
"Caller should have ordered offsets.");
if (BaseOp1.isFI()) {
assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
"Caller should have ordered offsets.");
const MachineFrameInfo &MFI =
FirstLdSt.getParent()->getParent()->getFrameInfo();
return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
BaseOp2.getIndex(), Offset2, SecondOpc);
}
assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
"Caller should have ordered offsets.");
return Offset1 + 1 == Offset2;
}

View File

@ -113,9 +113,9 @@ define void @bzero_20_stack() {
define void @bzero_26_stack() {
; CHECK-LABEL: bzero_26_stack:
; CHECK: stp xzr, xzr, [sp]
; CHECK: stp xzr, xzr, [sp, #8]
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: strh wzr, [sp, #24]
; CHECK-NEXT: str xzr, [sp, #16]
; CHECK-NEXT: bl something
%buf = alloca [26 x i8], align 1
%cast = bitcast [26 x i8]* %buf to i8*
@ -259,9 +259,9 @@ define void @memset_12_stack() {
define void @memset_16_stack() {
; CHECK-LABEL: memset_16_stack:
; CHECK: mov x8, #-6148914691236517206
; CHECK-NEXT: str x8, [sp, #-32]!
; CHECK-NEXT: mov x0, sp
; CHECK-NEXT: stp x8, x30, [sp, #8]
; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: bl something
%buf = alloca [16 x i8], align 1
%cast = bitcast [16 x i8]* %buf to i8*