From f6b47d2a4fe6194129b1d709f595eb34e410c9a0 Mon Sep 17 00:00:00 2001
From: QingShan Zhang
Date: Fri, 24 May 2019 05:30:09 +0000
Subject: [PATCH] [Power9] Add a specific heuristic to schedule the addi before the load

When we are scheduling the load and addi, if none of the other heuristics took
effect, we will try to schedule the addi before the load, to hide the latency
and avoid the true dependency added by RA. This only takes effect for Power9.

Differential Revision: https://reviews.llvm.org/D61930

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361600 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCMachineScheduler.cpp  | 64 +++++++++++++++++++++
 lib/Target/PowerPC/PPCMachineScheduler.h    |  7 +++
 test/CodeGen/PowerPC/schedule-addi-load.mir | 19 +++++++-
 3 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/lib/Target/PowerPC/PPCMachineScheduler.cpp b/lib/Target/PowerPC/PPCMachineScheduler.cpp
index 19aa53d54f1..d57e38acef6 100644
--- a/lib/Target/PowerPC/PPCMachineScheduler.cpp
+++ b/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -5,9 +5,73 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+#include "PPC.h"
 #include "PPCMachineScheduler.h"
 using namespace llvm;
 
+static cl::opt<bool>
+DisableAddiLoadHeuristic("disable-ppc-sched-addi-load",
+                         cl::desc("Disable scheduling addi instruction before "
+                                  "load for ppc"), cl::Hidden);
+
+/// Bias the choice between \p Cand and \p TryCand so that an ADDI/ADDI8 is
+/// scheduled before a load.
+///
+/// \returns true, after updating TryCand.Reason, when one candidate is an
+/// ADDI/ADDI8 and the other may load: Stall (a reason other than NoCand, so
+/// TryCand is preferred) or NoCand (Cand is kept). \returns false without
+/// touching TryCand when the heuristic is disabled or the pair doesn't match.
+bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
+                                                  SchedCandidate &TryCand,
+                                                  SchedBoundary &Zone) const {
+  if (DisableAddiLoadHeuristic)
+    return false;
+
+  auto isADDIInstr = [](const MachineInstr &Inst) {
+    return Inst.getOpcode() == PPC::ADDI || Inst.getOpcode() == PPC::ADDI8;
+  };
+
+  // Name the candidates by the relative order they would end up in if TryCand
+  // were picked now. NOTE(review): this relies on a top zone placing its pick
+  // first and a bottom zone placing its pick last.
+  SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand;
+  SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand;
+  if (isADDIInstr(*FirstCand.SU->getInstr()) &&
+      SecondCand.SU->getInstr()->mayLoad()) {
+    TryCand.Reason = Stall;
+    return true;
+  }
+  if (FirstCand.SU->getInstr()->mayLoad() &&
+      isADDIInstr(*SecondCand.SU->getInstr())) {
+    TryCand.Reason = NoCand;
+    return true;
+  }
+
+  return false;
+}
+
+/// Run the generic heuristics first, then apply the PowerPC addi/load bias
+/// when they left TryCand unselected or selected it only by node order.
+/// Does nothing extra when \p Cand is invalid or \p Zone is null.
+void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+                                         SchedCandidate &TryCand,
+                                         SchedBoundary *Zone) const {
+  GenericScheduler::tryCandidate(Cand, TryCand, Zone);
+
+  if (!Cand.isValid() || !Zone)
+    return;
+
+  // Add powerpc specific heuristic only when TryCand isn't selected or
+  // selected as node order.
+  if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
+    return;
+
+  // There are some benefits to schedule the ADDI before the load to hide the
+  // latency, as RA may create a true dependency between the load and addi.
+  if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
+    return;
+}
+
 void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
   // Custom PPC PostRA specific behavior here.
   PostGenericScheduler::enterMBB(MBB);
diff --git a/lib/Target/PowerPC/PPCMachineScheduler.h b/lib/Target/PowerPC/PPCMachineScheduler.h
index ea6d3ffbb26..93532d9545a 100644
--- a/lib/Target/PowerPC/PPCMachineScheduler.h
+++ b/lib/Target/PowerPC/PPCMachineScheduler.h
@@ -22,6 +22,13 @@ class PPCPreRASchedStrategy : public GenericScheduler {
 public:
   PPCPreRASchedStrategy(const MachineSchedContext *C) :
     GenericScheduler(C) {}
+protected:
+  void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+                    SchedBoundary *Zone) const override;
+private:
+  bool biasAddiLoadCandidate(SchedCandidate &Cand,
+                             SchedCandidate &TryCand,
+                             SchedBoundary &Zone) const;
 };
 
 /// A MachineSchedStrategy implementation for PowerPC post RA scheduling.
diff --git a/test/CodeGen/PowerPC/schedule-addi-load.mir b/test/CodeGen/PowerPC/schedule-addi-load.mir
index f0c9ea66f6b..f9820062cfd 100644
--- a/test/CodeGen/PowerPC/schedule-addi-load.mir
+++ b/test/CodeGen/PowerPC/schedule-addi-load.mir
@@ -1,4 +1,7 @@
 # RUN: llc -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu -start-before machine-scheduler -stop-after machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu -disable-ppc-sched-addi-load -start-before machine-scheduler -stop-after machine-scheduler \
+# RUN:  -verify-machineinstrs %s -o - | FileCheck --check-prefix=CHECK-DISABLE %s
+# RUN: llc -mcpu=pwr8 -mtriple powerpc64le-unknown-linux-gnu -start-before machine-scheduler -stop-after machine-scheduler -verify-machineinstrs %s -o - | FileCheck --check-prefix=CHECK-P8 %s
 
 # Test that if the scheduler moves the addi before the load.
 --- |
@@ -93,11 +96,25 @@ body: |
     B %bb.2
     ; CHECK-LABEL: foo
     ; CHECK: %5:g8rc_and_g8rc_nox0 = RLDICL %0, 0, 32
+    ; CHECK-NEXT: %9:g8rc = ADDI8 %5, 1
     ; CHECK-NEXT: %6:gprc = LBZX %2, %5 :: (load 1 from %ir.arrayidx)
     ; CHECK-NEXT: %7:gprc = LBZX %3, %5 :: (load 1 from %ir.arrayidx4)
-    ; CHECK-NEXT: %9:g8rc = ADDI8 %5, 1
     ; CHECK-NEXT: %8:crrc = CMPLW %6, %7
     ; CHECK-NEXT: BCC 76, %8
+    ; CHECK-DISABLE-LABEL: foo
+    ; CHECK-DISABLE: %5:g8rc_and_g8rc_nox0 = RLDICL %0, 0, 32
+    ; CHECK-DISABLE-NEXT: %6:gprc = LBZX %2, %5 :: (load 1 from %ir.arrayidx)
+    ; CHECK-DISABLE-NEXT: %7:gprc = LBZX %3, %5 :: (load 1 from %ir.arrayidx4)
+    ; CHECK-DISABLE-NEXT: %9:g8rc = ADDI8 %5, 1
+    ; CHECK-DISABLE-NEXT: %8:crrc = CMPLW %6, %7
+    ; CHECK-DISABLE-NEXT: BCC 76, %8
+    ; CHECK-P8-LABEL: foo
+    ; CHECK-P8: %5:g8rc_and_g8rc_nox0 = RLDICL %0, 0, 32
+    ; CHECK-P8-NEXT: %6:gprc = LBZX %2, %5 :: (load 1 from %ir.arrayidx)
+    ; CHECK-P8-NEXT: %7:gprc = LBZX %3, %5 :: (load 1 from %ir.arrayidx4)
+    ; CHECK-P8-NEXT: %8:crrc = CMPLW %6, %7
+    ; CHECK-P8-NEXT: %9:g8rc = ADDI8 %5, 1
+    ; CHECK-P8-NEXT: BCC 76, %8
 
   bb.2.while.end:
     $x3 = COPY %0