From 76020ed6f33e3b3943b25c8b5e111afece086f5c Mon Sep 17 00:00:00 2001 From: Kalle Raiskila Date: Tue, 11 Jan 2011 09:07:54 +0000 Subject: [PATCH] Add a "nop filler" pass to SPU. Filling no-ops is done just before emitting of assembly, when the instruction stream is final. No-ops are inserted to align the instructions so the dual-issue of the pipeline is utilized. This speeds up generated code with a minimum of 1% on a select set of algorithms. This pass may be redundant if the instruction scheduler and all subsequent passes that modify the instruction stream (prolog+epilog inserter, register scavenger, are there others?) are made aware of the instruction alignments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123226 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CellSPU/CMakeLists.txt | 1 + lib/Target/CellSPU/SPU.h | 1 + lib/Target/CellSPU/SPUInstrInfo.td | 2 +- lib/Target/CellSPU/SPUNopFiller.cpp | 153 ++++++++++++++++++++++++ lib/Target/CellSPU/SPUTargetMachine.cpp | 9 ++ lib/Target/CellSPU/SPUTargetMachine.h | 1 + 6 files changed, 166 insertions(+), 1 deletion(-) create mode 100644 lib/Target/CellSPU/SPUNopFiller.cpp diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index f2183111ab5..633bdf65fde 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -23,4 +23,5 @@ add_llvm_target(CellSPUCodeGen SPUSubtarget.cpp SPUTargetMachine.cpp SPUSelectionDAGInfo.cpp + SPUNopFiller.cpp ) diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h index 1f215113b40..72f84300b2c 100644 --- a/lib/Target/CellSPU/SPU.h +++ b/lib/Target/CellSPU/SPU.h @@ -23,6 +23,7 @@ namespace llvm { class formatted_raw_ostream; FunctionPass *createSPUISelDag(SPUTargetMachine &TM); + FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm); extern Target TheCellSPUTarget; } diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 4095951c24c..4f59e0607e2 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -4216,7 +4216,7 @@ def : Pat<(fabs (v4f32 VECREG:$rA)), // in the odd pipeline) //===----------------------------------------------------------------------===// -def ENOP : SPUInstr<(outs), (ins), "enop", ExecNOP> { +def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> { let Pattern = []; let Inst{0-10} = 0b10000000010; diff --git a/lib/Target/CellSPU/SPUNopFiller.cpp b/lib/Target/CellSPU/SPUNopFiller.cpp new file mode 100644 index 00000000000..54a5925d80f --- /dev/null +++ b/lib/Target/CellSPU/SPUNopFiller.cpp @@ -0,0 +1,153 @@ +//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The final pass just before assembly printing. This pass is the last +// checkpoint where nops and lnops are added to the instruction stream to +// satisfy the dual issue requirements. The actual dual issue scheduling is +// done (TODO: nowhere, currently) +// +//===----------------------------------------------------------------------===// + +#include "SPU.h" +#include "SPUTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + struct SPUNopFiller : public MachineFunctionPass { + + TargetMachine &TM; + const TargetInstrInfo *TII; + const InstrItineraryData *IID; + bool isEvenPlace; // the instruction slot (mem address) at hand is even/odd + + static char ID; + SPUNopFiller(TargetMachine &tm) + : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()), + IID(tm.getInstrItineraryData()) + { + DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; ); + } + + virtual const char *getPassName() const { + return "SPU nop/lnop Filler"; + } + + void runOnMachineBasicBlock(MachineBasicBlock &MBB); + + bool runOnMachineFunction(MachineFunction &F) { + isEvenPlace = true; //all functions get an .align 3 directive at start + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) + runOnMachineBasicBlock(*FI); + return true; //never-ever do any more modifications, just print it! + } + + typedef enum { none = 0, // no more instructions in this function / BB + pseudo = 1, // this does not get executed + even = 2, + odd = 3 } SPUOpPlace; + SPUOpPlace getOpPlacement( MachineInstr &instr ); + + }; + char SPUNopFiller::ID = 0; + +} + +// Fill a BasicBlock to alignment. +// In the assebly we align the functions to 'even' adresses, but +// basic blocks have an implicit alignmnet. We hereby define +// basic blocks to have the same, even, alignment. +void SPUNopFiller:: +runOnMachineBasicBlock(MachineBasicBlock &MBB) +{ + assert( isEvenPlace && "basic block start from odd address"); + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) + { + SPUOpPlace this_optype, next_optype; + MachineBasicBlock::iterator J = I; + J++; + + this_optype = getOpPlacement( *I ); + next_optype = none; + while (J!=MBB.end()){ + next_optype = getOpPlacement( *J ); + ++J; + if (next_optype != pseudo ) + break; + } + + // padd: odd(wrong), even(wrong), ... + // to: nop(corr), odd(corr), even(corr)... + if( isEvenPlace && this_optype == odd && next_optype == even ) { + DEBUG( dbgs() <<"Adding NOP before: "; ); + DEBUG( I->dump(); ); + BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP)); + isEvenPlace=false; + } + + // padd: even(wrong), odd(wrong), ... + // to: lnop(corr), even(corr), odd(corr)... + else if ( !isEvenPlace && this_optype == even && next_optype == odd){ + DEBUG( dbgs() <<"Adding LNOP before: "; ); + DEBUG( I->dump(); ); + BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP)); + isEvenPlace=true; + } + + // now go to next mem slot + if( this_optype != pseudo ) + isEvenPlace = !isEvenPlace; + + } + + // padd basicblock end + if( !isEvenPlace ){ + MachineBasicBlock::iterator J = MBB.end(); + J--; + if (getOpPlacement( *J ) == odd) { + DEBUG( dbgs() <<"Padding basic block with NOP\n"; ); + BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP)); + } + else { + J++; + DEBUG( dbgs() <<"Padding basic block with LNOP\n"; ); + BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::LNOP)); + } + isEvenPlace=true; + } +} + +FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) { + return new SPUNopFiller(tm); +} + +// Figure out if 'instr' is executed in the even or odd pipeline +SPUNopFiller::SPUOpPlace +SPUNopFiller::getOpPlacement( MachineInstr &instr ) { + int sc = instr.getDesc().getSchedClass(); + const InstrStage *stage = IID->beginStage(sc); + unsigned FUs = stage->getUnits(); + SPUOpPlace retval; + + switch( FUs ) { + case 0: retval = pseudo; break; + case 1: retval = odd; break; + case 2: retval = even; break; + default: retval= pseudo; + assert( false && "got unknown FuncUnit\n"); + break; + }; + return retval; +} diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 3423c6928a9..3ed73613a31 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -59,3 +59,12 @@ bool SPUTargetMachine::addInstSelector(PassManagerBase &PM, PM.add(createSPUISelDag(*this)); return false; } + +// passes to run just before printing the assembly +bool SPUTargetMachine:: +addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) +{ + //align instructions with nops/lnops for dual issue + PM.add(createSPUNopFillerPass(*this)); + return true; +} diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 6e467576124..75abd5eb3fc 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -82,6 +82,7 @@ public: // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level); }; } // end namespace llvm