llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
Hal Finkel dd0687afa4 [PowerPC/QPX] Fix the load/splat peephole with overlapping reads
If, in between the splat and the load (which does an implicit splat), there is
a read of the splat register, then that register must have another earlier
definition. In that case, we can't replace the load's destination register with
the splat's destination register.

Unfortunately, I don't have a small or non-fragile test case.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268152 91177308-0d34-0410-b5e6-96231b3b80d8
2016-04-30 01:59:28 +00:00

168 lines
5.3 KiB
C++

//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The QPX vector registers overlay the scalar floating-point registers, and
// any scalar floating-point loads splat their value across all vector lanes.
// Thus, if we have a scalar load followed by a splat, we can remove the splat
// (i.e. replace the load with a load-and-splat pseudo instruction).
//
// This pass must run after anything that might do store-to-load forwarding.
//
//===----------------------------------------------------------------------===//
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "ppc-qpx-load-splat"
STATISTIC(NumSimplified, "Number of QPX load splats simplified");
namespace llvm {
void initializePPCQPXLoadSplatPass(PassRegistry&);
}
namespace {
struct PPCQPXLoadSplat : public MachineFunctionPass {
static char ID;
PPCQPXLoadSplat() : MachineFunctionPass(ID) {
initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &Fn) override;
const char *getPassName() const override {
return "PowerPC QPX Load Splat Simplification";
}
};
char PPCQPXLoadSplat::ID = 0;
}
INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
"PowerPC QPX Load Splat Simplification",
false, false)
FunctionPass *llvm::createPPCQPXLoadSplatPass() {
return new PPCQPXLoadSplat();
}
bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
bool MadeChange = false;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
MachineBasicBlock *MBB = &*MFI;
SmallVector<MachineInstr *, 4> Splats;
for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
MachineInstr *MI = &*MBBI;
if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
Splats.clear();
continue;
}
// We're looking for a sequence like this:
// %F0<def> = LFD 0, %X3<kill>, %QF0<imp-def>; mem:LD8[%a](tbaa=!2)
// %QF1<def> = QVESPLATI %QF0<kill>, 0, %RM<imp-use>
for (auto SI = Splats.begin(); SI != Splats.end();) {
MachineInstr *SMI = *SI;
unsigned SplatReg = SMI->getOperand(0).getReg();
unsigned SrcReg = SMI->getOperand(1).getReg();
if (MI->modifiesRegister(SrcReg, TRI)) {
switch (MI->getOpcode()) {
default:
SI = Splats.erase(SI);
continue;
case PPC::LFS:
case PPC::LFD:
case PPC::LFSU:
case PPC::LFDU:
case PPC::LFSUX:
case PPC::LFDUX:
case PPC::LFSX:
case PPC::LFDX:
case PPC::LFIWAX:
case PPC::LFIWZX:
if (SplatReg != SrcReg) {
// We need to change the load to define the scalar subregister of
// the QPX splat source register.
unsigned SubRegIndex =
TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
// Substitute both the explicit defined register, and also the
// implicit def of the containing QPX register.
MI->getOperand(0).setReg(SplatSubReg);
MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
}
SI = Splats.erase(SI);
// If SMI is directly after MI, then MBBI's base iterator is
// pointing at SMI. Adjust MBBI around the call to erase SMI to
// avoid invalidating MBBI.
++MBBI;
SMI->eraseFromParent();
--MBBI;
++NumSimplified;
MadeChange = true;
continue;
}
}
// If this instruction defines the splat register, then we cannot move
// the previous definition above it. If it reads from the splat
// register, then it must already be alive from some previous
// definition, and if the splat register is different from the source
// register, then this definition must not be the load for which we're
// searching.
if (MI->modifiesRegister(SplatReg, TRI) ||
(SrcReg != SplatReg &&
MI->readsRegister(SplatReg, TRI))) {
SI = Splats.erase(SI);
continue;
}
++SI;
}
if (MI->getOpcode() != PPC::QVESPLATI &&
MI->getOpcode() != PPC::QVESPLATIs &&
MI->getOpcode() != PPC::QVESPLATIb)
continue;
if (MI->getOperand(2).getImm() != 0)
continue;
// If there are other uses of the scalar value after this, replacing
// those uses might be non-trivial.
if (!MI->getOperand(1).isKill())
continue;
Splats.push_back(MI);
}
}
return MadeChange;
}