2005-10-16 05:39:50 +00:00
|
|
|
//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
|
2005-04-21 23:30:14 +00:00
|
|
|
//
|
2004-06-21 16:55:25 +00:00
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
2007-12-29 20:36:04 +00:00
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
2005-04-21 23:30:14 +00:00
|
|
|
//
|
2004-06-21 16:55:25 +00:00
|
|
|
//===----------------------------------------------------------------------===//
|
2005-04-21 23:30:14 +00:00
|
|
|
//
|
2005-08-15 23:47:04 +00:00
|
|
|
// Top-level implementation for the PowerPC target.
|
2004-06-21 16:55:25 +00:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2005-10-14 23:59:06 +00:00
|
|
|
#include "PPCTargetMachine.h"
|
2012-03-17 18:46:09 +00:00
|
|
|
#include "PPC.h"
|
2012-02-03 05:12:41 +00:00
|
|
|
#include "llvm/CodeGen/Passes.h"
|
2012-12-03 16:50:05 +00:00
|
|
|
#include "llvm/MC/MCStreamer.h"
|
|
|
|
#include "llvm/PassManager.h"
|
2012-06-08 15:38:21 +00:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2009-07-14 20:18:05 +00:00
|
|
|
#include "llvm/Support/FormattedStream.h"
|
2011-08-24 18:08:43 +00:00
|
|
|
#include "llvm/Support/TargetRegistry.h"
|
2012-12-03 16:50:05 +00:00
|
|
|
#include "llvm/Target/TargetOptions.h"
|
2004-06-21 16:55:25 +00:00
|
|
|
using namespace llvm;
|
|
|
|
|
2012-06-08 15:38:21 +00:00
|
|
|
// Hidden command-line flag "disable-ppc-ctrloops". The pass configuration
// below consults it before adding the CTR-loop formation pass and, in
// asserts-enabled builds, the CTR-loop verification pass.
static cl::opt<bool>
DisableCTRLoops("disable-ppc-ctrloops",
                cl::Hidden,
                cl::desc("Disable CTR loops for PPC"));
|
2012-06-08 15:38:21 +00:00
|
|
|
|
2009-07-25 06:49:55 +00:00
|
|
|
extern "C" void LLVMInitializePowerPCTarget() {
|
|
|
|
// Register the targets
|
2012-02-03 05:12:30 +00:00
|
|
|
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
|
2009-07-25 06:49:55 +00:00
|
|
|
RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
|
2013-07-26 01:35:43 +00:00
|
|
|
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
|
2009-07-25 06:49:55 +00:00
|
|
|
}
|
2009-06-16 20:12:29 +00:00
|
|
|
|
2011-07-19 06:37:02 +00:00
|
|
|
/// Shared constructor for the 32-bit and 64-bit PowerPC target machines.
///
/// All generic arguments are forwarded unchanged to LLVMTargetMachine;
/// \p is64Bit is passed on to the subtarget and to the JIT info. The data
/// layout string and the instruction itineraries are both obtained from the
/// freshly constructed subtarget.
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
                                   StringRef CPU, StringRef FS,
                                   const TargetOptions &Options,
                                   Reloc::Model RM, CodeModel::Model CM,
                                   CodeGenOpt::Level OL,
                                   bool is64Bit)
  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
    Subtarget(TT, CPU, FS, is64Bit),
    DL(Subtarget.getDataLayoutString()),
    InstrInfo(*this),
    FrameLowering(Subtarget),
    JITInfo(*this, is64Bit),
    TLInfo(*this),
    TSInfo(*this),
    InstrItins(Subtarget.getInstrItineraryData()) {

  // CFI is turned off for BG/P because its binutils are too old to handle it.
  const bool TargetsBGP = Subtarget.isBGP();
  if (TargetsBGP)
    setMCUseCFI(false);

  initAsmInfo();
}
|
|
|
|
|
2011-12-20 02:50:00 +00:00
|
|
|
// Intentionally empty out-of-line definition of PPC32TargetMachine::anchor().
void PPC32TargetMachine::anchor() {}
|
|
|
|
|
2012-02-03 05:12:30 +00:00
|
|
|
/// Builds the 32-bit PowerPC target machine: every argument is handed to the
/// PPCTargetMachine base constructor, with is64Bit fixed to false.
PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
                                       StringRef CPU, StringRef FS,
                                       const TargetOptions &Options,
                                       Reloc::Model RM, CodeModel::Model CM,
                                       CodeGenOpt::Level OL)
  : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL,
                     /*is64Bit=*/false) {}
|
|
|
|
|
2011-12-20 02:50:00 +00:00
|
|
|
// Intentionally empty out-of-line definition of PPC64TargetMachine::anchor().
void PPC64TargetMachine::anchor() {}
|
2006-06-16 01:37:27 +00:00
|
|
|
|
2012-02-03 05:12:30 +00:00
|
|
|
/// Builds the 64-bit PowerPC target machine: every argument is handed to the
/// PPCTargetMachine base constructor, with is64Bit fixed to true.
PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
                                       StringRef CPU, StringRef FS,
                                       const TargetOptions &Options,
                                       Reloc::Model RM, CodeModel::Model CM,
                                       CodeGenOpt::Level OL)
  : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL,
                     /*is64Bit=*/true) {}
|
|
|
|
|
2004-08-11 07:40:04 +00:00
|
|
|
|
2006-09-04 04:14:57 +00:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Pass Pipeline Configuration
|
|
|
|
//===----------------------------------------------------------------------===//
|
2004-08-11 07:40:04 +00:00
|
|
|
|
2012-02-03 05:12:41 +00:00
|
|
|
namespace {
|
|
|
|
/// PPC Code Generator Pass Configuration Options.
|
|
|
|
class PPCPassConfig : public TargetPassConfig {
|
|
|
|
public:
|
2012-02-04 02:56:59 +00:00
|
|
|
PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
|
|
|
|
: TargetPassConfig(TM, PM) {}
|
2012-02-03 05:12:41 +00:00
|
|
|
|
|
|
|
PPCTargetMachine &getPPCTargetMachine() const {
|
|
|
|
return getTM<PPCTargetMachine>();
|
|
|
|
}
|
|
|
|
|
2013-04-05 23:29:01 +00:00
|
|
|
const PPCSubtarget &getPPCSubtarget() const {
|
|
|
|
return *getPPCTargetMachine().getSubtargetImpl();
|
|
|
|
}
|
|
|
|
|
Implement PPC counter loops as a late IR-level pass
The old PPCCTRLoops pass, like the Hexagon pass version from which it was
derived, could only handle some simple loops in canonical form. We cannot
directly adapt the new Hexagon hardware loops pass, however, because the
Hexagon pass contains a fundamental assumption that non-constant-trip-count
loops will contain a guard, and this is not always true (the result being that
incorrect negative counts can be generated). With this commit, we replace the
pass with a late IR-level pass which makes use of SE to calculate the
backedge-taken counts and safely generate the loop-count expressions (including
any necessary max() parts). This IR level pass inserts custom intrinsics that
are lowered into the desired decrement-and-branch instructions.
The most fragile part of this new implementation is that interfering uses of
the counter register must be detected on the IR level (and, on PPC, this also
includes any indirect branches in addition to function calls). Also, to make
all of this work, we need a variant of the mtctr instruction that is marked
as having side effects. Without this, machine-code level CSE, DCE, etc.
illegally transform the resulting code. Hopefully, this can be improved
in the future.
This new pass is smaller than the original (and much smaller than the new
Hexagon hardware loops pass), and can handle many additional cases correctly.
In addition, the preheader-creation code has been copied from LoopSimplify, and
after we decide on where it belongs, this code will be refactored so that it
can be explicitly shared (making this implementation even smaller).
The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for
the new Hexagon pass. There are a few classes of loops that this pass does not
transform (noted by FIXMEs in the files), but these deficiencies can be
addressed within the SE infrastructure (thus helping many other passes as well).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181927 91177308-0d34-0410-b5e6-96231b3b80d8
2013-05-15 21:37:41 +00:00
|
|
|
virtual bool addPreISel();
|
2013-04-05 23:29:01 +00:00
|
|
|
virtual bool addILPOpts();
|
2012-02-03 05:12:41 +00:00
|
|
|
virtual bool addInstSelector();
|
2013-04-09 22:58:37 +00:00
|
|
|
virtual bool addPreSched2();
|
2012-02-03 05:12:41 +00:00
|
|
|
virtual bool addPreEmitPass();
|
|
|
|
};
|
|
|
|
} // namespace
|
|
|
|
|
2012-02-04 02:56:59 +00:00
|
|
|
/// Allocates a new PPCPassConfig bound to this target machine and to \p PM,
/// and returns it to the caller as a raw pointer.
TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
  PPCPassConfig *Config = new PPCPassConfig(this, PM);
  return Config;
}
|
|
|
|
|
Implement PPC counter loops as a late IR-level pass
The old PPCCTRLoops pass, like the Hexagon pass version from which it was
derived, could only handle some simple loops in canonical form. We cannot
directly adapt the new Hexagon hardware loops pass, however, because the
Hexagon pass contains a fundamental assumption that non-constant-trip-count
loops will contain a guard, and this is not always true (the result being that
incorrect negative counts can be generated). With this commit, we replace the
pass with a late IR-level pass which makes use of SE to calculate the
backedge-taken counts and safely generate the loop-count expressions (including
any necessary max() parts). This IR level pass inserts custom intrinsics that
are lowered into the desired decrement-and-branch instructions.
The most fragile part of this new implementation is that interfering uses of
the counter register must be detected on the IR level (and, on PPC, this also
includes any indirect branches in addition to function calls). Also, to make
all of this work, we need a variant of the mtctr instruction that is marked
as having side effects. Without this, machine-code level CSE, DCE, etc.
illegally transform the resulting code. Hopefully, this can be improved
in the future.
This new pass is smaller than the original (and much smaller than the new
Hexagon hardware loops pass), and can handle many additional cases correctly.
In addition, the preheader-creation code has been copied from LoopSimplify, and
after we decide on where it belongs, this code will be refactored so that it
can be explicitly shared (making this implementation even smaller).
The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for
the new Hexagon pass. There are a few classes of loops that this pass does not
transform (noted by FIXMEs in the files), but these deficiencies can be
addressed within the SE infrastructure (thus helping many other passes as well).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181927 91177308-0d34-0410-b5e6-96231b3b80d8
2013-05-15 21:37:41 +00:00
|
|
|
bool PPCPassConfig::addPreISel() {
|
2012-06-08 19:19:53 +00:00
|
|
|
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
|
Implement PPC counter loops as a late IR-level pass
The old PPCCTRLoops pass, like the Hexagon pass version from which it was
derived, could only handle some simple loops in canonical form. We cannot
directly adapt the new Hexagon hardware loops pass, however, because the
Hexagon pass contains a fundamental assumption that non-constant-trip-count
loops will contain a guard, and this is not always true (the result being that
incorrect negative counts can be generated). With this commit, we replace the
pass with a late IR-level pass which makes use of SE to calculate the
backedge-taken counts and safely generate the loop-count expressions (including
any necessary max() parts). This IR level pass inserts custom intrinsics that
are lowered into the desired decrement-and-branch instructions.
The most fragile part of this new implementation is that interfering uses of
the counter register must be detected on the IR level (and, on PPC, this also
includes any indirect branches in addition to function calls). Also, to make
all of this work, we need a variant of the mtctr instruction that is marked
as having side effects. Without this, machine-code level CSE, DCE, etc.
illegally transform the resulting code. Hopefully, this can be improved
in the future.
This new pass is smaller than the original (and much smaller than the new
Hexagon hardware loops pass), and can handle many additional cases correctly.
In addition, the preheader-creation code has been copied from LoopSimplify, and
after we decide on where it belongs, this code will be refactored so that it
can be explicitly shared (making this implementation even smaller).
The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for
the new Hexagon pass. There are a few classes of loops that this pass does not
transform (noted by FIXMEs in the files), but these deficiencies can be
addressed within the SE infrastructure (thus helping many other passes as well).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181927 91177308-0d34-0410-b5e6-96231b3b80d8
2013-05-15 21:37:41 +00:00
|
|
|
addPass(createPPCCTRLoops(getPPCTargetMachine()));
|
2012-06-08 15:38:21 +00:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-04-05 23:29:01 +00:00
|
|
|
bool PPCPassConfig::addILPOpts() {
|
|
|
|
if (getPPCSubtarget().hasISEL()) {
|
|
|
|
addPass(&EarlyIfConverterID);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2012-02-03 05:12:41 +00:00
|
|
|
bool PPCPassConfig::addInstSelector() {
|
2005-08-17 19:33:30 +00:00
|
|
|
// Install an instruction selector.
|
2012-07-02 19:48:31 +00:00
|
|
|
addPass(createPPCISelDag(getPPCTargetMachine()));
|
Add a PPCCTRLoops verification pass
When asserts are enabled, this adds a verification pass for PPC counter-loop
formation. Unfortunately, without sacrificing code quality, there is no better
way of forming counter-based loops except at the (late) IR level. This means
that we need to recognize, at the IR level, anything which might turn into a
function call (or indirect branch). Because this is currently a finite set of
things, and because SelectionDAG lowering is basic-block local, this can be
done. Nevertheless, it is fragile, and failure results in a miscompile. This
verification pass checks that all (reachable) counter-based branches are
dominated by a loop mtctr instruction, and that no instructions in between
clobber the counter register. If these conditions are not satisfied, then an
ICE will be triggered.
In short, this is to help us sleep better at night.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182295 91177308-0d34-0410-b5e6-96231b3b80d8
2013-05-20 16:08:17 +00:00
|
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
|
|
|
|
addPass(createPPCCTRLoopsVerify());
|
|
|
|
#endif
|
|
|
|
|
2006-09-04 04:14:57 +00:00
|
|
|
return false;
|
|
|
|
}
|
2004-08-11 07:40:04 +00:00
|
|
|
|
2013-04-09 22:58:37 +00:00
|
|
|
bool PPCPassConfig::addPreSched2() {
|
|
|
|
if (getOptLevel() != CodeGenOpt::None)
|
|
|
|
addPass(&IfConverterID);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2012-02-03 05:12:41 +00:00
|
|
|
bool PPCPassConfig::addPreEmitPass() {
|
2013-04-08 16:24:03 +00:00
|
|
|
if (getOptLevel() != CodeGenOpt::None)
|
|
|
|
addPass(createPPCEarlyReturnPass());
|
2006-09-04 04:14:57 +00:00
|
|
|
// Must run branch selection immediately preceding the asm printer.
|
2012-07-02 19:48:31 +00:00
|
|
|
addPass(createPPCBranchSelectionPass());
|
2004-08-11 07:40:04 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-05-30 20:51:52 +00:00
|
|
|
bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
|
2009-07-15 22:33:19 +00:00
|
|
|
JITCodeEmitter &JCE) {
|
2009-05-30 20:51:52 +00:00
|
|
|
// Inform the subtarget that we are in JIT mode. FIXME: does this break macho
|
|
|
|
// writing?
|
|
|
|
Subtarget.SetJITMode();
|
2012-02-03 05:12:30 +00:00
|
|
|
|
2009-05-30 20:51:52 +00:00
|
|
|
// Machine code emitter pass for PowerPC.
|
|
|
|
PM.add(createPPCJITCodeEmitterPass(*this, JCE));
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
2013-01-25 23:05:59 +00:00
|
|
|
|
|
|
|
/// Adds the target transform info analysis passes for PowerPC to \p PM.
void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
  // Order matters: the target-independent BasicTTI pass goes in first and the
  // PPC-specific pass second, so that the PPC pass can delegate to the
  // target-independent layer when appropriate.
  PM.add(createBasicTargetTransformInfoPass(this));
  PM.add(createPPCTargetTransformInfoPass(this));
}
|
|
|
|
|