2012-02-18 12:03:15 +00:00
|
|
|
//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===//
|
2005-04-21 23:30:14 +00:00
|
|
|
//
|
2004-06-21 16:55:25 +00:00
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
2007-12-29 20:36:04 +00:00
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
2005-04-21 23:30:14 +00:00
|
|
|
//
|
2004-06-21 16:55:25 +00:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2004-07-08 17:58:04 +00:00
|
|
|
// This file contains a printer that converts from our internal representation
|
|
|
|
// of machine-dependent LLVM code to PowerPC assembly language. This printer is
|
2004-07-28 20:18:53 +00:00
|
|
|
// the output mechanism used by `llc'.
|
2004-06-21 16:55:25 +00:00
|
|
|
//
|
2004-07-08 17:58:04 +00:00
|
|
|
// Documentation at http://developer.apple.com/documentation/DeveloperTools/
|
|
|
|
// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
|
2004-06-29 17:13:26 +00:00
|
|
|
//
|
2004-06-21 16:55:25 +00:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2004-06-24 17:31:42 +00:00
|
|
|
#define DEBUG_TYPE "asmprinter"
|
2005-10-14 23:51:18 +00:00
|
|
|
#include "PPC.h"
|
2012-03-17 18:46:09 +00:00
|
|
|
#include "InstPrinter/PPCInstPrinter.h"
|
2011-07-26 00:24:13 +00:00
|
|
|
#include "MCTargetDesc/PPCPredicates.h"
|
2013-05-23 22:26:41 +00:00
|
|
|
#include "MCTargetDesc/PPCMCExpr.h"
|
2012-12-03 16:50:05 +00:00
|
|
|
#include "PPCSubtarget.h"
|
|
|
|
#include "PPCTargetMachine.h"
|
|
|
|
#include "llvm/ADT/MapVector.h"
|
|
|
|
#include "llvm/ADT/SmallString.h"
|
|
|
|
#include "llvm/ADT/StringExtras.h"
|
2004-06-21 16:55:25 +00:00
|
|
|
#include "llvm/Assembly/Writer.h"
|
2004-08-16 23:25:21 +00:00
|
|
|
#include "llvm/CodeGen/AsmPrinter.h"
|
2004-06-24 17:31:42 +00:00
|
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
2004-06-21 16:55:25 +00:00
|
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
2008-01-26 06:51:24 +00:00
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
2010-01-20 21:16:14 +00:00
|
|
|
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
|
2010-02-15 22:37:53 +00:00
|
|
|
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
2012-12-03 16:50:05 +00:00
|
|
|
#include "llvm/DebugInfo.h"
|
2013-01-02 11:36:10 +00:00
|
|
|
#include "llvm/IR/Constants.h"
|
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
|
|
#include "llvm/IR/Module.h"
|
2009-09-13 17:14:04 +00:00
|
|
|
#include "llvm/MC/MCAsmInfo.h"
|
2010-01-13 19:00:57 +00:00
|
|
|
#include "llvm/MC/MCContext.h"
|
2010-03-11 23:39:44 +00:00
|
|
|
#include "llvm/MC/MCExpr.h"
|
2010-11-14 19:53:02 +00:00
|
|
|
#include "llvm/MC/MCInst.h"
|
2012-11-26 13:34:22 +00:00
|
|
|
#include "llvm/MC/MCInstBuilder.h"
|
2012-12-03 16:50:05 +00:00
|
|
|
#include "llvm/MC/MCSectionELF.h"
|
2009-08-10 18:15:01 +00:00
|
|
|
#include "llvm/MC/MCSectionMachO.h"
|
2009-08-19 05:49:37 +00:00
|
|
|
#include "llvm/MC/MCStreamer.h"
|
2009-09-13 17:14:04 +00:00
|
|
|
#include "llvm/MC/MCSymbol.h"
|
2010-11-14 19:53:02 +00:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2010-08-04 22:07:50 +00:00
|
|
|
#include "llvm/Support/Debug.h"
|
2012-12-03 16:50:05 +00:00
|
|
|
#include "llvm/Support/ELF.h"
|
2009-07-08 20:53:28 +00:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2012-12-03 16:50:05 +00:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
2011-08-24 18:08:43 +00:00
|
|
|
#include "llvm/Support/TargetRegistry.h"
|
2010-04-04 08:18:47 +00:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2012-12-03 16:50:05 +00:00
|
|
|
#include "llvm/Target/Mangler.h"
|
|
|
|
#include "llvm/Target/TargetInstrInfo.h"
|
|
|
|
#include "llvm/Target/TargetOptions.h"
|
|
|
|
#include "llvm/Target/TargetRegisterInfo.h"
|
2004-08-16 23:25:21 +00:00
|
|
|
using namespace llvm;
|
2004-06-21 16:55:25 +00:00
|
|
|
|
2006-12-19 22:59:26 +00:00
|
|
|
namespace {
|
2009-10-25 06:33:48 +00:00
|
|
|
class PPCAsmPrinter : public AsmPrinter {
|
2009-02-24 08:30:20 +00:00
|
|
|
protected:
|
2012-11-12 19:13:24 +00:00
|
|
|
MapVector<MCSymbol*, MCSymbol*> TOC;
|
2006-09-20 17:07:15 +00:00
|
|
|
const PPCSubtarget &Subtarget;
|
2010-11-15 03:42:54 +00:00
|
|
|
uint64_t TOCLabelID;
|
2009-02-24 08:30:20 +00:00
|
|
|
public:
|
2010-04-04 08:18:47 +00:00
|
|
|
explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
|
|
|
|
: AsmPrinter(TM, Streamer),
|
2010-11-15 03:42:54 +00:00
|
|
|
Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
|
2005-04-21 23:30:14 +00:00
|
|
|
|
2004-06-21 16:55:25 +00:00
|
|
|
virtual const char *getPassName() const {
|
2004-09-04 05:00:00 +00:00
|
|
|
return "PowerPC Assembly Printer";
|
2004-06-21 16:55:25 +00:00
|
|
|
}
|
|
|
|
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI compatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
|
2004-08-14 22:09:10 +00:00
|
|
|
|
2010-01-28 01:28:58 +00:00
|
|
|
virtual void EmitInstruction(const MachineInstr *MI);
|
2004-08-14 23:27:29 +00:00
|
|
|
|
2010-11-15 03:39:06 +00:00
|
|
|
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
|
2008-08-08 18:22:59 +00:00
|
|
|
|
2006-02-01 22:38:46 +00:00
|
|
|
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
|
2010-04-04 05:29:35 +00:00
|
|
|
unsigned AsmVariant, const char *ExtraCode,
|
|
|
|
raw_ostream &O);
|
2006-02-24 20:27:40 +00:00
|
|
|
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
|
2010-04-04 05:29:35 +00:00
|
|
|
unsigned AsmVariant, const char *ExtraCode,
|
|
|
|
raw_ostream &O);
|
2004-09-04 05:00:00 +00:00
|
|
|
};
|
2005-04-21 23:30:14 +00:00
|
|
|
|
2008-08-08 18:22:59 +00:00
|
|
|
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
|
2009-10-25 06:33:48 +00:00
|
|
|
class PPCLinuxAsmPrinter : public PPCAsmPrinter {
|
2009-02-24 08:30:20 +00:00
|
|
|
public:
|
2010-04-04 08:18:47 +00:00
|
|
|
explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
|
|
|
|
: PPCAsmPrinter(TM, Streamer) {}
|
2006-12-21 20:26:09 +00:00
|
|
|
|
|
|
|
virtual const char *getPassName() const {
|
|
|
|
return "Linux PPC Assembly Printer";
|
|
|
|
}
|
|
|
|
|
2009-08-15 11:54:46 +00:00
|
|
|
bool doFinalization(Module &M);
|
2008-08-08 18:22:59 +00:00
|
|
|
|
2010-01-27 07:21:55 +00:00
|
|
|
virtual void EmitFunctionEntryLabel();
|
2012-08-28 19:06:55 +00:00
|
|
|
|
|
|
|
void EmitFunctionBodyEnd();
|
2006-12-21 20:26:09 +00:00
|
|
|
};
|
|
|
|
|
2008-08-08 18:22:59 +00:00
|
|
|
/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
|
|
|
|
/// OS X
|
2009-10-25 06:33:48 +00:00
|
|
|
class PPCDarwinAsmPrinter : public PPCAsmPrinter {
|
2009-02-24 08:30:20 +00:00
|
|
|
public:
|
2010-04-04 08:18:47 +00:00
|
|
|
explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
|
|
|
|
: PPCAsmPrinter(TM, Streamer) {}
|
2004-09-04 05:00:00 +00:00
|
|
|
|
|
|
|
virtual const char *getPassName() const {
|
|
|
|
return "Darwin PPC Assembly Printer";
|
|
|
|
}
|
2008-08-08 18:22:59 +00:00
|
|
|
|
2004-06-21 16:55:25 +00:00
|
|
|
bool doFinalization(Module &M);
|
2009-09-30 22:06:26 +00:00
|
|
|
void EmitStartOfAsmFile(Module &M);
|
2008-08-08 18:22:59 +00:00
|
|
|
|
2010-01-20 21:36:48 +00:00
|
|
|
void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
|
2004-06-21 16:55:25 +00:00
|
|
|
};
|
|
|
|
} // end of anonymous namespace
|
|
|
|
|
2010-11-15 03:39:06 +00:00
|
|
|
/// stripRegisterPrefix - Strip the alphabetic prefix from a numbered register
/// name so that only the number is left ("r3" -> "3", "f12" -> "12",
/// "v0" -> "0", "cr7" -> "7").  Used for Linux asm.
///
/// The prefix is removed only when it is immediately followed by a decimal
/// digit.  Any other name (for example "ctr", "lr", or a name that merely
/// happens to start with 'r', 'f' or 'v') is returned unchanged instead of
/// being truncated.
///
/// \param RegName  NUL-terminated register name; must not be null.
/// \returns a pointer into RegName, past the prefix if one was stripped.
static const char *stripRegisterPrefix(const char *RegName) {
  // Length of the prefix that is a candidate for removal.
  unsigned PrefixLen = 0;
  switch (RegName[0]) {
  case 'r':
  case 'f':
  case 'v':
    PrefixLen = 1;
    break;
  case 'c':
    if (RegName[1] == 'r')
      PrefixLen = 2;
    break;
  }

  if (PrefixLen == 0)
    return RegName;

  // RegName[PrefixLen] is at worst the terminating NUL, so this read is in
  // bounds.  Only a register *number* may follow a stripped prefix.
  const char Next = RegName[PrefixLen];
  if (Next >= '0' && Next <= '9')
    return RegName + PrefixLen;

  return RegName;
}
|
2004-09-04 05:00:00 +00:00
|
|
|
|
2010-11-15 03:39:06 +00:00
|
|
|
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
|
|
|
|
raw_ostream &O) {
|
|
|
|
const MachineOperand &MO = MI->getOperand(OpNo);
|
|
|
|
|
2004-06-21 16:55:25 +00:00
|
|
|
switch (MO.getType()) {
|
2010-11-15 03:39:06 +00:00
|
|
|
case MachineOperand::MO_Register: {
|
|
|
|
const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
|
|
|
|
// Linux assembler (Others?) does not take register mnemonics.
|
|
|
|
// FIXME - What about special registers used in mfspr/mtspr?
|
|
|
|
if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
|
|
|
|
O << RegName;
|
|
|
|
return;
|
|
|
|
}
|
2006-05-04 17:21:20 +00:00
|
|
|
case MachineOperand::MO_Immediate:
|
2010-11-15 03:39:06 +00:00
|
|
|
O << MO.getImm();
|
|
|
|
return;
|
2004-07-28 00:00:48 +00:00
|
|
|
|
2006-04-22 18:53:45 +00:00
|
|
|
case MachineOperand::MO_MachineBasicBlock:
|
2010-03-13 21:04:28 +00:00
|
|
|
O << *MO.getMBB()->getSymbol();
|
2006-04-22 18:53:45 +00:00
|
|
|
return;
|
|
|
|
case MachineOperand::MO_JumpTableIndex:
|
2009-08-22 21:43:10 +00:00
|
|
|
O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
|
2007-12-30 23:10:15 +00:00
|
|
|
<< '_' << MO.getIndex();
|
2006-04-22 18:53:45 +00:00
|
|
|
// FIXME: PIC relocation model
|
2004-06-21 16:55:25 +00:00
|
|
|
return;
|
2004-07-08 17:58:04 +00:00
|
|
|
case MachineOperand::MO_ConstantPoolIndex:
|
2009-08-22 21:43:10 +00:00
|
|
|
O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
|
2007-12-30 23:10:15 +00:00
|
|
|
<< '_' << MO.getIndex();
|
2004-06-21 16:55:25 +00:00
|
|
|
return;
|
2009-11-04 21:31:18 +00:00
|
|
|
case MachineOperand::MO_BlockAddress:
|
2010-01-17 21:43:43 +00:00
|
|
|
O << *GetBlockAddressSymbol(MO.getBlockAddress());
|
2009-11-04 21:31:18 +00:00
|
|
|
return;
|
2009-07-15 01:14:44 +00:00
|
|
|
case MachineOperand::MO_ExternalSymbol: {
|
2005-12-16 00:22:14 +00:00
|
|
|
// Computing the address of an external symbol, not calling it.
|
2010-01-16 02:09:06 +00:00
|
|
|
if (TM.getRelocationModel() == Reloc::Static) {
|
2010-01-20 21:16:14 +00:00
|
|
|
O << *GetExternalSymbolSymbol(MO.getSymbolName());
|
2010-01-16 02:09:06 +00:00
|
|
|
return;
|
2005-12-16 00:22:14 +00:00
|
|
|
}
|
2010-01-20 21:16:14 +00:00
|
|
|
|
2010-02-03 06:18:30 +00:00
|
|
|
MCSymbol *NLPSym =
|
2010-01-16 02:09:06 +00:00
|
|
|
OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+
|
|
|
|
MO.getSymbolName()+"$non_lazy_ptr");
|
2010-03-10 22:34:10 +00:00
|
|
|
MachineModuleInfoImpl::StubValueTy &StubSym =
|
2010-01-20 21:16:14 +00:00
|
|
|
MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym);
|
2010-03-10 22:34:10 +00:00
|
|
|
if (StubSym.getPointer() == 0)
|
|
|
|
StubSym = MachineModuleInfoImpl::
|
|
|
|
StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
|
2010-01-20 21:16:14 +00:00
|
|
|
|
2010-01-17 21:43:43 +00:00
|
|
|
O << *NLPSym;
|
2004-07-08 17:58:04 +00:00
|
|
|
return;
|
2009-07-15 01:14:44 +00:00
|
|
|
}
|
2004-08-13 09:32:01 +00:00
|
|
|
case MachineOperand::MO_GlobalAddress: {
|
2005-12-16 00:22:14 +00:00
|
|
|
// Computing the address of a global symbol, not calling it.
|
2010-04-15 01:51:59 +00:00
|
|
|
const GlobalValue *GV = MO.getGlobal();
|
2010-01-16 02:00:23 +00:00
|
|
|
MCSymbol *SymToPrint;
|
2004-08-13 09:32:01 +00:00
|
|
|
|
2004-10-17 23:01:34 +00:00
|
|
|
// External or weakly linked global variables need non-lazily-resolved stubs
|
2009-07-15 01:14:44 +00:00
|
|
|
if (TM.getRelocationModel() != Reloc::Static &&
|
|
|
|
(GV->isDeclaration() || GV->isWeakForLinker())) {
|
|
|
|
if (!GV->hasHiddenVisibility()) {
|
2010-01-16 18:37:32 +00:00
|
|
|
SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
|
2010-03-10 22:34:10 +00:00
|
|
|
MachineModuleInfoImpl::StubValueTy &StubSym =
|
|
|
|
MMI->getObjFileInfo<MachineModuleInfoMachO>()
|
|
|
|
.getGVStubEntry(SymToPrint);
|
|
|
|
if (StubSym.getPointer() == 0)
|
|
|
|
StubSym = MachineModuleInfoImpl::
|
2010-03-12 21:19:23 +00:00
|
|
|
StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
|
2009-07-15 01:14:44 +00:00
|
|
|
} else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
|
|
|
|
GV->hasAvailableExternallyLinkage()) {
|
2010-01-16 18:37:32 +00:00
|
|
|
SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
|
2010-01-20 21:16:14 +00:00
|
|
|
|
2010-03-10 22:34:10 +00:00
|
|
|
MachineModuleInfoImpl::StubValueTy &StubSym =
|
2010-01-20 21:16:14 +00:00
|
|
|
MMI->getObjFileInfo<MachineModuleInfoMachO>().
|
|
|
|
getHiddenGVStubEntry(SymToPrint);
|
2010-03-10 22:34:10 +00:00
|
|
|
if (StubSym.getPointer() == 0)
|
|
|
|
StubSym = MachineModuleInfoImpl::
|
2010-03-12 21:19:23 +00:00
|
|
|
StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
|
2009-07-15 01:14:44 +00:00
|
|
|
} else {
|
2010-03-12 21:19:23 +00:00
|
|
|
SymToPrint = Mang->getSymbol(GV);
|
2005-12-16 00:22:14 +00:00
|
|
|
}
|
2009-07-15 01:14:44 +00:00
|
|
|
} else {
|
2010-03-12 21:19:23 +00:00
|
|
|
SymToPrint = Mang->getSymbol(GV);
|
2004-06-21 16:55:25 +00:00
|
|
|
}
|
2010-01-16 02:00:23 +00:00
|
|
|
|
2010-01-17 21:43:43 +00:00
|
|
|
O << *SymToPrint;
|
2008-08-08 18:22:59 +00:00
|
|
|
|
2010-04-03 22:28:33 +00:00
|
|
|
printOffset(MO.getOffset(), O);
|
2004-06-21 16:55:25 +00:00
|
|
|
return;
|
2004-08-13 09:32:01 +00:00
|
|
|
}
|
2005-04-21 23:30:14 +00:00
|
|
|
|
2004-06-21 16:55:25 +00:00
|
|
|
default:
|
2004-07-08 17:58:04 +00:00
|
|
|
O << "<unknown operand type: " << MO.getType() << ">";
|
2004-06-25 15:11:34 +00:00
|
|
|
return;
|
2004-06-21 16:55:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-02-23 19:31:10 +00:00
|
|
|
/// PrintAsmOperand - Print out an operand for an inline asm expression.
|
|
|
|
///
|
|
|
|
bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
|
2008-08-08 18:22:59 +00:00
|
|
|
unsigned AsmVariant,
|
2010-04-04 05:29:35 +00:00
|
|
|
const char *ExtraCode, raw_ostream &O) {
|
2006-02-23 19:31:10 +00:00
|
|
|
// Does this asm operand have a single letter operand modifier?
|
|
|
|
if (ExtraCode && ExtraCode[0]) {
|
|
|
|
if (ExtraCode[1] != 0) return true; // Unknown modifier.
|
2008-08-08 18:22:59 +00:00
|
|
|
|
2006-02-23 19:31:10 +00:00
|
|
|
switch (ExtraCode[0]) {
|
2012-06-26 13:49:27 +00:00
|
|
|
default:
|
|
|
|
// See if this is a generic print operand
|
|
|
|
return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
|
2007-01-25 02:52:50 +00:00
|
|
|
case 'c': // Don't print "$" before a global var name or constant.
|
2010-11-15 03:39:06 +00:00
|
|
|
break; // PPC never has a prefix.
|
2008-08-08 18:22:59 +00:00
|
|
|
case 'L': // Write second word of DImode reference.
|
2006-02-23 19:31:10 +00:00
|
|
|
// Verify that this operand has two consecutive registers.
|
2008-10-03 15:45:36 +00:00
|
|
|
if (!MI->getOperand(OpNo).isReg() ||
|
2006-02-23 19:31:10 +00:00
|
|
|
OpNo+1 == MI->getNumOperands() ||
|
2008-10-03 15:45:36 +00:00
|
|
|
!MI->getOperand(OpNo+1).isReg())
|
2006-02-23 19:31:10 +00:00
|
|
|
return true;
|
|
|
|
++OpNo; // Return the high-part.
|
|
|
|
break;
|
2007-04-24 22:51:03 +00:00
|
|
|
case 'I':
|
|
|
|
// Write 'i' if an integer constant, otherwise nothing. Used to print
|
|
|
|
// addi vs add, etc.
|
2008-10-03 15:45:36 +00:00
|
|
|
if (MI->getOperand(OpNo).isImm())
|
2007-04-24 22:51:03 +00:00
|
|
|
O << "i";
|
|
|
|
return false;
|
2006-02-23 19:31:10 +00:00
|
|
|
}
|
|
|
|
}
|
2008-08-08 18:22:59 +00:00
|
|
|
|
2010-04-04 04:47:45 +00:00
|
|
|
printOperand(MI, OpNo, O);
|
2006-02-23 19:31:10 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-08-18 00:18:39 +00:00
|
|
|
// At the moment, all inline asm memory operands are a single register.
|
|
|
|
// In any case, the output of this routine should always be just one
|
|
|
|
// assembler operand.
|
|
|
|
|
2006-02-24 20:27:40 +00:00
|
|
|
bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
|
2008-08-08 18:22:59 +00:00
|
|
|
unsigned AsmVariant,
|
2010-04-04 05:29:35 +00:00
|
|
|
const char *ExtraCode,
|
|
|
|
raw_ostream &O) {
|
2012-11-05 18:18:42 +00:00
|
|
|
if (ExtraCode && ExtraCode[0]) {
|
|
|
|
if (ExtraCode[1] != 0) return true; // Unknown modifier.
|
|
|
|
|
|
|
|
switch (ExtraCode[0]) {
|
|
|
|
default: return true; // Unknown modifier.
|
|
|
|
case 'y': // A memory reference for an X-form instruction
|
|
|
|
{
|
|
|
|
const char *RegName = "r0";
|
|
|
|
if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
|
|
|
|
O << RegName << ", ";
|
|
|
|
printOperand(MI, OpNo, O);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-11-15 03:39:06 +00:00
|
|
|
assert(MI->getOperand(OpNo).isReg());
|
2009-08-26 18:10:32 +00:00
|
|
|
O << "0(";
|
2010-04-04 04:47:45 +00:00
|
|
|
printOperand(MI, OpNo, O);
|
2009-08-26 18:10:32 +00:00
|
|
|
O << ")";
|
2006-02-24 20:27:40 +00:00
|
|
|
return false;
|
|
|
|
}
|
2006-02-23 19:31:10 +00:00
|
|
|
|
2006-11-04 05:27:39 +00:00
|
|
|
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
|
|
|
|
/// exists for it. If not, create one. Then return a symbol that references
|
|
|
|
/// the TOC entry.
|
|
|
|
MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
|
|
|
|
|
|
|
|
MCSymbol *&TOCEntry = TOC[Sym];
|
|
|
|
|
|
|
|
// To avoid name clash check if the name already exists.
|
|
|
|
while (TOCEntry == 0) {
|
|
|
|
if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
|
|
|
|
"C" + Twine(TOCLabelID++)) == 0) {
|
|
|
|
TOCEntry = GetTempSymbol("C", TOCLabelID);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return TOCEntry;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-01-28 01:28:58 +00:00
|
|
|
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
|
2004-08-14 22:09:10 +00:00
|
|
|
/// the current output stream.
|
2004-06-21 16:55:25 +00:00
|
|
|
///
|
2010-01-28 01:28:58 +00:00
|
|
|
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
2010-11-15 03:39:06 +00:00
|
|
|
MCInst TmpInst;
|
|
|
|
|
|
|
|
// Lower multi-instruction pseudo operations.
|
|
|
|
switch (MI->getOpcode()) {
|
|
|
|
default: break;
|
2013-06-16 20:34:27 +00:00
|
|
|
case TargetOpcode::DBG_VALUE:
|
|
|
|
llvm_unreachable("Should be handled target independently");
|
2010-11-15 03:39:06 +00:00
|
|
|
case PPC::MovePCtoLR:
|
|
|
|
case PPC::MovePCtoLR8: {
|
|
|
|
// Transform %LR = MovePCtoLR
|
|
|
|
// Into this, where the label is the PIC base:
|
|
|
|
// bl L1$pb
|
|
|
|
// L1$pb:
|
|
|
|
MCSymbol *PICBase = MF->getPICBaseSymbol();
|
2010-11-14 22:03:15 +00:00
|
|
|
|
2010-11-15 03:39:06 +00:00
|
|
|
// Emit the 'bl'.
|
2013-03-22 15:24:13 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL)
|
2012-11-26 13:34:22 +00:00
|
|
|
// FIXME: We would like an efficient form for this, so we don't have to do
|
|
|
|
// a lot of extra uniquing.
|
2012-11-26 18:05:52 +00:00
|
|
|
.addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
|
2010-11-15 03:39:06 +00:00
|
|
|
|
|
|
|
// Emit the label.
|
|
|
|
OutStreamer.EmitLabel(PICBase);
|
|
|
|
return;
|
|
|
|
}
|
2012-08-24 16:26:02 +00:00
|
|
|
case PPC::LDtocJTI:
|
|
|
|
case PPC::LDtocCPT:
|
2010-11-15 03:39:06 +00:00
|
|
|
case PPC::LDtoc: {
|
|
|
|
// Transform %X3 = LDtoc <ga:@min1>, %X2
|
2013-06-20 16:58:14 +00:00
|
|
|
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
|
2012-08-24 16:26:02 +00:00
|
|
|
|
2010-11-15 03:39:06 +00:00
|
|
|
// Change the opcode to LD, and the global address operand to be a
|
|
|
|
// reference to the TOC entry we will synthesize later.
|
|
|
|
TmpInst.setOpcode(PPC::LD);
|
|
|
|
const MachineOperand &MO = MI->getOperand(1);
|
2012-08-24 16:26:02 +00:00
|
|
|
|
|
|
|
// Map symbol -> label of TOC entry
|
|
|
|
assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
|
|
|
|
MCSymbol *MOSymbol = 0;
|
|
|
|
if (MO.isGlobal())
|
|
|
|
MOSymbol = Mang->getSymbol(MO.getGlobal());
|
|
|
|
else if (MO.isCPI())
|
|
|
|
MOSymbol = GetCPISymbol(MO.getIndex());
|
|
|
|
else if (MO.isJTI())
|
|
|
|
MOSymbol = GetJTISymbol(MO.getIndex());
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
|
|
|
|
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
|
2012-09-18 17:10:37 +00:00
|
|
|
|
2010-11-15 03:39:06 +00:00
|
|
|
const MCExpr *Exp =
|
2013-06-21 14:42:20 +00:00
|
|
|
MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
|
2010-11-15 03:39:06 +00:00
|
|
|
OutContext);
|
|
|
|
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
|
2010-11-14 19:53:02 +00:00
|
|
|
OutStreamer.EmitInstruction(TmpInst);
|
|
|
|
return;
|
|
|
|
}
|
2010-11-15 03:39:06 +00:00
|
|
|
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
case PPC::ADDIStocHA: {
|
|
|
|
// Transform %Xd = ADDIStocHA %X2, <ga:@sym>
|
2013-06-20 16:58:14 +00:00
|
|
|
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
|
|
|
|
// Change the opcode to ADDIS8. If the global address is external,
|
|
|
|
// has common linkage, is a function address, or is a jump table
|
|
|
|
// address, then generate a TOC entry and reference that. Otherwise
|
|
|
|
// reference the symbol directly.
|
|
|
|
TmpInst.setOpcode(PPC::ADDIS8);
|
|
|
|
const MachineOperand &MO = MI->getOperand(2);
|
|
|
|
assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
|
|
|
|
"Invalid operand for ADDIStocHA!");
|
|
|
|
MCSymbol *MOSymbol = 0;
|
|
|
|
bool IsExternal = false;
|
|
|
|
bool IsFunction = false;
|
|
|
|
bool IsCommon = false;
|
2013-01-07 19:29:18 +00:00
|
|
|
bool IsAvailExt = false;
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
|
|
|
|
if (MO.isGlobal()) {
|
|
|
|
const GlobalValue *GValue = MO.getGlobal();
|
2013-01-07 19:29:18 +00:00
|
|
|
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
|
|
|
|
const GlobalValue *RealGValue = GAlias ?
|
|
|
|
GAlias->resolveAliasedGlobal(false) : GValue;
|
|
|
|
MOSymbol = Mang->getSymbol(RealGValue);
|
|
|
|
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility in passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
IsExternal = GVar && !GVar->hasInitializer();
|
2013-01-07 19:29:18 +00:00
|
|
|
IsCommon = GVar && RealGValue->hasCommonLinkage();
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility in passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
IsFunction = !GVar;
|
2013-01-07 19:29:18 +00:00
|
|
|
IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage();
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility in passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
} else if (MO.isCPI())
|
|
|
|
MOSymbol = GetCPISymbol(MO.getIndex());
|
|
|
|
else if (MO.isJTI())
|
|
|
|
MOSymbol = GetJTISymbol(MO.getIndex());
|
|
|
|
|
2013-01-07 19:29:18 +00:00
|
|
|
if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI())
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility in passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
|
|
|
|
|
|
|
|
const MCExpr *Exp =
|
2013-06-21 14:42:20 +00:00
|
|
|
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI compatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
OutContext);
|
|
|
|
TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
|
|
|
|
OutStreamer.EmitInstruction(TmpInst);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case PPC::LDtocL: {
|
|
|
|
// Transform %Xd = LDtocL <ga:@sym>, %Xs
|
2013-06-20 16:58:14 +00:00
|
|
|
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
|
2013-03-26 10:55:45 +00:00
|
|
|
// Change the opcode to LD. If the global address is external, has
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
// common linkage, or is a jump table address, then reference the
|
|
|
|
// associated TOC entry. Otherwise reference the symbol directly.
|
2013-03-26 10:55:45 +00:00
|
|
|
TmpInst.setOpcode(PPC::LD);
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
const MachineOperand &MO = MI->getOperand(1);
|
2013-02-21 17:12:27 +00:00
|
|
|
assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
|
|
|
|
"Invalid operand for LDtocL!");
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
MCSymbol *MOSymbol = 0;
|
|
|
|
|
|
|
|
if (MO.isJTI())
|
|
|
|
MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
|
2013-02-21 17:12:27 +00:00
|
|
|
else if (MO.isCPI())
|
|
|
|
MOSymbol = GetCPISymbol(MO.getIndex());
|
|
|
|
else if (MO.isGlobal()) {
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
const GlobalValue *GValue = MO.getGlobal();
|
2013-01-07 19:29:18 +00:00
|
|
|
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
|
|
|
|
const GlobalValue *RealGValue = GAlias ?
|
|
|
|
GAlias->resolveAliasedGlobal(false) : GValue;
|
|
|
|
MOSymbol = Mang->getSymbol(RealGValue);
|
|
|
|
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
|
2013-01-07 19:29:18 +00:00
|
|
|
if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
|
|
|
|
RealGValue->hasAvailableExternallyLinkage())
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
|
|
|
|
}
|
|
|
|
|
|
|
|
const MCExpr *Exp =
|
2013-06-21 14:42:20 +00:00
|
|
|
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
OutContext);
|
|
|
|
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
|
|
|
|
OutStreamer.EmitInstruction(TmpInst);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case PPC::ADDItocL: {
|
|
|
|
// Transform %Xd = ADDItocL %Xs, <ga:@sym>
|
2013-06-20 16:58:14 +00:00
|
|
|
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
|
2013-03-26 10:55:20 +00:00
|
|
|
// Change the opcode to ADDI8. If the global address is external, then
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
// generate a TOC entry and reference that. Otherwise reference the
|
|
|
|
// symbol directly.
|
2013-03-26 10:55:20 +00:00
|
|
|
TmpInst.setOpcode(PPC::ADDI8);
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
const MachineOperand &MO = MI->getOperand(2);
|
|
|
|
assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
|
|
|
|
MCSymbol *MOSymbol = 0;
|
|
|
|
bool IsExternal = false;
|
|
|
|
bool IsFunction = false;
|
|
|
|
|
|
|
|
if (MO.isGlobal()) {
|
|
|
|
const GlobalValue *GValue = MO.getGlobal();
|
2013-01-07 19:29:18 +00:00
|
|
|
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
|
|
|
|
const GlobalValue *RealGValue = GAlias ?
|
|
|
|
GAlias->resolveAliasedGlobal(false) : GValue;
|
|
|
|
MOSymbol = Mang->getSymbol(RealGValue);
|
|
|
|
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
IsExternal = GVar && !GVar->hasInitializer();
|
|
|
|
IsFunction = !GVar;
|
|
|
|
} else if (MO.isCPI())
|
|
|
|
MOSymbol = GetCPISymbol(MO.getIndex());
|
|
|
|
|
|
|
|
if (IsFunction || IsExternal)
|
|
|
|
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
|
|
|
|
|
|
|
|
const MCExpr *Exp =
|
2013-06-21 14:42:20 +00:00
|
|
|
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
|
This patch implements medium code model support for 64-bit PowerPC.
The default for 64-bit PowerPC is small code model, in which TOC entries
must be addressable using a 16-bit offset from the TOC pointer. Additionally,
only TOC entries are addressed via the TOC pointer.
With medium code model, TOC entries and data sections can all be addressed
via the TOC pointer using a 32-bit offset. Cooperation with the linker
allows 16-bit offsets to be used when these are sufficient, reducing the
number of extra instructions that need to be executed. Medium code model
also does not generate explicit TOC entries in ".section .toc" for variables
that are wholly internal to the compilation unit.
Consider a load of an external 4-byte integer. With small code model, the
compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
With medium model, it instead generates:
addis 3, 2, .LC1@toc@ha
ld 3, .LC1@toc@l(3)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc ei[TC],ei
Here .LC1@toc@ha is a relocation requesting the upper 16 bits of the
32-bit offset of ei's TOC entry from the TOC base pointer. Similarly,
.LC1@toc@l is a relocation requesting the lower 16 bits. Note that if
the linker determines that ei's TOC entry is within a 16-bit offset of
the TOC base pointer, it will replace the "addis" with a "nop", and
replace the "ld" with the identical "ld" instruction from the small
code model example.
Consider next a load of a function-scope static integer. For small code
model, the compiler generates:
ld 3, .LC1@toc(2)
lwz 4, 0(3)
.section .toc,"aw",@progbits
.LC1:
.tc test_fn_static.si[TC],test_fn_static.si
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
For medium code model, the compiler generates:
addis 3, 2, test_fn_static.si@toc@ha
addi 3, 3, test_fn_static.si@toc@l
lwz 4, 0(3)
.type test_fn_static.si,@object
.local test_fn_static.si
.comm test_fn_static.si,4,4
Again, the linker may replace the "addis" with a "nop", calculating only
a 16-bit offset when this is sufficient.
Note that it would be more efficient for the compiler to generate:
addis 3, 2, test_fn_static.si@toc@ha
lwz 4, test_fn_static.si@toc@l(3)
The current patch does not perform this optimization yet. This will be
addressed as a peephole optimization in a later patch.
For the moment, the default code model for 64-bit PowerPC will remain the
small code model. We plan to eventually change the default to medium code
model, which matches current upstream GCC behavior. Note that the different
code models are ABI-compatible, so code compiled with different models will
be linked and execute correctly.
I've tested the regression suite and the application/benchmark test suite in
two ways: Once with the patch as submitted here, and once with additional
logic to force medium code model as the default. The tests all compile
cleanly, with one exception. The mandel-2 application test fails due to an
unrelated ABI incompatibility with passing complex numbers. It just so happens
that small code model was incredibly lucky, in that temporary values in
floating-point registers held the expected values needed by the external
library routine that was called incorrectly. My current thought is to correct
the ABI problems with _Complex before making medium code model the default,
to avoid introducing this "regression."
Here are a few comments on how the patch works, since the selection code
can be difficult to follow:
The existing logic for small code model defines three pseudo-instructions:
LDtoc for most uses, LDtocJTI for jump table addresses, and LDtocCPT for
constant pool addresses. These are expanded by SelectCodeCommon(). The
pseudo-instruction approach doesn't work for medium code model, because
we need to generate two instructions when we match the same pattern.
Instead, new logic in PPCDAGToDAGISel::Select() intercepts the TOC_ENTRY
node for medium code model, and generates an ADDIStocHA followed by either
a LDtocL or an ADDItocL. These new node types correspond naturally to
the sequences described above.
The addis/ld sequence is generated for the following cases:
* Jump table addresses
* Function addresses
* External global variables
* Tentative definitions of global variables (common linkage)
The addis/addi sequence is generated for the following cases:
* Constant pool entries
* File-scope static global variables
* Function-scope static variables
Expanding to the two-instruction sequences at select time exposes the
instructions to subsequent optimization, particularly scheduling.
The rest of the processing occurs at assembly time, in
PPCAsmPrinter::EmitInstruction. Each of the instructions is converted to
a "real" PowerPC instruction. When a TOC entry needs to be created, this
is done here in the same manner as for the existing LDtoc, LDtocJTI, and
LDtocCPT pseudo-instructions (I factored out a new routine to handle this).
I had originally thought that if a TOC entry was needed for LDtocL or
ADDItocL, it would already have been generated for the previous ADDIStocHA.
However, at higher optimization levels, the ADDIStocHA may appear in a
different block, which may be assembled textually following the block
containing the LDtocL or ADDItocL. So it is necessary to include the
possibility of creating a new TOC entry for those two instructions.
Note that for LDtocL, we generate a new form of LD called LDrs. This
allows specifying the @toc@l relocation for the offset field of the LD
instruction (i.e., the offset is replaced by a SymbolLo relocation).
When the peephole optimization described above is added, we will need
to do similar things for all immediate-form load and store operations.
The seven "mcm-n.ll" test cases are kept separate because otherwise the
intermingling of various TOC entries and so forth makes the tests fragile
and hard to understand.
The above assumes use of an external assembler. For use of the
integrated assembler, new relocations are added and used by
PPCELFObjectWriter. Testing is done with "mcm-obj.ll", which tests for
proper generation of the various relocations for the same sequences
tested with the external assembler.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168708 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-27 17:35:46 +00:00
|
|
|
OutContext);
|
|
|
|
TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
|
|
|
|
OutStreamer.EmitInstruction(TmpInst);
|
|
|
|
return;
|
|
|
|
}
|
This patch improves the 64-bit PowerPC InitialExec TLS support by providing
for a wider range of GOT entries that can hold thread-relative offsets.
This matches the behavior of GCC, which was not documented in the PPC64 TLS
ABI. The ABI will be updated with the new code sequence.
Former sequence:
ld 9,x@got@tprel(2)
add 9,9,x@tls
New sequence:
addis 9,2,x@got@tprel@ha
ld 9,x@got@tprel@l(9)
add 9,9,x@tls
Note that a linker optimization exists to transform the new sequence into
the shorter sequence when appropriate, by replacing the addis with a nop
and modifying the base register and relocation type of the ld.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170209 91177308-0d34-0410-b5e6-96231b3b80d8
2012-12-14 17:02:38 +00:00
|
|
|
case PPC::ADDISgotTprelHA: {
|
|
|
|
// Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym>
|
|
|
|
// Into: %Xd = ADDIS8 %X2, sym@got@tprel@ha
|
|
|
|
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
|
|
|
|
const MachineOperand &MO = MI->getOperand(2);
|
|
|
|
const GlobalValue *GValue = MO.getGlobal();
|
|
|
|
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
|
|
|
|
const MCExpr *SymGotTprel =
|
2013-06-21 14:42:20 +00:00
|
|
|
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
|
This patch improves the 64-bit PowerPC InitialExec TLS support by providing
for a wider range of GOT entries that can hold thread-relative offsets.
This matches the behavior of GCC, which was not documented in the PPC64 TLS
ABI. The ABI will be updated with the new code sequence.
Former sequence:
ld 9,x@got@tprel(2)
add 9,9,x@tls
New sequence:
addis 9,2,x@got@tprel@ha
ld 9,x@got@tprel@l(9)
add 9,9,x@tls
Note that a linker optimization exists to transform the new sequence into
the shorter sequence when appropriate, by replacing the addis with a nop
and modifying the base register and relocation type of the ld.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170209 91177308-0d34-0410-b5e6-96231b3b80d8
2012-12-14 17:02:38 +00:00
|
|
|
OutContext);
|
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
|
|
|
|
.addReg(MI->getOperand(0).getReg())
|
|
|
|
.addReg(PPC::X2)
|
|
|
|
.addExpr(SymGotTprel));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case PPC::LDgotTprelL: {
|
|
|
|
// Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
|
2013-06-20 16:58:14 +00:00
|
|
|
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
|
2012-12-04 16:18:08 +00:00
|
|
|
|
2013-03-26 10:55:45 +00:00
|
|
|
// Change the opcode to LD.
|
|
|
|
TmpInst.setOpcode(PPC::LD);
|
2012-12-04 16:18:08 +00:00
|
|
|
const MachineOperand &MO = MI->getOperand(1);
|
|
|
|
const GlobalValue *GValue = MO.getGlobal();
|
|
|
|
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
|
|
|
|
const MCExpr *Exp =
|
2013-06-21 14:42:20 +00:00
|
|
|
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
|
2012-12-04 16:18:08 +00:00
|
|
|
OutContext);
|
|
|
|
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
|
|
|
|
OutStreamer.EmitInstruction(TmpInst);
|
|
|
|
return;
|
|
|
|
}
|
This patch implements the general dynamic TLS model for 64-bit PowerPC.
Given a thread-local symbol x with global-dynamic access, the generated
code to obtain x's address is:
Instruction Relocation Symbol
addis ra,r2,x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
addi r3,ra,x@got@tlsgd@l R_PPC64_GOT_TLSGD16_L x
bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
R_PPC64_REL24 __tls_get_addr
nop
<use address in r3>
The implementation borrows from the medium code model work for introducing
special forms of ADDIS and ADDI into the DAG representation. This is made
slightly more complicated by having to introduce a call to the external
function __tls_get_addr. Using the full call machinery is overkill and,
more importantly, makes it difficult to add a special relocation. So I've
introduced another opcode GET_TLS_ADDR to represent the function call, and
surrounded it with register copies to set up the parameter and return value.
Most of the code is pretty straightforward. I ran into one peculiarity
when I introduced a new PPC opcode BL8_NOP_ELF_TLSGD, which is just like
BL8_NOP_ELF except that it takes another parameter to represent the symbol
("x" above) that requires a relocation on the call. Something in the
TblGen machinery causes BL8_NOP_ELF and BL8_NOP_ELF_TLSGD to be treated
identically during the emit phase, so this second operand was never
visited to generate relocations. This is the reason for the slightly
messy workaround in PPCMCCodeEmitter.cpp:getDirectBrEncoding().
Two new tests are included to demonstrate correct external assembly and
correct generation of relocations using the integrated assembler.
Comments welcome!
Thanks,
Bill
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169910 91177308-0d34-0410-b5e6-96231b3b80d8
2012-12-11 20:30:11 +00:00
|
|
|
case PPC::ADDIStlsgdHA: {
|
|
|
|
// Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym>
|
|
|
|
// Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
|
|
|
|
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
|
|
|
|
const MachineOperand &MO = MI->getOperand(2);
|
|
|
|
const GlobalValue *GValue = MO.getGlobal();
|
|
|
|
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
|
|
|
|
const MCExpr *SymGotTlsGD =
|
2013-06-21 14:42:20 +00:00
|
|
|
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA,
|
This patch implements the general dynamic TLS model for 64-bit PowerPC.
Given a thread-local symbol x with global-dynamic access, the generated
code to obtain x's address is:
Instruction Relocation Symbol
addis ra,r2,x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
addi r3,ra,x@got@tlsgd@l R_PPC64_GOT_TLSGD16_L x
bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
R_PPC64_REL24 __tls_get_addr
nop
<use address in r3>
The implementation borrows from the medium code model work for introducing
special forms of ADDIS and ADDI into the DAG representation. This is made
slightly more complicated by having to introduce a call to the external
function __tls_get_addr. Using the full call machinery is overkill and,
more importantly, makes it difficult to add a special relocation. So I've
introduced another opcode GET_TLS_ADDR to represent the function call, and
surrounded it with register copies to set up the parameter and return value.
Most of the code is pretty straightforward. I ran into one peculiarity
when I introduced a new PPC opcode BL8_NOP_ELF_TLSGD, which is just like
BL8_NOP_ELF except that it takes another parameter to represent the symbol
("x" above) that requires a relocation on the call. Something in the
TblGen machinery causes BL8_NOP_ELF and BL8_NOP_ELF_TLSGD to be treated
identically during the emit phase, so this second operand was never
visited to generate relocations. This is the reason for the slightly
messy workaround in PPCMCCodeEmitter.cpp:getDirectBrEncoding().
Two new tests are included to demonstrate correct external assembly and
correct generation of relocations using the integrated assembler.
Comments welcome!
Thanks,
Bill
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169910 91177308-0d34-0410-b5e6-96231b3b80d8
2012-12-11 20:30:11 +00:00
|
|
|
OutContext);
|
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
|
|
|
|
.addReg(MI->getOperand(0).getReg())
|
|
|
|
.addReg(PPC::X2)
|
|
|
|
.addExpr(SymGotTlsGD));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case PPC::ADDItlsgdL: {
|
|
|
|
// Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
|
2013-03-26 10:55:20 +00:00
|
|
|
// Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l
|
This patch implements the general dynamic TLS model for 64-bit PowerPC.
Given a thread-local symbol x with global-dynamic access, the generated
code to obtain x's address is:
Instruction Relocation Symbol
addis ra,r2,x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
addi r3,ra,x@got@tlsgd@l R_PPC64_GOT_TLSGD16_L x
bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
R_PPC64_REL24 __tls_get_addr
nop
<use address in r3>
The implementation borrows from the medium code model work for introducing
special forms of ADDIS and ADDI into the DAG representation. This is made
slightly more complicated by having to introduce a call to the external
function __tls_get_addr. Using the full call machinery is overkill and,
more importantly, makes it difficult to add a special relocation. So I've
introduced another opcode GET_TLS_ADDR to represent the function call, and
surrounded it with register copies to set up the parameter and return value.
Most of the code is pretty straightforward. I ran into one peculiarity
when I introduced a new PPC opcode BL8_NOP_ELF_TLSGD, which is just like
BL8_NOP_ELF except that it takes another parameter to represent the symbol
("x" above) that requires a relocation on the call. Something in the
TblGen machinery causes BL8_NOP_ELF and BL8_NOP_ELF_TLSGD to be treated
identically during the emit phase, so this second operand was never
visited to generate relocations. This is the reason for the slightly
messy workaround in PPCMCCodeEmitter.cpp:getDirectBrEncoding().
Two new tests are included to demonstrate correct external assembly and
correct generation of relocations using the integrated assembler.
Comments welcome!
Thanks,
Bill
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169910 91177308-0d34-0410-b5e6-96231b3b80d8
2012-12-11 20:30:11 +00:00
|
|
|
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
|
|
|
|
const MachineOperand &MO = MI->getOperand(2);
|
|
|
|
const GlobalValue *GValue = MO.getGlobal();
|
|
|
|
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
|
|
|
|
const MCExpr *SymGotTlsGD =
|
2013-06-21 14:42:20 +00:00
|
|
|
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO,
|
This patch implements the general dynamic TLS model for 64-bit PowerPC.
Given a thread-local symbol x with global-dynamic access, the generated
code to obtain x's address is:
Instruction Relocation Symbol
addis ra,r2,x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
addi r3,ra,x@got@tlsgd@l R_PPC64_GOT_TLSGD16_L x
bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
R_PPC64_REL24 __tls_get_addr
nop
<use address in r3>
The implementation borrows from the medium code model work for introducing
special forms of ADDIS and ADDI into the DAG representation. This is made
slightly more complicated by having to introduce a call to the external
function __tls_get_addr. Using the full call machinery is overkill and,
more importantly, makes it difficult to add a special relocation. So I've
introduced another opcode GET_TLS_ADDR to represent the function call, and
surrounded it with register copies to set up the parameter and return value.
Most of the code is pretty straightforward. I ran into one peculiarity
when I introduced a new PPC opcode BL8_NOP_ELF_TLSGD, which is just like
BL8_NOP_ELF except that it takes another parameter to represent the symbol
("x" above) that requires a relocation on the call. Something in the
TblGen machinery causes BL8_NOP_ELF and BL8_NOP_ELF_TLSGD to be treated
identically during the emit phase, so this second operand was never
visited to generate relocations. This is the reason for the slightly
messy workaround in PPCMCCodeEmitter.cpp:getDirectBrEncoding().
Two new tests are included to demonstrate correct external assembly and
correct generation of relocations using the integrated assembler.
Comments welcome!
Thanks,
Bill
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169910 91177308-0d34-0410-b5e6-96231b3b80d8
2012-12-11 20:30:11 +00:00
|
|
|
OutContext);
|
2013-03-26 10:55:20 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
|
This patch implements the general dynamic TLS model for 64-bit PowerPC.
Given a thread-local symbol x with global-dynamic access, the generated
code to obtain x's address is:
Instruction Relocation Symbol
addis ra,r2,x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
addi r3,ra,x@got@tlsgd@l R_PPC64_GOT_TLSGD16_L x
bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
R_PPC64_REL24 __tls_get_addr
nop
<use address in r3>
The implementation borrows from the medium code model work for introducing
special forms of ADDIS and ADDI into the DAG representation. This is made
slightly more complicated by having to introduce a call to the external
function __tls_get_addr. Using the full call machinery is overkill and,
more importantly, makes it difficult to add a special relocation. So I've
introduced another opcode GET_TLS_ADDR to represent the function call, and
surrounded it with register copies to set up the parameter and return value.
Most of the code is pretty straightforward. I ran into one peculiarity
when I introduced a new PPC opcode BL8_NOP_ELF_TLSGD, which is just like
BL8_NOP_ELF except that it takes another parameter to represent the symbol
("x" above) that requires a relocation on the call. Something in the
TblGen machinery causes BL8_NOP_ELF and BL8_NOP_ELF_TLSGD to be treated
identically during the emit phase, so this second operand was never
visited to generate relocations. This is the reason for the slightly
messy workaround in PPCMCCodeEmitter.cpp:getDirectBrEncoding().
Two new tests are included to demonstrate correct external assembly and
correct generation of relocations using the integrated assembler.
Comments welcome!
Thanks,
Bill
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169910 91177308-0d34-0410-b5e6-96231b3b80d8
2012-12-11 20:30:11 +00:00
|
|
|
.addReg(MI->getOperand(0).getReg())
|
|
|
|
.addReg(MI->getOperand(1).getReg())
|
|
|
|
.addExpr(SymGotTlsGD));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case PPC::GETtlsADDR: {
|
|
|
|
// Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
|
2013-03-22 15:24:13 +00:00
|
|
|
// Into: BL8_NOP_TLSGD __tls_get_addr(sym@tlsgd)
|
This patch implements the general dynamic TLS model for 64-bit PowerPC.
Given a thread-local symbol x with global-dynamic access, the generated
code to obtain x's address is:
  Instruction                     Relocation              Symbol
  addis ra,r2,x@got@tlsgd@ha      R_PPC64_GOT_TLSGD16_HA  x
  addi  r3,ra,x@got@tlsgd@l       R_PPC64_GOT_TLSGD16_L   x
  bl    __tls_get_addr(x@tlsgd)   R_PPC64_TLSGD           x
                                  R_PPC64_REL24           __tls_get_addr
  nop
  <use address in r3>
The implementation borrows from the medium code model work for introducing
special forms of ADDIS and ADDI into the DAG representation. This is made
slightly more complicated by having to introduce a call to the external
function __tls_get_addr. Using the full call machinery is overkill and,
more importantly, makes it difficult to add a special relocation. So I've
introduced another opcode GET_TLS_ADDR to represent the function call, and
surrounded it with register copies to set up the parameter and return value.
Most of the code is pretty straightforward. I ran into one peculiarity
when I introduced a new PPC opcode BL8_NOP_ELF_TLSGD, which is just like
BL8_NOP_ELF except that it takes another parameter to represent the symbol
("x" above) that requires a relocation on the call. Something in the
TblGen machinery causes BL8_NOP_ELF and BL8_NOP_ELF_TLSGD to be treated
identically during the emit phase, so this second operand was never
visited to generate relocations. This is the reason for the slightly
messy workaround in PPCMCCodeEmitter.cpp:getDirectBrEncoding().
Two new tests are included to demonstrate correct external assembly and
correct generation of relocations using the integrated assembler.
Comments welcome!
Thanks,
Bill
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169910 91177308-0d34-0410-b5e6-96231b3b80d8
2012-12-11 20:30:11 +00:00
|
|
|
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
|
|
|
|
|
|
|
|
StringRef Name = "__tls_get_addr";
|
|
|
|
MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
|
|
|
|
const MCSymbolRefExpr *TlsRef =
|
|
|
|
MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
|
|
|
|
const MachineOperand &MO = MI->getOperand(2);
|
|
|
|
const GlobalValue *GValue = MO.getGlobal();
|
|
|
|
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
|
|
|
|
const MCExpr *SymVar =
|
|
|
|
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
|
|
|
|
OutContext);
|
2013-03-22 15:24:13 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSGD)
|
This patch implements the general dynamic TLS model for 64-bit PowerPC.
Given a thread-local symbol x with global-dynamic access, the generated
code to obtain x's address is:
  Instruction                     Relocation              Symbol
  addis ra,r2,x@got@tlsgd@ha      R_PPC64_GOT_TLSGD16_HA  x
  addi  r3,ra,x@got@tlsgd@l       R_PPC64_GOT_TLSGD16_L   x
  bl    __tls_get_addr(x@tlsgd)   R_PPC64_TLSGD           x
                                  R_PPC64_REL24           __tls_get_addr
  nop
  <use address in r3>
The implementation borrows from the medium code model work for introducing
special forms of ADDIS and ADDI into the DAG representation. This is made
slightly more complicated by having to introduce a call to the external
function __tls_get_addr. Using the full call machinery is overkill and,
more importantly, makes it difficult to add a special relocation. So I've
introduced another opcode GET_TLS_ADDR to represent the function call, and
surrounded it with register copies to set up the parameter and return value.
Most of the code is pretty straightforward. I ran into one peculiarity
when I introduced a new PPC opcode BL8_NOP_ELF_TLSGD, which is just like
BL8_NOP_ELF except that it takes another parameter to represent the symbol
("x" above) that requires a relocation on the call. Something in the
TblGen machinery causes BL8_NOP_ELF and BL8_NOP_ELF_TLSGD to be treated
identically during the emit phase, so this second operand was never
visited to generate relocations. This is the reason for the slightly
messy workaround in PPCMCCodeEmitter.cpp:getDirectBrEncoding().
Two new tests are included to demonstrate correct external assembly and
correct generation of relocations using the integrated assembler.
Comments welcome!
Thanks,
Bill
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169910 91177308-0d34-0410-b5e6-96231b3b80d8
2012-12-11 20:30:11 +00:00
|
|
|
.addExpr(TlsRef)
|
|
|
|
.addExpr(SymVar));
|
|
|
|
return;
|
|
|
|
}
|
2012-12-12 19:29:35 +00:00
|
|
|
case PPC::ADDIStlsldHA: {
|
|
|
|
// Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
|
|
|
|
// Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
|
|
|
|
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
|
|
|
|
const MachineOperand &MO = MI->getOperand(2);
|
|
|
|
const GlobalValue *GValue = MO.getGlobal();
|
|
|
|
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
|
|
|
|
const MCExpr *SymGotTlsLD =
|
2013-06-21 14:42:20 +00:00
|
|
|
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA,
|
2012-12-12 19:29:35 +00:00
|
|
|
OutContext);
|
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
|
|
|
|
.addReg(MI->getOperand(0).getReg())
|
|
|
|
.addReg(PPC::X2)
|
|
|
|
.addExpr(SymGotTlsLD));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case PPC::ADDItlsldL: {
|
|
|
|
// Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
|
2013-03-26 10:55:20 +00:00
|
|
|
// Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l
|
2012-12-12 19:29:35 +00:00
|
|
|
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
|
|
|
|
const MachineOperand &MO = MI->getOperand(2);
|
|
|
|
const GlobalValue *GValue = MO.getGlobal();
|
|
|
|
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
|
|
|
|
const MCExpr *SymGotTlsLD =
|
2013-06-21 14:42:20 +00:00
|
|
|
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO,
|
2012-12-12 19:29:35 +00:00
|
|
|
OutContext);
|
2013-03-26 10:55:20 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
|
2012-12-12 19:29:35 +00:00
|
|
|
.addReg(MI->getOperand(0).getReg())
|
|
|
|
.addReg(MI->getOperand(1).getReg())
|
|
|
|
.addExpr(SymGotTlsLD));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case PPC::GETtlsldADDR: {
|
|
|
|
// Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
|
2013-03-22 15:24:13 +00:00
|
|
|
// Into: BL8_NOP_TLSLD __tls_get_addr(sym@tlsld)
|
2012-12-12 19:29:35 +00:00
|
|
|
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
|
|
|
|
|
|
|
|
StringRef Name = "__tls_get_addr";
|
|
|
|
MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
|
|
|
|
const MCSymbolRefExpr *TlsRef =
|
|
|
|
MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
|
|
|
|
const MachineOperand &MO = MI->getOperand(2);
|
|
|
|
const GlobalValue *GValue = MO.getGlobal();
|
|
|
|
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
|
|
|
|
const MCExpr *SymVar =
|
|
|
|
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
|
|
|
|
OutContext);
|
2013-03-22 15:24:13 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSLD)
|
2012-12-12 19:29:35 +00:00
|
|
|
.addExpr(TlsRef)
|
|
|
|
.addExpr(SymVar));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case PPC::ADDISdtprelHA: {
|
|
|
|
// Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
|
|
|
|
// Into: %Xd = ADDIS8 %X3, sym@dtprel@ha
|
|
|
|
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
|
|
|
|
const MachineOperand &MO = MI->getOperand(2);
|
|
|
|
const GlobalValue *GValue = MO.getGlobal();
|
|
|
|
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
|
|
|
|
const MCExpr *SymDtprel =
|
2013-06-21 14:42:20 +00:00
|
|
|
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
|
2012-12-12 19:29:35 +00:00
|
|
|
OutContext);
|
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
|
|
|
|
.addReg(MI->getOperand(0).getReg())
|
|
|
|
.addReg(PPC::X3)
|
|
|
|
.addExpr(SymDtprel));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case PPC::ADDIdtprelL: {
|
|
|
|
// Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
|
2013-03-26 10:55:20 +00:00
|
|
|
// Into: %Xd = ADDI8 %Xs, sym@dtprel@l
|
2012-12-12 19:29:35 +00:00
|
|
|
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
|
|
|
|
const MachineOperand &MO = MI->getOperand(2);
|
|
|
|
const GlobalValue *GValue = MO.getGlobal();
|
|
|
|
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
|
|
|
|
const MCExpr *SymDtprel =
|
2013-06-21 14:42:20 +00:00
|
|
|
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
|
2012-12-12 19:29:35 +00:00
|
|
|
OutContext);
|
2013-03-26 10:55:20 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
|
2012-12-12 19:29:35 +00:00
|
|
|
.addReg(MI->getOperand(0).getReg())
|
|
|
|
.addReg(MI->getOperand(1).getReg())
|
|
|
|
.addExpr(SymDtprel));
|
|
|
|
return;
|
|
|
|
}
|
2010-11-15 03:39:06 +00:00
|
|
|
case PPC::MFCRpseud:
|
2011-12-07 06:34:06 +00:00
|
|
|
case PPC::MFCR8pseud:
|
2010-11-15 03:39:06 +00:00
|
|
|
// Transform: %R3 = MFCRpseud %CR7
|
|
|
|
// Into: %R3 = MFCR ;; cr7
|
|
|
|
OutStreamer.AddComment(PPCInstPrinter::
|
|
|
|
getRegisterName(MI->getOperand(1).getReg()));
|
2012-11-26 18:05:52 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR)
|
|
|
|
.addReg(MI->getOperand(0).getReg()));
|
2010-01-28 01:28:58 +00:00
|
|
|
return;
|
2011-10-17 04:03:49 +00:00
|
|
|
case PPC::SYNC:
|
|
|
|
// In Book E sync is called msync, handle this special case here...
|
|
|
|
if (Subtarget.isBookE()) {
|
|
|
|
OutStreamer.EmitRawText(StringRef("\tmsync"));
|
|
|
|
return;
|
|
|
|
}
|
Index: test/CodeGen/PowerPC/reloc-align.ll
===================================================================
--- test/CodeGen/PowerPC/reloc-align.ll (revision 0)
+++ test/CodeGen/PowerPC/reloc-align.ll (revision 0)
@@ -0,0 +1,34 @@
+; RUN: llc -mcpu=pwr7 -O1 < %s | FileCheck %s
+
+; This test verifies that the peephole optimization of address accesses
+; does not produce a load or store with a relocation that can't be
+; satisfied for a given instruction encoding. Reduced from a test supplied
+; by Hal Finkel.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.S1 = type { [8 x i8] }
+
+@main.l_1554 = internal global { i8, i8, i8, i8, i8, i8, i8, i8 } { i8 -1, i8 -6, i8 57, i8 62, i8 -48, i8 0, i8 58, i8 80 }, align 1
+
+; Function Attrs: nounwind readonly
+define signext i32 @main() #0 {
+entry:
+ %call = tail call fastcc signext i32 @func_90(%struct.S1* byval bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @main.l_1554 to %struct.S1*))
+; CHECK-NOT: ld {{[0-9]+}}, main.l_1554@toc@l
+ ret i32 %call
+}
+
+; Function Attrs: nounwind readonly
+define internal fastcc signext i32 @func_90(%struct.S1* byval nocapture %p_91) #0 {
+entry:
+ %0 = bitcast %struct.S1* %p_91 to i64*
+ %bf.load = load i64* %0, align 1
+ %bf.shl = shl i64 %bf.load, 26
+ %bf.ashr = ashr i64 %bf.shl, 54
+ %bf.cast = trunc i64 %bf.ashr to i32
+ ret i32 %bf.cast
+}
+
+attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
Index: lib/Target/PowerPC/PPCAsmPrinter.cpp
===================================================================
--- lib/Target/PowerPC/PPCAsmPrinter.cpp (revision 185327)
+++ lib/Target/PowerPC/PPCAsmPrinter.cpp (working copy)
@@ -679,7 +679,26 @@ void PPCAsmPrinter::EmitInstruction(const MachineI
OutStreamer.EmitRawText(StringRef("\tmsync"));
return;
}
+ break;
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::LWA: {
+ // Verify alignment is legal, so we don't create relocations
+ // that can't be supported.
+ // FIXME: This test is currently disabled for Darwin. The test
+ // suite shows a handful of test cases that fail this check for
+ // Darwin. Those need to be investigated before this sanity test
+ // can be enabled for those subtargets.
+ if (!Subtarget.isDarwin()) {
+ unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4)
+ llvm_unreachable("Global must be word-aligned for LD, STD, LWA!");
+ }
+ // Now process the instruction normally.
+ break;
}
+ }
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
OutStreamer.EmitInstruction(TmpInst);
Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelDAGToDAG.cpp (revision 185327)
+++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp (working copy)
@@ -1530,6 +1530,14 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
+ // We can't perform this optimization for data whose alignment
+ // is insufficient for the instruction encoding.
+ if (GV->getAlignment() < 4 &&
+ (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
+ StorageOpcode == PPC::LWA)) {
+ DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
+ continue;
+ }
ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185380 91177308-0d34-0410-b5e6-96231b3b80d8
2013-07-01 20:52:27 +00:00
|
|
|
break;
|
|
|
|
case PPC::LD:
|
|
|
|
case PPC::STD:
|
|
|
|
case PPC::LWA: {
|
|
|
|
// Verify alignment is legal, so we don't create relocations
|
|
|
|
// that can't be supported.
|
|
|
|
// FIXME: This test is currently disabled for Darwin. The test
|
|
|
|
// suite shows a handful of test cases that fail this check for
|
|
|
|
// Darwin. Those need to be investigated before this sanity test
|
|
|
|
// can be enabled for those subtargets.
|
|
|
|
if (!Subtarget.isDarwin()) {
|
|
|
|
unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
|
|
|
|
const MachineOperand &MO = MI->getOperand(OpNum);
|
|
|
|
if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4)
|
|
|
|
llvm_unreachable("Global must be word-aligned for LD, STD, LWA!");
|
|
|
|
}
|
|
|
|
// Now process the instruction normally.
|
|
|
|
break;
|
|
|
|
}
|
2010-01-28 01:28:58 +00:00
|
|
|
}
|
2005-04-21 23:30:14 +00:00
|
|
|
|
2013-06-20 16:58:14 +00:00
|
|
|
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
|
2010-11-15 03:39:06 +00:00
|
|
|
OutStreamer.EmitInstruction(TmpInst);
|
2004-09-04 05:00:00 +00:00
|
|
|
}
|
|
|
|
|
2010-01-27 07:21:55 +00:00
|
|
|
void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
|
|
|
|
if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label.
|
|
|
|
return AsmPrinter::EmitFunctionEntryLabel();
|
|
|
|
|
|
|
|
// Emit an official procedure descriptor.
|
2013-04-17 21:18:16 +00:00
|
|
|
MCSectionSubPair Current = OutStreamer.getCurrentSection();
|
2012-02-27 20:20:47 +00:00
|
|
|
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
|
|
|
|
ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
|
|
|
|
SectionKind::getReadOnly());
|
|
|
|
OutStreamer.SwitchSection(Section);
|
2010-01-27 07:21:55 +00:00
|
|
|
OutStreamer.EmitLabel(CurrentFnSym);
|
2012-02-27 20:20:47 +00:00
|
|
|
OutStreamer.EmitValueToAlignment(8);
|
|
|
|
MCSymbol *Symbol1 =
|
|
|
|
OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
|
2012-10-25 12:27:42 +00:00
|
|
|
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
|
|
|
|
// entry point.
|
2012-02-27 20:20:47 +00:00
|
|
|
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
|
2013-01-09 01:57:54 +00:00
|
|
|
8 /*size*/);
|
2012-10-25 12:27:42 +00:00
|
|
|
MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
|
|
|
|
// Generates a R_PPC64_TOC relocation for TOC base insertion.
|
|
|
|
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
|
2013-06-20 22:39:42 +00:00
|
|
|
MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext),
|
2013-01-09 01:57:54 +00:00
|
|
|
8/*size*/);
|
2012-09-18 16:55:29 +00:00
|
|
|
// Emit a null environment pointer.
|
2013-01-09 01:57:54 +00:00
|
|
|
OutStreamer.EmitIntValue(0, 8 /* size */);
|
2013-04-17 21:18:16 +00:00
|
|
|
OutStreamer.SwitchSection(Current.first, Current.second);
|
2012-02-22 21:11:47 +00:00
|
|
|
|
|
|
|
MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
|
|
|
|
".L." + Twine(CurrentFnSym->getName()));
|
|
|
|
OutStreamer.EmitLabel(RealFnSym);
|
|
|
|
CurrentFnSymForSize = RealFnSym;
|
2010-01-27 07:21:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-08-15 11:54:46 +00:00
|
|
|
/// doFinalization - Emit the module-level trailers for Linux targets: the
/// ".toc" entries collected in TOC (64-bit only) and the global-value stubs
/// recorded in MachineModuleInfoELF.  Returns whatever the generic
/// AsmPrinter::doFinalization returns.
bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
  const DataLayout *Layout = TM.getDataLayout();
  const bool Is64Bit = Layout->getPointerSizeInBits() == 64;

  // The TOC is only emitted for 64-bit targets, and only when non-empty.
  if (Is64Bit && !TOC.empty()) {
    const MCSectionELF *TocSection =
        OutStreamer.getContext().getELFSection(
            ".toc", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
            SectionKind::getReadOnly());
    OutStreamer.SwitchSection(TocSection);

    typedef MapVector<MCSymbol*, MCSymbol*>::iterator TOCIterator;
    for (TOCIterator Entry = TOC.begin(), End = TOC.end(); Entry != End;
         ++Entry) {
      // Each entry is its label (the mapped value) followed by a TC entry
      // for the symbol named by the key.
      OutStreamer.EmitLabel(Entry->second);
      MCSymbol *Target = OutContext.GetOrCreateSymbol(Entry->first->getName());
      OutStreamer.EmitTCEntry(*Target);
    }
  }

  MachineModuleInfoELF &ELFInfo = MMI->getObjFileInfo<MachineModuleInfoELF>();
  MachineModuleInfoELF::SymbolListTy StubList = ELFInfo.GetGVStubList();

  if (!StubList.empty()) {
    OutStreamer.SwitchSection(getObjFileLowering().getDataSection());

    for (unsigned Idx = 0, NumStubs = StubList.size(); Idx != NumStubs;
         ++Idx) {
      // L_foo$stub:
      OutStreamer.EmitLabel(StubList[Idx].first);
      //   .long _foo
      const MCSymbolRefExpr *StubTarget =
          MCSymbolRefExpr::Create(StubList[Idx].second.getPointer(),
                                  OutContext);
      OutStreamer.EmitValue(StubTarget, Is64Bit ? 8 : 4 /*size*/,
                            0 /*addrspace*/);
    }

    StubList.clear();
    OutStreamer.AddBlankLine();
  }

  return AsmPrinter::doFinalization(M);
}
|
2006-12-21 20:26:09 +00:00
|
|
|
|
2012-08-28 19:06:55 +00:00
|
|
|
/// EmitFunctionBodyEnd - Print the traceback table before the .size
|
|
|
|
/// directive.
|
|
|
|
///
|
|
|
|
void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
|
|
|
|
// Only the 64-bit target requires a traceback table. For now,
|
|
|
|
// we only emit the word of zeroes that GDB requires to find
|
2012-08-29 20:22:24 +00:00
|
|
|
// the end of the function, and zeroes for the eight-byte
|
|
|
|
// mandatory fields.
|
|
|
|
// FIXME: We should fill in the eight-byte mandatory fields as described in
|
|
|
|
// the PPC64 ELF ABI (this is a low-priority item because GDB does not
|
|
|
|
// currently make use of these fields).
|
|
|
|
if (Subtarget.isPPC64()) {
|
2012-08-28 19:06:55 +00:00
|
|
|
OutStreamer.EmitIntValue(0, 4/*size*/);
|
2012-08-29 20:22:24 +00:00
|
|
|
OutStreamer.EmitIntValue(0, 8/*size*/);
|
|
|
|
}
|
2012-08-28 19:06:55 +00:00
|
|
|
}
|
|
|
|
|
2009-09-30 22:06:26 +00:00
|
|
|
/// EmitStartOfAsmFile - Emit the Darwin file prologue: a ".machine"
/// directive chosen from the subtarget (only when the streamer supports raw
/// text), followed by a sequence of section switches that ends in the normal
/// text section.
void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
  // Directive names, indexed by the value held in Directive below.
  static const char *const CPUDirectives[] = {
    "",          "ppc",      "ppc440", "ppc601", "ppc602",
    "ppc603",    "ppc7400",  "ppc750", "ppc970", "ppcA2",
    "ppce500mc", "ppce5500", "power3", "power4", "power5",
    "power5x",   "power6",   "power6x", "power7", "ppc64"
  };

  // Start from the subtarget's directive and raise it to the minimum level
  // required by each feature that is enabled.
  unsigned Directive = Subtarget.getDarwinDirective();
  if (Subtarget.hasMFOCRF() && Directive < PPC::DIR_970) {
    Directive = PPC::DIR_970;
  }
  if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400) {
    Directive = PPC::DIR_7400;
  }
  if (Subtarget.isPPC64() && Directive < PPC::DIR_64) {
    Directive = PPC::DIR_64;
  }
  assert(Directive <= PPC::DIR_64 && "Directive out of range.");

  // FIXME: This is a total hack, finish mc'izing the PPC backend.
  if (OutStreamer.hasRawTextSupport()) {
    assert(Directive < sizeof(CPUDirectives) / sizeof(*CPUDirectives) &&
           "CPUDirectives[] might not be up-to-date!");
    OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
  }

  // Prime text sections so they are adjacent.  This reduces the likelihood a
  // large data or debug section causes a branch to exceed 16M limit.
  const TargetLoweringObjectFileMachO &MachOLowering =
      static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
  OutStreamer.SwitchSection(MachOLowering.getTextCoalSection());

  // Visit the stub section matching the relocation model, if there is one.
  switch (TM.getRelocationModel()) {
  case Reloc::PIC_:
    OutStreamer.SwitchSection(
        OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
                                   MCSectionMachO::S_SYMBOL_STUBS |
                                   MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
                                   32, SectionKind::getText()));
    break;
  case Reloc::DynamicNoPIC:
    OutStreamer.SwitchSection(
        OutContext.getMachOSection("__TEXT","__symbol_stub1",
                                   MCSectionMachO::S_SYMBOL_STUBS |
                                   MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
                                   16, SectionKind::getText()));
    break;
  default:
    // Other relocation models have no stub section to prime.
    break;
  }

  // Finish in the regular text section.
  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
}
|
|
|
|
|
2010-04-04 07:12:28 +00:00
|
|
|
/// GetLazyPtr - Return the symbol whose name is Sym's name with its last
/// five characters (the "$stub" suffix) replaced by "$lazy_ptr".
static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
  StringRef StubName = Sym->getName();
  // Remove $stub suffix ...
  StringRef BaseName = StubName.substr(0, StubName.size()-5);
  // ... add $lazy_ptr.
  return Ctx.GetOrCreateSymbol(Twine(BaseName) + "$lazy_ptr");
}
|
|
|
|
|
2010-04-04 07:12:28 +00:00
|
|
|
/// GetAnonSym - Return the symbol whose name is Sym's name with "$tmp"
/// appended.
static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
  // Add $tmp suffix to $stub, yielding $stub$tmp.
  StringRef StubName = Sym->getName();
  return Ctx.GetOrCreateSymbol(Twine(StubName) + "$tmp");
}
|
|
|
|
|
|
|
|
void PPCDarwinAsmPrinter::
|
|
|
|
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
|
Revert the majority of the next patch in the address space series:
r165941: Resubmit the changes to llvm core to update the functions to
support different pointer sizes on a per address space basis.
Despite this commit log, this change primarily changed stuff outside of
VMCore, and those changes do not carry any tests for correctness (or
even plausibility), and we have consistently found questionable or flat
out incorrect cases in these changes. Most of them are probably correct,
but we need to devise a system that makes it more clear when we have
handled the address space concerns correctly, and ideally each pass that
gets updated would receive an accompanying test case that exercises that
pass specifically w.r.t. alternate address spaces.
However, from this commit, I have retained the new C API entry points.
Those were an orthogonal change that probably should have been split
apart, but they seem entirely good.
In several places the changes were very obvious cleanups with no actual
multiple address space code added; these I have not reverted when
I spotted them.
In a few other places there were merge conflicts due to a cleaner
solution being implemented later, often not using address spaces at all.
In those cases, I've preserved the new code which isn't address space
dependent.
This is part of my ongoing effort to clean out the partial address space
code which carries high risk and low test coverage, and not likely to be
finished before the 3.2 release looms closer. Duncan and I would both
like to see the above issues addressed before we return to these
changes.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167222 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-01 09:14:31 +00:00
|
|
|
bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
|
2010-01-20 21:19:44 +00:00
|
|
|
|
2010-04-17 16:44:48 +00:00
|
|
|
const TargetLoweringObjectFileMachO &TLOFMacho =
|
|
|
|
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
|
2010-01-20 21:19:44 +00:00
|
|
|
|
|
|
|
// .lazy_symbol_pointer
|
|
|
|
const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
|
2009-08-10 01:39:42 +00:00
|
|
|
|
2004-07-16 20:29:04 +00:00
|
|
|
// Output stubs for dynamically-linked functions
|
2010-01-20 21:19:44 +00:00
|
|
|
if (TM.getRelocationModel() == Reloc::PIC_) {
|
2009-08-03 22:52:21 +00:00
|
|
|
const MCSection *StubSection =
|
2010-04-08 20:40:11 +00:00
|
|
|
OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
|
|
|
|
MCSectionMachO::S_SYMBOL_STUBS |
|
|
|
|
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
|
|
|
|
32, SectionKind::getText());
|
2010-01-20 21:36:48 +00:00
|
|
|
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
|
2009-08-19 05:49:37 +00:00
|
|
|
OutStreamer.SwitchSection(StubSection);
|
2006-06-27 01:02:25 +00:00
|
|
|
EmitAlignment(4);
|
2010-01-20 21:36:48 +00:00
|
|
|
|
2010-04-04 07:05:53 +00:00
|
|
|
MCSymbol *Stub = Stubs[i].first;
|
2010-04-04 07:12:28 +00:00
|
|
|
MCSymbol *RawSym = Stubs[i].second.getPointer();
|
|
|
|
MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
|
|
|
|
MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
|
2010-01-20 21:36:48 +00:00
|
|
|
|
2010-04-04 07:05:53 +00:00
|
|
|
OutStreamer.EmitLabel(Stub);
|
2010-04-04 07:12:28 +00:00
|
|
|
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
|
2012-11-24 13:18:25 +00:00
|
|
|
|
2013-03-23 20:53:15 +00:00
|
|
|
const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
|
2013-05-23 22:26:41 +00:00
|
|
|
const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
|
|
|
|
const MCExpr *Sub =
|
|
|
|
MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext);
|
2013-03-23 20:53:15 +00:00
|
|
|
|
2012-11-24 13:18:25 +00:00
|
|
|
// mflr r0
|
2012-11-26 18:05:52 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
|
2013-03-23 20:53:15 +00:00
|
|
|
// bcl 20, 31, AnonSymbol
|
2013-04-04 22:55:54 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCLalways).addExpr(Anon));
|
2010-04-04 07:12:28 +00:00
|
|
|
OutStreamer.EmitLabel(AnonSymbol);
|
2012-11-24 13:18:25 +00:00
|
|
|
// mflr r11
|
2012-11-26 18:05:52 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
|
2012-11-24 13:18:25 +00:00
|
|
|
// addis r11, r11, ha16(LazyPtr - AnonSymbol)
|
2013-06-21 14:42:20 +00:00
|
|
|
const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, OutContext);
|
2012-11-26 18:05:52 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
|
2012-11-26 13:34:22 +00:00
|
|
|
.addReg(PPC::R11)
|
|
|
|
.addReg(PPC::R11)
|
2013-05-23 22:26:41 +00:00
|
|
|
.addExpr(SubHa16));
|
2012-11-24 13:18:25 +00:00
|
|
|
// mtlr r0
|
2012-11-26 18:05:52 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
|
2012-11-26 13:34:22 +00:00
|
|
|
|
|
|
|
// ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
|
|
|
|
// lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
|
2013-06-21 14:42:20 +00:00
|
|
|
const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, OutContext);
|
2012-11-26 18:05:52 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
|
2012-11-26 13:34:22 +00:00
|
|
|
.addReg(PPC::R12)
|
2013-05-23 22:26:41 +00:00
|
|
|
.addExpr(SubLo16).addExpr(SubLo16)
|
2012-11-26 18:05:52 +00:00
|
|
|
.addReg(PPC::R11));
|
2012-11-24 13:18:25 +00:00
|
|
|
// mtctr r12
|
2012-11-26 18:05:52 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
|
2012-11-24 13:18:25 +00:00
|
|
|
// bctr
|
2012-11-26 18:05:52 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
|
2012-11-24 13:18:25 +00:00
|
|
|
|
2009-08-19 05:49:37 +00:00
|
|
|
OutStreamer.SwitchSection(LSPSection);
|
2010-04-04 07:12:28 +00:00
|
|
|
OutStreamer.EmitLabel(LazyPtr);
|
|
|
|
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
|
2012-11-24 13:18:25 +00:00
|
|
|
|
|
|
|
MCSymbol *DyldStubBindingHelper =
|
|
|
|
OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
|
|
|
|
if (isPPC64) {
|
|
|
|
// .quad dyld_stub_binding_helper
|
|
|
|
OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
|
|
|
|
} else {
|
|
|
|
// .long dyld_stub_binding_helper
|
|
|
|
OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
|
|
|
|
}
|
2005-12-13 04:33:58 +00:00
|
|
|
}
|
2010-04-04 07:05:53 +00:00
|
|
|
OutStreamer.AddBlankLine();
|
2010-01-20 21:19:44 +00:00
|
|
|
return;
|
2004-06-24 23:04:11 +00:00
|
|
|
}
|
2010-01-20 21:19:44 +00:00
|
|
|
|
|
|
|
const MCSection *StubSection =
|
2010-04-08 20:40:11 +00:00
|
|
|
OutContext.getMachOSection("__TEXT","__symbol_stub1",
|
|
|
|
MCSectionMachO::S_SYMBOL_STUBS |
|
|
|
|
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
|
|
|
|
16, SectionKind::getText());
|
2010-01-20 21:36:48 +00:00
|
|
|
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
|
2010-04-04 07:05:53 +00:00
|
|
|
MCSymbol *Stub = Stubs[i].first;
|
2010-04-04 07:12:28 +00:00
|
|
|
MCSymbol *RawSym = Stubs[i].second.getPointer();
|
|
|
|
MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
|
2013-05-23 22:26:41 +00:00
|
|
|
const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
|
2010-01-20 21:36:48 +00:00
|
|
|
|
2010-01-20 21:19:44 +00:00
|
|
|
OutStreamer.SwitchSection(StubSection);
|
|
|
|
EmitAlignment(4);
|
2010-04-04 07:05:53 +00:00
|
|
|
OutStreamer.EmitLabel(Stub);
|
2010-04-04 07:12:28 +00:00
|
|
|
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
|
2013-05-23 22:26:41 +00:00
|
|
|
|
2012-11-24 13:18:25 +00:00
|
|
|
// lis r11, ha16(LazyPtr)
|
2013-06-21 14:42:20 +00:00
|
|
|
const MCExpr *LazyPtrHa16 = PPCMCExpr::CreateHa(LazyPtrExpr, OutContext);
|
2012-11-26 18:05:52 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS)
|
2012-11-26 13:34:22 +00:00
|
|
|
.addReg(PPC::R11)
|
2012-11-26 18:05:52 +00:00
|
|
|
.addExpr(LazyPtrHa16));
|
2012-11-26 13:34:22 +00:00
|
|
|
|
|
|
|
// ldu r12, lo16(LazyPtr)(r11)
|
|
|
|
// lwzu r12, lo16(LazyPtr)(r11)
|
2013-06-21 14:42:20 +00:00
|
|
|
const MCExpr *LazyPtrLo16 = PPCMCExpr::CreateLo(LazyPtrExpr, OutContext);
|
2012-11-26 18:05:52 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
|
2012-11-26 13:34:22 +00:00
|
|
|
.addReg(PPC::R12)
|
|
|
|
.addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
|
2012-11-26 18:05:52 +00:00
|
|
|
.addReg(PPC::R11));
|
2012-11-26 13:34:22 +00:00
|
|
|
|
2012-11-24 13:18:25 +00:00
|
|
|
// mtctr r12
|
2012-11-26 18:05:52 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
|
2012-11-24 13:18:25 +00:00
|
|
|
// bctr
|
2012-11-26 18:05:52 +00:00
|
|
|
OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
|
2012-11-26 13:34:22 +00:00
|
|
|
|
2010-01-20 21:19:44 +00:00
|
|
|
OutStreamer.SwitchSection(LSPSection);
|
2010-04-04 07:12:28 +00:00
|
|
|
OutStreamer.EmitLabel(LazyPtr);
|
|
|
|
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
|
2012-11-24 13:18:25 +00:00
|
|
|
|
|
|
|
MCSymbol *DyldStubBindingHelper =
|
|
|
|
OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
|
|
|
|
if (isPPC64) {
|
|
|
|
// .quad dyld_stub_binding_helper
|
|
|
|
OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
|
|
|
|
} else {
|
|
|
|
// .long dyld_stub_binding_helper
|
|
|
|
OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
|
|
|
|
}
|
2010-01-20 21:19:44 +00:00
|
|
|
}
|
|
|
|
|
2010-04-04 07:12:28 +00:00
|
|
|
OutStreamer.AddBlankLine();
|
2010-01-20 21:19:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// doFinalization - Emit the Darwin end-of-module data: the function stubs,
/// the non-lazy symbol pointer entries, the hidden global pointer entries and
/// the subsections-via-symbols assembler flag.  Finishes by calling
/// AsmPrinter::doFinalization and returning its result.
bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
  const bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;

  // Byte size and log2 alignment used for every pointer-sized slot below.
  const unsigned PtrSize = isPPC64 ? 8 : 4;
  const unsigned PtrAlignLog2 = isPPC64 ? 3 : 2;

  // Darwin/PPC always uses mach-o.
  const TargetLoweringObjectFileMachO &TLOFMacho =
    static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
  MachineModuleInfoMachO &MMIMacho =
    MMI->getObjFileInfo<MachineModuleInfoMachO>();

  // Function stubs come first.
  MachineModuleInfoMachO::SymbolListTy StubList = MMIMacho.GetFnStubList();
  if (!StubList.empty())
    EmitFunctionStubs(StubList);

  // NOTE(review): MMI has already been dereferenced above, so the null test
  // in this condition cannot protect that earlier use.
  if (MAI->doesSupportExceptionHandling() && MMI) {
    // Add the (possibly multiple) personalities to the set of global values.
    // Only referenced functions get into the Personalities list.
    const std::vector<const Function*> &Personalities = MMI->getPersonalities();
    for (unsigned Idx = 0, NumPers = Personalities.size(); Idx != NumPers;
         ++Idx) {
      const Function *Personality = Personalities[Idx];
      if (!Personality)
        continue;

      MCSymbol *NLPSym =
        GetSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr");
      MachineModuleInfoImpl::StubValueTy &StubSym =
        MMIMacho.getGVStubEntry(NLPSym);
      // The flag is set to true here; entries with the flag set get a
      // zero-filled slot when the GV stub list is emitted below.
      StubSym =
        MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(Personality), true);
    }
  }

  // Output macho stubs for external and common global variables.
  StubList = MMIMacho.GetGVStubList();
  if (!StubList.empty()) {
    // Switch with ".non_lazy_symbol_pointer" directive.
    OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
    EmitAlignment(PtrAlignLog2);

    for (unsigned Idx = 0, NumStubs = StubList.size(); Idx != NumStubs; ++Idx) {
      // Label of this pointer slot.
      OutStreamer.EmitLabel(StubList[Idx].first);

      // .indirect_symbol _foo
      MachineModuleInfoImpl::StubValueTy &MCSym = StubList[Idx].second;
      MCSymbol *Target = MCSym.getPointer();
      OutStreamer.EmitSymbolAttribute(Target, MCSA_IndirectSymbol);

      if (MCSym.getInt()) {
        // External to current translation unit.
        OutStreamer.EmitIntValue(0, PtrSize);
      } else {
        // Internal to current translation unit.
        //
        // When we place the LSDA into the TEXT section, the type info pointers
        // need to be indirect and pc-rel. We accomplish this by using NLPs.
        // However, sometimes the types are local to the file. So we need to
        // fill in the value for the NLP in those cases.
        const MCExpr *TargetRef = MCSymbolRefExpr::Create(Target, OutContext);
        OutStreamer.EmitValue(TargetRef, PtrSize);
      }
    }

    StubList.clear();
    OutStreamer.AddBlankLine();
  }

  // Hidden global pointer entries go into the data section.
  StubList = MMIMacho.GetHiddenGVStubList();
  if (!StubList.empty()) {
    OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
    EmitAlignment(PtrAlignLog2);

    for (unsigned Idx = 0, NumStubs = StubList.size(); Idx != NumStubs; ++Idx) {
      // Label of this pointer slot.
      OutStreamer.EmitLabel(StubList[Idx].first);

      // .long _foo   (emitted with a size of 8 when isPPC64 is set)
      const MCExpr *TargetRef =
        MCSymbolRefExpr::Create(StubList[Idx].second.getPointer(), OutContext);
      OutStreamer.EmitValue(TargetRef, PtrSize);
    }

    StubList.clear();
    OutStreamer.AddBlankLine();
  }

  // Funny Darwin hack: This flag tells the linker that no global symbols
  // contain code that falls through to other global symbols (e.g. the obvious
  // implementation of multiple entry points).  If this doesn't occur, the
  // linker can safely perform dead code stripping.  Since LLVM never generates
  // code that does this, it is always safe to set.
  OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);

  return AsmPrinter::doFinalization(M);
}
|
2004-09-04 05:00:00 +00:00
|
|
|
|
2006-12-20 20:56:46 +00:00
|
|
|
/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
|
|
|
|
/// for a MachineFunction to the given output stream, in a format that the
|
2006-09-20 17:12:19 +00:00
|
|
|
/// Darwin assembler can deal with.
|
|
|
|
///
|
2010-04-04 08:18:47 +00:00
|
|
|
static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
|
2010-03-13 20:55:24 +00:00
|
|
|
MCStreamer &Streamer) {
|
2006-12-21 20:26:09 +00:00
|
|
|
const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
|
|
|
|
|
2009-07-21 18:38:57 +00:00
|
|
|
if (Subtarget->isDarwin())
|
2010-04-04 08:18:47 +00:00
|
|
|
return new PPCDarwinAsmPrinter(tm, Streamer);
|
|
|
|
return new PPCLinuxAsmPrinter(tm, Streamer);
|
2006-09-20 17:12:19 +00:00
|
|
|
}
|
2008-08-17 13:54:28 +00:00
|
|
|
|
2009-06-23 23:59:40 +00:00
|
|
|
// Force static initialization.
|
2009-07-15 20:24:03 +00:00
|
|
|
extern "C" void LLVMInitializePowerPCAsmPrinter() {
|
|
|
|
TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
|
|
|
|
TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
|
|
|
|
}
|