Move structures and classes into header files, providing two new headers and

one new .cpp file, in preparation for merging in the Direct Object Emission
changes we're working on. No functional changes.
Fixed coding style issues on the original patch. Patch by Aaron Gray


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@72754 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2009-06-03 03:43:31 +00:00
parent 6ecc2602a6
commit a321dcd38d
5 changed files with 909 additions and 781 deletions

423
lib/CodeGen/MachO.h Normal file
View File

@ -0,0 +1,423 @@
//=== MachO.h - Mach-O structures and constants -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
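// This file defines the structures and constants that describe the contents
// of a Mach-O file: symbols, the file header, segments, sections and the
// dynamic symbol table load command.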
//
//===----------------------------------------------------------------------===//
#ifndef MACHO_H
#define MACHO_H
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineRelocation.h"
#include "llvm/Target/TargetAsmInfo.h"
#include <string>
#include <vector>
namespace llvm {
typedef std::vector<unsigned char> DataBuffer;
/// MachOSym - This struct contains information about each symbol that is
/// added to the logical symbol table for the module.  This is eventually
/// turned into a real symbol table in the file.
struct MachOSym {
  const GlobalValue *GV;  // The global value this corresponds to.
  std::string GVName;     // The mangled name of the global value.
  uint32_t    n_strx;     // index into the string table
  uint8_t     n_type;     // type flag
  uint8_t     n_sect;     // section number or NO_SECT
  int16_t     n_desc;     // see <mach-o/stab.h>
  uint64_t    n_value;    // value for this symbol (or stab offset)

  // Constants for the n_sect field
  // see <mach-o/nlist.h>
  enum { NO_SECT = 0 };   // symbol is not in any section

  // Constants for the n_type field
  // see <mach-o/nlist.h>
  enum { N_UNDF  = 0x0,  // undefined, n_sect == NO_SECT
         N_ABS   = 0x2,  // absolute, n_sect == NO_SECT
         N_SECT  = 0xe,  // defined in section number n_sect
         N_PBUD  = 0xc,  // prebound undefined (defined in a dylib)
         N_INDR  = 0xa   // indirect
  };

  // The following bits are OR'd into the types above. For example, a type
  // of 0x0f would be an external N_SECT symbol (0x0e | 0x01).
  enum { N_EXT  = 0x01,   // external symbol bit
         N_PEXT = 0x10    // private external symbol bit
  };

  // Constants for the n_desc field
  // see <mach-o/loader.h>
  enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY          = 0,
         REFERENCE_FLAG_UNDEFINED_LAZY              = 1,
         REFERENCE_FLAG_DEFINED                     = 2,
         REFERENCE_FLAG_PRIVATE_DEFINED             = 3,
         REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY  = 4,
         REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY      = 5
  };
  enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped
         N_WEAK_REF      = 0x0040, // symbol is weak referenced
         N_WEAK_DEF      = 0x0080  // coalesced symbol is a weak definition
  };

  /// Constructor; defined out of line.
  MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
           const TargetAsmInfo *TAI);

  /// SymCmp - Comparison functor that orders symbols by their mangled name
  /// using std::string's lexicographic operator<.
  struct SymCmp {
    // FIXME: this does not appear to be sorting 'f' after 'F'
    // The call operator is const so the functor can also be invoked through
    // a const object or reference.
    bool operator()(const MachOSym &LHS, const MachOSym &RHS) const {
      return LHS.GVName < RHS.GVName;
    }
  };

  /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
  /// a local symbol rather than an external symbol, i.e. neither N_EXT nor
  /// N_PEXT is set in its n_type.
  static inline bool PartitionByLocal(const MachOSym &Sym) {
    return (Sym.n_type & (MachOSym::N_EXT | MachOSym::N_PEXT)) == 0;
  }

  /// PartitionByDefined - Simple boolean predicate that returns true if Sym is
  /// defined in this module, i.e. all N_SECT bits are set in its n_type.
  static inline bool PartitionByDefined(const MachOSym &Sym) {
    // FIXME: Do N_ABS or N_INDR count as defined?
    return (Sym.n_type & MachOSym::N_SECT) == MachOSym::N_SECT;
  }
}; // end struct MachOSym
/// MachOHeader - This struct contains the header information about a
/// specific architecture type/subtype pair that is emitted to the file.
struct MachOHeader {
  uint32_t  magic;      // mach magic number identifier
  uint32_t  filetype;   // type of file
  uint32_t  ncmds;      // number of load commands
  uint32_t  sizeofcmds; // the size of all the load commands
  uint32_t  flags;      // flags
  uint32_t  reserved;   // 64-bit only

  /// HeaderData - The actual data for the header which we are building
  /// up for emission to the file.
  DataBuffer HeaderData;

  // Constants for the filetype field
  // see <mach-o/loader.h> for additional info on the various types
  enum { MH_OBJECT     = 1, // relocatable object file
         MH_EXECUTE    = 2, // demand paged executable file
         MH_FVMLIB     = 3, // fixed VM shared library file
         MH_CORE       = 4, // core file
         MH_PRELOAD    = 5, // preloaded executable file
         MH_DYLIB      = 6, // dynamically bound shared library
         MH_DYLINKER   = 7, // dynamic link editor
         MH_BUNDLE     = 8, // dynamically bound bundle file
         MH_DYLIB_STUB = 9, // shared library stub for static linking only
         MH_DSYM       = 10 // companion file with only debug sections
  };

  // Constants for the flags field.  Every flag occupies its own bit.
  enum { MH_NOUNDEFS                = 1 << 0,
           // the object file has no undefined references
         MH_INCRLINK                = 1 << 1,
           // the object file is the output of an incremental link against
           // a base file and cannot be link edited again
         MH_DYLDLINK                = 1 << 2,
           // the object file is input for the dynamic linker and cannot be
           // statically link edited again.
         MH_BINDATLOAD              = 1 << 3,
           // the object file's undefined references are bound by the
           // dynamic linker when loaded.
         MH_PREBOUND                = 1 << 4,
           // the file has its dynamic undefined references prebound
         MH_SPLIT_SEGS              = 1 << 5,
           // the file has its read-only and read-write segments split
           // see <mach/shared_memory_server.h>
         MH_LAZY_INIT               = 1 << 6,
           // the shared library init routine is to be run lazily via
           // catching memory faults to its writable segments (obsolete)
         MH_TWOLEVEL                = 1 << 7,
           // the image is using two-level namespace bindings
         MH_FORCE_FLAT              = 1 << 8,
           // the executable is forcing all images to use flat namespace
           // bindings.
         MH_NOMULTIDEFS             = 1 << 9,
           // this umbrella guarantees no multiple definitions of symbols
           // in its sub-images so the two-level namespace hints can
           // always be used.
           // (This was 1 << 8, which collided with MH_FORCE_FLAT.)
         MH_NOFIXPREBINDING         = 1 << 10,
           // do not have dyld notify the prebinding agent about this
           // executable.
         MH_PREBINDABLE             = 1 << 11,
           // the binary is not prebound but can have its prebinding
           // redone.  only used when MH_PREBOUND is not set.
         MH_ALLMODSBOUND            = 1 << 12,
           // indicates that this binary binds to all two-level namespace
           // modules of its dependent libraries.  Only used when
           // MH_PREBINDABLE and MH_TWOLEVEL are both set.
         MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
           // safe to divide up the sections into sub-sections via symbols
           // for dead code stripping.
         MH_CANONICAL               = 1 << 14,
           // the binary has been canonicalized via the unprebind operation
         MH_WEAK_DEFINES            = 1 << 15,
           // the final linked image contains external weak symbols
         MH_BINDS_TO_WEAK           = 1 << 16,
           // the final linked image uses weak symbols
         MH_ALLOW_STACK_EXECUTION   = 1 << 17
           // When this bit is set, all stacks in the task will be given
           // stack execution privilege.  Only used in MH_EXECUTE filetype
  };

  /// Default constructor: all numeric fields start at zero and HeaderData
  /// starts empty.
  MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
                  reserved(0) { }

  /// cmdSize - This routine returns the size of the MachOHeader as written
  /// to disk, depending on whether the destination is a 64 bit Mach-O file:
  /// eight 32-bit words for 64-bit files, seven otherwise.
  unsigned cmdSize(bool is64Bit) const {
    if (is64Bit)
      return 8 * sizeof(uint32_t);
    else
      return 7 * sizeof(uint32_t);
  }

  /// setMagic - This routine sets the appropriate value for the 'magic'
  /// field based on pointer size and endianness.  The little-endian values
  /// are the byte-reversed forms of the big-endian ones.
  void setMagic(bool isLittleEndian, bool is64Bit) {
    if (isLittleEndian) {
      if (is64Bit) magic = 0xcffaedfe;
      else         magic = 0xcefaedfe;
    } else {
      if (is64Bit) magic = 0xfeedfacf;
      else         magic = 0xfeedface;
    }
  }
}; // end struct MachOHeader
/// MachOSegment - Holds the fields of one segment load command (LC_SEGMENT
/// or LC_SEGMENT_64), which are needed to emit the load commands for the
/// sections of the file.
struct MachOSegment {
  uint32_t    cmd;      // LC_SEGMENT or LC_SEGMENT_64
  uint32_t    cmdsize;  // Total size of this struct and section commands
  std::string segname;  // segment name
  uint64_t    vmaddr;   // address of this segment
  uint64_t    vmsize;   // size of this segment, may be larger than filesize
  uint64_t    fileoff;  // offset in file
  uint64_t    filesize; // amount to read from file
  uint32_t    maxprot;  // maximum VM protection
  uint32_t    initprot; // initial VM protection
  uint32_t    nsects;   // number of sections in this segment
  uint32_t    flags;    // flags

  // A system header may already provide the VM_PROT_* macros, which would
  // clash with enumerators of the same name.  Supply fallback definitions
  // only when they are missing and expose them below as SEG_VM_PROT_*.
#ifndef VM_PROT_NONE
#define VM_PROT_NONE    0x00
#endif
#ifndef VM_PROT_READ
#define VM_PROT_READ    0x01
#endif
#ifndef VM_PROT_WRITE
#define VM_PROT_WRITE   0x02
#endif
#ifndef VM_PROT_EXECUTE
#define VM_PROT_EXECUTE 0x04
#endif
#ifndef VM_PROT_ALL
#define VM_PROT_ALL     0x07
#endif

  // Values for the maxprot / initprot fields
  // see <mach/vm_prot.h>
  enum { SEG_VM_PROT_NONE     = VM_PROT_NONE,
         SEG_VM_PROT_READ     = VM_PROT_READ,    // read permission
         SEG_VM_PROT_WRITE    = VM_PROT_WRITE,   // write permission
         SEG_VM_PROT_EXECUTE  = VM_PROT_EXECUTE, // execute permission
         SEG_VM_PROT_ALL      = VM_PROT_ALL      // all permissions
  };

  // Values for the cmd field
  // see <mach-o/loader.h>
  enum { LC_SEGMENT    = 0x01,  // segment of this file to be mapped
         LC_SEGMENT_64 = 0x19   // 64-bit segment of this file to be mapped
  };

  /// cmdSize - Size of this segment command as written to disk.  The four
  /// address/size fields are 64 bits wide in a 64 bit Mach-O file and 32
  /// bits wide otherwise.
  unsigned cmdSize(bool is64Bit) const {
    // Constant 16 bytes added in both layouts (presumably the fixed-width
    // segment name field -- confirm against the writer).
    const unsigned FixedBytes = 16;
    return is64Bit
             ? 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + FixedBytes
             : 10 * sizeof(uint32_t) + FixedBytes;
  }

  /// Creates an empty segment named \p seg.  The command type is chosen from
  /// \p is64Bit, both protection fields start with every permission bit set,
  /// and all remaining numeric fields start at zero.
  MachOSegment(const std::string &seg, bool is64Bit)
    : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT),
      cmdsize(0),
      segname(seg),
      vmaddr(0),
      vmsize(0),
      fileoff(0),
      filesize(0),
      maxprot(SEG_VM_PROT_ALL),
      initprot(SEG_VM_PROT_ALL),
      nsects(0),
      flags(0) { }
};
/// MachOSection - This struct contains information about each section in a
/// particular segment that is emitted to the file.  This is eventually
/// turned into the SectionCommand in the load command for a particular
/// segment.
struct MachOSection {
  std::string  sectname; // name of this section,
  std::string  segname;  // segment this section goes in
  uint64_t  addr;        // memory address of this section
  uint64_t  size;        // size in bytes of this section
  uint32_t  offset;      // file offset of this section
  uint32_t  align;       // section alignment (power of 2)
  uint32_t  reloff;      // file offset of relocation entries
  uint32_t  nreloc;      // number of relocation entries
  uint32_t  flags;       // flags (section type and attributes)
  uint32_t  reserved1;   // reserved (for offset or index)
  uint32_t  reserved2;   // reserved (for count or sizeof)
  uint32_t  reserved3;   // reserved (64 bit only)

  /// A unique number for this section, which will be used to match symbols
  /// to the correct section.
  uint32_t Index;

  /// SectionData - The actual data for this section which we are building
  /// up for emission to the file.
  DataBuffer SectionData;

  /// RelocBuffer - A buffer to hold the mach-o relocations before we write
  /// them out at the appropriate location in the file.
  DataBuffer RelocBuffer;

  /// Relocations - The relocations that we have encountered so far in this
  /// section that we will need to convert to MachORelocation entries when
  /// the file is written.
  std::vector<MachineRelocation> Relocations;

  // Constants for the section types (low 8 bits of flags field)
  // see <mach-o/loader.h>
  enum { S_REGULAR = 0,
           // regular section
         S_ZEROFILL = 1,
           // zero fill on demand section
         S_CSTRING_LITERALS = 2,
           // section with only literal C strings
         S_4BYTE_LITERALS = 3,
           // section with only 4 byte literals
         S_8BYTE_LITERALS = 4,
           // section with only 8 byte literals
         S_LITERAL_POINTERS = 5,
           // section with only pointers to literals
         S_NON_LAZY_SYMBOL_POINTERS = 6,
           // section with only non-lazy symbol pointers
         S_LAZY_SYMBOL_POINTERS = 7,
           // section with only lazy symbol pointers
         S_SYMBOL_STUBS = 8,
           // section with only symbol stubs
           // byte size of stub in the reserved2 field
         S_MOD_INIT_FUNC_POINTERS = 9,
           // section with only function pointers for initialization
         S_MOD_TERM_FUNC_POINTERS = 10,
           // section with only function pointers for termination
         S_COALESCED = 11,
           // section contains symbols that are coalesced
         S_GB_ZEROFILL = 12,
           // zero fill on demand section (that can be larger than 4GB)
         S_INTERPOSING = 13,
           // section with only pairs of function pointers for interposing
         S_16BYTE_LITERALS = 14
           // section with only 16 byte literals
  };

  // Constants for the section flags (high 24 bits of flags field)
  // see <mach-o/loader.h>
  // The shifts are done on an unsigned 1 so that bit 31 is not shifted into
  // the sign bit of a signed int.
  enum { S_ATTR_PURE_INSTRUCTIONS   = 1U << 31,
           // section contains only true machine instructions
         S_ATTR_NO_TOC              = 1U << 30,
           // section contains coalesced symbols that are not to be in a
           // ranlib table of contents
         S_ATTR_STRIP_STATIC_SYMS   = 1U << 29,
           // ok to strip static symbols in this section in files with the
           // MH_DYLDLINK flag
         S_ATTR_NO_DEAD_STRIP       = 1U << 28,
           // no dead stripping
         S_ATTR_LIVE_SUPPORT        = 1U << 27,
           // blocks are live if they reference live blocks
         S_ATTR_SELF_MODIFYING_CODE = 1U << 26,
           // used with i386 code stubs written on by dyld
         S_ATTR_DEBUG               = 1U << 25,
           // a debug section
         S_ATTR_SOME_INSTRUCTIONS   = 1U << 10,
           // section contains some machine instructions
         S_ATTR_EXT_RELOC           = 1U << 9,
           // section has external relocation entries
         S_ATTR_LOC_RELOC           = 1U << 8
           // section has local relocation entries
  };

  /// cmdSize - This routine returns the size of the MachOSection as written
  /// to disk, depending on whether the destination is a 64 bit Mach-O file.
  /// The addr and size fields are 64 bits wide only in the 64 bit layout.
  unsigned cmdSize(bool is64Bit) const {
    // The constant 32 bytes are presumably the two fixed-width name fields
    // (sectname and segname) -- confirm against the writer.
    if (is64Bit)
      return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
    else
      return 9 * sizeof(uint32_t) + 32;    // addresses only 32 bits
  }

  /// Creates an empty section \p sect belonging to segment \p seg.  The
  /// alignment starts at 2 and every other numeric field, including Index,
  /// starts at zero so that no member is left uninitialized.
  MachOSection(const std::string &seg, const std::string &sect)
    : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2),
      reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
      reserved3(0), Index(0) { }
}; // end struct MachOSection
/// MachODySymTab - Fields of the LC_DYSYMTAB load command: the indices,
/// file offsets and entry counts of the dynamic symbol table information.
struct MachODySymTab {
  uint32_t cmd;             // LC_DYSYMTAB
  uint32_t cmdsize;         // sizeof( MachODySymTab )
  uint32_t ilocalsym;       // index to local symbols
  uint32_t nlocalsym;       // number of local symbols
  uint32_t iextdefsym;      // index to externally defined symbols
  uint32_t nextdefsym;      // number of externally defined symbols
  uint32_t iundefsym;       // index to undefined symbols
  uint32_t nundefsym;       // number of undefined symbols
  uint32_t tocoff;          // file offset to table of contents
  uint32_t ntoc;            // number of entries in table of contents
  uint32_t modtaboff;       // file offset to module table
  uint32_t nmodtab;         // number of module table entries
  uint32_t extrefsymoff;    // offset to referenced symbol table
  uint32_t nextrefsyms;     // number of referenced symbol table entries
  uint32_t indirectsymoff;  // file offset to the indirect symbol table
  uint32_t nindirectsyms;   // number of indirect symbol table entries
  uint32_t extreloff;       // offset to external relocation entries
  uint32_t nextrel;         // number of external relocation entries
  uint32_t locreloff;       // offset to local relocation entries
  uint32_t nlocrel;         // number of local relocation entries

  // Value for the cmd field
  // see <mach-o/loader.h>
  enum { LC_DYSYMTAB = 0x0B };  // dynamic link-edit symbol table info

  /// Default constructor: cmd is LC_DYSYMTAB, cmdsize is the size of the
  /// twenty 32-bit fields declared above, and every index, offset and count
  /// starts at zero.
  MachODySymTab()
    : cmd(LC_DYSYMTAB),
      cmdsize(20 * sizeof(uint32_t)),
      ilocalsym(0), nlocalsym(0),
      iextdefsym(0), nextdefsym(0),
      iundefsym(0), nundefsym(0),
      tocoff(0), ntoc(0),
      modtaboff(0), nmodtab(0),
      extrefsymoff(0), nextrefsyms(0),
      indirectsymoff(0), nindirectsyms(0),
      extreloff(0), nextrel(0),
      locreloff(0), nlocrel(0) { }
};
} // end namespace llvm
#endif

View File

@ -0,0 +1,207 @@
//===-- MachOCodeEmitter.cpp - Target-independent Mach-O Emitter code ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "MachOCodeEmitter.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/OutputBuffer.h"
//===----------------------------------------------------------------------===//
// MachOCodeEmitter Implementation
//===----------------------------------------------------------------------===//
namespace llvm {
/// startFunction - This callback is invoked when a new machine function is
/// about to be emitted.  It points the emitter's output buffer at the text
/// section's data, raises the section alignment if needed, and rounds the
/// section size up so the new function starts on an aligned offset.
void MachOCodeEmitter::startFunction(MachineFunction &MF) {
  const TargetData *TD = TM.getTargetData();
  const Function *F = MF.getFunction();

  // Align the output buffer to the appropriate alignment, power of 2.
  unsigned FnAlign = F->getAlignment();
  unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
  unsigned RawAlign = std::max(FnAlign, TDAlign);
  // The power-of-two check must look at the alignment itself.  Checking its
  // logarithm instead rejects valid alignments such as 8, whose log2 is 3.
  assert(RawAlign && !(RawAlign & (RawAlign - 1)) &&
         "Alignment is not a power of two!");
  // From here on Align is the log2 of the alignment, the same unit that is
  // compared against and stored in MOS->align below.
  unsigned Align = Log2_32(RawAlign);

  // Get the Mach-O Section that this function belongs in.
  MachOSection *MOS = MOW.getTextSection();

  // FIXME: better memory management
  MOS->SectionData.reserve(4096);
  BufferBegin = &MOS->SectionData[0];
  BufferEnd = BufferBegin + MOS->SectionData.capacity();

  // Upgrade the section alignment if required.
  if (MOS->align < Align) MOS->align = Align;

  // Round the size up to the correct alignment for starting the new function.
  if ((MOS->size & ((1 << Align) - 1)) != 0) {
    MOS->size += (1 << Align);
    MOS->size &= ~((1 << Align) - 1);
  }

  // FIXME: Using MOS->size directly here instead of calculating it from the
  // output buffer size (impossible because the code emitter deals only in raw
  // bytes) forces us to manually synchronize size and write padding zero bytes
  // to the output buffer for all non-text sections.  For text sections, we do
  // not synchronize the output buffer, and we just blow up if anyone tries to
  // write non-code to it.  An assert should probably be added to
  // AddSymbolToSection to prevent calling it on the text section.
  CurBufferPtr = BufferBegin + MOS->size;
}
/// finishFunction - Callback run once a function has been emitted in full.
/// It records the function's symbol, emits the constant pool and jump
/// tables, moves the pending relocations into the text section and resets
/// the per-function bookkeeping.  Always returns false.
bool MachOCodeEmitter::finishFunction(MachineFunction &MF) {
  // The text section is where this function's code was written.
  MachOSection *TextSec = MOW.getTextSection();

  // Build the symbol table entry for the function.
  // FIXME: it seems like we should call something like AddSymbolToSection
  // in startFunction rather than changing the section size and symbol n_value
  // here.
  const GlobalValue *Fn = MF.getFunction();
  MachOSym FnSym(Fn, MOW.Mang->getValueName(Fn), TextSec->Index, TAI);
  // n_value takes the section size from before this function's bytes are
  // counted; only afterwards is the size brought up to the write position.
  FnSym.n_value = TextSec->size;
  TextSec->size = CurBufferPtr - BufferBegin;

  // Constant pool entries go to their section(s), jump tables to theirs.
  emitConstantPool(MF.getConstantPool());
  emitJumpTables(MF.getJumpTableInfo());

  // Relocations against function-local objects (basic blocks, constant pool
  // entries, jump tables) get their section index and address recorded now
  // so that they can be rewritten with the correct addresses later.
  typedef std::vector<MachineRelocation>::iterator RelocIter;
  for (RelocIter RI = Relocations.begin(), RE = Relocations.end();
       RI != RE; ++RI) {
    MachineRelocation &MR = *RI;

    if (MR.isBasicBlock()) {
      intptr_t Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
      MR.setConstantVal(TextSec->Index);
      MR.setResultPointer((void*)Addr);
    } else if (MR.isJumpTableIndex()) {
      intptr_t Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
      MR.setConstantVal(MOW.getJumpTableSection()->Index);
      MR.setResultPointer((void*)Addr);
    } else if (MR.isConstantPoolIndex()) {
      intptr_t Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
      MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
      MR.setResultPointer((void*)Addr);
    } else if (MR.isGlobalValue()) {
      // FIXME: This should be a set or something that uniques
      MOW.PendingGlobals.push_back(MR.getGlobalValue());
    } else {
      assert(0 && "Unhandled relocation type");
    }

    TextSec->Relocations.push_back(MR);
  }
  Relocations.clear();

  // The function's symbol goes into the symbol table last.
  MOW.SymbolTable.push_back(FnSym);

  // Reset the per-function data structures.
  CPLocations.clear();
  CPSections.clear();
  JTLocations.clear();
  MBBLocations.clear();

  return false;
}
/// emitConstantPool - For each constant pool entry, figure out which section
/// the constant should live in, allocate space for it, and emit it to the
/// Section data buffer.  The offset and section index of every entry are
/// recorded in CPLocations and CPSections.
void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
  const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
  if (CP.empty()) return;

  // FIXME: handle PIC codegen
  assert(TM.getRelocationModel() != Reloc::PIC_ &&
         "PIC codegen not yet handled for mach-o constant pools!");

  // Although there is no strict necessity that I am aware of, we will do what
  // gcc for OS X does and put each constant pool entry in a section of constant
  // objects of a certain size.  That means that float constants go in the
  // literal4 section, and double objects go in literal8, etc.
  //
  // FIXME: revisit this decision if we ever do the "stick everything into one
  // "giant object for PIC" optimization.

  // The target data is the same for every entry, so look it up once.
  const TargetData *TD = TM.getTargetData();

  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
    const Type *Ty = CP[i].getType();
    unsigned Size = TD->getTypeAllocSize(Ty);

    MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal);
    OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);

    // Record where in its section this entry starts, and which section it is.
    CPLocations.push_back(Sec->SectionData.size());
    CPSections.push_back(Sec->Index);

    // FIXME: remove when we have unified size + output buffer
    Sec->size += Size;

    // Allocate space in the section for the global.
    // FIXME: need alignment?
    // FIXME: share between here and AddSymbolToSection?
    for (unsigned j = 0; j < Size; ++j)
      SecDataOut.outbyte(0);

    // Fill the zeroed space with the constant's value.
    MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i],
                TD, Sec->Relocations);
  }
}
/// emitJumpTables - Write every jump table described by MJTI into the jump
/// table section.  Each table slot is emitted as a zero address together
/// with a relocation against the slot's target basic block.
void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
  const std::vector<MachineJumpTableEntry> &Tables = MJTI->getJumpTables();
  if (Tables.empty())
    return;

  // FIXME: handle PIC codegen
  assert(TM.getRelocationModel() != Reloc::PIC_ &&
         "PIC codegen not yet handled for mach-o jump tables!");

  MachOSection *JTSec = MOW.getJumpTableSection();
  const unsigned TextIdx = MOW.getTextSection()->Index;
  OutputBuffer Out(JTSec->SectionData, is64Bit, isLittleEndian);

  typedef std::vector<MachineBasicBlock*>::const_iterator MBBIter;
  for (unsigned Table = 0, NumTables = Tables.size(); Table != NumTables;
       ++Table) {
    // Remember the offset from the start of the section at which this table
    // begins.
    JTLocations.push_back(JTSec->SectionData.size());

    // One relocation plus one placeholder address per target block.
    const std::vector<MachineBasicBlock*> &Targets = Tables[Table].MBBs;
    for (MBBIter BI = Targets.begin(), BE = Targets.end(); BI != BE; ++BI) {
      MachineRelocation MR(
          MOW.GetJTRelocation(JTSec->SectionData.size(), *BI));
      MR.setResultPointer((void *)JTLocations[Table]);
      MR.setConstantVal(TextIdx);
      JTSec->Relocations.push_back(MR);
      Out.outaddr(0);
    }
  }

  // FIXME: remove when we have unified size + output buffer
  JTSec->size = JTSec->SectionData.size();
}
} // end namespace llvm

View File

@ -0,0 +1,129 @@
//===-- MachOCodeEmitter.h - Target-independent Mach-O Emitter class ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef MACHOCODEEMITTER_H
#define MACHOCODEEMITTER_H
#include "MachOWriter.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
#include <vector>
namespace llvm {
/// MachOCodeEmitter - The MachineCodeEmitter implementation that the
/// MachOWriter uses to emit the code for functions to the Mach-O file.
class MachOCodeEmitter : public MachineCodeEmitter {
  /// The writer this emitter works for.
  MachOWriter &MOW;

  /// Target machine description.
  TargetMachine &TM;

  /// is64Bit/isLittleEndian - Both are read from the target machine's
  /// target data and indicate what header values and flags to set.
  bool is64Bit, isLittleEndian;

  /// Assembly info obtained from the target machine.
  const TargetAsmInfo *TAI;

  /// Relocations - The relocations the current function needs, in the order
  /// they were emitted.
  std::vector<MachineRelocation> Relocations;

  /// CPLocations - Maps a constant pool index to that entry's offset from
  /// the start of its section.
  std::vector<uintptr_t> CPLocations;

  /// CPSections - Maps a constant pool index to the MachOSection that
  /// contains the entry.
  std::vector<unsigned> CPSections;

  /// JTLocations - Maps a jump table index to that table's offset from the
  /// start of its section.
  std::vector<uintptr_t> JTLocations;

  /// MBBLocations - Maps an MBB ID to its address.  Written by
  /// StartMachineBasicBlock and read by getMachineBasicBlockAddress.
  std::vector<uintptr_t> MBBLocations;

public:
  /// Binds the emitter to \p mow and caches pointer width, endianness and
  /// assembly info from the writer's target machine.
  MachOCodeEmitter(MachOWriter &mow)
    : MOW(mow), TM(MOW.TM),
      is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
      isLittleEndian(TM.getTargetData()->isLittleEndian()),
      TAI(TM.getTargetAsmInfo()) {}

  virtual void startFunction(MachineFunction &MF);
  virtual bool finishFunction(MachineFunction &MF);

  /// Queues \p MR with the relocations recorded so far.
  virtual void addRelocation(const MachineRelocation &MR) {
    Relocations.push_back(MR);
  }

  void emitConstantPool(MachineConstantPool *MCP);
  void emitJumpTables(MachineJumpTableInfo *MJTI);

  /// Returns the recorded offset of constant pool entry \p Index.
  virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
    assert(Index < CPLocations.size() && "CP not emitted!");
    return CPLocations[Index];
  }

  /// Returns the recorded offset of jump table \p Index.
  virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
    assert(Index < JTLocations.size() && "JT not emitted!");
    return JTLocations[Index];
  }

  /// Records the current PC offset as the location of \p MBB, growing the
  /// table to twice the needed size when the block number is out of range.
  virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
    const int Num = MBB->getNumber();
    if ((unsigned)Num >= MBBLocations.size())
      MBBLocations.resize((Num + 1) * 2);
    MBBLocations[Num] = getCurrentPCOffset();
  }

  /// Returns the location recorded for \p MBB.  The assertion treats a
  /// recorded value of zero as "not emitted".
  virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
    const int Num = MBB->getNumber();
    assert((unsigned)Num < MBBLocations.size() &&
           MBBLocations[Num] && "MBB not emitted!");
    return MBBLocations[Num];
  }

  /// Labels are not supported by this emitter: asserts, then aborts.
  virtual uintptr_t getLabelAddress(uint64_t Label) const {
    assert(0 && "get Label not implemented");
    abort();
    return 0;
  }

  /// Labels are not supported by this emitter: asserts, then aborts.
  virtual void emitLabel(uint64_t LabelID) {
    assert(0 && "emit Label not implemented");
    abort();
  }

  /// Intentionally does nothing.
  virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }

  /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
  /// Each of the following asserts and then aborts when called.
  virtual void startGVStub(const GlobalValue* F, unsigned StubSize,
                           unsigned Alignment = 1) {
    assert(0 && "JIT specific function called!");
    abort();
  }
  virtual void startGVStub(const GlobalValue* F, void *Buffer,
                           unsigned StubSize) {
    assert(0 && "JIT specific function called!");
    abort();
  }
  virtual void *finishGVStub(const GlobalValue* F) {
    assert(0 && "JIT specific function called!");
    abort();
    return 0;
  }
}; // end class MachOCodeEmitter
} // end namespace llvm
#endif

View File

@ -23,6 +23,7 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "MachOWriter.h" #include "MachOWriter.h"
#include "MachOCodeEmitter.h"
#include "llvm/Constants.h" #include "llvm/Constants.h"
#include "llvm/DerivedTypes.h" #include "llvm/DerivedTypes.h"
#include "llvm/Module.h" #include "llvm/Module.h"
@ -40,11 +41,12 @@
#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"
#include <algorithm> #include <algorithm>
#include <cstring> #include <cstring>
using namespace llvm;
namespace llvm {
/// AddMachOWriter - Concrete function to add the Mach-O writer to the function /// AddMachOWriter - Concrete function to add the Mach-O writer to the function
/// pass manager. /// pass manager.
MachineCodeEmitter *llvm::AddMachOWriter(PassManagerBase &PM, MachineCodeEmitter *AddMachOWriter(PassManagerBase &PM,
raw_ostream &O, raw_ostream &O,
TargetMachine &TM) { TargetMachine &TM) {
MachOWriter *MOW = new MachOWriter(O, TM); MachOWriter *MOW = new MachOWriter(O, TM);
@ -52,300 +54,21 @@ MachineCodeEmitter *llvm::AddMachOWriter(PassManagerBase &PM,
return &MOW->getMachineCodeEmitter(); return &MOW->getMachineCodeEmitter();
} }
//===----------------------------------------------------------------------===//
// MachOCodeEmitter Implementation
//===----------------------------------------------------------------------===//
namespace llvm {
/// MachOCodeEmitter - The MachineCodeEmitter implementation that the
/// MachOWriter uses to emit the code for functions to the Mach-O file.
class MachOCodeEmitter : public MachineCodeEmitter {
  /// The writer this emitter works for.
  MachOWriter &MOW;

  /// Target machine description.
  TargetMachine &TM;

  /// is64Bit/isLittleEndian - Both are read from the target machine's
  /// target data and indicate what header values and flags to set.
  bool is64Bit, isLittleEndian;

  /// Relocations - The relocations the current function needs, in the order
  /// they were emitted.
  std::vector<MachineRelocation> Relocations;

  /// CPLocations - Maps a constant pool index to that entry's offset from
  /// the start of its section.
  std::vector<uintptr_t> CPLocations;

  /// CPSections - Maps a constant pool index to the MachOSection that
  /// contains the entry.
  std::vector<unsigned> CPSections;

  /// JTLocations - Maps a jump table index to that table's offset from the
  /// start of its section.
  std::vector<uintptr_t> JTLocations;

  /// MBBLocations - Maps an MBB ID to its address.  Written by
  /// StartMachineBasicBlock and read by getMachineBasicBlockAddress.
  std::vector<uintptr_t> MBBLocations;

public:
  /// Binds the emitter to \p mow and caches pointer width and endianness
  /// from the writer's target machine.
  MachOCodeEmitter(MachOWriter &mow)
    : MOW(mow), TM(MOW.TM),
      is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
      isLittleEndian(TM.getTargetData()->isLittleEndian()) {}

  virtual void startFunction(MachineFunction &MF);
  virtual bool finishFunction(MachineFunction &MF);

  /// Queues \p MR with the relocations recorded so far.
  virtual void addRelocation(const MachineRelocation &MR) {
    Relocations.push_back(MR);
  }

  void emitConstantPool(MachineConstantPool *MCP);
  void emitJumpTables(MachineJumpTableInfo *MJTI);

  /// Returns the recorded offset of constant pool entry \p Index.
  virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
    assert(Index < CPLocations.size() && "CP not emitted!");
    return CPLocations[Index];
  }

  /// Returns the recorded offset of jump table \p Index.
  virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
    assert(Index < JTLocations.size() && "JT not emitted!");
    return JTLocations[Index];
  }

  /// Records the current PC offset as the location of \p MBB, growing the
  /// table to twice the needed size when the block number is out of range.
  virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
    const int Num = MBB->getNumber();
    if ((unsigned)Num >= MBBLocations.size())
      MBBLocations.resize((Num + 1) * 2);
    MBBLocations[Num] = getCurrentPCOffset();
  }

  /// Returns the location recorded for \p MBB.  The assertion treats a
  /// recorded value of zero as "not emitted".
  virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
    const int Num = MBB->getNumber();
    assert((unsigned)Num < MBBLocations.size() &&
           MBBLocations[Num] && "MBB not emitted!");
    return MBBLocations[Num];
  }

  /// Labels are not supported by this emitter: asserts, then aborts.
  virtual uintptr_t getLabelAddress(uint64_t Label) const {
    assert(0 && "get Label not implemented");
    abort();
    return 0;
  }

  /// Labels are not supported by this emitter: asserts, then aborts.
  virtual void emitLabel(uint64_t LabelID) {
    assert(0 && "emit Label not implemented");
    abort();
  }

  /// Intentionally does nothing.
  virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }

  /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
  /// Each of the following asserts and then aborts when called.
  virtual void startGVStub(const GlobalValue* F, unsigned StubSize,
                           unsigned Alignment = 1) {
    assert(0 && "JIT specific function called!");
    abort();
  }
  virtual void startGVStub(const GlobalValue* F, void *Buffer,
                           unsigned StubSize) {
    assert(0 && "JIT specific function called!");
    abort();
  }
  virtual void *finishGVStub(const GlobalValue* F) {
    assert(0 && "JIT specific function called!");
    abort();
    return 0;
  }
};
}
/// startFunction - This callback is invoked when a new machine function is
/// about to be emitted.  It points the emitter's output buffer at the text
/// section's data, raises the section alignment if needed, rounds the
/// section size up so the new function starts on an aligned offset, and
/// resets the per-function bookkeeping.
void MachOCodeEmitter::startFunction(MachineFunction &MF) {
  const TargetData *TD = TM.getTargetData();
  const Function *F = MF.getFunction();

  // Align the output buffer to the appropriate alignment, power of 2.
  unsigned FnAlign = F->getAlignment();
  unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
  unsigned RawAlign = std::max(FnAlign, TDAlign);
  // The power-of-two check must look at the alignment itself.  Checking its
  // logarithm instead rejects valid alignments such as 8, whose log2 is 3.
  assert(RawAlign && !(RawAlign & (RawAlign - 1)) &&
         "Alignment is not a power of two!");
  // From here on Align is the log2 of the alignment, the same unit that is
  // compared against and stored in MOS->align below.
  unsigned Align = Log2_32(RawAlign);

  // Get the Mach-O Section that this function belongs in.
  MachOWriter::MachOSection *MOS = MOW.getTextSection();

  // FIXME: better memory management
  MOS->SectionData.reserve(4096);
  BufferBegin = &MOS->SectionData[0];
  BufferEnd = BufferBegin + MOS->SectionData.capacity();

  // Upgrade the section alignment if required.
  if (MOS->align < Align) MOS->align = Align;

  // Round the size up to the correct alignment for starting the new function.
  if ((MOS->size & ((1 << Align) - 1)) != 0) {
    MOS->size += (1 << Align);
    MOS->size &= ~((1 << Align) - 1);
  }

  // FIXME: Using MOS->size directly here instead of calculating it from the
  // output buffer size (impossible because the code emitter deals only in raw
  // bytes) forces us to manually synchronize size and write padding zero bytes
  // to the output buffer for all non-text sections.  For text sections, we do
  // not synchronize the output buffer, and we just blow up if anyone tries to
  // write non-code to it.  An assert should probably be added to
  // AddSymbolToSection to prevent calling it on the text section.
  CurBufferPtr = BufferBegin + MOS->size;

  // Clear per-function data structures.
  CPLocations.clear();
  CPSections.clear();
  JTLocations.clear();
  MBBLocations.clear();
}
/// finishFunction - This callback is invoked after the function is completely
/// finished.  It creates the function's symbol, updates the text section size
/// from the output buffer, emits the constant pool and jump tables, resolves
/// the pending relocations into the text section, and finally adds the symbol
/// to the writer's symbol table.  Always returns false.
bool MachOCodeEmitter::finishFunction(MachineFunction &MF) {
  // The text section is where this function's code was emitted.
  MachOWriter::MachOSection *TextSec = MOW.getTextSection();

  // Build the symbol for the function.  Its value is the section size as it
  // stood before the size is refreshed from the buffer pointer below.
  // FIXME: it seems like we should call something like AddSymbolToSection
  // in startFunction rather than changing the section size and symbol n_value
  // here.
  const GlobalValue *Fn = MF.getFunction();
  MachOSym FnSym(Fn, MOW.Mang->getValueName(Fn), TextSec->Index, TM);
  FnSym.n_value = TextSec->size;
  TextSec->size = CurBufferPtr - BufferBegin;

  // Constant pool entries and jump tables must be emitted before the
  // relocation walk below, which looks up their recorded locations.
  emitConstantPool(MF.getConstantPool());
  emitJumpTables(MF.getJumpTableInfo());

  // Relocations against function-specific objects (basic blocks, constant
  // pool entries, jump tables) get their target section index and address
  // recorded now so they can be rewritten with the correct addresses later.
  for (unsigned Idx = 0, NumRelocs = Relocations.size(); Idx != NumRelocs;
       ++Idx) {
    MachineRelocation &Reloc = Relocations[Idx];

    if (Reloc.isBasicBlock()) {
      intptr_t BlockAddr = getMachineBasicBlockAddress(Reloc.getBasicBlock());
      Reloc.setConstantVal(TextSec->Index);
      Reloc.setResultPointer((void*)BlockAddr);
    } else if (Reloc.isJumpTableIndex()) {
      intptr_t TableAddr = getJumpTableEntryAddress(Reloc.getJumpTableIndex());
      Reloc.setConstantVal(MOW.getJumpTableSection()->Index);
      Reloc.setResultPointer((void*)TableAddr);
    } else if (Reloc.isConstantPoolIndex()) {
      intptr_t EntryAddr =
        getConstantPoolEntryAddress(Reloc.getConstantPoolIndex());
      Reloc.setConstantVal(CPSections[Reloc.getConstantPoolIndex()]);
      Reloc.setResultPointer((void*)EntryAddr);
    } else if (Reloc.isGlobalValue()) {
      // FIXME: This should be a set or something that uniques
      MOW.PendingGlobals.push_back(Reloc.getGlobalValue());
    } else {
      assert(0 && "Unhandled relocation type");
    }

    TextSec->Relocations.push_back(Reloc);
  }
  Relocations.clear();

  // Finally, add it to the symtab.
  MOW.SymbolTable.push_back(FnSym);
  return false;
}
/// emitConstantPool - For each constant pool entry, figure out which section
/// the constant should live in, allocate space for it, and emit it to the
/// Section data buffer.  The offset of each entry within its section is
/// appended to CPLocations and the section's index to CPSections.
void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
  const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
  if (CP.empty()) return;

  // FIXME: handle PIC codegen
  assert(TM.getRelocationModel() != Reloc::PIC_ &&
         "PIC codegen not yet handled for mach-o constant pools!");

  // Although there is no strict necessity that I am aware of, we will do what
  // gcc for OS X does and put each constant pool entry in a section of constant
  // objects of a certain size.  That means that float constants go in the
  // literal4 section, and double objects go in literal8, etc.
  //
  // FIXME: revisit this decision if we ever do the "stick everything into one
  // giant object for PIC" optimization.
  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
    const Type *Ty = CP[i].getType();
    unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);

    MachOWriter::MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal);
    OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);

    // Record where this entry starts, before any bytes are appended for it.
    CPLocations.push_back(Sec->SectionData.size());
    CPSections.push_back(Sec->Index);

    // FIXME: remove when we have unified size + output buffer
    Sec->size += Size;

    // Allocate space in the section for the global.
    // FIXME: need alignment?
    // FIXME: share between here and AddSymbolToSection?
    for (unsigned j = 0; j < Size; ++j)
      SecDataOut.outbyte(0);

    // Fill in the zeroed space with the constant's value.  Indexing
    // CPLocations by i relies on the table holding only this function's
    // entries (startFunction clears it).
    MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i],
                TM.getTargetData(), Sec->Relocations);
  }
}
/// emitJumpTables - Emit all the jump tables for a given jump table info
/// record to the appropriate section.  Each table's starting offset is
/// appended to JTLocations; each slot is written as a zero address together
/// with a relocation against the text section.
void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
  const std::vector<MachineJumpTableEntry> &Tables = MJTI->getJumpTables();
  if (Tables.empty())
    return;

  // FIXME: handle PIC codegen
  assert(TM.getRelocationModel() != Reloc::PIC_ &&
         "PIC codegen not yet handled for mach-o jump tables!");

  MachOWriter::MachOSection *JTSec = MOW.getJumpTableSection();
  unsigned TextSecIndex = MOW.getTextSection()->Index;
  OutputBuffer JTOut(JTSec->SectionData, is64Bit, isLittleEndian);

  for (unsigned TableNo = 0, NumTables = Tables.size(); TableNo != NumTables;
       ++TableNo) {
    // Remember where this table starts within the section.
    const std::vector<MachineBasicBlock*> &Targets = Tables[TableNo].MBBs;
    JTLocations.push_back(JTSec->SectionData.size());

    for (unsigned Slot = 0, NumSlots = Targets.size(); Slot != NumSlots;
         ++Slot) {
      // The relocation is created at the current end of the section data,
      // i.e. where the placeholder address is about to be written.
      MachineRelocation SlotReloc(
        MOW.GetJTRelocation(JTSec->SectionData.size(), Targets[Slot]));
      SlotReloc.setResultPointer((void *)JTLocations[TableNo]);
      SlotReloc.setConstantVal(TextSecIndex);
      JTSec->Relocations.push_back(SlotReloc);
      JTOut.outaddr(0);
    }
  }

  // FIXME: remove when we have unified size + output buffer
  JTSec->size = JTSec->SectionData.size();
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// MachOWriter Implementation // MachOWriter Implementation
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
char MachOWriter::ID = 0; char MachOWriter::ID = 0;
MachOWriter::MachOWriter(raw_ostream &o, TargetMachine &tm)
MachOWriter::MachOWriter(raw_ostream &o, TargetMachine &tm)
: MachineFunctionPass(&ID), O(o), TM(tm) { : MachineFunctionPass(&ID), O(o), TM(tm) {
is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
isLittleEndian = TM.getTargetData()->isLittleEndian(); isLittleEndian = TM.getTargetData()->isLittleEndian();
TAI = TM.getTargetAsmInfo();
// Create the machine code emitter object for this target. // Create the machine code emitter object for this target.
MCE = new MachOCodeEmitter(*this); MCE = new MachOCodeEmitter(*this);
} }
@ -353,6 +76,55 @@ MachOWriter::~MachOWriter() {
delete MCE; delete MCE;
} }
/// doInitialization - Prepare the writer for a module: set the header magic
/// and file type and create the name mangler.  Always returns false.
bool MachOWriter::doInitialization(Module &M) {
  // The magic value depends on the pointer size and endianness, both of
  // which are known by now.
  Header.setMagic(isLittleEndian, is64Bit);

  // FIXME: this only works for object files, we do not support the creation
  // of dynamic libraries or executables at this time.
  Header.filetype = MachOHeader::MH_OBJECT;

  // The mangler is released again in doFinalization.
  Mang = new Mangler(M);

  return false;
}
/// runOnMachineFunction - Performs no work for the function and always
/// returns false.
bool MachOWriter::runOnMachineFunction(MachineFunction &MF) {
  return false;
}
/// doFinalization - Now that the module has been completely processed, emit
/// the Mach-O file to 'O'.  Globals are emitted into their sections first,
/// then the header and load commands, the section contents, the relocation
/// entries, and finally the symbol and string tables.  Always returns false.
bool MachOWriter::doFinalization(Module &M) {
  // FIXME: we don't handle debug info yet, we should probably do that.

  // Okay, the .text section has been completed, build the .data, .bss, and
  // "common" sections next.
  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
       I != E; ++I)
    EmitGlobal(I);

  // Emit the header and load commands.
  EmitHeaderAndLoadCommands();

  // Emit the various sections and their relocation info.
  EmitSections();
  EmitRelocations();

  // Write the symbol table and the string table to the end of the file.
  // Taking the address of element 0 of an empty buffer is undefined, so an
  // empty table is skipped rather than written as zero bytes.
  if (!SymT.empty())
    O.write((char*)&SymT[0], SymT.size());
  if (!StrT.empty())
    O.write((char*)&StrT[0], StrT.size());

  // We are done with the abstract symbols.
  SectionList.clear();
  SymbolTable.clear();
  DynamicSymbolTable.clear();

  // Release the name mangler object.
  delete Mang; Mang = 0;
  return false;
}
void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) { void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) {
const Type *Ty = GV->getType()->getElementType(); const Type *Ty = GV->getType()->getElementType();
unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
@ -368,7 +140,7 @@ void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) {
Align = Log2_32(Align); Align = Log2_32(Align);
Sec->align = std::max(unsigned(Sec->align), Align); Sec->align = std::max(unsigned(Sec->align), Align);
Sec->size = (Sec->size + Align - 1) & ~(Align-1); Sec->size = (Sec->size + Align - 1) & ~(Align-1);
// Add alignment padding to buffer as well. // Add alignment padding to buffer as well.
// FIXME: remove when we have unified size + output buffer // FIXME: remove when we have unified size + output buffer
unsigned AlignedSize = Sec->size - OrigSize; unsigned AlignedSize = Sec->size - OrigSize;
@ -377,7 +149,7 @@ void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) {
} }
// Globals without external linkage apparently do not go in the symbol table. // Globals without external linkage apparently do not go in the symbol table.
if (!GV->hasLocalLinkage()) { if (!GV->hasLocalLinkage()) {
MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TM); MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TAI);
Sym.n_value = Sec->size; Sym.n_value = Sec->size;
SymbolTable.push_back(Sym); SymbolTable.push_back(Sym);
} }
@ -385,14 +157,14 @@ void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) {
// Record the offset of the symbol, and then allocate space for it. // Record the offset of the symbol, and then allocate space for it.
// FIXME: remove when we have unified size + output buffer // FIXME: remove when we have unified size + output buffer
Sec->size += Size; Sec->size += Size;
// Now that we know what section the GlovalVariable is going to be emitted // Now that we know what section the GlovalVariable is going to be emitted
// into, update our mappings. // into, update our mappings.
// FIXME: We may also need to update this when outputting non-GlobalVariable // FIXME: We may also need to update this when outputting non-GlobalVariable
// GlobalValues such as functions. // GlobalValues such as functions.
GVSection[GV] = Sec; GVSection[GV] = Sec;
GVOffset[GV] = Sec->SectionData.size(); GVOffset[GV] = Sec->SectionData.size();
// Allocate space in the section for the global. // Allocate space in the section for the global.
for (unsigned i = 0; i < Size; ++i) for (unsigned i = 0; i < Size; ++i)
SecDataOut.outbyte(0); SecDataOut.outbyte(0);
@ -402,7 +174,7 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) {
const Type *Ty = GV->getType()->getElementType(); const Type *Ty = GV->getType()->getElementType();
unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
bool NoInit = !GV->hasInitializer(); bool NoInit = !GV->hasInitializer();
// If this global has a zero initializer, it is part of the .bss or common // If this global has a zero initializer, it is part of the .bss or common
// section. // section.
if (NoInit || GV->getInitializer()->isNullValue()) { if (NoInit || GV->getInitializer()->isNullValue()) {
@ -411,7 +183,8 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) {
// merged with other symbols. // merged with other symbols.
if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() ||
GV->hasCommonLinkage()) { GV->hasCommonLinkage()) {
MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), MachOSym::NO_SECT,TM); MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV),
MachOSym::NO_SECT, TAI);
// For undefined (N_UNDF) external (N_EXT) types, n_value is the size in // For undefined (N_UNDF) external (N_EXT) types, n_value is the size in
// bytes of the symbol. // bytes of the symbol.
ExtOrCommonSym.n_value = Size; ExtOrCommonSym.n_value = Size;
@ -425,11 +198,11 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) {
AddSymbolToSection(BSS, GV); AddSymbolToSection(BSS, GV);
return; return;
} }
// Scalar read-only data goes in a literal section if the scalar is 4, 8, or // Scalar read-only data goes in a literal section if the scalar is 4, 8, or
// 16 bytes, or a cstring. Other read only data goes into a regular const // 16 bytes, or a cstring. Other read only data goes into a regular const
// section. Read-write data goes in the data section. // section. Read-write data goes in the data section.
MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) : MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) :
getDataSection(); getDataSection();
AddSymbolToSection(Sec, GV); AddSymbolToSection(Sec, GV);
InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV], InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV],
@ -437,73 +210,25 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) {
} }
/// runOnMachineFunction - Always returns false without touching MF.
bool MachOWriter::runOnMachineFunction(MachineFunction &MF) {
  // Per-function emission is all done through the MCE object, so nothing
  // needs to happen here.
  return false;
}
/// doInitialization - Prepare the writer for a module: set the header magic
/// and file type and create the name mangler.  Always returns false.
bool MachOWriter::doInitialization(Module &M) {
  // The magic value depends on the pointer size and endianness, both of
  // which are known by now.
  Header.setMagic(isLittleEndian, is64Bit);

  // FIXME: this only works for object files, we do not support the creation
  // of dynamic libraries or executables at this time.
  Header.filetype = MachOHeader::MH_OBJECT;

  // The mangler is released again in doFinalization.
  Mang = new Mangler(M);

  return false;
}
/// doFinalization - Now that the module has been completely processed, emit
/// the Mach-O file to 'O'.  Globals are emitted into their sections first,
/// then the header and load commands, the sections, and finally the symbol
/// and string tables.  Always returns false.
bool MachOWriter::doFinalization(Module &M) {
  // FIXME: we don't handle debug info yet, we should probably do that.

  // Okay, the .text section has been completed, build the .data, .bss, and
  // "common" sections next.
  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
       I != E; ++I)
    EmitGlobal(I);

  // Emit the header and load commands.
  EmitHeaderAndLoadCommands();

  // Emit the various sections and their relocation info.
  EmitSections();

  // Write the symbol table and the string table to the end of the file.
  // Taking the address of element 0 of an empty buffer is undefined, so an
  // empty table is skipped rather than written as zero bytes.
  if (!SymT.empty())
    O.write((char*)&SymT[0], SymT.size());
  if (!StrT.empty())
    O.write((char*)&StrT[0], StrT.size());

  // We are done with the abstract symbols.
  SectionList.clear();
  SymbolTable.clear();
  DynamicSymbolTable.clear();

  // Release the name mangler object.
  delete Mang; Mang = 0;
  return false;
}
void MachOWriter::EmitHeaderAndLoadCommands() { void MachOWriter::EmitHeaderAndLoadCommands() {
// Step #0: Fill in the segment load command size, since we need it to figure // Step #0: Fill in the segment load command size, since we need it to figure
// out the rest of the header fields // out the rest of the header fields
MachOSegment SEG("", is64Bit); MachOSegment SEG("", is64Bit);
SEG.nsects = SectionList.size(); SEG.nsects = SectionList.size();
SEG.cmdsize = SEG.cmdSize(is64Bit) + SEG.cmdsize = SEG.cmdSize(is64Bit) +
SEG.nsects * SectionList[0]->cmdSize(is64Bit); SEG.nsects * SectionList[0]->cmdSize(is64Bit);
// Step #1: calculate the number of load commands. We always have at least // Step #1: calculate the number of load commands. We always have at least
// one, for the LC_SEGMENT load command, plus two for the normal // one, for the LC_SEGMENT load command, plus two for the normal
// and dynamic symbol tables, if there are any symbols. // and dynamic symbol tables, if there are any symbols.
Header.ncmds = SymbolTable.empty() ? 1 : 3; Header.ncmds = SymbolTable.empty() ? 1 : 3;
// Step #2: calculate the size of the load commands // Step #2: calculate the size of the load commands
Header.sizeofcmds = SEG.cmdsize; Header.sizeofcmds = SEG.cmdsize;
if (!SymbolTable.empty()) if (!SymbolTable.empty())
Header.sizeofcmds += SymTab.cmdsize + DySymTab.cmdsize; Header.sizeofcmds += SymTab.cmdsize + DySymTab.cmdsize;
// Step #3: write the header to the file // Step #3: write the header to the file
// Local alias to shortenify coming code. // Local alias to shortenify coming code.
DataBuffer &FH = Header.HeaderData; DataBuffer &FH = Header.HeaderData;
@ -518,7 +243,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
FHOut.outword(Header.flags); FHOut.outword(Header.flags);
if (is64Bit) if (is64Bit)
FHOut.outword(Header.reserved); FHOut.outword(Header.reserved);
// Step #4: Finish filling in the segment load command and write it out // Step #4: Finish filling in the segment load command and write it out
for (std::vector<MachOSection*>::iterator I = SectionList.begin(), for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
E = SectionList.end(); I != E; ++I) E = SectionList.end(); I != E; ++I)
@ -526,7 +251,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
SEG.vmsize = SEG.filesize; SEG.vmsize = SEG.filesize;
SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds; SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds;
FHOut.outword(SEG.cmd); FHOut.outword(SEG.cmd);
FHOut.outword(SEG.cmdsize); FHOut.outword(SEG.cmdsize);
FHOut.outstring(SEG.segname, 16); FHOut.outstring(SEG.segname, 16);
@ -538,8 +263,8 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
FHOut.outword(SEG.initprot); FHOut.outword(SEG.initprot);
FHOut.outword(SEG.nsects); FHOut.outword(SEG.nsects);
FHOut.outword(SEG.flags); FHOut.outword(SEG.flags);
// Step #5: Finish filling in the fields of the MachOSections // Step #5: Finish filling in the fields of the MachOSections
uint64_t currentAddr = 0; uint64_t currentAddr = 0;
for (std::vector<MachOSection*>::iterator I = SectionList.begin(), for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
E = SectionList.end(); I != E; ++I) { E = SectionList.end(); I != E; ++I) {
@ -550,13 +275,13 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
// FIXME: do we need to do something with alignment here? // FIXME: do we need to do something with alignment here?
currentAddr += MOS->size; currentAddr += MOS->size;
} }
// Step #6: Emit the symbol table to temporary buffers, so that we know the // Step #6: Emit the symbol table to temporary buffers, so that we know the
// size of the string table when we write the next load command. This also // size of the string table when we write the next load command. This also
// sorts and assigns indices to each of the symbols, which is necessary for // sorts and assigns indices to each of the symbols, which is necessary for
// emitting relocations to externally-defined objects. // emitting relocations to externally-defined objects.
BufferSymbolAndStringTable(); BufferSymbolAndStringTable();
// Step #7: Calculate the number of relocations for each section and write out // Step #7: Calculate the number of relocations for each section and write out
// the section commands for each section // the section commands for each section
currentAddr += SEG.fileoff; currentAddr += SEG.fileoff;
@ -568,7 +293,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
CalculateRelocations(*MOS); CalculateRelocations(*MOS);
MOS->reloff = MOS->nreloc ? currentAddr : 0; MOS->reloff = MOS->nreloc ? currentAddr : 0;
currentAddr += MOS->nreloc * 8; currentAddr += MOS->nreloc * 8;
// write the finalized section command to the output buffer // write the finalized section command to the output buffer
FHOut.outstring(MOS->sectname, 16); FHOut.outstring(MOS->sectname, 16);
FHOut.outstring(MOS->segname, 16); FHOut.outstring(MOS->segname, 16);
@ -584,7 +309,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
if (is64Bit) if (is64Bit)
FHOut.outword(MOS->reserved3); FHOut.outword(MOS->reserved3);
} }
// Step #8: Emit LC_SYMTAB/LC_DYSYMTAB load commands // Step #8: Emit LC_SYMTAB/LC_DYSYMTAB load commands
SymTab.symoff = currentAddr; SymTab.symoff = currentAddr;
SymTab.nsyms = SymbolTable.size(); SymTab.nsyms = SymbolTable.size();
@ -620,94 +345,92 @@ void MachOWriter::EmitHeaderAndLoadCommands() {
FHOut.outword(DySymTab.nextrel); FHOut.outword(DySymTab.nextrel);
FHOut.outword(DySymTab.locreloff); FHOut.outword(DySymTab.locreloff);
FHOut.outword(DySymTab.nlocrel); FHOut.outword(DySymTab.nlocrel);
O.write((char*)&FH[0], FH.size()); O.write((char*)&FH[0], FH.size());
} }
/// EmitSections - Now that we have constructed the file header and load /// EmitSections - Now that we have constructed the file header and load
/// commands, emit the data for each section to the file. /// commands, emit the data for each section to the file.
void MachOWriter::EmitSections() { void MachOWriter::EmitSections() {
for (std::vector<MachOSection*>::iterator I = SectionList.begin(), for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
E = SectionList.end(); I != E; ++I) E = SectionList.end(); I != E; ++I)
// Emit the contents of each section // Emit the contents of each section
O.write((char*)&(*I)->SectionData[0], (*I)->size); O.write((char*)&(*I)->SectionData[0], (*I)->size);
}
void MachOWriter::EmitRelocations() {
for (std::vector<MachOSection*>::iterator I = SectionList.begin(), for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
E = SectionList.end(); I != E; ++I) E = SectionList.end(); I != E; ++I)
// Emit the relocation entry data for each section. // Emit the relocation entry data for each section.
O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size()); O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size());
} }
/// PartitionByLocal - Predicate for partitioning the symbol table: true when
/// Sym has neither the N_EXT nor the N_PEXT bit set in its type flags, i.e.
/// it is a local symbol rather than an external one.
bool MachOWriter::PartitionByLocal(const MachOSym &Sym) {
  const int ExternalBits = MachOSym::N_EXT | MachOSym::N_PEXT;
  return !(Sym.n_type & ExternalBits);
}
/// PartitionByDefined - Predicate for partitioning the symbol table: true
/// when every N_SECT bit is set in Sym's type flags, i.e. the symbol is
/// defined in this module.
bool MachOWriter::PartitionByDefined(const MachOSym &Sym) {
  // FIXME: Do N_ABS or N_INDR count as defined?
  const int SectBits = MachOSym::N_SECT;
  return (Sym.n_type & SectBits) == SectBits;
}
/// BufferSymbolAndStringTable - Sort the symbols we encountered and assign them /// BufferSymbolAndStringTable - Sort the symbols we encountered and assign them
/// each a string table index so that they appear in the correct order in the /// each a string table index so that they appear in the correct order in the
/// output file. /// output file.
void MachOWriter::BufferSymbolAndStringTable() { void MachOWriter::BufferSymbolAndStringTable() {
// The order of the symbol table is: // The order of the symbol table is:
// 1. local symbols // 1. local symbols
// 2. defined external symbols (sorted by name) // 2. defined external symbols (sorted by name)
// 3. undefined external symbols (sorted by name) // 3. undefined external symbols (sorted by name)
// Before sorting the symbols, check the PendingGlobals for any undefined // Before sorting the symbols, check the PendingGlobals for any undefined
// globals that need to be put in the symbol table. // globals that need to be put in the symbol table.
for (std::vector<GlobalValue*>::iterator I = PendingGlobals.begin(), for (std::vector<GlobalValue*>::iterator I = PendingGlobals.begin(),
E = PendingGlobals.end(); I != E; ++I) { E = PendingGlobals.end(); I != E; ++I) {
if (GVOffset[*I] == 0 && GVSection[*I] == 0) { if (GVOffset[*I] == 0 && GVSection[*I] == 0) {
MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TM); MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TAI);
SymbolTable.push_back(UndfSym); SymbolTable.push_back(UndfSym);
GVOffset[*I] = -1; GVOffset[*I] = -1;
} }
} }
// Sort the symbols by name, so that when we partition the symbols by scope // Sort the symbols by name, so that when we partition the symbols by scope
// of definition, we won't have to sort by name within each partition. // of definition, we won't have to sort by name within each partition.
std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSymCmp());
// Parition the symbol table entries so that all local symbols come before std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSym::SymCmp());
// Parition the symbol table entries so that all local symbols come before
// all symbols with external linkage. { 1 | 2 3 } // all symbols with external linkage. { 1 | 2 3 }
std::partition(SymbolTable.begin(), SymbolTable.end(), PartitionByLocal);
std::partition(SymbolTable.begin(), SymbolTable.end(),
MachOSym::PartitionByLocal);
// Advance iterator to beginning of external symbols and partition so that // Advance iterator to beginning of external symbols and partition so that
// all external symbols defined in this module come before all external // all external symbols defined in this module come before all external
// symbols defined elsewhere. { 1 | 2 | 3 } // symbols defined elsewhere. { 1 | 2 | 3 }
for (std::vector<MachOSym>::iterator I = SymbolTable.begin(), for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
E = SymbolTable.end(); I != E; ++I) { E = SymbolTable.end(); I != E; ++I) {
if (!PartitionByLocal(*I)) { if (!MachOSym::PartitionByLocal(*I)) {
std::partition(I, E, PartitionByDefined); std::partition(I, E, MachOSym::PartitionByDefined);
break; break;
} }
} }
// Calculate the starting index for each of the local, extern defined, and // Calculate the starting index for each of the local, extern defined, and
// undefined symbols, as well as the number of each to put in the LC_DYSYMTAB // undefined symbols, as well as the number of each to put in the LC_DYSYMTAB
// load command. // load command.
for (std::vector<MachOSym>::iterator I = SymbolTable.begin(), for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
E = SymbolTable.end(); I != E; ++I) { E = SymbolTable.end(); I != E; ++I) {
if (PartitionByLocal(*I)) { if (MachOSym::PartitionByLocal(*I)) {
++DySymTab.nlocalsym; ++DySymTab.nlocalsym;
++DySymTab.iextdefsym; ++DySymTab.iextdefsym;
++DySymTab.iundefsym; ++DySymTab.iundefsym;
} else if (PartitionByDefined(*I)) { } else if (MachOSym::PartitionByDefined(*I)) {
++DySymTab.nextdefsym; ++DySymTab.nextdefsym;
++DySymTab.iundefsym; ++DySymTab.iundefsym;
} else { } else {
++DySymTab.nundefsym; ++DySymTab.nundefsym;
} }
} }
// Write out a leading zero byte when emitting string table, for n_strx == 0 // Write out a leading zero byte when emitting string table, for n_strx == 0
// which means an empty string. // which means an empty string.
OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian); OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian);
StrTOut.outbyte(0); StrTOut.outbyte(0);
@ -716,6 +439,7 @@ void MachOWriter::BufferSymbolAndStringTable() {
// 2. strings for local symbols // 2. strings for local symbols
// Since this is the opposite order from the symbol table, which we have just // Since this is the opposite order from the symbol table, which we have just
// sorted, we can walk the symbol table backwards to output the string table. // sorted, we can walk the symbol table backwards to output the string table.
for (std::vector<MachOSym>::reverse_iterator I = SymbolTable.rbegin(), for (std::vector<MachOSym>::reverse_iterator I = SymbolTable.rbegin(),
E = SymbolTable.rend(); I != E; ++I) { E = SymbolTable.rend(); I != E; ++I) {
if (I->GVName == "") { if (I->GVName == "") {
@ -739,7 +463,7 @@ void MachOWriter::BufferSymbolAndStringTable() {
I->n_value += GVSection[GV]->addr; I->n_value += GVSection[GV]->addr;
if (GV && (GVOffset[GV] == -1)) if (GV && (GVOffset[GV] == -1))
GVOffset[GV] = index; GVOffset[GV] = index;
// Emit nlist to buffer // Emit nlist to buffer
SymTOut.outword(I->n_strx); SymTOut.outword(I->n_strx);
SymTOut.outbyte(I->n_type); SymTOut.outbyte(I->n_type);
@ -754,6 +478,7 @@ void MachOWriter::BufferSymbolAndStringTable() {
/// and the offset into that section. From this information, create the /// and the offset into that section. From this information, create the
/// appropriate target-specific MachORelocation type and add buffer it to be /// appropriate target-specific MachORelocation type and add buffer it to be
/// written out after we are finished writing out sections. /// written out after we are finished writing out sections.
void MachOWriter::CalculateRelocations(MachOSection &MOS) { void MachOWriter::CalculateRelocations(MachOSection &MOS) {
for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) { for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) {
MachineRelocation &MR = MOS.Relocations[i]; MachineRelocation &MR = MOS.Relocations[i];
@ -763,19 +488,22 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) {
// This is a scattered relocation entry if it points to a global value with // This is a scattered relocation entry if it points to a global value with
// a non-zero offset. // a non-zero offset.
bool Scattered = false; bool Scattered = false;
bool Extern = false; bool Extern = false;
// Since we may not have seen the GlobalValue we were interested in yet at // Since we may not have seen the GlobalValue we were interested in yet at
// the time we emitted the relocation for it, fix it up now so that it // the time we emitted the relocation for it, fix it up now so that it
// points to the offset into the correct section. // points to the offset into the correct section.
if (MR.isGlobalValue()) { if (MR.isGlobalValue()) {
GlobalValue *GV = MR.getGlobalValue(); GlobalValue *GV = MR.getGlobalValue();
MachOSection *MOSPtr = GVSection[GV]; MachOSection *MOSPtr = GVSection[GV];
intptr_t Offset = GVOffset[GV]; intptr_t Offset = GVOffset[GV];
// If we have never seen the global before, it must be to a symbol // If we have never seen the global before, it must be to a symbol
// defined in another module (N_UNDF). // defined in another module (N_UNDF).
if (!MOSPtr) { if (!MOSPtr) {
// FIXME: need to append stub suffix // FIXME: need to append stub suffix
Extern = true; Extern = true;
@ -787,9 +515,10 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) {
} }
MR.setResultPointer((void*)Offset); MR.setResultPointer((void*)Offset);
} }
// If the symbol is locally defined, pass in the address of the section and // If the symbol is locally defined, pass in the address of the section and
// the section index to the code which will generate the target relocation. // the section index to the code which will generate the target relocation.
if (!Extern) { if (!Extern) {
MachOSection &To = *SectionList[TargetSection - 1]; MachOSection &To = *SectionList[TargetSection - 1];
TargetAddr = To.addr; TargetAddr = To.addr;
@ -798,7 +527,7 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) {
OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian); OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian);
OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian); OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian);
MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex, MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex,
RelocOut, SecOut, Scattered, Extern); RelocOut, SecOut, Scattered, Extern);
} }
@ -806,21 +535,22 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) {
// InitMem - Write the value of a Constant to the specified memory location, // InitMem - Write the value of a Constant to the specified memory location,
// converting it into bytes and relocations. // converting it into bytes and relocations.
void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
const TargetData *TD, const TargetData *TD,
std::vector<MachineRelocation> &MRs) { std::vector<MachineRelocation> &MRs) {
typedef std::pair<const Constant*, intptr_t> CPair; typedef std::pair<const Constant*, intptr_t> CPair;
std::vector<CPair> WorkList; std::vector<CPair> WorkList;
WorkList.push_back(CPair(C,(intptr_t)Addr + Offset)); WorkList.push_back(CPair(C,(intptr_t)Addr + Offset));
intptr_t ScatteredOffset = 0; intptr_t ScatteredOffset = 0;
while (!WorkList.empty()) { while (!WorkList.empty()) {
const Constant *PC = WorkList.back().first; const Constant *PC = WorkList.back().first;
intptr_t PA = WorkList.back().second; intptr_t PA = WorkList.back().second;
WorkList.pop_back(); WorkList.pop_back();
if (isa<UndefValue>(PC)) { if (isa<UndefValue>(PC)) {
continue; continue;
} else if (const ConstantVector *CP = dyn_cast<ConstantVector>(PC)) { } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(PC)) {
@ -847,7 +577,7 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
break; break;
} }
} else if (PC->getType()->isSingleValueType()) { } else if (PC->getType()->isSingleValueType()) {
uint8_t *ptr = (uint8_t *)PA; unsigned char *ptr = (unsigned char *)PA;
switch (PC->getType()->getTypeID()) { switch (PC->getType()->getTypeID()) {
case Type::IntegerTyID: { case Type::IntegerTyID: {
unsigned NumBits = cast<IntegerType>(PC->getType())->getBitWidth(); unsigned NumBits = cast<IntegerType>(PC->getType())->getBitWidth();
@ -945,13 +675,15 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
} }
} }
//===----------------------------------------------------------------------===//
// MachOSym Implementation
//===----------------------------------------------------------------------===//
MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
TargetMachine &TM) : const TargetAsmInfo *TAI) :
GV(gv), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect), GV(gv), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect),
n_desc(0), n_value(0) { n_desc(0), n_value(0) {
const TargetAsmInfo *TAI = TM.getTargetAsmInfo();
switch (GV->getLinkage()) { switch (GV->getLinkage()) {
default: default:
assert(0 && "Unexpected linkage type!"); assert(0 && "Unexpected linkage type!");
@ -974,3 +706,6 @@ MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
break; break;
} }
} }
} // end namespace llvm

View File

@ -14,10 +14,8 @@
#ifndef MACHOWRITER_H #ifndef MACHOWRITER_H
#define MACHOWRITER_H #define MACHOWRITER_H
#include "llvm/Constants.h" #include "MachO.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRelocation.h"
#include "llvm/Target/TargetData.h" #include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetMachOWriterInfo.h" #include "llvm/Target/TargetMachOWriterInfo.h"
@ -31,53 +29,6 @@ namespace llvm {
class OutputBuffer; class OutputBuffer;
class raw_ostream; class raw_ostream;
/// MachOSym - One entry of the module's logical symbol table.  Entries are
/// collected while the module is processed and are eventually turned into
/// the real symbol table of the output file.
struct MachOSym {
  const GlobalValue *GV;   // The global value this corresponds to.
  std::string GVName;      // The mangled name of the global value.
  uint32_t    n_strx;      // index into the string table
  uint8_t     n_type;      // type flag
  uint8_t     n_sect;      // section number or NO_SECT
  int16_t     n_desc;      // see <mach-o/stab.h>
  uint64_t    n_value;     // value for this symbol (or stab offset)

  // Values for the n_sect field; see <mach-o/nlist.h>.
  enum {
    NO_SECT = 0            // symbol is not in any section
  };

  // Values for the n_type field; see <mach-o/nlist.h>.
  enum {
    N_UNDF = 0x0,          // undefined, n_sect == NO_SECT
    N_ABS  = 0x2,          // absolute, n_sect == NO_SECT
    N_SECT = 0xe,          // defined in section number n_sect
    N_PBUD = 0xc,          // prebound undefined (defined in a dylib)
    N_INDR = 0xa           // indirect
  };

  // Modifier bits that are OR'd into the n_type values above.  For example,
  // 0x0f is an external N_SECT symbol (0x0e | 0x01).
  enum {
    N_EXT  = 0x01,         // external symbol bit
    N_PEXT = 0x10          // private external symbol bit
  };

  // Values for the n_desc field; see <mach-o/loader.h>.
  enum {
    REFERENCE_FLAG_UNDEFINED_NON_LAZY         = 0,
    REFERENCE_FLAG_UNDEFINED_LAZY             = 1,
    REFERENCE_FLAG_DEFINED                    = 2,
    REFERENCE_FLAG_PRIVATE_DEFINED            = 3,
    REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4,
    REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY     = 5
  };

  enum {
    N_NO_DEAD_STRIP = 0x0020,  // symbol is not to be dead stripped
    N_WEAK_REF      = 0x0040,  // symbol is weak referenced
    N_WEAK_DEF      = 0x0080   // coalesced symbol is a weak definition
  };

  /// Construct the symbol for global value \p gv with mangled name \p name
  /// living in section number \p sect (NO_SECT if it is in no section).
  /// The definition lives out of line in the .cpp file.
  MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
           TargetMachine &TM);
};
/// MachOWriter - This class implements the common target-independent code for /// MachOWriter - This class implements the common target-independent code for
/// writing Mach-O files. Targets should derive a class from this to /// writing Mach-O files. Targets should derive a class from this to
@ -98,7 +49,6 @@ namespace llvm {
return "Mach-O Writer"; return "Mach-O Writer";
} }
typedef std::vector<uint8_t> DataBuffer;
protected: protected:
/// Output stream to send the resultant object file to. /// Output stream to send the resultant object file to.
/// ///
@ -114,326 +64,61 @@ namespace llvm {
/// MCE - The MachineCodeEmitter object that we are exposing to emit machine /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
/// code for functions to the .o file. /// code for functions to the .o file.
MachOCodeEmitter *MCE; MachOCodeEmitter *MCE;
/// is64Bit/isLittleEndian - This information is inferred from the target /// is64Bit/isLittleEndian - This information is inferred from the target
/// machine directly, indicating what header values and flags to set. /// machine directly, indicating what header values and flags to set.
bool is64Bit, isLittleEndian; bool is64Bit, isLittleEndian;
// Target Asm Info
const TargetAsmInfo *TAI;
/// Header - An instance of MachOHeader that we will update while we build
/// the file, and then emit during finalization.
MachOHeader Header;
/// doInitialization - Emit the file header and all of the global variables /// doInitialization - Emit the file header and all of the global variables
/// for the module to the Mach-O file. /// for the module to the Mach-O file.
bool doInitialization(Module &M); bool doInitialization(Module &M);
bool runOnMachineFunction(MachineFunction &MF); bool runOnMachineFunction(MachineFunction &MF);
/// doFinalization - Now that the module has been completely processed, emit /// doFinalization - Now that the module has been completely processed, emit
/// the Mach-O file to 'O'. /// the Mach-O file to 'O'.
bool doFinalization(Module &M); bool doFinalization(Module &M);
/// MachOHeader - This struct contains the header information about a
/// specific architecture type/subtype pair that is emitted to the file.
struct MachOHeader {
  uint32_t  magic;      // mach magic number identifier
  uint32_t  filetype;   // type of file
  uint32_t  ncmds;      // number of load commands
  uint32_t  sizeofcmds; // the size of all the load commands
  uint32_t  flags;      // flags
  uint32_t  reserved;   // 64-bit only

  /// HeaderData - The actual data for the header which we are building
  /// up for emission to the file.
  DataBuffer HeaderData;

  // Constants for the filetype field
  // see <mach-o/loader.h> for additional info on the various types
  enum { MH_OBJECT     = 1, // relocatable object file
         MH_EXECUTE    = 2, // demand paged executable file
         MH_FVMLIB     = 3, // fixed VM shared library file
         MH_CORE       = 4, // core file
         MH_PRELOAD    = 5, // preloaded executable file
         MH_DYLIB      = 6, // dynamically bound shared library
         MH_DYLINKER   = 7, // dynamic link editor
         MH_BUNDLE     = 8, // dynamically bound bundle file
         MH_DYLIB_STUB = 9, // shared library stub for static linking only
         MH_DSYM       = 10 // companion file with only debug sections
  };

  // Constants for the flags field.  Every flag is a distinct bit; the
  // values mirror the MH_* definitions in <mach-o/loader.h>.
  enum { MH_NOUNDEFS                = 1 << 0,
           // the object file has no undefined references
         MH_INCRLINK                = 1 << 1,
           // the object file is the output of an incremental link against
           // a base file and cannot be link edited again
         MH_DYLDLINK                = 1 << 2,
           // the object file is input for the dynamic linker and cannot be
           // statically link edited again.
         MH_BINDATLOAD              = 1 << 3,
           // the object file's undefined references are bound by the
           // dynamic linker when loaded.
         MH_PREBOUND                = 1 << 4,
           // the file has its dynamic undefined references prebound
         MH_SPLIT_SEGS              = 1 << 5,
           // the file has its read-only and read-write segments split
           // see <mach/shared_memory_server.h>
         MH_LAZY_INIT               = 1 << 6,
           // the shared library init routine is to be run lazily via
           // catching memory faults to its writable segments (obsolete)
         MH_TWOLEVEL                = 1 << 7,
           // the image is using two-level namespace bindings
         MH_FORCE_FLAT              = 1 << 8,
           // the executable is forcing all images to use flat namespace
           // bindings.
         MH_NOMULTIDEFS             = 1 << 9,
           // this umbrella guarantees no multiple definitions of symbols
           // in its sub-images so the two-level namespace hints can
           // always be used.  (Was 1 << 8, which aliased MH_FORCE_FLAT.)
         MH_NOFIXPREBINDING         = 1 << 10,
           // do not have dyld notify the prebinding agent about this
           // executable.
         MH_PREBINDABLE             = 1 << 11,
           // the binary is not prebound but can have its prebinding
           // redone.  only used when MH_PREBOUND is not set.
         MH_ALLMODSBOUND            = 1 << 12,
           // indicates that this binary binds to all two-level namespace
           // modules of its dependent libraries.  Only used when
           // MH_PREBINDABLE and MH_TWOLEVEL are both set.
         MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
           // safe to divide up the sections into sub-sections via symbols
           // for dead code stripping.
         MH_CANONICAL               = 1 << 14,
           // the binary has been canonicalized via the unprebind operation
         MH_WEAK_DEFINES            = 1 << 15,
           // the final linked image contains external weak symbols
         MH_BINDS_TO_WEAK           = 1 << 16,
           // the final linked image uses weak symbols
         MH_ALLOW_STACK_EXECUTION   = 1 << 17
           // When this bit is set, all stacks in the task will be given
           // stack execution privilege.  Only used in MH_EXECUTE filetype
  };

  MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
                  reserved(0) { }

  /// cmdSize - This routine returns the size of the MachOHeader as written
  /// to disk, depending on whether the destination is a 64 bit Mach-O file.
  /// The 64-bit form is one 32-bit word larger than the 32-bit form.
  unsigned cmdSize(bool is64Bit) const {
    if (is64Bit)
      return 8 * sizeof(uint32_t);
    else
      return 7 * sizeof(uint32_t);
  }

  /// setMagic - This routine sets the appropriate value for the 'magic'
  /// field based on pointer size and endianness.
  void setMagic(bool isLittleEndian, bool is64Bit) {
    // Explicit braces: the nested if/else pairs are easy to misread without
    // them.
    if (isLittleEndian) {
      if (is64Bit) magic = 0xcffaedfe;
      else         magic = 0xcefaedfe;
    } else {
      if (is64Bit) magic = 0xfeedfacf;
      else         magic = 0xfeedface;
    }
  }
};
/// Header - An instance of MachOHeader that we will update while we build
/// the file, and then emit during finalization.
MachOHeader Header;
/// MachOSegment - Holds the values needed to emit a segment load command,
/// i.e. the command that the per-section commands are attached to.
struct MachOSegment {
  uint32_t    cmd;      // LC_SEGMENT or LC_SEGMENT_64
  uint32_t    cmdsize;  // Total size of this struct and section commands
  std::string segname;  // segment name
  uint64_t    vmaddr;   // address of this segment
  uint64_t    vmsize;   // size of this segment, may be larger than filesize
  uint64_t    fileoff;  // offset in file
  uint64_t    filesize; // amount to read from file
  uint32_t    maxprot;  // maximum VM protection
  uint32_t    initprot; // initial VM protection
  uint32_t    nsects;   // number of sections in this segment
  uint32_t    flags;    // flags

  // Some system headers already provide the VM_PROT_* names as macros,
  // which would clash with identically named enumerators.  Define each one
  // only when it is missing, and expose them below under SEG_-prefixed names.
  #if !defined(VM_PROT_NONE)
  #define VM_PROT_NONE    0x00
  #endif
  #if !defined(VM_PROT_READ)
  #define VM_PROT_READ    0x01
  #endif
  #if !defined(VM_PROT_WRITE)
  #define VM_PROT_WRITE   0x02
  #endif
  #if !defined(VM_PROT_EXECUTE)
  #define VM_PROT_EXECUTE 0x04
  #endif
  #if !defined(VM_PROT_ALL)
  #define VM_PROT_ALL     0x07
  #endif

  // Values for the maxprot / initprot fields; see <mach-o/vm_prot.h>.
  enum {
    SEG_VM_PROT_NONE    = VM_PROT_NONE,
    SEG_VM_PROT_READ    = VM_PROT_READ,    // read permission
    SEG_VM_PROT_WRITE   = VM_PROT_WRITE,   // write permission
    SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE,
    SEG_VM_PROT_ALL     = VM_PROT_ALL
  };

  // Values for the cmd field; see <mach-o/loader.h>.
  enum {
    LC_SEGMENT    = 0x01,  // segment of this file to be mapped
    LC_SEGMENT_64 = 0x19   // 64-bit segment of this file to be mapped
  };

  /// cmdSize - Size in bytes of the segment command itself as written to
  /// disk (without any section commands), which depends on whether the
  /// destination is a 64 bit Mach-O file.
  unsigned cmdSize(bool is64Bit) const {
    const unsigned NameBytes = 16;  // fixed-width segment name field
    return is64Bit
      ? 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + NameBytes
      : 10 * sizeof(uint32_t) + NameBytes;  // addresses only 32 bits
  }

  /// Create a segment named \p seg.  The command kind follows \p is64Bit,
  /// both protection fields start out as "all", every other field is zero.
  MachOSegment(const std::string &seg, bool is64Bit)
    : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT),
      cmdsize(0),
      segname(seg),
      vmaddr(0),
      vmsize(0),
      fileoff(0),
      filesize(0),
      maxprot(SEG_VM_PROT_ALL),
      initprot(SEG_VM_PROT_ALL),
      nsects(0),
      flags(0) { }
};
/// MachOSection - This struct contains information about each section in a
/// particular segment that is emitted to the file.  This is eventually
/// turned into the SectionCommand in the load command for a particular
/// segment.
struct MachOSection {
  std::string  sectname; // name of this section,
  std::string  segname;  // segment this section goes in
  uint64_t  addr;        // memory address of this section
  uint64_t  size;        // size in bytes of this section
  uint32_t  offset;      // file offset of this section
  uint32_t  align;       // section alignment (power of 2)
  uint32_t  reloff;      // file offset of relocation entries
  uint32_t  nreloc;      // number of relocation entries
  uint32_t  flags;       // flags (section type and attributes)
  uint32_t  reserved1;   // reserved (for offset or index)
  uint32_t  reserved2;   // reserved (for count or sizeof)
  uint32_t  reserved3;   // reserved (64 bit only)

  /// A unique number for this section, which will be used to match symbols
  /// to the correct section.
  uint32_t Index;

  /// SectionData - The actual data for this section which we are building
  /// up for emission to the file.
  DataBuffer SectionData;

  /// RelocBuffer - A buffer to hold the mach-o relocations before we write
  /// them out at the appropriate location in the file.
  DataBuffer RelocBuffer;

  /// Relocations - The relocations that we have encountered so far in this
  /// section that we will need to convert to MachORelocation entries when
  /// the file is written.
  std::vector<MachineRelocation> Relocations;

  // Constants for the section types (low 8 bits of flags field)
  // see <mach-o/loader.h>
  enum { S_REGULAR = 0,
           // regular section
         S_ZEROFILL = 1,
           // zero fill on demand section
         S_CSTRING_LITERALS = 2,
           // section with only literal C strings
         S_4BYTE_LITERALS = 3,
           // section with only 4 byte literals
         S_8BYTE_LITERALS = 4,
           // section with only 8 byte literals
         S_LITERAL_POINTERS = 5,
           // section with only pointers to literals
         S_NON_LAZY_SYMBOL_POINTERS = 6,
           // section with only non-lazy symbol pointers
         S_LAZY_SYMBOL_POINTERS = 7,
           // section with only lazy symbol pointers
         S_SYMBOL_STUBS = 8,
           // section with only symbol stubs
           // byte size of stub in the reserved2 field
         S_MOD_INIT_FUNC_POINTERS = 9,
           // section with only function pointers for initialization
         S_MOD_TERM_FUNC_POINTERS = 10,
           // section with only function pointers for termination
         S_COALESCED = 11,
           // section contains symbols that are coalesced
         S_GB_ZEROFILL = 12,
           // zero fill on demand section (that can be larger than 4GB)
         S_INTERPOSING = 13,
           // section with only pairs of function pointers for interposing
         S_16BYTE_LITERALS = 14
           // section with only 16 byte literals
  };

  // Constants for the section flags (high 24 bits of flags field)
  // see <mach-o/loader.h>
  enum { S_ATTR_PURE_INSTRUCTIONS   = 1u << 31,
           // section contains only true machine instructions
           // (unsigned literal: 1 << 31 would shift into the sign bit of int)
         S_ATTR_NO_TOC              = 1 << 30,
           // section contains coalesced symbols that are not to be in a
           // ranlib table of contents
         S_ATTR_STRIP_STATIC_SYMS   = 1 << 29,
           // ok to strip static symbols in this section in files with the
           // MH_DYLDLINK flag
         S_ATTR_NO_DEAD_STRIP       = 1 << 28,
           // no dead stripping
         S_ATTR_LIVE_SUPPORT        = 1 << 27,
           // blocks are live if they reference live blocks
         S_ATTR_SELF_MODIFYING_CODE = 1 << 26,
           // used with i386 code stubs written on by dyld
         S_ATTR_DEBUG               = 1 << 25,
           // a debug section
         S_ATTR_SOME_INSTRUCTIONS   = 1 << 10,
           // section contains some machine instructions
         S_ATTR_EXT_RELOC           = 1 << 9,
           // section has external relocation entries
         S_ATTR_LOC_RELOC           = 1 << 8
           // section has local relocation entries
  };

  /// cmdSize - This routine returns the size of the MachOSection as written
  /// to disk, depending on whether the destination is a 64 bit Mach-O file.
  /// Both layouts carry two 16-byte names (32 bytes) plus addr and size at
  /// the file's address width.  The 32-bit layout has seven further 32-bit
  /// words (offset, align, reloff, nreloc, flags, reserved1, reserved2); the
  /// 64-bit layout adds reserved3 for eight.
  unsigned cmdSize(bool is64Bit) const {
    if (is64Bit)
      return 8 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
    else
      return 9 * sizeof(uint32_t) + 32;    // addresses only 32 bits
  }

  /// Create an empty section \p sect inside segment \p seg.  Alignment
  /// starts at 2 and every other numeric field, including Index, at zero.
  MachOSection(const std::string &seg, const std::string &sect)
    : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2),
      reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
      reserved3(0), Index(0) { }
};
private: private:
/// SectionList - This is the list of sections that we have emitted to the /// SectionList - This is the list of sections that we have emitted to the
/// file. Once the file has been completely built, the segment load command /// file. Once the file has been completely built, the segment load command
/// SectionCommands are constructed from this info. /// SectionCommands are constructed from this info.
std::vector<MachOSection*> SectionList; std::vector<MachOSection*> SectionList;
/// SectionLookup - This is a mapping from section name to SectionList entry /// SectionLookup - This is a mapping from section name to SectionList entry
std::map<std::string, MachOSection*> SectionLookup; std::map<std::string, MachOSection*> SectionLookup;
/// GVSection - This is a mapping from a GlobalValue to a MachOSection, /// GVSection - This is a mapping from a GlobalValue to a MachOSection,
/// to aid in emitting relocations. /// to aid in emitting relocations.
std::map<GlobalValue*, MachOSection*> GVSection; std::map<GlobalValue*, MachOSection*> GVSection;
/// GVOffset - This is a mapping from a GlobalValue to an offset from the /// GVOffset - This is a mapping from a GlobalValue to an offset from the
/// start of the section in which the GV resides, to aid in emitting /// start of the section in which the GV resides, to aid in emitting
/// relocations. /// relocations.
std::map<GlobalValue*, intptr_t> GVOffset; std::map<GlobalValue*, intptr_t> GVOffset;
/// getSection - Return the section with the specified name, creating a new /// getSection - Return the section with the specified name, creating a new
/// section if one does not already exist. /// section if one does not already exist.
MachOSection *getSection(const std::string &seg, const std::string &sect, MachOSection *getSection(const std::string &seg, const std::string &sect,
unsigned Flags = 0) { unsigned Flags = 0) {
MachOSection *MOS = SectionLookup[seg+sect]; MachOSection *MOS = SectionLookup[seg+sect];
@ -511,63 +196,11 @@ namespace llvm {
nsyms(0), stroff(0), strsize(0) { } nsyms(0), stroff(0), strsize(0) { }
}; };
/// MachODySymTab - Values for the LC_DYSYMTAB load command: indices, counts
/// and file offsets describing the symbol table information used by the
/// dynamic link editor.
struct MachODySymTab {
  uint32_t cmd;             // LC_DYSYMTAB
  uint32_t cmdsize;         // sizeof( MachODySymTab )
  uint32_t ilocalsym;       // index to local symbols
  uint32_t nlocalsym;       // number of local symbols
  uint32_t iextdefsym;      // index to externally defined symbols
  uint32_t nextdefsym;      // number of externally defined symbols
  uint32_t iundefsym;       // index to undefined symbols
  uint32_t nundefsym;       // number of undefined symbols
  uint32_t tocoff;          // file offset to table of contents
  uint32_t ntoc;            // number of entries in table of contents
  uint32_t modtaboff;       // file offset to module table
  uint32_t nmodtab;         // number of module table entries
  uint32_t extrefsymoff;    // offset to referenced symbol table
  uint32_t nextrefsyms;     // number of referenced symbol table entries
  uint32_t indirectsymoff;  // file offset to the indirect symbol table
  uint32_t nindirectsyms;   // number of indirect symbol table entries
  uint32_t extreloff;       // offset to external relocation entries
  uint32_t nextrel;         // number of external relocation entries
  uint32_t locreloff;       // offset to local relocation entries
  uint32_t nlocrel;         // number of local relocation entries

  // Value for the cmd field; see <mach-o/loader.h>.
  enum {
    LC_DYSYMTAB = 0x0B      // dynamic link-edit symbol table info
  };

  /// Start with the command id set, cmdsize covering the twenty 32-bit
  /// fields above, and every index, count and offset cleared to zero.
  MachODySymTab()
    : cmd(LC_DYSYMTAB),
      cmdsize(20 * sizeof(uint32_t)),
      ilocalsym(0), nlocalsym(0),
      iextdefsym(0), nextdefsym(0),
      iundefsym(0), nundefsym(0),
      tocoff(0), ntoc(0),
      modtaboff(0), nmodtab(0),
      extrefsymoff(0), nextrefsyms(0),
      indirectsymoff(0), nindirectsyms(0),
      extreloff(0), nextrel(0),
      locreloff(0), nlocrel(0) { }
};
/// SymTab - The "stab" style symbol table information /// SymTab - The "stab" style symbol table information
MachOSymTab SymTab; MachOSymTab SymTab;
/// DySymTab - symbol table info for the dynamic link editor /// DySymTab - symbol table info for the dynamic link editor
MachODySymTab DySymTab; MachODySymTab DySymTab;
/// MachOSymCmp - Ordering predicate for MachOSym entries: compares the
/// mangled names (GVName) with std::string's ordering.
struct MachOSymCmp {
  // FIXME: this does not appear to be sorting 'f' after 'F'
  bool operator()(const MachOSym &LHS, const MachOSym &RHS) {
    const bool ComesFirst = LHS.GVName.compare(RHS.GVName) < 0;
    return ComesFirst;
  }
};
/// PartitionByLocal - Simple boolean predicate that returns true if Sym is
/// a local symbol rather than an external symbol.
static bool PartitionByLocal(const MachOSym &Sym);
/// PartitionByDefined - Simple boolean predicate that returns true if Sym
/// is defined in this module.
static bool PartitionByDefined(const MachOSym &Sym);
protected: protected:
/// SymbolTable - This is the list of symbols we have emitted to the file. /// SymbolTable - This is the list of symbols we have emitted to the file.
@ -601,6 +234,7 @@ namespace llvm {
void EmitGlobal(GlobalVariable *GV); void EmitGlobal(GlobalVariable *GV);
void EmitHeaderAndLoadCommands(); void EmitHeaderAndLoadCommands();
void EmitSections(); void EmitSections();
void EmitRelocations();
void BufferSymbolAndStringTable(); void BufferSymbolAndStringTable();
void CalculateRelocations(MachOSection &MOS); void CalculateRelocations(MachOSection &MOS);