mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-23 22:00:10 +00:00
376 lines
12 KiB
C++
376 lines
12 KiB
C++
//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLD_MACHO_INPUT_FILES_H
|
|
#define LLD_MACHO_INPUT_FILES_H
|
|
|
|
#include "MachOStructs.h"
|
|
#include "Target.h"
|
|
|
|
#include "lld/Common/DWARF.h"
|
|
#include "lld/Common/LLVM.h"
|
|
#include "lld/Common/Memory.h"
|
|
#include "llvm/ADT/CachedHashString.h"
|
|
#include "llvm/ADT/DenseSet.h"
|
|
#include "llvm/ADT/SetVector.h"
|
|
#include "llvm/BinaryFormat/MachO.h"
|
|
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
|
|
#include "llvm/Object/Archive.h"
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
#include "llvm/Support/Threading.h"
|
|
#include "llvm/TextAPI/TextAPIReader.h"
|
|
|
|
#include <vector>
|
|
|
|
namespace llvm {
|
|
namespace lto {
|
|
class InputFile;
|
|
} // namespace lto
|
|
namespace MachO {
|
|
class InterfaceFile;
|
|
} // namespace MachO
|
|
class TarWriter;
|
|
} // namespace llvm
|
|
|
|
namespace lld {
|
|
namespace macho {
|
|
|
|
struct PlatformInfo;
|
|
class ConcatInputSection;
|
|
class Symbol;
|
|
class Defined;
|
|
class AliasSymbol;
|
|
struct Reloc;
|
|
enum class RefState : uint8_t;
|
|
|
|
// If --reproduce option is given, all input files are written
|
|
// to this tar archive.
|
|
extern std::unique_ptr<llvm::TarWriter> tar;
|
|
|
|
// If .subsections_via_symbols is set, each InputSection will be split along
|
|
// symbol boundaries. The field offset represents the offset of the subsection
|
|
// from the start of the original pre-split InputSection.
|
|
struct Subsection {
|
|
uint64_t offset = 0;
|
|
InputSection *isec = nullptr;
|
|
};
|
|
|
|
using Subsections = std::vector<Subsection>;
|
|
class InputFile;
|
|
|
|
class Section {
|
|
public:
|
|
InputFile *file;
|
|
StringRef segname;
|
|
StringRef name;
|
|
uint32_t flags;
|
|
uint64_t addr;
|
|
Subsections subsections;
|
|
|
|
Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags,
|
|
uint64_t addr)
|
|
: file(file), segname(segname), name(name), flags(flags), addr(addr) {}
|
|
// Ensure pointers to Sections are never invalidated.
|
|
Section(const Section &) = delete;
|
|
Section &operator=(const Section &) = delete;
|
|
Section(Section &&) = delete;
|
|
Section &operator=(Section &&) = delete;
|
|
|
|
private:
|
|
// Whether we have already split this section into individual subsections.
|
|
// For sections that cannot be split (e.g. literal sections), this is always
|
|
// false.
|
|
bool doneSplitting = false;
|
|
friend class ObjFile;
|
|
};
|
|
|
|
// Represents a call graph profile edge: a (caller, callee) pair of symbol
// table indices together with the call count recorded for that pair.
struct CallGraphEntry {
  // The index of the caller in the symbol table.
  uint32_t fromIndex;
  // The index of the callee in the symbol table.
  uint32_t toIndex;
  // Number of calls from caller to callee in the profile.
  uint64_t count;

  CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count)
      : fromIndex(fromIndex), toIndex(toIndex), count(count) {}
};
|
|
|
|
class InputFile {
|
|
public:
|
|
enum Kind {
|
|
ObjKind,
|
|
OpaqueKind,
|
|
DylibKind,
|
|
ArchiveKind,
|
|
BitcodeKind,
|
|
};
|
|
|
|
virtual ~InputFile() = default;
|
|
Kind kind() const { return fileKind; }
|
|
StringRef getName() const { return name; }
|
|
static void resetIdCount() { idCount = 0; }
|
|
|
|
MemoryBufferRef mb;
|
|
|
|
std::vector<Symbol *> symbols;
|
|
std::vector<Section *> sections;
|
|
ArrayRef<uint8_t> objCImageInfo;
|
|
|
|
// If not empty, this stores the name of the archive containing this file.
|
|
// We use this string for creating error messages.
|
|
std::string archiveName;
|
|
|
|
// Provides an easy way to sort InputFiles deterministically.
|
|
const int id;
|
|
|
|
// True if this is a lazy ObjFile or BitcodeFile.
|
|
bool lazy = false;
|
|
|
|
protected:
|
|
InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)
|
|
: mb(mb), id(idCount++), lazy(lazy), fileKind(kind),
|
|
name(mb.getBufferIdentifier()) {}
|
|
|
|
InputFile(Kind, const llvm::MachO::InterfaceFile &);
|
|
|
|
// If true, this input's arch is compatible with target.
|
|
bool compatArch = true;
|
|
|
|
private:
|
|
const Kind fileKind;
|
|
const StringRef name;
|
|
|
|
static int idCount;
|
|
};
|
|
|
|
struct FDE {
|
|
uint32_t funcLength;
|
|
Symbol *personality;
|
|
InputSection *lsda;
|
|
};
|
|
|
|
// .o file
|
|
class ObjFile final : public InputFile {
|
|
public:
|
|
ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
|
|
bool lazy = false, bool forceHidden = false, bool compatArch = true,
|
|
bool builtFromBitcode = false);
|
|
ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
|
|
ArrayRef<uint8_t> getOptimizationHints() const;
|
|
template <class LP> void parse();
|
|
template <class LP>
|
|
void parseLinkerOptions(llvm::SmallVectorImpl<StringRef> &LinkerOptions);
|
|
|
|
static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
|
|
|
|
std::string sourceFile() const;
|
|
// Parses line table information for diagnostics. compileUnit should be used
|
|
// for other purposes.
|
|
lld::DWARFCache *getDwarf();
|
|
|
|
llvm::DWARFUnit *compileUnit = nullptr;
|
|
std::unique_ptr<lld::DWARFCache> dwarfCache;
|
|
Section *addrSigSection = nullptr;
|
|
const uint32_t modTime;
|
|
bool forceHidden;
|
|
bool builtFromBitcode;
|
|
std::vector<ConcatInputSection *> debugSections;
|
|
std::vector<CallGraphEntry> callGraph;
|
|
llvm::DenseMap<ConcatInputSection *, FDE> fdes;
|
|
std::vector<AliasSymbol *> aliases;
|
|
|
|
private:
|
|
llvm::once_flag initDwarf;
|
|
template <class LP> void parseLazy();
|
|
template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
|
|
template <class LP>
|
|
void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
|
|
ArrayRef<typename LP::nlist> nList, const char *strtab,
|
|
bool subsectionsViaSymbols);
|
|
template <class NList>
|
|
Symbol *parseNonSectionSymbol(const NList &sym, const char *strtab);
|
|
template <class SectionHeader>
|
|
void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
|
|
const SectionHeader &, Section &);
|
|
void parseDebugInfo();
|
|
void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
|
|
void registerCompactUnwind(Section &compactUnwindSection);
|
|
void registerEhFrames(Section &ehFrameSection);
|
|
};
|
|
|
|
// command-line -sectcreate file
|
|
class OpaqueFile final : public InputFile {
|
|
public:
|
|
OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
|
|
static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
|
|
};
|
|
|
|
// .dylib or .tbd file
|
|
class DylibFile final : public InputFile {
|
|
public:
|
|
// Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
|
|
// symbols in those sub-libraries will be available under the umbrella
|
|
// library's namespace. Those sub-libraries can also have their own
|
|
// re-exports. When loading a re-exported dylib, `umbrella` should be set to
|
|
// the root dylib to ensure symbols in the child library are correctly bound
|
|
// to the root. On the other hand, if a dylib is being directly loaded
|
|
// (through an -lfoo flag), then `umbrella` should be a nullptr.
|
|
explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
|
|
bool isBundleLoader, bool explicitlyLinked);
|
|
explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
|
|
DylibFile *umbrella, bool isBundleLoader,
|
|
bool explicitlyLinked);
|
|
explicit DylibFile(DylibFile *umbrella);
|
|
|
|
void parseLoadCommands(MemoryBufferRef mb);
|
|
void parseReexports(const llvm::MachO::InterfaceFile &interface);
|
|
bool isReferenced() const { return numReferencedSymbols > 0; }
|
|
bool isExplicitlyLinked() const;
|
|
void setExplicitlyLinked() { explicitlyLinked = true; }
|
|
|
|
static bool classof(const InputFile *f) { return f->kind() == DylibKind; }
|
|
|
|
StringRef installName;
|
|
DylibFile *exportingFile = nullptr;
|
|
DylibFile *umbrella;
|
|
SmallVector<StringRef, 2> rpaths;
|
|
uint32_t compatibilityVersion = 0;
|
|
uint32_t currentVersion = 0;
|
|
int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
|
|
unsigned numReferencedSymbols = 0;
|
|
RefState refState;
|
|
bool reexport = false;
|
|
bool forceNeeded = false;
|
|
bool forceWeakImport = false;
|
|
bool deadStrippable = false;
|
|
|
|
private:
|
|
bool explicitlyLinked = false; // Access via isExplicitlyLinked().
|
|
|
|
public:
|
|
// An executable can be used as a bundle loader that will load the output
|
|
// file being linked, and that contains symbols referenced, but not
|
|
// implemented in the bundle. When used like this, it is very similar
|
|
// to a dylib, so we've used the same class to represent it.
|
|
bool isBundleLoader;
|
|
|
|
// Synthetic Dylib objects created by $ld$previous symbols in this dylib.
|
|
// Usually empty. These synthetic dylibs won't have synthetic dylibs
|
|
// themselves.
|
|
SmallVector<DylibFile *, 2> extraDylibs;
|
|
|
|
private:
|
|
DylibFile *getSyntheticDylib(StringRef installName, uint32_t currentVersion,
|
|
uint32_t compatVersion);
|
|
|
|
bool handleLDSymbol(StringRef originalName);
|
|
void handleLDPreviousSymbol(StringRef name, StringRef originalName);
|
|
void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
|
|
void handleLDHideSymbol(StringRef name, StringRef originalName);
|
|
void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;
|
|
void parseExportedSymbols(uint32_t offset, uint32_t size);
|
|
void loadReexport(StringRef path, DylibFile *umbrella,
|
|
const llvm::MachO::InterfaceFile *currentTopLevelTapi);
|
|
|
|
llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;
|
|
};
|
|
|
|
// .a file
|
|
class ArchiveFile final : public InputFile {
|
|
public:
|
|
explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file,
|
|
bool forceHidden);
|
|
void addLazySymbols();
|
|
void fetch(const llvm::object::Archive::Symbol &);
|
|
// LLD normally doesn't use Error for error-handling, but the underlying
|
|
// Archive library does, so this is the cleanest way to wrap it.
|
|
Error fetch(const llvm::object::Archive::Child &, StringRef reason);
|
|
const llvm::object::Archive &getArchive() const { return *file; };
|
|
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
|
|
|
|
private:
|
|
std::unique_ptr<llvm::object::Archive> file;
|
|
// Keep track of children fetched from the archive by tracking
|
|
// which address offsets have been fetched already.
|
|
llvm::DenseSet<uint64_t> seen;
|
|
// Load all symbols with hidden visibility (-load_hidden).
|
|
bool forceHidden;
|
|
};
|
|
|
|
class BitcodeFile final : public InputFile {
|
|
public:
|
|
explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
|
|
uint64_t offsetInArchive, bool lazy = false,
|
|
bool forceHidden = false, bool compatArch = true);
|
|
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
|
|
void parse();
|
|
|
|
std::unique_ptr<llvm::lto::InputFile> obj;
|
|
bool forceHidden;
|
|
|
|
private:
|
|
void parseLazy();
|
|
};
|
|
|
|
extern llvm::SetVector<InputFile *> inputFiles;
|
|
extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;
|
|
extern llvm::SmallVector<StringRef> unprocessedLCLinkerOptions;
|
|
|
|
std::optional<MemoryBufferRef> readFile(StringRef path);
|
|
|
|
void extract(InputFile &file, StringRef reason);
|
|
|
|
namespace detail {
|
|
|
|
template <class CommandType, class... Types>
|
|
std::vector<const CommandType *>
|
|
findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
|
|
std::vector<const CommandType *> cmds;
|
|
std::initializer_list<uint32_t> typesList{types...};
|
|
const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
|
|
const uint8_t *p =
|
|
reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
|
|
for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
|
|
auto *cmd = reinterpret_cast<const CommandType *>(p);
|
|
if (llvm::is_contained(typesList, cmd->cmd)) {
|
|
cmds.push_back(cmd);
|
|
if (cmds.size() == maxCommands)
|
|
return cmds;
|
|
}
|
|
p += cmd->cmdsize;
|
|
}
|
|
return cmds;
|
|
}
|
|
|
|
} // namespace detail
|
|
|
|
// anyHdr should be a pointer to either mach_header or mach_header_64
|
|
template <class CommandType = llvm::MachO::load_command, class... Types>
|
|
const CommandType *findCommand(const void *anyHdr, Types... types) {
|
|
std::vector<const CommandType *> cmds =
|
|
detail::findCommands<CommandType>(anyHdr, 1, types...);
|
|
return cmds.size() ? cmds[0] : nullptr;
|
|
}
|
|
|
|
template <class CommandType = llvm::MachO::load_command, class... Types>
|
|
std::vector<const CommandType *> findCommands(const void *anyHdr,
|
|
Types... types) {
|
|
return detail::findCommands<CommandType>(anyHdr, 0, types...);
|
|
}
|
|
|
|
std::string replaceThinLTOSuffix(StringRef path);
|
|
} // namespace macho
|
|
|
|
std::string toString(const macho::InputFile *file);
|
|
std::string toString(const macho::Section &);
|
|
} // namespace lld
|
|
|
|
#endif
|