[lld][MachO] Add support for LC_DATA_IN_CODE

Add initial support for emitting LC_DATA_IN_CODE.

Test plan: make check-lld-macho

Differential revision: https://reviews.llvm.org/D103006
This commit is contained in:
Alexander Shaposhnikov 2021-06-14 19:21:43 -07:00
parent 1c450c3d7e
commit 928394d109
9 changed files with 194 additions and 5 deletions

View File

@ -735,6 +735,7 @@ template <class LP> void ObjFile::parse() {
parseRelocations(sectionHeaders, sectionHeaders[i], subsections[i]);
parseDebugInfo();
parseDataInCode();
}
void ObjFile::parseDebugInfo() {
@ -760,6 +761,21 @@ void ObjFile::parseDebugInfo() {
compileUnit = it->get();
}
void ObjFile::parseDataInCode() {
const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
const load_command *cmd = findCommand(buf, LC_DATA_IN_CODE);
if (!cmd)
return;
const auto *c = reinterpret_cast<const linkedit_data_command *>(cmd);
dataInCodeEntries = {
reinterpret_cast<const data_in_code_entry *>(buf + c->dataoff),
c->datasize / sizeof(data_in_code_entry)};
assert(is_sorted(dataInCodeEntries, [](const data_in_code_entry &lhs,
const data_in_code_entry &rhs) {
return lhs.offset < rhs.offset;
}));
}
// The path can point to either a dylib or a .tbd file.
static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
Optional<MemoryBufferRef> mbref = readFile(path);

View File

@ -104,6 +104,7 @@ public:
llvm::DWARFUnit *compileUnit = nullptr;
const uint32_t modTime;
std::vector<ConcatInputSection *> debugSections;
ArrayRef<llvm::MachO::data_in_code_entry> dataInCodeEntries;
private:
template <class LP> void parse();
@ -118,6 +119,7 @@ private:
void parseRelocations(ArrayRef<Section> sectionHeaders, const Section &,
SubsectionMap &);
void parseDebugInfo();
void parseDataInCode();
};
// command-line -sectcreate file

View File

@ -239,6 +239,7 @@ constexpr const char debugInfo[] = "__debug_info";
constexpr const char debugStr[] = "__debug_str";
constexpr const char ehFrame[] = "__eh_frame";
constexpr const char export_[] = "__export";
constexpr const char dataInCode[] = "__data_in_code";
constexpr const char functionStarts[] = "__func_starts";
constexpr const char got[] = "__got";
constexpr const char header[] = "__mach_header";

View File

@ -574,6 +574,68 @@ void ExportSection::finalizeContents() {
// Emits the section contents into buf by delegating to trieBuilder.
void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); }
// Constructs the section as a LinkEditSection in segment_names::linkEdit
// named section_names::dataInCode.
DataInCodeSection::DataInCodeSection()
: LinkEditSection(segment_names::linkEdit, section_names::dataInCode) {}
template <class LP>
static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() {
using SegmentCommand = typename LP::segment_command;
using Section = typename LP::section;
std::vector<MachO::data_in_code_entry> dataInCodeEntries;
for (const InputFile *inputFile : inputFiles) {
if (!isa<ObjFile>(inputFile))
continue;
const ObjFile *objFile = cast<ObjFile>(inputFile);
const auto *c = reinterpret_cast<const SegmentCommand *>(
findCommand(objFile->mb.getBufferStart(), LP::segmentLCType));
if (!c)
continue;
ArrayRef<Section> sections{reinterpret_cast<const Section *>(c + 1),
c->nsects};
ArrayRef<MachO::data_in_code_entry> entries = objFile->dataInCodeEntries;
if (entries.empty())
continue;
// For each code subsection find 'data in code' entries residing in it.
// Compute the new offset values as
// <offset within subsection> + <subsection address> - <__TEXT address>.
for (size_t i = 0, n = sections.size(); i < n; ++i) {
const SubsectionMap &subsecMap = objFile->subsections[i];
for (const SubsectionEntry &subsecEntry : subsecMap) {
const InputSection *isec = subsecEntry.isec;
if (!isCodeSection(isec))
continue;
if (cast<ConcatInputSection>(isec)->shouldOmitFromOutput())
continue;
const uint64_t beginAddr = sections[i].addr + subsecEntry.offset;
auto it = llvm::lower_bound(
entries, beginAddr,
[](const MachO::data_in_code_entry &entry, uint64_t addr) {
return entry.offset < addr;
});
const uint64_t endAddr = beginAddr + isec->getFileSize();
for (const auto end = entries.end();
it != end && it->offset + it->length <= endAddr; ++it)
dataInCodeEntries.push_back(
{static_cast<uint32_t>(isec->getVA(it->offset - beginAddr) -
in.header->addr),
it->length, it->kind});
}
}
}
return dataInCodeEntries;
}
// Populates `entries` with the collected 'data in code' entries, selecting
// the 64-bit or 32-bit layout according to the target word size.
void DataInCodeSection::finalizeContents() {
  if (target->wordSize == 8)
    entries = collectDataInCodeEntries<LP64>();
  else
    entries = collectDataInCodeEntries<ILP32>();
}
// Copies the collected entry table into buf (getRawSize() bytes).
//
// The section exists even when no input contributes any entry, in which case
// `entries` is empty and entries.data() may be a null pointer. Passing a null
// pointer to memcpy is undefined behaviour even for a zero byte count, so the
// copy is skipped entirely when there is nothing to write.
void DataInCodeSection::writeTo(uint8_t *buf) const {
  if (!entries.empty())
    memcpy(buf, entries.data(), getRawSize());
}
// Constructs the section as a LinkEditSection in segment_names::linkEdit
// named section_names::functionStarts.
FunctionStartsSection::FunctionStartsSection()
: LinkEditSection(segment_names::linkEdit, section_names::functionStarts) {}

View File

@ -371,6 +371,21 @@ private:
size_t size = 0;
};
// Stores 'data in code' entries that describe the locations of
// data regions inside code sections. The contents are a flat array of
// llvm::MachO::data_in_code_entry records.
class DataInCodeSection final : public LinkEditSection {
public:
DataInCodeSection();
// Fills `entries` with the entries to be emitted.
void finalizeContents() override;
// Size in bytes of the entry table: one data_in_code_entry per element of
// `entries`.
uint64_t getRawSize() const override {
return sizeof(llvm::MachO::data_in_code_entry) * entries.size();
}
// Writes the entry table to buf.
void writeTo(uint8_t *buf) const override;
private:
// Entries to emit; assigned by finalizeContents().
std::vector<llvm::MachO::data_in_code_entry> entries;
};
// Stores ULEB128 delta encoded addresses of functions.
class FunctionStartsSection final : public LinkEditSection {
public:

View File

@ -71,6 +71,7 @@ public:
SymtabSection *symtabSection = nullptr;
IndirectSymtabSection *indirectSymtabSection = nullptr;
CodeSignatureSection *codeSignatureSection = nullptr;
DataInCodeSection *dataInCodeSection = nullptr;
FunctionStartsSection *functionStartsSection = nullptr;
LCUuid *uuidCommand = nullptr;
@ -142,6 +143,25 @@ private:
FunctionStartsSection *functionStartsSection;
};
// LC_DATA_IN_CODE load command. It is a linkedit_data_command that records
// the file offset and size of the given DataInCodeSection.
class LCDataInCode final : public LoadCommand {
public:
// dataInCodeSection is stored as a raw pointer and dereferenced in writeTo().
explicit LCDataInCode(DataInCodeSection *dataInCodeSection)
: dataInCodeSection(dataInCodeSection) {}
// The command has the fixed size of a linkedit_data_command.
uint32_t getSize() const override { return sizeof(linkedit_data_command); }
// Fills in the linkedit_data_command located at buf: the command id, the
// command size, and the file offset and file size of the section's data.
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<linkedit_data_command *>(buf);
c->cmd = LC_DATA_IN_CODE;
c->cmdsize = getSize();
c->dataoff = dataInCodeSection->fileOff;
c->datasize = dataInCodeSection->getFileSize();
}
private:
// Section whose location this command describes.
DataInCodeSection *dataInCodeSection;
};
class LCDysymtab final : public LoadCommand {
public:
LCDysymtab(SymtabSection *symtabSection,
@ -646,6 +666,7 @@ template <class LP> void Writer::createLoadCommands() {
make<LCDysymtab>(symtabSection, indirectSymtabSection));
if (functionStartsSection)
in.header->addLoadCommand(make<LCFunctionStarts>(functionStartsSection));
in.header->addLoadCommand(make<LCDataInCode>(dataInCodeSection));
if (config->emitEncryptionInfo)
in.header->addLoadCommand(make<LCEncryptionInfo<LP>>());
for (StringRef path : config->runtimePaths)
@ -844,6 +865,7 @@ template <class LP> void Writer::createOutputSections() {
indirectSymtabSection = make<IndirectSymtabSection>();
if (config->adhocCodesign)
codeSignatureSection = make<CodeSignatureSection>();
dataInCodeSection = make<DataInCodeSection>();
if (config->emitFunctionStarts)
functionStartsSection = make<FunctionStartsSection>();
if (config->emitBitcodeBundle)
@ -944,8 +966,15 @@ void Writer::finalizeLinkEditSegment() {
TimeTraceScope timeScope("Finalize __LINKEDIT segment");
// Fill __LINKEDIT contents.
std::vector<LinkEditSection *> linkEditSections{
in.rebase, in.binding, in.weakBinding, in.lazyBinding,
in.exports, symtabSection, indirectSymtabSection, functionStartsSection,
in.rebase,
in.binding,
in.weakBinding,
in.lazyBinding,
in.exports,
symtabSection,
indirectSymtabSection,
dataInCodeSection,
functionStartsSection,
};
parallelForEach(linkEditSections, [](LinkEditSection *osec) {
if (osec)

View File

@ -0,0 +1,64 @@
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/bar.s -o %t/bar.o
# RUN: %lld %t/foo.o %t/bar.o -o %t/main.exe
# RUN: llvm-objdump --private-headers %t/main.exe > %t/objdump
# RUN: llvm-objdump --macho --data-in-code %t/main.exe >> %t/objdump
# RUN: FileCheck %s < %t/objdump
# CHECK-LABEL: sectname __text
# CHECK-NEXT: segname __TEXT
# CHECK-NEXT: addr
# CHECK-NEXT: size
# CHECK-NEXT: offset [[#%,TEXT:]]
# CHECK-LABEL: cmd LC_DATA_IN_CODE
# CHECK-NEXT: cmdsize 16
# CHECK-NEXT: dataoff
# CHECK-NEXT: datasize 16
# CHECK-LABEL: Data in code table (2 entries)
# CHECK-NEXT: offset length kind
# CHECK-NEXT: [[#%x,TEXT + 28]] 24 JUMP_TABLE32
# CHECK-NEXT: [[#%x,TEXT + 68]] 12 JUMP_TABLE32
#--- foo.s
.text
.globl _main
.p2align 4, 0x90
_main:
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
movl $0, -4(%rbp)
movb $0, %al
callq _bar
addq $16, %rsp
popq %rbp
retq
.p2align 2, 0x90
.data_region jt32
.long 0
.long 0
.long 0
.long 0
.long 0
.long 0
.end_data_region
#--- bar.s
.text
.globl _bar
.p2align 4
_bar:
retq
.p2align 2, 0x90
.data_region jt32
.long 0
.long 0
.long 0
.end_data_region

View File

@ -72,7 +72,7 @@
# PADMAX-NEXT: segname __TEXT
# PADMAX-NEXT: addr
# PADMAX-NEXT: size
# PADMAX-NEXT: offset [[#%u, CMDSIZE + 0x20 + mul(0x400, N - 8)]]
# PADMAX-NEXT: offset [[#%u, CMDSIZE + 0x20 + mul(0x400, N - 9)]]
################ All 3 kinds of LCDylib swamped by a larger override
# RUN: %lld -o %t/libnull.dylib %t/null.o -dylib \

View File

@ -15,12 +15,12 @@
## address offset and the contents at that address very similarly, so am using
## --match-full-lines to make sure we match on the right thing.
# CHECK: Contents of section __TEXT,__cstring:
# CHECK-NEXT: 100000434 {{.*}}
# CHECK-NEXT: 100000444 {{.*}}
## 1st 8 bytes refer to the start of __cstring + 0xe, 2nd 8 bytes refer to the
## start of __cstring
# CHECK: Contents of section __DATA_CONST,__got:
# CHECK-NEXT: [[#%X,ADDR:]] 42040000 01000000 34040000 01000000 {{.*}}
# CHECK-NEXT: [[#%X,ADDR:]] 52040000 01000000 44040000 01000000 {{.*}}
# CHECK-NEXT: [[#ADDR + 16]] 00000000 00000000 {{.*}}
## Check that the rebase table is empty.