mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-05 07:32:36 +00:00
[lld-macho] Fix & refactor symbol size calculations
I noticed two problems with the previous implementation: * N_ALT_ENTRY symbols weren't being handled correctly -- they should determine the size of the previous symbol, even though they don't cause a new section to be created * The last symbol in a section had its size calculated wrongly; the first subsection's size was used instead of the last one I decided to take the opportunity to refactor things as well, mainly to realize my observation [here](https://reviews.llvm.org/D98837#inline-931511) that we could avoid doing a binary search to match symbols with subsections. I think the resulting code is a bit simpler too. N Min Max Median Avg Stddev x 20 4.31 4.43 4.37 4.3775 0.034162922 + 20 4.32 4.43 4.38 4.3755 0.02799906 No difference proven at 95.0% confidence Reviewed By: #lld-macho, alexshap Differential Revision: https://reviews.llvm.org/D99972
This commit is contained in:
parent
94f75202ac
commit
ceec610754
@ -198,8 +198,8 @@ void ObjFile::parseSections(ArrayRef<Section> sections) {
|
||||
static InputSection *findContainingSubsection(SubsectionMap &map,
|
||||
uint64_t *offset) {
|
||||
auto it = std::prev(llvm::upper_bound(
|
||||
map, *offset, [](uint64_t value, SubsectionEntry subsectionEntry) {
|
||||
return value < subsectionEntry.offset;
|
||||
map, *offset, [](uint64_t value, SubsectionEntry subsecEntry) {
|
||||
return value < subsecEntry.offset;
|
||||
}));
|
||||
*offset -= it->offset;
|
||||
return it->isec;
|
||||
@ -423,96 +423,79 @@ template <class LP>
|
||||
void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
|
||||
ArrayRef<typename LP::nlist> nList,
|
||||
const char *strtab, bool subsectionsViaSymbols) {
|
||||
using Section = typename LP::section;
|
||||
using NList = typename LP::nlist;
|
||||
|
||||
// Precompute the boundaries of symbols within a section.
|
||||
// If subsectionsViaSymbols is True then the corresponding subsections will be
|
||||
// created, otherwise these boundaries are used for the calculation of symbols
|
||||
// sizes only.
|
||||
for (const NList &sym : nList) {
|
||||
if ((sym.n_type & N_TYPE) == N_SECT && !(sym.n_desc & N_ALT_ENTRY) &&
|
||||
!subsections[sym.n_sect - 1].empty()) {
|
||||
SubsectionMap &subsecMapping = subsections[sym.n_sect - 1];
|
||||
subsecMapping.push_back(
|
||||
{sym.n_value - sectionHeaders[sym.n_sect - 1].addr,
|
||||
subsecMapping.front().isec});
|
||||
// Groups indices of the symbols by the sections that contain them.
|
||||
std::vector<std::vector<uint32_t>> symbolsBySection(subsections.size());
|
||||
symbols.resize(nList.size());
|
||||
for (uint32_t i = 0; i < nList.size(); ++i) {
|
||||
const NList &sym = nList[i];
|
||||
StringRef name = strtab + sym.n_strx;
|
||||
if ((sym.n_type & N_TYPE) == N_SECT) {
|
||||
SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
|
||||
// parseSections() may have chosen not to parse this section.
|
||||
if (subsecMap.empty())
|
||||
continue;
|
||||
symbolsBySection[sym.n_sect - 1].push_back(i);
|
||||
} else {
|
||||
symbols[i] = parseNonSectionSymbol(sym, name);
|
||||
}
|
||||
}
|
||||
|
||||
for (SubsectionMap &subsecMap : subsections) {
|
||||
// Calculate symbol sizes and create subsections by splitting the sections
|
||||
// along symbol boundaries.
|
||||
for (size_t i = 0; i < subsections.size(); ++i) {
|
||||
SubsectionMap &subsecMap = subsections[i];
|
||||
if (subsecMap.empty())
|
||||
continue;
|
||||
llvm::sort(subsecMap,
|
||||
[](const SubsectionEntry &lhs, const SubsectionEntry &rhs) {
|
||||
return lhs.offset < rhs.offset;
|
||||
});
|
||||
subsecMap.erase(
|
||||
std::unique(subsecMap.begin(), subsecMap.end(),
|
||||
[](const SubsectionEntry &lhs, const SubsectionEntry &rhs) {
|
||||
return lhs.offset == rhs.offset;
|
||||
}),
|
||||
subsecMap.end());
|
||||
if (!subsectionsViaSymbols)
|
||||
continue;
|
||||
for (size_t i = 0; i < subsecMap.size(); ++i) {
|
||||
uint32_t offset = subsecMap[i].offset;
|
||||
InputSection *&isec = subsecMap[i].isec;
|
||||
uint32_t end = i + 1 < subsecMap.size() ? subsecMap[i + 1].offset
|
||||
: isec->data.size();
|
||||
isec = make<InputSection>(*isec);
|
||||
isec->data = isec->data.slice(offset, end - offset);
|
||||
|
||||
std::vector<uint32_t> &symbolIndices = symbolsBySection[i];
|
||||
llvm::sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
|
||||
return nList[lhs].n_value < nList[rhs].n_value;
|
||||
});
|
||||
uint64_t sectionAddr = sectionHeaders[i].addr;
|
||||
|
||||
// We populate subsecMap by repeatedly splitting the last (highest address)
|
||||
// subsection.
|
||||
SubsectionEntry subsecEntry = subsecMap.back();
|
||||
for (size_t j = 0; j < symbolIndices.size(); ++j) {
|
||||
uint32_t symIndex = symbolIndices[j];
|
||||
const NList &sym = nList[symIndex];
|
||||
StringRef name = strtab + sym.n_strx;
|
||||
InputSection *isec = subsecEntry.isec;
|
||||
|
||||
uint64_t subsecAddr = sectionAddr + subsecEntry.offset;
|
||||
uint64_t symbolOffset = sym.n_value - subsecAddr;
|
||||
uint64_t symbolSize =
|
||||
j + 1 < symbolIndices.size()
|
||||
? nList[symbolIndices[j + 1]].n_value - sym.n_value
|
||||
: isec->data.size() - symbolOffset;
|
||||
// There are 3 cases where we do not need to create a new subsection:
|
||||
// 1. If the input file does not use subsections-via-symbols.
|
||||
// 2. Multiple symbols at the same address only induce one subsection.
|
||||
// 3. Alternative entry points do not induce new subsections.
|
||||
if (!subsectionsViaSymbols || symbolOffset == 0 ||
|
||||
sym.n_desc & N_ALT_ENTRY) {
|
||||
symbols[symIndex] =
|
||||
createDefined(sym, name, isec, symbolOffset, symbolSize);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto *nextIsec = make<InputSection>(*isec);
|
||||
nextIsec->data = isec->data.slice(symbolOffset);
|
||||
isec->data = isec->data.slice(0, symbolOffset);
|
||||
|
||||
// By construction, the symbol will be at offset zero in the new section.
|
||||
symbols[symIndex] =
|
||||
createDefined(sym, name, nextIsec, /*value=*/0, symbolSize);
|
||||
// TODO: ld64 appears to preserve the original alignment as well as each
|
||||
// subsection's offset from the last aligned address. We should consider
|
||||
// emulating that behavior.
|
||||
isec->align = MinAlign(isec->align, offset);
|
||||
nextIsec->align = MinAlign(isec->align, sym.n_value);
|
||||
subsecMap.push_back({sym.n_value - sectionAddr, nextIsec});
|
||||
subsecEntry = subsecMap.back();
|
||||
}
|
||||
}
|
||||
|
||||
symbols.resize(nList.size());
|
||||
for (size_t i = 0, n = nList.size(); i < n; ++i) {
|
||||
const NList &sym = nList[i];
|
||||
StringRef name = strtab + sym.n_strx;
|
||||
|
||||
if ((sym.n_type & N_TYPE) != N_SECT) {
|
||||
symbols[i] = parseNonSectionSymbol(sym, name);
|
||||
continue;
|
||||
}
|
||||
|
||||
const Section &sec = sectionHeaders[sym.n_sect - 1];
|
||||
SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
|
||||
|
||||
// parseSections() may have chosen not to parse this section.
|
||||
if (subsecMap.empty())
|
||||
continue;
|
||||
|
||||
uint64_t offset = sym.n_value - sec.addr;
|
||||
|
||||
auto it = llvm::upper_bound(
|
||||
subsecMap, offset, [](uint64_t value, SubsectionEntry subsectionEntry) {
|
||||
return value < subsectionEntry.offset;
|
||||
});
|
||||
uint32_t size = it != subsecMap.end()
|
||||
? it->offset - offset
|
||||
: subsecMap.front().isec->getSize() - offset;
|
||||
|
||||
// If the input file does not use subsections-via-symbols, all symbols can
|
||||
// use the same subsection. Otherwise, we must split the sections along
|
||||
// symbol boundaries.
|
||||
if (!subsectionsViaSymbols) {
|
||||
symbols[i] =
|
||||
createDefined(sym, name, subsecMap.front().isec, offset, size);
|
||||
continue;
|
||||
}
|
||||
|
||||
InputSection *subsec = (--it)->isec;
|
||||
symbols[i] = createDefined(sym, name, subsec, offset - it->offset, size);
|
||||
}
|
||||
|
||||
if (!subsectionsViaSymbols)
|
||||
for (SubsectionMap &subsecMap : subsections)
|
||||
if (!subsecMap.empty())
|
||||
subsecMap = {subsecMap.front()};
|
||||
}
|
||||
|
||||
OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
|
||||
|
@ -161,7 +161,7 @@ Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
|
||||
Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec,
|
||||
uint32_t value, bool isPrivateExtern,
|
||||
bool includeInSymtab) {
|
||||
Defined *s = addDefined(name, nullptr, isec, value, /*size*/ 0,
|
||||
Defined *s = addDefined(name, nullptr, isec, value, /*size=*/0,
|
||||
/*isWeakDef=*/false, isPrivateExtern);
|
||||
s->includeInSymtab = includeInSymtab;
|
||||
return s;
|
||||
|
@ -118,7 +118,10 @@ public:
|
||||
static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
|
||||
|
||||
InputSection *isec;
|
||||
// Contains the offset from the containing subsection. Note that this is
|
||||
// different from nlist::n_value, which is the absolute address of the symbol.
|
||||
uint64_t value;
|
||||
// size is only calculated for regular (non-bitcode) symbols.
|
||||
uint64_t size;
|
||||
|
||||
bool overridesWeakDef : 1;
|
||||
|
@ -42,25 +42,41 @@
|
||||
# CHECK-NEXT: [[#%x, STATIC:]] - 0[[#MORE_DATA_ID + 1]] 0000 STSYM _static_var
|
||||
# CHECK-NEXT: [[#%x, MAIN:]] - 0[[#TEXT_ID + 1]] 0000 FUN _main
|
||||
# CHECK-NEXT: 0000000000000006 - 00 0000 FUN
|
||||
# CHECK-NEXT: [[#%x, FUN:]] - 0[[#MORE_TEXT_ID + 1]] 0000 FUN _fun
|
||||
# CHECK-NEXT: [[#%x, BAR:]] - 0[[#TEXT_ID + 1]] 0000 FUN _bar
|
||||
# CHECK-NEXT: 0000000000000000 - 00 0000 FUN
|
||||
# CHECK-NEXT: [[#%x, BAR2:]] - 0[[#TEXT_ID + 1]] 0000 FUN _bar2
|
||||
# CHECK-NEXT: 0000000000000001 - 00 0000 FUN
|
||||
# CHECK-NEXT: [[#%x, BAZ:]] - 0[[#TEXT_ID + 1]] 0000 FUN _baz
|
||||
# CHECK-NEXT: 0000000000000000 - 00 0000 FUN
|
||||
# CHECK-NEXT: [[#%x, BAZ2:]] - 0[[#TEXT_ID + 1]] 0000 FUN _baz
|
||||
# CHECK-NEXT: 0000000000000002 - 00 0000 FUN
|
||||
# CHECK-NEXT: [[#%x, QUX:]] - 0[[#TEXT_ID + 1]] 0000 FUN _qux
|
||||
# CHECK-NEXT: 0000000000000003 - 00 0000 FUN
|
||||
# CHECK-NEXT: [[#%x, QUUX:]] - 0[[#TEXT_ID + 1]] 0000 FUN _quux
|
||||
# CHECK-NEXT: 0000000000000004 - 00 0000 FUN
|
||||
# CHECK-NEXT: [[#%x, GLOB:]] - 0[[#DATA_ID + 1]] 0000 GSYM _global_var
|
||||
# CHECK-NEXT: [[#%x, ZERO:]] - 0[[#COMM_ID + 1]] 0000 GSYM _zero
|
||||
# CHECK-NEXT: [[#%x, FUN:]] - 0[[#MORE_TEXT_ID + 1]] 0000 FUN _fun
|
||||
# CHECK-NEXT: 0000000000000001 - 00 0000 FUN
|
||||
# CHECK-NEXT: 0000000000000000 - 01 0000 SO
|
||||
# CHECK-NEXT: 0000000000000000 - 00 0000 SO /foo.cpp
|
||||
# CHECK-NEXT: 0000000000000020 - 03 0001 OSO [[FOO_PATH]]
|
||||
# CHECK-NEXT: [[#%x, FOO:]] - 0[[#TEXT_ID + 1]] 0000 FUN _foo
|
||||
# CHECK-NEXT: 0000000000000001 - 00 0000 FUN
|
||||
# CHECK-NEXT: 0000000000000000 - 01 0000 SO
|
||||
# CHECK-NEXT: [[#STATIC]] s _static_var
|
||||
# CHECK-NEXT: [[#MAIN]] T _main
|
||||
# CHECK-NEXT: {{[0-9af]+}} A _abs
|
||||
# CHECK-NEXT: [[#FUN]] S _fun
|
||||
# CHECK-NEXT: [[#GLOB]] D _global_var
|
||||
# CHECK-NEXT: [[#ZERO]] S _zero
|
||||
# CHECK-NEXT: [[#FOO]] T _foo
|
||||
# CHECK-NEXT: {{[0-9af]+}} T _no_debug
|
||||
# CHECK-NEXT: {{0+}} A __mh_execute_header
|
||||
# CHECK-DAG: [[#STATIC]] s _static_var
|
||||
# CHECK-DAG: [[#MAIN]] T _main
|
||||
# CHECK-DAG: {{[0-9af]+}} A _abs
|
||||
# CHECK-DAG: [[#BAR]] T _bar
|
||||
# CHECK-DAG: [[#BAR2]] T _bar2
|
||||
# CHECK-DAG: [[#BAZ]] T _baz
|
||||
# CHECK-DAG: [[#BAZ2]] T _baz2
|
||||
# CHECK-DAG: [[#FUN]] S _fun
|
||||
# CHECK-DAG: [[#GLOB]] D _global_var
|
||||
# CHECK-DAG: [[#ZERO]] S _zero
|
||||
# CHECK-DAG: [[#FOO]] T _foo
|
||||
# CHECK-DAG: {{[0-9af]+}} T _no_debug
|
||||
# CHECK-DAG: {{0+}} A __mh_execute_header
|
||||
# CHECK-EMPTY:
|
||||
|
||||
## Check that we don't attempt to emit rebase opcodes for the debug sections
|
||||
@ -90,13 +106,30 @@ _static_var:
|
||||
.zerofill __DATA,__common,_zero,4,2
|
||||
|
||||
.text
|
||||
.globl _main
|
||||
.globl _main, _bar, _bar2, _baz, _baz2, _qux, _quux
|
||||
.alt_entry _baz
|
||||
.alt_entry _qux
|
||||
|
||||
_bar:
|
||||
_bar2:
|
||||
.space 1
|
||||
|
||||
_baz:
|
||||
_baz2:
|
||||
.space 2
|
||||
|
||||
_main:
|
||||
Lfunc_begin0:
|
||||
callq _foo
|
||||
retq
|
||||
Lfunc_end0:
|
||||
|
||||
_qux:
|
||||
.space 3
|
||||
|
||||
_quux:
|
||||
.space 4
|
||||
|
||||
.section __DWARF,__debug_str,regular,debug
|
||||
.asciz "test.cpp" ## string offset=0
|
||||
.asciz "/tmp" ## string offset=9
|
||||
|
@ -36,16 +36,6 @@
|
||||
# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}}
|
||||
# CHECK-NEXT: }
|
||||
# CHECK-NEXT: Symbol {
|
||||
# CHECK-NEXT: Name: _external
|
||||
# CHECK-NEXT: Extern
|
||||
# CHECK-NEXT: Type: Section (0xE)
|
||||
# CHECK-NEXT: Section: __data
|
||||
# CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
|
||||
# CHECK-NEXT: Flags [ (0x0)
|
||||
# CHECK-NEXT: ]
|
||||
# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}}
|
||||
# CHECK-NEXT: }
|
||||
# CHECK-NEXT: Symbol {
|
||||
# CHECK-NEXT: Name: _external_weak
|
||||
# CHECK-NEXT: Extern
|
||||
# CHECK-NEXT: Type: Section (0xE)
|
||||
@ -56,6 +46,16 @@
|
||||
# CHECK-NEXT: ]
|
||||
# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}}
|
||||
# CHECK-NEXT: }
|
||||
# CHECK-NEXT: Symbol {
|
||||
# CHECK-NEXT: Name: _external
|
||||
# CHECK-NEXT: Extern
|
||||
# CHECK-NEXT: Type: Section (0xE)
|
||||
# CHECK-NEXT: Section: __data
|
||||
# CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
|
||||
# CHECK-NEXT: Flags [ (0x0)
|
||||
# CHECK-NEXT: ]
|
||||
# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}}
|
||||
# CHECK-NEXT: }
|
||||
# CHECK-NEXT: Symbol {
|
||||
# CHECK-NEXT: Name: __mh_execute_header (81)
|
||||
# CHECK-NEXT: Extern
|
||||
|
Loading…
Reference in New Issue
Block a user