darling-dyld/dyld3/MachOAnalyzerSet.cpp
Thomas A 8450bb533f Add Constructor For MachOAnalyzerSet
Works around a linkage issue.
2023-04-30 14:54:59 -07:00

535 lines
24 KiB
C++

/*
* Copyright (c) 2019 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
#include <sys/types.h>
#include <mach/mach.h>
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <mach-o/reloc.h>
#include <mach-o/nlist.h>
#include <TargetConditionals.h>
#include "MachOAnalyzerSet.h"
#include "DyldSharedCache.h"
// When this file is compiled with BUILDING_DYLD set, declare dyld::log() here
// so it can be referenced without including a dyld-internal header.
#if BUILDING_DYLD
namespace dyld { void log(const char*, ...); }
#endif
namespace dyld3 {
// Returns true when the top byte of `addend` carries a real "high8" (TBI) value.
// A zero top byte means there is nothing to carry; a value whose top 16 bits are
// all ones is treated as a sign-extended negative addend rather than a high8 tag.
static bool hasHigh8(uint64_t addend)
{
    const uint64_t topByte    = addend >> 56;
    const uint64_t topTwoByte = addend >> 48;
    return (topByte != 0) && (topTwoByte != 0xFFFF);
}
// Out-of-line definition of the (empty) default constructor; only compiled
// when DARLING is defined.
// NOTE(review): this appears to exist purely to work around a linkage issue in
// Darling builds — confirm before removing or inlining it.
#ifdef DARLING
MachOAnalyzerSet::MachOAnalyzerSet() {}
#endif
// Walks every bind reported by the underlying MachOAnalyzer and hands each one,
// together with its resolved target, to `fixUpHandler`.
//
//  diag          - receives any error raised during symbol lookup
//  fixUpHandler  - called as (runtimeOffset, pointerMetaData, target, stop)
//  patchHandler  - forwarded to findSymbolFrom() for dyld cache patching
//
// Consecutive binds to the same (symbol, ordinal, addend) reuse the previous
// lookup result instead of resolving the symbol again.  When a symbol cannot be
// resolved, iteration stops; the handler is still called once if the target
// was marked as a missing symbol.
void MachOAnalyzerSet::WrappedMachO::forEachBind(Diagnostics& diag, FixUpHandler fixUpHandler, CachePatchHandler patchHandler) const
{
    const bool image64 = _mh->is64();

    // result of the most recent successful lookup, reused for identical follow-on binds
    __block const char*     prevSymbolName = nullptr;
    __block int             prevLibOrdinal = 256;
    __block uint64_t        prevAddend     = 0;
    __block FixupTarget     target;
    __block PointerMetaData pmd;

    _mh->forEachBind(diag, ^(uint64_t runtimeOffset, int libOrdinal, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop) {
        // fast path: same symbol lookup as last location
        const bool sameAsPrevious = (symbolName == prevSymbolName) && (libOrdinal == prevLibOrdinal) && (addend == prevAddend);
        if ( sameAsPrevious ) {
            fixUpHandler(runtimeOffset, pmd, target, stop);
            return;
        }

        if ( !this->findSymbolFrom(diag, libOrdinal, symbolName, weakImport, lazyBind, addend, patchHandler, target) ) {
            // lookup failed: report the missing symbol (if that is what happened), then stop
            if ( target.kind == FixupTarget::Kind::bindMissingSymbol )
                fixUpHandler(runtimeOffset, pmd, target, stop);
            stop = true;
            return;
        }

        // move a high8 tag out of the addend and into the pointer meta data
        pmd.high8 = 0;
        if ( image64 && (target.addend != 0) && hasHigh8(target.addend) ) {
            pmd.high8             = (target.addend >> 56);
            target.offsetInImage &= 0x00FFFFFFFFFFFFFFULL;
            target.addend        &= 0x00FFFFFFFFFFFFFFULL;
        }

        // skippable weak-defs are neither reported nor remembered
        if ( target.skippableWeakDef )
            return;

        fixUpHandler(runtimeOffset, pmd, target, stop);
        prevSymbolName = symbolName;
        prevLibOrdinal = libOrdinal;
        prevAddend     = addend;
    }, ^(const char* symbolName) {
        // second callback of MachOAnalyzer::forEachBind is intentionally a no-op here
    });
}
// Default constructor: produces meta data describing a plain pointer, i.e.
// not authenticated, no key, no diversity and no high8 byte.
MachOAnalyzerSet::PointerMetaData::PointerMetaData()
{
    // authentication related fields
    authenticated     = 0;
    key               = 0;
    usesAddrDiversity = 0;
    diversity         = 0;
    // top byte carried alongside the pointer
    high8             = 0;
}
// Builds the meta data for one on-disk chained fixup pointer.
//
//  fixupLoc        - the chained fixup pointer as stored in the image
//  pointer_format  - DYLD_CHAINED_PTR_* format the pointer is encoded in
//
// All fields start out zeroed.  For the arm64e formats the authentication bits
// (key, address diversity, diversity) are copied when the pointer is
// authenticated; otherwise, for a non-bind, the rebase high8 is copied.  For the
// generic 64-bit formats only the high8 of a non-bind is copied.  Any other
// format leaves every field zero.
MachOAnalyzerSet::PointerMetaData::PointerMetaData(const MachOLoaded::ChainedFixupPointerOnDisk* fixupLoc, uint16_t pointer_format)
    : PointerMetaData()     // zero every field first
{
    switch ( pointer_format ) {
        case DYLD_CHAINED_PTR_ARM64E:
        case DYLD_CHAINED_PTR_ARM64E_KERNEL:
        case DYLD_CHAINED_PTR_ARM64E_USERLAND:
        case DYLD_CHAINED_PTR_ARM64E_FIRMWARE:
        case DYLD_CHAINED_PTR_ARM64E_USERLAND24:
            authenticated = fixupLoc->arm64e.authRebase.auth;
            if ( authenticated ) {
                key               = fixupLoc->arm64e.authRebase.key;
                usesAddrDiversity = fixupLoc->arm64e.authRebase.addrDiv;
                diversity         = fixupLoc->arm64e.authRebase.diversity;
            }
            else if ( fixupLoc->arm64e.bind.bind == 0 ) {
                // unauthenticated rebase: keep its top byte
                high8 = fixupLoc->arm64e.rebase.high8;
            }
            break;

        case DYLD_CHAINED_PTR_64:
        case DYLD_CHAINED_PTR_64_OFFSET:
            if ( fixupLoc->generic64.bind.bind == 0 ) {
                // rebase: keep its top byte
                high8 = fixupLoc->generic64.rebase.high8;
            }
            break;
    }
}
// Visits every fixup (rebase or bind) of this image and reports each one to `fixup`.
//
//  diag     - receives errors (unresolvable symbol, out of range bind ordinal, ...)
//  fixup    - called as (offsetOfFixupInImage, pointerMetaData, target, stop)
//  patcher  - forwarded to the symbol lookup code for dyld cache patching
//
// Three encodings are handled:
//   1. chained fixups: all bind targets are resolved into a table first, then
//      every chain is walked and each entry reported as a bind or a rebase
//   2. firmware chain starts (chain starts info is in a section): rebases only
//   3. opcode based: all rebases first, then all binds via forEachBind()
//
// After the fixups, a main executable that has weak-defs additionally gets the
// operator new/delete lookups done by wmo_findExtraSymbolFrom().
// The function returns early, without that last step, as soon as `diag` has an error.
void MachOAnalyzerSet::WrappedMachO::forEachFixup(Diagnostics& diag, FixUpHandler fixup, CachePatchHandler patcher) const
{
    uint16_t             fmPointerFormat;
    uint32_t             fmStartsCount;
    const uint32_t*      fmStarts;
    const MachOAnalyzer* ma           = _mh;
    const uint64_t       prefLoadAddr = ma->preferredLoadAddress();

    if ( ma->hasChainedFixups() ) {
        // build targets table
        STACK_ALLOC_OVERFLOW_SAFE_ARRAY(FixupTarget, targets, 512);
        ma->forEachChainedFixupTarget(diag, ^(int libOrdinal, const char* symbolName, uint64_t addend, bool weakImport, bool& stop) {
            targets.default_constuct_back();
            FixupTarget& foundTarget = targets.back();
            if ( !this->findSymbolFrom(diag, libOrdinal, symbolName, weakImport, false, addend, patcher, foundTarget) ) {
                // call handler with missing symbol before stopping
                if ( foundTarget.kind == FixupTarget::Kind::bindMissingSymbol )
                    fixup(0, PointerMetaData(), foundTarget, stop);
                stop = true;
            }
        });
        if ( diag.hasError() )
            return;

        // walk all chains
        ma->withChainStarts(diag, ma->chainStartsOffset(), ^(const dyld_chained_starts_in_image* startsInfo) {
            ma->forEachFixupInAllChains(diag, startsInfo, false, ^(MachOLoaded::ChainedFixupPointerOnDisk* fixupLoc,
                                                                    const dyld_chained_starts_in_segment* segInfo, bool& fixupsStop) {
                uint64_t fixupOffset = (uint8_t*)fixupLoc - (uint8_t*)ma;
                uint64_t targetOffset;
                uint32_t bindOrdinal;
                int64_t  embeddedAddend;
                PointerMetaData pmd(fixupLoc, segInfo->pointer_format);
                if ( fixupLoc->isBind(segInfo->pointer_format, bindOrdinal, embeddedAddend) ) {
                    if ( bindOrdinal < targets.count() ) {
                        if ( embeddedAddend == 0 ) {
                            if ( hasHigh8(targets[bindOrdinal].addend) ) {
                                // move the high8 tag out of the target and into the pointer meta data
                                FixupTarget targetWithoutHigh8 = targets[bindOrdinal];
                                pmd.high8 = (targetWithoutHigh8.addend >> 56);
                                targetWithoutHigh8.offsetInImage &= 0x00FFFFFFFFFFFFFFULL;
                                targetWithoutHigh8.addend        &= 0x00FFFFFFFFFFFFFFULL;
                                fixup(fixupOffset, pmd, targetWithoutHigh8, fixupsStop);
                            }
                            else {
                                fixup(fixupOffset, pmd, targets[bindOrdinal], fixupsStop);
                            }
                        }
                        else {
                            // pointer on disk encodes extra addend, make pseudo target for that
                            FixupTarget targetWithAddend = targets[bindOrdinal];
                            targetWithAddend.addend        += embeddedAddend;
                            targetWithAddend.offsetInImage += embeddedAddend;
                            fixup(fixupOffset, pmd, targetWithAddend, fixupsStop);
                        }
                    }
                    else {
                        diag.error("out of range bind ordinal %d (max %lu)", bindOrdinal, targets.count());
                        fixupsStop = true;
                    }
                }
                else if ( fixupLoc->isRebase(segInfo->pointer_format, prefLoadAddr, targetOffset) ) {
                    FixupTarget rebaseTarget;
                    rebaseTarget.kind             = FixupTarget::Kind::rebase;
                    rebaseTarget.foundInImage     = *this;
                    rebaseTarget.offsetInImage    = targetOffset & 0x00FFFFFFFFFFFFFFULL;
                    rebaseTarget.isLazyBindRebase = false; // FIXME
                    fixup(fixupOffset, pmd, rebaseTarget, fixupsStop);
                }
            });
        });
    }
    else if ( ma->hasFirmwareChainStarts(&fmPointerFormat, &fmStartsCount, &fmStarts) ) {
        // This is firmware which only has rebases, the chain starts info is in a section (not LINKEDIT)
        ma->forEachFixupInAllChains(diag, fmPointerFormat, fmStartsCount, fmStarts, ^(MachOLoaded::ChainedFixupPointerOnDisk* fixupLoc, bool& stop) {
            uint64_t fixupOffset = (uint8_t*)fixupLoc - (uint8_t*)ma;
            PointerMetaData pmd(fixupLoc, fmPointerFormat);
            // Only trust targetOffset when isRebase() says the location really is a rebase
            // (same rule as the chained fixups path above).  Previously the result was
            // ignored and an uninitialized targetOffset could be reported to the handler.
            uint64_t targetOffset = 0;
            if ( !fixupLoc->isRebase(fmPointerFormat, prefLoadAddr, targetOffset) )
                return;
            FixupTarget rebaseTarget;
            rebaseTarget.kind             = FixupTarget::Kind::rebase;
            rebaseTarget.foundInImage     = *this;
            rebaseTarget.offsetInImage    = targetOffset & 0x00FFFFFFFFFFFFFFULL;
            rebaseTarget.isLazyBindRebase = false;
            fixup(fixupOffset, pmd, rebaseTarget, stop);
        });
    }
    else {
        // process all rebase opcodes
        const bool is64 = ma->is64();
        ma->forEachRebase(diag, ^(uint64_t runtimeOffset, bool isLazyPointerRebase, bool& stop) {
            // read the pointer currently stored at the rebase location (4 or 8 bytes wide)
            uint64_t* loc      = (uint64_t*)((uint8_t*)ma + runtimeOffset);
            uint64_t  locValue = is64 ? *loc : *((uint32_t*)loc);
            FixupTarget     rebaseTarget;
            PointerMetaData pmd;
            if ( is64 )
                pmd.high8 = (locValue >> 56);
            rebaseTarget.kind             = FixupTarget::Kind::rebase;
            rebaseTarget.foundInImage     = *this;
            rebaseTarget.offsetInImage    = (locValue & 0x00FFFFFFFFFFFFFFULL) - prefLoadAddr;
            rebaseTarget.isLazyBindRebase = isLazyPointerRebase;
            fixup(runtimeOffset, pmd, rebaseTarget, stop);
        });
        if ( diag.hasError() )
            return;

        // process all bind opcodes
        this->forEachBind(diag, fixup, patcher);
    }
    if ( diag.hasError() )
        return;

    // main executable may define operator new/delete symbols that overrides weak-defs but have no fixups
    if ( ma->isMainExecutable() && ma->hasWeakDefs() ) {
        _set->wmo_findExtraSymbolFrom(this, patcher);
    }
}
bool MachOAnalyzerSet::wmo_findSymbolFrom(const WrappedMachO* fromWmo, Diagnostics& diag, int libOrdinal, const char* symbolName, bool weakImport,
bool lazyBind, uint64_t addend, CachePatchHandler patcher, FixupTarget& target) const
{
target.libOrdinal = libOrdinal;
if ( libOrdinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP ) {
__block bool found = false;
this->mas_forEachImage(^(const WrappedMachO& anImage, bool hidden, bool& stop) {
// when an image is hidden (RTLD_LOCAL) it can still look up symbols in itself
if ( hidden && (fromWmo->_mh != anImage._mh) )
return;
if ( anImage.findSymbolIn(diag, symbolName, addend, target) ) {
stop = true;
found = true;
}
});
if ( found )
return true;
// see if missing symbol resolver can find something
if ( fromWmo->missingSymbolResolver(weakImport, lazyBind, symbolName, "flat namespace", fromWmo->path(), target) )
return true;
// fill out target info about missing symbol
target.kind = FixupTarget::Kind::bindMissingSymbol;
target.requestedSymbolName = symbolName;
target.foundSymbolName = nullptr;
target.foundInImage = WrappedMachO(); // no image it should be in
diag.error("symbol '%s' not found, expected in flat namespace by '%s'", symbolName, fromWmo->path());
return false;
}
else if ( libOrdinal == BIND_SPECIAL_DYLIB_WEAK_LOOKUP ) {
if ( this->mas_fromImageWeakDefLookup(*fromWmo, symbolName, addend, patcher, target) ) {
target.weakCoalesced = true;
return true;
}
if ( !fromWmo->_mh->hasChainedFixups() ) {
// support old binaries where symbols have been stripped and have weak_bind to itself
target.skippableWeakDef = true;
return true;
}
// see if missing symbol resolver can find something
if ( fromWmo->missingSymbolResolver(weakImport, lazyBind, symbolName, "flat namespace", fromWmo->path(), target) )
return true;
// fill out target info about missing symbol
target.kind = FixupTarget::Kind::bindMissingSymbol;
target.requestedSymbolName = symbolName;
target.foundSymbolName = nullptr;
target.foundInImage = WrappedMachO(); // no image it should be in
diag.error("symbol '%s' not found, expected to be weak-def coalesced in '%s'", symbolName, fromWmo->path());
return false;
}
else {
int depIndex = libOrdinal - 1;
bool missingWeakDylib = false;
WrappedMachO depHelper;
const WrappedMachO* targetImage = nullptr;
if ( libOrdinal == BIND_SPECIAL_DYLIB_SELF ) {
targetImage = fromWmo;
}
else if ( libOrdinal == BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE ) {
this->mas_mainExecutable(depHelper);
targetImage = &depHelper;
}
else if ( fromWmo->dependent(depIndex, depHelper, missingWeakDylib) ) {
targetImage = &depHelper;
}
else {
diag.error("unknown library ordinal %d in %s", libOrdinal, fromWmo->path());
return false;
}
// use two-level namespace target image
if ( !missingWeakDylib && targetImage->findSymbolIn(diag, symbolName, addend, target) )
return true;
// see if missing symbol resolver can find something
const char* expectedInPath = missingWeakDylib ? "missing dylib" : targetImage->path();
if ( fromWmo->missingSymbolResolver(weakImport, lazyBind, symbolName, expectedInPath, fromWmo->path(), target) )
return true;
// fill out target info about missing symbol
target.kind = FixupTarget::Kind::bindMissingSymbol;
target.requestedSymbolName = symbolName;
target.foundSymbolName = nullptr;
target.foundInImage = *targetImage; // no image it is expected to be in
// symbol not found and not weak or lazy so error out
diag.error("symbol '%s' not found, expected in '%s', needed by '%s'", symbolName, expectedInPath, fromWmo->path());
return false;
}
return false;
}
// Mangled symbol names of all the variants of operator new and operator delete
// which a main executable can define (non-weak) to override the weak-def
// implementation in the OS.  wmo_findExtraSymbolFrom() looks each of them up.
//
// Reading the names (Itanium C++ ABI mangling, with an extra leading underscore):
//   nw / na            operator new / operator new[]
//   dl / da            operator delete / operator delete[]
//   m                  size_t (unsigned long) argument
//   Pv                 void* argument
//   RKSt9nothrow_t     const std::nothrow_t& argument
//   St11align_val_t    std::align_val_t argument
static const char* const sTreatAsWeak[] = {
"__Znwm", "__ZnwmRKSt9nothrow_t",
"__Znam", "__ZnamRKSt9nothrow_t",
"__ZdlPv", "__ZdlPvRKSt9nothrow_t", "__ZdlPvm",
"__ZdaPv", "__ZdaPvRKSt9nothrow_t", "__ZdaPvm",
"__ZnwmSt11align_val_t", "__ZnwmSt11align_val_tRKSt9nothrow_t",
"__ZnamSt11align_val_t", "__ZnamSt11align_val_tRKSt9nothrow_t",
"__ZdlPvSt11align_val_t", "__ZdlPvSt11align_val_tRKSt9nothrow_t", "__ZdlPvmSt11align_val_t",
"__ZdaPvSt11align_val_t", "__ZdaPvSt11align_val_tRKSt9nothrow_t", "__ZdaPvmSt11align_val_t"
};
void MachOAnalyzerSet::wmo_findExtraSymbolFrom(const WrappedMachO* fromWmo, CachePatchHandler patcher) const
{
for (const char* weakSymbolName : sTreatAsWeak) {
Diagnostics exportDiag;
FixupTarget dummyTarget;
// pretend main executable does have a use of this operator new/delete and look up the impl
// this has the side effect of adding a cache patch if there is an impl outside the cache
wmo_findSymbolFrom(fromWmo, exportDiag, -3, weakSymbolName, true, false, 0, patcher, dummyTarget);
}
}
// Looks up `symbolName` among the symbols exported by this image.
//
//  diag        - receives errors (malformed trie, unsupported export kind, bad re-export ordinal)
//  symbolName  - symbol to find
//  addend      - added to the resulting offset (except for thread-local exports) and stored in target.addend
//  target      - out: describes the definition that was found
//
// Search order:
//   1. the exports trie, following re-exported symbols into the dependent they come from
//   2. if the image has no exports trie, its global symbols
//   3. if this is a dylib that may have re-exported dylibs, each re-exported dependent (recursively)
//
// Returns true when a definition was found and `target` filled in.
bool MachOAnalyzerSet::WrappedMachO::findSymbolIn(Diagnostics& diag, const char* symbolName, uint64_t addend, FixupTarget& target) const
{
    const MachOAnalyzer* ma = _mh;

    // if exports trie location not computed yet, do it now
    ExportsTrie exportsTrie = this->getExportsTrie();

    // foundSymbolName doubles as the "found it" flag for the rest of this function
    target.foundSymbolName = nullptr;
    if ( exportsTrie.start ) {
        if ( const uint8_t* node = this->_mh->trieWalk(diag, exportsTrie.start, exportsTrie.end, symbolName)) {
            const uint8_t* p = node;
            const uint64_t flags = this->_mh->read_uleb128(diag, p, exportsTrie.end);
            if ( flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
                // re-export from another dylib, lookup there
                const uint64_t libOrdinal = ma->read_uleb128(diag, p, exportsTrie.end);
                // an empty imported name means the symbol has the same name in the other dylib
                const char* importedName = (char*)p;
                if ( importedName[0] == '\0' )
                    importedName = symbolName;
                const int depIndex = (int)(libOrdinal - 1);
                // must start out false: it is read below even when dependent() fails,
                // and dependent() is not known to set it in that case
                bool missingWeakDylib = false;
                WrappedMachO depHelper;
                if ( this->dependent(depIndex, depHelper, missingWeakDylib) && !missingWeakDylib ) {
                    if ( depHelper.findSymbolIn(diag, importedName, addend, target) ) {
                        target.requestedSymbolName = symbolName;
                        return true;
                    }
                }
                if ( !missingWeakDylib )
                    diag.error("re-export ordinal %lld out of range for %s", libOrdinal, symbolName);
                return false;
            }
            target.kind                = FixupTarget::Kind::bindToImage;
            target.requestedSymbolName = symbolName;
            target.foundSymbolName     = symbolName;
            target.foundInImage        = *this;
            target.isWeakDef           = false;
            target.addend              = addend;
            uint64_t trieValue = ma->read_uleb128(diag, p, exportsTrie.end);
            switch ( flags & EXPORT_SYMBOL_FLAGS_KIND_MASK ) {
                case EXPORT_SYMBOL_FLAGS_KIND_REGULAR:
                    target.offsetInImage = trieValue + addend;
                    if ( flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
                        // for now, just return address of resolver helper stub
                        // FIXME handle running resolver
                        (void)this->_mh->read_uleb128(diag, p, exportsTrie.end);
                    }
                    if ( flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION )
                        target.isWeakDef = true;
                    break;
                case EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL:
                    // no type checking that client expected TLV yet
                    target.offsetInImage = trieValue;
                    break;
                case EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE:
                    target.kind          = FixupTarget::Kind::bindAbsolute;
                    target.offsetInImage = trieValue + addend;
                    break;
                default:
                    diag.error("unsupported exported symbol kind. flags=%llu at node offset=0x%0lX", flags, (long)(node-exportsTrie.start));
                    return false;
            }
            return true;
        }
    }
    else {
        // no exports trie: fall back to scanning the global symbols
        ma->forEachGlobalSymbol(diag, ^(const char* n_name, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool& stop) {
            if ( strcmp(n_name, symbolName) == 0 ) {
                target.kind                = FixupTarget::Kind::bindToImage;
                target.foundSymbolName     = symbolName;
                target.requestedSymbolName = symbolName;
                target.foundInImage        = *this;
                target.offsetInImage       = n_value - ma->preferredLoadAddress() + addend;
                target.addend              = addend;
                stop = true;
            }
        });
        if ( target.foundSymbolName )
            return true;
    }

    // symbol not exported from this image
    // if this is a dylib and has re-exported dylibs, search those too
    if ( (ma->filetype == MH_DYLIB) && ((ma->flags & MH_NO_REEXPORTED_DYLIBS) == 0) ) {
        __block unsigned depIndex = 0;
        ma->forEachDependentDylib(^(const char* loadPath, bool isWeak, bool isReExport, bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool& stop) {
            if ( isReExport ) {
                bool missingWeakDylib = false;
                WrappedMachO child;
                if ( this->dependent(depIndex, child, missingWeakDylib) && !missingWeakDylib ) {
                    if ( child.findSymbolIn(diag, symbolName, addend, target) )
                        stop = true;
                }
            }
            // depIndex counts every dependent, not just the re-exported ones
            ++depIndex;
        });
    }
    return (target.foundSymbolName != nullptr);
}
// Returns the [start, end) byte range of the exports trie of `wmo`,
// or a pair of null pointers when the image has no exports trie.
MachOAnalyzerSet::ExportsTrie MachOAnalyzerSet::wmo_getExportsTrie(const WrappedMachO* wmo) const
{
    uint32_t trieOffset;
    uint32_t trieSize;
    if ( !wmo->_mh->hasExportTrie(trieOffset, trieSize) )
        return { nullptr, nullptr };

    // the trie is addressed relative to the start of the image
    const uint8_t* const trieStart = (uint8_t*)wmo->_mh + trieOffset;
    return { trieStart, trieStart + trieSize };
}
// Resolves a weak-def coalesced symbol across the images of this set.
//
// scan all weak-def images in load order
// return first non-weak definition found
// otherwise first weak definition found
//
//  fromWmo     - image doing the lookup; a hidden image is only searched when it is this image
//  symbolName  - symbol to find
//  addend      - forwarded to the per-image lookup
//  patcher     - called as (cachedDylibIndex, exportCacheOffset, target) for each dyld cache image
//                that implements the symbol after an implementation has already been chosen
//  target      - out: the chosen definition; only meaningful when true is returned
//
// Returns true when at least one image defines the symbol.
bool MachOAnalyzerSet::mas_fromImageWeakDefLookup(const WrappedMachO& fromWmo, const char* symbolName, uint64_t addend, CachePatchHandler patcher, FixupTarget& target) const
{
// walk all images in load order, looking only at ones with weak-defs
const DyldSharedCache* dyldCache = (DyldSharedCache*)mas_dyldCache();
__block bool foundImpl = false;
this->mas_forEachImage(^(const WrappedMachO& anImage, bool hidden, bool& stop) {
if ( !anImage._mh->hasWeakDefs() )
return;
// when an image is hidden (RTLD_LOCAL) it can still look up symbols in itself
if ( hidden && (fromWmo._mh != anImage._mh) )
return;
// look up into a scratch target with private diagnostics, so that a miss in
// this image neither disturbs `target` nor reports an error to the caller
FixupTarget tempTarget;
Diagnostics diag;
if ( anImage.findSymbolIn(diag, symbolName, addend, tempTarget) ) {
// ignore symbol re-exports, we will find the real definition later in forEachImage()
if ( anImage._mh != tempTarget.foundInImage._mh )
return;
if ( foundImpl && anImage._mh->inDyldCache() && (anImage._mh != target.foundInImage._mh) ) {
// we have already found the target, but now we see something in the dyld cache
// that also implements this symbol, so we need to change all caches uses of that
// to use the found one instead
uint32_t cachedDylibIndex = 0;
if ( dyldCache->findMachHeaderImageIndex(anImage._mh, cachedDylibIndex) ) {
// offset of this image's definition measured from the start of the dyld cache
uintptr_t exportCacheOffset = (uint8_t*)tempTarget.foundInImage._mh + tempTarget.offsetInImage - (uint8_t*)dyldCache;
patcher(cachedDylibIndex, (uint32_t)exportCacheOffset, target);
}
}
if ( !foundImpl ) {
// this is first found, so copy this to result
target = tempTarget;
foundImpl = true;
}
else if ( target.isWeakDef && !tempTarget.isWeakDef ) {
// we found a non-weak impl later on, switch to it
target = tempTarget;
}
}
});
return foundImpl;
}
} // dyld3