/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
 *
 * Copyright (c) 2011 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/stat.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/syslimits.h>
#include <libkern/OSByteOrder.h>
#include <mach-o/arch.h>
#include <mach-o/loader.h>
#include <Availability.h>

#include "CodeSigningTypes.h"
#include <CommonCrypto/CommonHMAC.h>
#include <CommonCrypto/CommonDigest.h>
#include <CommonCrypto/CommonDigestSPI.h>

#define NO_ULEB
#include "Architectures.hpp"
#include "MachOFileAbstraction.hpp"

#include "dsc_iterator.h"
#include "dsc_extractor.h"
#include "DyldSharedCache.h"
#include "MachOAnalyzer.h"
#include "SupportedArchs.h"
#include "Trie.hpp"

#include <vector>
#include <set>
#include <map>
#include <unordered_map>
#include <algorithm>
#include <dispatch/dispatch.h>

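// Describes where one segment of a cached dylib lives inside the mapped shared cache
// file: the segment name plus its file offset and size within the cache.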
struct seg_info
{
    seg_info(const char* n, uint64_t o, uint64_t s)
        : segName(n), offset(o), sizem(s) { }
    const char* segName;
    uint64_t    offset;
    uint64_t    sizem;
};

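// Hash and equality functors so the dylib-path -> segments map below can key directly
// off the C-string paths handed back by the cache iterator.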
class CStringHash {
public:
    size_t operator()(const char* __s) const {
        size_t __h = 0;
        for ( ; *__s; ++__s)
            __h = 5 * __h + *__s;
        return __h;
    }
};

class CStringEquals {
public:
    bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); }
};

typedef std::unordered_map<const char*, std::vector<seg_info>, CStringHash, CStringEquals> NameToSegments;

// Filter to find individual symbol re-exports in trie
class NotReExportSymbol {
public:
    NotReExportSymbol(const std::set<int> &rd) : _reexportDeps(rd) {}
    bool operator()(const ExportInfoTrie::Entry &entry) const {
        return isSymbolReExport(entry);
    }
private:
    bool isSymbolReExport(const ExportInfoTrie::Entry &entry) const {
        if ( (entry.info.flags & EXPORT_SYMBOL_FLAGS_KIND_MASK) != EXPORT_SYMBOL_FLAGS_KIND_REGULAR )
            return true;
        if ( (entry.info.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) == 0 )
            return true;
        // If the symbol comes from a dylib that is re-exported, this is not an individual symbol re-export
        if ( _reexportDeps.count((int)entry.info.other) != 0 )
            return true;
        return false;
    }
    const std::set<int> &_reexportDeps;
};

template <typename P>
struct LoadCommandInfo {
};

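// Rewrites an extracted dylib so it can stand alone again: fixes up the load commands
// to match the new, densely packed file layout and rebuilds a private __LINKEDIT
// (symbol table, indirect symbols, string pool) from the cache's merged linkedit data.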
template <typename A>
class LinkeditOptimizer {
    typedef typename A::P           P;
    typedef typename A::P::E        E;
    typedef typename A::P::uint_t   pint_t;

private:
    macho_segment_command<P>*   linkEditSegCmd    = nullptr;
    symtab_command*             symtab            = nullptr;
    dysymtab_command*           dynamicSymTab     = nullptr;
    linkedit_data_command*      functionStarts    = nullptr;
    linkedit_data_command*      dataInCode        = nullptr;
    uint32_t                    exportsTrieOffset = 0;
    uint32_t                    exportsTrieSize   = 0;
    std::set<int>               reexportDeps;

public:

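    // Walk the load commands: re-base every segment's file offset onto the new packed
    // layout, remember __LINKEDIT and the linkedit-related commands, and zero out the
    // dyld-info/export-trie data the extracted dylib no longer needs (lldb only uses the
    // symbol table). Also drops the bogus LC_SEGMENT_SPLIT_INFO left by old iOS caches.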
    void optimize_loadcommands(dyld3::MachOAnalyzer* mh)
    {
        // update header flags
        mh->flags &= 0x7FFFFFFF; // remove in-cache bit

        // update load commands
        __block uint64_t cumulativeFileSize = 0;
        __block int depIndex = 0;
        Diagnostics diag;
        mh->forEachLoadCommand(diag, ^(const load_command* cmd, bool &stop) {
            switch ( cmd->cmd ) {
                case macho_segment_command<P>::CMD: {
                    auto segCmd = (macho_segment_command<P>*)cmd;
                    segCmd->set_fileoff(cumulativeFileSize);
                    segCmd->set_filesize(segCmd->vmsize());

                    auto const sectionsStart = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
                    auto const sectionsEnd = &sectionsStart[segCmd->nsects()];
                    for (auto sect = sectionsStart; sect < sectionsEnd; ++sect) {
                        if ( sect->offset() != 0 ) {
                            sect->set_offset((uint32_t)(cumulativeFileSize + sect->addr() - segCmd->vmaddr()));
                        }
                    }
                    if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 )
                        linkEditSegCmd = segCmd;
                    cumulativeFileSize += segCmd->filesize();
                } break;
                case LC_DYLD_INFO_ONLY: {
                    // zero out all dyld info. lldb only uses symbol table
                    auto dyldInfo = (dyld_info_command*)cmd;
                    exportsTrieOffset = dyldInfo->export_off;
                    exportsTrieSize = dyldInfo->export_size;
                    dyldInfo->rebase_off = 0;
                    dyldInfo->rebase_size = 0;
                    dyldInfo->bind_off = 0;
                    dyldInfo->bind_size = 0;
                    dyldInfo->weak_bind_off = 0;
                    dyldInfo->weak_bind_size = 0;
                    dyldInfo->lazy_bind_off = 0;
                    dyldInfo->lazy_bind_size = 0;
                    dyldInfo->export_off = 0;
                    dyldInfo->export_size = 0;
                } break;
                case LC_DYLD_EXPORTS_TRIE: {
                    // don't put export trie into extracted dylib. lldb only uses symbol table
                    linkedit_data_command* exportsTrie = (linkedit_data_command*)cmd;
                    exportsTrieOffset = exportsTrie->dataoff;
                    exportsTrieSize = exportsTrie->datasize;
                    exportsTrie->dataoff = 0;
                    exportsTrie->datasize = 0;
                } break;
                case LC_SYMTAB:
                    symtab = (symtab_command*)cmd;
                    break;
                case LC_DYSYMTAB:
                    dynamicSymTab = (dysymtab_command*)cmd;
                    break;
                case LC_FUNCTION_STARTS:
                    functionStarts = (linkedit_data_command*)cmd;
                    break;
                case LC_DATA_IN_CODE:
                    dataInCode = (linkedit_data_command*)cmd;
                    break;
                case LC_LOAD_DYLIB:
                case LC_LOAD_WEAK_DYLIB:
                case LC_REEXPORT_DYLIB:
                case LC_LOAD_UPWARD_DYLIB:
                    depIndex++;
                    if ( cmd->cmd == LC_REEXPORT_DYLIB ) {
                        reexportDeps.insert(depIndex);
                    }
                    break;
                default:
                    break;
            }
        });

        mh->removeLoadCommand(diag, ^(const load_command* cmd, bool& remove, bool &stop) {
            switch ( cmd->cmd ) {
                case LC_SEGMENT_SPLIT_INFO:
                    // <rdar://problem/23212513> dylibs iOS 9 dyld caches have bogus LC_SEGMENT_SPLIT_INFO
                    remove = true;
                    stop = true;
                    break;
                default:
                    break;
            }
        });
    }

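    // Build a fresh __LINKEDIT for the extracted dylib into new_linkedit_data: copy the
    // function starts and data-in-code info, merge back any unmapped local symbols kept
    // in the cache, recreate N_INDR entries for re-exported symbols, then point the
    // symtab/dysymtab load commands at the rebuilt tables.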
    int optimize_linkedit(std::vector<uint8_t> &new_linkedit_data, uint64_t textOffsetInCache, const void* mapped_cache)
    {
        // rebuild symbol table
        if ( linkEditSegCmd == nullptr ) {
            fprintf(stderr, "__LINKEDIT not found\n");
            return -1;
        }
        if ( symtab == nullptr ) {
            fprintf(stderr, "LC_SYMTAB not found\n");
            return -1;
        }
        if ( dynamicSymTab == nullptr ) {
            fprintf(stderr, "LC_DYSYMTAB not found\n");
            return -1;
        }

        const uint64_t newFunctionStartsOffset = new_linkedit_data.size();
        uint32_t functionStartsSize = 0;
        if ( functionStarts != NULL ) {
            // copy function starts from original cache file to new mapped dylib file
            functionStartsSize = functionStarts->datasize;
            new_linkedit_data.insert(new_linkedit_data.end(),
                                     (char*)mapped_cache + functionStarts->dataoff,
                                     (char*)mapped_cache + functionStarts->dataoff + functionStartsSize);
        }

        // pointer align
        while ((linkEditSegCmd->fileoff() + new_linkedit_data.size()) % sizeof(pint_t))
            new_linkedit_data.push_back(0);

        const uint64_t newDataInCodeOffset = new_linkedit_data.size();
        uint32_t dataInCodeSize = 0;
        if ( dataInCode != NULL ) {
            // copy data-in-code info from original cache file to new mapped dylib file
            dataInCodeSize = dataInCode->datasize;
            new_linkedit_data.insert(new_linkedit_data.end(),
                                     (char*)mapped_cache + dataInCode->dataoff,
                                     (char*)mapped_cache + dataInCode->dataoff + dataInCodeSize);
        }

        std::vector<ExportInfoTrie::Entry> exports;
        if ( exportsTrieSize != 0 ) {
            const uint8_t* exportsStart = ((uint8_t*)mapped_cache) + exportsTrieOffset;
            const uint8_t* exportsEnd = &exportsStart[exportsTrieSize];
            ExportInfoTrie::parseTrie(exportsStart, exportsEnd, exports);
            exports.erase(std::remove_if(exports.begin(), exports.end(), NotReExportSymbol(reexportDeps)), exports.end());
        }

        const DyldSharedCache* cache = (DyldSharedCache*)mapped_cache;
        macho_nlist<P>* allLocalNlists = (macho_nlist<P>*)cache->getLocalNlistEntries();
        __block macho_nlist<P>* localNlists = nullptr;
        __block uint32_t localNlistCount = 0;
        cache->forEachLocalSymbolEntry(^(uint32_t dylibOffset, uint32_t nlistStartIndex, uint32_t nlistCount, bool& stop) {
            if (dylibOffset == textOffsetInCache) {
                localNlists = &allLocalNlists[nlistStartIndex];
                localNlistCount = nlistCount;
                stop = true;
            }
        });
        // compute number of symbols in new symbol table
        const macho_nlist<P>* mergedSymTabStart = (macho_nlist<P>*)(((uint8_t*)mapped_cache) + symtab->symoff);
        const macho_nlist<P>* const mergedSymTabend = &mergedSymTabStart[symtab->nsyms];
        uint32_t newSymCount = symtab->nsyms;
        if ( localNlistCount != 0 ) {
            // if we are recombining with unmapped locals, recompute new total size
            newSymCount = localNlistCount + dynamicSymTab->nextdefsym + dynamicSymTab->nundefsym;
        }

        // add room for N_INDR symbols for re-exported symbols
        newSymCount += exports.size();

        // copy symbol entries and strings from original cache file to new mapped dylib file
        const char* mergedStringPoolStart = (char*)mapped_cache + symtab->stroff;
        const char* mergedStringPoolEnd = &mergedStringPoolStart[symtab->strsize];

        // First count how many entries we need
        std::vector<macho_nlist<P>> newSymTab;
        newSymTab.reserve(newSymCount);
        std::vector<char> newSymNames;

        // first pool entry is always empty string
        newSymNames.push_back('\0');

        // local symbols are first in dylibs, if this cache has unmapped locals, insert them all first
        uint32_t undefSymbolShift = 0;
        if ( localNlistCount != 0 ) {
            const char* localStrings = cache->getLocalStrings();
            undefSymbolShift = localNlistCount - dynamicSymTab->nlocalsym;
            // update load command to reflect new count of locals
            dynamicSymTab->ilocalsym = (uint32_t)newSymTab.size();
            dynamicSymTab->nlocalsym = localNlistCount;
            // copy local symbols
            for (uint32_t i=0; i < localNlistCount; ++i) {
                const char* localName = &localStrings[localNlists[i].n_strx()];
                if ( localName > localStrings + cache->getLocalStringsSize() )
                    localName = "<corrupt local symbol name>";
                macho_nlist<P> t = localNlists[i];
                t.set_n_strx((uint32_t)newSymNames.size());
                newSymNames.insert(newSymNames.end(),
                                   localName,
                                   localName + (strlen(localName) + 1));
                newSymTab.push_back(t);
            }
            // now start copying symbol table from start of externs instead of start of locals
            mergedSymTabStart = &mergedSymTabStart[dynamicSymTab->iextdefsym];
        }
        // copy full symbol table from cache (skipping locals if they were elsewhere)
        for (const macho_nlist<P>* s = mergedSymTabStart; s != mergedSymTabend; ++s) {
            macho_nlist<P> t = *s;
            t.set_n_strx((uint32_t)newSymNames.size());
            const char* symName = &mergedStringPoolStart[s->n_strx()];
            if ( symName > mergedStringPoolEnd )
                symName = "<corrupt symbol name>";
            newSymNames.insert(newSymNames.end(),
                               symName,
                               symName + (strlen(symName) + 1));
            newSymTab.push_back(t);
        }
        // <rdar://problem/16529213> recreate N_INDR symbols in extracted dylibs for debugger
        for (std::vector<ExportInfoTrie::Entry>::iterator it = exports.begin(); it != exports.end(); ++it) {
            macho_nlist<P> t;
            memset(&t, 0, sizeof(t));
            t.set_n_strx((uint32_t)newSymNames.size());
            t.set_n_type(N_INDR | N_EXT);
            t.set_n_sect(0);
            t.set_n_desc(0);
            newSymNames.insert(newSymNames.end(),
                               it->name.c_str(),
                               it->name.c_str() + (it->name.size() + 1));
            const char* importName = it->info.importName.c_str();
            if ( *importName == '\0' )
                importName = it->name.c_str();
            t.set_n_value(newSymNames.size());
            newSymNames.insert(newSymNames.end(),
                               importName,
                               importName + (strlen(importName) + 1));
            newSymTab.push_back(t);
        }

        if ( newSymCount != newSymTab.size() ) {
            fprintf(stderr, "symbol count miscalculation\n");
            return -1;
        }

        //const uint64_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t);
        //macho_nlist<P>* const newSymTabStart = (macho_nlist<P>*)(((uint8_t*)mh) + newSymTabOffset);
        //char* const newStringPoolStart = (char*)mh + newStringPoolOffset;

        // pointer align
        while ((linkEditSegCmd->fileoff() + new_linkedit_data.size()) % sizeof(pint_t))
            new_linkedit_data.push_back(0);

        const uint64_t newSymTabOffset = new_linkedit_data.size();

        // Copy sym tab
        for (macho_nlist<P>& sym : newSymTab) {
            uint8_t symData[sizeof(macho_nlist<P>)];
            memcpy(&symData, &sym, sizeof(sym));
            new_linkedit_data.insert(new_linkedit_data.end(), &symData[0], &symData[sizeof(macho_nlist<P>)]);
        }

        const uint64_t newIndSymTabOffset = new_linkedit_data.size();

        // Copy (and adjust) indirect symbol table
        const uint32_t* mergedIndSymTab = (uint32_t*)((char*)mapped_cache + dynamicSymTab->indirectsymoff);
        new_linkedit_data.insert(new_linkedit_data.end(),
                                 (char*)mergedIndSymTab,
                                 (char*)(mergedIndSymTab + dynamicSymTab->nindirectsyms));
        if ( undefSymbolShift != 0 ) {
            uint32_t* newIndSymTab = (uint32_t*)&new_linkedit_data[newIndSymTabOffset];
            for (int i=0; i < dynamicSymTab->nindirectsyms; ++i) {
                newIndSymTab[i] += undefSymbolShift;
            }
        }
        const uint64_t newStringPoolOffset = new_linkedit_data.size();

        // pointer align string pool size
        while (newSymNames.size() % sizeof(pint_t))
            newSymNames.push_back('\0');

        new_linkedit_data.insert(new_linkedit_data.end(), newSymNames.begin(), newSymNames.end());

        // update load commands
        if ( functionStarts != NULL ) {
            functionStarts->dataoff = (uint32_t)(newFunctionStartsOffset + linkEditSegCmd->fileoff());
            functionStarts->datasize = functionStartsSize;
        }
        if ( dataInCode != NULL ) {
            dataInCode->dataoff = (uint32_t)(newDataInCodeOffset + linkEditSegCmd->fileoff());
            dataInCode->datasize = dataInCodeSize;
        }

        symtab->nsyms = newSymCount;
        symtab->symoff = (uint32_t)(newSymTabOffset + linkEditSegCmd->fileoff());
        symtab->stroff = (uint32_t)(newStringPoolOffset + linkEditSegCmd->fileoff());
        symtab->strsize = (uint32_t)newSymNames.size();
        dynamicSymTab->extreloff = 0;
        dynamicSymTab->nextrel = 0;
        dynamicSymTab->locreloff = 0;
        dynamicSymTab->nlocrel = 0;
        dynamicSymTab->indirectsymoff = (uint32_t)(newIndSymTabOffset + linkEditSegCmd->fileoff());
        linkEditSegCmd->set_filesize(symtab->stroff + symtab->strsize - linkEditSegCmd->fileoff());
        linkEditSegCmd->set_vmsize((linkEditSegCmd->filesize() + 4095) & (-4096));

        return 0;
    }

};

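// Create every intermediate directory of file_path (the trailing file name is ignored),
// so an extracted dylib can be written at its original install path.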
static void make_dirs(const char* file_path)
{
    //printf("make_dirs(%s)\n", file_path);
    char dirs[strlen(file_path)+1];
    strcpy(dirs, file_path);
    char* lastSlash = strrchr(dirs, '/');
    if ( lastSlash == NULL )
        return;
    lastSlash[1] = '\0';
    struct stat stat_buf;
    if ( stat(dirs, &stat_buf) != 0 ) {
        char* afterSlash = &dirs[1];
        char* slash;
        while ( (slash = strchr(afterSlash, '/')) != NULL ) {
            *slash = '\0';
            ::mkdir(dirs, S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH);
            //printf("mkdir(%s)\n", dirs);
            *slash = '/';
            afterSlash = slash+1;
        }
    }
}


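// Copy one dylib out of the mapped cache: append its non-__LINKEDIT segments to
// dylib_data, run LinkeditOptimizer over the copy to rebuild the load commands and a
// private __LINKEDIT, then pad the result out to a page boundary.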
template <typename A>
void dylib_maker(const void* mapped_cache, std::vector<uint8_t> &dylib_data, const std::vector<seg_info>& segments) {
    typedef typename A::P P;

    size_t additionalSize = 0;
    for(std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {
        if ( strcmp(it->segName, "__LINKEDIT") != 0 )
            additionalSize += it->sizem;
    }

    std::vector<uint8_t> new_dylib_data;
    new_dylib_data.reserve(additionalSize);

    // Write regular segments into the buffer
    uint64_t textOffsetInCache = 0;
    for( std::vector<seg_info>::const_iterator it=segments.begin(); it != segments.end(); ++it) {

        if(strcmp(it->segName, "__TEXT") == 0 )
            textOffsetInCache = it->offset;

        //printf("segName=%s, offset=0x%llX, size=0x%0llX\n", it->segName, it->offset, it->sizem);
        // Copy all but the __LINKEDIT. It will be copied later, during linkedit
        // optimization, into a temporary buffer; copying it all now for each dylib
        // would not be efficient.
        if (strcmp(it->segName, "__LINKEDIT") == 0 )
            continue;
        std::copy(((uint8_t*)mapped_cache)+it->offset, ((uint8_t*)mapped_cache)+it->offset+it->sizem, std::back_inserter(new_dylib_data));
    }

    // optimize linkedit
    std::vector<uint8_t> new_linkedit_data;
    new_linkedit_data.reserve(1 << 20);

    LinkeditOptimizer<A> linkeditOptimizer;
    dyld3::MachOAnalyzer* mh = (dyld3::MachOAnalyzer*)&new_dylib_data.front();
    linkeditOptimizer.optimize_loadcommands(mh);
    linkeditOptimizer.optimize_linkedit(new_linkedit_data, textOffsetInCache, mapped_cache);

    new_dylib_data.insert(new_dylib_data.end(), new_linkedit_data.begin(), new_linkedit_data.end());

    // Page align file
    while (new_dylib_data.size() % 4096)
        new_dylib_data.push_back(0);

    dylib_data.insert(dylib_data.end(), new_dylib_data.begin(), new_dylib_data.end());
}

typedef __typeof(dylib_maker<x86>) dylib_maker_func;
typedef void (^progress_block)(unsigned current, unsigned total);

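// One SharedCacheDylibExtractor per dylib to extract; SharedCacheExtractor owns the
// shared state (segment map, output root, arch-specific dylib maker) and fans the
// per-dylib work out across a GCD queue.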
class SharedCacheExtractor;
struct SharedCacheDylibExtractor {
    SharedCacheDylibExtractor(const char* name, std::vector<seg_info> segInfo)
        : name(name), segInfo(segInfo) { }

    void extractCache(SharedCacheExtractor& context);

    const char*                  name;
    const std::vector<seg_info>  segInfo;
    int                          result = 0;
};

struct SharedCacheExtractor {
    SharedCacheExtractor(const NameToSegments& map,
                         const char* extraction_root_path,
                         dylib_maker_func* dylib_create_func,
                         void* mapped_cache,
                         progress_block progress)
        : map(map), extraction_root_path(extraction_root_path),
          dylib_create_func(dylib_create_func), mapped_cache(mapped_cache),
          progress(progress) {

        extractors.reserve(map.size());
        for (auto it : map)
            extractors.emplace_back(it.first, it.second);

        // Limit the number of open files. 16 seems to give better performance than higher numbers.
        sema = dispatch_semaphore_create(16);
    }
    int extractCaches();

    static void extractCache(void *ctx, size_t i);

    const NameToSegments&                   map;
    std::vector<SharedCacheDylibExtractor>  extractors;
    dispatch_semaphore_t                    sema;
    const char*                             extraction_root_path;
    dylib_maker_func*                       dylib_create_func;
    void*                                   mapped_cache;
    progress_block                          progress;
    std::atomic_int                         count = { 0 };
};

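// Extract every dylib in parallel via dispatch_apply_f, then report the first
// per-dylib failure (if any) as the overall result.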
int SharedCacheExtractor::extractCaches() {
    dispatch_queue_t process_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW, 0);
    dispatch_apply_f(map.size(), process_queue,
                     this, extractCache);

    int result = 0;
    for (const SharedCacheDylibExtractor& extractor : extractors) {
        if (extractor.result != 0) {
            result = extractor.result;
            break;
        }
    }
    return result;
}

void SharedCacheExtractor::extractCache(void *ctx, size_t i) {
    SharedCacheExtractor& context = *(SharedCacheExtractor*)ctx;
    dispatch_semaphore_wait(context.sema, DISPATCH_TIME_FOREVER);
    context.extractors[i].extractCache(context);
    dispatch_semaphore_signal(context.sema);
}

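// Extract a single dylib: create its output file under the extraction root, build the
// file contents with the arch-specific dylib maker, and write them out.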
void SharedCacheDylibExtractor::extractCache(SharedCacheExtractor &context) {

    char dylib_path[PATH_MAX];
    strcpy(dylib_path, context.extraction_root_path);
    strcat(dylib_path, "/");
    strcat(dylib_path, name);

    //printf("%s with %lu segments\n", dylib_path, it->second.size());
    // make sure all directories in this path exist
    make_dirs(dylib_path);

    // open file, create if it does not already exist
    int fd = ::open(dylib_path, O_CREAT | O_TRUNC | O_EXLOCK | O_RDWR, 0644);
    if ( fd == -1 ) {
        fprintf(stderr, "can't open or create dylib file %s, errno=%d\n", dylib_path, errno);
        result = -1;
        return;
    }

    std::vector<uint8_t> vec;
    context.dylib_create_func(context.mapped_cache, vec, segInfo);
    context.progress(context.count++, (unsigned)context.map.size());

    // Write file data
    if( write(fd, &vec.front(), vec.size()) == -1) {
        fprintf(stderr, "error writing, errno=%d\n", errno);
        result = -1;
    }

    close(fd);
}

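// Sanity-check the mapped cache before extracting anything: make sure the embedded code
// signature fits within the file and that each non-data page still matches its
// code-directory hash, so we fail fast on a truncated or corrupt cache.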
static int sharedCacheIsValid(const void* mapped_cache, uint64_t size) {
    // First check that the size is good.
    // Note the shared cache may not have a codeSignatureSize value set so we need to first make
    // sure we have space for the CS_SuperBlob, then later crack that to check for the size of the rest.
    const DyldSharedCache* dyldSharedCache = (DyldSharedCache*)mapped_cache;
    uint64_t requiredSizeForCSSuperBlob = dyldSharedCache->header.codeSignatureOffset + sizeof(CS_SuperBlob);
    const dyld_cache_mapping_info* mappings = (dyld_cache_mapping_info*)((uint8_t*)mapped_cache + dyldSharedCache->header.mappingOffset);
    if ( requiredSizeForCSSuperBlob > size ) {
        fprintf(stderr, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size, requiredSizeForCSSuperBlob);
        return -1;
    }

    // Now see if the code signatures are valid as that tells us the pages aren't corrupt.
    // First find all of the regions of the shared cache we computed cd hashes for
    std::vector<std::pair<uint64_t, uint64_t>> sharedCacheRegions;
    for (uint32_t i = 0; i != dyldSharedCache->header.mappingCount; ++i) {
        sharedCacheRegions.emplace_back(std::make_pair(mappings[i].fileOffset, mappings[i].fileOffset + mappings[i].size));
    }
    if (dyldSharedCache->header.localSymbolsSize)
        sharedCacheRegions.emplace_back(std::make_pair(dyldSharedCache->header.localSymbolsOffset, dyldSharedCache->header.localSymbolsOffset + dyldSharedCache->header.localSymbolsSize));
    size_t inBbufferSize = 0;
    for (auto& sharedCacheRegion : sharedCacheRegions)
        inBbufferSize += (sharedCacheRegion.second - sharedCacheRegion.first);

    // Now take the cd hash from the cache itself and validate the regions we found.
    uint8_t* codeSignatureRegion = (uint8_t*)mapped_cache + dyldSharedCache->header.codeSignatureOffset;
    CS_SuperBlob* sb = reinterpret_cast<CS_SuperBlob*>(codeSignatureRegion);
    if (sb->magic != htonl(CSMAGIC_EMBEDDED_SIGNATURE)) {
        fprintf(stderr, "Error: dyld shared cache code signature magic is incorrect.\n");
        return -1;
    }

    size_t sbSize = ntohl(sb->length);
    uint64_t requiredSizeForCS = dyldSharedCache->header.codeSignatureOffset + sbSize;
    if ( requiredSizeForCS > size ) {
        fprintf(stderr, "Error: dyld shared cache size 0x%08llx is less than required size of 0x%08llx.\n", size, requiredSizeForCS);
        return -1;
    }

    // Find the offset to the code directory.
    CS_CodeDirectory* cd = nullptr;
    for (unsigned i = 0; i != sb->count; ++i) {
        if (ntohl(sb->index[i].type) == CSSLOT_CODEDIRECTORY) {
            cd = (CS_CodeDirectory*)(codeSignatureRegion + ntohl(sb->index[i].offset));
            break;
        }
    }

    if (!cd) {
        fprintf(stderr, "Error: dyld shared cache code signature directory is missing.\n");
        return -1;
    }

    if ( (uint8_t*)cd > (codeSignatureRegion + sbSize) ) {
        fprintf(stderr, "Error: dyld shared cache code signature directory is out of bounds.\n");
        return -1;
    }

    if ( cd->magic != htonl(CSMAGIC_CODEDIRECTORY) ) {
        fprintf(stderr, "Error: dyld shared cache code signature directory magic is incorrect.\n");
        return -1;
    }

    uint32_t pageSize = 1 << cd->pageSize;
    uint32_t slotCountFromRegions = (uint32_t)((inBbufferSize + pageSize - 1) / pageSize);
    if ( ntohl(cd->nCodeSlots) < slotCountFromRegions ) {
        fprintf(stderr, "Error: dyld shared cache code signature directory num slots is incorrect.\n");
        return -1;
    }

    uint32_t dscDigestFormat = kCCDigestNone;
    switch (cd->hashType) {
        case CS_HASHTYPE_SHA1:
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
            dscDigestFormat = kCCDigestSHA1;
#pragma clang diagnostic pop
            break;
        case CS_HASHTYPE_SHA256:
            dscDigestFormat = kCCDigestSHA256;
            break;
        default:
            break;
    }

    if (dscDigestFormat != kCCDigestNone) {
        const uint64_t csPageSize = 1 << cd->pageSize;
        size_t hashOffset = ntohl(cd->hashOffset);
        uint8_t* hashSlot = (uint8_t*)cd + hashOffset;
        uint8_t cdHashBuffer[cd->hashSize];

        // Skip local symbols for now as those aren't being codesigned correctly right now.
        size_t inBbufferSize = 0;
        for (auto& sharedCacheRegion : sharedCacheRegions) {
            if (sharedCacheRegion.first == dyldSharedCache->header.localSymbolsOffset)
                continue;
            inBbufferSize += (sharedCacheRegion.second - sharedCacheRegion.first);
        }
        uint32_t slotCountToProcess = (uint32_t)((inBbufferSize + pageSize - 1) / pageSize);

        for (unsigned i = 0; i != slotCountToProcess; ++i) {
            // Skip data pages as those may have been slid by ASLR in the extracted file
            uint64_t fileOffset = i * csPageSize;
            bool isDataPage = false;
            for (unsigned mappingIndex = 1; mappingIndex != (dyldSharedCache->header.mappingCount - 1); ++mappingIndex) {
                if ( (fileOffset >= mappings[mappingIndex].fileOffset) && (fileOffset < (mappings[mappingIndex].fileOffset + mappings[mappingIndex].size)) ) {
                    isDataPage = true;
                    break;
                }
            }
            if ( isDataPage )
                continue;

            CCDigest(dscDigestFormat, (uint8_t*)mapped_cache + fileOffset, (size_t)csPageSize, cdHashBuffer);
            uint8_t* cacheCdHashBuffer = hashSlot + (i * cd->hashSize);
            if (memcmp(cdHashBuffer, cacheCdHashBuffer, cd->hashSize) != 0) {
                fprintf(stderr, "Error: dyld shared cache code signature for page %d is incorrect.\n", i);
                return -1;
            }
        }
    }
    return 0;
}

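// Extract every dylib from the shared cache at shared_cache_file_path into
// extraction_root_path, invoking the progress block as dylibs are written out.
// A minimal caller (assuming this code is linked in directly rather than loaded as
// dsc_extractor.bundle; the cache and output paths below are placeholders) might be:
//
//     int err = dyld_shared_cache_extract_dylibs_progress(
//                   "/path/to/dyld_shared_cache_arm64", "/tmp/extracted",
//                   ^(unsigned current, unsigned total) {
//                       printf("%u/%u\n", current, total);
//                   });
//
// The #if 0 test program at the bottom of this file shows the dlopen()-based
// equivalent against the dsc_extractor.bundle shipped with Xcode.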
int dyld_shared_cache_extract_dylibs_progress(const char* shared_cache_file_path, const char* extraction_root_path,
                                              progress_block progress)
{
    struct stat statbuf;
    if (stat(shared_cache_file_path, &statbuf)) {
        fprintf(stderr, "Error: stat failed for dyld shared cache at %s\n", shared_cache_file_path);
        return -1;
    }

    int cache_fd = open(shared_cache_file_path, O_RDONLY);
    if (cache_fd < 0) {
        fprintf(stderr, "Error: failed to open shared cache file at %s\n", shared_cache_file_path);
        return -1;
    }

    void* mapped_cache = mmap(NULL, (size_t)statbuf.st_size, PROT_READ, MAP_PRIVATE, cache_fd, 0);
    if (mapped_cache == MAP_FAILED) {
        fprintf(stderr, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path, errno);
        return -1;
    }

    close(cache_fd);

    // instantiate arch specific dylib maker
    dylib_maker_func* dylib_create_func = nullptr;
    if ( strcmp((char*)mapped_cache, "dyld_v1    i386") == 0 )
        dylib_create_func = dylib_maker<x86>;
    else if ( strcmp((char*)mapped_cache, "dyld_v1  x86_64") == 0 )
        dylib_create_func = dylib_maker<x86_64>;
    else if ( strcmp((char*)mapped_cache, "dyld_v1 x86_64h") == 0 )
        dylib_create_func = dylib_maker<x86_64>;
    else if ( strcmp((char*)mapped_cache, "dyld_v1   armv5") == 0 )
        dylib_create_func = dylib_maker<arm>;
    else if ( strcmp((char*)mapped_cache, "dyld_v1   armv6") == 0 )
        dylib_create_func = dylib_maker<arm>;
    else if ( strcmp((char*)mapped_cache, "dyld_v1   armv7") == 0 )
        dylib_create_func = dylib_maker<arm>;
    else if ( strncmp((char*)mapped_cache, "dyld_v1  armv7", 14) == 0 )
        dylib_create_func = dylib_maker<arm>;
    else if ( strcmp((char*)mapped_cache, "dyld_v1   arm64") == 0 )
        dylib_create_func = dylib_maker<arm64>;
#if SUPPORT_ARCH_arm64e
    else if ( strcmp((char*)mapped_cache, "dyld_v1  arm64e") == 0 )
        dylib_create_func = dylib_maker<arm64>;
#endif
#if SUPPORT_ARCH_arm64_32
    else if ( strcmp((char*)mapped_cache, "dyld_v1arm64_32") == 0 )
        dylib_create_func = dylib_maker<arm64_32>;
#endif
    else {
        fprintf(stderr, "Error: unrecognized dyld shared cache magic.\n");
        munmap(mapped_cache, (size_t)statbuf.st_size);
        return -1;
    }

    // Verify that the cache isn't corrupt.
    if (int result = sharedCacheIsValid(mapped_cache, (uint64_t)statbuf.st_size)) {
        munmap(mapped_cache, (size_t)statbuf.st_size);
        return result;
    }

    // iterate through all images in cache and build map of dylibs and segments
    __block NameToSegments map;
    int result = 0;

    result = dyld_shared_cache_iterate(mapped_cache, (uint32_t)statbuf.st_size, ^(const dyld_shared_cache_dylib_info* dylibInfo, const dyld_shared_cache_segment_info* segInfo) {
        map[dylibInfo->path].push_back(seg_info(segInfo->name, segInfo->fileOffset, segInfo->fileSize));
    });

    if (result != 0) {
        fprintf(stderr, "Error: dyld_shared_cache_iterate failed.\n");
        munmap(mapped_cache, (size_t)statbuf.st_size);
        return result;
    }

    // for each dylib instantiate a dylib file
    SharedCacheExtractor extractor(map, extraction_root_path, dylib_create_func, mapped_cache, progress);
    result = extractor.extractCaches();

    munmap(mapped_cache, (size_t)statbuf.st_size);
    return result;
}

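// Convenience wrapper around the progress variant that supplies a no-op progress block.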
int dyld_shared_cache_extract_dylibs(const char* shared_cache_file_path, const char* extraction_root_path)
{
    return dyld_shared_cache_extract_dylibs_progress(shared_cache_file_path, extraction_root_path,
                                                     ^(unsigned , unsigned) {} );
}


#if 0
// test program
#include <stdio.h>
#include <stddef.h>
#include <dlfcn.h>


typedef int (*extractor_proc)(const char* shared_cache_file_path, const char* extraction_root_path,
                              void (^progress)(unsigned current, unsigned total));

int main(int argc, const char* argv[])
{
    if ( argc != 3 ) {
        fprintf(stderr, "usage: dsc_extractor <path-to-cache-file> <path-to-device-dir>\n");
        return 1;
    }

    //void* handle = dlopen("/Volumes/my/src/dyld/build/Debug/dsc_extractor.bundle", RTLD_LAZY);
    void* handle = dlopen("/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/usr/lib/dsc_extractor.bundle", RTLD_LAZY);
    if ( handle == NULL ) {
        fprintf(stderr, "dsc_extractor.bundle could not be loaded\n");
        return 1;
    }

    extractor_proc proc = (extractor_proc)dlsym(handle, "dyld_shared_cache_extract_dylibs_progress");
    if ( proc == NULL ) {
        fprintf(stderr, "dsc_extractor.bundle did not have dyld_shared_cache_extract_dylibs_progress symbol\n");
        return 1;
    }

    int result = (*proc)(argv[1], argv[2], ^(unsigned c, unsigned total) { printf("%d/%d\n", c, total); } );
    fprintf(stderr, "dyld_shared_cache_extract_dylibs_progress() => %d\n", result);
    return 0;
}

#endif