darling-objc4/markgc.cpp
2022-03-31 21:15:07 -07:00

587 lines
21 KiB
C++

/*
* Copyright (c) 2007-2009 Apple Inc. All Rights Reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdbool.h>
#include <fcntl.h>
#include <limits.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <os/overflow.h>
#include <mach-o/fat.h>
#include <mach-o/arch.h>
#include <mach-o/loader.h>
// Some OS X SDKs don't define these, so fallbacks are supplied here. Every
// definition is guarded by #ifndef and therefore only takes effect when the
// headers included above left the macro undefined.
#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM ((cpu_type_t) 12)
#endif
#ifndef CPU_ARCH_ABI64
#define CPU_ARCH_ABI64 0x01000000 /* 64 bit ABI */
#endif
// The ARM64 cpu type is the ARM cpu type with the 64-bit ABI bit set.
#ifndef CPU_TYPE_ARM64
#define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64)
#endif
// File abstraction taken from ld64/FileAbstraction.hpp
// and ld64/MachOFileAbstraction.hpp.
//
// INLINE is appended to the accessor definitions below. When the compiler
// defines __OPTIMIZE__ it expands to the always_inline attribute; otherwise
// it expands to nothing.
#ifdef __OPTIMIZE__
#define INLINE __attribute__((always_inline))
#else
#define INLINE
#endif
//
// This abstraction layer is for use with file formats that have 64-bit/32-bit and Big-Endian/Little-Endian variants
//
// For example: to make a utility that handles 32-bit little endian files use: Pointer32<LittleEndian>
//
//
// get16() read a 16-bit number from an E endian struct
// set16() write a 16-bit number to an E endian struct
// get32() read a 32-bit number from an E endian struct
// set32() write a 32-bit number to an E endian struct
// get64() read a 64-bit number from an E endian struct
// set64() write a 64-bit number to an E endian struct
//
// getBits() read a bit field from an E endian struct (bitCount=number of bits in field, firstBit=bit index of field)
// setBits() write a bit field to an E endian struct (bitCount=number of bits in field, firstBit=bit index of field)
//
// getBitsRaw() read a bit field from a struct with native endianness
// setBitsRaw() write a bit field to a struct with native endianness
//
//
// Accessors for values stored in big-endian byte order.
//
// get16/32/64 and set16/32/64 convert between the stored representation and
// a host-order integer via the OSReadBigInt*/OSWriteBigInt* primitives.
//
// The bit-field helpers number bits from the most significant end of the
// 32-bit word: a field starting at firstBit with bitCount bits is shifted
// down by (32 - firstBit - bitCount). Callers must keep
// firstBit + bitCount <= 32.
//
class BigEndian
{
public:
	static uint16_t get16(const uint16_t& from) INLINE { return OSReadBigInt16(&from, 0); }
	static void set16(uint16_t& into, uint16_t value) INLINE { OSWriteBigInt16(&into, 0, value); }

	static uint32_t get32(const uint32_t& from) INLINE { return OSReadBigInt32(&from, 0); }
	static void set32(uint32_t& into, uint32_t value) INLINE { OSWriteBigInt32(&into, 0, value); }

	static uint64_t get64(const uint64_t& from) INLINE { return OSReadBigInt64(&from, 0); }
	static void set64(uint64_t& into, uint64_t value) INLINE { OSWriteBigInt64(&into, 0, value); }

	// Reads a bit field from a big-endian 32-bit word.
	static uint32_t getBits(const uint32_t& from,
							uint8_t firstBit, uint8_t bitCount) INLINE
	{
		return getBitsRaw(get32(from), firstBit, bitCount);
	}

	// Writes a bit field into a big-endian 32-bit word (read-modify-write).
	static void setBits(uint32_t& into, uint32_t value,
						uint8_t firstBit, uint8_t bitCount) INLINE
	{
		uint32_t temp = get32(into);
		setBitsRaw(temp, value, firstBit, bitCount);
		set32(into, temp);
	}

	// Reads a bit field from a host-order 32-bit word.
	static uint32_t getBitsRaw(const uint32_t& from,
							   uint8_t firstBit, uint8_t bitCount) INLINE
	{
		// A zero-width field is empty; returning early also avoids a shift
		// by 32 (undefined) when firstBit is 0 as well.
		if (bitCount == 0) return 0;
		return ((from >> (32 - firstBit - bitCount)) & fieldMask(bitCount));
	}

	// Writes a bit field into a host-order 32-bit word, leaving the other
	// bits untouched. Bits of `value` above bitCount are ignored.
	static void setBitsRaw(uint32_t& into, uint32_t value,
						   uint8_t firstBit, uint8_t bitCount) INLINE
	{
		if (bitCount == 0) return;	// nothing to write; see getBitsRaw
		const uint32_t mask = fieldMask(bitCount);
		const int shift = 32 - firstBit - bitCount;
		uint32_t temp = into;
		temp &= ~(mask << shift);
		temp |= ((value & mask) << shift);
		into = temp;
	}

	enum { little_endian = 0 };

private:
	// Mask with the low bitCount bits set. Computed in unsigned arithmetic
	// and special-cased for full-width fields, because (1 << 31) - 1 and
	// 1 << 32 on a signed int are undefined.
	static uint32_t fieldMask(uint8_t bitCount) INLINE
	{
		return (bitCount >= 32) ? 0xFFFFFFFFu
								: ((static_cast<uint32_t>(1) << bitCount) - 1u);
	}
};
//
// Accessors for values stored in little-endian byte order.
//
// get16/32/64 and set16/32/64 convert between the stored representation and
// a host-order integer via the OSReadLittleInt*/OSWriteLittleInt* primitives.
//
// The bit-field helpers number bits from the least significant end of the
// 32-bit word: a field starting at firstBit is shifted down by firstBit.
// Callers must keep firstBit + bitCount <= 32.
//
class LittleEndian
{
public:
	static uint16_t get16(const uint16_t& from) INLINE { return OSReadLittleInt16(&from, 0); }
	static void set16(uint16_t& into, uint16_t value) INLINE { OSWriteLittleInt16(&into, 0, value); }

	static uint32_t get32(const uint32_t& from) INLINE { return OSReadLittleInt32(&from, 0); }
	static void set32(uint32_t& into, uint32_t value) INLINE { OSWriteLittleInt32(&into, 0, value); }

	static uint64_t get64(const uint64_t& from) INLINE { return OSReadLittleInt64(&from, 0); }
	static void set64(uint64_t& into, uint64_t value) INLINE { OSWriteLittleInt64(&into, 0, value); }

	// Reads a bit field from a little-endian 32-bit word.
	static uint32_t getBits(const uint32_t& from,
							uint8_t firstBit, uint8_t bitCount) INLINE
	{
		return getBitsRaw(get32(from), firstBit, bitCount);
	}

	// Writes a bit field into a little-endian 32-bit word (read-modify-write).
	static void setBits(uint32_t& into, uint32_t value,
						uint8_t firstBit, uint8_t bitCount) INLINE
	{
		uint32_t temp = get32(into);
		setBitsRaw(temp, value, firstBit, bitCount);
		set32(into, temp);
	}

	// Reads a bit field from a host-order 32-bit word.
	static uint32_t getBitsRaw(const uint32_t& from,
							   uint8_t firstBit, uint8_t bitCount) INLINE
	{
		return ((from >> firstBit) & fieldMask(bitCount));
	}

	// Writes a bit field into a host-order 32-bit word, leaving the other
	// bits untouched. Bits of `value` above bitCount are ignored.
	static void setBitsRaw(uint32_t& into, uint32_t value,
						   uint8_t firstBit, uint8_t bitCount) INLINE
	{
		const uint32_t mask = fieldMask(bitCount);
		uint32_t temp = into;
		temp &= ~(mask << firstBit);
		temp |= ((value & mask) << firstBit);
		into = temp;
	}

	enum { little_endian = 1 };

private:
	// Mask with the low bitCount bits set. Computed in unsigned arithmetic
	// and special-cased for full-width fields, because (1 << 31) - 1 and
	// 1 << 32 on a signed int are undefined.
	static uint32_t fieldMask(uint8_t bitCount) INLINE
	{
		return (bitCount >= 32) ? 0xFFFFFFFFu
								: ((static_cast<uint32_t>(1) << bitCount) - 1u);
	}
};
// Bind the endian helper classes to the host byte order: CurrentEndian is the
// helper matching the host, OtherEndian the opposite one. The build fails if
// the compiler announces neither byte order.
#if __BIG_ENDIAN__
using CurrentEndian = BigEndian;
using OtherEndian   = LittleEndian;
#elif __LITTLE_ENDIAN__
using CurrentEndian = LittleEndian;
using OtherEndian   = BigEndian;
#else
#error unknown endianness
#endif
//
// Pointer-size policy for 32-bit files. Pointer-sized fields are uint32_t and
// go through the 32-bit accessors of the endian class supplied as the
// template argument (exposed as E).
//
template <typename Endian>
class Pointer32
{
public:
	using uint_t = uint32_t;
	using sint_t = int32_t;
	using E      = Endian;

	// Reads a pointer-sized field and returns it widened to 64 bits.
	static uint64_t getP(const uint_t& from) INLINE
	{
		return Endian::get32(from);
	}

	// Writes a pointer-sized field through the endian class's set32().
	static void setP(uint_t& into, uint64_t value) INLINE
	{
		Endian::set32(into, value);
	}
};
//
// Pointer-size policy for 64-bit files. Pointer-sized fields are uint64_t and
// go through the 64-bit accessors of the endian class supplied as the
// template argument (exposed as E).
//
template <typename Endian>
class Pointer64
{
public:
	using uint_t = uint64_t;
	using sint_t = int64_t;
	using E      = Endian;

	// Reads a pointer-sized field.
	static uint64_t getP(const uint_t& from) INLINE
	{
		return Endian::get64(from);
	}

	// Writes a pointer-sized field.
	static void setP(uint_t& into, uint64_t value) INLINE
	{
		Endian::set64(into, value);
	}
};
//
// mach-o file header
//
// macho_header_content<P> picks the on-disk struct for a pointer-size and
// endianness combination: mach_header for Pointer32, mach_header_64 for
// Pointer64. The primary template is empty, so only these four combinations
// are usable.
//
template <typename P> struct macho_header_content {};
template <> struct macho_header_content<Pointer32<LittleEndian> > { mach_header    fields; };
template <> struct macho_header_content<Pointer32<BigEndian> >    { mach_header    fields; };
template <> struct macho_header_content<Pointer64<LittleEndian> > { mach_header_64 fields; };
template <> struct macho_header_content<Pointer64<BigEndian> >    { mach_header_64 fields; };

// Endian-aware view over a Mach-O header. Every field is read and written as
// a 32-bit value through the endian class P::E.
template <typename P>
class macho_header {
public:
	using E = typename P::E;

	// --- readers ---
	uint32_t magic() const INLINE      { return E::get32(header.fields.magic); }
	uint32_t cputype() const INLINE    { return E::get32(header.fields.cputype); }
	uint32_t cpusubtype() const INLINE { return E::get32(header.fields.cpusubtype); }
	uint32_t filetype() const INLINE   { return E::get32(header.fields.filetype); }
	uint32_t ncmds() const INLINE      { return E::get32(header.fields.ncmds); }
	uint32_t sizeofcmds() const INLINE { return E::get32(header.fields.sizeofcmds); }
	uint32_t flags() const INLINE      { return E::get32(header.fields.flags); }
	uint32_t reserved() const INLINE   { return E::get32(header.fields.reserved); }

	// --- writers ---
	// cputype and cpusubtype are reinterpreted as uint32_t so they can be
	// passed to set32(), which takes a uint32_t reference.
	void set_magic(uint32_t value) INLINE      { E::set32(header.fields.magic, value); }
	void set_cputype(uint32_t value) INLINE    { E::set32(reinterpret_cast<uint32_t&>(header.fields.cputype), value); }
	void set_cpusubtype(uint32_t value) INLINE { E::set32(reinterpret_cast<uint32_t&>(header.fields.cpusubtype), value); }
	void set_filetype(uint32_t value) INLINE   { E::set32(header.fields.filetype, value); }
	void set_ncmds(uint32_t value) INLINE      { E::set32(header.fields.ncmds, value); }
	void set_sizeofcmds(uint32_t value) INLINE { E::set32(header.fields.sizeofcmds, value); }
	void set_flags(uint32_t value) INLINE      { E::set32(header.fields.flags, value); }
	void set_reserved(uint32_t value) INLINE   { E::set32(header.fields.reserved, value); }

private:
	macho_header_content<P> header;
};
//
// mach-o load command
//
// Endian-aware view over the generic load_command prefix (cmd, cmdsize).
//
template <typename P>
class macho_load_command {
public:
	using E = typename P::E;

	// --- readers ---
	uint32_t cmd() const INLINE     { return E::get32(command.cmd); }
	uint32_t cmdsize() const INLINE { return E::get32(command.cmdsize); }

	// --- writers ---
	void set_cmd(uint32_t value) INLINE     { E::set32(command.cmd, value); }
	void set_cmdsize(uint32_t value) INLINE { E::set32(command.cmdsize, value); }

private:
	load_command command;
};
//
// mach-o segment load command
//
// macho_segment_content<P> picks the on-disk struct and the matching load
// command constant: segment_command / LC_SEGMENT for Pointer32,
// segment_command_64 / LC_SEGMENT_64 for Pointer64.
//
template <typename P> struct macho_segment_content {};
template <> struct macho_segment_content<Pointer32<LittleEndian> > { segment_command    fields; enum { CMD = LC_SEGMENT    }; };
template <> struct macho_segment_content<Pointer32<BigEndian> >    { segment_command    fields; enum { CMD = LC_SEGMENT    }; };
template <> struct macho_segment_content<Pointer64<LittleEndian> > { segment_command_64 fields; enum { CMD = LC_SEGMENT_64 }; };
template <> struct macho_segment_content<Pointer64<BigEndian> >    { segment_command_64 fields; enum { CMD = LC_SEGMENT_64 }; };

// Endian-aware view over a segment load command. Address and size fields are
// pointer-sized and go through P::getP/P::setP; the rest are 32-bit values
// accessed through P::E.
template <typename P>
class macho_segment_command {
public:
	using E = typename P::E;

	// Load command constant for this word size.
	enum {
		CMD = macho_segment_content<P>::CMD
	};

	// --- readers ---
	uint32_t    cmd() const INLINE      { return E::get32(segment.fields.cmd); }
	uint32_t    cmdsize() const INLINE  { return E::get32(segment.fields.cmdsize); }
	// The returned name points into the command and is passed to 16-byte
	// bounded comparisons and "%.16s" formatting by the callers in this file.
	const char* segname() const INLINE  { return segment.fields.segname; }
	uint64_t    vmaddr() const INLINE   { return P::getP(segment.fields.vmaddr); }
	uint64_t    vmsize() const INLINE   { return P::getP(segment.fields.vmsize); }
	uint64_t    fileoff() const INLINE  { return P::getP(segment.fields.fileoff); }
	uint64_t    filesize() const INLINE { return P::getP(segment.fields.filesize); }
	uint32_t    maxprot() const INLINE  { return E::get32(segment.fields.maxprot); }
	uint32_t    initprot() const INLINE { return E::get32(segment.fields.initprot); }
	uint32_t    nsects() const INLINE   { return E::get32(segment.fields.nsects); }
	uint32_t    flags() const INLINE    { return E::get32(segment.fields.flags); }

	// --- writers ---
	void set_cmd(uint32_t value) INLINE       { E::set32(segment.fields.cmd, value); }
	void set_cmdsize(uint32_t value) INLINE   { E::set32(segment.fields.cmdsize, value); }
	// Copies at most 16 bytes; strncpy zero-pads shorter names.
	void set_segname(const char* value) INLINE { strncpy(segment.fields.segname, value, 16); }
	void set_vmaddr(uint64_t value) INLINE    { P::setP(segment.fields.vmaddr, value); }
	void set_vmsize(uint64_t value) INLINE    { P::setP(segment.fields.vmsize, value); }
	void set_fileoff(uint64_t value) INLINE   { P::setP(segment.fields.fileoff, value); }
	void set_filesize(uint64_t value) INLINE  { P::setP(segment.fields.filesize, value); }
	// maxprot and initprot are reinterpreted as uint32_t so they can be
	// passed to set32(), which takes a uint32_t reference.
	void set_maxprot(uint32_t value) INLINE   { E::set32(reinterpret_cast<uint32_t&>(segment.fields.maxprot), value); }
	void set_initprot(uint32_t value) INLINE  { E::set32(reinterpret_cast<uint32_t&>(segment.fields.initprot), value); }
	void set_nsects(uint32_t value) INLINE    { E::set32(segment.fields.nsects, value); }
	void set_flags(uint32_t value) INLINE     { E::set32(segment.fields.flags, value); }

private:
	macho_segment_content<P> segment;
};
//
// mach-o section
//
// macho_section_content<P> picks the on-disk struct: section for Pointer32,
// section_64 for Pointer64.
//
template <typename P> struct macho_section_content {};
template <> struct macho_section_content<Pointer32<LittleEndian> > { section    fields; };
template <> struct macho_section_content<Pointer32<BigEndian> >    { section    fields; };
template <> struct macho_section_content<Pointer64<LittleEndian> > { section_64 fields; };
template <> struct macho_section_content<Pointer64<BigEndian> >    { section_64 fields; };

// Endian-aware view over a section record. addr and size are pointer-sized
// and go through P::getP/P::setP; the rest are 32-bit values accessed
// through P::E.
template <typename P>
class macho_section {
public:
	using E = typename P::E;

	// --- readers ---
	// The returned names point into the record and are passed to 16-byte
	// bounded comparisons and "%.16s" formatting by the callers in this file.
	const char* sectname() const INLINE  { return section.fields.sectname; }
	const char* segname() const INLINE   { return section.fields.segname; }
	uint64_t    addr() const INLINE      { return P::getP(section.fields.addr); }
	uint64_t    size() const INLINE      { return P::getP(section.fields.size); }
	uint32_t    offset() const INLINE    { return E::get32(section.fields.offset); }
	uint32_t    align() const INLINE     { return E::get32(section.fields.align); }
	uint32_t    reloff() const INLINE    { return E::get32(section.fields.reloff); }
	uint32_t    nreloc() const INLINE    { return E::get32(section.fields.nreloc); }
	uint32_t    flags() const INLINE     { return E::get32(section.fields.flags); }
	uint32_t    reserved1() const INLINE { return E::get32(section.fields.reserved1); }
	uint32_t    reserved2() const INLINE { return E::get32(section.fields.reserved2); }

	// --- writers ---
	// Name setters copy at most 16 bytes; strncpy zero-pads shorter names.
	void set_sectname(const char* value) INLINE { strncpy(section.fields.sectname, value, 16); }
	void set_segname(const char* value) INLINE  { strncpy(section.fields.segname, value, 16); }
	void set_addr(uint64_t value) INLINE        { P::setP(section.fields.addr, value); }
	void set_size(uint64_t value) INLINE        { P::setP(section.fields.size, value); }
	void set_offset(uint32_t value) INLINE      { E::set32(section.fields.offset, value); }
	void set_align(uint32_t value) INLINE       { E::set32(section.fields.align, value); }
	void set_reloff(uint32_t value) INLINE      { E::set32(section.fields.reloff, value); }
	void set_nreloc(uint32_t value) INLINE      { E::set32(section.fields.nreloc, value); }
	void set_flags(uint32_t value) INLINE       { E::set32(section.fields.flags, value); }
	void set_reserved1(uint32_t value) INLINE   { E::set32(section.fields.reserved1, value); }
	void set_reserved2(uint32_t value) INLINE   { E::set32(section.fields.reserved2, value); }

private:
	macho_section_content<P> section;
};
// When true, the file, segment and section being processed are reported on
// stdout along with each section that gets disabled.
static bool debug = true;
// Processes a single file in place; defined at the end of this file.
// Returns false on failure, which makes main() stop and exit with status 1.
bool processFile(const char *filename);
// Entry point: runs processFile() on every command-line argument in order.
// Exits with status 1 at the first file that fails (later files are not
// attempted) and with 0 when all succeed or no files were given.
int main(int argc, const char *argv[]) {
	int index = 1;
	while (index < argc) {
		const bool ok = processFile(argv[index]);
		if (!ok) {
			return 1;
		}
		index += 1;
	}
	return 0;
}
// Two consecutive 32-bit fields: a version number and a flags word.
// NOTE(review): nothing else in this file references this struct; it
// presumably mirrors the Objective-C image info record — confirm before
// relying on or removing it.
struct imageinfo {
uint32_t version;
uint32_t flags;
};
// Segment and section names live in 16-byte fields and may be un-terminated,
// so the comparison never looks past the 16th byte.
// Returns true when the two names are equal within that limit.
bool segnameEquals(const char *lhs, const char *rhs)
{
	const size_t nameFieldSize = 16;
	const int order = strncmp(lhs, rhs, nameFieldSize);
	return order == 0;
}
// Returns true when the name field `segname` begins with the NUL-terminated
// string `prefix`. The name field is 16 bytes and may be un-terminated.
//
// A prefix longer than the field can never match. Rejecting it up front also
// keeps strncmp from reading past the end of an un-terminated field.
bool segnameStartsWith(const char *segname, const char *prefix)
{
	const size_t nameFieldSize = 16;
	const size_t prefixLength = strlen(prefix);
	if (prefixLength > nameFieldSize) {
		return false;
	}
	return 0 == strncmp(segname, prefix, prefixLength);
}
// Section names are compared exactly like segment names, so this simply
// forwards to segnameEquals().
bool sectnameEquals(const char *lhs, const char *rhs)
{
	const bool same = segnameEquals(lhs, rhs);
	return same;
}
// Examines one section and, if it is an initializer/terminator section,
// disables it in place.
//
// Strip S_MOD_INIT/TERM_FUNC_POINTERS. We don't want dyld to call
// our init funcs because it is too late, and we don't want anyone to
// call our term funcs ever.
//
// A matching section has its section-type bits cleared (type 0 is S_REGULAR)
// and is renamed:
//   __DATA*  __mod_init_func  ->  __objc_init_func
//   __TEXT*  __init_offsets   ->  __objc_init_offs
//   __DATA*  __mod_term_func  ->  __objc_term_func
//
// start: beginning of the Mach-O image; currently unused.
// sect:  section record to inspect and possibly modify.
template <typename P>
void dosect(uint8_t *start, macho_section<P> *sect)
{
	(void)start;

	if (debug) printf("section %.16s from segment %.16s\n",
					  sect->sectname(), sect->segname());

	struct Rewrite {
		const char *segmentPrefix;	// segment name must start with this
		const char *oldName;		// section name to look for
		const char *newName;		// replacement name (exactly 16 bytes)
	};
	static const Rewrite rewrites[] = {
		{ "__DATA", "__mod_init_func", "__objc_init_func" },
		{ "__TEXT", "__init_offsets",  "__objc_init_offs" },
		{ "__DATA", "__mod_term_func", "__objc_term_func" },
	};

	for (const Rewrite &rewrite : rewrites) {
		if (!segnameStartsWith(sect->segname(), rewrite.segmentPrefix)) continue;
		if (!sectnameEquals(sect->sectname(), rewrite.oldName)) continue;

		// section type 0 is S_REGULAR
		sect->set_flags(sect->flags() & ~SECTION_TYPE);
		sect->set_sectname(rewrite.newName);
		// Report the section that was actually matched.
		if (debug) printf("disabled %s section\n", rewrite.oldName);
	}
}
template <typename P>
void doseg(uint8_t *start, macho_segment_command<P> *seg)
{
if (debug) printf("segment name: %.16s, nsects %u\n",
seg->segname(), seg->nsects());
macho_section<P> *sect = (macho_section<P> *)(seg + 1);
for (uint32_t i = 0; i < seg->nsects(); ++i) {
dosect(start, &sect[i]);
}
}
template<typename P>
bool parse_macho(uint8_t *buffer)
{
macho_header<P>* mh = (macho_header<P>*)buffer;
uint8_t *cmds = (uint8_t *)(mh + 1);
for (uint32_t c = 0; c < mh->ncmds(); c++) {
macho_load_command<P>* cmd = (macho_load_command<P>*)cmds;
cmds += cmd->cmdsize();
if (cmd->cmd() == LC_SEGMENT || cmd->cmd() == LC_SEGMENT_64) {
doseg(buffer, (macho_segment_command<P>*)cmd);
}
}
return true;
}
// Reads the 32-bit magic at `buffer` and dispatches to the parse_macho<P>
// instantiation for that word size and byte order: MH_MAGIC* use the host
// byte order, MH_CIGAM* the opposite one.
// Returns false, after printing a message, when the magic is none of these.
bool parse_macho(uint8_t *buffer)
{
	const uint32_t magic = *(uint32_t *)buffer;

	if (magic == MH_MAGIC_64) {
		return parse_macho<Pointer64<CurrentEndian>>(buffer);
	}
	if (magic == MH_MAGIC) {
		return parse_macho<Pointer32<CurrentEndian>>(buffer);
	}
	if (magic == MH_CIGAM_64) {
		return parse_macho<Pointer64<OtherEndian>>(buffer);
	}
	if (magic == MH_CIGAM) {
		return parse_macho<Pointer32<OtherEndian>>(buffer);
	}

	printf("file is not mach-o (magic %x)\n", magic);
	return false;
}
// Processes a mapped file that is either a single Mach-O image or a fat
// (multi-architecture) file.
//
// buffer: start of the mapped file.
// size:   number of bytes mapped at buffer.
//
// A file whose magic is neither FAT_MAGIC nor FAT_CIGAM is passed straight to
// parse_macho(). Otherwise the fat header and arch table are validated
// against `size`, and parse_macho() is run on every slice. Fat header fields
// are converted with OSSwapBigToHostInt32 before use.
//
// Returns false, after printing a message, when the file is too small or
// badly formed, or when parse_macho() fails for any slice.
bool parse_fat(uint8_t *buffer, size_t size)
{
	uint32_t magic;
	if (size < sizeof(magic)) {
		printf("file is too small\n");
		return false;
	}
	magic = *(uint32_t *)buffer;
	if (magic != FAT_MAGIC && magic != FAT_CIGAM) {
		/* Not a fat file */
		return parse_macho(buffer);
	}

	struct fat_header *fh;
	uint32_t fat_nfat_arch;
	struct fat_arch *archs;

	if (size < sizeof(struct fat_header)) {
		printf("file is too small\n");
		return false;
	}
	fh = (struct fat_header *)buffer;
	fat_nfat_arch = OSSwapBigToHostInt32(fh->nfat_arch);

	size_t fat_arch_size;
	// fat_nfat_arch * sizeof(struct fat_arch) + sizeof(struct fat_header)
	if (os_mul_and_add_overflow(fat_nfat_arch, sizeof(struct fat_arch),
								sizeof(struct fat_header), &fat_arch_size))
	{
		printf("too many fat archs\n");
		return false;
	}
	if (size < fat_arch_size) {
		printf("file is too small\n");
		return false;
	}
	archs = (struct fat_arch *)(buffer + sizeof(struct fat_header));

	/* Special case hidden CPU_TYPE_ARM64 */
	// If the file has room for one more fat_arch entry past the declared
	// table and that entry's cputype is ARM64, count it as a slice too.
	size_t fat_arch_plus_one_size;
	if (os_add_overflow(fat_arch_size, sizeof(struct fat_arch),
						&fat_arch_plus_one_size))
	{
		printf("too many fat archs\n");
		return false;
	}
	if (size >= fat_arch_plus_one_size) {
		if (fat_nfat_arch > 0
			&& OSSwapBigToHostInt32(archs[fat_nfat_arch].cputype) == CPU_TYPE_ARM64) {
			fat_nfat_arch++;
		}
	}
	/* End special case hidden CPU_TYPE_ARM64 */

	if (debug) printf("%d fat architectures\n",
					  fat_nfat_arch);

	for (uint32_t i = 0; i < fat_nfat_arch; i++) {
		uint32_t arch_cputype = OSSwapBigToHostInt32(archs[i].cputype);
		uint32_t arch_cpusubtype = OSSwapBigToHostInt32(archs[i].cpusubtype);
		uint32_t arch_offset = OSSwapBigToHostInt32(archs[i].offset);
		uint32_t arch_size = OSSwapBigToHostInt32(archs[i].size);

		if (debug) printf("cputype %d cpusubtype %d\n",
						  arch_cputype, arch_cpusubtype);

		/* Check that slice data is after all fat headers and archs */
		if (arch_offset < fat_arch_size) {
			printf("file is badly formed\n");
			return false;
		}
		/* Check that the slice ends before the file does */
		if (arch_offset > size) {
			printf("file is badly formed\n");
			return false;
		}
		if (arch_size > size) {
			printf("file is badly formed\n");
			return false;
		}
		if (arch_offset > (size - arch_size)) {
			printf("file is badly formed\n");
			return false;
		}
		/* Check that the slice can hold the magic parse_macho() reads.
		   Without this, an empty slice at the very end of the file would
		   make parse_macho() read past the end of the mapping. */
		if (arch_size < sizeof(uint32_t)) {
			printf("file is badly formed\n");
			return false;
		}

		bool ok = parse_macho(buffer + arch_offset);
		if (!ok) return false;
	}
	return true;
}
// Opens `filename` read-write, maps the whole file shared, and runs
// parse_fat() over the mapping so that modifications land in the file.
//
// Returns parse_fat()'s result, or false after printing a message when the
// file cannot be opened, stat'ed or mapped. The descriptor is closed on every
// path after a successful open, and the mapping is released once parsing is
// done.
bool processFile(const char *filename)
{
	if (debug) printf("file %s\n", filename);

	int fd = open(filename, O_RDWR);
	if (fd < 0) {
		printf("open %s: %s\n", filename, strerror(errno));
		return false;
	}

	bool result = false;
	struct stat st;
	if (fstat(fd, &st) < 0) {
		// Report before close(), which may overwrite errno.
		printf("fstat %s: %s\n", filename, strerror(errno));
	} else {
		const size_t length = (size_t)st.st_size;
		void *buffer = mmap(NULL, length, PROT_READ|PROT_WRITE,
							MAP_FILE|MAP_SHARED, fd, 0);
		if (buffer == MAP_FAILED) {
			printf("mmap %s: %s\n", filename, strerror(errno));
		} else {
			result = parse_fat((uint8_t *)buffer, length);
			munmap(buffer, length);
		}
	}

	close(fd);
	return result;
}