[lld-macho] Ensure reads from nlist_64 structs are aligned when necessary

My test refactoring in D80217 seems to have caused yaml2obj to emit
unaligned nlist_64 structs, causing ASAN'd lld to be unhappy. I don't
think this is an issue with yaml2obj though -- llvm-mc also seems to
emit unaligned nlist_64s. This diff makes lld able to safely do aligned
reads under ASAN builds while hopefully creating no overhead for regular
builds on architectures that support unaligned reads.

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D80414
This commit is contained in:
Jez Ng 2020-05-21 15:26:35 -07:00
parent f04d1c3b90
commit 1e1a3f67ee
5 changed files with 50 additions and 12 deletions

View File

@ -45,6 +45,7 @@
#include "Config.h"
#include "ExportTrie.h"
#include "InputSection.h"
#include "MachOStructs.h"
#include "OutputSection.h"
#include "SymbolTable.h"
#include "Symbols.h"
@ -211,14 +212,14 @@ void InputFile::parseRelocations(const section_64 &sec,
}
}
void InputFile::parseSymbols(ArrayRef<nlist_64> nList, const char *strtab,
bool subsectionsViaSymbols) {
void InputFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
const char *strtab, bool subsectionsViaSymbols) {
// resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols
// out-of-sequence.
symbols.resize(nList.size());
std::vector<size_t> altEntrySymIdxs;
auto createDefined = [&](const nlist_64 &sym, InputSection *isec,
auto createDefined = [&](const structs::nlist_64 &sym, InputSection *isec,
uint32_t value) -> Symbol * {
StringRef name = strtab + sym.n_strx;
if (sym.n_type & N_EXT)
@ -230,7 +231,7 @@ void InputFile::parseSymbols(ArrayRef<nlist_64> nList, const char *strtab,
};
for (size_t i = 0, n = nList.size(); i < n; ++i) {
const nlist_64 &sym = nList[i];
const structs::nlist_64 &sym = nList[i];
// Undefined symbol
if (!sym.n_sect) {
@ -289,7 +290,7 @@ void InputFile::parseSymbols(ArrayRef<nlist_64> nList, const char *strtab,
}
for (size_t idx : altEntrySymIdxs) {
const nlist_64 &sym = nList[idx];
const structs::nlist_64 &sym = nList[idx];
SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr;
InputSection *subsec = findContainingSubsection(subsecMap, &off);
@ -311,8 +312,8 @@ ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) {
// TODO: Error on missing LC_SYMTAB?
if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
auto *c = reinterpret_cast<const symtab_command *>(cmd);
ArrayRef<nlist_64> nList(
reinterpret_cast<const nlist_64 *>(buf + c->symoff), c->nsyms);
ArrayRef<structs::nlist_64> nList(
reinterpret_cast<const structs::nlist_64 *>(buf + c->symoff), c->nsyms);
const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
parseSymbols(nList, strtab, subsectionsViaSymbols);

View File

@ -9,6 +9,8 @@
#ifndef LLD_MACHO_INPUT_FILES_H
#define LLD_MACHO_INPUT_FILES_H
#include "MachOStructs.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/BinaryFormat/MachO.h"
@ -52,7 +54,7 @@ protected:
void parseSections(ArrayRef<llvm::MachO::section_64>);
void parseSymbols(ArrayRef<llvm::MachO::nlist_64> nList, const char *strtab,
void parseSymbols(ArrayRef<lld::structs::nlist_64> nList, const char *strtab,
bool subsectionsViaSymbols);
void parseRelocations(const llvm::MachO::section_64 &, SubsectionMap &);

36
lld/MachO/MachOStructs.h Normal file
View File

@ -0,0 +1,36 @@
//===- MachOStructs.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines structures used in the MachO object file format. Note that
// unlike llvm/BinaryFormat/MachO.h, the structs here are defined in terms of
// endian- and alignment-compatibility wrappers.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_MACHO_MACHO_STRUCTS_H
#define LLD_MACHO_MACHO_STRUCTS_H
#include "llvm/Support/Endian.h"
namespace lld {
namespace structs {
struct nlist_64 {
llvm::support::ulittle32_t n_strx;
uint8_t n_type;
uint8_t n_sect;
llvm::support::ulittle16_t n_desc;
llvm::support::ulittle64_t n_value;
};
} // namespace structs
} // namespace lld
#endif

View File

@ -10,6 +10,7 @@
#include "Config.h"
#include "ExportTrie.h"
#include "InputFiles.h"
#include "MachOStructs.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
@ -281,7 +282,7 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
}
size_t SymtabSection::getSize() const {
return symbols.size() * sizeof(nlist_64);
return symbols.size() * sizeof(structs::nlist_64);
}
void SymtabSection::finalizeContents() {
@ -292,7 +293,7 @@ void SymtabSection::finalizeContents() {
}
void SymtabSection::writeTo(uint8_t *buf) const {
auto *nList = reinterpret_cast<nlist_64 *>(buf);
auto *nList = reinterpret_cast<structs::nlist_64 *>(buf);
for (const SymtabEntry &entry : symbols) {
nList->n_strx = entry.strx;
// TODO support other symbol types

View File

@ -414,8 +414,6 @@ void Writer::assignAddresses(OutputSegment *seg) {
for (auto &p : seg->getSections()) {
OutputSection *section = p.second;
addr = alignTo(addr, section->align);
// We must align the file offsets too to avoid misaligned writes of
// structs.
fileOff = alignTo(fileOff, section->align);
section->addr = addr;
section->fileOff = fileOff;