[ELF] Change rawData to content() and data() to contentMaybeDecompress()

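Rename data() to contentMaybeDecompress() to make it clear that the call may
trigger decompression, and read rawData through the new content() accessor,
which makes it feasible to refactor the member variable rawData.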
This commit is contained in:
Fangrui Song 2022-11-20 22:43:22 +00:00
parent 30f9eb1eb8
commit 2bf5d86422
17 changed files with 70 additions and 66 deletions

View File

@ -350,7 +350,7 @@ static uint64_t scanCortexA53Errata843419(InputSection *isec, uint64_t &off,
}
uint64_t patchOff = 0;
const uint8_t *buf = isec->rawData.begin();
const uint8_t *buf = isec->content().begin();
const ulittle32_t *instBuf = reinterpret_cast<const ulittle32_t *>(buf + off);
uint32_t instr1 = *instBuf++;
uint32_t instr2 = *instBuf++;
@ -409,7 +409,7 @@ uint64_t Patch843419Section::getLDSTAddr() const {
void Patch843419Section::writeTo(uint8_t *buf) {
// Copy the instruction that we will be replacing with a branch in the
// patchee Section.
write32le(buf, read32le(patchee->rawData.begin() + patcheeOffset));
write32le(buf, read32le(patchee->content().begin() + patcheeOffset));
// Apply any relocation transferred from the original patchee section.
target->relocateAlloc(*this, buf);
@ -591,8 +591,8 @@ AArch64Err843419Patcher::patchInputSectionDescription(
while (codeSym != mapSyms.end()) {
auto dataSym = std::next(codeSym);
uint64_t off = (*codeSym)->value;
uint64_t limit =
(dataSym == mapSyms.end()) ? isec->rawData.size() : (*dataSym)->value;
uint64_t limit = (dataSym == mapSyms.end()) ? isec->content().size()
: (*dataSym)->value;
while (off < limit) {
uint64_t startAddr = isec->getVA(off);

View File

@ -266,7 +266,7 @@ static ScanResult scanCortexA8Errata657417(InputSection *isec, uint64_t &off,
}
ScanResult scanRes = {0, 0, nullptr};
const uint8_t *buf = isec->rawData.begin();
const uint8_t *buf = isec->content().begin();
// ARMv7-A Thumb 32-bit instructions are encoded 2 consecutive
// little-endian halfwords.
const ulittle16_t *instBuf = reinterpret_cast<const ulittle16_t *>(buf + off);
@ -498,8 +498,8 @@ ARMErr657417Patcher::patchInputSectionDescription(
while (thumbSym != mapSyms.end()) {
auto nonThumbSym = std::next(thumbSym);
uint64_t off = (*thumbSym)->value;
uint64_t limit = (nonThumbSym == mapSyms.end()) ? isec->rawData.size()
: (*nonThumbSym)->value;
uint64_t limit = nonThumbSym == mapSyms.end() ? isec->content().size()
: (*nonThumbSym)->value;
while (off < limit) {
ScanResult sr = scanCortexA8Errata657417(isec, off, limit);

View File

@ -1561,7 +1561,7 @@ void PPC64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
// recursive calls even if the function is preemptible. This is not
// wrong in the common case where the function is not preempted at
// runtime. Just ignore.
if ((rel.offset + 8 > sec.rawData.size() ||
if ((rel.offset + 8 > sec.content().size() ||
read32(loc + 4) != 0x60000000) &&
rel.sym->file != sec.file) {
// Use substr(6) to remove the "__plt_" prefix.

View File

@ -556,7 +556,7 @@ static void relaxCall(const InputSection &sec, size_t i, uint64_t loc,
Relocation &r, uint32_t &remove) {
const bool rvc = config->eflags & EF_RISCV_RVC;
const Symbol &sym = *r.sym;
const uint64_t insnPair = read64le(sec.rawData.data() + r.offset);
const uint64_t insnPair = read64le(sec.content().data() + r.offset);
const uint32_t rd = extractBits(insnPair, 32 + 11, 32 + 7);
const uint64_t dest =
(r.expr == R_PLT_PC ? sym.getPltVA() : sym.getVA()) + r.addend;
@ -584,7 +584,7 @@ static void relaxTlsLe(const InputSection &sec, size_t i, uint64_t loc,
uint64_t val = r.sym->getVA(r.addend);
if (hi20(val) != 0)
return;
uint32_t insn = read32le(sec.rawData.data() + r.offset);
uint32_t insn = read32le(sec.content().data() + r.offset);
switch (r.type) {
case R_RISCV_TPREL_HI20:
case R_RISCV_TPREL_ADD:
@ -728,7 +728,7 @@ void elf::riscvFinalizeRelax(int passes) {
continue;
auto &rels = sec->relocations;
ArrayRef<uint8_t> old = sec->rawData;
ArrayRef<uint8_t> old = sec->content();
size_t newSize =
old.size() - aux.relocDeltas[sec->relocations.size() - 1];
size_t writesIdx = 0;

View File

@ -253,7 +253,7 @@ bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file,
Relocation &r = is.relocations[rIndex];
// Check if the relocation corresponds to a direct jmp.
const uint8_t *secContents = is.rawData.data();
const uint8_t *secContents = is.content().data();
// If it is not a direct jmp instruction, there is nothing to do here.
if (*(secContents + r.offset - 1) != 0xe9)
return false;

View File

@ -44,17 +44,17 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
.Case(".debug_str_offsets", &strOffsetsSection)
.Case(".debug_line", &lineSection)
.Default(nullptr)) {
m->Data = toStringRef(sec->data());
m->Data = toStringRef(sec->contentMaybeDecompress());
m->sec = sec;
continue;
}
if (sec->name == ".debug_abbrev")
abbrevSection = toStringRef(sec->data());
abbrevSection = toStringRef(sec->contentMaybeDecompress());
else if (sec->name == ".debug_str")
strSection = toStringRef(sec->data());
strSection = toStringRef(sec->contentMaybeDecompress());
else if (sec->name == ".debug_line_str")
lineStrSection = toStringRef(sec->data());
lineStrSection = toStringRef(sec->contentMaybeDecompress());
else if (sec->name == ".debug_info" &&
!(objSections[i].sh_flags & ELF::SHF_GROUP)) {
// In DWARF v5, -fdebug-types-section places type units in .debug_info
@ -66,7 +66,7 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
// need to perform a lightweight parsing. We drop the SHF_GROUP flag when
// the InputSection was created, so we need to retrieve sh_flags from the
// associated ELF section header.
infoSection.Data = toStringRef(sec->data());
infoSection.Data = toStringRef(sec->contentMaybeDecompress());
infoSection.sec = sec;
}
}

View File

@ -2123,7 +2123,7 @@ static void readSymbolPartitionSection(InputSectionBase *s) {
if (!isa<Defined>(sym) || !sym->includeInDynsym())
return;
StringRef partName = reinterpret_cast<const char *>(s->rawData.data());
StringRef partName = reinterpret_cast<const char *>(s->content().data());
for (Partition &part : partitions) {
if (part.name == partName) {
sym->partition = part.getNumber();

View File

@ -42,7 +42,7 @@ public:
private:
template <class P> void failOn(const P *loc, const Twine &msg) {
fatal("corrupted .eh_frame: " + msg + "\n>>> defined in " +
isec->getObjMsg((const uint8_t *)loc - isec->rawData.data()));
isec->getObjMsg((const uint8_t *)loc - isec->content().data()));
}
uint8_t readByte();

View File

@ -313,7 +313,7 @@ bool ICF<ELFT>::constantEq(const InputSection *secA, ArrayRef<RelTy> ra,
template <class ELFT>
bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) {
if (a->flags != b->flags || a->getSize() != b->getSize() ||
a->rawData != b->rawData)
a->content() != b->content())
return false;
// If two sections have different output sections, we cannot merge them.
@ -492,7 +492,7 @@ template <class ELFT> void ICF<ELFT>::run() {
// Initially, we use hash values to partition sections.
parallelForEach(sections, [&](InputSection *s) {
// Set MSB to 1 to avoid collisions with unique IDs.
s->eqClass[0] = xxHash64(s->rawData) | (1U << 31);
s->eqClass[0] = xxHash64(s->content()) | (1U << 31);
});
// Perform 2 rounds of relocation hash propagation. 2 is an empirical value to

View File

@ -809,8 +809,9 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats,
// simply handle such sections as non-mergeable ones. Degrading like this
// is acceptable because section merging is optional.
if (auto *ms = dyn_cast<MergeInputSection>(s)) {
s = makeThreadLocal<InputSection>(ms->file, ms->flags, ms->type,
ms->alignment, ms->data(), ms->name);
s = makeThreadLocal<InputSection>(
ms->file, ms->flags, ms->type, ms->alignment,
ms->contentMaybeDecompress(), ms->name);
sections[info] = s;
}
@ -877,10 +878,10 @@ template <class ELFT> static uint32_t readAndFeatures(const InputSection &sec) {
using Elf_Note = typename ELFT::Note;
uint32_t featuresSet = 0;
ArrayRef<uint8_t> data = sec.rawData;
ArrayRef<uint8_t> data = sec.content();
auto reportFatal = [&](const uint8_t *place, const char *msg) {
fatal(toString(sec.file) + ":(" + sec.name + "+0x" +
Twine::utohexstr(place - sec.rawData.data()) + "): " + msg);
Twine::utohexstr(place - sec.content().data()) + "): " + msg);
};
while (!data.empty()) {
// Read one NOTE record.

View File

@ -59,7 +59,7 @@ InputSectionBase::InputSectionBase(InputFile *file, uint64_t flags,
// In order to reduce memory allocation, we assume that mergeable
// sections are smaller than 4 GiB, which is not an unreasonable
// assumption as of 2017.
if (sectionKind == SectionBase::Merge && rawData.size() > UINT32_MAX)
if (sectionKind == SectionBase::Merge && content().size() > UINT32_MAX)
error(toString(this) + ": section too large");
// The ELF spec states that a value of 0 means the section has
@ -105,14 +105,15 @@ size_t InputSectionBase::getSize() const {
return s->getSize();
if (compressed)
return size;
return rawData.size() - bytesDropped;
return content().size() - bytesDropped;
}
template <class ELFT>
static void decompressAux(const InputSectionBase &sec, uint8_t *out,
size_t size) {
auto *hdr = reinterpret_cast<const typename ELFT::Chdr *>(sec.rawData.data());
auto compressed = sec.rawData.slice(sizeof(typename ELFT::Chdr));
auto *hdr =
reinterpret_cast<const typename ELFT::Chdr *>(sec.content().data());
auto compressed = sec.content().slice(sizeof(typename ELFT::Chdr));
if (Error e = hdr->ch_type == ELFCOMPRESS_ZLIB
? compression::zlib::decompress(compressed, out, size)
: compression::zstd::decompress(compressed, out, size))
@ -170,7 +171,7 @@ uint64_t SectionBase::getOffset(uint64_t offset) const {
// Second, InputSection::copyRelocations on .eh_frame. Some pieces may be
// discarded due to GC/ICF. We should compute the output section offset.
const EhInputSection *es = cast<EhInputSection>(this);
if (!es->rawData.empty())
if (!es->content().empty())
if (InputSection *isec = es->getParent())
return isec->outSecOff + es->getParentOffset(offset);
return offset;
@ -209,12 +210,12 @@ template <typename ELFT> void InputSectionBase::parseCompressedHeader() {
flags &= ~(uint64_t)SHF_COMPRESSED;
// New-style header
if (rawData.size() < sizeof(typename ELFT::Chdr)) {
if (content().size() < sizeof(typename ELFT::Chdr)) {
error(toString(this) + ": corrupted compressed section");
return;
}
auto *hdr = reinterpret_cast<const typename ELFT::Chdr *>(rawData.data());
auto *hdr = reinterpret_cast<const typename ELFT::Chdr *>(content().data());
if (hdr->ch_type == ELFCOMPRESS_ZLIB) {
if (!compression::zlib::isAvailable())
error(toString(this) + " is compressed with ELFCOMPRESS_ZLIB, but lld is "
@ -356,7 +357,7 @@ template <class ELFT, class RelTy>
void InputSection::copyRelocations(uint8_t *buf, ArrayRef<RelTy> rels) {
const TargetInfo &target = *elf::target;
InputSectionBase *sec = getRelocatedSection();
(void)sec->data(); // uncompress if needed
(void)sec->contentMaybeDecompress(); // uncompress if needed
for (const RelTy &rel : rels) {
RelType type = rel.getType(config->isMips64EL);
@ -409,7 +410,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef<RelTy> rels) {
}
int64_t addend = getAddend<ELFT>(rel);
const uint8_t *bufLoc = sec->rawData.begin() + rel.r_offset;
const uint8_t *bufLoc = sec->content().begin() + rel.r_offset;
if (!RelTy::IsRela)
addend = target.getImplicitAddend(bufLoc, type);
@ -1103,8 +1104,8 @@ template <class ELFT> void InputSection::writeTo(uint8_t *buf) {
// If this is a compressed section, uncompress section contents directly
// to the buffer.
if (compressed) {
auto *hdr = reinterpret_cast<const typename ELFT::Chdr *>(rawData.data());
auto compressed = rawData.slice(sizeof(typename ELFT::Chdr));
auto *hdr = reinterpret_cast<const typename ELFT::Chdr *>(content().data());
auto compressed = content().slice(sizeof(typename ELFT::Chdr));
size_t size = this->size;
if (Error e = hdr->ch_type == ELFCOMPRESS_ZLIB
? compression::zlib::decompress(compressed, buf, size)
@ -1118,8 +1119,8 @@ template <class ELFT> void InputSection::writeTo(uint8_t *buf) {
// Copy section contents from source object file to output file
// and then apply relocations.
memcpy(buf, rawData.data(), rawData.size());
relocate<ELFT>(buf, buf + rawData.size());
memcpy(buf, content().data(), content().size());
relocate<ELFT>(buf, buf + content().size());
}
void InputSection::replace(InputSection *other) {
@ -1166,7 +1167,7 @@ template <class ELFT> void EhInputSection::split() {
template <class ELFT, class RelTy>
void EhInputSection::split(ArrayRef<RelTy> rels) {
ArrayRef<uint8_t> d = rawData;
ArrayRef<uint8_t> d = content();
const char *msg = nullptr;
unsigned relI = 0;
while (!d.empty()) {
@ -1190,7 +1191,7 @@ void EhInputSection::split(ArrayRef<RelTy> rels) {
// Find the first relocation that points to [off,off+size). Relocations
// have been sorted by r_offset.
const uint64_t off = d.data() - rawData.data();
const uint64_t off = d.data() - content().data();
while (relI != rels.size() && rels[relI].r_offset < off)
++relI;
unsigned firstRel = -1;
@ -1201,7 +1202,7 @@ void EhInputSection::split(ArrayRef<RelTy> rels) {
}
if (msg)
errorOrWarn("corrupted .eh_frame: " + Twine(msg) + "\n>>> defined in " +
getObjMsg(d.data() - rawData.data()));
getObjMsg(d.data() - content().data()));
}
// Return the offset in an output section for a given input offset.
@ -1286,13 +1287,13 @@ void MergeInputSection::splitIntoPieces() {
assert(pieces.empty());
if (flags & SHF_STRINGS)
splitStrings(toStringRef(data()), entsize);
splitStrings(toStringRef(contentMaybeDecompress()), entsize);
else
splitNonStrings(data(), entsize);
splitNonStrings(contentMaybeDecompress(), entsize);
}
SectionPiece &MergeInputSection::getSectionPiece(uint64_t offset) {
if (rawData.size() <= offset)
if (content().size() <= offset)
fatal(toString(this) + ": offset is outside the section");
return partition_point(
pieces, [=](SectionPiece p) { return p.inputOff <= offset; })[-1];

View File

@ -164,7 +164,8 @@ public:
}
}
ArrayRef<uint8_t> data() const {
ArrayRef<uint8_t> content() const { return rawData; }
ArrayRef<uint8_t> contentMaybeDecompress() const {
if (compressed)
decompress();
return rawData;
@ -228,9 +229,9 @@ public:
template <typename T> llvm::ArrayRef<T> getDataAs() const {
size_t s = rawData.size();
size_t s = content().size();
assert(s % sizeof(T) == 0);
return llvm::makeArrayRef<T>((const T *)rawData.data(), s / sizeof(T));
return llvm::makeArrayRef<T>((const T *)content().data(), s / sizeof(T));
}
protected:
@ -288,8 +289,8 @@ public:
llvm::CachedHashStringRef getData(size_t i) const {
size_t begin = pieces[i].inputOff;
size_t end =
(pieces.size() - 1 == i) ? rawData.size() : pieces[i + 1].inputOff;
return {toStringRef(rawData.slice(begin, end - begin)), pieces[i].hash};
(pieces.size() - 1 == i) ? content().size() : pieces[i + 1].inputOff;
return {toStringRef(content().slice(begin, end - begin)), pieces[i].hash};
}
// Returns the SectionPiece at a given input section offset.
@ -313,7 +314,7 @@ struct EhSectionPiece {
: inputOff(off), sec(sec), size(size), firstRelocation(firstRelocation) {}
ArrayRef<uint8_t> data() const {
return {sec->rawData.data() + this->inputOff, size};
return {sec->content().data() + this->inputOff, size};
}
size_t inputOff;

View File

@ -75,7 +75,7 @@ private:
template <class ELFT>
static uint64_t getAddend(InputSectionBase &sec,
const typename ELFT::Rel &rel) {
return target->getImplicitAddend(sec.rawData.begin() + rel.r_offset,
return target->getImplicitAddend(sec.content().begin() + rel.r_offset,
rel.getType(config->isMips64EL));
}

View File

@ -480,7 +480,7 @@ int64_t RelocationScanner::computeMipsAddend(const RelTy &rel, RelExpr expr,
if (pairTy == R_MIPS_NONE)
return 0;
const uint8_t *buf = sec->rawData.data();
const uint8_t *buf = sec->content().data();
uint32_t symIndex = rel.getSymbol(config->isMips64EL);
// To make things worse, paired relocations might not be contiguous in
@ -1027,7 +1027,7 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset,
expr = fromPlt(expr);
} else if (!isAbsoluteValue(sym)) {
expr =
target->adjustGotPcExpr(type, addend, sec->rawData.data() + offset);
target->adjustGotPcExpr(type, addend, sec->content().data() + offset);
}
}
@ -1348,11 +1348,11 @@ template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) {
if (offset == uint64_t(-1))
return;
RelExpr expr = target->getRelExpr(type, sym, sec->rawData.data() + offset);
int64_t addend =
RelTy::IsRela
? getAddend<ELFT>(rel)
: target->getImplicitAddend(sec->rawData.data() + rel.r_offset, type);
RelExpr expr = target->getRelExpr(type, sym, sec->content().data() + offset);
int64_t addend = RelTy::IsRela
? getAddend<ELFT>(rel)
: target->getImplicitAddend(
sec->content().data() + rel.r_offset, type);
if (LLVM_UNLIKELY(config->emachine == EM_MIPS))
addend += computeMipsAddend<ELFT>(rel, expr, sym.isLocal());
else if (config->emachine == EM_PPC64 && config->isPic && type == R_PPC64_TOC)

View File

@ -114,7 +114,7 @@ std::unique_ptr<MipsAbiFlagsSection<ELFT>> MipsAbiFlagsSection<ELFT>::create() {
create = true;
std::string filename = toString(sec->file);
const size_t size = sec->rawData.size();
const size_t size = sec->content().size();
// Older version of BFD (such as the default FreeBSD linker) concatenate
// .MIPS.abiflags instead of merging. To allow for this case (or potential
// zero padding) we ignore everything after the first Elf_Mips_ABIFlags
@ -123,7 +123,8 @@ std::unique_ptr<MipsAbiFlagsSection<ELFT>> MipsAbiFlagsSection<ELFT>::create() {
Twine(size) + " instead of " + Twine(sizeof(Elf_Mips_ABIFlags)));
return nullptr;
}
auto *s = reinterpret_cast<const Elf_Mips_ABIFlags *>(sec->rawData.data());
auto *s =
reinterpret_cast<const Elf_Mips_ABIFlags *>(sec->content().data());
if (s->version != 0) {
error(filename + ": unexpected .MIPS.abiflags version " +
Twine(s->version));
@ -186,7 +187,7 @@ std::unique_ptr<MipsOptionsSection<ELFT>> MipsOptionsSection<ELFT>::create() {
sec->markDead();
std::string filename = toString(sec->file);
ArrayRef<uint8_t> d = sec->rawData;
ArrayRef<uint8_t> d = sec->content();
while (!d.empty()) {
if (d.size() < sizeof(Elf_Mips_Options)) {
@ -242,12 +243,12 @@ std::unique_ptr<MipsReginfoSection<ELFT>> MipsReginfoSection<ELFT>::create() {
for (InputSectionBase *sec : sections) {
sec->markDead();
if (sec->rawData.size() != sizeof(Elf_Mips_RegInfo)) {
if (sec->content().size() != sizeof(Elf_Mips_RegInfo)) {
error(toString(sec->file) + ": invalid size of .reginfo section");
return nullptr;
}
auto *r = reinterpret_cast<const Elf_Mips_RegInfo *>(sec->rawData.data());
auto *r = reinterpret_cast<const Elf_Mips_RegInfo *>(sec->content().data());
reginfo.ri_gprmask |= r->ri_gprmask;
sec->getFile<ELFT>()->mipsGp0 = r->ri_gp_value;
};
@ -3524,7 +3525,7 @@ void ARMExidxSyntheticSection::writeTo(uint8_t *buf) {
for (InputSection *isec : executableSections) {
assert(isec->getParent() != nullptr);
if (InputSection *d = findExidxSection(isec)) {
memcpy(buf + offset, d->rawData.data(), d->rawData.size());
memcpy(buf + offset, d->content().data(), d->content().size());
target->relocateAlloc(*d, buf + d->outSecOff);
offset += d->getSize();
} else {

View File

@ -101,7 +101,7 @@ ErrorPlace elf::getErrorPlace(const uint8_t *loc) {
const uint8_t *isecLoc =
Out::bufferStart
? (Out::bufferStart + isec->getParent()->offset + isec->outSecOff)
: isec->data().data();
: isec->contentMaybeDecompress().data();
if (isecLoc == nullptr) {
assert(isa<SyntheticSection>(isec) && "No data but not synthetic?");
continue;

View File

@ -1711,7 +1711,7 @@ static void fixSymbolsAfterShrinking() {
if (!inputSec || !inputSec->bytesDropped)
return;
const size_t OldSize = inputSec->rawData.size();
const size_t OldSize = inputSec->content().size();
const size_t NewSize = OldSize - inputSec->bytesDropped;
if (def->value > NewSize && def->value <= OldSize) {