From 0f7cedaa1e76a0147607665c8066653abc83618f Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 20 Jul 2016 17:58:07 +0000 Subject: [PATCH] Create thunks before regular relocation scan. We will need to do something like this to support range extension thunks since that process is iterative. Doing this also has the advantage that when doing the regular relocation scan the offset in the output section is known and we can just store that. This reduces the number of times we have to run getOffset and I think will allow a more specialized .eh_frame representation. By itself this is already a performance win. firefox master 7.295045737 patch 7.209466989 0.98826892235 chromium master 4.531254468 patch 4.509221804 0.995137623774 chromium fast master 1.836928973 patch 1.823805241 0.992855612714 the gold plugin master 0.379768791 patch 0.380043405 1.00072310839 clang master 0.642698284 patch 0.642215663 0.999249070657 llvm-as master 0.036665467 patch 0.036456225 0.994293213284 the gold plugin fsds master 0.40395817 patch 0.404384555 1.0010555177 clang fsds master 0.722045545 patch 0.720946135 0.998477367518 llvm-as fsds master 0.03292646 patch 0.032759965 0.994943428477 scylla master 3.427376378 patch 3.368316181 0.98276810292 llvm-svn: 276146 --- lld/ELF/InputSection.cpp | 2 +- lld/ELF/OutputSections.cpp | 2 +- lld/ELF/Relocations.cpp | 89 ++++++++++++++++++++++++-------------- lld/ELF/Relocations.h | 5 ++- lld/ELF/Target.cpp | 2 + lld/ELF/Target.h | 2 + lld/ELF/Writer.cpp | 53 ++++++++++++++++------- 7 files changed, 102 insertions(+), 53 deletions(-) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index f768dfb7a719..1cb1137217ae 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -338,7 +338,7 @@ void InputSectionBase::relocate(uint8_t *Buf, uint8_t *BufEnd) { const unsigned Bits = sizeof(uintX_t) * 8; for (const Relocation &Rel : Relocations) { - uintX_t Offset = Rel.InputSec->getOffset(Rel.Offset); + uintX_t Offset = 
Rel.Offset; uint8_t *BufLoc = Buf + Offset; uint32_t Type = Rel.Type; uintX_t A = Rel.Addend; diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index cd5d496c9225..7ff5ba914e80 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -1278,7 +1278,7 @@ template typename ELFT::uint DynamicReloc::getOffset() const { if (OutputSec) return OutputSec->getVA() + OffsetInSec; - return InputSec->OutSec->getVA() + InputSec->getOffset(OffsetInSec); + return InputSec->OutSec->getVA() + OffsetInSec; } template diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index e5e409dd6c0d..66493302bdde 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -548,9 +548,19 @@ static void scanRelocs(InputSectionBase &C, ArrayRef Rels) { while (PieceI != PieceE && (PieceI->InputOff + PieceI->size() <= RI.r_offset)) ++PieceI; - if (PieceI != PieceE && PieceI->InputOff <= RI.r_offset && - PieceI->OutputOff == (uintX_t)-1) - continue; + + // Compute the offset of this relocation in the output section. We do it here + // to try to compute it only once. + uintX_t Offset; + if (PieceI != PieceE) { + assert(PieceI->InputOff <= RI.r_offset && "Relocation not in any piece"); + if (PieceI->OutputOff == (uintX_t)-1) + continue; + Offset = PieceI->OutputOff + RI.r_offset - PieceI->InputOff; + assert(Offset == C.getOffset(RI.r_offset)); + } else { + Offset = C.getOffset(RI.r_offset); + } // This relocation does not require got entry, but it is relative to got and // needs it to be created. Here we request for that. 
@@ -559,8 +569,8 @@ static void scanRelocs(InputSectionBase &C, ArrayRef Rels) { uintX_t Addend = computeAddend(File, Buf, E, RI, Expr, Body); - if (unsigned Processed = handleTlsRelocation( - Type, Body, C, RI.r_offset, Addend, Expr)) { + if (unsigned Processed = + handleTlsRelocation(Type, Body, C, Offset, Addend, Expr)) { I += (Processed - 1); continue; } @@ -581,17 +591,17 @@ static void scanRelocs(InputSectionBase &C, ArrayRef Rels) { // relocation. We can process some of it and and just ask the dynamic // linker to add the load address. if (!Constant) - AddDyn({Target->RelativeRel, &C, RI.r_offset, true, &Body, Addend}); + AddDyn({Target->RelativeRel, &C, Offset, true, &Body, Addend}); // If the produced value is a constant, we just remember to write it // when outputting this section. We also have to do it if the format // uses Elf_Rel, since in that case the written value is the addend. if (Constant || !RelTy::IsRela) - C.Relocations.push_back({Expr, Type, &C, RI.r_offset, Addend, &Body}); + C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body}); } else { // We don't know anything about the finaly symbol. Just ask the dynamic // linker to handle the relocation for us. - AddDyn({Target->getDynRel(Type), &C, RI.r_offset, false, &Body, Addend}); + AddDyn({Target->getDynRel(Type), &C, Offset, false, &Body, Addend}); // MIPS ABI turns using of GOT and dynamic relocations inside out. // While regular ABI uses dynamic relocations to fill up GOT entries // MIPS ABI requires dynamic linker to fills up GOT entries using @@ -612,14 +622,6 @@ static void scanRelocs(InputSectionBase &C, ArrayRef Rels) { continue; } - // Some targets might require creation of thunks for relocations. - // Now we support only MIPS which requires LA25 thunk to call PIC - // code from non-PIC one, and ARM which requires interworking. 
- if (Expr == R_THUNK_ABS || Expr == R_THUNK_PC || Expr == R_THUNK_PLT_PC) { - auto *Sec = cast>(&C); - addThunk(Type, Body, *Sec); - } - // At this point we are done with the relocated position. Some relocations // also require us to create a got or plt entry. @@ -676,19 +678,6 @@ static void scanRelocs(InputSectionBase &C, ArrayRef Rels) { } } -template void scanRelocations(InputSection &C) { - typedef typename ELFT::Shdr Elf_Shdr; - - // Scan all relocations. Each relocation goes through a series - // of tests to determine if it needs special treatment, such as - // creating GOT, PLT, copy relocations, etc. - // Note that relocations for non-alloc sections are directly - // processed by InputSection::relocateNonAlloc. - if (C.getSectionHdr()->sh_flags & SHF_ALLOC) - for (const Elf_Shdr *RelSec : C.RelocSections) - scanRelocations(C, *RelSec); -} - template void scanRelocations(InputSectionBase &S, const typename ELFT::Shdr &RelSec) { @@ -699,10 +688,35 @@ void scanRelocations(InputSectionBase &S, scanRelocs(S, EObj.rels(&RelSec)); } -template void scanRelocations(InputSection &); -template void scanRelocations(InputSection &); -template void scanRelocations(InputSection &); -template void scanRelocations(InputSection &); +template +static void createThunks(InputSectionBase &C, ArrayRef Rels) { + const elf::ObjectFile &File = *C.getFile(); + for (const RelTy &Rel : Rels) { + SymbolBody &Body = File.getRelocTargetSym(Rel); + uint32_t Type = Rel.getType(Config->Mips64EL); + RelExpr Expr = Target->getRelExpr(Type, Body); + if (!isPreemptible(Body, Type) && needsPlt(Expr)) + Expr = fromPlt(Expr); + Expr = Target->getThunkExpr(Expr, Type, File, Body); + // Some targets might require creation of thunks for relocations. + // Now we support only MIPS which requires LA25 thunk to call PIC + // code from non-PIC one, and ARM which requires interworking. 
+ if (Expr == R_THUNK_ABS || Expr == R_THUNK_PC || Expr == R_THUNK_PLT_PC) { + auto *Sec = cast>(&C); + addThunk(Type, Body, *Sec); + } + } +} + +template +void createThunks(InputSectionBase &S, + const typename ELFT::Shdr &RelSec) { + ELFFile &EObj = S.getFile()->getObj(); + if (RelSec.sh_type == SHT_RELA) + createThunks(S, EObj.relas(&RelSec)); + else + createThunks(S, EObj.rels(&RelSec)); +} template void scanRelocations(InputSectionBase &, const ELF32LE::Shdr &); @@ -712,5 +726,14 @@ template void scanRelocations(InputSectionBase &, const ELF64LE::Shdr &); template void scanRelocations(InputSectionBase &, const ELF64BE::Shdr &); + +template void createThunks(InputSectionBase &, + const ELF32LE::Shdr &); +template void createThunks(InputSectionBase &, + const ELF32BE::Shdr &); +template void createThunks(InputSectionBase &, + const ELF64LE::Shdr &); +template void createThunks(InputSectionBase &, + const ELF64BE::Shdr &); } } diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index 4c1c74efb0da..1710acc994f1 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -73,11 +73,12 @@ template struct Relocation { SymbolBody *Sym; }; -template void scanRelocations(InputSection &); - template void scanRelocations(InputSectionBase &, const typename ELFT::Shdr &); +template +void createThunks(InputSectionBase &, const typename ELFT::Shdr &); + template static inline typename ELFT::uint getAddend(const typename ELFT::Rel &Rel) { return 0; diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 8c5959cbeee3..4bdde13fa21c 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -1496,6 +1496,7 @@ ARMTargetInfo::ARMTargetInfo() { PltHeaderSize = 20; // ARM uses Variant 1 TLS TcbSize = 8; + NeedsThunks = true; } RelExpr ARMTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { @@ -1836,6 +1837,7 @@ template MipsTargetInfo::MipsTargetInfo() { PltHeaderSize = 32; CopyRel = R_MIPS_COPY; PltRel = R_MIPS_JUMP_SLOT; + NeedsThunks = true; if 
(ELFT::Is64Bits) { RelativeRel = (R_MIPS_64 << 8) | R_MIPS_REL32; TlsGotRel = R_MIPS_TLS_TPREL64; diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index d335c1e051b7..67bf3a27ff7e 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -92,6 +92,8 @@ public: // Set to 0 for variant 2 unsigned TcbSize = 0; + bool NeedsThunks = false; + virtual RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data, RelExpr Expr) const; virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 5b6c5f664438..a9235c073125 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -49,6 +49,9 @@ private: void copyLocalSymbols(); void addReservedSymbols(); std::vector *> createSections(); + void forEachRelSec( + std::function &, const typename ELFT::Shdr &)> + Fn); void finalizeSections(); void addPredefinedSections(); bool needsGot(); @@ -632,6 +635,34 @@ template static void sortCtorsDtors(OutputSectionBase *S) { reinterpret_cast *>(S)->sortCtorsDtors(); } +template +void Writer::forEachRelSec( + std::function &, const typename ELFT::Shdr &)> + Fn) { + for (const std::unique_ptr> &F : + Symtab.getObjectFiles()) { + for (InputSectionBase *C : F->getSections()) { + if (isDiscarded(C)) + continue; + // Scan all relocations. Each relocation goes through a series + // of tests to determine if it needs special treatment, such as + // creating GOT, PLT, copy relocations, etc. + // Note that relocations for non-alloc sections are directly + // processed by InputSection::relocateNonAlloc. 
+ if (!(C->getSectionHdr()->sh_flags & SHF_ALLOC)) + continue; + if (auto *S = dyn_cast>(C)) { + for (const Elf_Shdr *RelSec : S->RelocSections) + Fn(*S, *RelSec); + continue; + } + if (auto *S = dyn_cast>(C)) + if (S->RelocSection) + Fn(*S, *S->RelocSection); + } + } +} + template std::vector *> Writer::createSections() { std::vector *> Result; @@ -705,26 +736,16 @@ template void Writer::finalizeSections() { Out::EhFrame->finalize(); } - // Scan relocations. This must be done after every symbol is declared so that - // we can correctly decide if a dynamic relocation is needed. - for (const std::unique_ptr> &F : - Symtab.getObjectFiles()) { - for (InputSectionBase *C : F->getSections()) { - if (isDiscarded(C)) - continue; - if (auto *S = dyn_cast>(C)) { - scanRelocations(*S); - continue; - } - if (auto *S = dyn_cast>(C)) - if (S->RelocSection) - scanRelocations(*S, *S->RelocSection); - } - } + if (Target->NeedsThunks) + forEachRelSec(createThunks); for (OutputSectionBase *Sec : OutputSections) Sec->assignOffsets(); + // Scan relocations. This must be done after every symbol is declared so that + // we can correctly decide if a dynamic relocation is needed. + forEachRelSec(scanRelocations); + // Now that we have defined all possible symbols including linker- // synthesized ones. Visit all symbols to give the finishing touches. std::vector CommonSymbols;