From e35929e02664090667b0f9b8f4633fa005636f68 Mon Sep 17 00:00:00 2001 From: Peter Smith Date: Fri, 26 Feb 2021 13:14:21 +0000 Subject: [PATCH] [LLD][ELF][ARM] Refactor inBranchRange to use addend for PC Bias In AArch32 ARM, the PC reads two instructions ahead of the currently executiing instruction. This evaluates to 8 in ARM state and 4 in Thumb state. Branch instructions on AArch32 compensate for this by subtracting the PC bias from the addend. For a branch to symbol this will result in an addend of -8 in ARM state and -4 in Thumb state. The existing ARM Target::inBranchRange function accounted for this implict addend within the function meaning that if the addend were to be taken into account by the caller then it would be double counted. This complicates the interface for all Targets as callers wanting to account for addends had to account for the ARM PC-bias. In certain situations such as: https://github.com/ClangBuiltLinux/linux/issues/1305 the PC-bias compensation code didn't match up. In particular normalizeExistingThunk() didn't put the PC-bias back in as Arm thunks did not store the addend. The simplest fix for the problem is to add the PC bias in normalizeExistingThunk when restoring the addend. However I think it is worth refactoring the Arm inBranchRange implementation so that fewer calls to getPCBias are needed for other Targets. I wasn't able to remove getPCBias completely but hopefully the Relocations.cpp code is simpler now. In principle a test could be written to replicate the linux kernel build failure but I wasn't able to reproduce with a small example that I could build up from scratch. Fixes https://github.com/ClangBuiltLinux/linux/issues/1305 Differential Revision: https://reviews.llvm.org/D97550 --- lld/ELF/Arch/ARM.cpp | 55 ++++++++------------- lld/ELF/Relocations.cpp | 37 ++++++++------ lld/ELF/Relocations.h | 4 +- lld/ELF/Thunks.cpp | 53 ++++++++++---------- lld/test/ELF/arm-thunk-arm-thumb-reuse.s | 61 ++++++++++++++++++++++++ 5 files changed, 134 insertions(+), 76 deletions(-) create mode 100644 lld/test/ELF/arm-thunk-arm-thumb-reuse.s diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index e90e7efd34bd..13a7001e2296 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -279,7 +279,7 @@ void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s, - int64_t /*a*/) const { + int64_t a) const { // If S is an undefined weak symbol and does not have a PLT entry then it // will be resolved as a branch to the next instruction. if (s.isUndefWeak() && !s.isInPlt()) @@ -298,7 +298,7 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, LLVM_FALLTHROUGH; case R_ARM_CALL: { uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA(); - return !inBranchRange(type, branchAddr, dst); + return !inBranchRange(type, branchAddr, dst + a); } case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: @@ -309,7 +309,7 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, LLVM_FALLTHROUGH; case R_ARM_THM_CALL: { uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA(); - return !inBranchRange(type, branchAddr, dst); + return !inBranchRange(type, branchAddr, dst + a); } } return false; @@ -350,46 +350,31 @@ uint32_t ARM::getThunkSectionSpacing() const { } bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { - uint64_t range; - uint64_t instrSize; - - switch (type) { - case R_ARM_PC24: - case R_ARM_PLT32: - case R_ARM_JUMP24: - case R_ARM_CALL: - range = 0x2000000; - instrSize = 4; - break; - case R_ARM_THM_JUMP19: - range = 0x100000; - instrSize = 2; - break; - case R_ARM_THM_JUMP24: - case R_ARM_THM_CALL: - range = config->armJ1J2BranchEncoding ? 0x1000000 : 0x400000; - instrSize = 2; - break; - default: - return true; - } - // PC at Src is 2 instructions ahead, immediate of branch is signed - if (src > dst) - range -= 2 * instrSize; - else - range += instrSize; - if ((dst & 0x1) == 0) // Destination is ARM, if ARM caller then Src is already 4-byte aligned. // If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to ensure // destination will be 4 byte aligned. src &= ~0x3; else - // Bit 0 == 1 denotes Thumb state, it is not part of the range + // Bit 0 == 1 denotes Thumb state, it is not part of the range. dst &= ~0x1; - uint64_t distance = (src > dst) ? src - dst : dst - src; - return distance <= range; + int64_t offset = dst - src; + switch (type) { + case R_ARM_PC24: + case R_ARM_PLT32: + case R_ARM_JUMP24: + case R_ARM_CALL: + return llvm::isInt<26>(offset); + case R_ARM_THM_JUMP19: + return llvm::isInt<21>(offset); + case R_ARM_THM_JUMP24: + case R_ARM_THM_CALL: + return config->armJ1J2BranchEncoding ? llvm::isInt<25>(offset) + : llvm::isInt<23>(offset); + default: + return true; + } } // Helper to produce message text when LLD detects that a CALL relocation to diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index b4847dace4f7..8e4af09cfe02 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1763,14 +1763,17 @@ void ThunkCreator::mergeThunks(ArrayRef outputSections) { // Find or create a ThunkSection within the InputSectionDescription (ISD) that // is in range of Src. An ISD maps to a range of InputSections described by a // linker script section pattern such as { .text .text.* }. -ThunkSection *ThunkCreator::getISDThunkSec(OutputSection *os, InputSection *isec, +ThunkSection *ThunkCreator::getISDThunkSec(OutputSection *os, + InputSection *isec, InputSectionDescription *isd, - uint32_t type, uint64_t src) { + const Relocation &rel, + uint64_t src) { for (std::pair tp : isd->thunkSections) { ThunkSection *ts = tp.first; - uint64_t tsBase = os->addr + ts->outSecOff; - uint64_t tsLimit = tsBase + ts->getSize(); - if (target->inBranchRange(type, src, (src > tsLimit) ? tsBase : tsLimit)) + uint64_t tsBase = os->addr + ts->outSecOff + rel.addend; + uint64_t tsLimit = tsBase + ts->getSize() + rel.addend; + if (target->inBranchRange(rel.type, src, + (src > tsLimit) ? tsBase : tsLimit)) return ts; } @@ -1780,9 +1783,11 @@ ThunkSection *ThunkCreator::getISDThunkSec(OutputSection *os, InputSection *isec // possible. Error if InputSection is so large we cannot place ThunkSection // anywhere in Range. uint64_t thunkSecOff = isec->outSecOff; - if (!target->inBranchRange(type, src, os->addr + thunkSecOff)) { + if (!target->inBranchRange(rel.type, src, + os->addr + thunkSecOff + rel.addend)) { thunkSecOff = isec->outSecOff + isec->getSize(); - if (!target->inBranchRange(type, src, os->addr + thunkSecOff)) + if (!target->inBranchRange(rel.type, src, + os->addr + thunkSecOff + rel.addend)) fatal("InputSection too large for range extension thunk " + isec->getObjMsg(src - (os->addr + isec->outSecOff))); } @@ -1933,7 +1938,11 @@ static int64_t getPCBias(RelType type) { std::pair ThunkCreator::getThunk(InputSection *isec, Relocation &rel, uint64_t src) { std::vector *thunkVec = nullptr; - int64_t addend = rel.addend + getPCBias(rel.type); + // Arm and Thumb have a PC Bias of 8 and 4 respectively, this is cancelled + // out in the relocation addend. We compensate for the PC bias so that + // an Arm and Thumb relocation to the same destination get the same keyAddend, + // which is usually 0. + int64_t keyAddend = rel.addend + getPCBias(rel.type); // We use a ((section, offset), addend) pair to find the thunk position if // possible so that we create only one thunk for aliased symbols or ICFed @@ -1943,17 +1952,16 @@ std::pair ThunkCreator::getThunk(InputSection *isec, if (auto *d = dyn_cast(rel.sym)) if (!d->isInPlt() && d->section) thunkVec = &thunkedSymbolsBySectionAndAddend[{ - {d->section->repl, d->value}, addend}]; + {d->section->repl, d->value}, keyAddend}]; if (!thunkVec) - thunkVec = &thunkedSymbols[{rel.sym, addend}]; + thunkVec = &thunkedSymbols[{rel.sym, keyAddend}]; // Check existing Thunks for Sym to see if they can be reused for (Thunk *t : *thunkVec) if (isThunkSectionCompatible(isec, t->getThunkTargetSym()->section) && t->isCompatibleWith(*isec, rel) && target->inBranchRange(rel.type, src, - t->getThunkTargetSym()->getVA(rel.addend) + - getPCBias(rel.type))) + t->getThunkTargetSym()->getVA(rel.addend))) return std::make_pair(t, false); // No existing compatible Thunk in range, create a new one @@ -1968,8 +1976,7 @@ std::pair ThunkCreator::getThunk(InputSection *isec, // relocation back to its original non-Thunk target. bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) { if (Thunk *t = thunks.lookup(rel.sym)) { - if (target->inBranchRange(rel.type, src, - rel.sym->getVA(rel.addend) + getPCBias(rel.type))) + if (target->inBranchRange(rel.type, src, rel.sym->getVA(rel.addend))) return true; rel.sym = &t->destination; rel.addend = t->addend; @@ -2041,7 +2048,7 @@ bool ThunkCreator::createThunks(ArrayRef outputSections) { if (auto *tis = t->getTargetInputSection()) ts = getISThunkSec(tis); else - ts = getISDThunkSec(os, isec, isd, rel.type, src); + ts = getISDThunkSec(os, isec, isd, rel, src); ts->addThunk(t); thunks[t->getThunkTargetSym()] = t; } diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index 2c7d790d154c..dd0c9c385d12 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -148,8 +148,8 @@ private: void mergeThunks(ArrayRef outputSections); ThunkSection *getISDThunkSec(OutputSection *os, InputSection *isec, - InputSectionDescription *isd, uint32_t type, - uint64_t src); + InputSectionDescription *isd, + const Relocation &rel, uint64_t src); ThunkSection *getISThunkSec(InputSection *isec); diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index 6ac895051389..1fb3abaa60f5 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -72,7 +72,7 @@ public: // if the target is in range, otherwise it creates a long thunk. class ARMThunk : public Thunk { public: - ARMThunk(Symbol &dest) : Thunk(dest, 0) {} + ARMThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {} bool getMayUseShortThunk(); uint32_t size() override { return getMayUseShortThunk() ? 4 : sizeLong(); } @@ -102,7 +102,9 @@ private: // which has a range of 16MB. class ThumbThunk : public Thunk { public: - ThumbThunk(Symbol &dest) : Thunk(dest, 0) { alignment = 2; } + ThumbThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) { + alignment = 2; + } bool getMayUseShortThunk(); uint32_t size() override { return getMayUseShortThunk() ? 4 : sizeLong(); } @@ -125,7 +127,7 @@ private: // Source State, TargetState, Target Requirement, ABS or PI, Range class ARMV7ABSLongThunk final : public ARMThunk { public: - ARMV7ABSLongThunk(Symbol &dest) : ARMThunk(dest) {} + ARMV7ABSLongThunk(Symbol &dest, int64_t addend) : ARMThunk(dest, addend) {} uint32_t sizeLong() override { return 12; } void writeLong(uint8_t *buf) override; @@ -134,7 +136,7 @@ public: class ARMV7PILongThunk final : public ARMThunk { public: - ARMV7PILongThunk(Symbol &dest) : ARMThunk(dest) {} + ARMV7PILongThunk(Symbol &dest, int64_t addend) : ARMThunk(dest, addend) {} uint32_t sizeLong() override { return 16; } void writeLong(uint8_t *buf) override; @@ -143,7 +145,8 @@ public: class ThumbV7ABSLongThunk final : public ThumbThunk { public: - ThumbV7ABSLongThunk(Symbol &dest) : ThumbThunk(dest) {} + ThumbV7ABSLongThunk(Symbol &dest, int64_t addend) + : ThumbThunk(dest, addend) {} uint32_t sizeLong() override { return 10; } void writeLong(uint8_t *buf) override; @@ -152,7 +155,7 @@ public: class ThumbV7PILongThunk final : public ThumbThunk { public: - ThumbV7PILongThunk(Symbol &dest) : ThumbThunk(dest) {} + ThumbV7PILongThunk(Symbol &dest, int64_t addend) : ThumbThunk(dest, addend) {} uint32_t sizeLong() override { return 12; } void writeLong(uint8_t *buf) override; @@ -166,7 +169,7 @@ public: // can result in a thunk class ARMV5ABSLongThunk final : public ARMThunk { public: - ARMV5ABSLongThunk(Symbol &dest) : ARMThunk(dest) {} + ARMV5ABSLongThunk(Symbol &dest, int64_t addend) : ARMThunk(dest, addend) {} uint32_t sizeLong() override { return 8; } void writeLong(uint8_t *buf) override; @@ -177,7 +180,7 @@ public: class ARMV5PILongThunk final : public ARMThunk { public: - ARMV5PILongThunk(Symbol &dest) : ARMThunk(dest) {} + ARMV5PILongThunk(Symbol &dest, int64_t addend) : ARMThunk(dest, addend) {} uint32_t sizeLong() override { return 16; } void writeLong(uint8_t *buf) override; @@ -189,7 +192,8 @@ public: // Implementations of Thunks for Arm v6-M. Only Thumb instructions are permitted class ThumbV6MABSLongThunk final : public ThumbThunk { public: - ThumbV6MABSLongThunk(Symbol &dest) : ThumbThunk(dest) {} + ThumbV6MABSLongThunk(Symbol &dest, int64_t addend) + : ThumbThunk(dest, addend) {} uint32_t sizeLong() override { return 12; } void writeLong(uint8_t *buf) override; @@ -198,7 +202,8 @@ public: class ThumbV6MPILongThunk final : public ThumbThunk { public: - ThumbV6MPILongThunk(Symbol &dest) : ThumbThunk(dest) {} + ThumbV6MPILongThunk(Symbol &dest, int64_t addend) + : ThumbThunk(dest, addend) {} uint32_t sizeLong() override { return 16; } void writeLong(uint8_t *buf) override; @@ -1040,7 +1045,7 @@ static Thunk *addThunkAArch64(RelType type, Symbol &s, int64_t a) { // - MOVT and MOVW instructions cannot be used // - Only Thumb relocation that can generate a Thunk is a BL, this can always // be transformed into a BLX -static Thunk *addThunkPreArmv7(RelType reloc, Symbol &s) { +static Thunk *addThunkPreArmv7(RelType reloc, Symbol &s, int64_t a) { switch (reloc) { case R_ARM_PC24: case R_ARM_PLT32: @@ -1048,8 +1053,8 @@ static Thunk *addThunkPreArmv7(RelType reloc, Symbol &s) { case R_ARM_CALL: case R_ARM_THM_CALL: if (config->picThunk) - return make(s); - return make(s); + return make(s, a); + return make(s, a); } fatal("relocation " + toString(reloc) + " to " + toString(s) + " not supported for Armv5 or Armv6 targets"); @@ -1060,21 +1065,21 @@ static Thunk *addThunkPreArmv7(RelType reloc, Symbol &s) { // - MOVT and MOVW instructions cannot be used. // - Only a limited number of instructions can access registers r8 and above // - No interworking support is needed (all Thumb). -static Thunk *addThunkV6M(RelType reloc, Symbol &s) { +static Thunk *addThunkV6M(RelType reloc, Symbol &s, int64_t a) { switch (reloc) { case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: case R_ARM_THM_CALL: if (config->isPic) - return make(s); - return make(s); + return make(s, a); + return make(s, a); } fatal("relocation " + toString(reloc) + " to " + toString(s) + " not supported for Armv6-M targets"); } // Creates a thunk for Thumb-ARM interworking or branch range extension. -static Thunk *addThunkArm(RelType reloc, Symbol &s) { +static Thunk *addThunkArm(RelType reloc, Symbol &s, int64_t a) { // Decide which Thunk is needed based on: // Available instruction set // - An Arm Thunk can only be used if Arm state is available. @@ -1093,8 +1098,8 @@ static Thunk *addThunkArm(RelType reloc, Symbol &s) { // architecture to flag. if (!config->armHasMovtMovw) { if (!config->armJ1J2BranchEncoding) - return addThunkPreArmv7(reloc, s); - return addThunkV6M(reloc, s); + return addThunkPreArmv7(reloc, s, a); + return addThunkV6M(reloc, s, a); } switch (reloc) { @@ -1103,14 +1108,14 @@ static Thunk *addThunkArm(RelType reloc, Symbol &s) { case R_ARM_JUMP24: case R_ARM_CALL: if (config->picThunk) - return make(s); - return make(s); + return make(s, a); + return make(s, a); case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: case R_ARM_THM_CALL: if (config->picThunk) - return make(s); - return make(s); + return make(s, a); + return make(s, a); } fatal("unrecognized relocation type"); } @@ -1166,7 +1171,7 @@ Thunk *elf::addThunk(const InputSection &isec, Relocation &rel) { return addThunkAArch64(rel.type, s, a); if (config->emachine == EM_ARM) - return addThunkArm(rel.type, s); + return addThunkArm(rel.type, s, a); if (config->emachine == EM_MIPS) return addThunkMips(rel.type, s); diff --git a/lld/test/ELF/arm-thunk-arm-thumb-reuse.s b/lld/test/ELF/arm-thunk-arm-thumb-reuse.s new file mode 100644 index 000000000000..5b77c4fcf567 --- /dev/null +++ b/lld/test/ELF/arm-thunk-arm-thumb-reuse.s @@ -0,0 +1,61 @@ +// REQUIRES: arm +// RUN: split-file %s %t +// RUN: llvm-mc -arm-add-build-attributes -filetype=obj -triple=thumbv7a-none-linux-gnueabi %t/test.s -o %t.o +// RUN: ld.lld --script %t/script %t.o -o %t2 +// RUN: llvm-objdump --no-show-raw-insn -d %t2 | FileCheck %s + +/// Test that we can reuse thunks between Arm and Thumb callers +/// using a BL. Expect two thunks, one for far, one for far2. + +//--- script +SECTIONS { + .text 0x10000 : { *(.text) } + .text.far 0x10000000 : AT (0x10000000) { *(.far) } +} + +//--- test.s + +.syntax unified +.text +.globl _start +.type _start, %function + .arm +_start: + bl far + .thumb + bl far + bl far2 + .arm + bl far2 + +// CHECK: 00010000 <_start>: +// CHECK-NEXT: 10000: bl #8 <__ARMv7ABSLongThunk_far> +// CHECK: 00010004 <$t.1>: +// CHECK-NEXT: 10004: blx #8 +// CHECK-NEXT: 10008: bl #16 +// CHECK: 0001000c <$a.2>: +// CHECK-NEXT: 1000c: blx #8 <__Thumbv7ABSLongThunk_far2> +// CHECK: 00010010 <__ARMv7ABSLongThunk_far>: +// CHECK-NEXT: 10010: movw r12, #0 +// CHECK-NEXT: 10014: movt r12, #4096 +// CHECK-NEXT: 10018: bx r12 +// CHECK: 0001001c <__Thumbv7ABSLongThunk_far2>: +// CHECK-NEXT: 1001c: movw r12, #4 +// CHECK-NEXT: 10020: movt r12, #4096 +// CHECK-NEXT: 10024: bx r12 + +.section .text.far, "ax", %progbits +.globl far +.type far, %function +far: + bx lr +.globl far2 +.type far2, %function +far2: + bx lr + +// CHECK: Disassembly of section .text.far: +// CHECK: 10000000 : +// CHECK-NEXT: 10000000: bx lr +// CHECK: 10000004 : +// CHECK-NEXT: 10000004: bx lr