[LLD][ELF][ARM] Refactor inBranchRange to use addend for PC Bias

In AArch32 ARM, the PC reads two instructions ahead of the currently
executiing instruction. This evaluates to 8 in ARM state and 4 in
Thumb state. Branch instructions on AArch32 compensate for this by
subtracting the PC bias from the addend. For a branch to symbol this
will result in an addend of -8 in ARM state and -4 in Thumb state.

The existing ARM Target::inBranchRange function accounted for this
implict addend within the function meaning that if the addend were
to be taken into account by the caller then it would be double
counted. This complicates the interface for all Targets as callers
wanting to account for addends had to account for the ARM PC-bias.

In certain situations such as:
https://github.com/ClangBuiltLinux/linux/issues/1305
the PC-bias compensation code didn't match up. In particular
normalizeExistingThunk() didn't put the PC-bias back in as Arm
thunks did not store the addend.

The simplest fix for the problem is to add the PC bias in
normalizeExistingThunk when restoring the addend. However I think
it is worth refactoring the Arm inBranchRange implementation so
that fewer calls to getPCBias are needed for other Targets. I
wasn't able to remove getPCBias completely but hopefully the
Relocations.cpp code is simpler now.

In principle a test could be written to replicate the linux kernel
build failure but I wasn't able to reproduce with a small example
that I could build up from scratch.

Fixes https://github.com/ClangBuiltLinux/linux/issues/1305

Differential Revision: https://reviews.llvm.org/D97550
This commit is contained in:
Peter Smith 2021-02-26 13:14:21 +00:00
parent 4545813b17
commit e35929e026
5 changed files with 134 additions and 76 deletions

View File

@ -279,7 +279,7 @@ void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {
bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
uint64_t branchAddr, const Symbol &s,
int64_t /*a*/) const {
int64_t a) const {
// If S is an undefined weak symbol and does not have a PLT entry then it
// will be resolved as a branch to the next instruction.
if (s.isUndefWeak() && !s.isInPlt())
@ -298,7 +298,7 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
LLVM_FALLTHROUGH;
case R_ARM_CALL: {
uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA();
return !inBranchRange(type, branchAddr, dst);
return !inBranchRange(type, branchAddr, dst + a);
}
case R_ARM_THM_JUMP19:
case R_ARM_THM_JUMP24:
@ -309,7 +309,7 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
LLVM_FALLTHROUGH;
case R_ARM_THM_CALL: {
uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA();
return !inBranchRange(type, branchAddr, dst);
return !inBranchRange(type, branchAddr, dst + a);
}
}
return false;
@ -350,46 +350,31 @@ uint32_t ARM::getThunkSectionSpacing() const {
}
bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
uint64_t range;
uint64_t instrSize;
switch (type) {
case R_ARM_PC24:
case R_ARM_PLT32:
case R_ARM_JUMP24:
case R_ARM_CALL:
range = 0x2000000;
instrSize = 4;
break;
case R_ARM_THM_JUMP19:
range = 0x100000;
instrSize = 2;
break;
case R_ARM_THM_JUMP24:
case R_ARM_THM_CALL:
range = config->armJ1J2BranchEncoding ? 0x1000000 : 0x400000;
instrSize = 2;
break;
default:
return true;
}
// PC at Src is 2 instructions ahead, immediate of branch is signed
if (src > dst)
range -= 2 * instrSize;
else
range += instrSize;
if ((dst & 0x1) == 0)
// Destination is ARM, if ARM caller then Src is already 4-byte aligned.
// If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to ensure
// destination will be 4 byte aligned.
src &= ~0x3;
else
// Bit 0 == 1 denotes Thumb state, it is not part of the range
// Bit 0 == 1 denotes Thumb state, it is not part of the range.
dst &= ~0x1;
uint64_t distance = (src > dst) ? src - dst : dst - src;
return distance <= range;
int64_t offset = dst - src;
switch (type) {
case R_ARM_PC24:
case R_ARM_PLT32:
case R_ARM_JUMP24:
case R_ARM_CALL:
return llvm::isInt<26>(offset);
case R_ARM_THM_JUMP19:
return llvm::isInt<21>(offset);
case R_ARM_THM_JUMP24:
case R_ARM_THM_CALL:
return config->armJ1J2BranchEncoding ? llvm::isInt<25>(offset)
: llvm::isInt<23>(offset);
default:
return true;
}
}
// Helper to produce message text when LLD detects that a CALL relocation to

View File

@ -1763,14 +1763,17 @@ void ThunkCreator::mergeThunks(ArrayRef<OutputSection *> outputSections) {
// Find or create a ThunkSection within the InputSectionDescription (ISD) that
// is in range of Src. An ISD maps to a range of InputSections described by a
// linker script section pattern such as { .text .text.* }.
ThunkSection *ThunkCreator::getISDThunkSec(OutputSection *os, InputSection *isec,
ThunkSection *ThunkCreator::getISDThunkSec(OutputSection *os,
InputSection *isec,
InputSectionDescription *isd,
uint32_t type, uint64_t src) {
const Relocation &rel,
uint64_t src) {
for (std::pair<ThunkSection *, uint32_t> tp : isd->thunkSections) {
ThunkSection *ts = tp.first;
uint64_t tsBase = os->addr + ts->outSecOff;
uint64_t tsLimit = tsBase + ts->getSize();
if (target->inBranchRange(type, src, (src > tsLimit) ? tsBase : tsLimit))
uint64_t tsBase = os->addr + ts->outSecOff + rel.addend;
uint64_t tsLimit = tsBase + ts->getSize() + rel.addend;
if (target->inBranchRange(rel.type, src,
(src > tsLimit) ? tsBase : tsLimit))
return ts;
}
@ -1780,9 +1783,11 @@ ThunkSection *ThunkCreator::getISDThunkSec(OutputSection *os, InputSection *isec
// possible. Error if InputSection is so large we cannot place ThunkSection
// anywhere in Range.
uint64_t thunkSecOff = isec->outSecOff;
if (!target->inBranchRange(type, src, os->addr + thunkSecOff)) {
if (!target->inBranchRange(rel.type, src,
os->addr + thunkSecOff + rel.addend)) {
thunkSecOff = isec->outSecOff + isec->getSize();
if (!target->inBranchRange(type, src, os->addr + thunkSecOff))
if (!target->inBranchRange(rel.type, src,
os->addr + thunkSecOff + rel.addend))
fatal("InputSection too large for range extension thunk " +
isec->getObjMsg(src - (os->addr + isec->outSecOff)));
}
@ -1933,7 +1938,11 @@ static int64_t getPCBias(RelType type) {
std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec,
Relocation &rel, uint64_t src) {
std::vector<Thunk *> *thunkVec = nullptr;
int64_t addend = rel.addend + getPCBias(rel.type);
// Arm and Thumb have a PC Bias of 8 and 4 respectively, this is cancelled
// out in the relocation addend. We compensate for the PC bias so that
// an Arm and Thumb relocation to the same destination get the same keyAddend,
// which is usually 0.
int64_t keyAddend = rel.addend + getPCBias(rel.type);
// We use a ((section, offset), addend) pair to find the thunk position if
// possible so that we create only one thunk for aliased symbols or ICFed
@ -1943,17 +1952,16 @@ std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec,
if (auto *d = dyn_cast<Defined>(rel.sym))
if (!d->isInPlt() && d->section)
thunkVec = &thunkedSymbolsBySectionAndAddend[{
{d->section->repl, d->value}, addend}];
{d->section->repl, d->value}, keyAddend}];
if (!thunkVec)
thunkVec = &thunkedSymbols[{rel.sym, addend}];
thunkVec = &thunkedSymbols[{rel.sym, keyAddend}];
// Check existing Thunks for Sym to see if they can be reused
for (Thunk *t : *thunkVec)
if (isThunkSectionCompatible(isec, t->getThunkTargetSym()->section) &&
t->isCompatibleWith(*isec, rel) &&
target->inBranchRange(rel.type, src,
t->getThunkTargetSym()->getVA(rel.addend) +
getPCBias(rel.type)))
t->getThunkTargetSym()->getVA(rel.addend)))
return std::make_pair(t, false);
// No existing compatible Thunk in range, create a new one
@ -1968,8 +1976,7 @@ std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec,
// relocation back to its original non-Thunk target.
bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) {
if (Thunk *t = thunks.lookup(rel.sym)) {
if (target->inBranchRange(rel.type, src,
rel.sym->getVA(rel.addend) + getPCBias(rel.type)))
if (target->inBranchRange(rel.type, src, rel.sym->getVA(rel.addend)))
return true;
rel.sym = &t->destination;
rel.addend = t->addend;
@ -2041,7 +2048,7 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) {
if (auto *tis = t->getTargetInputSection())
ts = getISThunkSec(tis);
else
ts = getISDThunkSec(os, isec, isd, rel.type, src);
ts = getISDThunkSec(os, isec, isd, rel, src);
ts->addThunk(t);
thunks[t->getThunkTargetSym()] = t;
}

View File

@ -148,8 +148,8 @@ private:
void mergeThunks(ArrayRef<OutputSection *> outputSections);
ThunkSection *getISDThunkSec(OutputSection *os, InputSection *isec,
InputSectionDescription *isd, uint32_t type,
uint64_t src);
InputSectionDescription *isd,
const Relocation &rel, uint64_t src);
ThunkSection *getISThunkSec(InputSection *isec);

View File

@ -72,7 +72,7 @@ public:
// if the target is in range, otherwise it creates a long thunk.
class ARMThunk : public Thunk {
public:
ARMThunk(Symbol &dest) : Thunk(dest, 0) {}
ARMThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {}
bool getMayUseShortThunk();
uint32_t size() override { return getMayUseShortThunk() ? 4 : sizeLong(); }
@ -102,7 +102,9 @@ private:
// which has a range of 16MB.
class ThumbThunk : public Thunk {
public:
ThumbThunk(Symbol &dest) : Thunk(dest, 0) { alignment = 2; }
ThumbThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {
alignment = 2;
}
bool getMayUseShortThunk();
uint32_t size() override { return getMayUseShortThunk() ? 4 : sizeLong(); }
@ -125,7 +127,7 @@ private:
// Source State, TargetState, Target Requirement, ABS or PI, Range
class ARMV7ABSLongThunk final : public ARMThunk {
public:
ARMV7ABSLongThunk(Symbol &dest) : ARMThunk(dest) {}
ARMV7ABSLongThunk(Symbol &dest, int64_t addend) : ARMThunk(dest, addend) {}
uint32_t sizeLong() override { return 12; }
void writeLong(uint8_t *buf) override;
@ -134,7 +136,7 @@ public:
class ARMV7PILongThunk final : public ARMThunk {
public:
ARMV7PILongThunk(Symbol &dest) : ARMThunk(dest) {}
ARMV7PILongThunk(Symbol &dest, int64_t addend) : ARMThunk(dest, addend) {}
uint32_t sizeLong() override { return 16; }
void writeLong(uint8_t *buf) override;
@ -143,7 +145,8 @@ public:
class ThumbV7ABSLongThunk final : public ThumbThunk {
public:
ThumbV7ABSLongThunk(Symbol &dest) : ThumbThunk(dest) {}
ThumbV7ABSLongThunk(Symbol &dest, int64_t addend)
: ThumbThunk(dest, addend) {}
uint32_t sizeLong() override { return 10; }
void writeLong(uint8_t *buf) override;
@ -152,7 +155,7 @@ public:
class ThumbV7PILongThunk final : public ThumbThunk {
public:
ThumbV7PILongThunk(Symbol &dest) : ThumbThunk(dest) {}
ThumbV7PILongThunk(Symbol &dest, int64_t addend) : ThumbThunk(dest, addend) {}
uint32_t sizeLong() override { return 12; }
void writeLong(uint8_t *buf) override;
@ -166,7 +169,7 @@ public:
// can result in a thunk
class ARMV5ABSLongThunk final : public ARMThunk {
public:
ARMV5ABSLongThunk(Symbol &dest) : ARMThunk(dest) {}
ARMV5ABSLongThunk(Symbol &dest, int64_t addend) : ARMThunk(dest, addend) {}
uint32_t sizeLong() override { return 8; }
void writeLong(uint8_t *buf) override;
@ -177,7 +180,7 @@ public:
class ARMV5PILongThunk final : public ARMThunk {
public:
ARMV5PILongThunk(Symbol &dest) : ARMThunk(dest) {}
ARMV5PILongThunk(Symbol &dest, int64_t addend) : ARMThunk(dest, addend) {}
uint32_t sizeLong() override { return 16; }
void writeLong(uint8_t *buf) override;
@ -189,7 +192,8 @@ public:
// Implementations of Thunks for Arm v6-M. Only Thumb instructions are permitted
class ThumbV6MABSLongThunk final : public ThumbThunk {
public:
ThumbV6MABSLongThunk(Symbol &dest) : ThumbThunk(dest) {}
ThumbV6MABSLongThunk(Symbol &dest, int64_t addend)
: ThumbThunk(dest, addend) {}
uint32_t sizeLong() override { return 12; }
void writeLong(uint8_t *buf) override;
@ -198,7 +202,8 @@ public:
class ThumbV6MPILongThunk final : public ThumbThunk {
public:
ThumbV6MPILongThunk(Symbol &dest) : ThumbThunk(dest) {}
ThumbV6MPILongThunk(Symbol &dest, int64_t addend)
: ThumbThunk(dest, addend) {}
uint32_t sizeLong() override { return 16; }
void writeLong(uint8_t *buf) override;
@ -1040,7 +1045,7 @@ static Thunk *addThunkAArch64(RelType type, Symbol &s, int64_t a) {
// - MOVT and MOVW instructions cannot be used
// - Only Thumb relocation that can generate a Thunk is a BL, this can always
// be transformed into a BLX
static Thunk *addThunkPreArmv7(RelType reloc, Symbol &s) {
static Thunk *addThunkPreArmv7(RelType reloc, Symbol &s, int64_t a) {
switch (reloc) {
case R_ARM_PC24:
case R_ARM_PLT32:
@ -1048,8 +1053,8 @@ static Thunk *addThunkPreArmv7(RelType reloc, Symbol &s) {
case R_ARM_CALL:
case R_ARM_THM_CALL:
if (config->picThunk)
return make<ARMV5PILongThunk>(s);
return make<ARMV5ABSLongThunk>(s);
return make<ARMV5PILongThunk>(s, a);
return make<ARMV5ABSLongThunk>(s, a);
}
fatal("relocation " + toString(reloc) + " to " + toString(s) +
" not supported for Armv5 or Armv6 targets");
@ -1060,21 +1065,21 @@ static Thunk *addThunkPreArmv7(RelType reloc, Symbol &s) {
// - MOVT and MOVW instructions cannot be used.
// - Only a limited number of instructions can access registers r8 and above
// - No interworking support is needed (all Thumb).
static Thunk *addThunkV6M(RelType reloc, Symbol &s) {
static Thunk *addThunkV6M(RelType reloc, Symbol &s, int64_t a) {
switch (reloc) {
case R_ARM_THM_JUMP19:
case R_ARM_THM_JUMP24:
case R_ARM_THM_CALL:
if (config->isPic)
return make<ThumbV6MPILongThunk>(s);
return make<ThumbV6MABSLongThunk>(s);
return make<ThumbV6MPILongThunk>(s, a);
return make<ThumbV6MABSLongThunk>(s, a);
}
fatal("relocation " + toString(reloc) + " to " + toString(s) +
" not supported for Armv6-M targets");
}
// Creates a thunk for Thumb-ARM interworking or branch range extension.
static Thunk *addThunkArm(RelType reloc, Symbol &s) {
static Thunk *addThunkArm(RelType reloc, Symbol &s, int64_t a) {
// Decide which Thunk is needed based on:
// Available instruction set
// - An Arm Thunk can only be used if Arm state is available.
@ -1093,8 +1098,8 @@ static Thunk *addThunkArm(RelType reloc, Symbol &s) {
// architecture to flag.
if (!config->armHasMovtMovw) {
if (!config->armJ1J2BranchEncoding)
return addThunkPreArmv7(reloc, s);
return addThunkV6M(reloc, s);
return addThunkPreArmv7(reloc, s, a);
return addThunkV6M(reloc, s, a);
}
switch (reloc) {
@ -1103,14 +1108,14 @@ static Thunk *addThunkArm(RelType reloc, Symbol &s) {
case R_ARM_JUMP24:
case R_ARM_CALL:
if (config->picThunk)
return make<ARMV7PILongThunk>(s);
return make<ARMV7ABSLongThunk>(s);
return make<ARMV7PILongThunk>(s, a);
return make<ARMV7ABSLongThunk>(s, a);
case R_ARM_THM_JUMP19:
case R_ARM_THM_JUMP24:
case R_ARM_THM_CALL:
if (config->picThunk)
return make<ThumbV7PILongThunk>(s);
return make<ThumbV7ABSLongThunk>(s);
return make<ThumbV7PILongThunk>(s, a);
return make<ThumbV7ABSLongThunk>(s, a);
}
fatal("unrecognized relocation type");
}
@ -1166,7 +1171,7 @@ Thunk *elf::addThunk(const InputSection &isec, Relocation &rel) {
return addThunkAArch64(rel.type, s, a);
if (config->emachine == EM_ARM)
return addThunkArm(rel.type, s);
return addThunkArm(rel.type, s, a);
if (config->emachine == EM_MIPS)
return addThunkMips(rel.type, s);

View File

@ -0,0 +1,61 @@
// REQUIRES: arm
// RUN: split-file %s %t
// RUN: llvm-mc -arm-add-build-attributes -filetype=obj -triple=thumbv7a-none-linux-gnueabi %t/test.s -o %t.o
// RUN: ld.lld --script %t/script %t.o -o %t2
// RUN: llvm-objdump --no-show-raw-insn -d %t2 | FileCheck %s
/// Test that we can reuse thunks between Arm and Thumb callers
/// using a BL. Expect two thunks, one for far, one for far2.
//--- script
SECTIONS {
.text 0x10000 : { *(.text) }
.text.far 0x10000000 : AT (0x10000000) { *(.far) }
}
//--- test.s
.syntax unified
.text
.globl _start
.type _start, %function
.arm
_start:
bl far
.thumb
bl far
bl far2
.arm
bl far2
// CHECK: 00010000 <_start>:
// CHECK-NEXT: 10000: bl #8 <__ARMv7ABSLongThunk_far>
// CHECK: 00010004 <$t.1>:
// CHECK-NEXT: 10004: blx #8
// CHECK-NEXT: 10008: bl #16
// CHECK: 0001000c <$a.2>:
// CHECK-NEXT: 1000c: blx #8 <__Thumbv7ABSLongThunk_far2>
// CHECK: 00010010 <__ARMv7ABSLongThunk_far>:
// CHECK-NEXT: 10010: movw r12, #0
// CHECK-NEXT: 10014: movt r12, #4096
// CHECK-NEXT: 10018: bx r12
// CHECK: 0001001c <__Thumbv7ABSLongThunk_far2>:
// CHECK-NEXT: 1001c: movw r12, #4
// CHECK-NEXT: 10020: movt r12, #4096
// CHECK-NEXT: 10024: bx r12
.section .text.far, "ax", %progbits
.globl far
.type far, %function
far:
bx lr
.globl far2
.type far2, %function
far2:
bx lr
// CHECK: Disassembly of section .text.far:
// CHECK: 10000000 <far>:
// CHECK-NEXT: 10000000: bx lr
// CHECK: 10000004 <far2>:
// CHECK-NEXT: 10000004: bx lr