mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-10-10 04:44:53 +00:00
[ELF][ARM] Implement --fix-cortex-a8 to fix erratum 657417
The --fix-cortex-a8 option implements a linker workaround for the Cortex-A8 erratum 657417. A summary of the erratum conditions is: - A 32-bit Thumb-2 branch instruction (B.w, Bcc.w, BL or BLX) spans two 4KiB regions. - The destination of the branch is in the first 4KiB region. - The instruction before the branch is a 32-bit Thumb-2 non-branch instruction. The linker fix is to redirect the branch to a patch not in the first 4KiB region. The patch forwards the branch on to its target. The Cortex-A8 is an old CPU, with the first implementation of this workaround in ld.bfd appearing in 2009. The Cortex-A8 has been used in early Android phones and there are some critical applications that still need to run on a Cortex-A8 that has the erratum. The patch is applied roughly 10 times on LLD and 20 on Clang when they are built with --fix-cortex-a8 on an Arm system. The formal erratum description is available in the ARM Core Cortex-A8 (AT400/AT401) Errata Notice document. This is available from Arm on request but it seems to be findable via a web search. Differential Revision: https://reviews.llvm.org/D67284 llvm-svn: 371965
This commit is contained in:
parent
ad7a7cea89
commit
ea99ce5e9b
@ -6,7 +6,10 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This file implements Section Patching for the purpose of working around
|
||||
// errata in CPUs. The general principle is that an erratum sequence of one or
|
||||
// the AArch64 Cortex-A53 erratum 843419 that affects r0p0, r0p1, r0p2 and r0p4
|
||||
// versions of the core.
|
||||
//
|
||||
// The general principle is that an erratum sequence of one or
|
||||
// more instructions is detected in the instruction stream, one of the
|
||||
// instructions in the sequence is replaced with a branch to a patch sequence
|
||||
// of replacement instructions. At the end of the replacement sequence the
|
||||
@ -20,12 +23,6 @@
|
||||
// - We can overwrite an instruction in the erratum sequence with a branch to
|
||||
// the replacement sequence.
|
||||
// - We can place the replacement sequence within range of the branch.
|
||||
|
||||
// FIXME:
|
||||
// - The implementation here only supports one patch, the AArch64 Cortex-53
|
||||
// errata 843419 that affects r0p0, r0p1, r0p2 and r0p4 versions of the core.
|
||||
// To keep the initial version simple there is no support for multiple
|
||||
// architectures or selection of different patches.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AArch64ErrataFix.h"
|
||||
|
528
lld/ELF/ARMErrataFix.cpp
Normal file
528
lld/ELF/ARMErrataFix.cpp
Normal file
@ -0,0 +1,528 @@
|
||||
//===- ARMErrataFix.cpp ---------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This file implements Section Patching for the purpose of working around the
|
||||
// Cortex-a8 erratum 657417 "A 32bit branch instruction that spans 2 4K regions
|
||||
// can result in an incorrect instruction fetch or processor deadlock." The
|
||||
// erratum affects all but r1p7, r2p5, r2p6, r3p1 and r3p2 revisions of the
|
||||
// Cortex-A8. A high level description of the patching technique is given in
|
||||
// the opening comment of AArch64ErrataFix.cpp.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARMErrataFix.h"
|
||||
|
||||
#include "Config.h"
|
||||
#include "LinkerScript.h"
|
||||
#include "OutputSections.h"
|
||||
#include "Relocations.h"
|
||||
#include "Symbols.h"
|
||||
#include "SyntheticSections.h"
|
||||
#include "Target.h"
|
||||
#include "lld/Common/Memory.h"
|
||||
#include "lld/Common/Strings.h"
|
||||
#include "llvm/Support/Endian.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <algorithm>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::ELF;
|
||||
using namespace llvm::object;
|
||||
using namespace llvm::support;
|
||||
using namespace llvm::support::endian;
|
||||
|
||||
namespace lld {
|
||||
namespace elf {
|
||||
|
||||
// The documented title for Erratum 657417 is:
|
||||
// "A 32bit branch instruction that spans two 4K regions can result in an
|
||||
// incorrect instruction fetch or processor deadlock". Graphically using a
|
||||
// 32-bit B.w instruction encoded as a pair of halfwords 0xf7fe 0xbfff
|
||||
// xxxxxx000 // Memory region 1 start
|
||||
// target:
|
||||
// ...
|
||||
// xxxxxxffe f7fe // First halfword of branch to target:
|
||||
// xxxxxx000 // Memory region 2 start
|
||||
// xxxxxx000 bfff // Second halfword of branch to target:
|
||||
//
|
||||
// The specific trigger conditions that can be detected at link time are:
|
||||
// - There is a 32-bit Thumb-2 branch instruction with an address of the form
|
||||
// xxxxxxFFE. The first 2 bytes of the instruction are in 4KiB region 1, the
|
||||
// second 2 bytes are in region 2.
|
||||
// - The branch instruction is one of BLX, BL, B.w or Bcc.w
|
||||
// - The instruction preceding the branch is a 32-bit non-branch instruction.
|
||||
// - The target of the branch is in region 1.
|
||||
//
|
||||
// The linker mitigation for the fix is to redirect any branch that meets the
|
||||
// erratum conditions to a patch section containing a branch to the target.
|
||||
//
|
||||
// As adding patch sections may move branches onto region boundaries the patch
|
||||
// must iterate until no more patches are added.
|
||||
//
|
||||
// Example, before:
|
||||
// 00000FFA func: NOP.w // 32-bit Thumb function
|
||||
// 00000FFE B.W func // 32-bit branch spanning 2 regions, dest in 1st.
|
||||
// Example, after:
|
||||
// 00000FFA func: NOP.w // 32-bit Thumb function
|
||||
// 00000FFE B.w __CortexA8657417_00000FFE
|
||||
// 00001002 2 - bytes padding
|
||||
// 00001004 __CortexA8657417_00000FFE: B.w func
|
||||
|
||||
class Patch657417Section : public SyntheticSection {
|
||||
public:
|
||||
Patch657417Section(InputSection *p, uint64_t off, uint32_t instr, bool isARM);
|
||||
|
||||
void writeTo(uint8_t *buf) override;
|
||||
|
||||
size_t getSize() const override { return 4; }
|
||||
|
||||
// Get the virtual address of the branch instruction at patcheeOffset.
|
||||
uint64_t getBranchAddr() const;
|
||||
|
||||
// The Section we are patching.
|
||||
const InputSection *patchee;
|
||||
// The offset of the instruction in the Patchee section we are patching.
|
||||
uint64_t patcheeOffset;
|
||||
// A label for the start of the Patch that we can use as a relocation target.
|
||||
Symbol *patchSym;
|
||||
// A decoding of the branch instruction at patcheeOffset.
|
||||
uint32_t instr;
|
||||
// True If the patch is to be written in ARM state, otherwise the patch will
|
||||
// be written in Thumb state.
|
||||
bool isARM;
|
||||
};
|
||||
|
||||
// Return true if hw, taken as the first halfword of an instruction, begins a
// 32-bit Thumb instruction.
// Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition
// section A6.3: 32-bit Thumb instruction encoding
// |             HW1                   |               HW2                |
// | 1 1 1 | op1 (2) | op2 (7) | x (4) |op|           x (15)              |
// With op1 == 0b00, a 16-bit instruction is encoded.
//
// Only the first halfword is inspected: the top three bits must be 0b111 and
// op1 must be non-zero.
static bool is32bitInstruction(uint16_t hw) {
  const uint16_t topBits = hw & 0xe000; // HW1[15:13]
  const uint16_t op1 = hw & 0x1800;     // HW1[12:11]
  return topBits == 0xe000 && op1 != 0;
}
|
||||
|
||||
// Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition
// section A6.3.4 Branches and miscellaneous control.
// |             HW1              |               HW2                |
// | 1 1 1 | 1 0 | op (7) | x (4) | 1 | op1 (3) | op2 (4) | imm8 (8) |
// op1 == 0x0 op != x111xxx | Conditional branch (Bcc.W)
// op1 == 0x1               | Branch (B.W)
// op1 == 1x0               | Branch with Link and Exchange (BLX.w)
// op1 == 1x1               | Branch with Link (BL.W)
//
// Each predicate below takes the instruction as (HW1 << 16) | HW2.

// Return true if instr is a 32-bit Thumb-2 conditional branch (Bcc.W).
static bool isBcc(uint32_t instr) {
  // HW1[15:11] must be 0b11110, HW2[15] set, HW2[14] and HW2[12] clear.
  if ((instr & 0xf800d000) != 0xf0008000)
    return false;
  // Bits [25:23] all set correspond to op == x111xxx, which is excluded.
  return (instr & 0x03800000) != 0x03800000;
}
|
||||
|
||||
// Return true if instr is a 32-bit Thumb-2 unconditional branch (B.W):
// HW1[15:11] == 0b11110, HW2[15] set, HW2[14] clear, HW2[12] set.
static bool isB(uint32_t instr) {
  const uint32_t selected = instr & 0xf800d000;
  return selected == 0xf0009000;
}
|
||||
|
||||
// Return true if instr is a 32-bit Thumb-2 branch with link and exchange
// (BLX): HW1[15:11] == 0b11110, HW2[15:14] == 0b11, HW2[12] clear.
static bool isBLX(uint32_t instr) {
  const uint32_t selected = instr & 0xf800d000;
  return selected == 0xf000c000;
}
|
||||
|
||||
// Return true if instr is a 32-bit Thumb-2 branch with link (BL):
// HW1[15:11] == 0b11110, HW2[15:14] == 0b11, HW2[12] set.
static bool isBL(uint32_t instr) {
  const uint32_t selected = instr & 0xf800d000;
  return selected == 0xf000d000;
}
|
||||
|
||||
static bool is32bitBranch(uint32_t instr) {
|
||||
return isBcc(instr) || isB(instr) || isBL(instr) || isBLX(instr);
|
||||
}
|
||||
|
||||
// Create a patch for the branch at offset off within section p. instr is the
// decoded branch and isARM selects the state the patch is written in. The
// patch joins p's output section and defines two local symbols: a function
// symbol named after the branch address and a mapping symbol for its state.
Patch657417Section::Patch657417Section(InputSection *p, uint64_t off,
                                       uint32_t instr, bool isARM)
    : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4,
                       ".text.patch"),
      patchee(p), patcheeOffset(off), instr(instr), isARM(isARM) {
  parent = p->getParent();

  // The patch label has value 0 for an ARM patch and 1 for a Thumb patch.
  const uint64_t labelValue = isARM ? 0 : 1;
  StringRef labelName =
      saver.save("__CortexA8657417_" + utohexstr(getBranchAddr()));
  patchSym =
      addSyntheticLocal(labelName, STT_FUNC, labelValue, getSize(), *this);

  // Mapping symbol at offset 0: $a for ARM code, $t for Thumb code.
  const char *mapSymName = isARM ? "$a" : "$t";
  addSyntheticLocal(saver.save(mapSymName), STT_NOTYPE, 0, 0, *this);
}
|
||||
|
||||
uint64_t Patch657417Section::getBranchAddr() const {
|
||||
return patchee->getVA(patcheeOffset);
|
||||
}
|
||||
|
||||
// Given a branch instruction instr at sourceAddr work out its destination
|
||||
// address. This is only used when the branch instruction has no relocation.
|
||||
static uint64_t getThumbDestAddr(uint64_t sourceAddr, uint32_t instr) {
|
||||
uint8_t buf[4];
|
||||
write16le(buf, instr >> 16);
|
||||
write16le(buf + 2, instr & 0x0000ffff);
|
||||
int64_t offset;
|
||||
if (isBcc(instr))
|
||||
offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP19);
|
||||
else if (isB(instr))
|
||||
offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP24);
|
||||
else
|
||||
offset = target->getImplicitAddend(buf, R_ARM_THM_CALL);
|
||||
return sourceAddr + offset + 4;
|
||||
}
|
||||
|
||||
void Patch657417Section::writeTo(uint8_t *buf) {
|
||||
// The base instruction of the patch is always a 32-bit unconditional branch.
|
||||
if (isARM)
|
||||
write32le(buf, 0xea000000);
|
||||
else
|
||||
write32le(buf, 0x9000f000);
|
||||
// If we have a relocation then apply it. For a SyntheticSection buf already
|
||||
// has outSecOff added, but relocateAlloc also adds outSecOff so we need to
|
||||
// subtract to avoid double counting.
|
||||
if (!relocations.empty()) {
|
||||
relocateAlloc(buf - outSecOff, buf - outSecOff + getSize());
|
||||
return;
|
||||
}
|
||||
|
||||
// If we don't have a relocation then we must calculate and write the offset
|
||||
// ourselves.
|
||||
// Get the destination offset from the addend in the branch instruction.
|
||||
// We cannot use the instruction in the patchee section as this will have
|
||||
// been altered to point to us!
|
||||
uint64_t s = getThumbDestAddr(getBranchAddr(), instr);
|
||||
uint64_t p = getVA(4);
|
||||
target->relocateOne(buf, isARM ? R_ARM_JUMP24 : R_ARM_THM_JUMP24, s - p);
|
||||
}
|
||||
|
||||
// Given a branch instruction spanning two 4KiB regions, at offset off from the
|
||||
// start of isec, return true if the destination of the branch is within the
|
||||
// first of the two 4Kib regions.
|
||||
static bool branchDestInFirstRegion(const InputSection *isec, uint64_t off,
|
||||
uint32_t instr, const Relocation *r) {
|
||||
uint64_t sourceAddr = isec->getVA(0) + off;
|
||||
assert((sourceAddr & 0xfff) == 0xffe);
|
||||
uint64_t destAddr = sourceAddr;
|
||||
// If there is a branch relocation at the same offset we must use this to
|
||||
// find the destination address as the branch could be indirected via a thunk
|
||||
// or the PLT.
|
||||
if (r) {
|
||||
uint64_t dst = (r->expr == R_PLT_PC) ? r->sym->getPltVA() : r->sym->getVA();
|
||||
// Account for Thumb PC bias, usually cancelled to 0 by addend of -4.
|
||||
destAddr = dst + r->addend + 4;
|
||||
} else {
|
||||
// If there is no relocation, we must have an intra-section branch
|
||||
// We must extract the offset from the addend manually.
|
||||
destAddr = getThumbDestAddr(sourceAddr, instr);
|
||||
}
|
||||
|
||||
return (destAddr & 0xfffff000) == (sourceAddr & 0xfffff000);
|
||||
}
|
||||
|
||||
// Return true if a branch can reach a patch section placed after isec.
|
||||
// The Bcc.w instruction has a range of 1 MiB, all others have 16 MiB.
|
||||
static bool patchInRange(const InputSection *isec, uint64_t off,
|
||||
uint32_t instr) {
|
||||
|
||||
// We need the branch at source to reach a patch section placed immediately
|
||||
// after isec. As there can be more than one patch in the patch section we
|
||||
// add 0x100 as contingency to account for worst case of 1 branch every 4KiB
|
||||
// for a 1 MiB range.
|
||||
return target->inBranchRange(
|
||||
isBcc(instr) ? R_ARM_THM_JUMP19 : R_ARM_THM_JUMP24, isec->getVA(off),
|
||||
isec->getVA() + isec->getSize() + 0x100);
|
||||
}
|
||||
|
||||
struct ScanResult {
|
||||
// Offset of branch within its InputSection.
|
||||
uint64_t off;
|
||||
// Cached decoding of the branch instruction.
|
||||
uint32_t instr;
|
||||
// Branch relocation at off. Will be nullptr if no relocation exists.
|
||||
Relocation *rel;
|
||||
};
|
||||
|
||||
// Detect the erratum sequence, returning the offset of the branch instruction
|
||||
// and a decoding of the branch. If the erratum sequence is not found then
|
||||
// return an offset of 0 for the branch. 0 is a safe value to use for no patch
|
||||
// as there must be at least one 32-bit non-branch instruction before the
|
||||
// branch so the minimum offset for a patch is 4.
|
||||
static ScanResult scanCortexA8Errata657417(InputSection *isec, uint64_t &off,
|
||||
uint64_t limit) {
|
||||
uint64_t isecAddr = isec->getVA(0);
|
||||
// Advance Off so that (isecAddr + off) modulo 0x1000 is at least 0xffa. We
|
||||
// need to check for a 32-bit instruction immediately before a 32-bit branch
|
||||
// at 0xffe modulo 0x1000.
|
||||
off = alignTo(isecAddr + off, 0x1000, 0xffa) - isecAddr;
|
||||
if (off >= limit || limit - off < 8) {
|
||||
// Need at least 2 4-byte sized instructions to trigger erratum.
|
||||
off = limit;
|
||||
return {0, 0};
|
||||
}
|
||||
|
||||
ScanResult scanRes = {0, 0, nullptr};
|
||||
const uint8_t *buf = isec->data().begin();
|
||||
// ARMv7-A Thumb 32-bit instructions are encoded 2 consecutive
|
||||
// little-endian halfwords.
|
||||
const ulittle16_t *instBuf = reinterpret_cast<const ulittle16_t *>(buf + off);
|
||||
uint16_t hw11 = *instBuf++;
|
||||
uint16_t hw12 = *instBuf++;
|
||||
uint16_t hw21 = *instBuf++;
|
||||
uint16_t hw22 = *instBuf++;
|
||||
if (is32bitInstruction(hw11) && is32bitInstruction(hw21)) {
|
||||
uint32_t instr1 = (hw11 << 16) | hw12;
|
||||
uint32_t instr2 = (hw21 << 16) | hw22;
|
||||
if (!is32bitBranch(instr1) && is32bitBranch(instr2)) {
|
||||
// Find a relocation for the branch if it exists. This will be used
|
||||
// to determine the target.
|
||||
uint64_t branchOff = off + 4;
|
||||
auto relIt = llvm::find_if(isec->relocations, [=](const Relocation &r) {
|
||||
return r.offset == branchOff &&
|
||||
(r.type == R_ARM_THM_JUMP19 || r.type == R_ARM_THM_JUMP24 ||
|
||||
r.type == R_ARM_THM_CALL);
|
||||
});
|
||||
if (relIt != isec->relocations.end())
|
||||
scanRes.rel = &(*relIt);
|
||||
if (branchDestInFirstRegion(isec, branchOff, instr2, scanRes.rel)) {
|
||||
if (patchInRange(isec, branchOff, instr2)) {
|
||||
scanRes.off = branchOff;
|
||||
scanRes.instr = instr2;
|
||||
} else {
|
||||
warn(toString(isec->file) +
|
||||
": skipping cortex-a8 657417 erratum sequence, section " +
|
||||
isec->name + " is too large to patch");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
off += 0x1000;
|
||||
return scanRes;
|
||||
}
|
||||
|
||||
void ARMErr657417Patcher::init() {
|
||||
// The Arm ABI permits a mix of ARM, Thumb and Data in the same
|
||||
// InputSection. We must only scan Thumb instructions to avoid false
|
||||
// matches. We use the mapping symbols in the InputObjects to identify this
|
||||
// data, caching the results in sectionMap so we don't have to recalculate
|
||||
// it each pass.
|
||||
|
||||
// The ABI Section 4.5.5 Mapping symbols; defines local symbols that describe
|
||||
// half open intervals [Symbol Value, Next Symbol Value) of code and data
|
||||
// within sections. If there is no next symbol then the half open interval is
|
||||
// [Symbol Value, End of section). The type, code or data, is determined by
|
||||
// the mapping symbol name, $a for Arm code, $t for Thumb code, $d for data.
|
||||
auto isArmMapSymbol = [](const Symbol *s) {
|
||||
return s->getName() == "$a" || s->getName().startswith("$a.");
|
||||
};
|
||||
auto isThumbMapSymbol = [](const Symbol *s) {
|
||||
return s->getName() == "$t" || s->getName().startswith("$t.");
|
||||
};
|
||||
auto isDataMapSymbol = [](const Symbol *s) {
|
||||
return s->getName() == "$d" || s->getName().startswith("$d.");
|
||||
};
|
||||
|
||||
// Collect mapping symbols for every executable InputSection.
|
||||
for (InputFile *file : objectFiles) {
|
||||
auto *f = cast<ObjFile<ELF32LE>>(file);
|
||||
for (Symbol *s : f->getLocalSymbols()) {
|
||||
auto *def = dyn_cast<Defined>(s);
|
||||
if (!def)
|
||||
continue;
|
||||
if (!isArmMapSymbol(def) && !isThumbMapSymbol(def) &&
|
||||
!isDataMapSymbol(def))
|
||||
continue;
|
||||
if (auto *sec = dyn_cast_or_null<InputSection>(def->section))
|
||||
if (sec->flags & SHF_EXECINSTR)
|
||||
sectionMap[sec].push_back(def);
|
||||
}
|
||||
}
|
||||
// For each InputSection make sure the mapping symbols are in sorted in
|
||||
// ascending order and are in alternating Thumb, non-Thumb order.
|
||||
for (auto &kv : sectionMap) {
|
||||
std::vector<const Defined *> &mapSyms = kv.second;
|
||||
llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) {
|
||||
return a->value < b->value;
|
||||
});
|
||||
mapSyms.erase(std::unique(mapSyms.begin(), mapSyms.end(),
|
||||
[=](const Defined *a, const Defined *b) {
|
||||
return (isThumbMapSymbol(a) ==
|
||||
isThumbMapSymbol(b));
|
||||
}),
|
||||
mapSyms.end());
|
||||
// Always start with a Thumb Mapping Symbol
|
||||
if (!mapSyms.empty() && !isThumbMapSymbol(mapSyms.front()))
|
||||
mapSyms.erase(mapSyms.begin());
|
||||
}
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
// Assign an insertion point (outSecOff) to every patch in patches and merge
// the patches into isd.sections at those points.
void ARMErr657417Patcher::insertPatches(
    InputSectionDescription &isd, std::vector<Patch657417Section *> &patches) {
  // Placement follows a strategy similar to initial thunk placement: 1 MiB,
  // the range of the Thumb-2 conditional branch, less a contingency that
  // accounts for thunk generation.
  const uint64_t spacing = 0x100000 - 0x7500;
  InputSection *firstSec = isd.sections.front();
  const uint64_t outSecAddr = firstSec->getParent()->addr;
  uint64_t prevIsecLimit = firstSec->outSecOff;
  uint64_t patchUpperBound = prevIsecLimit + spacing;
  uint64_t isecLimit;

  // Set the outSecOff of patches to the place where we want to insert them.
  auto patchIt = patches.begin();
  auto patchEnd = patches.end();
  for (const InputSection *isec : isd.sections) {
    isecLimit = isec->outSecOff + isec->getSize();
    if (isecLimit > patchUpperBound) {
      // This section crosses the current bound: every pending patch whose
      // branch lies before the end of the previous section is placed there.
      while (patchIt != patchEnd &&
             (*patchIt)->getBranchAddr() - outSecAddr < prevIsecLimit) {
        (*patchIt)->outSecOff = prevIsecLimit;
        ++patchIt;
      }
      patchUpperBound = prevIsecLimit + spacing;
    }
    prevIsecLimit = isecLimit;
  }
  // Any patches left over go after the last section.
  while (patchIt != patchEnd) {
    (*patchIt)->outSecOff = isecLimit;
    ++patchIt;
  }

  // Merge all patch sections. We use the outSecOff assigned above to
  // determine the insertion point. This is ok as we only merge into an
  // InputSectionDescription once per pass, and at the end of the pass
  // assignAddresses() will recalculate all the outSecOff values.
  auto patchFirstCmp = [](const InputSection *a, const InputSection *b) {
    if (a->outSecOff == b->outSecOff)
      // On a tie, a patch section sorts before a non-patch section.
      return isa<Patch657417Section>(a) && !isa<Patch657417Section>(b);
    return a->outSecOff < b->outSecOff;
  };
  std::vector<InputSection *> merged;
  merged.reserve(isd.sections.size() + patches.size());
  std::merge(isd.sections.begin(), isd.sections.end(), patches.begin(),
             patches.end(), std::back_inserter(merged), patchFirstCmp);
  isd.sections = std::move(merged);
}
|
||||
|
||||
// Given a branch instruction described by ScanRes redirect it to a patch
|
||||
// section containing an unconditional branch instruction to the target.
|
||||
// Ensure that this patch section is 4-byte aligned so that the branch cannot
|
||||
// span two 4 KiB regions. Place the patch section so that it is always after
|
||||
// isec so the branch we are patching always goes forwards.
|
||||
static void implementPatch(ScanResult sr, InputSection *isec,
|
||||
std::vector<Patch657417Section *> &patches) {
|
||||
|
||||
log("detected cortex-a8-657419 erratum sequence starting at " +
|
||||
utohexstr(isec->getVA(sr.off)) + " in unpatched output.");
|
||||
Patch657417Section *psec;
|
||||
// We have two cases to deal with.
|
||||
// Case 1. There is a relocation at patcheeOffset to a symbol. The
|
||||
// unconditional branch in the patch must have a relocation so that any
|
||||
// further redirection via the PLT or a Thunk happens as normal. At
|
||||
// patcheeOffset we redirect the existing relocation to a Symbol defined at
|
||||
// the start of the patch section.
|
||||
//
|
||||
// Case 2. There is no relocation at patcheeOffset. We are unlikely to have
|
||||
// a symbol that we can use as a target for a relocation in the patch section.
|
||||
// Luckily we know that the destination cannot be indirected via the PLT or
|
||||
// a Thunk so we can just write the destination directly.
|
||||
if (sr.rel) {
|
||||
// Case 1. We have an existing relocation to redirect to patch and a
|
||||
// Symbol target.
|
||||
|
||||
// Create a branch relocation for the unconditional branch in the patch.
|
||||
// This can be redirected via the PLT or Thunks.
|
||||
RelType patchRelType = R_ARM_THM_JUMP24;
|
||||
int64_t patchRelAddend = sr.rel->addend;
|
||||
bool destIsARM = false;
|
||||
if (isBL(sr.instr) || isBLX(sr.instr)) {
|
||||
// The final target of the branch may be ARM or Thumb, if the target
|
||||
// is ARM then we write the patch in ARM state to avoid a state change
|
||||
// Thunk from the patch to the target.
|
||||
uint64_t dstSymAddr = (sr.rel->expr == R_PLT_PC) ? sr.rel->sym->getPltVA()
|
||||
: sr.rel->sym->getVA();
|
||||
destIsARM = (dstSymAddr & 1) == 0;
|
||||
}
|
||||
psec = make<Patch657417Section>(isec, sr.off, sr.instr, destIsARM);
|
||||
if (destIsARM) {
|
||||
// The patch will be in ARM state. Use an ARM relocation and account for
|
||||
// the larger ARM PC-bias of 8 rather than Thumb's 4.
|
||||
patchRelType = R_ARM_JUMP24;
|
||||
patchRelAddend -= 4;
|
||||
}
|
||||
psec->relocations.push_back(
|
||||
Relocation{sr.rel->expr, patchRelType, 0, patchRelAddend, sr.rel->sym});
|
||||
// Redirect the existing branch relocation to the patch.
|
||||
sr.rel->expr = R_PC;
|
||||
sr.rel->addend = -4;
|
||||
sr.rel->sym = psec->patchSym;
|
||||
} else {
|
||||
// Case 2. We do not have a relocation to the patch. Add a relocation of the
|
||||
// appropriate type to the patch at patcheeOffset.
|
||||
|
||||
// The destination is ARM if we have a BLX.
|
||||
psec = make<Patch657417Section>(isec, sr.off, sr.instr, isBLX(sr.instr));
|
||||
RelType type;
|
||||
if (isBcc(sr.instr))
|
||||
type = R_ARM_THM_JUMP19;
|
||||
else if (isB(sr.instr))
|
||||
type = R_ARM_THM_JUMP24;
|
||||
else
|
||||
type = R_ARM_THM_CALL;
|
||||
isec->relocations.push_back(
|
||||
Relocation{R_PC, type, sr.off, -4, psec->patchSym});
|
||||
}
|
||||
patches.push_back(psec);
|
||||
}
|
||||
|
||||
// Scan all the instructions in InputSectionDescription, for each instance of
|
||||
// the erratum sequence create a Patch657417Section. We return the list of
|
||||
// Patch657417Sections that need to be applied to the InputSectionDescription.
|
||||
std::vector<Patch657417Section *>
|
||||
ARMErr657417Patcher::patchInputSectionDescription(
|
||||
InputSectionDescription &isd) {
|
||||
std::vector<Patch657417Section *> patches;
|
||||
for (InputSection *isec : isd.sections) {
|
||||
// LLD doesn't use the erratum sequence in SyntheticSections.
|
||||
if (isa<SyntheticSection>(isec))
|
||||
continue;
|
||||
// Use sectionMap to make sure we only scan Thumb code and not Arm or inline
|
||||
// data. We have already sorted mapSyms in ascending order and removed
|
||||
// consecutive mapping symbols of the same type. Our range of executable
|
||||
// instructions to scan is therefore [thumbSym->value, nonThumbSym->value)
|
||||
// or [thumbSym->value, section size).
|
||||
std::vector<const Defined *> &mapSyms = sectionMap[isec];
|
||||
|
||||
auto thumbSym = mapSyms.begin();
|
||||
while (thumbSym != mapSyms.end()) {
|
||||
auto nonThumbSym = std::next(thumbSym);
|
||||
uint64_t off = (*thumbSym)->value;
|
||||
uint64_t limit = (nonThumbSym == mapSyms.end()) ? isec->data().size()
|
||||
: (*nonThumbSym)->value;
|
||||
|
||||
while (off < limit) {
|
||||
ScanResult sr = scanCortexA8Errata657417(isec, off, limit);
|
||||
if (sr.off)
|
||||
implementPatch(sr, isec, patches);
|
||||
}
|
||||
if (nonThumbSym == mapSyms.end())
|
||||
break;
|
||||
thumbSym = std::next(nonThumbSym);
|
||||
}
|
||||
}
|
||||
return patches;
|
||||
}
|
||||
|
||||
bool ARMErr657417Patcher::createFixes() {
|
||||
if (!initialized)
|
||||
init();
|
||||
|
||||
bool addressesChanged = false;
|
||||
for (OutputSection *os : outputSections) {
|
||||
if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR))
|
||||
continue;
|
||||
for (BaseCommand *bc : os->sectionCommands)
|
||||
if (auto *isd = dyn_cast<InputSectionDescription>(bc)) {
|
||||
std::vector<Patch657417Section *> patches =
|
||||
patchInputSectionDescription(*isd);
|
||||
if (!patches.empty()) {
|
||||
insertPatches(*isd, patches);
|
||||
addressesChanged = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return addressesChanged;
|
||||
}
|
||||
|
||||
} // namespace elf
|
||||
} // namespace lld
|
51
lld/ELF/ARMErrataFix.h
Normal file
51
lld/ELF/ARMErrataFix.h
Normal file
@ -0,0 +1,51 @@
|
||||
//===- ARMErrataFix.h -------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLD_ELF_ARMA8ERRATAFIX_H
|
||||
#define LLD_ELF_ARMA8ERRATAFIX_H
|
||||
|
||||
#include "lld/Common/LLVM.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace lld {
|
||||
namespace elf {
|
||||
|
||||
class Defined;
|
||||
class InputSection;
|
||||
struct InputSectionDescription;
|
||||
class OutputSection;
|
||||
class Patch657417Section;
|
||||
|
||||
class ARMErr657417Patcher {
|
||||
public:
|
||||
// Return true if Patches have been added to the OutputSections.
|
||||
bool createFixes();
|
||||
|
||||
private:
|
||||
std::vector<Patch657417Section *>
|
||||
patchInputSectionDescription(InputSectionDescription &isd);
|
||||
|
||||
void insertPatches(InputSectionDescription &isd,
|
||||
std::vector<Patch657417Section *> &patches);
|
||||
|
||||
void init();
|
||||
|
||||
// A cache of the mapping symbols defined by the InputSection sorted in order
|
||||
// of ascending value with redundant symbols removed. These describe
|
||||
// the ranges of code and data in an executable InputSection.
|
||||
llvm::DenseMap<InputSection *, std::vector<const Defined *>> sectionMap;
|
||||
|
||||
bool initialized = false;
|
||||
};
|
||||
|
||||
} // namespace elf
|
||||
} // namespace lld
|
||||
|
||||
#endif
|
@ -22,6 +22,7 @@ add_lld_library(lldELF
|
||||
Arch/SPARCV9.cpp
|
||||
Arch/X86.cpp
|
||||
Arch/X86_64.cpp
|
||||
ARMErrataFix.cpp
|
||||
CallGraphSort.cpp
|
||||
DWARF.cpp
|
||||
Driver.cpp
|
||||
|
@ -145,6 +145,7 @@ struct Configuration {
|
||||
bool executeOnly;
|
||||
bool exportDynamic;
|
||||
bool fixCortexA53Errata843419;
|
||||
bool fixCortexA8;
|
||||
bool forceBTI;
|
||||
bool formatBinary = false;
|
||||
bool requireCET;
|
||||
|
@ -299,6 +299,9 @@ static void checkOptions() {
|
||||
if (config->fixCortexA53Errata843419 && config->emachine != EM_AARCH64)
|
||||
error("--fix-cortex-a53-843419 is only supported on AArch64 targets");
|
||||
|
||||
if (config->fixCortexA8 && config->emachine != EM_ARM)
|
||||
error("--fix-cortex-a8 is only supported on ARM targets");
|
||||
|
||||
if (config->tocOptimize && config->emachine != EM_PPC64)
|
||||
error("--toc-optimize is only supported on the PowerPC64 target");
|
||||
|
||||
@ -835,6 +838,7 @@ static void readConfigs(opt::InputArgList &args) {
|
||||
config->filterList = args::getStrings(args, OPT_filter);
|
||||
config->fini = args.getLastArgValue(OPT_fini, "_fini");
|
||||
config->fixCortexA53Errata843419 = args.hasArg(OPT_fix_cortex_a53_843419);
|
||||
config->fixCortexA8 = args.hasArg(OPT_fix_cortex_a8);
|
||||
config->forceBTI = args.hasArg(OPT_force_bti);
|
||||
config->requireCET = args.hasArg(OPT_require_cet);
|
||||
config->gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false);
|
||||
|
@ -171,6 +171,9 @@ defm fini: Eq<"fini", "Specify a finalizer function">, MetaVarName<"<symbol>">;
|
||||
def fix_cortex_a53_843419: F<"fix-cortex-a53-843419">,
|
||||
HelpText<"Apply fixes for AArch64 Cortex-A53 erratum 843419">;
|
||||
|
||||
def fix_cortex_a8: F<"fix-cortex-a8">,
|
||||
HelpText<"Apply fixes for ARM Cortex-A8 erratum 657417">;
|
||||
|
||||
// This option is intentionally hidden from the user as the implementation
|
||||
// is not complete.
|
||||
def require_cet: F<"require-cet">;
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
#include "Writer.h"
|
||||
#include "AArch64ErrataFix.h"
|
||||
#include "ARMErrataFix.h"
|
||||
#include "CallGraphSort.h"
|
||||
#include "Config.h"
|
||||
#include "LinkerScript.h"
|
||||
@ -1532,6 +1533,7 @@ template <class ELFT> void Writer<ELFT>::resolveShfLinkOrder() {
|
||||
template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
|
||||
ThunkCreator tc;
|
||||
AArch64Err843419Patcher a64p;
|
||||
ARMErr657417Patcher a32p;
|
||||
script->assignAddresses();
|
||||
|
||||
int assignPasses = 0;
|
||||
@ -1550,6 +1552,11 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
|
||||
script->assignAddresses();
|
||||
changed |= a64p.createFixes();
|
||||
}
|
||||
if (config->fixCortexA8) {
|
||||
if (changed)
|
||||
script->assignAddresses();
|
||||
changed |= a32p.createFixes();
|
||||
}
|
||||
|
||||
if (in.mipsGot)
|
||||
in.mipsGot->updateAllocSize();
|
||||
|
33
lld/test/ELF/arm-fix-cortex-a8-blx.s
Normal file
33
lld/test/ELF/arm-fix-cortex-a8-blx.s
Normal file
@ -0,0 +1,33 @@
|
||||
// REQUIRES: arm
|
||||
// RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf --arm-add-build-attributes %s -o %t.o
|
||||
// RUN: ld.lld --fix-cortex-a8 -verbose %t.o -o %t2 2>&1 | FileCheck %s
|
||||
// RUN: llvm-objdump -d --no-show-raw-insn --start-address=0x12ffa --stop-address=0x13008 %t2 | FileCheck --check-prefix=CHECK-PATCH %s
|
||||
|
||||
/// Test that the patch can work on an unrelocated BLX. Neither clang or GCC
|
||||
/// will emit these without a relocation, but they could be produced by ELF
|
||||
/// processing tools.
|
||||
|
||||
// CHECK: ld.lld: detected cortex-a8-657419 erratum sequence starting at 12FFE in unpatched output.
|
||||
|
||||
.syntax unified
|
||||
.text
|
||||
|
||||
.type _start, %function
|
||||
.balign 4096
|
||||
.global _start
|
||||
.arm
|
||||
_start:
|
||||
bx lr
|
||||
.space 4086
|
||||
.thumb
|
||||
/// 32-bit Branch link and exchange spans 2 4KiB regions, preceded by a
|
||||
/// 32-bit non branch instruction. Expect a patch.
|
||||
nop.w
|
||||
/// Encoding for blx _start. Use .inst.n directives to avoid a relocation.
|
||||
.inst.n 0xf7ff
|
||||
.inst.n 0xe800
|
||||
|
||||
// CHECK-PATCH: 12ffa: nop.w
|
||||
// CHECK-PATCH-NEXT: 12ffe: blx #4
|
||||
// CHECK-PATCH: 00013004 __CortexA8657417_12FFE:
|
||||
// CHECK-PATCH-NEXT: 13004: b #-4104
|
123
lld/test/ELF/arm-fix-cortex-a8-nopatch.s
Normal file
123
lld/test/ELF/arm-fix-cortex-a8-nopatch.s
Normal file
@ -0,0 +1,123 @@
|
||||
// REQUIRES: arm
|
||||
// RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf --arm-add-build-attributes %s -o %t.o
|
||||
// RUN: ld.lld --fix-cortex-a8 -verbose %t.o -o %t2
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x12ffa --stop-address=0x13002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE1 %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x13ffa --stop-address=0x14002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE2 %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x14ffa --stop-address=0x15002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE3 %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x15ffa --stop-address=0x16006 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE4 %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x16ffe --stop-address=0x17002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE5 %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x18000 --stop-address=0x18004 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE6 %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x19002 --stop-address=0x19006 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE7 %s
|
||||
|
||||
/// Test boundary conditions of the cortex-a8 erratum. The following cases
|
||||
/// should not trigger the Erratum
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
.global _start
|
||||
.balign 4096
|
||||
.thumb_func
|
||||
_start:
|
||||
nop.w
|
||||
.space 4086
|
||||
.thumb_func
|
||||
target:
|
||||
/// 32-bit branch spans 2 4KiB regions, preceded by a 32-bit branch so no patch
|
||||
/// expected.
|
||||
b.w target
|
||||
b.w target
|
||||
|
||||
// CALLSITE1: 00012ffa target:
|
||||
// CALLSITE1-NEXT: 12ffa: b.w #-4
|
||||
// CALLSITE1-NEXT: 12ffe: b.w #-8
|
||||
|
||||
.space 4088
|
||||
.type target2, %function
|
||||
target2:
|
||||
/// 32-bit Branch and link spans 2 4KiB regions, preceded by a 16-bit
|
||||
/// instruction so no patch expected.
|
||||
nop
|
||||
nop
|
||||
bl target2
|
||||
|
||||
// CALLSITE2: 00013ffa target2:
|
||||
// CALLSITE2-NEXT: 13ffa: nop
|
||||
// CALLSITE2-NEXT: 13ffc: nop
|
||||
// CALLSITE2-NEXT: 13ffe: bl #-8
|
||||
|
||||
.space 4088
|
||||
.type target3, %function
|
||||
target3:
|
||||
/// 32-bit conditional branch spans 2 4KiB regions, preceded by a 32-bit
|
||||
/// non branch instruction, branch is backwards but outside 4KiB region. So
|
||||
/// expect no patch.
|
||||
nop.w
|
||||
beq.w target2
|
||||
|
||||
// CALLSITE3: 00014ffa target3:
|
||||
// CALLSITE3-NEXT: 14ffa: nop.w
|
||||
// CALLSITE3-NEXT: 14ffe: beq.w #-4104
|
||||
|
||||
.space 4088
|
||||
.type source4, %function
|
||||
source4:
|
||||
/// 32-bit conditional branch spans 2 4KiB regions, preceded by a 32-bit
|
||||
/// non branch instruction, branch is forwards to 2nd region so expect no patch.
|
||||
nop.w
|
||||
beq.w target4
|
||||
.thumb_func
|
||||
target4:
|
||||
nop.w
|
||||
|
||||
// CALLSITE4: 00015ffa source4:
|
||||
// CALLSITE4-NEXT: 15ffa: nop.w
|
||||
// CALLSITE4-NEXT: 15ffe: beq.w #0
|
||||
// CALLSITE4: 00016002 target4:
|
||||
// CALLSITE4-NEXT: 16002: nop.w
|
||||
|
||||
.space 4084
|
||||
.type target5, %function
|
||||
|
||||
target5:
|
||||
/// 32-bit conditional branch spans 2 4KiB regions, preceded by the encoding of
|
||||
/// a 32-bit thumb instruction, but in ARM state (illegal instruction), we
|
||||
/// should not decode and match it as Thumb, expect no patch.
|
||||
.arm
|
||||
.inst 0x800f3af /// nop.w encoding in Thumb
|
||||
.thumb
|
||||
.thumb_func
|
||||
source5:
|
||||
beq.w target5
|
||||
|
||||
// CALLSITE5: 00016ffe source5:
|
||||
// CALLSITE5-NEXT: 16ffe: beq.w #-8
|
||||
|
||||
/// Edge case where two word sequence starts at offset 0xffc, check that
|
||||
/// we don't match. In this case the branch will be completely in the 2nd
|
||||
/// region and the branch will target the second region. This will pass a
|
||||
/// branch destination in the same region test, but fail the requirement that
|
||||
/// the branch must have an address of the form xxxxxffe.
|
||||
.space 4090
|
||||
.type target6, %function
|
||||
nop.w
|
||||
/// Make sure target of branch is in the same 4KiB region as the branch.
|
||||
target6:
|
||||
bl target6
|
||||
|
||||
// CALLSITE6: 00018000 target6:
|
||||
// CALLSITE6-NEXT: 18000: bl #-4
|
||||
|
||||
/// Edge case where two word sequence starts at offset 0xffe, check that
|
||||
/// we don't match. In this case the branch will be completely in the 2nd
|
||||
/// region and the branch will target the second region. This will pass a
|
||||
/// branch destination in the same region test, but fail the requirement that
|
||||
/// the branch must have an address of the form xxxxxffe.
|
||||
.space 4090
|
||||
.type target7, %function
|
||||
nop.w
|
||||
/// Make sure target of branch is in the same 4KiB region as the branch.
|
||||
target7:
|
||||
bl target7
|
||||
|
||||
// CALLSITE7: 00019002 target7:
|
||||
// CALLSITE7: 19002: bl #-4
|
39
lld/test/ELF/arm-fix-cortex-a8-plt.s
Normal file
39
lld/test/ELF/arm-fix-cortex-a8-plt.s
Normal file
@ -0,0 +1,39 @@
|
||||
// REQUIRES: arm
|
||||
// RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf --arm-add-build-attributes %s -o %t.o
|
||||
// RUN: echo "SECTIONS { \
|
||||
// RUN: .plt 0x2000 : { *(.plt) *(.plt.*) } \
|
||||
// RUN: .text : { *(.text) } \
|
||||
// RUN: }" > %t.script
|
||||
|
||||
// RUN: ld.lld --script %t.script --fix-cortex-a8 --shared -verbose %t.o -o %t2
|
||||
// RUN: llvm-objdump -d --start-address=0x2020 --stop-address=0x202c --no-show-raw-insn %t2 | FileCheck --check-prefix=CHECK-PLT %s
|
||||
// RUN: llvm-objdump -d --start-address=0x2ffa --stop-address=0x3008 --no-show-raw-insn %t2 | FileCheck %s
|
||||
|
||||
/// If we patch a branch instruction that is indirected via the PLT then we
|
||||
/// must make sure the patch goes via the PLT
|
||||
|
||||
// CHECK-PLT: 2020: add r12, pc, #0, #12
|
||||
// CHECK-PLT-NEXT: 2024: add r12, r12, #4096
|
||||
// CHECK-PLT-NEXT: 2028: ldr pc, [r12, #68]!
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
|
||||
.global external
|
||||
.type external, %function
|
||||
|
||||
.text
|
||||
.balign 2048
|
||||
|
||||
.space 2042
|
||||
.global source
|
||||
.thumb_func
|
||||
source:
|
||||
nop.w
|
||||
bl external
|
||||
|
||||
// CHECK: 00002ffa source:
|
||||
// CHECK-NEXT: 2ffa: nop.w
|
||||
// CHECK-NEXT: 2ffe: blx #4
|
||||
// CHECK: 00003004 __CortexA8657417_2FFE:
|
||||
// CHECK-NEXT: 3004: b #-4076
|
201
lld/test/ELF/arm-fix-cortex-a8-recognize.s
Normal file
201
lld/test/ELF/arm-fix-cortex-a8-recognize.s
Normal file
@ -0,0 +1,201 @@
|
||||
// REQUIRES: arm
|
||||
// RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf --arm-add-build-attributes %s -o %t.o
|
||||
// RUN: ld.lld --fix-cortex-a8 -verbose %t.o -o %t2 2>&1 | FileCheck %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x1a004 --stop-address=0x1a024 --no-show-raw-insn | FileCheck --check-prefix=CHECK-PATCHES %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x12ffa --stop-address=0x13002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE1 %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x13ffa --stop-address=0x14002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE2 %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x14ffa --stop-address=0x15002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE3 %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x15ff4 --stop-address=0x16002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE4 %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x16ffa --stop-address=0x17002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE5 %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x17ffa --stop-address=0x18002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE6 %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x18ffa --stop-address=0x19002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE7 %s
|
||||
// RUN: llvm-objdump -d %t2 --start-address=0x19ff4 --stop-address=0x1a002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE8 %s
|
||||
|
||||
// CHECK: ld.lld: detected cortex-a8-657419 erratum sequence starting at 12FFE in unpatched output.
|
||||
// CHECK-NEXT: ld.lld: detected cortex-a8-657419 erratum sequence starting at 13FFE in unpatched output.
|
||||
// CHECK-NEXT: ld.lld: detected cortex-a8-657419 erratum sequence starting at 14FFE in unpatched output.
|
||||
// CHECK-NEXT: ld.lld: detected cortex-a8-657419 erratum sequence starting at 15FFE in unpatched output.
|
||||
// CHECK-NEXT: ld.lld: detected cortex-a8-657419 erratum sequence starting at 16FFE in unpatched output.
|
||||
// CHECK-NEXT: ld.lld: detected cortex-a8-657419 erratum sequence starting at 17FFE in unpatched output.
|
||||
// CHECK-NEXT: ld.lld: detected cortex-a8-657419 erratum sequence starting at 18FFE in unpatched output.
|
||||
|
||||
/// Basic tests for the --fix-cortex-a8 erratum fix. The full details of the
|
||||
/// erratum and the patch are in ARMErrataFix.cpp. The test creates an
|
||||
/// instance of the erratum every 4KiB (32-bit non-branch, followed by 32-bit
|
||||
/// branch instruction, where the branch instruction spans two 4 KiB regions,
|
||||
/// and the branch destination is in the first 4KiB region).
|
||||
///
|
||||
/// Test each 32-bit branch b.w, bcc.w, bl, blx. For b.w, bcc.w, and bl we
|
||||
/// check the relocated and non-relocated forms. The blx instruction
|
||||
/// always has a relocation in assembler.
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
.global _start
|
||||
.type _start, %function
|
||||
.balign 4096
|
||||
.thumb_func
|
||||
_start:
|
||||
nop.w
|
||||
.space 4086
|
||||
.thumb_func
|
||||
.global target
|
||||
.type target, %function
|
||||
target:
|
||||
/// 32-bit Branch spans 2 4KiB regions, preceded by a 32-bit non branch
|
||||
/// instruction, expect a patch.
|
||||
nop.w
|
||||
b.w target
|
||||
|
||||
// CALLSITE1: 00012ffa target:
|
||||
// CALLSITE1-NEXT: 12ffa: nop.w
|
||||
// CALLSITE1-NEXT: 12ffe: b.w #28674
|
||||
|
||||
.space 4088
|
||||
.type target2, %function
|
||||
.local target2
|
||||
target2:
|
||||
/// 32-bit Branch and link spans 2 4KiB regions, preceded by a 32-bit
|
||||
/// non branch instruction, expect a patch.
|
||||
nop.w
|
||||
bl target2
|
||||
|
||||
// CALLSITE2: 00013ffa target2:
|
||||
// CALLSITE2-NEXT: 13ffa: nop.w
|
||||
// CALLSITE2-NEXT: 13ffe: bl #24582
|
||||
|
||||
.space 4088
|
||||
.type target3, %function
|
||||
.local target3
|
||||
target3:
|
||||
/// 32-bit conditional branch spans 2 4KiB regions, preceded by a 32-bit
|
||||
/// non branch instruction, expect a patch.
|
||||
nop.w
|
||||
beq.w target3
|
||||
|
||||
// CALLSITE3: 00014ffa target3:
|
||||
// CALLSITE3-NEXT: 14ffa: nop.w
|
||||
// CALLSITE3-NEXT: 14ffe: beq.w #20490
|
||||
|
||||
.space 4082
|
||||
.type target4, %function
|
||||
.local target4
|
||||
.arm
|
||||
target4:
|
||||
bx lr
|
||||
.space 2
|
||||
.thumb
|
||||
/// 32-bit Branch link and exchange spans 2 4KiB regions, preceded by a
|
||||
/// 32-bit non branch instruction, blx always goes via relocation. Expect
|
||||
/// a patch.
|
||||
nop.w
|
||||
blx target4
|
||||
|
||||
/// Target = 0x1a010 __CortexA8657417_15FFE
|
||||
// CALLSITE4: 00015ff4 target4:
|
||||
// CALLSITE4-NEXT: 15ff4: bx lr
|
||||
// CALLSITE4: 15ff8: 00 00 .short 0x0000
|
||||
// CALLSITE4: 15ffa: nop.w
|
||||
// CALLSITE4-NEXT: 15ffe: blx #16400
|
||||
|
||||
/// Separate sections for source and destination of branches to force
|
||||
/// a relocation.
|
||||
.section .text.0, "ax", %progbits
|
||||
.balign 2
|
||||
.global target5
|
||||
.type target5, %function
|
||||
target5:
|
||||
nop.w
|
||||
.section .text.1, "ax", %progbits
|
||||
.space 4084
|
||||
/// 32-bit branch spans 2 4KiB regions, preceded by a 32-bit non branch
|
||||
/// instruction, expect a patch. Branch to global symbol so goes via a
|
||||
/// relocation.
|
||||
nop.w
|
||||
b.w target5
|
||||
|
||||
/// Target = 0x1a014 __CortexA8657417_16FFE
|
||||
// CALLSITE5: 16ffa: nop.w
|
||||
// CALLSITE5-NEXT: 16ffe: b.w #12306
|
||||
|
||||
.section .text.2, "ax", %progbits
|
||||
.balign 2
|
||||
.global target6
|
||||
.type target6, %function
|
||||
target6:
|
||||
nop.w
|
||||
.section .text.3, "ax", %progbits
|
||||
.space 4084
|
||||
/// 32-bit branch and link spans 2 4KiB regions, preceded by a 32-bit
|
||||
/// non branch instruction, expect a patch. Branch to global symbol so
|
||||
/// goes via a relocation.
|
||||
nop.w
|
||||
bl target6
|
||||
|
||||
/// Target = 0x1a018 __CortexA8657417_17FFE
|
||||
// CALLSITE6: 17ffa: nop.w
|
||||
// CALLSITE6-NEXT: 17ffe: bl #8214
|
||||
|
||||
.section .text.4, "ax", %progbits
|
||||
.global target7
|
||||
.type target7, %function
|
||||
target7:
|
||||
nop.w
|
||||
.section .text.5, "ax", %progbits
|
||||
.space 4084
|
||||
/// 32-bit conditional branch spans 2 4KiB regions, preceded by a 32-bit
|
||||
/// non branch instruction, expect a patch. Branch to global symbol so
|
||||
/// goes via a relocation.
|
||||
nop.w
|
||||
bne.w target7
|
||||
|
||||
// CALLSITE7: 18ffa: nop.w
|
||||
// CALLSITE7-NEXT: 18ffe: bne.w #4122
|
||||
|
||||
.section .text.6, "ax", %progbits
|
||||
.space 4082
|
||||
.arm
|
||||
.global target8
|
||||
.type target8, %function
|
||||
target8:
|
||||
bx lr
|
||||
|
||||
.section .text.7, "ax", %progbits
|
||||
.space 2
|
||||
.thumb
|
||||
/// 32-bit Branch link spans 2 4KiB regions, preceded by a 32-bit non branch
|
||||
/// instruction, expect a patch. The target of the BL is in ARM state so we
|
||||
/// expect it to be turned into a BLX. The patch must be in ARM state to
|
||||
/// avoid a state change thunk.
|
||||
nop.w
|
||||
bl target8
|
||||
|
||||
// CALLSITE8: 00019ff4 target8:
|
||||
// CALLSITE8-NEXT: 19ff4: bx lr
|
||||
// CALLSITE8: 19ff8: 00 00 .short 0x0000
|
||||
// CALLSITE8: 19ffa: nop.w
|
||||
// CALLSITE8-NEXT: 19ffe: blx #32
|
||||
|
||||
// CHECK-PATCHES: 0001a004 __CortexA8657417_12FFE:
|
||||
// CHECK-PATCHES-NEXT: 1a004: b.w #-28686
|
||||
|
||||
// CHECK-PATCHES: 0001a008 __CortexA8657417_13FFE:
|
||||
// CHECK-PATCHES-NEXT: 1a008: b.w #-24594
|
||||
|
||||
// CHECK-PATCHES: 0001a00c __CortexA8657417_14FFE:
|
||||
// CHECK-PATCHES-NEXT: 1a00c: b.w #-20502
|
||||
|
||||
// CHECK-PATCHES: 0001a010 __CortexA8657417_15FFE:
|
||||
// CHECK-PATCHES-NEXT: 1a010: b #-16420
|
||||
|
||||
// CHECK-PATCHES: 0001a014 __CortexA8657417_16FFE:
|
||||
// CHECK-PATCHES-NEXT: 1a014: b.w #-16406
|
||||
|
||||
// CHECK-PATCHES: 0001a018 __CortexA8657417_17FFE:
|
||||
// CHECK-PATCHES-NEXT: 1a018: b.w #-12314
|
||||
|
||||
// CHECK-PATCHES: 0001a01c __CortexA8657417_18FFE:
|
||||
// CHECK-PATCHES-NEXT: 1a01c: b.w #-8222
|
||||
|
||||
// CHECK-PATCHES: 0001a020 __CortexA8657417_19FFE:
|
||||
// CHECK-PATCHES-NEXT: 1a020: b #-52
|
69
lld/test/ELF/arm-fix-cortex-a8-thunk.s
Normal file
69
lld/test/ELF/arm-fix-cortex-a8-thunk.s
Normal file
@ -0,0 +1,69 @@
|
||||
// REQUIRES: arm
|
||||
// RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf --arm-add-build-attributes %s -o %t.o
|
||||
// RUN: echo "SECTIONS { \
|
||||
// RUN: .text0 0x011006 : { *(.text.00) } \
|
||||
// RUN: .text1 0x110000 : { *(.text.01) *(.text.02) *(.text.03) \
|
||||
// RUN: *(.text.04) } \
|
||||
// RUN: .text2 0x210000 : { *(.text.05) } } " > %t.script
|
||||
// RUN: ld.lld --script %t.script --fix-cortex-a8 --shared -verbose %t.o -o %t2 2>&1
|
||||
// RUN: llvm-objdump -d --no-show-raw-insn --start-address=0x110000 --stop-address=0x110010 %t2 | FileCheck --check-prefix=THUNK %s
|
||||
// RUN: llvm-objdump -d --no-show-raw-insn --start-address=0x110ffa --stop-address=0x111008 %t2 | FileCheck --check-prefix=PATCH %s
|
||||
// RUN: llvm-objdump -d --no-show-raw-insn --start-address=0x111008 --stop-address=0x111010 %t2 | FileCheck --check-prefix=THUNK2 %s
|
||||
|
||||
/// Test cases for Cortex-a8 Erratum 657417 that involve interactions with
|
||||
/// range extension thunks. Both erratum fixes and range extension thunks need
|
||||
/// precise information and after creation alter address information.
|
||||
.thumb
|
||||
|
||||
.section .text.00, "ax", %progbits
|
||||
.thumb_func
|
||||
early:
|
||||
bx lr
|
||||
|
||||
.section .text.01, "ax", %progbits
|
||||
.balign 4096
|
||||
.globl _start
|
||||
.type _start, %function
|
||||
_start:
|
||||
beq.w far_away
|
||||
/// Thunk to far_away and state change needed, size 12-bytes goes here.
|
||||
// THUNK: 00110000 _start:
|
||||
// THUNK-NEXT: 110000: beq.w #0 <__ThumbV7PILongThunk_far_away+0x4>
|
||||
// THUNK: 00110004 __ThumbV7PILongThunk_far_away:
|
||||
// THUNK-NEXT: 110004: movw r12, #65524
|
||||
// THUNK-NEXT: 110008: movt r12, #15
|
||||
// THUNK-NEXT: 11000c: add r12, pc
|
||||
// THUNK-NEXT: 11000e: bx r12
|
||||
|
||||
.section .text.02, "ax", %progbits
|
||||
.space 4096 - 22
|
||||
|
||||
.section .text.03, "ax", %progbits
|
||||
.thumb_func
|
||||
target:
|
||||
/// After thunk is added this branch will line up across 2 4 KiB regions
|
||||
/// and will trigger a patch.
|
||||
nop.w
|
||||
bl target
|
||||
|
||||
/// Expect erratum patch inserted here
|
||||
// PATCH: 00110ffa target:
|
||||
// PATCH-NEXT: 110ffa: nop.w
|
||||
// PATCH-NEXT: 110ffe: bl #2
|
||||
// PATCH: 00111004 __CortexA8657417_110FFE:
|
||||
// PATCH-NEXT: 111004: b.w #-14
|
||||
|
||||
// THUNK2: 00111008 __ThumbV7PILongThunk_early:
|
||||
// THUNK2-NEXT: 111008: b.w #-1048582
|
||||
.section .text.04, "ax", %progbits
|
||||
/// The erratum patch will push this branch out of range, so another
|
||||
/// range extension thunk will be needed.
|
||||
beq.w early
|
||||
// THUNK2: 11100c: beq.w #-8
|
||||
/// Expect range extension thunk here.
|
||||
.section .text.05, "ax", %progbits
|
||||
.arm
|
||||
nop
|
||||
.type far_away, %function
|
||||
far_away:
|
||||
bx lr
|
45
lld/test/ELF/arm-fix-cortex-a8-toolarge.s
Normal file
45
lld/test/ELF/arm-fix-cortex-a8-toolarge.s
Normal file
@ -0,0 +1,45 @@
|
||||
// REQUIRES: arm
|
||||
// RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf --arm-add-build-attributes %s -o %t.o
|
||||
// RUN: ld.lld --fix-cortex-a8 -verbose %t.o -o /dev/null 2>&1 | FileCheck %s
|
||||
/// Test that we warn, but don't attempt to patch when it is impossible to
|
||||
/// redirect the branch as the Section is too large.
|
||||
|
||||
// CHECK: skipping cortex-a8 657417 erratum sequence, section .text is too large to patch
|
||||
// CHECK: skipping cortex-a8 657417 erratum sequence, section .text.02 is too large to patch
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
/// Case 1: 1 MiB conditional branch range without relocation.
|
||||
.text
|
||||
.global _start
|
||||
.type _start, %function
|
||||
.balign 4096
|
||||
.thumb_func
|
||||
_start:
|
||||
nop.w
|
||||
.space 4086
|
||||
.thumb_func
|
||||
.global target
|
||||
.type target, %function
|
||||
target:
|
||||
/// 32-bit Branch spans 2 4KiB regions, preceded by a 32-bit non branch
|
||||
/// instruction, a patch will be attempted. Unfortunately the branch
|
||||
/// cannot reach outside the section so we have to abort the patch.
|
||||
nop.w
|
||||
beq.w target
|
||||
.space 1024 * 1024
|
||||
|
||||
/// Case 2: 16 MiB
|
||||
.section .text.01, "ax", %progbits
|
||||
.balign 4096
|
||||
.space 4090
|
||||
.global target2
|
||||
.thumb_func
|
||||
target2:
|
||||
.section .text.02, "ax", %progbits
|
||||
/// 32-bit Branch and link spans 2 4KiB regions, preceded by a 32-bit
|
||||
/// non branch instruction, a patch will be attempted. Unfortunately the
|
||||
/// BL cannot reach outside the section so we have to abort the patch.
|
||||
nop.w
|
||||
bl target2
|
||||
.space 16 * 1024 * 1024
|
Loading…
Reference in New Issue
Block a user