LLD Support for Basic Block Sections

This is part of the Propeller framework to do post link code layout
optimizations. Please see the RFC here:
https://groups.google.com/forum/#!msg/llvm-dev/ef3mKzAdJ7U/1shV64BYBAAJ and the
detailed RFC doc here:
https://github.com/google/llvm-propeller/blob/plo-dev/Propeller_RFC.pdf

This patch adds lld support for basic block sections and performs relaxations
after the basic blocks have been reordered.

After the linker has reordered the basic block sections according to the
desired sequence, it runs a relaxation pass to optimize jump instructions.
Currently, the compiler emits the long form of all jump instructions. AMD64 ISA
supports variants of jump instructions with one byte offset or a four byte
offset. The compiler currently generates jump instructions with ordinary
32-bit PC-relative relocations (R_X86_64_PC32/R_X86_64_PLT32). We would like to
use a new relocation type for these jump instructions, as that would make
relaxing them simpler and more accurate.

The relaxation pass does two things:

First, it deletes all explicit fall-through direct jump instructions between
adjacent basic blocks. This is done by discarding the tail of the basic block
section.

Second, if there are two consecutive jump instructions, it checks whether the
first (conditional) jump can be inverted so that the second becomes a fall
through and can be deleted.

The jump instructions are relaxed by using jump instruction mods, something
like relocations. These are used to modify the opcode of the jump instruction.
Jump instruction mods contain three values, instruction offset, jump type and
size. While writing this jump instruction out to the final binary, the linker
uses the jump instruction mod to determine the opcode and the size of the
modified jump instruction. These mods are required because the input object
files are memory-mapped without write permissions and directly modifying the
object files requires copying these sections. Copying a large number of basic
block sections significantly bloats memory.

Differential Revision: https://reviews.llvm.org/D68065
This commit is contained in:
Sriraman Tallam 2020-04-07 06:48:18 -07:00
parent c97be2c377
commit 94317878d8
14 changed files with 780 additions and 4 deletions

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "InputFiles.h"
#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
@ -37,6 +38,8 @@ public:
uint64_t pltEntryAddr) const override;
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
void applyJumpInstrMod(uint8_t *loc, JumpModType type,
unsigned size) const override;
RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
RelExpr expr) const override;
@ -52,9 +55,25 @@ public:
uint64_t val) const override;
bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
uint8_t stOther) const override;
bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
InputSection *nextIS) const override;
};
} // namespace
// This is a vector of NOP instructions, one of each size from 1 to 9 bytes.
// The appropriately sized instruction is used to fill the gaps between
// sections which are executed during fall through. Entry k holds the
// canonical x86-64 NOP of size k + 1 bytes.
static const std::vector<std::vector<uint8_t>> nopInstructions = {
    {0x90},
    {0x66, 0x90},
    {0x0f, 0x1f, 0x00},
    {0x0f, 0x1f, 0x40, 0x00},
    {0x0f, 0x1f, 0x44, 0x00, 0x00},
    {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
    {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
    {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
    {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}};
X86_64::X86_64() {
copyRel = R_X86_64_COPY;
gotRel = R_X86_64_GLOB_DAT;
@ -71,6 +90,7 @@ X86_64::X86_64() {
pltEntrySize = 16;
ipltEntrySize = 16;
trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
nopInstrs = nopInstructions;
// Align to the large page size (known as a superpage or huge page).
// FreeBSD automatically promotes large, superpage-aligned allocations.
@ -79,6 +99,216 @@ X86_64::X86_64() {
int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; }
// Opcodes for the different X86_64 jmp instructions.
enum JmpInsnOpcode : uint32_t {
  J_JMP_32,
  J_JNE_32,
  J_JE_32,
  J_JG_32,
  J_JGE_32,
  J_JB_32,
  J_JBE_32,
  J_JL_32,
  J_JLE_32,
  J_JA_32,
  J_JAE_32,
  J_UNKNOWN,
};

// Given the first (optional) and second byte of the insn's opcode, this
// returns the corresponding enum value.
static JmpInsnOpcode getJmpInsnType(const uint8_t *first,
                                    const uint8_t *second) {
  // The direct unconditional jmp with a rel32 displacement is the single-byte
  // opcode 0xe9, so it is recognized from the second byte alone.
  if (*second == 0xe9)
    return J_JMP_32;

  // All 32-bit conditional jumps are two-byte opcodes: 0x0f followed by a
  // condition-specific byte. Anything without the 0x0f prefix is not a jump
  // we know how to relax.
  if (first == nullptr || *first != 0x0f)
    return J_UNKNOWN;

  switch (*second) {
  case 0x84:
    return J_JE_32;
  case 0x85:
    return J_JNE_32;
  case 0x8f:
    return J_JG_32;
  case 0x8d:
    return J_JGE_32;
  case 0x82:
    return J_JB_32;
  case 0x86:
    return J_JBE_32;
  case 0x8c:
    return J_JL_32;
  case 0x8e:
    return J_JLE_32;
  case 0x87:
    return J_JA_32;
  case 0x83:
    return J_JAE_32;
  default:
    return J_UNKNOWN;
  }
}
// Return the relocation index for input section IS with a specific Offset.
// Returns the maximum size of the vector if no such relocation is found.
static unsigned getRelocationWithOffset(const InputSection &is,
uint64_t offset) {
unsigned size = is.relocations.size();
for (unsigned i = size - 1; i + 1 > 0; --i) {
if (is.relocations[i].offset == offset && is.relocations[i].expr != R_NONE)
return i;
}
return size;
}
// Returns true if R corresponds to a relocation used for a jump instruction.
// TODO: Once special relocations for relaxable jump instructions are available,
// this should be modified to use those relocations.
static bool isRelocationForJmpInsn(Relocation &R) {
return R.type == R_X86_64_PLT32 || R.type == R_X86_64_PC32 ||
R.type == R_X86_64_PC8;
}
// Return true if Relocation R points to the first instruction in the
// next section.
// TODO: Delete this once psABI reserves a new relocation type for fall thru
// jumps.
static bool isFallThruRelocation(InputSection &is, InputFile *file,
                                 InputSection *nextIS, Relocation &r) {
  // Only PC-relative jump relocations can describe a fall-through jump.
  if (!isRelocationForJmpInsn(r))
    return false;

  // Address of the relocated displacement field in the output image.
  uint64_t addrLoc = is.getOutputSection()->addr + is.outSecOff + r.offset;
  uint64_t targetOffset = InputSectionBase::getRelocTargetVA(
      file, r.type, r.addend, addrLoc, *r.sym, r.expr);

  // If this jmp is a fall thru, the target offset is the beginning of the
  // next section.
  uint64_t nextSectionOffset =
      nextIS->getOutputSection()->addr + nextIS->outSecOff;
  // The displacement is relative to the end of the 4-byte field, hence the
  // "+ 4" before comparing against the start of the next section.
  return (addrLoc + 4 + targetOffset) == nextSectionOffset;
}
// Return the jmp instruction opcode that is the inverse of the given
// opcode. For example, JE inverted is JNE.
static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) {
  // Conditional jumps come in complementary pairs; the inverse of each member
  // is simply the other member. Unconditional jumps (and anything else) have
  // no inverse and map to J_UNKNOWN.
  static const JmpInsnOpcode invertiblePairs[][2] = {
      {J_JE_32, J_JNE_32},
      {J_JG_32, J_JLE_32},
      {J_JGE_32, J_JL_32},
      {J_JB_32, J_JAE_32},
      {J_JBE_32, J_JA_32},
  };
  for (const auto &pair : invertiblePairs) {
    if (opcode == pair[0])
      return pair[1];
    if (opcode == pair[1])
      return pair[0];
  }
  return J_UNKNOWN;
}
// Deletes direct jump instruction in input sections that jumps to the
// following section as it is not required. If there are two consecutive jump
// instructions, it checks if they can be flipped and one can be deleted.
// For example:
// .section .text
// a.BB.foo:
//    ...
//    10: jne aa.BB.foo
//    16: jmp bar
// aa.BB.foo:
//    ...
//
// can be converted to:
// a.BB.foo:
//    ...
//    10: je bar  # jne flipped to je and the jmp is deleted.
// aa.BB.foo:
//    ...
bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file,
                                   InputSection *nextIS) const {
  // Direct jmp with rel32 displacement: 1 opcode byte (0xe9) + 4 bytes.
  const unsigned sizeOfDirectJmpInsn = 5;

  // Nothing to fall through into if this is the last executable section.
  if (nextIS == nullptr)
    return false;

  if (is.getSize() < sizeOfDirectJmpInsn)
    return false;

  // If this jmp insn can be removed, it is the last insn and the
  // relocation is 4 bytes before the end.
  unsigned rIndex = getRelocationWithOffset(is, is.getSize() - 4);
  if (rIndex == is.relocations.size())
    return false;

  Relocation &r = is.relocations[rIndex];

  // Check if the relocation corresponds to a direct jmp.
  const uint8_t *secContents = is.data().data();
  // If it is not a direct jmp instruction, there is nothing to do here.
  if (*(secContents + r.offset - 1) != 0xe9)
    return false;

  if (isFallThruRelocation(is, file, nextIS, r)) {
    // This is a fall thru and can be deleted.
    r.expr = R_NONE;
    r.offset = 0;
    is.drop_back(sizeOfDirectJmpInsn);
    is.nopFiller = true;
    return true;
  }

  // Now, check if flip and delete is possible.
  const unsigned sizeOfJmpCCInsn = 6;
  // To flip, there must be at least one JmpCC and one direct jmp.
  if (is.getSize() < sizeOfDirectJmpInsn + sizeOfJmpCCInsn)
    return false;

  unsigned rbIndex =
      getRelocationWithOffset(is, (is.getSize() - sizeOfDirectJmpInsn - 4));
  if (rbIndex == is.relocations.size())
    return false;

  Relocation &rB = is.relocations[rbIndex];

  const uint8_t *jmpInsnB = secContents + rB.offset - 1;
  JmpInsnOpcode jmpOpcodeB = getJmpInsnType(jmpInsnB - 1, jmpInsnB);
  if (jmpOpcodeB == J_UNKNOWN)
    return false;

  if (!isFallThruRelocation(is, file, nextIS, rB))
    return false;

  // jmpCC jumps to the fall thru block, the branch can be flipped and the
  // jmp can be deleted.
  JmpInsnOpcode jInvert = invertJmpOpcode(jmpOpcodeB);
  if (jInvert == J_UNKNOWN)
    return false;

  is.jumpInstrMods.push_back({jInvert, (rB.offset - 1), 4});
  // Move R's values to rB except the offset.
  rB = {r.expr, r.type, rB.offset, r.addend, r.sym};
  // Cancel R
  r.expr = R_NONE;
  r.offset = 0;
  is.drop_back(sizeOfDirectJmpInsn);
  is.nopFiller = true;
  return true;
}
RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const {
if (type == R_X86_64_GOTTPOFF)
@ -357,6 +587,94 @@ void X86_64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
"expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
}
// A JumpInstrMod at a specific offset indicates that the jump instruction
// opcode at that offset must be modified. This is specifically used to relax
// jump instructions with basic block sections. This function looks at the
// JumpMod and effects the change.
void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type,
                               unsigned size) const {
  // The rel32 Jcc forms are two-byte opcodes (0x0f <op>), so both bytes are
  // rewritten; the rel8 forms -- and both JMP forms -- are single-byte
  // opcodes, so only the byte at loc changes.
  auto rewrite = [&](uint8_t rel32Op, uint8_t rel8Op, bool twoByte = true) {
    if (size == 4) {
      if (twoByte)
        loc[-1] = 0x0f;
      *loc = rel32Op;
    } else {
      *loc = rel8Op;
    }
  };

  switch (type) {
  case J_JMP_32:
    rewrite(0xe9, 0xeb, /*twoByte=*/false);
    break;
  case J_JE_32:
    rewrite(0x84, 0x74);
    break;
  case J_JNE_32:
    rewrite(0x85, 0x75);
    break;
  case J_JG_32:
    rewrite(0x8f, 0x7f);
    break;
  case J_JGE_32:
    rewrite(0x8d, 0x7d);
    break;
  case J_JB_32:
    rewrite(0x82, 0x72);
    break;
  case J_JBE_32:
    rewrite(0x86, 0x76);
    break;
  case J_JL_32:
    rewrite(0x8c, 0x7c);
    break;
  case J_JLE_32:
    rewrite(0x8e, 0x7e);
    break;
  case J_JA_32:
    rewrite(0x87, 0x77);
    break;
  case J_JAE_32:
    rewrite(0x83, 0x73);
    break;
  case J_UNKNOWN:
    llvm_unreachable("Unknown Jump Relocation");
  }
}
void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
switch (rel.type) {
case R_X86_64_8:

View File

@ -114,6 +114,7 @@ struct Configuration {
llvm::StringRef sysroot;
llvm::StringRef thinLTOCacheDir;
llvm::StringRef thinLTOIndexOnlyArg;
llvm::StringRef ltoBasicBlockSections;
std::pair<llvm::StringRef, llvm::StringRef> thinLTOObjectSuffixReplace;
std::pair<llvm::StringRef, llvm::StringRef> thinLTOPrefixReplace;
std::string rpath;
@ -165,6 +166,7 @@ struct Configuration {
bool ltoCSProfileGenerate;
bool ltoDebugPassManager;
bool ltoNewPassManager;
bool ltoUniqueBBSectionNames;
bool ltoWholeProgramVisibility;
bool mergeArmExidx;
bool mipsN32Abi = false;
@ -175,6 +177,7 @@ struct Configuration {
bool nostdlib;
bool oFormatBinary;
bool omagic;
bool optimizeBBJumps;
bool optRemarksWithHotness;
bool picThunk;
bool pie;

View File

@ -878,6 +878,8 @@ static void readConfigs(opt::InputArgList &args) {
config->cref = args.hasFlag(OPT_cref, OPT_no_cref, false);
config->defineCommon = args.hasFlag(OPT_define_common, OPT_no_define_common,
!args.hasArg(OPT_relocatable));
config->optimizeBBJumps =
args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false);
config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true);
config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true);
config->disableVerify = args.hasArg(OPT_disable_verify);
@ -924,6 +926,11 @@ static void readConfigs(opt::InputArgList &args) {
config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq);
config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1);
config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile);
config->ltoBasicBlockSections =
args.getLastArgValue(OPT_lto_basicblock_sections);
config->ltoUniqueBBSectionNames =
args.hasFlag(OPT_lto_unique_bb_section_names,
OPT_no_lto_unique_bb_section_names, false);
config->mapFile = args.getLastArgValue(OPT_Map);
config->mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0);
config->mergeArmExidx =

View File

@ -138,7 +138,7 @@ size_t InputSectionBase::getSize() const {
return s->getSize();
if (uncompressedSize >= 0)
return uncompressedSize;
return rawData.size();
return rawData.size() - bytesDropped;
}
void InputSectionBase::uncompress() const {
@ -659,8 +659,9 @@ static int64_t getTlsTpOffset(const Symbol &s) {
}
}
static uint64_t getRelocTargetVA(const InputFile *file, RelType type, int64_t a,
uint64_t p, const Symbol &sym, RelExpr expr) {
uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
int64_t a, uint64_t p,
const Symbol &sym, RelExpr expr) {
switch (expr) {
case R_ABS:
case R_DTPREL:
@ -871,6 +872,12 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
if (expr == R_NONE)
continue;
if (expr == R_SIZE) {
target->relocateNoSym(bufLoc, type,
SignExtend64<bits>(sym.getSize() + addend));
continue;
}
if (expr != R_ABS && expr != R_DTPREL && expr != R_RISCV_ADD) {
std::string msg = getLocation<ELFT>(offset) +
": has non-ABS relocation " + toString(type) +
@ -942,6 +949,8 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
const unsigned bits = config->wordsize * 8;
for (const Relocation &rel : relocations) {
if (rel.expr == R_NONE)
continue;
uint64_t offset = rel.offset;
if (auto *sec = dyn_cast<InputSection>(this))
offset += sec->outSecOff;
@ -1011,6 +1020,18 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
break;
}
}
// Apply jumpInstrMods. jumpInstrMods are created when the opcode of
// a jmp insn must be modified to shrink the jmp insn or to flip the jmp
// insn. This is primarily used to relax and optimize jumps created with
// basic block sections.
if (auto *sec = dyn_cast<InputSection>(this)) {
for (const JumpInstrMod &jumpMod : jumpInstrMods) {
uint64_t offset = jumpMod.offset + sec->outSecOff;
uint8_t *bufLoc = buf + offset;
target->applyJumpInstrMod(bufLoc, jumpMod.original, jumpMod.size);
}
}
}
// For each function-defining prologue, find any calls to __morestack,

View File

@ -128,6 +128,26 @@ public:
return cast_or_null<ObjFile<ELFT>>(file);
}
// If basic block sections are enabled, many code sections could end up with
// one or two jump instructions at the end that could be relaxed to a smaller
// instruction. The members below help trimming the trailing jump instruction
// and shrinking a section.

// Number of bytes logically removed from the end of this section's data.
// getSize() subtracts this; rawData itself only shrinks when trim() runs.
unsigned bytesDropped = 0;

// Logically drop `num` bytes from the end of the section (e.g. a deleted
// fall-through jmp).
void drop_back(uint64_t num) { bytesDropped += num; }

// Undo a previous drop_back of `num` bytes. Despite the name, this does not
// append new data; it only restores previously dropped bytes.
void push_back(uint64_t num) {
  assert(bytesDropped >= num);
  bytesDropped -= num;
}

// Materialize the drop: permanently shrink rawData and reset the counter.
void trim() {
  if (bytesDropped) {
    rawData = rawData.drop_back(bytesDropped);
    bytesDropped = 0;
  }
}
ArrayRef<uint8_t> data() const {
if (uncompressedSize >= 0)
uncompress();
@ -183,12 +203,25 @@ public:
// the mmap'ed output buffer.
template <class ELFT> void relocate(uint8_t *buf, uint8_t *bufEnd);
void relocateAlloc(uint8_t *buf, uint8_t *bufEnd);
// Compute the value a relocation of the given type/addend resolves to when
// applied at address p against sym. Exposed as a static member so target
// code (e.g. x86-64 jump relaxation) can reuse it. Parameter names follow
// lld's lowerCamelCase convention, matching the out-of-line definition.
static uint64_t getRelocTargetVA(const InputFile *file, RelType type,
                                 int64_t a, uint64_t p, const Symbol &sym,
                                 RelExpr expr);
// The native ELF reloc data type is not very convenient to handle.
// So we convert ELF reloc records to our own records in Relocations.cpp.
// This vector contains such "cooked" relocations.
std::vector<Relocation> relocations;
// Indicates that this section needs to be padded with a NOP filler if set to
// true.
bool nopFiller = false;
// These are modifiers to jump instructions that are necessary when basic
// block sections are enabled. Basic block sections creates opportunities to
// relax jump instructions at basic block boundaries after reordering the
// basic blocks.
std::vector<JumpInstrMod> jumpInstrMods;
// A function compiled with -fsplit-stack calling a function
// compiled without -fsplit-stack needs its prologue adjusted. Find
// such functions and adjust their prologues. This is very similar

View File

@ -76,6 +76,32 @@ static lto::Config createConfig() {
c.Options.FunctionSections = true;
c.Options.DataSections = true;
// Check if basic block sections must be used.
// Allowed values for --lto-basicblock-sections are "all", "labels",
// "none", or the name of a file specifying basic block ids. This is the
// equivalent of the -fbasicblock-sections= flag in clang.
if (!config->ltoBasicBlockSections.empty()) {
if (config->ltoBasicBlockSections == "all") {
c.Options.BBSections = BasicBlockSection::All;
} else if (config->ltoBasicBlockSections == "labels") {
c.Options.BBSections = BasicBlockSection::Labels;
} else if (config->ltoBasicBlockSections == "none") {
c.Options.BBSections = BasicBlockSection::None;
} else {
ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
MemoryBuffer::getFile(config->ltoBasicBlockSections.str());
if (!MBOrErr) {
error("cannot open " + config->ltoBasicBlockSections + ":" +
MBOrErr.getError().message());
} else {
c.Options.BBSectionsFuncListBuf = std::move(*MBOrErr);
}
c.Options.BBSections = BasicBlockSection::List;
}
}
c.Options.UniqueBBSectionNames = config->ltoUniqueBBSectionNames;
if (auto relocModel = getRelocModelFromCMModel())
c.RelocModel = *relocModel;
else if (config->relocatable)

View File

@ -42,6 +42,10 @@ defm compress_debug_sections:
defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"<symbol>=<value>">;
// Propeller relaxation: after basic block sections are laid out, delete
// trailing direct jumps that fall through to the next section (and flip
// jcc/jmp pairs where possible). Implemented in Writer.cpp.
defm optimize_bb_jumps: B<"optimize-bb-jumps",
  "Remove direct jumps at the end to the next basic block",
  "Do not remove any direct jumps at the end to the next basic block (default)">;
defm split_stack_adjust_size
: Eq<"split-stack-adjust-size",
"Specify adjustment to stack size when a split-stack function calls a "
@ -502,6 +506,11 @@ def opt_remarks_format: Separate<["--"], "opt-remarks-format">,
HelpText<"The format used for serializing remarks (default: YAML)">;
defm plugin_opt: Eq<"plugin-opt", "specifies LTO options for compatibility with GNU linkers">;
def save_temps: F<"save-temps">;
// Accepted values: "all", "labels", "none", or the name of a file listing
// basic block ids (parsed in LTO.cpp); equivalent of clang's
// -fbasicblock-sections=.
def lto_basicblock_sections: J<"lto-basicblock-sections=">,
  HelpText<"Enable basic block sections for LTO">;
defm lto_unique_bb_section_names: B<"lto-unique-bb-section-names",
  "Give unique names to every basic block section for LTO",
  "Do not give unique names to every basic block section for LTO (default)">;
def shuffle_sections: J<"shuffle-sections=">, MetaVarName<"<seed>">,
HelpText<"Shuffle input sections using the given seed. If 0, use a random seed">;
def thinlto_cache_dir: J<"thinlto-cache-dir=">,

View File

@ -242,6 +242,25 @@ void OutputSection::sort(llvm::function_ref<int(InputSectionBase *s)> order) {
sortByOrder(isd->sections, order);
}
// Fill [buf, buf + size) with NOP instructions: as many maximum-size NOPs as
// fit, followed by one appropriately sized NOP for the remainder. Used to pad
// the gap left behind when a trailing jump was deleted (nopFiller sections).
static void nopInstrFill(uint8_t *buf, size_t size) {
  if (size == 0)
    return;
  // Bind a reference; copying the whole table of NOPs would be wasteful.
  const std::vector<std::vector<uint8_t>> &nopFiller = *target->nopInstrs;
  const std::vector<uint8_t> &largestNop = nopFiller.back();
  unsigned i = 0;
  unsigned num = size / largestNop.size();
  for (unsigned c = 0; c < num; ++c) {
    memcpy(buf + i, largestNop.data(), largestNop.size());
    i += largestNop.size();
  }
  unsigned remaining = size - i;
  if (!remaining)
    return;
  // nopFiller[k] holds the NOP of size k + 1, so this entry fits exactly.
  assert(nopFiller[remaining - 1].size() == remaining);
  memcpy(buf + i, nopFiller[remaining - 1].data(), remaining);
}
// Fill [Buf, Buf + Size) with Filler.
// This is used for linker script "=fillexp" command.
static void fill(uint8_t *buf, size_t size,
@ -330,7 +349,11 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *buf) {
end = buf + size;
else
end = buf + sections[i + 1]->outSecOff;
fill(start, end - start, filler);
if (isec->nopFiller) {
assert(target->nopInstrs);
nopInstrFill(start, end - start);
} else
fill(start, end - start, filler);
}
});

View File

@ -24,6 +24,7 @@ class SectionBase;
// Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL.
using RelType = uint32_t;
using JumpModType = uint32_t;
// List of target-independent relocation types. Relocations read
// from files are converted to these types so that the main code
@ -108,6 +109,15 @@ struct Relocation {
Symbol *sym;
};
// Manipulate jump instructions with these modifiers. These are used to relax
// jump instruction opcodes at basic block boundaries and are particularly
// useful when basic block sections are enabled.
struct JumpInstrMod {
  // The jump type to rewrite the instruction to. NOTE(review): despite the
  // name, callers store the *new* (e.g. inverted) opcode here, not the
  // original one -- confirm intended field name with the author.
  JumpModType original;
  // Offset within the input section of the opcode byte to modify (the last
  // opcode byte, immediately before the displacement).
  uint64_t offset;
  // Size in bytes of the jump's displacement field (4 for rel32 forms).
  unsigned size;
};
// This function writes undefined symbol diagnostics to an internal buffer.
// Call reportUndefinedSymbols() after calling scanRelocations() to emit
// the diagnostics.

View File

@ -88,8 +88,21 @@ public:
relocate(loc, Relocation{R_NONE, type, 0, 0, nullptr}, val);
}
virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type,
JumpModType val) const {}
virtual ~TargetInfo();
// This deletes a jump insn at the end of the section if it is a fall thru to
// the next section. Further, if there is a conditional jump and a direct
// jump consecutively, it tries to flip the conditional jump to convert the
// direct jump into a fall thru and delete it. Returns true if a jump
// instruction can be deleted.
// Default implementation: no target support, never deletes anything.
virtual bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
                                   InputSection *nextIS) const {
  return false;
}
unsigned defaultCommonPageSize = 4096;
unsigned defaultMaxPageSize = 4096;
@ -126,6 +139,10 @@ public:
// executable OutputSections.
std::array<uint8_t, 4> trapInstr;
// Stores the NOP instructions of different sizes for the target and is used
// to pad sections that are relaxed.
llvm::Optional<std::vector<std::vector<uint8_t>>> nopInstrs;
// If a target needs to rewrite calls to __morestack to instead call
// __morestack_non_split when a split-stack enabled caller calls a
// non-split-stack callee this will return true. Otherwise returns false.

View File

@ -31,6 +31,8 @@
#include "llvm/Support/xxhash.h"
#include <climits>
#define DEBUG_TYPE "lld"
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
@ -57,6 +59,7 @@ private:
void sortSections();
void resolveShfLinkOrder();
void finalizeAddressDependentContent();
void optimizeBasicBlockJumps();
void sortInputSections();
void finalizeSections();
void checkExecuteOnly();
@ -1670,6 +1673,94 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
Twine(os->alignment) + ")");
}
// If Input Sections have been shrinked (basic block sections) then
// update symbol values and sizes associated with these sections. With basic
// block sections, input sections can shrink when the jump instructions at
// the end of the section are relaxed.
static void fixSymbolsAfterShrinking() {
for (InputFile *File : objectFiles) {
parallelForEach(File->getSymbols(), [&](Symbol *Sym) {
auto *def = dyn_cast<Defined>(Sym);
if (!def)
return;
const SectionBase *sec = def->section;
if (!sec)
return;
const InputSectionBase *inputSec = dyn_cast<InputSectionBase>(sec->repl);
if (!inputSec || !inputSec->bytesDropped)
return;
const size_t OldSize = inputSec->data().size();
const size_t NewSize = OldSize - inputSec->bytesDropped;
if (def->value > NewSize && def->value <= OldSize) {
LLVM_DEBUG(llvm::dbgs()
<< "Moving symbol " << Sym->getName() << " from "
<< def->value << " to "
<< def->value - inputSec->bytesDropped << " bytes\n");
def->value -= inputSec->bytesDropped;
return;
}
if (def->value + def->size > NewSize && def->value <= OldSize &&
def->value + def->size <= OldSize) {
LLVM_DEBUG(llvm::dbgs()
<< "Shrinking symbol " << Sym->getName() << " from "
<< def->size << " to " << def->size - inputSec->bytesDropped
<< " bytes\n");
def->size -= inputSec->bytesDropped;
}
});
}
}
// If basic block sections exist, there are opportunities to delete fall thru
// jumps and shrink jump instructions after basic block reordering. This
// relaxation pass does that. It is only enabled when --optimize-bb-jumps
// option is used.
template <class ELFT> void Writer<ELFT>::optimizeBasicBlockJumps() {
  assert(config->optimizeBBJumps);

  // Addresses must be assigned before we can tell whether a jump lands at
  // the start of the following section.
  script->assignAddresses();

  // For every output section that has executable input sections, this
  // does the following:
  //   1. Deletes all direct jump instructions in input sections that
  //      jump to the following section as it is not required.
  //   2. If there are two consecutive jump instructions, it checks
  //      if they can be flipped and one can be deleted.
  for (OutputSection *os : outputSections) {
    if (!(os->flags & SHF_EXECINSTR))
      continue;
    std::vector<InputSection *> sections = getInputSections(os);
    std::vector<unsigned> result(sections.size());
    // Delete all fall through jump instructions. Also, check if two
    // consecutive jump instructions can be flipped so that a fall
    // through jmp instruction can be deleted.
    parallelForEachN(0, sections.size(), [&](size_t i) {
      InputSection *next = i + 1 < sections.size() ? sections[i + 1] : nullptr;
      InputSection &is = *sections[i];
      result[i] =
          target->deleteFallThruJmpInsn(is, is.getFile<ELFT>(), next) ? 1 : 0;
    });
    size_t numDeleted = std::count(result.begin(), result.end(), 1);
    if (numDeleted > 0) {
      // Sections shrank: recompute addresses so the passes over later output
      // sections see up-to-date fall-through targets.
      script->assignAddresses();
      LLVM_DEBUG(llvm::dbgs()
                 << "Removing " << numDeleted << " fall through jumps\n");
    }
  }

  // Adjust symbol values/sizes that pointed into the deleted bytes.
  fixSymbolsAfterShrinking();

  // Physically shrink the section data now that relaxation is done.
  for (OutputSection *os : outputSections) {
    std::vector<InputSection *> sections = getInputSections(os);
    for (InputSection *is : sections)
      is->trim();
  }
}
// Give a synthetic section its chance to finalize contents, but only when it
// actually made it into the output (non-null, needed, and parented).
static void finalizeSynthetic(SyntheticSection *sec) {
  if (!sec || !sec->isNeeded() || !sec->getParent())
    return;
  sec->finalizeContents();
}
@ -1992,6 +2083,12 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
finalizeSynthetic(in.symTab);
finalizeSynthetic(in.ppc64LongBranchTarget);
// Relaxation to delete inter-basic block jumps created by basic block
// sections. Run after in.symTab is finalized as optimizeBasicBlockJumps
// can relax jump instructions based on symbol offset.
if (config->optimizeBBJumps)
optimizeBasicBlockJumps();
// Fill other section headers. The dynamic table is finalized
// at the end because some tags like RELSZ depend on result
// of finalizing other sections.

View File

@ -0,0 +1,47 @@
# REQUIRES: x86
## basicblock-sections tests.
## This simple test checks foo is folded into bar with bb sections
## and the jumps are deleted.
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
# RUN: ld.lld --optimize-bb-jumps --icf=all %t.o -o %t.out
# RUN: llvm-objdump -d %t.out| FileCheck %s

## In foo, the jne (to the fall-through section) must be flipped to je
## targeting aa.BB.foo, and the direct jmp must be deleted.
# CHECK: <foo>:
# CHECK-NEXT: nopl (%rax)
# CHECK-NEXT: je 0x{{[[:xdigit:]]+}} <aa.BB.foo>
# CHECK-NOT: jmp
# CHECK: <a.BB.foo>:

## Explicitly check that bar is folded and not emitted.
# CHECK-NOT: <bar>:
# CHECK-NOT: <a.BB.bar>:
# CHECK-NOT: <aa.BB.bar>:

## bar is byte-identical to foo, so --icf=all folds it into foo.
.section .text.bar,"ax",@progbits
.type bar,@function
bar:
nopl (%rax)
jne a.BB.bar
jmp aa.BB.bar
.section .text.a.BB.bar,"ax",@progbits,unique,3
a.BB.bar:
nopl (%rax)
aa.BB.bar:
ret
.section .text.foo,"ax",@progbits
.type foo,@function
foo:
nopl (%rax)
jne a.BB.foo
jmp aa.BB.foo
.section .text.a.BB.foo,"ax",@progbits,unique,2
a.BB.foo:
nopl (%rax)
aa.BB.foo:
ret

View File

@ -0,0 +1,128 @@
# REQUIRES: x86
## basicblock-sections tests.
## This simple test checks if redundant direct jumps are converted to
## implicit fallthrus. The jcc's must be converted to their inverted
## opcode, for instance jne to je and jmp must be deleted.
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
# RUN: ld.lld --optimize-bb-jumps %t.o -o %t.out
# RUN: llvm-objdump -d %t.out| FileCheck %s

## Each section below ends with "jcc <next section>; jmp r.BB.foo". After
## relaxation the jcc must be inverted to target r.BB.foo and the jmp
## deleted; one section per conditional-jump flavor.
# CHECK: <foo>:
# CHECK-NEXT: nopl (%rax)
# CHECK-NEXT: jne 0x{{[[:xdigit:]]+}} <r.BB.foo>
# CHECK-NOT: jmp
.section .text,"ax",@progbits
.type foo,@function
foo:
nopl (%rax)
je a.BB.foo
jmp r.BB.foo
# CHECK: <a.BB.foo>:
# CHECK-NEXT: nopl (%rax)
# CHECK-NEXT: je 0x{{[[:xdigit:]]+}} <r.BB.foo>
# CHECK-NOT: jmp
.section .text,"ax",@progbits,unique,3
a.BB.foo:
nopl (%rax)
jne aa.BB.foo
jmp r.BB.foo
# CHECK: <aa.BB.foo>:
# CHECK-NEXT: nopl (%rax)
# CHECK-NEXT: jle 0x{{[[:xdigit:]]+}} <r.BB.foo>
# CHECK-NOT: jmp
#
.section .text,"ax",@progbits,unique,4
aa.BB.foo:
nopl (%rax)
jg aaa.BB.foo
jmp r.BB.foo
# CHECK: <aaa.BB.foo>:
# CHECK-NEXT: nopl (%rax)
# CHECK-NEXT: jl 0x{{[[:xdigit:]]+}} <r.BB.foo>
# CHECK-NOT: jmp
#
.section .text,"ax",@progbits,unique,5
aaa.BB.foo:
nopl (%rax)
jge aaaa.BB.foo
jmp r.BB.foo
# CHECK: <aaaa.BB.foo>:
# CHECK-NEXT: nopl (%rax)
# CHECK-NEXT: jae 0x{{[[:xdigit:]]+}} <r.BB.foo>
# CHECK-NOT: jmp
#
.section .text,"ax",@progbits,unique,6
aaaa.BB.foo:
nopl (%rax)
jb aaaaa.BB.foo
jmp r.BB.foo
# CHECK: <aaaaa.BB.foo>:
# CHECK-NEXT: nopl (%rax)
# CHECK-NEXT: ja 0x{{[[:xdigit:]]+}} <r.BB.foo>
# CHECK-NOT: jmp
#
.section .text,"ax",@progbits,unique,7
aaaaa.BB.foo:
nopl (%rax)
jbe aaaaaa.BB.foo
jmp r.BB.foo
# CHECK: <aaaaaa.BB.foo>:
# CHECK-NEXT: nopl (%rax)
# CHECK-NEXT: jge 0x{{[[:xdigit:]]+}} <r.BB.foo>
# CHECK-NOT: jmp
#
.section .text,"ax",@progbits,unique,8
aaaaaa.BB.foo:
nopl (%rax)
jl aaaaaaa.BB.foo
jmp r.BB.foo
# CHECK: <aaaaaaa.BB.foo>:
# CHECK-NEXT: nopl (%rax)
# CHECK-NEXT: jg 0x{{[[:xdigit:]]+}} <r.BB.foo>
# CHECK-NOT: jmp
#
.section .text,"ax",@progbits,unique,9
aaaaaaa.BB.foo:
nopl (%rax)
jle aaaaaaaa.BB.foo
jmp r.BB.foo
# CHECK: <aaaaaaaa.BB.foo>:
# CHECK-NEXT: nopl (%rax)
# CHECK-NEXT: jbe 0x{{[[:xdigit:]]+}} <r.BB.foo>
# CHECK-NOT: jmp
#
.section .text,"ax",@progbits,unique,10
aaaaaaaa.BB.foo:
nopl (%rax)
ja aaaaaaaaa.BB.foo
jmp r.BB.foo
# CHECK: <aaaaaaaaa.BB.foo>:
# CHECK-NEXT: nopl (%rax)
# CHECK-NEXT: jb 0x{{[[:xdigit:]]+}} <r.BB.foo>
# CHECK-NOT: jmp
#
.section .text,"ax",@progbits,unique,11
aaaaaaaaa.BB.foo:
nopl (%rax)
jae aaaaaaaaaa.BB.foo
jmp r.BB.foo
.section .text,"ax",@progbits,unique,20
aaaaaaaaaa.BB.foo:
nopl (%rax)
r.BB.foo:
ret

View File

@ -0,0 +1,37 @@
# REQUIRES: x86
## basicblock-sections tests.
## This simple test checks if redundant direct jumps are converted to
## implicit fallthrus when PC32 reloc is present. The jcc's must be converted
## to their inverted opcode, for instance jne to je and jmp must be deleted.
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
# RUN: llvm-objdump -dr %t.o| FileCheck %s --check-prefix=RELOC
# RUN: ld.lld --optimize-bb-jumps %t.o -o %t.out
# RUN: llvm-objdump -d %t.out| FileCheck %s

## First verify the hand-encoded jmp carries an R_X86_64_PC32 relocation
## (rather than the PLT32 the assembler would normally emit).
# RELOC: jmp
# RELOC-NEXT: R_X86_64_PC32

## After relaxation: je inverted to jne targeting r.BB.foo, jmp deleted.
# CHECK: <foo>:
# CHECK-NEXT: nopl (%rax)
# CHECK-NEXT: jne 0x{{[[:xdigit:]]+}} <r.BB.foo>
# CHECK-NOT: jmp
.section .text,"ax",@progbits
.type foo,@function
foo:
nopl (%rax)
je a.BB.foo
# Encode a jmp r.BB.foo insn using a PC32 reloc
.byte 0xe9
.long r.BB.foo - . - 4
# CHECK: <a.BB.foo>:
# CHECK-NEXT: nopl (%rax)
.section .text,"ax",@progbits,unique,3
a.BB.foo:
nopl (%rax)
r.BB.foo:
ret