[LLD][ELF] Add time-trace to ELF LLD

This adds some of LLD specific scopes and picks up optimisation scopes
via LTO/ThinLTO. Makes use of TimeProfiler multi-thread support added in
77e6bb3c.

Differential Revision: https://reviews.llvm.org/D71060
This commit is contained in:
Russell Gallop 2020-01-28 16:05:13 +00:00
parent abe01e17f6
commit e7cb374433
12 changed files with 182 additions and 32 deletions

View File

@ -192,6 +192,7 @@ struct Configuration {
bool trace;
bool thinLTOEmitImportsFiles;
bool thinLTOIndexOnly;
bool timeTraceEnabled;
bool tocOptimize;
bool undefinedVersion;
bool useAndroidRelrTags = false;
@ -243,6 +244,7 @@ struct Configuration {
unsigned ltoo;
unsigned optimize;
unsigned thinLTOJobs;
unsigned timeTraceGranularity;
int32_t splitStackAdjustSize;
// The following config options do not directly correspond to any

View File

@ -56,6 +56,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/TarWriter.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
#include <utility>
@ -487,37 +488,60 @@ void LinkerDriver::main(ArrayRef<const char *> argsArr) {
if (args.hasArg(OPT_version))
return;
initLLVM();
createFiles(args);
if (errorCount())
return;
// Initialize time trace profiler.
if (config->timeTraceEnabled)
timeTraceProfilerInitialize(config->timeTraceGranularity, config->progName);
inferMachineType();
setConfigs(args);
checkOptions();
if (errorCount())
return;
{
llvm::TimeTraceScope timeScope("ExecuteLinker");
// The Target instance handles target-specific stuff, such as applying
// relocations or writing a PLT section. It also contains target-dependent
// values such as a default image base address.
target = getTarget();
initLLVM();
createFiles(args);
if (errorCount())
return;
switch (config->ekind) {
case ELF32LEKind:
link<ELF32LE>(args);
return;
case ELF32BEKind:
link<ELF32BE>(args);
return;
case ELF64LEKind:
link<ELF64LE>(args);
return;
case ELF64BEKind:
link<ELF64BE>(args);
return;
default:
llvm_unreachable("unknown Config->EKind");
inferMachineType();
setConfigs(args);
checkOptions();
if (errorCount())
return;
// The Target instance handles target-specific stuff, such as applying
// relocations or writing a PLT section. It also contains target-dependent
// values such as a default image base address.
target = getTarget();
switch (config->ekind) {
case ELF32LEKind:
link<ELF32LE>(args);
break;
case ELF32BEKind:
link<ELF32BE>(args);
break;
case ELF64LEKind:
link<ELF64LE>(args);
break;
case ELF64BEKind:
link<ELF64BE>(args);
break;
default:
llvm_unreachable("unknown Config->EKind");
}
}
if (config->timeTraceEnabled) {
// Write the result of the time trace profiler.
std::string path = args.getLastArgValue(OPT_time_trace_file_eq).str();
if (path.empty())
path = (config->outputFile + ".time-trace").str();
std::error_code ec;
raw_fd_ostream os(path, ec, sys::fs::OF_Text);
if (ec) {
error("cannot open " + path + ": " + ec.message());
return;
}
timeTraceProfilerWrite(os);
timeTraceProfilerCleanup();
}
}
@ -958,6 +982,9 @@ static void readConfigs(opt::InputArgList &args) {
getOldNewOptions(args, OPT_thinlto_object_suffix_replace_eq);
config->thinLTOPrefixReplace =
getOldNewOptions(args, OPT_thinlto_prefix_replace_eq);
config->timeTraceEnabled = args.hasArg(OPT_time_trace);
config->timeTraceGranularity =
args::getInteger(args, OPT_time_trace_granularity, 500);
config->trace = args.hasArg(OPT_trace);
config->undefined = args::getStrings(args, OPT_undefined);
config->undefinedVersion =
@ -1604,6 +1631,7 @@ static Symbol *addUndefined(StringRef name) {
// Because all bitcode files that the program consists of are passed to
// the compiler at once, it can do a whole-program optimization.
template <class ELFT> void LinkerDriver::compileBitcodeFiles() {
llvm::TimeTraceScope timeScope("LTO");
// Compile bitcode files and replace bitcode symbols.
lto.reset(new BitcodeCompiler);
for (BitcodeFile *file : bitcodeFiles)
@ -1733,6 +1761,7 @@ template <class ELFT> static uint32_t getAndFeatures() {
// Do actual linking. Note that when this function is called,
// all linker scripts have already been parsed.
template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
llvm::TimeTraceScope timeScope("Link", StringRef("LinkerDriver::Link"));
// If a -hash-style option was not given, set to a default value,
// which varies depending on the target.
if (!args.hasArg(OPT_hash_style)) {
@ -1772,8 +1801,11 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
// symbols that we need to the symbol table. This process might
// add files to the link, via autolinking, these files are always
// appended to the Files vector.
for (size_t i = 0; i < files.size(); ++i)
parseFile(files[i]);
{
llvm::TimeTraceScope timeScope("Parse input files");
for (size_t i = 0; i < files.size(); ++i)
parseFile(files[i]);
}
// Now that we have every file, we can decide if we will need a
// dynamic symbol table.

View File

@ -84,6 +84,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/xxhash.h"
#include <algorithm>
#include <atomic>
@ -525,7 +526,10 @@ template <class ELFT> void ICF<ELFT>::run() {
}
// ICF entry point function.
template <class ELFT> void doIcf() { ICF<ELFT>().run(); }
template <class ELFT> void doIcf() {
llvm::TimeTraceScope timeScope("ICF");
ICF<ELFT>().run();
}
template void doIcf<ELF32LE>();
template void doIcf<ELF32BE>();

View File

@ -113,6 +113,9 @@ static lto::Config createConfig() {
c.HasWholeProgramVisibility = config->ltoWholeProgramVisibility;
c.TimeTraceEnabled = config->timeTraceEnabled;
c.TimeTraceGranularity = config->timeTraceGranularity;
c.CSIRProfile = std::string(config->ltoCSProfileFile);
c.RunCSIRInstr = config->ltoCSProfileGenerate;

View File

@ -31,6 +31,7 @@
#include "lld/Common/Strings.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/TimeProfiler.h"
#include <functional>
#include <vector>
@ -323,6 +324,7 @@ template <class ELFT> void MarkLive<ELFT>::moveToMain() {
// input sections. This function make some or all of them on
// so that they are emitted to the output file.
template <class ELFT> void markLive() {
llvm::TimeTraceScope timeScope("markLive");
// If -gc-sections is not given, no sections are removed.
if (!config->gcSections) {
for (InputSectionBase *sec : inputSections)

View File

@ -354,6 +354,12 @@ defm threads: B<"threads",
"Run the linker multi-threaded (default)",
"Do not run the linker multi-threaded">;
def time_trace: F<"time-trace">, HelpText<"Record time trace">;
def time_trace_file_eq: J<"time-trace-file=">, HelpText<"Specify time trace output file">;
defm time_trace_granularity: Eq<"time-trace-granularity",
"Minimum time granularity (in microseconds) traced by time profiler">;
defm toc_optimize : B<"toc-optimize",
"(PowerPC64) Enable TOC related optimizations (default)",
"(PowerPC64) Disable TOC related optimizations">;

View File

@ -36,6 +36,7 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/TimeProfiler.h"
#include <cstdlib>
#include <thread>
@ -3237,6 +3238,7 @@ MergeSyntheticSection *createMergeSynthetic(StringRef name, uint32_t type,
}
template <class ELFT> void splitSections() {
llvm::TimeTraceScope timeScope("Split sections");
// splitIntoPieces needs to be called on each MergeInputSection
// before calling finalizeContents().
parallelForEach(inputSections, [](InputSectionBase *sec) {

View File

@ -27,6 +27,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/xxhash.h"
#include <climits>
@ -139,7 +140,10 @@ static bool needsInterpSection() {
!config->dynamicLinker.empty() && script->needsInterpSection();
}
template <class ELFT> void writeResult() { Writer<ELFT>().run(); }
template <class ELFT> void writeResult() {
llvm::TimeTraceScope timeScope("Write output file");
Writer<ELFT>().run();
}
static void removeEmptyPTLoad(std::vector<PhdrEntry *> &phdrs) {
llvm::erase_if(phdrs, [&](const PhdrEntry *p) {

View File

@ -0,0 +1,43 @@
; REQUIRES: x86
; Test ThinLTO with time trace
; RUN: opt -module-summary %s -o %t1.o
; RUN: opt -module-summary %p/Inputs/thinlto.ll -o %t2.o
; Test single-threaded
; RUN: ld.lld --thinlto-jobs=1 -time-trace -time-trace-granularity=0 -shared %t1.o %t2.o -o %t3.so
; RUN: cat %t3.so.time-trace \
; RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
; RUN: | FileCheck %s
; Test multi-threaded
; RUN: ld.lld -time-trace -time-trace-granularity=0 -shared %t1.o %t2.o -o %t4.so
; RUN: cat %t4.so.time-trace \
; RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
; RUN: | FileCheck %s
; CHECK: "traceEvents": [
; Check fields for an event are present
; CHECK: "args":
; CHECK-NEXT: "detail":
; CHECK: "dur":
; CHECK-NEXT: "name":
; CHECK-NEXT: "ph":
; CHECK-NEXT: "pid":
; CHECK-NEXT: "tid":
; CHECK-NEXT: "ts":
; Check that an optimisation event is present
; CHECK: "name": "OptModule"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
declare void @g(...)
define void @f() {
entry:
call void (...) @g()
ret void
}

40
lld/test/ELF/time-trace.s Normal file
View File

@ -0,0 +1,40 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
# Test implicit trace file name
# RUN: ld.lld -time-trace -time-trace-granularity=0 -o %t1.elf %t.o
# RUN: cat %t1.elf.time-trace \
# RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
# RUN: | FileCheck %s
# Test specified trace file name
# RUN: ld.lld -time-trace -time-trace-file=%t2.json -time-trace-granularity=0 -o %t2.elf %t.o
# RUN: cat %t2.json \
# RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
# RUN: | FileCheck %s
# Test trace requested to stdout
# RUN: ld.lld -time-trace -time-trace-file=- -time-trace-granularity=0 -o %t3.elf %t.o \
# RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
# RUN: | FileCheck %s
# CHECK: "traceEvents": [
# Check one event has correct fields
# CHECK: "dur":
# CHECK-NEXT: "name":
# CHECK-NEXT: "ph":
# CHECK-NEXT: "pid":
# CHECK-NEXT: "tid":
# CHECK-NEXT: "ts":
# Check there is an ExecuteLinker event
# CHECK: "name": "ExecuteLinker"
# Check process_name entry field
# CHECK: "name": "ld.lld{{(.exe)?}}"
# CHECK: "name": "process_name"
.globl _start
_start:
ret

View File

@ -124,6 +124,12 @@ struct Config {
/// Statistics output file path.
std::string StatsFile;
/// Time trace enabled.
bool TimeTraceEnabled = false;
/// Time trace granularity.
unsigned TimeTraceGranularity = 500;
bool ShouldDiscardValueNames = true;
DiagnosticHandlerFunction DiagHandler;

View File

@ -40,6 +40,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@ -1160,6 +1161,9 @@ public:
&ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, BitcodeModule> &ModuleMap) {
if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
timeTraceProfilerInitialize(Conf.TimeTraceGranularity,
"thin backend");
Error E = runThinLTOBackendThread(
AddStream, Cache, Task, BM, CombinedIndex, ImportList, ExportList,
ResolvedODR, DefinedGlobals, ModuleMap);
@ -1170,6 +1174,8 @@ public:
else
Err = std::move(E);
}
if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
timeTraceProfilerFinishThread();
},
BM, std::ref(CombinedIndex), std::ref(ImportList), std::ref(ExportList),
std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap));