FEXCore: Adds support for a timeline profiler interface

This creates a generic interface that FEXCore can use for timeline
profiling. This allows us to create a generic interface which the
backend details are hidden so we can support multiple timeline profile
APIs.

The only API supported right now is ftrace/gpuvis. Which is extremely
lightweight of an interface with minimal overhead.

We must be careful here since in most cases will will have dozens of
FEX instances running at any given time. So a timeline profiler like
Microprofiler can have major issues since that only ever expects a
single process at a time.

Not enabled by default but just needs the `ENABLE_FEXCORE_PROFILER`
cmake option set to enable.
This commit is contained in:
Ryan Houdek 2022-10-13 18:16:06 -07:00
parent 136f1e2fc7
commit 3e48b1a8ac
24 changed files with 247 additions and 3 deletions

View File

@ -29,11 +29,24 @@ option(ENABLE_INTERPRETER "Enables FEX's Interpreter" FALSE)
option(ENABLE_CCACHE "Enables ccache for compile caching" TRUE)
option(ENABLE_TERMUX_BUILD "Forces building for Termux on a non-Termux build machine" FALSE)
option(ENABLE_VIXL_SIMULATOR "Forces the FEX JIT to use the VIXL simulator" FALSE)
option(ENABLE_FEXCORE_PROFILER "Enables use of the FEXCore timeline profiling capabilities" FALSE)
set (FEXCORE_PROFILER_BACKEND "gpuvis" CACHE STRING "Set which backend you want to use for the FEXCore profiler")
set (X86_32_TOOLCHAIN_FILE "${CMAKE_CURRENT_SOURCE_DIR}/toolchain_x86_32.cmake" CACHE FILEPATH "Toolchain file for the (cross-)compiler targeting i686")
set (X86_64_TOOLCHAIN_FILE "${CMAKE_CURRENT_SOURCE_DIR}/toolchain_x86_64.cmake" CACHE FILEPATH "Toolchain file for the (cross-)compiler targeting x86_64")
set (DATA_DIRECTORY "${CMAKE_INSTALL_PREFIX}/share/fex-emu" CACHE PATH "global data directory")
if (ENABLE_FEXCORE_PROFILER)
add_definitions(-DENABLE_FEXCORE_PROFILER=1)
string(TOUPPER "${FEXCORE_PROFILER_BACKEND}" FEXCORE_PROFILER_BACKEND)
if (FEXCORE_PROFILER_BACKEND STREQUAL "GPUVIS")
add_definitions(-DFEXCORE_PROFILER_BACKEND=1)
else()
message(FATAL_ERROR "Unknown FEXCore profiler backend ${FEXCORE_PROFILER_BACKEND}")
endif()
endif()
# uninstall target
if(NOT TARGET uninstall)
configure_file(

View File

@ -143,6 +143,7 @@ set (SRCS
Utils/NetStream.cpp
Utils/Telemetry.cpp
Utils/Threads.cpp
Utils/Profiler.cpp
)
if (ENABLE_INTERPRETER)

View File

@ -44,6 +44,7 @@ $end_info$
#include <FEXCore/Utils/Event.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Threads.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXHeaderUtils/Syscalls.h>
#include <FEXHeaderUtils/TodoDefines.h>
@ -674,6 +675,8 @@ namespace FEXCore::Context {
}
void Context::ClearCodeCache(FEXCore::Core::InternalThreadState *Thread) {
FEXCORE_PROFILE_INSTANT("ClearCodeCache");
{
// Ensure the Code Object Serialization service has fully serialized this thread's data before clearing the cache
// Use the thread's object cache ref counter for this
@ -741,6 +744,8 @@ namespace FEXCore::Context {
}
Context::GenerateIRResult Context::GenerateIR(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP, bool ExtendedDebugInfo) {
FEXCORE_PROFILE_SCOPED("GenerateIR");
Thread->OpDispatcher->ReownOrClaimBuffer();
Thread->OpDispatcher->ResetWorkingList();
@ -1011,6 +1016,7 @@ namespace FEXCore::Context {
}
uintptr_t Context::CompileBlock(FEXCore::Core::CpuStateFrame *Frame, uint64_t GuestRIP) {
FEXCORE_PROFILE_SCOPED("CompileBlock");
auto Thread = Frame->Thread;
// Invalidate might take a unique lock on this, to guarantee that during invalidation no code gets compiled

View File

@ -18,6 +18,7 @@ $end_info$
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/Utils/Telemetry.h>
#include <FEXHeaderUtils/TypeDefines.h>
#include <set>
@ -1132,6 +1133,7 @@ const uint8_t *Decoder::AdjustAddrForSpecialRegion(uint8_t const* _InstStream, u
}
void Decoder::DecodeInstructionsAtEntry(uint8_t const* _InstStream, uint64_t PC, std::function<void(uint64_t BlockEntry, uint64_t Start, uint64_t Length)> AddContainedCodePage) {
FEXCORE_PROFILE_SCOPED("DecodeInstructions");
Blocks.clear();
BlocksToDecode.clear();
HasBlocks.clear();
@ -1166,7 +1168,6 @@ void Decoder::DecodeInstructionsAtEntry(uint8_t const* _InstStream, uint64_t PC,
std::set<uint64_t> CodePages = { CurrentCodePage };
AddContainedCodePage(PC, CurrentCodePage, FHU::FEX_PAGE_SIZE);
while (!BlocksToDecode.empty()) {
auto BlockDecodeIt = BlocksToDecode.begin();

View File

@ -28,6 +28,7 @@ $end_info$
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/Profiler.h>
#include "Interface/Core/Interpreter/InterpreterOps.h"
@ -731,6 +732,8 @@ void *Arm64JITCore::CompileCode(uint64_t Entry,
FEXCore::Core::DebugData *DebugData,
FEXCore::IR::RegisterAllocationData *RAData,
bool GDBEnabled) {
FEXCORE_PROFILE_SCOPED("Arm64::CompileCode");
using namespace aarch64;
JumpTargets.clear();
uint32_t SSACount = IR->GetSSACount();

View File

@ -27,6 +27,7 @@ $end_info$
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include <algorithm>
#include <array>
@ -370,7 +371,7 @@ X86JITCore::X86JITCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalTh
{
auto &Common = ThreadState->CurrentFrame->Pointers.Common;
Common.PrintValue = reinterpret_cast<uint64_t>(PrintValue);
Common.PrintVectorValue = reinterpret_cast<uint64_t>(PrintVectorValue);
Common.ThreadRemoveCodeEntryFromJIT = reinterpret_cast<uintptr_t>(&Context::Context::ThreadRemoveCodeEntryFromJit);
@ -582,6 +583,8 @@ std::tuple<X86JITCore::SetCC, X86JITCore::CMovCC, X86JITCore::JCC> X86JITCore::G
}
void *X86JITCore::CompileCode(uint64_t Entry, [[maybe_unused]] FEXCore::IR::IRListView const *IR, [[maybe_unused]] FEXCore::Core::DebugData *DebugData, FEXCore::IR::RegisterAllocationData *RAData, bool GDBEnabled) {
FEXCORE_PROFILE_SCOPED("x86::CompileCode");
JumpTargets.clear();
uint32_t SSACount = IR->GetSSACount();

View File

@ -12,6 +12,7 @@ $end_info$
#include "Interface/IR/Passes/RegisterAllocationPass.h"
#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/Profiler.h>
namespace FEXCore::IR {
class IREmitter;
@ -66,6 +67,8 @@ void PassManager::InsertRegisterAllocationPass(bool OptimizeSRA, bool SupportsAV
}
bool PassManager::Run(IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::Run");
bool Changed = false;
for (auto const &Pass : Passes) {
Changed |= Pass->Run(IREmit);

View File

@ -20,6 +20,7 @@ $end_info$
#include <FEXCore/IR/IREmitter.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include <bit>
#include <cstdint>
@ -1028,6 +1029,8 @@ bool ConstProp::ConstantInlining(IREmitter *IREmit, const IRListView& CurrentIR)
}
bool ConstProp::Run(IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::ConstProp");
bool Changed = false;
auto CurrentIR = IREmit->ViewIR();
auto OriginalWriteCursor = IREmit->GetWriteCursor();

View File

@ -9,6 +9,7 @@ $end_info$
#include <FEXCore/IR/IR.h>
#include <FEXCore/IR/IREmitter.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Utils/Profiler.h>
#include <memory>
@ -22,6 +23,7 @@ private:
};
bool DeadCodeElimination::Run(IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::DCE");
auto CurrentIR = IREmit->ViewIR();
int NumRemoved = 0;

View File

@ -13,6 +13,7 @@ $end_info$
#include <FEXCore/IR/IREmitter.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include <array>
#include <memory>
@ -695,6 +696,7 @@ bool RCLSE::RedundantStoreLoadElimination(FEXCore::IR::IREmitter *IREmit) {
}
bool RCLSE::Run(FEXCore::IR::IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::RCLSE");
// XXX: We don't do cross-block optimizations yet
//CalculateControlFlowInfo(IREmit);
bool Changed = false;

View File

@ -12,6 +12,7 @@ $end_info$
#include <FEXCore/IR/IREmitter.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include <memory>
#include <stddef.h>
@ -154,6 +155,8 @@ struct Info {
*
*/
bool DeadStoreElimination::Run(IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::DSE");
std::unordered_map<OrderedNode*, Info> InfoMap;
bool Changed = false;

View File

@ -13,6 +13,7 @@ $end_info$
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/Profiler.h>
#include <algorithm>
#include <cstdint>
@ -52,6 +53,8 @@ IRCompaction::IRCompaction(FEXCore::Utils::IntrusivePooledAllocator &Allocator)
}
bool IRCompaction::Run(IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::IRCompaction");
LocalBuilder.ReownOrClaimBuffer();
auto CurrentIR = IREmit->ViewIR();

View File

@ -14,6 +14,7 @@ $end_info$
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/IR/RegisterAllocationData.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include <cstdint>
#include <memory>
@ -32,6 +33,8 @@ IRValidation::~IRValidation() {
}
bool IRValidation::Run(IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::IRValidation");
bool HadError = false;
bool HadWarning = false;

View File

@ -9,6 +9,7 @@ $end_info$
#include <FEXCore/IR/IR.h>
#include <FEXCore/IR/IREmitter.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Utils/Profiler.h>
#include <memory>
#include <stdint.h>
@ -53,6 +54,8 @@ bool LongDivideEliminationPass::IsSextOp(IREmitter *IREmit, OrderedNodeWrapper L
}
bool LongDivideEliminationPass::Run(IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::LDE");
bool Changed = false;
auto CurrentIR = IREmit->ViewIR();
auto OriginalWriteCursor = IREmit->GetWriteCursor();

View File

@ -9,6 +9,7 @@ $end_info$
#include <FEXCore/IR/IREmitter.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include "Interface/IR/PassManager.h"
@ -24,6 +25,8 @@ public:
};
bool PhiValidation::Run(IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::PHIValidation");
bool HadError = false;
auto CurrentIR = IREmit->ViewIR();

View File

@ -6,7 +6,7 @@
#include <FEXCore/IR/IREmitter.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/IR/RegisterAllocationData.h>
#include <FEXCore/Utils/Profiler.h>
#include <algorithm>
#include <deque>
@ -191,6 +191,8 @@ private:
bool RAValidation::Run(IREmitter *IREmit) {
if (!Manager->HasPass("RA")) return false;
FEXCORE_PROFILE_SCOPED("PassManager::RAValidation");
IR::RegisterAllocationData* RAData = Manager->GetPass<IR::RegisterAllocationPass>("RA")->GetAllocationData();
BlockExitState.clear();
// BlocksToVisit will already be empty

View File

@ -8,6 +8,8 @@ $end_info$
#include <FEXCore/IR/IR.h>
#include <FEXCore/IR/IREmitter.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Utils/Profiler.h>
#include "Interface/IR/PassManager.h"
#include <array>
@ -32,6 +34,8 @@ public:
*
*/
bool DeadFlagCalculationEliminination::Run(IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::DFE");
std::array<OrderedNode*, 32> LastValidFlagStores{};
bool Changed = false;

View File

@ -15,6 +15,8 @@ $end_info$
#include <FEXCore/Utils/BucketList.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXHeaderUtils/TypeDefines.h>
#include <algorithm>
@ -1527,6 +1529,7 @@ namespace {
}
bool ConstrainedRAPass::Run(IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::RA");
bool Changed = false;
auto IR = IREmit->ViewIR();

View File

@ -11,6 +11,7 @@ $end_info$
#include <FEXCore/IR/IREmitter.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include <memory>
#include <stddef.h>
@ -76,6 +77,8 @@ private:
*
*/
bool StaticRegisterAllocationPass::Run(IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::SRA");
auto CurrentIR = IREmit->ViewIR();
for (auto [BlockNode, BlockIROp] : CurrentIR.GetBlocks()) {

View File

@ -11,6 +11,7 @@ $end_info$
#include <FEXCore/IR/IREmitter.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Utils/Profiler.h>
#include <memory>
#include <stdint.h>
@ -23,6 +24,8 @@ public:
};
bool SyscallOptimization::Run(IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::SyscallOpt");
bool Changed = false;
auto CurrentIR = IREmit->ViewIR();

View File

@ -11,6 +11,7 @@ $end_info$
#include <FEXCore/IR/IREmitter.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include <functional>
#include <memory>
@ -36,6 +37,8 @@ public:
};
bool ValueDominanceValidation::Run(IREmitter *IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::ValueDominanceValidation");
bool HadError = false;
auto CurrentIR = IREmit->ViewIR();

View File

@ -0,0 +1,116 @@
#include <array>
#include <cstdint>
#include <fcntl.h>
#include <limits.h>
#include <linux/magic.h>
#include <string>
#include <sys/stat.h>
#include <sys/vfs.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#define BACKEND_OFF 0
#define BACKEND_GPUVIS 1
#ifdef ENABLE_FEXCORE_PROFILER
#if FEXCORE_PROFILER_BACKEND == BACKEND_GPUVIS
namespace FEXCore::Profiler {
ProfilerBlock::ProfilerBlock(std::string_view const Format)
: DurationBegin {GetTime()}
, Format {Format} {
}
ProfilerBlock::~ProfilerBlock() {
auto Duration = GetTime() - DurationBegin;
TraceObject(Format, Duration);
}
}
namespace GPUVis {
// ftrace FD for writing trace data.
// Needs to be a raw FD since we hold this open for the entire application execution.
static int TraceFD {-1};
// Need to search the paths to find the real trace path
static std::array<char const*, 2> TraceFSDirectories {
"/sys/kernel/tracing",
"/sys/kernel/debug/tracing",
};
static bool IsTraceFS(char const* Path) {
struct statfs stat;
if (statfs(Path, &stat)) {
return false;
}
return stat.f_type == TRACEFS_MAGIC;
}
void Init() {
for (auto Path : TraceFSDirectories) {
if (IsTraceFS(Path)) {
std::string FilePath = fmt::format("{}/trace_marker", Path);
TraceFD = open(FilePath.c_str(), O_WRONLY | O_CLOEXEC);
if (TraceFD != -1) {
// Opened TraceFD, early exit
break;
}
}
}
}
void Shutdown() {
if (TraceFD != -1) {
close(TraceFD);
TraceFD = -1;
}
}
void TraceObject(std::string_view const Format, uint64_t Duration) {
if (TraceFD != -1) {
// Print the duration as something that began negative duration ago
std::string Event = fmt::format("{} (lduration=-{})\n", Format, Duration);
write(TraceFD, Event.c_str(), Event.size());
}
}
void TraceObject(std::string_view const Format) {
if (TraceFD != -1) {
std::string Event = fmt::format("{}\n", Format);
write(TraceFD, Format.data(), Format.size());
}
}
}
#else
#error Unknown profiler backend
#endif
#endif
namespace FEXCore::Profiler {
#ifdef ENABLE_FEXCORE_PROFILER
void Init() {
#if FEXCORE_PROFILER_BACKEND == BACKEND_GPUVIS
GPUVis::Init();
#endif
}
void Shutdown() {
#if FEXCORE_PROFILER_BACKEND == BACKEND_GPUVIS
GPUVis::Shutdown();
#endif
}
void TraceObject(std::string_view const Format, uint64_t Duration) {
#if FEXCORE_PROFILER_BACKEND == BACKEND_GPUVIS
GPUVis::TraceObject(Format, Duration);
#endif
}
void TraceObject(std::string_view const Format) {
#if FEXCORE_PROFILER_BACKEND == BACKEND_GPUVIS
GPUVis::TraceObject(Format);
#endif
}
#endif
}

View File

@ -0,0 +1,55 @@
#pragma once
#include <cstdint>
#include <string_view>
#include <time.h>
#include <FEXCore/Utils/CompilerDefs.h>
namespace FEXCore::Profiler {
#ifdef ENABLE_FEXCORE_PROFILER
FEX_DEFAULT_VISIBILITY void Init();
FEX_DEFAULT_VISIBILITY void Shutdown();
FEX_DEFAULT_VISIBILITY void TraceObject(std::string_view const Format);
FEX_DEFAULT_VISIBILITY void TraceObject(std::string_view const Format, uint64_t Duration);
static inline uint64_t GetTime() {
// We want the time in the least amount of overhead possible
// clock_gettime will do a VDSO call with the least amount of overhead
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return ts.tv_sec * 1'000'000'000ULL + ts.tv_nsec;
}
// A class that follows scoping rules to generate a profile duration block
class ProfilerBlock final {
public:
ProfilerBlock(std::string_view const Format);
~ProfilerBlock();
private:
uint64_t DurationBegin;
std::string_view const Format;
};
#define UniqueScopeName2(name, line) name ## line
#define UniqueScopeName(name, line) UniqueScopeName2(name, line)
// Declare an instantaneous profiler event.
#define FEXCORE_PROFILE_INSTANT(name) FEXCore::Profiler::TraceObject(name)
// Declare a scoped profile block variable with a fixed name.
#define FEXCORE_PROFILE_SCOPED(name) \
FEXCore::Profiler::ProfilerBlock UniqueScopeName(ScopedBlock_, __LINE__) (name)
#else
[[maybe_unused]] static void Init() {}
[[maybe_unused]] static void Shutdown() {}
[[maybe_unused]] static void TraceObject(std::string_view const Format) {}
[[maybe_unused]] static void TraceObject(std::string_view const, uint64_t) {}
#define FEXCORE_PROFILE_INSTANT(...) do {} while(0)
#define FEXCORE_PROFILE_SCOPED(...) do {} while(0)
#endif
}

View File

@ -24,6 +24,7 @@ $end_info$
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Telemetry.h>
#include <FEXCore/Utils/Threads.h>
#include <FEXCore/Utils/Profiler.h>
#include <atomic>
#include <cerrno>
@ -283,6 +284,7 @@ int main(int argc, char **argv, char **const envp) {
}
}
FEXCore::Profiler::Init();
FEXCore::Telemetry::Initialize();
RootFSRedirect(&Program.first, LDPath());
@ -503,6 +505,7 @@ int main(int argc, char **argv, char **const envp) {
FEXCore::Allocator::ReclaimMemoryRegion(Base48Bit);
// Allocator is now original system allocator
FEXCore::Telemetry::Shutdown(Program.second);
FEXCore::Profiler::Shutdown();
if (ShutdownReason == FEXCore::Context::ExitReason::EXIT_SHUTDOWN) {
return ProgramStatus;
}