Merge pull request #2923 from Sonicadvance1/nonnull_legacy_segment_telemetry

FEXCore: Adds telemetry around legacy segment register setting
This commit is contained in:
Ryan Houdek 2023-08-20 10:27:56 -07:00 committed by GitHub
commit a523858f66
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 218 additions and 23 deletions

View File

@ -64,7 +64,7 @@ jobs:
# Note the current convention is to use the -S and -B options here to specify source
# and build directories, but this is only available with CMake 3.13 and higher.
# The CMake binaries on the Github Actions machines are (as of this writing) 3.12
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_VIXL_SIMULATOR=False -DENABLE_VIXL_DISASSEMBLER=True -DENABLE_LTO=False -DENABLE_ASSERTIONS=True
run: cmake $GITHUB_WORKSPACE -DENABLE_OFFLINE_TELEMETRY=False -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_VIXL_SIMULATOR=False -DENABLE_VIXL_DISASSEMBLER=True -DENABLE_LTO=False -DENABLE_ASSERTIONS=True
- name: Build
working-directory: ${{runner.workspace}}/build

View File

@ -7,7 +7,18 @@ namespace FEXCore {
namespace CPU {
// Constructs the backend for a single thread and records the code buffer size limits.
// When telemetry is compiled in, also fills the thread's TelemetryValueAddresses table with the
// address of every telemetry value, one entry per TelemetryType below TYPE_LAST.
CPUBackend::CPUBackend(FEXCore::Core::InternalThreadState *ThreadState, size_t InitialCodeSize, size_t MaxCodeSize)
: ThreadState(ThreadState), InitialCodeSize(InitialCodeSize), MaxCodeSize(MaxCodeSize) {}
: ThreadState(ThreadState), InitialCodeSize(InitialCodeSize), MaxCodeSize(MaxCodeSize) {
#ifndef FEX_DISABLE_TELEMETRY
// Pointer table that lives inside the thread's current CPU state frame.
auto &Common = ThreadState->CurrentFrame->Pointers.Common;
// Fill in telemetry values
for (size_t i = 0; i < FEXCore::Telemetry::TYPE_LAST; ++i) {
auto &Telem = FEXCore::Telemetry::GetTelemetryValue(static_cast<FEXCore::Telemetry::TelemetryType>(i));
// Stored as a raw 64-bit integer; consumers cast it back to a pointer.
Common.TelemetryValueAddresses[i] = reinterpret_cast<uint64_t>(Telem.GetAddr());
}
#endif
}
CPUBackend::~CPUBackend() {
for (auto CodeBuffer : CodeBuffers) {

View File

@ -771,6 +771,20 @@ DEF_OP(AtomicFetchNeg) {
default: LOGMAN_MSG_A_FMT("Unhandled Atomic size: {}", IROp->Size);
}
}
// Interpreter handler for the TelemetrySetValue IR op.
// Atomically ORs 1 into the telemetry value selected by Op->TelemetryValueIndex when the 32-bit
// source value is non-zero, and ORs 0 (leaving the value unchanged) otherwise.
// The handler body is empty when FEX_DISABLE_TELEMETRY is defined.
DEF_OP(TelemetrySetValue) {
#ifndef FEX_DISABLE_TELEMETRY
  auto Op = IROp->C<IR::IROp_TelemetrySetValue>();

  // The IR op is specified on a 32-bit value, so only the low 32 bits are inspected.
  // Reading the slot as 64 bits could pick up upper bits that are not part of the value.
  const uint32_t Src = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);

  // The per-thread table stores the telemetry value's address as a raw integer.
  auto TelemetryPtr = reinterpret_cast<std::atomic<uint64_t>*>(Data->State->CurrentFrame->Pointers.Common.TelemetryValueAddresses[Op->TelemetryValueIndex]);

  const uint64_t Set = (Src != 0) ? 1 : 0;
  *TelemetryPtr |= Set;
#endif
}
#undef DEF_OP

View File

@ -107,6 +107,7 @@ constexpr OpHandlerArray InterpreterOpHandlers = [] {
REGISTER_OP(ATOMICFETCHOR, AtomicFetchOr);
REGISTER_OP(ATOMICFETCHXOR, AtomicFetchXor);
REGISTER_OP(ATOMICFETCHNEG, AtomicFetchNeg);
REGISTER_OP(TELEMETRYSETVALUE, TelemetrySetValue);
// Branch ops
REGISTER_OP(CALLBACKRETURN, CallbackReturn);

View File

@ -143,6 +143,7 @@ namespace FEXCore::CPU {
DEF_OP(AtomicFetchOr);
DEF_OP(AtomicFetchXor);
DEF_OP(AtomicFetchNeg);
DEF_OP(TelemetrySetValue);
///< Branch ops
DEF_OP(CallbackReturn);

View File

@ -6,6 +6,7 @@ $end_info$
#include "Interface/Context/Context.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/Dispatcher/Arm64Dispatcher.h"
#include "Interface/Core/JIT/Arm64/JITClass.h"
namespace FEXCore::CPU {
@ -437,6 +438,31 @@ DEF_OP(AtomicFetchNeg) {
mov(EmitSize, GetReg(Node), TMP2.R());
}
// Arm64 JIT handler for the TelemetrySetValue IR op.
// Emits code that atomically ORs (Src != 0 ? 1 : 0) into the 64-bit telemetry value whose address is
// stored in the CPU state frame at TelemetryValueAddresses[Op->TelemetryValueIndex].
// Emits nothing when FEX_DISABLE_TELEMETRY is defined.
DEF_OP(TelemetrySetValue) {
#ifndef FEX_DISABLE_TELEMETRY
  auto Op = IROp->C<IR::IROp_TelemetrySetValue>();
  auto Src = GetReg(Op->Value.ID());

  // TMP2 = address of the telemetry value.
  ldr(TMP2, STATE_PTR(CpuStateFrame, Pointers.Common.TelemetryValueAddresses[Op->TelemetryValueIndex]));

  // Only the low 32 bits of the source are significant for this op.
  cmp(ARMEmitter::Size::i32Bit, Src, 0);

  if (CTX->HostFeatures.SupportsAtomics) {
    // Cortex fuses cmp+cset.
    cset(ARMEmitter::Size::i32Bit, TMP1, ARMEmitter::Condition::CC_NE);
    stsetl(ARMEmitter::SubRegSize::i64Bit, TMP1, TMP2);
  }
  else {
    ARMEmitter::BackwardLabel LoopTop;
    Bind(&LoopTop);
    ldaxr(ARMEmitter::SubRegSize::i64Bit, TMP3, TMP2);
    // TMP1 doubles as the store-exclusive status register, so rebuild the 0/1 flag on every
    // iteration. None of the instructions in this loop modify NZCV, so the cmp result is still valid.
    cset(ARMEmitter::Size::i32Bit, TMP1, ARMEmitter::Condition::CC_NE);
    // OR in the normalised flag with a 64-bit operation so the upper half of the value is preserved.
    orr(ARMEmitter::Size::i64Bit, TMP3, TMP3, TMP1);
    // The status register must differ from both the data and the address register.
    stlxr(ARMEmitter::SubRegSize::i64Bit, TMP1, TMP3, TMP2);
    cbnz(ARMEmitter::Size::i32Bit, TMP1, &LoopTop);
  }
#endif
}
#undef DEF_OP
}

View File

@ -10,6 +10,7 @@ desc: Main glue logic of the arm64 splatter backend
$end_info$
*/
#include "FEXCore/Utils/Telemetry.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
#include "Interface/Core/LookupCache.h"
@ -622,7 +623,6 @@ Arm64JITCore::Arm64JITCore(FEXCore::Context::ContextImpl *ctx, FEXCore::Core::In
Common.SyscallHandlerFunc = reinterpret_cast<uint64_t>(FEXCore::Context::HandleSyscall);
Common.ExitFunctionLink = reinterpret_cast<uintptr_t>(&Context::ContextImpl::ThreadExitFunctionLink<Arm64JITCore_ExitFunctionLink>);
// Fill in the fallback handlers
InterpreterOps::FillFallbackIndexPointers(Common.FallbackHandlerPointers);
@ -915,6 +915,7 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry,
REGISTER_OP(ATOMICFETCHOR, AtomicFetchOr);
REGISTER_OP(ATOMICFETCHXOR, AtomicFetchXor);
REGISTER_OP(ATOMICFETCHNEG, AtomicFetchNeg);
REGISTER_OP(TELEMETRYSETVALUE, TelemetrySetValue);
// Branch ops
REGISTER_OP(CALLBACKRETURN, CallbackReturn);

View File

@ -294,6 +294,7 @@ private:
DEF_OP(AtomicFetchOr);
DEF_OP(AtomicFetchXor);
DEF_OP(AtomicFetchNeg);
DEF_OP(TelemetrySetValue);
///< Branch ops
DEF_OP(CallbackReturn);

View File

@ -628,23 +628,38 @@ DEF_OP(AtomicFetchNeg) {
}
}
// x86-64 JIT handler for the TelemetrySetValue IR op.
// Emits code that ORs (Src != 0 ? 1 : 0) into the 64-bit telemetry value whose address is stored in
// the CPU state frame at TelemetryValueAddresses[Op->TelemetryValueIndex], using a lock-prefixed or.
// Emits nothing when FEX_DISABLE_TELEMETRY is defined.
DEF_OP(TelemetrySetValue) {
#ifndef FEX_DISABLE_TELEMETRY
auto Op = IROp->C<IR::IROp_TelemetrySetValue>();
// Source is fetched as a 32-bit register; only those bits are tested.
auto Src = GetSrc<RA_32>(Op->Value.ID());
// Clear TMP1 before the test: setne only writes the low byte, and xor itself modifies flags.
xor_(TMP1, TMP1);
// TMP2 = address of the telemetry value.
mov(TMP2, qword [STATE + offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.TelemetryValueAddresses[Op->TelemetryValueIndex])]);
// TMP1 = 1 if the source is non-zero, otherwise it stays 0.
test(Src, Src);
setne(TMP1.cvt8());
// Lock-prefixed OR of the 0/1 flag into the telemetry value.
lock(); or_(qword [TMP2], TMP1);
#endif
}
#undef DEF_OP
// Installs the atomic-op handlers of the x86-64 JIT into the OpHandlers table.
// Each REGISTER_OP(op, x) stores &X86JITCore::Op_x at index FEXCore::IR::IROps::OP_op.
void X86JITCore::RegisterAtomicHandlers() {
#define REGISTER_OP(op, x) OpHandlers[FEXCore::IR::IROps::OP_##op] = &X86JITCore::Op_##x
REGISTER_OP(CASPAIR, CASPair);
REGISTER_OP(CAS, CAS);
REGISTER_OP(ATOMICADD, AtomicAdd);
REGISTER_OP(ATOMICSUB, AtomicSub);
REGISTER_OP(ATOMICAND, AtomicAnd);
REGISTER_OP(ATOMICOR, AtomicOr);
REGISTER_OP(ATOMICXOR, AtomicXor);
REGISTER_OP(ATOMICSWAP, AtomicSwap);
REGISTER_OP(ATOMICFETCHADD, AtomicFetchAdd);
REGISTER_OP(ATOMICFETCHSUB, AtomicFetchSub);
REGISTER_OP(ATOMICFETCHAND, AtomicFetchAnd);
REGISTER_OP(ATOMICFETCHOR, AtomicFetchOr);
REGISTER_OP(ATOMICFETCHXOR, AtomicFetchXor);
REGISTER_OP(ATOMICFETCHNEG, AtomicFetchNeg);
REGISTER_OP(CASPAIR, CASPair);
REGISTER_OP(CAS, CAS);
REGISTER_OP(ATOMICADD, AtomicAdd);
REGISTER_OP(ATOMICSUB, AtomicSub);
REGISTER_OP(ATOMICAND, AtomicAnd);
REGISTER_OP(ATOMICOR, AtomicOr);
REGISTER_OP(ATOMICXOR, AtomicXor);
REGISTER_OP(ATOMICSWAP, AtomicSwap);
REGISTER_OP(ATOMICFETCHADD, AtomicFetchAdd);
REGISTER_OP(ATOMICFETCHSUB, AtomicFetchSub);
REGISTER_OP(ATOMICFETCHAND, AtomicFetchAnd);
REGISTER_OP(ATOMICFETCHOR, AtomicFetchOr);
REGISTER_OP(ATOMICFETCHXOR, AtomicFetchXor);
REGISTER_OP(ATOMICFETCHNEG, AtomicFetchNeg);
// Telemetry updates are registered alongside the atomic ops.
REGISTER_OP(TELEMETRYSETVALUE, TelemetrySetValue);
#undef REGISTER_OP
}
}

View File

@ -304,6 +304,7 @@ private:
DEF_OP(AtomicFetchOr);
DEF_OP(AtomicFetchXor);
DEF_OP(AtomicFetchNeg);
DEF_OP(TelemetrySetValue);
///< Branch ops
DEF_OP(CallbackReturn);

View File

@ -5,6 +5,7 @@ desc: Handles x86/64 ops to IR, no-pf opt, local-flags opt
$end_info$
*/
#include "FEXCore/Utils/Telemetry.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/OpcodeDispatcher.h"
#include "Interface/Core/X86Tables/X86Tables.h"
@ -4817,22 +4818,32 @@ OrderedNode *OpDispatchBuilder::GetSegment(uint32_t Flags, uint32_t DefaultPrefi
Prefix = DefaultPrefix;
}
// With the segment register optimization we store the GDT bases directly in the segment register to remove indexed loads
OrderedNode *SegmentResult{};
switch (Prefix) {
case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX:
return _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, es_cached));
SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, es_cached));
break;
case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX:
return _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, cs_cached));
SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, cs_cached));
break;
case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX:
return _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, ss_cached));
SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, ss_cached));
break;
case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX:
return _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, ds_cached));
SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, ds_cached));
break;
case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX:
return _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, fs_cached));
SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, fs_cached));
break;
case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX:
return _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, gs_cached));
SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, gs_cached));
break;
default:
break; // Do nothing
}
CheckLegacySegmentRead(SegmentResult, Prefix);
return SegmentResult;
}
return nullptr;
}
@ -4846,11 +4857,87 @@ OrderedNode *OpDispatchBuilder::AppendSegmentOffset(OrderedNode *Value, uint32_t
return Value;
}
// Emits telemetry for a use of a legacy segment register (ES, CS, SS or DS).
//
// NewNode:    IR node holding the segment value that was just loaded.
// SegmentReg: DecodeFlags segment-prefix flag identifying the segment register.
//
// FS and GS are ignored. A given segment is only checked while its bit is set in SegmentsNeedReadCheck;
// the bit is cleared here and set again by CheckLegacySegmentWrite.
// Does nothing when FEX_DISABLE_TELEMETRY is defined.
void OpDispatchBuilder::CheckLegacySegmentRead(OrderedNode *NewNode, uint32_t SegmentReg) {
#ifndef FEX_DISABLE_TELEMETRY
  if (SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX ||
      SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX) {
    // FS and GS segments aren't considered legacy.
    return;
  }

  if (!(SegmentsNeedReadCheck & SegmentReg)) {
    // If the block has done multiple reads of a segment register then skip redundant read checks.
    // Segment write will cause another read check.
    return;
  }

  // Reads are reported through the TYPE_USES_* values; TYPE_WRITES_* belongs to CheckLegacySegmentWrite.
  FEXCore::Telemetry::TelemetryType TelemIndex{};
  switch (SegmentReg) {
    case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX:
      TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_USES_32BIT_SEGMENT_ES;
      SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX;
      break;
    case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX:
      TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_USES_32BIT_SEGMENT_CS;
      SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX;
      break;
    case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX:
      TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_USES_32BIT_SEGMENT_SS;
      SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX;
      break;
    case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX:
      TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_USES_32BIT_SEGMENT_DS;
      SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX;
      break;
    default: FEX_UNREACHABLE;
  }

  // Will set the telemetry value if NewNode is != 0
  _TelemetrySetValue(NewNode, TelemIndex);
#endif
}
// Emits telemetry for a write to a legacy segment register (ES, CS, SS or DS).
//
// NewNode:    IR node holding the value being installed for the segment.
// SegmentReg: DecodeFlags segment-prefix flag identifying the segment register.
//
// FS and GS are ignored. Writing a segment also sets its bit in SegmentsNeedReadCheck so that the
// next read of that segment is checked again.
// Does nothing when FEX_DISABLE_TELEMETRY is defined.
void OpDispatchBuilder::CheckLegacySegmentWrite(OrderedNode *NewNode, uint32_t SegmentReg) {
#ifndef FEX_DISABLE_TELEMETRY
  const bool IsFSOrGS = SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX ||
                        SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX;
  if (IsFSOrGS) {
    // FS and GS segments aren't considered legacy.
    return;
  }

  FEXCore::Telemetry::TelemetryType WriteTelemetry{};
  switch (SegmentReg) {
    case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX:
      WriteTelemetry = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_ES;
      break;
    case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX:
      WriteTelemetry = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_CS;
      break;
    case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX:
      WriteTelemetry = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_SS;
      break;
    case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX:
      WriteTelemetry = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_DS;
      break;
    default: FEX_UNREACHABLE;
  }

  // Past the switch SegmentReg is exactly one of the four legacy flags, so it can be OR'd in directly
  // to re-arm the read check for that segment.
  SegmentsNeedReadCheck |= SegmentReg;

  // Will set the telemetry value if NewNode is != 0
  _TelemetrySetValue(NewNode, WriteTelemetry);
#endif
}
void OpDispatchBuilder::UpdatePrefixFromSegment(OrderedNode *Segment, uint32_t SegmentReg) {
// Use BFE to extract the selector index in bits [15,3] of the segment register.
// In some cases the upper 16-bits of the 32-bit GPR contain garbage to ignore.
Segment = _Bfe(4, 16 - 3, 3, Segment);
auto NewSegment = _LoadContextIndexed(Segment, 4, offsetof(FEXCore::Core::CPUState, gdt[0]), 4, GPRClass);
CheckLegacySegmentWrite(NewSegment, SegmentReg);
switch (SegmentReg) {
case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX:
_StoreContext(4, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, es_cached));

View File

@ -87,6 +87,9 @@ public:
// If we loaded flags but didn't change them, invalidate the cached copy and move on.
// Changes get stored out by CalculateDeferredFlags.
CachedNZCV = nullptr;
// New block needs to reset segment telemetry.
SegmentsNeedReadCheck = ~0U;
}
bool FinishOp(uint64_t NextRIP, bool LastOp) {
@ -1792,6 +1795,11 @@ private:
}
void InstallHostSpecificOpcodeHandlers();
///< Segment telemetry tracking
uint32_t SegmentsNeedReadCheck{~0U};
void CheckLegacySegmentWrite(OrderedNode *NewNode, uint32_t SegmentReg);
void CheckLegacySegmentRead(OrderedNode *NewNode, uint32_t SegmentReg);
};
void InstallOpcodeHandlers(Context::OperatingMode Mode);

View File

@ -673,6 +673,13 @@
"Dest is the value prior to operating on the value in memory"
],
"DestSize": "Size"
},
"TelemetrySetValue GPR:$Value, u8:$TelemetryValueIndex": {
"HasSideEffects": true,
"Desc": ["Set Telemetry value if the passed in 32-bit value isn't zero.",
"Only useful for 32-bit applications."
],
"DestSize": "8"
}
},
"ALU": {

View File

@ -24,6 +24,14 @@ namespace FEXCore::Telemetry {
"64bit CAS Tear",
"128bit CAS Tear",
"Crash mask",
"Write 32-bit Segment ES",
"Write 32-bit Segment SS",
"Write 32-bit Segment CS",
"Write 32-bit Segment DS",
"Uses 32-bit Segment ES",
"Uses 32-bit Segment SS",
"Uses 32-bit Segment CS",
"Uses 32-bit Segment DS",
};
void Initialize() {
auto DataDirectory = Config::GetDataDirectory();

View File

@ -2,6 +2,7 @@
#include <FEXCore/HLE/Linux/ThreadManagement.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/Telemetry.h>
#include <FEXCore/Core/CPUBackend.h>
#include <atomic>
@ -235,6 +236,9 @@ namespace FEXCore::Core {
uint64_t ExitFunctionLink{};
uint64_t FallbackHandlerPointers[FallbackHandlerIndex::OPINDEX_MAX];
#ifndef FEX_DISABLE_TELEMETRY
uint64_t TelemetryValueAddresses[FEXCore::Telemetry::TYPE_LAST];
#endif
// Thread Specific
/**

View File

@ -38,6 +38,16 @@ namespace FEXCore::Telemetry {
TYPE_CAS_64BIT_TEAR,
TYPE_CAS_128BIT_TEAR,
TYPE_CRASH_MASK,
// If a 32-bit application is writing a non-zero value to segments.
TYPE_WRITES_32BIT_SEGMENT_ES,
TYPE_WRITES_32BIT_SEGMENT_SS,
TYPE_WRITES_32BIT_SEGMENT_CS,
TYPE_WRITES_32BIT_SEGMENT_DS,
// If a 32-bit application uses a non-zero segment (e.g. through a segment prefix) on a memory access.
TYPE_USES_32BIT_SEGMENT_ES,
TYPE_USES_32BIT_SEGMENT_SS,
TYPE_USES_32BIT_SEGMENT_CS,
TYPE_USES_32BIT_SEGMENT_DS,
TYPE_LAST,
};