FEXCore: Changes ParentThread ownership from the CTX to the frontend, take 2

Similar to #3284 but works around some of the bugs that one introduced.

This is the minimal set of changes needed to move the ownership from FEXCore
to the frontend. Since the frontends don't yet have full thread state
tracking, there is an opaque pointer that needs to be managed.

In follow-up commits this will be changed so that the syscall handler
is the thread object manager.
This commit is contained in:
Ryan Houdek 2023-12-11 09:42:05 -08:00
parent 68d6cf5f14
commit aa2e8704bc
12 changed files with 93 additions and 56 deletions

View File

@ -71,7 +71,7 @@ namespace FEXCore::Context {
class ContextImpl final : public FEXCore::Context::Context {
public:
// Context base class implementation.
FEXCore::Core::InternalThreadState* InitCore(uint64_t InitialRIP, uint64_t StackPointer) override;
bool InitCore() override;
void SetExitHandler(ExitHandler handler) override;
ExitHandler GetExitHandler() const override;
@ -81,7 +81,7 @@ namespace FEXCore::Context {
void Stop() override;
void Step() override;
ExitReason RunUntilExit() override;
ExitReason RunUntilExit(FEXCore::Core::InternalThreadState *Thread) override;
void ExecuteThread(FEXCore::Core::InternalThreadState *Thread) override;
@ -128,7 +128,7 @@ namespace FEXCore::Context {
* - HandleCallback(Thread, RIP);
*/
FEXCore::Core::InternalThreadState* CreateThread(uint64_t InitialRIP, uint64_t StackPointer, FEXCore::Core::CPUState *NewThreadState, uint64_t ParentTID) override;
FEXCore::Core::InternalThreadState* CreateThread(uint64_t InitialRIP, uint64_t StackPointer, ManagedBy WhoManages, FEXCore::Core::CPUState *NewThreadState, uint64_t ParentTID) override;
// Public for threading
void ExecutionThread(FEXCore::Core::InternalThreadState *Thread) override;
@ -144,7 +144,7 @@ namespace FEXCore::Context {
*
* @param Thread The internal FEX thread state object
*/
void DestroyThread(FEXCore::Core::InternalThreadState *Thread) override;
void DestroyThread(FEXCore::Core::InternalThreadState *Thread, bool NeedsTLSUninstall) override;
#ifndef _WIN32
void LockBeforeFork(FEXCore::Core::InternalThreadState *Thread) override;
@ -178,7 +178,7 @@ namespace FEXCore::Context {
}
void InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState *Thread, uint64_t Start, uint64_t Length) override;
void InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState *Thread, uint64_t Start, uint64_t Length, CodeRangeInvalidationFn callback) override;
void MarkMemoryShared() override;
void MarkMemoryShared(FEXCore::Core::InternalThreadState *Thread) override;
void ConfigureAOTGen(FEXCore::Core::InternalThreadState *Thread, fextl::set<uint64_t> *ExternalBranches, uint64_t SectionMaxAddress) override;
// returns false if a handler was already registered

View File

@ -103,10 +103,6 @@ namespace FEXCore::Context {
}
ContextImpl::~ContextImpl() {
if (ParentThread) {
DestroyThread(ParentThread);
}
{
if (CodeObjectCacheService) {
CodeObjectCacheService->Shutdown();
@ -268,7 +264,7 @@ namespace FEXCore::Context {
Frame->State.flags[X86State::RFLAG_IF_LOC] = 1;
}
FEXCore::Core::InternalThreadState* ContextImpl::InitCore(uint64_t InitialRIP, uint64_t StackPointer) {
bool ContextImpl::InitCore() {
// Initialize the CPU core signal handlers & DispatcherConfig
switch (Config.Core) {
case FEXCore::Config::CONFIG_IRJIT:
@ -278,8 +274,8 @@ namespace FEXCore::Context {
// Do nothing
break;
default:
ERROR_AND_DIE_FMT("Unknown core configuration");
break;
LogMan::Msg::EFmt("Unknown core configuration");
return false;
}
DispatcherConfig.StaticRegisterAllocation = Config.StaticRegisterAllocation && BackendFeatures.SupportsStaticRegisterAllocation;
@ -330,12 +326,7 @@ namespace FEXCore::Context {
StartPaused = true;
}
FEXCore::Core::InternalThreadState *Thread = CreateThread(InitialRIP, StackPointer, nullptr, 0);
// We are the parent thread
ParentThread = Thread;
return Thread;
return true;
}
void ContextImpl::HandleCallback(FEXCore::Core::InternalThreadState *Thread, uint64_t RIP) {
@ -485,7 +476,7 @@ namespace FEXCore::Context {
}
}
FEXCore::Context::ExitReason ContextImpl::RunUntilExit() {
FEXCore::Context::ExitReason ContextImpl::RunUntilExit(FEXCore::Core::InternalThreadState *Thread) {
if(!StartPaused) {
// We will only have one thread at this point, but just in case run notify everything
std::lock_guard lk(ThreadCreationMutex);
@ -494,10 +485,10 @@ namespace FEXCore::Context {
}
}
ExecutionThread(ParentThread);
ExecutionThread(Thread);
while(true) {
this->WaitForIdle();
auto reason = ParentThread->ExitReason;
auto reason = Thread->ExitReason;
// Don't return if a custom exit handling the exit
if (!CustomExitHandler || reason == ExitReason::EXIT_SHUTDOWN) {
@ -575,7 +566,7 @@ namespace FEXCore::Context {
Thread->PassManager->Finalize();
}
FEXCore::Core::InternalThreadState* ContextImpl::CreateThread(uint64_t InitialRIP, uint64_t StackPointer, FEXCore::Core::CPUState *NewThreadState, uint64_t ParentTID) {
FEXCore::Core::InternalThreadState* ContextImpl::CreateThread(uint64_t InitialRIP, uint64_t StackPointer, ManagedBy WhoManages, FEXCore::Core::CPUState *NewThreadState, uint64_t ParentTID) {
FEXCore::Core::InternalThreadState *Thread = new FEXCore::Core::InternalThreadState{};
Thread->CurrentFrame->State.gregs[X86State::REG_RSP] = StackPointer;
@ -594,9 +585,10 @@ namespace FEXCore::Context {
Thread->CurrentFrame->State.DeferredSignalRefCount.Store(0);
Thread->CurrentFrame->State.DeferredSignalFaultAddress = reinterpret_cast<Core::NonAtomicRefCounter<uint64_t>*>(FEXCore::Allocator::VirtualAlloc(4096));
Thread->DestroyedByParent = WhoManages == ManagedBy::FRONTEND;
// Insert after the Thread object has been fully initialized
{
if (WhoManages == ManagedBy::CORE) {
std::lock_guard lk(ThreadCreationMutex);
Threads.push_back(Thread);
}
@ -604,15 +596,19 @@ namespace FEXCore::Context {
return Thread;
}
void ContextImpl::DestroyThread(FEXCore::Core::InternalThreadState *Thread) {
void ContextImpl::DestroyThread(FEXCore::Core::InternalThreadState *Thread, bool NeedsTLSUninstall) {
// remove new thread object
{
std::lock_guard lk(ThreadCreationMutex);
auto It = std::find(Threads.begin(), Threads.end(), Thread);
LOGMAN_THROW_A_FMT(It != Threads.end(), "Thread wasn't in Threads");
// TODO: Some threads aren't currently tracked in FEXCore.
// Re-enable once tracking is in frontend.
// LOGMAN_THROW_A_FMT(It != Threads.end(), "Thread wasn't in Threads");
Threads.erase(It);
if (It != Threads.end()) {
Threads.erase(It);
}
}
if (Thread->ExecutionThread &&
@ -621,6 +617,14 @@ namespace FEXCore::Context {
Thread->ExecutionThread->detach();
}
// TODO: This is temporary until the frontend has full ownership of threads.
if (NeedsTLSUninstall) {
#ifndef _WIN32
Alloc::OSAllocator::UninstallTLSData(Thread);
#endif
SignalDelegation->UninstallTLSState(Thread);
}
FEXCore::Allocator::VirtualFree(reinterpret_cast<void*>(Thread->CurrentFrame->State.DeferredSignalFaultAddress), 4096);
delete Thread;
}
@ -1113,7 +1117,7 @@ namespace FEXCore::Context {
// Now notify the thread that we are initialized
Thread->ThreadWaiting.NotifyAll();
if (Thread != static_cast<ContextImpl*>(Thread->CTX)->ParentThread || StartPaused || Thread->StartPaused) {
if (StartPaused || Thread->StartPaused) {
// Parent thread doesn't need to wait to run
Thread->StartRunning.Wait();
}
@ -1157,7 +1161,7 @@ namespace FEXCore::Context {
SignalDelegation->UninstallTLSState(Thread);
// If the parent thread is waiting to join, then we can't destroy our thread object
if (!Thread->DestroyedByParent && Thread != static_cast<ContextImpl*>(Thread->CTX)->ParentThread) {
if (!Thread->DestroyedByParent) {
Thread->CTX->DestroyThread(Thread);
}
}
@ -1176,12 +1180,21 @@ namespace FEXCore::Context {
}
}
static void InvalidateGuestCodeRangeInternal(ContextImpl *CTX, uint64_t Start, uint64_t Length) {
static void InvalidateGuestCodeRangeInternal(FEXCore::Core::InternalThreadState *CallingThread, ContextImpl *CTX, uint64_t Start, uint64_t Length) {
std::lock_guard lk(static_cast<ContextImpl*>(CTX)->ThreadCreationMutex);
for (auto &Thread : static_cast<ContextImpl*>(CTX)->Threads) {
// TODO: Skip calling thread.
// Remove once frontend has thread ownership.
if (CallingThread == Thread) continue;
InvalidateGuestThreadCodeRange(Thread, Start, Length);
}
// Now invalidate calling thread's code.
if (CallingThread) {
InvalidateGuestThreadCodeRange(CallingThread, Start, Length);
}
}
void ContextImpl::InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState *Thread, uint64_t Start, uint64_t Length) {
@ -1190,7 +1203,7 @@ namespace FEXCore::Context {
// To be more optimal the frontend should provide this code with a valid Thread object earlier.
auto lk = GuardSignalDeferringSectionWithFallback(CodeInvalidationMutex, Thread);
InvalidateGuestCodeRangeInternal(this, Start, Length);
InvalidateGuestCodeRangeInternal(Thread, this, Start, Length);
}
void ContextImpl::InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState *Thread, uint64_t Start, uint64_t Length, CodeRangeInvalidationFn CallAfter) {
@ -1199,20 +1212,17 @@ namespace FEXCore::Context {
// To be more optimal the frontend should provide this code with a valid Thread object earlier.
auto lk = GuardSignalDeferringSectionWithFallback(CodeInvalidationMutex, Thread);
InvalidateGuestCodeRangeInternal(this, Start, Length);
InvalidateGuestCodeRangeInternal(Thread, this, Start, Length);
CallAfter(Start, Length);
}
void ContextImpl::MarkMemoryShared() {
void ContextImpl::MarkMemoryShared(FEXCore::Core::InternalThreadState *Thread) {
if (!IsMemoryShared) {
IsMemoryShared = true;
UpdateAtomicTSOEmulationConfig();
if (Config.TSOAutoMigration) {
std::lock_guard<std::mutex> lkThreads(ThreadCreationMutex);
LogMan::Throw::AFmt(Threads.size() == 1, "First MarkMemoryShared called must be before creating any threads");
auto Thread = Threads[0];
// Only the lookup cache is cleared here, so that old code can keep running until next compilation
std::lock_guard<std::recursive_mutex> lkLookupCache(Thread->LookupCache->WriteLock);

View File

@ -124,7 +124,7 @@ namespace FEXCore::Context {
*
* @return true if we loaded code
*/
FEX_DEFAULT_VISIBILITY virtual FEXCore::Core::InternalThreadState* InitCore(uint64_t InitialRIP, uint64_t StackPointer) = 0;
FEX_DEFAULT_VISIBILITY virtual bool InitCore() = 0;
FEX_DEFAULT_VISIBILITY virtual void SetExitHandler(ExitHandler handler) = 0;
FEX_DEFAULT_VISIBILITY virtual ExitHandler GetExitHandler() const = 0;
@ -181,7 +181,7 @@ namespace FEXCore::Context {
*
* @return The ExitReason for the parent thread.
*/
FEX_DEFAULT_VISIBILITY virtual ExitReason RunUntilExit() = 0;
FEX_DEFAULT_VISIBILITY virtual ExitReason RunUntilExit(FEXCore::Core::InternalThreadState *Thread) = 0;
/**
* @brief Executes the supplied thread context on the current thread until a return is requested
@ -248,17 +248,24 @@ namespace FEXCore::Context {
*
* @param InitialRIP The starting RIP of this thread
* @param StackPointer The starting RSP of this thread
* @param WhoManages The flag to determine what manages ownership of the InternalThreadState object
* @param NewThreadState The thread state to inherit from if not nullptr.
* @param ParentTID The thread ID that the parent is inheriting from
*
* @return A new InternalThreadState object for using with a new guest thread.
*/
FEX_DEFAULT_VISIBILITY virtual FEXCore::Core::InternalThreadState* CreateThread(uint64_t InitialRIP, uint64_t StackPointer, FEXCore::Core::CPUState *NewThreadState = nullptr, uint64_t ParentTID = 0) = 0;
// TODO: This is a temporary construct and will be removed once the frontend has full ownership of InternalThreadState objects.
// Passed to CreateThread to select what manages the lifetime of the returned InternalThreadState object.
enum class [[deprecated]] ManagedBy {
// CreateThread inserts the new thread into the context's Threads list (under ThreadCreationMutex)
// and leaves DestroyedByParent false, so the thread teardown path calls DestroyThread on it.
CORE,
// CreateThread sets DestroyedByParent and does not insert the thread into Threads;
// the frontend that created the thread calls DestroyThread on it explicitly.
FRONTEND,
};
FEX_DEFAULT_VISIBILITY virtual FEXCore::Core::InternalThreadState* CreateThread(uint64_t InitialRIP, uint64_t StackPointer, ManagedBy WhoManages, FEXCore::Core::CPUState *NewThreadState = nullptr, uint64_t ParentTID = 0) = 0;
FEX_DEFAULT_VISIBILITY virtual void ExecutionThread(FEXCore::Core::InternalThreadState *Thread) = 0;
FEX_DEFAULT_VISIBILITY virtual void RunThread(FEXCore::Core::InternalThreadState *Thread) = 0;
FEX_DEFAULT_VISIBILITY virtual void StopThread(FEXCore::Core::InternalThreadState *Thread) = 0;
FEX_DEFAULT_VISIBILITY virtual void DestroyThread(FEXCore::Core::InternalThreadState *Thread) = 0;
FEX_DEFAULT_VISIBILITY virtual void DestroyThread(FEXCore::Core::InternalThreadState *Thread, bool NeedsTLSUninstall = false) = 0;
#ifndef _WIN32
FEX_DEFAULT_VISIBILITY virtual void LockBeforeFork(FEXCore::Core::InternalThreadState *Thread) {}
FEX_DEFAULT_VISIBILITY virtual void UnlockAfterFork(FEXCore::Core::InternalThreadState *Thread, bool Child) {}
@ -281,7 +288,7 @@ namespace FEXCore::Context {
FEX_DEFAULT_VISIBILITY virtual void WriteFilesWithCode(AOTIRCodeFileWriterFn Writer) = 0;
FEX_DEFAULT_VISIBILITY virtual void InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState *Thread, uint64_t Start, uint64_t Length) = 0;
FEX_DEFAULT_VISIBILITY virtual void InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState *Thread, uint64_t Start, uint64_t Length, CodeRangeInvalidationFn callback) = 0;
FEX_DEFAULT_VISIBILITY virtual void MarkMemoryShared() = 0;
FEX_DEFAULT_VISIBILITY virtual void MarkMemoryShared(FEXCore::Core::InternalThreadState *Thread) = 0;
FEX_DEFAULT_VISIBILITY virtual void ConfigureAOTGen(FEXCore::Core::InternalThreadState *Thread, fextl::set<uint64_t> *ExternalBranches, uint64_t SectionMaxAddress) = 0;
FEX_DEFAULT_VISIBILITY virtual CustomIRResult AddCustomIREntrypoint(uintptr_t Entrypoint, CustomIREntrypointHandler Handler, void *Creator = nullptr, void *Data = nullptr) = 0;

View File

@ -558,11 +558,16 @@ int main(int argc, char **argv, char **const envp) {
CTX->SetSignalDelegator(SignalDelegation.get());
CTX->SetSyscallHandler(SyscallHandler.get());
auto ParentThread = CTX->InitCore(0, 0);
if (!CTX->InitCore()) {
return -1;
}
auto ParentThread = CTX->CreateThread(0, 0, FEXCore::Context::Context::ManagedBy::FRONTEND);
// Calculate the base stats for instruction testing.
CodeSize::Validation.CalculateBaseStats(CTX.get(), ParentThread);
// Test all the instructions.
return TestInstructions(CTX.get(), ParentThread, argc >= 2 ? argv[2] : nullptr) ? 0 : 1;
auto Result = TestInstructions(CTX.get(), ParentThread, argc >= 2 ? argv[2] : nullptr) ? 0 : 1;
CTX->DestroyThread(ParentThread);
return Result;
}

View File

@ -106,7 +106,7 @@ void AOTGenSection(FEXCore::Context::Context *CTX, ELFCodeLoader::LoadedSection
setpriority(PRIO_PROCESS, FHU::Syscalls::gettid(), 19);
// Setup thread - Each compilation thread uses its own backing FEX thread
auto Thread = CTX->CreateThread(0, 0);
auto Thread = CTX->CreateThread(0, 0, FEXCore::Context::Context::ManagedBy::FRONTEND);
fextl::set<uint64_t> ExternalBranchesLocal;
CTX->ConfigureAOTGen(Thread, &ExternalBranchesLocal, SectionMaxAddress);

View File

@ -486,7 +486,11 @@ int main(int argc, char **argv, char **const envp) {
DebugServer = fextl::make_unique<FEX::GdbServer>(CTX.get(), SignalDelegation.get(), SyscallHandler.get());
}
auto ParentThread = CTX->InitCore(Loader.DefaultRIP(), Loader.GetStackPointer());
if (!CTX->InitCore()) {
return 1;
}
auto ParentThread = CTX->CreateThread(Loader.DefaultRIP(), Loader.GetStackPointer(), FEXCore::Context::Context::ManagedBy::FRONTEND);
// Pass in our VDSO thunks
CTX->AppendThunkDefinitions(FEX::VDSO::GetVDSOThunkDefinitions());
@ -541,7 +545,7 @@ int main(int argc, char **argv, char **const envp) {
FEX::AOT::AOTGenSection(CTX.get(), Section);
}
} else {
CTX->RunUntilExit();
CTX->RunUntilExit(ParentThread);
}
if (AOTEnabled) {
@ -564,6 +568,8 @@ int main(int argc, char **argv, char **const envp) {
auto ProgramStatus = ParentThread->StatusCode;
CTX->DestroyThread(ParentThread);
DebugServer.reset();
SyscallHandler.reset();
SignalDelegation.reset();

View File

@ -178,7 +178,10 @@ int main(int argc, char **argv, char **const envp)
if (Loader.LoadIR(CTX.get()))
{
auto ParentThread = CTX->InitCore(Loader.DefaultRIP(), Loader.GetStackPointer());
if (!CTX->InitCore()) {
return -1;
}
auto ParentThread = CTX->CreateThread(Loader.DefaultRIP(), Loader.GetStackPointer(), FEXCore::Context::Context::ManagedBy::FRONTEND);
auto ShutdownReason = FEXCore::Context::ExitReason::EXIT_SHUTDOWN;
@ -204,7 +207,7 @@ int main(int argc, char **argv, char **const envp)
LongJumpVal = setjmp(LongJump);
if (!LongJumpVal) {
CTX->RunUntilExit();
CTX->RunUntilExit(ParentThread);
}
LogMan::Msg::DFmt("Reason we left VM: {}", FEXCore::ToUnderlying(ShutdownReason));
@ -215,6 +218,7 @@ int main(int argc, char **argv, char **const envp)
LogMan::Msg::IFmt("Passed? {}\n", Passed ? "Yes" : "No");
Return = Passed ? 0 : -1;
CTX->DestroyThread(ParentThread);
}
else
{

View File

@ -573,7 +573,7 @@ uint64_t CloneHandler(FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clone3_args
};
if (flags & CLONE_VM) {
Frame->Thread->CTX->MarkMemoryShared();
Frame->Thread->CTX->MarkMemoryShared(Frame->Thread);
}
// If there are flags that can't be handled regularly then we need to hand off to the true clone handler

View File

@ -73,7 +73,7 @@ namespace FEX::HLE {
NewThreadState.gregs[FEXCore::X86State::REG_RSP] = args->args.stack;
}
auto NewThread = CTX->CreateThread(0, 0, &NewThreadState, args->args.parent_tid);
auto NewThread = CTX->CreateThread(0, 0, FEXCore::Context::Context::ManagedBy::CORE, &NewThreadState, args->args.parent_tid);
if (FEX::HLE::_SyscallHandler->Is64BitMode()) {
if (flags & CLONE_SETTLS) {
@ -166,7 +166,7 @@ namespace FEX::HLE {
}
// Overwrite thread
NewThread = CTX->CreateThread(0, 0, &NewThreadState, GuestArgs->parent_tid);
NewThread = CTX->CreateThread(0, 0, FEXCore::Context::Context::ManagedBy::CORE, &NewThreadState, GuestArgs->parent_tid);
// CLONE_PARENT_SETTID, CLONE_CHILD_SETTID, CLONE_CHILD_CLEARTID, CLONE_PIDFD will be handled by kernel
// Call execution thread directly since we already are on the new thread

View File

@ -187,7 +187,7 @@ void SyscallHandler::TrackMmap(FEXCore::Core::InternalThreadState *Thread, uintp
Size = FEXCore::AlignUp(Size, FHU::FEX_PAGE_SIZE);
if (Flags & MAP_SHARED) {
CTX->MarkMemoryShared();
CTX->MarkMemoryShared(Thread);
}
{
@ -322,7 +322,7 @@ void SyscallHandler::TrackMremap(FEXCore::Core::InternalThreadState *Thread, uin
}
void SyscallHandler::TrackShmat(FEXCore::Core::InternalThreadState *Thread, int shmid, uintptr_t Base, int shmflg) {
CTX->MarkMemoryShared();
CTX->MarkMemoryShared(Thread);
shmid_ds stat;

View File

@ -301,7 +301,10 @@ int main(int argc, char **argv, char **const envp) {
CTX->SetSignalDelegator(SignalDelegation.get());
CTX->SetSyscallHandler(SyscallHandler.get());
auto ParentThread = CTX->InitCore(Loader.DefaultRIP(), Loader.GetStackPointer());
if (!CTX->InitCore()) {
return 1;
}
auto ParentThread = CTX->CreateThread(Loader.DefaultRIP(), Loader.GetStackPointer(), FEXCore::Context::Context::ManagedBy::FRONTEND);
if (!ParentThread) {
return 1;
@ -309,13 +312,15 @@ int main(int argc, char **argv, char **const envp) {
int LongJumpVal = setjmp(LongJumpHandler::LongJump);
if (!LongJumpVal) {
CTX->RunUntilExit();
CTX->RunUntilExit(ParentThread);
}
// Just re-use compare state. It also checks against the expected values in config.
memcpy(&State, &ParentThread->CurrentFrame->State, sizeof(State));
SyscallHandler.reset();
CTX->DestroyThread(ParentThread, true);
}
#ifndef _WIN32
else {

View File

@ -517,7 +517,7 @@ void BTCpuProcessInit() {
CTX = FEXCore::Context::Context::CreateNewContext();
CTX->SetSignalDelegator(SignalDelegator.get());
CTX->SetSyscallHandler(SyscallHandler.get());
CTX->InitCore(0, 0);
CTX->InitCore();
CpuInfo.ProcessorArchitecture = PROCESSOR_ARCHITECTURE_INTEL;
@ -554,7 +554,7 @@ void BTCpuProcessInit() {
}
NTSTATUS BTCpuThreadInit() {
GetTLS().ThreadState() = CTX->CreateThread(0, 0);
GetTLS().ThreadState() = CTX->CreateThread(0, 0, FEXCore::Context::Context::ManagedBy::FRONTEND);
std::scoped_lock Lock(ThreadSuspendLock);
InitializedWOWThreads.emplace(GetCurrentThreadId());