diff --git a/External/FEXCore/Source/Interface/Config/Config.cpp b/External/FEXCore/Source/Interface/Config/Config.cpp
index 407f5969d..69f049eb9 100644
--- a/External/FEXCore/Source/Interface/Config/Config.cpp
+++ b/External/FEXCore/Source/Interface/Config/Config.cpp
@@ -36,6 +36,9 @@ namespace FEXCore::Config {
     case FEXCore::Config::CONFIG_TSO_ENABLED:
       CTX->Config.TSOEnabled = Config != 0;
     break;
+    case FEXCore::Config::CONFIG_SMC_CHECKS:
+      CTX->Config.SMCChecks = Config != 0;
+    break;
     default: LogMan::Msg::A("Unknown configuration option");
     }
   }
@@ -83,6 +86,9 @@ namespace FEXCore::Config {
     case FEXCore::Config::CONFIG_TSO_ENABLED:
       return CTX->Config.TSOEnabled;
     break;
+    case FEXCore::Config::CONFIG_SMC_CHECKS:
+      return CTX->Config.SMCChecks;
+    break;
     default: LogMan::Msg::A("Unknown configuration option");
     }
 
diff --git a/External/FEXCore/Source/Interface/Context/Context.h b/External/FEXCore/Source/Interface/Context/Context.h
index 795117ee5..459f86e08 100644
--- a/External/FEXCore/Source/Interface/Context/Context.h
+++ b/External/FEXCore/Source/Interface/Context/Context.h
@@ -60,6 +60,7 @@ namespace FEXCore::Context {
       bool Is64BitMode {true};
       uint64_t EmulatedCPUCores{1};
       bool TSOEnabled {true};
+      bool SMCChecks {false};
     } Config;
 
     FEXCore::Memory::MemMapper MemoryMapper;
@@ -113,6 +114,10 @@ namespace FEXCore::Context {
     void StopGdbServer();
     void HandleCallback(uint64_t RIP);
 
+    // Drops the IR list, debug data and block cache entry that Thread holds for GuestRIP.
+    // Static so the JIT backends can reach it through a plain function pointer.
+    static void RemoveCodeEntry(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP);
+
     // Debugger interface
     void CompileRIP(FEXCore::Core::InternalThreadState *Thread, uint64_t RIP);
     uint64_t GetThreadCount() const;
diff --git a/External/FEXCore/Source/Interface/Core/Core.cpp b/External/FEXCore/Source/Interface/Core/Core.cpp
index 39c0ddb1d..355ee489f 100644
--- a/External/FEXCore/Source/Interface/Core/Core.cpp
+++ b/External/FEXCore/Source/Interface/Core/Core.cpp
@@ -647,6 +647,43 @@ namespace FEXCore::Context {
         TableInfo = Block.DecodedInstructions[i].TableInfo;
         DecodedInfo = &Block.DecodedInstructions[i];
 
+        if (Config.SMCChecks) {
+          // Self-modifying code check: snapshot this instruction's guest bytes while
+          // compiling and emit IR that compares them against memory when the block runs.
+          // Zero-initialised so the bytes past InstSize are not indeterminate.
+          __uint128_t existing{};
+
+          uintptr_t ExistingCodePtr{};
+
+          if (Thread->CTX->Config.UnifiedMemory) {
+            ExistingCodePtr = reinterpret_cast<uintptr_t>(Block.Entry + BlockInstructionsLength);
+          }
+          else {
+            ExistingCodePtr = MemoryMapper.GetPointer<uintptr_t>(Block.Entry + BlockInstructionsLength);
+          }
+
+          memcpy(&existing, reinterpret_cast<const void*>(ExistingCodePtr), DecodedInfo->InstSize);
+          auto CodeChanged = Thread->OpDispatcher->_ValidateCode(existing, ExistingCodePtr, DecodedInfo->InstSize);
+
+          auto InvalidateCodeCond = Thread->OpDispatcher->_CondJump(CodeChanged);
+
+          // Taken when the bytes differ: drop the cached entry for GuestRIP, point RIP
+          // at this instruction and leave the block.
+          auto CodeWasChangedBlock = Thread->OpDispatcher->CreateNewCodeBlock();
+          Thread->OpDispatcher->SetTrueJumpTarget(InvalidateCodeCond, CodeWasChangedBlock);
+
+          Thread->OpDispatcher->SetCurrentCodeBlock(CodeWasChangedBlock);
+          Thread->OpDispatcher->_RemoveCodeEntry(GuestRIP);
+          Thread->OpDispatcher->_StoreContext(IR::GPRClass, 8, offsetof(FEXCore::Core::CPUState, rip), Thread->OpDispatcher->_Constant(Block.Entry + BlockInstructionsLength));
+          Thread->OpDispatcher->_ExitFunction();
+
+          // Bytes unchanged: continue in a fresh block that receives the instruction's own IR.
+          auto NextOpBlock = Thread->OpDispatcher->CreateNewCodeBlock();
+
+          Thread->OpDispatcher->SetFalseJumpTarget(InvalidateCodeCond, NextOpBlock);
+          Thread->OpDispatcher->SetCurrentCodeBlock(NextOpBlock);
+        }
+
         if (TableInfo->OpcodeDispatcher) {
           auto Fn = TableInfo->OpcodeDispatcher;
           std::invoke(Fn, Thread->OpDispatcher, DecodedInfo);
@@ -799,6 +836,14 @@ namespace FEXCore::Context {
     SignalDelegation.UninstallTLSState(Thread);
   }
 
+  // Forgets everything Thread has cached under GuestRIP: the IR list, its debug data
+  // and the block cache entry.
+  void Context::RemoveCodeEntry(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) {
+    Thread->IRLists.erase(GuestRIP);
+    Thread->DebugData.erase(GuestRIP);
+    Thread->BlockCache->Erase(GuestRIP);
+  }
+
   // Debug interface
   void Context::CompileRIP(FEXCore::Core::InternalThreadState *Thread, uint64_t RIP) {
     uint64_t RIPBackup = Thread->State.State.rip;
diff --git a/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterCore.cpp b/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterCore.cpp
index 183edadb7..005bd5559 100644
--- a/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterCore.cpp
+++ b/External/FEXCore/Source/Interface/Core/Interpreter/InterpreterCore.cpp
@@ -138,6 +138,22 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
       uint32_t Node = WrapperOp.ID();
 
       switch (IROp->Op) {
+        case IR::OP_VALIDATECODE: {
+          auto Op = IROp->C<IR::IROp_ValidateCode>();
+
+          // Result is 1 when the bytes at CodePtr no longer match the stored snapshot, 0 otherwise.
+          GD = memcmp(reinterpret_cast<const void*>(Op->CodePtr), &Op->CodeOriginal, Op->CodeLength) != 0 ? 1 : 0;
+          break;
+        }
+
+        case IR::OP_REMOVECODEENTRY: {
+          auto Op = IROp->C<IR::IROp_RemoveCodeEntry>();
+          // NOTE(review): RemoveCodeEntry erases Thread->IRLists[Op->RIP]; confirm the IR
+          // list currently being interpreted is not the storage released here.
+          CTX->RemoveCodeEntry(Thread, Op->RIP);
+          break;
+        }
+
         case IR::OP_DUMMY:
         case IR::OP_BEGINBLOCK:
           break;
diff --git a/External/FEXCore/Source/Interface/Core/Interpreter/x86_64Dispatcher.cpp b/External/FEXCore/Source/Interface/Core/Interpreter/x86_64Dispatcher.cpp
index ab8fe16b4..bda7ab527 100644
--- a/External/FEXCore/Source/Interface/Core/Interpreter/x86_64Dispatcher.cpp
+++ b/External/FEXCore/Source/Interface/Core/Interpreter/x86_64Dispatcher.cpp
@@ -115,6 +115,12 @@ DispatchGenerator::DispatchGenerator(FEXCore::Context::Context *ctx, FEXCore::Co
 
     shl(rax, (int)log2(sizeof(FEXCore::BlockCache::BlockCacheEntry)));
 
+    // check for aliasing: the qword at offset 8 of the entry must equal rdx,
+    // otherwise the lookup is treated as a miss
+    mov(rcx, qword [rdi + rax + 8]);
+    cmp(rcx, rdx);
+    jne(NoBlock);
+
     // Load the block pointer
     mov(rax, qword [rdi + rax]);
 
diff --git a/External/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp b/External/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp
index 4218616d7..6fb6e0cb4 100644
--- a/External/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp
+++ b/External/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp
@@ -184,6 +184,86 @@ DEF_OP(Thunk) {
   add(sp, sp, SPOffset);
 }
 
+
+DEF_OP(ValidateCode) {
+  // Emits a comparison of the CodeLength bytes at CodePtr against the snapshot stored
+  // in the op. The destination ends up 1 if any chunk differs and 0 otherwise.
+  // x0-x3 are used as scratch registers.
+  auto Op = IROp->C<IR::IROp_ValidateCode>();
+  uint8_t *OldCode = (uint8_t *)&Op->CodeOriginal;
+  int len = Op->CodeLength;
+  int idx = 0;
+
+  LoadConstant(GetReg(Node), 0);
+  LoadConstant(x0, Op->CodePtr);
+  LoadConstant(x1, 1);
+
+  // Compare in 4-byte chunks first, then a 2-byte and a 1-byte tail.
+  while (len >= 4)
+  {
+    ldr(w2, MemOperand(x0, idx));
+    LoadConstant(w3, *(uint32_t *)(OldCode + idx));
+    cmp(w2, w3);
+    csel(GetReg(Node), GetReg(Node), x1, Condition::eq);
+    len -= 4;
+    idx += 4;
+  }
+  while (len >= 2)
+  {
+    ldrh(w2, MemOperand(x0, idx));
+    LoadConstant(w3, *(uint16_t *)(OldCode + idx));
+    cmp(w2, w3);
+    csel(GetReg(Node), GetReg(Node), x1, Condition::eq);
+    len -= 2;
+    idx += 2;
+  }
+  while (len >= 1)
+  {
+    ldrb(w2, MemOperand(x0, idx));
+    LoadConstant(w3, *(uint8_t *)(OldCode + idx));
+    cmp(w2, w3);
+    csel(GetReg(Node), GetReg(Node), x1, Condition::eq);
+    len -= 1;
+    idx += 1;
+  }
+}
+
+DEF_OP(RemoveCodeEntry) {
+  auto Op = IROp->C<IR::IROp_RemoveCodeEntry>();
+  // Arguments are passed as follows:
+  // X0: Thread
+  // X1: RIP
+
+  // Room for every RA64 register plus lr, kept 16-byte aligned.
+  uint64_t SPOffset = AlignUp((RA64.size() + 1) * 8, 16);
+
+  sub(sp, sp, SPOffset);
+
+  int i = 0;
+  for (auto RA : RA64) {
+    str(RA, MemOperand(sp, i * 8));
+    i++;
+  }
+  str(lr, MemOperand(sp, RA64.size() * 8 + 0 * 8));
+
+  mov(x0, STATE);
+  LoadConstant(x1, Op->RIP);
+
+  LoadConstant(x2, reinterpret_cast<uint64_t>(&Context::Context::RemoveCodeEntry));
+  blr(x2);
+
+  // Fix the stack and any values that were stepped on
+  i = 0;
+  for (auto RA : RA64) {
+    ldr(RA, MemOperand(sp, i * 8));
+    i++;
+  }
+
+  ldr(lr, MemOperand(sp, RA64.size() * 8 + 0 * 8));
+
+  add(sp, sp, SPOffset);
+}
+
 DEF_OP(CPUID) {
   auto Op = IROp->C<IR::IROp_CPUID>();
   uint64_t SPOffset = AlignUp((RA64.size() + 2 + 2) * 8, 16);
@@ -243,6 +323,8 @@ void JITCore::RegisterBranchHandlers() {
   REGISTER_OP(CONDJUMP, CondJump);
   REGISTER_OP(SYSCALL, Syscall);
   REGISTER_OP(THUNK, Thunk);
+  REGISTER_OP(VALIDATECODE, ValidateCode);
+  REGISTER_OP(REMOVECODEENTRY, RemoveCodeEntry);
   REGISTER_OP(CPUID, CPUID);
 #undef REGISTER_OP
 }
diff --git a/External/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h b/External/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h
index 25b0bf329..ebb9f620f 100644
--- a/External/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h
+++ b/External/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h
@@ -309,6 +309,8 @@ private:
   DEF_OP(CondJump);
   DEF_OP(Syscall);
   DEF_OP(Thunk);
+  DEF_OP(ValidateCode);
+  DEF_OP(RemoveCodeEntry);
   DEF_OP(CPUID);
 
   ///< Conversion ops
diff --git a/External/FEXCore/Source/Interface/Core/JIT/x86_64/JIT.cpp b/External/FEXCore/Source/Interface/Core/JIT/x86_64/JIT.cpp
index 648554570..9c74749d8 100644
--- a/External/FEXCore/Source/Interface/Core/JIT/x86_64/JIT.cpp
+++ b/External/FEXCore/Source/Interface/Core/JIT/x86_64/JIT.cpp
@@ -4845,6 +4845,67 @@ void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView<true> const
           }
           break;
         }
+        case IR::OP_VALIDATECODE:
+        {
+          // Emits a comparison of the CodeLength bytes at CodePtr against the snapshot stored
+          // in the op; the destination ends up 1 if any chunk differs and 0 otherwise.
+          // NOTE(review): rax, rbx and rcx are written as scratch here - confirm none of them can hold a live allocated value.
+          auto Op = IROp->C<IR::IROp_ValidateCode>();
+          uint8_t* OldCode = (uint8_t*)&Op->CodeOriginal;
+          int len = Op->CodeLength;
+          int idx = 0;
+
+          xor_(GetDst(Node), GetDst(Node));
+          mov(rax, Op->CodePtr);
+          mov(rbx, 1);
+          while (len >= 4) {
+            cmp(dword[rax + idx], *(uint32_t*)(OldCode + idx));
+            cmovne(GetDst(Node), rbx);
+            len-=4;
+            idx+=4;
+          }
+          while (len >= 2) {
+            mov(rcx, *(uint16_t*)(OldCode + idx));
+            cmp(word[rax + idx], cx);
+            cmovne(GetDst(Node), rbx);
+            len-=2;
+            idx+=2;
+          }
+          while (len >= 1) {
+            cmp(byte[rax + idx], *(uint8_t*)(OldCode + idx));
+            cmovne(GetDst(Node), rbx);
+            len-=1;
+            idx+=1;
+          }
+          break;
+        }
+        case IR::OP_REMOVECODEENTRY: {
+          auto Op = IROp->C<IR::IROp_RemoveCodeEntry>();
+
+          // Save every RA64 register around the call; pad by 8 bytes when the push count is odd.
+          auto NumPush = RA64.size();
+
+          for (auto &Reg : RA64)
+            push(Reg);
+
+          if (NumPush & 1)
+            sub(rsp, 8); // Align
+
+          mov(rdi, STATE);
+          mov(rax, Op->RIP); // imm64 move
+          mov(rsi, rax);
+
+          mov(rax, reinterpret_cast<uint64_t>(&Context::Context::RemoveCodeEntry));
+          call(rax);
+
+          if (NumPush & 1)
+            add(rsp, 8); // Align
+
+          for (uint32_t i = RA64.size(); i > 0; --i)
+            pop(RA64[i - 1]);
+
+          break;
+        }
         case IR::OP_DUMMY:
         case IR::OP_IRHEADER:
         case IR::OP_PHIVALUE:
@@ -4971,6 +5032,12 @@ void JITCore::CreateCustomDispatch(FEXCore::Core::InternalThreadState *Thread) {
 
     shl(rax, (int)log2(sizeof(FEXCore::BlockCache::BlockCacheEntry)));
 
+    // check for aliasing: the qword at offset 8 of the entry must equal rdx,
+    // otherwise the lookup is treated as a miss
+    mov(rcx, qword [rdi + rax + 8]);
+    cmp(rcx, rdx);
+    jne(NoBlock);
+
     // Load the block pointer
     mov(rax, qword [rdi + rax]);
 
diff --git a/External/FEXCore/Source/Interface/IR/IR.json b/External/FEXCore/Source/Interface/IR/IR.json
index 43a7eea77..bcf7b2a8f 100644
--- a/External/FEXCore/Source/Interface/IR/IR.json
+++ b/External/FEXCore/Source/Interface/IR/IR.json
@@ -86,6 +86,27 @@
       ]
     },
 
+    "ValidateCode": {
+      "HasSideEffects": true,
+      "OpClass": "Misc",
+      "HasDest": true,
+      "DestClass": "GPR",
+      "DestSize": "8",
+      "Args": [
+        "__uint128_t", "CodeOriginal",
+        "uint64_t", "CodePtr",
+        "uint8_t", "CodeLength"
+      ]
+    },
+
+    "RemoveCodeEntry": {
+      "HasSideEffects": true,
+      "OpClass": "Misc",
+      "Args": [
+        "uint64_t", "RIP"
+      ]
+    },
+
     "GuestCallDirect": {
       "OpClass": "Branch",
       "Args": [
diff --git a/External/FEXCore/include/FEXCore/Config/Config.h b/External/FEXCore/include/FEXCore/Config/Config.h
index a4a1d8dda..112468719 100644
--- a/External/FEXCore/include/FEXCore/Config/Config.h
+++ b/External/FEXCore/include/FEXCore/Config/Config.h
@@ -17,6 +17,7 @@ namespace FEXCore::Config {
     CONFIG_IS64BIT_MODE,
     CONFIG_EMULATED_CPU_CORES,
     CONFIG_TSO_ENABLED,
+    CONFIG_SMC_CHECKS,
   };
 
   enum ConfigCore {
diff --git a/Source/Common/ArgumentLoader.cpp b/Source/Common/ArgumentLoader.cpp
index c0bad958f..14d565df8 100644
--- a/Source/Common/ArgumentLoader.cpp
+++ b/Source/Common/ArgumentLoader.cpp
@@ -68,6 +68,12 @@ namespace FEX::ArgLoader {
         .help("Disables TSO IR ops. Highly likely to break any threaded application")
         .set_default(true);
 
+      CPUGroup.add_option("--smc-full-checks")
+        .dest("SMCChecks")
+        .action("store_true")
+        .help("Checks code for modification before execution. Slow.")
+        .set_default(false);
+
       Parser.add_option_group(CPUGroup);
     }
     {
@@ -190,6 +196,11 @@ namespace FEX::ArgLoader {
         bool TSOEnabled = Options.get("TSOEnabled");
         Config::Add("TSOEnabled", std::to_string(TSOEnabled));
       }
+
+      if (Options.is_set_by_user("SMCChecks")) {
+        bool SMCChecks = Options.get("SMCChecks");
+        Config::Add("SMCChecks", std::to_string(SMCChecks));
+      }
     }
 
     {
diff --git a/Source/Common/EnvironmentLoader.cpp b/Source/Common/EnvironmentLoader.cpp
index d5d6b091c..32af20e76 100644
--- a/Source/Common/EnvironmentLoader.cpp
+++ b/Source/Common/EnvironmentLoader.cpp
@@ -81,6 +81,10 @@ namespace FEX::EnvLoader {
     if ((Value = GetVar("FEX_TSO_ENABLED")).size()) {
       if (isdigit(Value[0])) Config::Add("TSOEnabled", Value);
     }
+
+    if ((Value = GetVar("FEX_SMC_CHECKS")).size()) {
+      if (isdigit(static_cast<unsigned char>(Value[0]))) Config::Add("SMCChecks", Value);
+    }
   }
 
   {
diff --git a/Source/Tests/ELFLoader.cpp b/Source/Tests/ELFLoader.cpp
index 8ae620f0f..8a38b68d9 100644
--- a/Source/Tests/ELFLoader.cpp
+++ b/Source/Tests/ELFLoader.cpp
@@ -110,6 +110,7 @@ int main(int argc, char **argv, char **const envp) {
   FEX::Config::Value<std::string> Environment{"Env", ""};
   FEX::Config::Value<std::string> OutputLog{"OutputLog", "stderr"};
   FEX::Config::Value<bool> TSOEnabledConfig{"TSOEnabled", true};
+  FEX::Config::Value<bool> SMCChecksConfig{"SMCChecks", false};
 
   ::SilentLog = SilentLog();
 
@@ -154,6 +155,8 @@ int main(int argc, char **argv, char **const envp) {
   FEXCore::Config::SetConfig(CTX, FEXCore::Config::CONFIG_IS64BIT_MODE, Loader.Is64BitMode());
   FEXCore::Config::SetConfig(CTX, FEXCore::Config::CONFIG_EMULATED_CPU_CORES, ThreadsConfig());
   FEXCore::Config::SetConfig(CTX, FEXCore::Config::CONFIG_TSO_ENABLED, TSOEnabledConfig());
+  FEXCore::Config::SetConfig(CTX, FEXCore::Config::CONFIG_SMC_CHECKS, SMCChecksConfig());
+
   FEXCore::Context::SetCustomCPUBackendFactory(CTX, VMFactory::CPUCreationFactory);
   // FEXCore::Context::SetFallbackCPUBackendFactory(CTX, VMFactory::CPUCreationFactoryFallback);
 
diff --git a/Source/Tests/TestHarnessRunner.cpp b/Source/Tests/TestHarnessRunner.cpp
index edbb3233f..11c4c55c9 100644
--- a/Source/Tests/TestHarnessRunner.cpp
+++ b/Source/Tests/TestHarnessRunner.cpp
@@ -60,6 +60,7 @@ int main(int argc, char **argv, char **const envp) {
   FEX::Config::Value<uint64_t> BlockSizeConfig{"MaxInst", 1};
   FEX::Config::Value<bool> SingleStepConfig{"SingleStep", false};
   FEX::Config::Value<bool> MultiblockConfig{"Multiblock", false};
+  FEX::Config::Value<bool> SMCChecksConfig{"SMCChecks", false};
 
   auto Args = FEX::ArgLoader::Get();
 
@@ -78,6 +79,7 @@ int main(int argc, char **argv, char **const envp) {
   FEXCore::Config::SetConfig(CTX, FEXCore::Config::CONFIG_SINGLESTEP, SingleStepConfig());
   FEXCore::Config::SetConfig(CTX, FEXCore::Config::CONFIG_MAXBLOCKINST, BlockSizeConfig());
   FEXCore::Config::SetConfig(CTX, FEXCore::Config::CONFIG_IS64BIT_MODE, Loader.Is64BitMode());
+  FEXCore::Config::SetConfig(CTX, FEXCore::Config::CONFIG_SMC_CHECKS, SMCChecksConfig());
   FEXCore::Context::SetCustomCPUBackendFactory(CTX, VMFactory::CPUCreationFactory);
 
   FEXCore::Context::AddGuestMemoryRegion(CTX, SHM);
diff --git a/unittests/ASM/CMakeLists.txt b/unittests/ASM/CMakeLists.txt
index c47caf86a..7739c6f60 100644
--- a/unittests/ASM/CMakeLists.txt
+++ b/unittests/ASM/CMakeLists.txt
@@ -61,6 +61,11 @@ foreach(ASM_SRC ${ASM_SOURCES})
     set(TEST_NAME "${TEST_DESC}/Test_${REL_TEST_ASM}")
     string(REPLACE " " ";" ARGS_LIST ${ARGS})
 
+
+    if (TEST_NAME MATCHES "SelfModifyingCode")
+      list(APPEND ARGS_LIST "--smc-full-checks")
+    endif()
+
     add_test(NAME ${TEST_NAME}
       COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/testharness_runner.py"
       "${CMAKE_SOURCE_DIR}/unittests/ASM/Known_Failures"
diff --git a/unittests/ASM/SelfModifyingCode/DifferentBlock.asm b/unittests/ASM/SelfModifyingCode/DifferentBlock.asm
new file mode 100644
index 000000000..3b0c203cc
--- /dev/null
+++ b/unittests/ASM/SelfModifyingCode/DifferentBlock.asm
@@ -0,0 +1,27 @@
+%ifdef CONFIG
+{
+  "Match": "All",
+  "RegData": {
+    "RAX": "0x20"
+  }
+}
+%endif
+
+jmp main
+
+patched_op:
+mov rax,-1
+ret
+
+main:
+
+; warm up the cache
+call patched_op
+
+; overwrite the first byte of patched_op with 0xC3 (ret) so the second call leaves rax untouched
+mov byte [rel patched_op], 0xC3
+
+mov rax, 32
+call patched_op
+
+hlt
diff --git a/unittests/ASM/SelfModifyingCode/SameBlock.asm b/unittests/ASM/SelfModifyingCode/SameBlock.asm
new file mode 100644
index 000000000..594c3900a
--- /dev/null
+++ b/unittests/ASM/SelfModifyingCode/SameBlock.asm
@@ -0,0 +1,28 @@
+%ifdef CONFIG
+{
+  "Match": "All",
+  "RegData": {
+    "RAX": "0x20"
+  }
+}
+%endif
+
+
+mov rax, 32
+
+; patch mov rax,... to nops
+mov byte [rel patched_op + 0], 0x90
+mov byte [rel patched_op + 1], 0x90
+mov byte [rel patched_op + 2], 0x90
+mov byte [rel patched_op + 3], 0x90
+mov byte [rel patched_op + 4], 0x90
+mov byte [rel patched_op + 5], 0x90
+mov byte [rel patched_op + 6], 0x90
+mov byte [rel patched_op + 7], 0x90
+mov byte [rel patched_op + 8], 0x90
+mov byte [rel patched_op + 9], 0x90
+
+patched_op:
+mov rax,0xFABCFABCFABC0123 ; 10 bytes long
+
+hlt