Initial commit

Ryan Houdek 2019-06-29 16:50:48 -07:00 committed by Stefanos Kornilios Mitsis Poiitidis
parent a468d10d56
commit 7252050e42
88 changed files with 22116 additions and 0 deletions

6
.gitmodules vendored Normal file
@@ -0,0 +1,6 @@
[submodule "External/SonicUtils"]
path = External/SonicUtils
url = https://github.com/Sonicadvance1/SonicUtils.git
[submodule "External/vixl"]
path = External/vixl
url = https://git.linaro.org/arm/vixl.git

52
CMakeLists.txt Normal file
@@ -0,0 +1,52 @@
cmake_minimum_required(VERSION 3.10)
set (PROJECT_NAME FEXCore)
project(${PROJECT_NAME}
VERSION 0.01
LANGUAGES CXX)
option(ENABLE_CLANG_FORMAT "Run clang format over the source" FALSE)
option(FORCE_AARCH64 "Force AArch64 Target for testing" FALSE)
set(CMAKE_INCLUDE_CURRENT_DIR ON)
find_package(LLVM CONFIG QUIET)
if(LLVM_FOUND AND TARGET LLVM)
message(STATUS "LLVM found!")
include_directories(${LLVM_INCLUDE_DIRS})
else()
message("Couldn't find LLVM and this project requires it")
endif()
include(CheckCXXCompilerFlag)
include(CheckIncludeFileCXX)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
set(_M_X86_64 1)
if (NOT FORCE_AARCH64)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-operator-names")
set(CMAKE_REQUIRED_DEFINITIONS "-fno-operator-names")
check_include_file_cxx(xbyak/xbyak.h XBYAK_FOUND)
if (XBYAK_FOUND)
set(ENABLE_JIT 1)
else()
message(STATUS "xbyak not found. Not enabling runtime JIT")
endif()
endif()
endif()
if (FORCE_AARCH64 OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
set(_M_ARM_64 1)
set(ENABLE_JIT 1)
add_subdirectory(External/vixl/)
include_directories(External/vixl/src/)
endif()
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
add_subdirectory(External/SonicUtils/)
include_directories(External/SonicUtils/)
add_subdirectory(Source/)
target_include_directories(${PROJECT_NAME} PUBLIC include/)
add_subdirectory(Examples/)

17
Examples/CMakeLists.txt Normal file
@@ -0,0 +1,17 @@
set (SRCS
SimpleCodeLoader.cpp)
set (ExampleName SimpleCodeLoader)
add_executable(${ExampleName} ${SRCS})
target_link_libraries(${ExampleName} PRIVATE ${PROJECT_NAME} SonicUtils)
set (SRCS
SimplePrint.cpp)
set (ExampleName SimplePrint)
add_executable(${ExampleName} ${SRCS})
target_link_libraries(${ExampleName} PRIVATE ${PROJECT_NAME} SonicUtils)

63
Examples/SimpleCodeLoader.cpp Normal file
@@ -0,0 +1,63 @@
#include "LogManager.h"
#include "SimpleCodeLoader.h"
#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Memory/SharedMem.h>
#include <cstdio>
void MsgHandler(LogMan::DebugLevels Level, const char *Message) {
const char *CharLevel{nullptr};
switch (Level) {
case LogMan::NONE:
CharLevel = "NONE";
break;
case LogMan::ASSERT:
CharLevel = "ASSERT";
break;
case LogMan::ERROR:
CharLevel = "ERROR";
break;
case LogMan::DEBUG:
CharLevel = "DEBUG";
break;
case LogMan::INFO:
CharLevel = "Info";
break;
default:
CharLevel = "???";
break;
}
printf("[%s] %s\n", CharLevel, Message);
}
void AssertHandler(const char *Message) {
printf("[ASSERT] %s\n", Message);
}
int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv) {
LogMan::Throw::InstallHandler(AssertHandler);
LogMan::Msg::InstallHandler(MsgHandler);
static constexpr uint8_t RawCode[] = {
0x90, // NOP
0xF4 // HLT
};
TestCode Test(RawCode, sizeof(RawCode));
FEXCore::Context::InitializeStaticTables();
auto SHM = FEXCore::SHM::AllocateSHMRegion(1ULL << 36);
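// 1ULL << 36 == 64GiB of guest address space, the same default as
// Context::Config.VirtualMemSize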
auto CTX = FEXCore::Context::CreateNewContext();
FEXCore::Config::SetConfig(CTX, FEXCore::Config::CONFIG_MAXBLOCKINST, 1);
FEXCore::Context::AddGuestMemoryRegion(CTX, SHM);
FEXCore::Context::InitCore(CTX, &Test);
auto ShutdownReason = FEXCore::Context::RunLoop(CTX, true);
LogMan::Msg::D("Reason we left VM: %d", ShutdownReason);
FEXCore::Context::DestroyContext(CTX);
FEXCore::SHM::DestroyRegion(SHM);
return 0;
}

49
Examples/SimpleCodeLoader.h Normal file
@@ -0,0 +1,49 @@
#pragma once
#include <FEXCore/Core/CodeLoader.h>
class TestCode final : public FEXCore::CodeLoader {
public:
TestCode(uint8_t const *Code, size_t Size)
: CodePtr {Code}
, CodeSize {Size} {
}
uint64_t StackSize() const override {
return STACK_SIZE;
}
uint64_t SetupStack([[maybe_unused]] void *HostPtr, uint64_t GuestPtr) const override {
return GuestPtr + STACK_SIZE - 16;
}
uint64_t DefaultRIP() const override {
return RIP;
}
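// MemoryLayout appears to be a (guest start, guest end, size) tuple, judging
// by the make_tuple call below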
FEXCore::CodeLoader::MemoryLayout GetLayout() const override {
// Needs to be page aligned
uint64_t CodeSize = 0x1000;
return std::make_tuple(CODE_START_RANGE, CODE_START_RANGE + CodeSize, CodeSize);
}
void MapMemoryRegion(std::function<void* (uint64_t, uint64_t)> Mapper) override {
}
void LoadMemory(MemoryWriter Writer) override {
Writer(reinterpret_cast<void const*>(CodePtr), 0, CodeSize);
}
uint64_t GetFinalRIP() override { return CODE_START_RANGE + CodeSize; }
private:
static constexpr uint64_t STACK_SIZE = 0x1000;
static constexpr uint64_t CODE_START_RANGE = 0x0;
static constexpr uint64_t RIP = 0;
uint8_t const *CodePtr;
size_t CodeSize;
};

113
Examples/SimplePrint.cpp Normal file
@@ -0,0 +1,113 @@
#include "LogManager.h"
#include "SimpleCodeLoader.h"
#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Memory/SharedMem.h>
#include <cstdio>
void MsgHandler(LogMan::DebugLevels Level, char const *Message) {
const char *CharLevel{nullptr};
switch (Level) {
case LogMan::NONE:
CharLevel = "NONE";
break;
case LogMan::ASSERT:
CharLevel = "ASSERT";
break;
case LogMan::ERROR:
CharLevel = "ERROR";
break;
case LogMan::DEBUG:
CharLevel = "DEBUG";
break;
case LogMan::INFO:
CharLevel = "Info";
break;
case LogMan::STDERR:
CharLevel = "STDERR";
break;
case LogMan::STDOUT:
CharLevel = "STDOUT";
break;
default:
CharLevel = "???";
break;
}
printf("[%s] %s\n", CharLevel, Message);
}
void AssertHandler(char const *Message) {
printf("[ASSERT] %s\n", Message);
}
int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv) {
LogMan::Throw::InstallHandler(AssertHandler);
LogMan::Msg::InstallHandler(MsgHandler);
// Set up code that performs a WRITE syscall to stdout
// The syscall handler catches writes to stdout/stderr and pumps them through LogManager
static constexpr uint8_t RawCode[] = {
0x48, 0xC7, 0xC0, 0x01, 0x00, 0x00, 0x00, // MOV RAX, 0x1
0x48, 0xC7, 0xC7, 0x01, 0x00, 0x00, 0x00, // MOV RDI, 0x1
0x48, 0xC7, 0xC6, 0x1F, 0x00, 0x00, 0x00, // MOV RSI, 0x1F
0x48, 0xC7, 0xC2, 0x01, 0x00, 0x00, 0x00, // MOV RDX, 0x1
0x0F, 0x05, // SYSCALL
0xF4, // HLT
0x54, 0x65, 0x73, 0x74, 0x65, 0x72, 0x00, // 'Tester\0'
};
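// The string lands at offset 0x1F from the code base (4 * 7 bytes of MOVs
// + 2 bytes of SYSCALL + 1 byte of HLT = 31 = 0x1F), which is why RSI is
// loaded with 0x1F; RDX = 0x1 asks the write to emit a single byte of it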
TestCode Test(RawCode, sizeof(RawCode));
FEXCore::Context::InitializeStaticTables();
auto SHM = FEXCore::SHM::AllocateSHMRegion(1ULL << 36);
auto CTX = FEXCore::Context::CreateNewContext();
// FEXCore::Config::SetConfig(CTX, FEXCore::Config::CONFIG_MAXBLOCKINST, 1);
FEXCore::Context::AddGuestMemoryRegion(CTX, SHM);
FEXCore::Context::InitCore(CTX, &Test);
auto ShutdownReason = FEXCore::Context::RunLoop(CTX, true);
LogMan::Msg::D("Reason we left VM: %d", ShutdownReason);
FEXCore::Context::DestroyContext(CTX);
FEXCore::SHM::DestroyRegion(SHM);
return 0;
}

1
External/vixl vendored Submodule

@@ -0,0 +1 @@
Subproject commit 107a535cad4c1483c24021b8a24adec2deb1daed

21
LICENSE Normal file
@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

41
Readme.md Normal file
@@ -0,0 +1,41 @@
# FEXCore - Fast x86 Core emulation library
This is the core emulation library that is used for the FEX emulator project.
This project aims to provide a fast and functional x86-64 emulation library that can meet or surpass the performance of other x86-64 emulation libraries.
### Goals
* Be as fast as possible, beating and exceeding current options for x86-64 emulation
* Running at 25%-50% lower performance than native code would be the desired target
* Use an IR to efficiently translate x86-64 to our host architecture
* Support a tiered recompiler to allow for fast runtime performance
* Support offline compilation and offline tooling for inspection and performance analysis
* Support threaded emulation. Including emulating x86-64's strong memory model on weak memory model architectures
* Support a significant portion of the x86-64 instruction space.
* Including MMX, SSE, SSE2, SSE3, SSSE3, and SSE4*
* Support fallback routines for uncommonly used x86-64 instructions
* Including x87 and 3DNow!
* Only support userspace emulation.
* All x86-64 instructions run as if they are under the CPL-3 (userland) security layer
* Minimal Linux Syscall emulation for testing purposes
* Portable library implementation in order to support easy integration into applications
### Target Host Architecture
The target host architecture for this library is AArch64, specifically ARMv8.1 or newer.
The CPU IR is designed with AArch64 in mind but there is a desire to run the recompiled code on other architectures as well.
Multiple architecture support is desired for easier bring-up and debugging; performance isn't as much of a priority there (e.g. x86-64 (guest) translated to x86-64 (host)).
### Not currently goals but will be in the future
* 32-bit x86 support
* This is desired in the future, but it has been pushed off for now to lower the amount of work required.
* Integration into WINE
* Later generations of x86-64 instruction sets
* Including AVX, F16C, XOP, FMA, AVX2, etc.
### Not desired
* Kernel space emulation
* CPL0-2 emulation
* Real Mode, Protected Mode, Virtual-8086 Mode, System Management Mode
* IRQs
* SVM
* "Cycle Accurate" emulation
### Dependencies
* [SonicUtils](https://github.com/Sonicadvance1/SonicUtils)
* LLVM
* clang-tidy if you want to ensure the code stays tidy
* cmake
* A C++17 compliant compiler (There are assumptions made about using Clang and LTO)
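### Examples
* See Examples/SimpleCodeLoader.cpp and Examples/SimplePrint.cpp in this repository for minimal examples of embedding the library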

373
Scripts/json_ir_generator.py Normal file
@@ -0,0 +1,373 @@
import json
import sys
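# The input JSON shape, as inferred from the fields this script reads (the op
# name "Add" and the values below are purely illustrative):
# {
#   "Defines": [ ... ],                # declarations emitted verbatim
#   "Ops": {
#     "Add": {
#       "HasDest": true,
#       "SSAArgs": "2",                # SSA arguments stored after the header
#       "Args": ["uint8_t", "Size"],   # flat [type, name, ...] pairs
#       "FixedDestSize": "8"           # optional; otherwise inferred from SSA args
#     },
#     "Last": { "Last": true }         # sentinel; must remain the final op
#   }
# }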
# Print out enum values
def print_enums(ops, defines):
output_file.write("#ifdef IROP_ENUM\n")
output_file.write("enum IROps : uint8_t {\n")
for op_key, op_vals in ops.items():
output_file.write("\t\tOP_%s,\n" % op_key.upper())
output_file.write("};\n")
output_file.write("#undef IROP_ENUM\n")
output_file.write("#endif\n\n")
# Print out struct definitions
def print_ir_structs(ops, defines):
output_file.write("#ifdef IROP_STRUCTS\n")
# Print out defines here
for op_val in defines:
output_file.write("\t%s;\n" % op_val)
output_file.write("// Default structs\n")
output_file.write("struct __attribute__((packed)) IROp_Header {\n")
output_file.write("\tvoid* Data[0];\n")
output_file.write("\tIROps Op;\n\n")
output_file.write("\tuint8_t Size;\n")
output_file.write("\tuint8_t NumArgs;\n")
output_file.write("\tuint8_t Elements : 7;\n")
output_file.write("\tbool HasDest : 1;\n")
output_file.write("\ttemplate<typename T>\n")
output_file.write("\tT const* C() const { return reinterpret_cast<T const*>(Data); }\n")
output_file.write("\ttemplate<typename T>\n")
output_file.write("\tT* CW() { return reinterpret_cast<T*>(Data); }\n")
output_file.write("\tNodeWrapper Args[0];\n")
output_file.write("};\n\n");
output_file.write("struct __attribute__((packed)) IROp_Empty {\n")
output_file.write("\tIROp_Header Header;\n")
output_file.write("};\n\n")
output_file.write("// User defined IR Op structs\n")
for op_key, op_vals in ops.items():
SSAArgs = 0
HasArgs = False
if ("SSAArgs" in op_vals):
SSAArgs = int(op_vals["SSAArgs"])
if ("Args" in op_vals and len(op_vals["Args"]) != 0):
HasArgs = True
if (HasArgs or SSAArgs != 0):
output_file.write("struct __attribute__((packed)) IROp_%s {\n" % op_key)
output_file.write("\tIROp_Header Header;\n\n")
# SSA arguments have a hard requirement to appear after the header
if (SSAArgs != 0):
output_file.write("private:\n")
for i in range(0, SSAArgs):
output_file.write("\tuint64_t : (sizeof(NodeWrapper) * 8);\n");
output_file.write("public:\n")
if (HasArgs):
output_file.write("\t// User defined data\n")
# Print out arguments in IR Op
for i in range(0, len(op_vals["Args"]), 2):
data_type = op_vals["Args"][i]
data_name = op_vals["Args"][i+1]
output_file.write("\t%s %s;\n" % (data_type, data_name))
output_file.write("};\n")
else:
output_file.write("using IROp_%s = IROp_Empty;\n" % op_key)
# Add a static assert that the IR ops must be pod
output_file.write("static_assert(std::is_pod<IROp_%s>::value);\n\n" % op_key)
output_file.write("#undef IROP_STRUCTS\n")
output_file.write("#endif\n\n")
# Print out const expression to calculate IR Op sizes
def print_ir_sizes(ops, defines):
output_file.write("#ifdef IROP_SIZES\n")
output_file.write("constexpr std::array<size_t, IROps::OP_LAST + 1> IRSizes = {\n")
for op_key, op_vals in ops.items():
if ("Last" in op_vals):
output_file.write("\t-1ULL,\n")
else:
output_file.write("\tsizeof(IROp_%s),\n" % op_key)
output_file.write("};\n\n")
output_file.write("// Make sure our array maps directly to the IROps enum\n")
output_file.write("static_assert(IRSizes[IROps::OP_LAST] == -1ULL);\n\n")
output_file.write("[[maybe_unused]] static size_t GetSize(IROps Op) { return IRSizes[Op]; }\n\n")
output_file.write("std::string_view const& GetName(IROps Op);\n")
output_file.write("#undef IROP_SIZES\n")
output_file.write("#endif\n\n")
# Print out the name printer implementation
def print_ir_getname(ops, defines):
output_file.write("#ifdef IROP_GETNAME_IMPL\n")
output_file.write("constexpr std::array<std::string_view const, OP_LAST + 1> IRNames = {\n")
for op_key, op_vals in ops.items():
output_file.write("\t\"%s\",\n" % op_key)
output_file.write("};\n\n")
output_file.write("static_assert(IRNames[OP_LAST] == \"Last\");\n\n")
output_file.write("std::string_view const& GetName(IROps Op) {\n")
output_file.write(" return IRNames[Op];\n")
output_file.write("}\n")
output_file.write("#undef IROP_GETNAME_IMPL\n")
output_file.write("#endif\n\n")
# Print out IR argument printing
def print_ir_arg_printer(ops, defines):
output_file.write("#ifdef IROP_ARGPRINTER_HELPER\n")
output_file.write("switch (IROp->Op) {\n")
for op_key, op_vals in ops.items():
if not ("Last" in op_vals):
SSAArgs = 0
HasArgs = False
# Does this not want a printer?
if ("ArgPrinter" in op_vals and op_vals["ArgPrinter"] == False):
continue
if ("SSAArgs" in op_vals):
SSAArgs = int(op_vals["SSAArgs"])
if ("Args" in op_vals and len(op_vals["Args"]) != 0):
HasArgs = True
output_file.write("case IROps::OP_%s: {\n" % op_key.upper())
if (HasArgs or SSAArgs != 0):
output_file.write("\tauto Op = IROp->C<IR::IROp_%s>();\n" % op_key)
output_file.write("\t*out << \" \";\n")
# Print SSA args first
if (SSAArgs != 0):
for i in range(0, SSAArgs):
LastArg = (SSAArgs - i - 1) == 0 and not HasArgs
output_file.write("\tPrintArg(out, IR, Op->Header.Args[%d]);\n" % i)
if not (LastArg):
output_file.write("\t*out << \", \";\n")
# Now print user defined arguments
if (HasArgs):
ArgCount = len(op_vals["Args"])
for i in range(0, len(op_vals["Args"]), 2):
data_name = op_vals["Args"][i+1]
LastArg = (ArgCount - i - 2) == 0
CondArg2 = (", ", "")
output_file.write("\tPrintArg(out, IR, Op->%s);\n" % data_name)
if not (LastArg):
output_file.write("\t*out << \", \";\n")
output_file.write("break;\n")
output_file.write("}\n")
output_file.write("#undef IROP_ARGPRINTER_HELPER\n")
output_file.write("#endif\n")
# Print out IR allocator helpers
def print_ir_allocator_helpers(ops, defines):
output_file.write("#ifdef IROP_ALLOCATE_HELPERS\n")
output_file.write("\ttemplate <class T>\n")
output_file.write("\tusing IRPair = FEXCore::IR::Wrapper<T>;\n\n")
output_file.write("\tIRPair<IROp_Header> AllocateRawOp(size_t HeaderSize) {\n")
output_file.write("\t\tauto Op = reinterpret_cast<IROp_Header*>(Data.Allocate(HeaderSize));\n")
output_file.write("\t\tmemset(Op, 0, HeaderSize);\n")
output_file.write("\t\tOp->Op = IROps::OP_DUMMY;\n")
output_file.write("\t\treturn FEXCore::IR::Wrapper<IROp_Header>{Op, CreateNode(Op)};\n")
output_file.write("\t}\n\n")
output_file.write("\ttemplate<class T, IROps T2>\n")
output_file.write("\tIRPair<T> AllocateOp() {\n")
output_file.write("\t\tsize_t Size = FEXCore::IR::GetSize(T2);\n")
output_file.write("\t\tauto Op = reinterpret_cast<T*>(Data.Allocate(Size));\n")
output_file.write("\t\tmemset(Op, 0, Size);\n")
output_file.write("\t\tOp->Header.Op = T2;\n")
output_file.write("\t\treturn FEXCore::IR::Wrapper<T>{Op, CreateNode(&Op->Header)};\n")
output_file.write("\t}\n\n")
output_file.write("\tuint8_t GetOpSize(OrderedNode *Op) const {\n")
output_file.write("\t\tauto HeaderOp = reinterpret_cast<IROp_Header const*>(Op->Header.Value.GetPtr(Data.Begin()));\n")
output_file.write("\t\tLogMan::Throw::A(HeaderOp->HasDest, \"Op %s has no dest\\n\", GetName(HeaderOp->Op));\n")
output_file.write("\t\treturn HeaderOp->Size;\n")
output_file.write("\t}\n\n")
output_file.write("\tuint8_t GetOpElements(OrderedNode *Op) const {\n")
output_file.write("\t\tauto HeaderOp = reinterpret_cast<IROp_Header const*>(Op->Header.Value.GetPtr(Data.Begin()));\n")
output_file.write("\t\tLogMan::Throw::A(HeaderOp->HasDest, \"Op %s has no dest\\n\", GetName(HeaderOp->Op));\n")
output_file.write("\t\treturn HeaderOp->Elements;\n")
output_file.write("\t}\n\n")
output_file.write("\tbool OpHasDest(OrderedNode *Op) const {\n")
output_file.write("\t\tauto HeaderOp = reinterpret_cast<IROp_Header const*>(Op->Header.Value.GetPtr(Data.Begin()));\n")
output_file.write("\t\treturn HeaderOp->HasDest;\n")
output_file.write("\t}\n\n")
for op_key, op_vals in ops.items():
if not ("Last" in op_vals):
HasDest = False
HasFixedDestSize = False
FixedDestSize = 0
HasDestSize = False
DestSize = ""
if ("HasDest" in op_vals and op_vals["HasDest"] == True):
HasDest = True
if ("FixedDestSize" in op_vals):
HasFixedDestSize = True
FixedDestSize = int(op_vals["FixedDestSize"])
if ("DestSize" in op_vals):
HasDestSize = True
DestSize = op_vals["DestSize"];
output_file.write("\tIRPair<IROp_%s> _%s() {\n" % (op_key, op_key))
output_file.write("\t\tauto Op = AllocateOp<IROp_%s, IROps::OP_%s>();\n" % (op_key, op_key.upper()))
if (HasDest):
if (HasFixedDestSize):
output_file.write("\t\tOp.first->Header.Size = %d;\n" % FixedDestSize)
output_file.write("\t\tOp.first->Header.HasDest = true;\n")
output_file.write("\t\treturn Op;\n")
output_file.write("\t}\n\n")
# Generate helpers with operands
for op_key, op_vals in ops.items():
if not ("Last" in op_vals):
SSAArgs = 0
HasArgs = False
HasDest = False
HasFixedDestSize = False
FixedDestSize = 0
HasDestSize = False
NumElements = 1
DestSize = ""
if ("SSAArgs" in op_vals):
SSAArgs = int(op_vals["SSAArgs"])
if ("Args" in op_vals and len(op_vals["Args"]) != 0):
HasArgs = True
if not (HasArgs or SSAArgs != 0):
continue
if ("HelperGen" in op_vals and op_vals["HelperGen"] == False):
continue
if ("HasDest" in op_vals and op_vals["HasDest"] == True):
HasDest = True
if ("FixedDestSize" in op_vals):
HasFixedDestSize = True
FixedDestSize = int(op_vals["FixedDestSize"])
if ("DestSize" in op_vals):
HasDestSize = True
DestSize = op_vals["DestSize"]
if ("NumElements" in op_vals):
NumElements = int(op_vals["NumElements"])
output_file.write("\tIRPair<IROp_%s> _%s(" % (op_key, op_key))
# Output SSA args first
if (SSAArgs != 0):
for i in range(0, SSAArgs):
LastArg = (SSAArgs - i - 1) == 0 and not HasArgs
CondArg2 = (", ", "")
output_file.write("OrderedNode *ssa%d%s" % (i, CondArg2[LastArg]))
if (HasArgs):
ArgCount = len(op_vals["Args"])
for i in range(0, len(op_vals["Args"]), 2):
data_type = op_vals["Args"][i]
data_name = op_vals["Args"][i+1]
LastArg = (ArgCount - i - 2) == 0
CondArg2 = (", ", "")
output_file.write("%s %s%s" % (data_type, data_name, CondArg2[LastArg]))
output_file.write(") {\n")
output_file.write("\t\tauto Op = AllocateOp<IROp_%s, IROps::OP_%s>();\n" % (op_key, op_key.upper()))
output_file.write("\t\tOp.first->Header.NumArgs = %d;\n" % (SSAArgs))
if (SSAArgs != 0):
for i in range(0, SSAArgs):
output_file.write("\t\tOp.first->Header.Args[%d] = ssa%d->Wrapped(ListData.Begin());\n" % (i, i))
if (HasArgs):
for i in range(1, len(op_vals["Args"]), 2):
data_name = op_vals["Args"][i]
output_file.write("\t\tOp.first->%s = %s;\n" % (data_name, data_name))
if (HasDest):
if (HasFixedDestSize):
output_file.write("\t\tOp.first->Header.Size = %d;\n" % FixedDestSize)
if (HasDestSize):
output_file.write("\t\tOp.first->Header.Size = %s;\n" % DestSize)
output_file.write("\t\tOp.first->Header.Elements = %s;\n" % NumElements)
if not (HasFixedDestSize or HasDestSize):
# We need to infer destination size
output_file.write("\t\tuint8_t InferSize = 0;\n")
if (SSAArgs != 0):
for i in range(0, SSAArgs):
output_file.write("\t\tuint8_t Size%d = GetOpSize(ssa%s);\n" % (i, i))
output_file.write("\t\tInferSize = std::max(InferSize, Size%d);\n" % (i))
output_file.write("\t\tOp.first->Header.Size = InferSize;\n")
output_file.write("\t\tOp.first->Header.HasDest = true;\n")
output_file.write("\t\treturn Op;\n")
output_file.write("\t}\n\n")
output_file.write("#undef IROP_ALLOCATE_HELPERS\n")
output_file.write("#endif\n")
if (len(sys.argv) < 3):
sys.exit()
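# Usage: python3 json_ir_generator.py <IR.json> <output.inc>
# (matches the IR_INC custom target invocation in Source/CMakeLists.txt)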
output_filename = sys.argv[2]
json_file = open(sys.argv[1], "r")
json_text = json_file.read()
json_file.close()
json_object = json.loads(json_text)
json_object = {k.upper(): v for k, v in json_object.items()}
ops = json_object["OPS"]
defines = json_object["DEFINES"]
output_file = open(output_filename, "w")
print_enums(ops, defines)
print_ir_structs(ops, defines)
print_ir_sizes(ops, defines)
print_ir_getname(ops, defines)
print_ir_arg_printer(ops, defines)
print_ir_allocator_helpers(ops, defines)
output_file.close()

133
Source/CMakeLists.txt Normal file
@@ -0,0 +1,133 @@
if (ENABLE_CLANG_FORMAT)
find_program(CLANG_TIDY_EXE "clang-tidy")
set(CLANG_TIDY_FLAGS
"-checks=*"
"-fuchsia*"
"-bugprone-macro-parentheses"
"-clang-analyzer-core.*"
"-cppcoreguidelines-pro-type-*"
"-cppcoreguidelines-pro-bounds-array-to-pointer-decay"
"-cppcoreguidelines-pro-bounds-pointer-arithmetic"
"-cppcoreguidelines-avoid-c-arrays"
"-cppcoreguidelines-avoid-magic-numbers"
"-cppcoreguidelines-pro-bounds-constant-array-index"
"-cppcoreguidelines-no-malloc"
"-cppcoreguidelines-special-member-functions"
"-cppcoreguidelines-owning-memory"
"-cppcoreguidelines-macro-usage"
"-cppcoreguidelines-avoid-goto"
"-google-readability-function-size"
"-google-readability-namespace-comments"
"-google-readability-braces-around-statements"
"-google-build-using-namespace"
"-hicpp-*"
"-llvm-namespace-comment"
"-llvm-include-order" # Messes up with case sensitivity
"-misc-unused-parameters"
"-modernize-loop-convert"
"-modernize-use-auto"
"-modernize-avoid-c-arrays"
"-modernize-use-nodiscard"
"readability-*"
"-readability-function-size"
"-readability-implicit-bool-conversion"
"-readability-braces-around-statements"
"-readability-else-after-return"
"-readability-magic-numbers"
"-readability-named-parameter"
"-readability-uppercase-literal-suffix"
"-cert-err34-c"
"-cert-err58-cpp"
"-bugprone-exception-escape"
)
string(REPLACE ";" "," CLANG_TIDY_FLAGS "${CLANG_TIDY_FLAGS}")
set(CMAKE_CXX_CLANG_TIDY ${CLANG_TIDY_EXE} "${CLANG_TIDY_FLAGS}")
endif()
set (SRCS
Common/Paths.cpp
Interface/Config/Config.cpp
Interface/Context/Context.cpp
Interface/Core/BlockCache.cpp
Interface/Core/Core.cpp
Interface/Core/CPUID.cpp
Interface/Core/Frontend.cpp
Interface/Core/OpcodeDispatcher.cpp
Interface/Core/RegisterAllocation.cpp
Interface/Core/X86Tables.cpp
Interface/Core/X86DebugInfo.cpp
Interface/Core/Interpreter/InterpreterCore.cpp
Interface/Core/LLVMJIT/LLVMCore.cpp
Interface/Core/LLVMJIT/LLVMMemoryManager.cpp
Interface/HLE/FileManagement.cpp
Interface/HLE/Syscalls.cpp
Interface/Memory/MemMapper.cpp
Interface/Memory/SharedMem.cpp
Interface/IR/IR.cpp
Interface/IR/PassManager.cpp
Interface/IR/Passes/ConstProp.cpp
Interface/IR/Passes/DeadContextStoreElimination.cpp
Interface/IR/Passes/IRCompaction.cpp
Interface/IR/Passes/IRValidation.cpp
Interface/IR/Passes/RedundantFlagCalculationElimination.cpp
Interface/IR/Passes/SyscallOptimization.cpp
)
set (JIT_LIBS )
if (ENABLE_JIT)
if (_M_X86_64)
add_definitions(-D_M_X86_64=1)
if (NOT FORCE_AARCH64)
list(APPEND SRCS Interface/Core/JIT/x86_64/JIT.cpp)
endif()
endif()
if(_M_ARM_64)
add_definitions(-D_M_ARM_64=1)
list(APPEND SRCS Interface/Core/JIT/Arm64/JIT.cpp)
list(APPEND JIT_LIBS vixl)
endif()
endif()
# Generate IR include file
set(OUTPUT_NAME "${CMAKE_CURRENT_BINARY_DIR}/IRDefines.inc")
set(INPUT_NAME "${CMAKE_CURRENT_SOURCE_DIR}/Interface/IR/IR.json")
add_custom_target(IR_INC
DEPENDS "${INPUT_NAME}"
COMMAND "python3" "${CMAKE_CURRENT_SOURCE_DIR}/../Scripts/json_ir_generator.py" "${INPUT_NAME}" "${OUTPUT_NAME}"
)
add_library(${PROJECT_NAME} STATIC ${SRCS})
add_dependencies(${PROJECT_NAME} IR_INC)
target_link_libraries(${PROJECT_NAME} LLVM pthread rt ${JIT_LIBS})
target_include_directories(${PROJECT_NAME} PUBLIC "${CMAKE_CURRENT_BINARY_DIR}")
target_include_directories(${PROJECT_NAME} PRIVATE IncludePrivate/)
target_compile_options(${PROJECT_NAME}
PRIVATE
"-Wno-trigraphs")
# Add in diagnostic colours if the option is available.
# Ninja code generator will kill colours if this isn't here
check_cxx_compiler_flag(-fdiagnostics-color=always GCC_COLOR)
check_cxx_compiler_flag(-fcolor-diagnostics CLANG_COLOR)
if (GCC_COLOR)
target_compile_options(${PROJECT_NAME}
PRIVATE
"-fdiagnostics-color=always")
endif()
if (CLANG_COLOR)
target_compile_options(${PROJECT_NAME}
PRIVATE
"-fcolor-diagnostics")
endif()
target_compile_options(${PROJECT_NAME}
PRIVATE
-Wall)
add_subdirectory(Test/)

49
Source/Common/BitSet.h Normal file
@@ -0,0 +1,49 @@
#pragma once
#include "Common/MathUtils.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <type_traits>
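// A flat bitset storing one bit per element, chunked into ElementType-sized words.
// Note: Allocate/Realloc do not zero the new storage; callers should MemClear first.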
template<typename T>
struct BitSet final {
using ElementType = T;
constexpr static size_t MinimumSize = sizeof(ElementType);
constexpr static size_t MinimumSizeBits = sizeof(ElementType) * 8;
ElementType *Memory;
void Allocate(size_t Elements) {
Memory = static_cast<ElementType*>(malloc(AlignUp(Elements / MinimumSize, MinimumSize)));
}
void Realloc(size_t Elements) {
Memory = static_cast<ElementType*>(realloc(Memory, AlignUp(Elements / MinimumSize, MinimumSize)));
}
void Free() {
free(Memory);
Memory = nullptr;
}
bool Get(T Element) {
return (Memory[Element / MinimumSizeBits] & (1ULL << (Element % MinimumSizeBits))) != 0;
}
void Set(T Element) {
Memory[Element / MinimumSizeBits] |= (1ULL << (Element % MinimumSizeBits));
}
void Clear(T Element) {
Memory[Element / MinimumSizeBits] &= ~(1ULL << (Element % MinimumSizeBits));
}
void MemClear(size_t Size) {
memset(Memory, 0, Size);
}
void MemSet(size_t Size) {
memset(Memory, 0xFF, Size);
}
// This very explicitly doesn't let you take an address
// Is only a getter
bool operator[](T Element) {
return Get(Element);
}
};
static_assert(sizeof(BitSet<uint32_t>) == sizeof(uintptr_t), "Needs to just be a pointer");
static_assert(std::is_pod<BitSet<uint32_t>>::value, "Needs to be POD");

13
Source/Common/MathUtils.h Normal file
@@ -0,0 +1,13 @@
#pragma once
#include <stdint.h>
static inline uint64_t AlignUp(uint64_t value, uint64_t size) {
return value + (size - value % size) % size;
}
static inline uint64_t AlignDown(uint64_t value, uint64_t size) {
return value - value % size;
}
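// Examples: AlignUp(0x1001, 0x1000) == 0x2000, AlignUp(0x1000, 0x1000) == 0x1000,
// AlignDown(0x1FFF, 0x1000) == 0x1000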

30
Source/Common/Paths.cpp Normal file
@@ -0,0 +1,30 @@
#include "Common/Paths.h"
#include <cstdlib>
#include <sys/stat.h>
namespace FEXCore::Paths {
std::string DataPath;
std::string EntryCache;
void InitializePaths() {
char *HomeDir = getenv("HOME");
char *XDGDataDir = getenv("XDG_DATA_DIR");
if (XDGDataDir) {
DataPath = XDGDataDir;
}
else {
if (HomeDir) {
DataPath = HomeDir;
}
}
DataPath += "/.fexcore/";
EntryCache = DataPath + "EntryCache/";
mkdir(DataPath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
mkdir(EntryCache.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
}
std::string GetDataPath() {
return DataPath;
}
}

7
Source/Common/Paths.h Normal file
@@ -0,0 +1,7 @@
#pragma once
#include <string>
namespace FEXCore::Paths {
void InitializePaths();
std::string GetDataPath();
}

51
Source/Interface/Config/Config.cpp Normal file
@@ -0,0 +1,51 @@
#include "LogManager.h"
#include "Interface/Context/Context.h"
#include <FEXCore/Config/Config.h>
namespace FEXCore::Config {
void SetConfig(FEXCore::Context::Context *CTX, ConfigOption Option, uint64_t Config) {
switch (Option) {
case FEXCore::Config::CONFIG_MULTIBLOCK:
CTX->Config.Multiblock = Config != 0;
break;
case FEXCore::Config::CONFIG_MAXBLOCKINST:
CTX->Config.MaxInstPerBlock = Config;
break;
case FEXCore::Config::CONFIG_DEFAULTCORE:
CTX->Config.Core = static_cast<FEXCore::Config::ConfigCore>(Config);
break;
case FEXCore::Config::CONFIG_VIRTUALMEMSIZE:
CTX->Config.VirtualMemSize = Config;
break;
case FEXCore::Config::CONFIG_SINGLESTEP:
CTX->RunningMode = Config != 0 ? FEXCore::Context::CoreRunningMode::MODE_SINGLESTEP : FEXCore::Context::CoreRunningMode::MODE_RUN;
break;
default: LogMan::Msg::A("Unknown configuration option");
}
}
uint64_t GetConfig(FEXCore::Context::Context *CTX, ConfigOption Option) {
switch (Option) {
case FEXCore::Config::CONFIG_MULTIBLOCK:
return CTX->Config.Multiblock;
break;
case FEXCore::Config::CONFIG_MAXBLOCKINST:
return CTX->Config.MaxInstPerBlock;
break;
case FEXCore::Config::CONFIG_DEFAULTCORE:
return CTX->Config.Core;
break;
case FEXCore::Config::CONFIG_VIRTUALMEMSIZE:
return CTX->Config.VirtualMemSize;
break;
case FEXCore::Config::CONFIG_SINGLESTEP:
return CTX->RunningMode == FEXCore::Context::CoreRunningMode::MODE_SINGLESTEP ? 1 : 0;
break;
default: LogMan::Msg::A("Unknown configuration option");
}
return 0;
}
}

131
Source/Interface/Context/Context.cpp Normal file
@@ -0,0 +1,131 @@
#include "Common/Paths.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/Core.h"
#include "Interface/Core/OpcodeDispatcher.h"
#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Debug/X86Tables.h>
namespace FEXCore::Context {
void InitializeStaticTables() {
FEXCore::Paths::InitializePaths();
X86Tables::InitializeInfoTables();
IR::InstallOpcodeHandlers();
}
FEXCore::Context::Context *CreateNewContext() {
return new FEXCore::Context::Context{};
}
bool InitializeContext(FEXCore::Context::Context *CTX) {
return FEXCore::CPU::CreateCPUCore(CTX);
}
void DestroyContext(FEXCore::Context::Context *CTX) {
delete CTX;
}
bool AddGuestMemoryRegion(FEXCore::Context::Context *CTX, FEXCore::SHM::SHMObject *SHM) {
CTX->MemoryMapper.SetBaseRegion(SHM);
return true;
}
void SetApplicationFile(FEXCore::Context::Context *CTX, std::string const &File) {
CTX->SyscallHandler.SetFilename(File);
// XXX: This isn't good for debugging
// CTX->LoadEntryList();
}
bool InitCore(FEXCore::Context::Context *CTX, FEXCore::CodeLoader *Loader) {
return CTX->InitCore(Loader);
}
FEXCore::Context::ExitReason RunLoop(FEXCore::Context::Context *CTX, bool WaitForIdle) {
return CTX->RunLoop(WaitForIdle);
}
FEXCore::Context::ExitReason GetExitReason(FEXCore::Context::Context *CTX) {
return CTX->ParentThread->ExitReason;
}
bool IsDone(FEXCore::Context::Context *CTX) {
return CTX->IsPaused();
}
void GetCPUState(FEXCore::Context::Context *CTX, FEXCore::Core::CPUState *State) {
memcpy(State, &CTX->ParentThread->State.State, sizeof(FEXCore::Core::CPUState));
}
void SetCPUState(FEXCore::Context::Context *CTX, FEXCore::Core::CPUState *State) {
memcpy(&CTX->ParentThread->State.State, State, sizeof(FEXCore::Core::CPUState));
}
void Pause(FEXCore::Context::Context *CTX) {
CTX->Pause();
}
void SetCustomCPUBackendFactory(FEXCore::Context::Context *CTX, CustomCPUFactoryType Factory) {
CTX->CustomCPUFactory = std::move(Factory);
}
void SetFallbackCPUBackendFactory(FEXCore::Context::Context *CTX, CustomCPUFactoryType Factory) {
CTX->FallbackCPUFactory = std::move(Factory);
}
uint64_t HandleSyscall(FEXCore::Context::Context *CTX, FEXCore::Core::ThreadState *Thread, FEXCore::HLE::SyscallArguments *Args) {
return CTX->SyscallHandler.HandleSyscall(reinterpret_cast<FEXCore::Core::InternalThreadState*>(Thread), Args);
}
bool AddVirtualMemoryMapping([[maybe_unused]] FEXCore::Context::Context *CTX, [[maybe_unused]] uint64_t VirtualAddress, [[maybe_unused]] uint64_t PhysicalAddress, [[maybe_unused]] uint64_t Size) {
return false;
}
void RegisterExternalSyscallVisitor(FEXCore::Context::Context *CTX, [[maybe_unused]] uint64_t Syscall, [[maybe_unused]] FEXCore::HLE::SyscallVisitor *Visitor) {
}
namespace Debug {
void CompileRIP(FEXCore::Context::Context *CTX, uint64_t RIP) {
CTX->CompileRIP(CTX->ParentThread, RIP);
}
uint64_t GetThreadCount(FEXCore::Context::Context *CTX) {
return CTX->GetThreadCount();
}
FEXCore::Core::RuntimeStats *GetRuntimeStatsForThread(FEXCore::Context::Context *CTX, uint64_t Thread) {
return CTX->GetRuntimeStatsForThread(Thread);
}
FEXCore::Core::CPUState GetCPUState(FEXCore::Context::Context *CTX) {
return CTX->GetCPUState();
}
void GetMemoryRegions(FEXCore::Context::Context *CTX, std::vector<FEXCore::Memory::MemRegion> *Regions) {
return CTX->GetMemoryRegions(Regions);
}
bool GetDebugDataForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, FEXCore::Core::DebugData *Data) {
return CTX->GetDebugDataForRIP(RIP, Data);
}
bool FindHostCodeForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, uint8_t **Code) {
return CTX->FindHostCodeForRIP(RIP, Code);
}
// XXX:
// bool FindIRForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, FEXCore::IR::IntrusiveIRList **ir) {
// return CTX->FindIRForRIP(RIP, ir);
// }
// void SetIRForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, FEXCore::IR::IntrusiveIRList *const ir) {
// CTX->SetIRForRIP(RIP, ir);
// }
FEXCore::Core::ThreadState *GetThreadState(FEXCore::Context::Context *CTX) {
return CTX->GetThreadState();
}
}
}

103
Source/Interface/Context/Context.h Normal file
@@ -0,0 +1,103 @@
#pragma once
#include "Event.h"
#include "Interface/Core/CPUID.h"
#include "Interface/Core/Frontend.h"
#include "Interface/Core/InternalThreadState.h"
#include "Interface/HLE/Syscalls.h"
#include "Interface/Memory/MemMapper.h"
#include "Interface/IR/PassManager.h"
#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/CPUBackend.h>
#include <stdint.h>
#include <mutex>
namespace FEXCore {
class SyscallHandler;
}
namespace FEXCore::Context {
enum CoreRunningMode {
MODE_RUN,
MODE_SINGLESTEP,
};
struct Context {
friend class FEXCore::SyscallHandler;
struct {
bool Multiblock {false};
bool BreakOnFrontendFailure {true};
int64_t MaxInstPerBlock {-1LL};
uint64_t VirtualMemSize {1ULL << 36};
FEXCore::Config::ConfigCore Core {FEXCore::Config::CONFIG_INTERPRETER};
// LLVM JIT options
bool LLVM_MemoryValidation {false};
bool LLVM_IRValidation {false};
bool LLVM_PrinterPass {false};
} Config;
FEXCore::Memory::MemMapper MemoryMapper;
std::mutex ThreadCreationMutex;
uint64_t ThreadID{};
FEXCore::Core::InternalThreadState* ParentThread;
std::vector<FEXCore::Core::InternalThreadState*> Threads;
std::atomic_bool ShouldStop{};
Event PauseWait;
bool Running{};
CoreRunningMode RunningMode {CoreRunningMode::MODE_RUN};
FEXCore::Frontend::Decoder FrontendDecoder;
FEXCore::IR::PassManager PassManager;
FEXCore::CPUIDEmu CPUID;
FEXCore::SyscallHandler SyscallHandler;
CustomCPUFactoryType CustomCPUFactory;
CustomCPUFactoryType FallbackCPUFactory;
Context();
~Context();
bool InitCore(FEXCore::CodeLoader *Loader);
FEXCore::Context::ExitReason RunLoop(bool WaitForIdle);
bool IsPaused() const { return !Running; }
void Pause();
// Debugger interface
void CompileRIP(FEXCore::Core::InternalThreadState *Thread, uint64_t RIP);
uint64_t GetThreadCount() const;
FEXCore::Core::RuntimeStats *GetRuntimeStatsForThread(uint64_t Thread);
FEXCore::Core::CPUState GetCPUState();
void GetMemoryRegions(std::vector<FEXCore::Memory::MemRegion> *Regions);
bool GetDebugDataForRIP(uint64_t RIP, FEXCore::Core::DebugData *Data);
bool FindHostCodeForRIP(uint64_t RIP, uint8_t **Code);
// XXX:
// bool FindIRForRIP(uint64_t RIP, FEXCore::IR::IntrusiveIRList **ir);
// void SetIRForRIP(uint64_t RIP, FEXCore::IR::IntrusiveIRList *const ir);
FEXCore::Core::ThreadState *GetThreadState();
void LoadEntryList();
private:
void WaitForIdle();
FEXCore::Core::InternalThreadState* CreateThread(FEXCore::Core::CPUState *NewThreadState, uint64_t ParentTID, uint64_t ChildTID);
void *MapRegion(FEXCore::Core::InternalThreadState *Thread, uint64_t Offset, uint64_t Size, bool Fixed = false);
void *ShmBase();
void MirrorRegion(FEXCore::Core::InternalThreadState *Thread, void *HostPtr, uint64_t Offset, uint64_t Size);
void CopyMemoryMapping(FEXCore::Core::InternalThreadState *ParentThread, FEXCore::Core::InternalThreadState *ChildThread);
void InitializeThread(FEXCore::Core::InternalThreadState *Thread);
void ExecutionThread(FEXCore::Core::InternalThreadState *Thread);
void RunThread(FEXCore::Core::InternalThreadState *Thread);
uintptr_t CompileBlock(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP);
uintptr_t AddBlockMapping(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, void *Ptr);
FEXCore::CodeLoader *LocalLoader{};
// Entry Cache
bool GetFilenameHash(std::string const &Filename, std::string &Hash);
void AddThreadRIPsToEntryList(FEXCore::Core::InternalThreadState *Thread);
void SaveEntryList();
std::set<uint64_t> EntryList;
};
}

54
Source/Interface/Core/BlockCache.cpp Normal file
@@ -0,0 +1,54 @@
#include "Interface/Context/Context.h"
#include "Interface/Core/Core.h"
#include "Interface/Core/BlockCache.h"
#include <sys/mman.h>
namespace FEXCore {
BlockCache::BlockCache(FEXCore::Context::Context *CTX)
: ctx {CTX} {
// Block cache ends up looking like this
// PageMemoryMap[VirtualMemoryRegion >> 12]
// |
// v
// PageMemory[Memory & (VIRTUAL_PAGE_SIZE - 1)]
// |
// v
// Pointer to Code
//
// Allocate a region of memory that we can use to back our block pointers
// We need one pointer per page of virtual memory
// At 64GB of virtual memory this will allocate 128MB of virtual memory space
PagePointer = reinterpret_cast<uintptr_t>(mmap(nullptr, ctx->Config.VirtualMemSize / 4096 * 8, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
// Allocate our memory backing our pages
// We need 32KB per guest page (One pointer per byte)
// XXX: We can drop down to 16KB if we store 4byte offsets from the code base
// We currently limit to 128MB of real memory for caching for the total cache size.
// Can end up being inefficient if we compile a small number of blocks per page
PageMemory = reinterpret_cast<uintptr_t>(mmap(nullptr, CODE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
LogMan::Throw::A(PageMemory != -1ULL, "Failed to allocate page memory");
}
BlockCache::~BlockCache() {
munmap(reinterpret_cast<void*>(PagePointer), ctx->Config.VirtualMemSize / 4096 * 8);
munmap(reinterpret_cast<void*>(PageMemory), CODE_SIZE);
}
void BlockCache::HintUsedRange(uint64_t Address, uint64_t Size) {
// Tell the kernel we will definitely need [Address, Address+Size) mapped for the page pointer
// Page Pointer is allocated per page, so shift by page size
Address >>= 12;
Size >>= 12;
madvise(reinterpret_cast<void*>(PagePointer + Address), Size, MADV_WILLNEED);
}
void BlockCache::ClearCache() {
// Clear out the page memory
madvise(reinterpret_cast<void*>(PagePointer), ctx->Config.VirtualMemSize / 4096 * 8, MADV_DONTNEED);
madvise(reinterpret_cast<void*>(PageMemory), CODE_SIZE, MADV_DONTNEED);
AllocateOffset = 0;
}
}

104
Source/Interface/Core/BlockCache.h Normal file
@@ -0,0 +1,104 @@
#pragma once
#include <FEXCore/Core/Context.h>
#include "LogManager.h"
namespace FEXCore {
class BlockCache {
public:
BlockCache(FEXCore::Context::Context *CTX);
~BlockCache();
using BlockCacheIter = uintptr_t;
uintptr_t End() { return 0; }
uintptr_t FindBlock(uint64_t Address) {
return FindCodePointerForAddress(Address);
}
void Erase(uint64_t Address) {
uint64_t PageOffset = Address & (0x0FFF);
Address >>= 12;
uintptr_t *Pointers = reinterpret_cast<uintptr_t*>(PagePointer);
uint64_t PagePointer = Pointers[Address];
if (!PagePointer) {
// Page for this code didn't even exist, nothing to do
return;
}
// Page exists, just set the offset to zero
uintptr_t *BlockPointers = reinterpret_cast<uintptr_t*>(PagePointer);
BlockPointers[PageOffset] = 0;
}
uintptr_t AddBlockMapping(uint64_t Address, void *Ptr) {
uint64_t PageOffset = Address & (0x0FFF);
Address >>= 12;
uintptr_t *Pointers = reinterpret_cast<uintptr_t*>(PagePointer);
uint64_t LocalPagePointer = Pointers[Address];
if (!LocalPagePointer) {
// We don't have a page pointer for this address
// Allocate one now if we can
uintptr_t NewPageBacking = AllocateBackingForPage();
if (!NewPageBacking) {
// Couldn't allocate, return so the frontend can recover from this
return 0;
}
Pointers[Address] = NewPageBacking;
LocalPagePointer = NewPageBacking;
}
// Add the new pointer to the page block
uintptr_t *BlockPointers = reinterpret_cast<uintptr_t*>(LocalPagePointer);
uintptr_t CastPtr = reinterpret_cast<uintptr_t>(Ptr);
BlockPointers[PageOffset] = CastPtr;
return CastPtr;
}
void ClearCache();
void HintUsedRange(uint64_t Address, uint64_t Size);
private:
uintptr_t AllocateBackingForPage() {
uintptr_t NewBase = AllocateOffset;
uintptr_t NewEnd = AllocateOffset + SIZE_PER_PAGE;
if (NewEnd >= CODE_SIZE) {
// We ran out of block backing space. Need to clear the block cache and tell the JIT cores to clear their caches as well
// Tell whatever is calling this that it needs to do it.
return 0;
}
AllocateOffset = NewEnd;
return PageMemory + NewBase;
}
uintptr_t FindCodePointerForAddress(uint64_t Address) {
uint64_t PageOffset = Address & (0x0FFF);
Address >>= 12;
uintptr_t *Pointers = reinterpret_cast<uintptr_t*>(PagePointer);
uint64_t LocalPagePointer = Pointers[Address];
if (!LocalPagePointer) {
// We don't have a page pointer for this address
return 0;
}
// Find the pointer for the address in the blocks
uintptr_t *BlockPointers = reinterpret_cast<uintptr_t*>(LocalPagePointer);
return BlockPointers[PageOffset];
}
uintptr_t PagePointer;
uintptr_t PageMemory;
constexpr static size_t CODE_SIZE = 128 * 1024 * 1024;
constexpr static size_t SIZE_PER_PAGE = 4096 * 8;
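// CODE_SIZE caps the block-pointer backing at 128MiB; SIZE_PER_PAGE is one
// 8-byte pointer per byte of a 4KiB guest page (32KiB per page), matching the
// sizing notes in BlockCache.cpp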
size_t AllocateOffset {};
FEXCore::Context::Context *ctx;
};
}

118
Source/Interface/Core/CPUID.cpp Normal file
@@ -0,0 +1,118 @@
#include "Interface/Core/CPUID.h"
namespace FEXCore {
CPUIDEmu::FunctionResults CPUIDEmu::Function_0h() {
CPUIDEmu::FunctionResults Res{};
Res.Res[0] = 0x16; // Let's say we are a Skylake
// EBX, EDX, ECX become the manufacturer id string
Res.Res[1] = 0x756E6547; // "Genu"
Res.Res[2] = 0x49656E69; // "ineI"
Res.Res[3] = 0x6C65746E; // "ntel"
return Res;
}
// Processor Info and Features bits
CPUIDEmu::FunctionResults CPUIDEmu::Function_01h() {
CPUIDEmu::FunctionResults Res{};
Res.Res[0] = 0 | // Stepping
(0 << 4) | // Model
(0 << 8) | // Family ID
(0 << 12) | // Processor type
(0 << 16) | // Extended model ID
(0 << 20); // Extended family ID
Res.Res[1] = 0 | // Brand index
(8 << 8) | // Cache line size in bytes
(8 << 16) | // Number of addressable IDs for the logical cores in the physical CPU
(0 << 24); // Local APIC ID
Res.Res[2] = ~0U; // Let's say we support every feature for fun
Res.Res[3] = ~0U; // Let's say we support every feature for fun
Res.Res[3] &= ~(3 << 26); // Let's say that XSAVE isn't enabled by the OS. Prevents glibc from using XSAVE/XGETBV
return Res;
}
// Cache and TLB description
CPUIDEmu::FunctionResults CPUIDEmu::Function_02h() {
CPUIDEmu::FunctionResults Res{};
return Res;
}
// Deterministic cache parameters for each level
CPUIDEmu::FunctionResults CPUIDEmu::Function_04h() {
CPUIDEmu::FunctionResults Res{};
return Res;
}
CPUIDEmu::FunctionResults CPUIDEmu::Function_07h() {
CPUIDEmu::FunctionResults Res{};
// Number of subfunctions
Res.Res[0] = 0x0;
Res.Res[1] =
(1 << 0) | // FS/GS support
(1 << 3) | // BMI 1 support
(1 << 5) | // AVX2 support
(1 << 7) | // SMEP support
(1 << 8) // BMI2 support
;
Res.Res[2] = ~0U;
Res.Res[3] = ~0U;
return Res;
}
CPUIDEmu::FunctionResults CPUIDEmu::Function_0Dh() {
CPUIDEmu::FunctionResults Res{};
return Res;
}
// Highest extended function implemented
CPUIDEmu::FunctionResults CPUIDEmu::Function_8000_0000h() {
CPUIDEmu::FunctionResults Res{};
Res.Res[0] = 0x8000001F;
return Res;
}
// Extended processor and feature bits
CPUIDEmu::FunctionResults CPUIDEmu::Function_8000_0001h() {
CPUIDEmu::FunctionResults Res{};
Res.Res[2] = ~0U; // Let's say we support every feature for fun
Res.Res[3] = ~0U; // Let's say we support every feature for fun
return Res;
}
// Advanced power management
CPUIDEmu::FunctionResults CPUIDEmu::Function_8000_0006h() {
CPUIDEmu::FunctionResults Res{};
Res.Res[0] = (1 << 2); // APIC timer not affected by p-state
return Res;
}
CPUIDEmu::FunctionResults CPUIDEmu::Function_8000_0007h() {
CPUIDEmu::FunctionResults Res{};
return Res;
}
// Virtual and physical address sizes
CPUIDEmu::FunctionResults CPUIDEmu::Function_8000_0008h() {
CPUIDEmu::FunctionResults Res{};
return Res;
}
void CPUIDEmu::Init() {
RegisterFunction(0, std::bind(&CPUIDEmu::Function_0h, this));
RegisterFunction(1, std::bind(&CPUIDEmu::Function_01h, this));
RegisterFunction(2, std::bind(&CPUIDEmu::Function_02h, this));
RegisterFunction(4, std::bind(&CPUIDEmu::Function_04h, this));
RegisterFunction(7, std::bind(&CPUIDEmu::Function_07h, this));
RegisterFunction(0xD, std::bind(&CPUIDEmu::Function_0Dh, this));
RegisterFunction(0x8000'0000, std::bind(&CPUIDEmu::Function_8000_0000h, this));
RegisterFunction(0x8000'0001, std::bind(&CPUIDEmu::Function_8000_0001h, this));
RegisterFunction(0x8000'0006, std::bind(&CPUIDEmu::Function_8000_0006h, this));
RegisterFunction(0x8000'0007, std::bind(&CPUIDEmu::Function_8000_0007h, this));
RegisterFunction(0x8000'0008, std::bind(&CPUIDEmu::Function_8000_0008h, this));
}
}

44
Source/Interface/Core/CPUID.h Normal file
@@ -0,0 +1,44 @@
#pragma once
#include <functional>
#include <unordered_map>
#include "LogManager.h"
namespace FEXCore {
class CPUIDEmu final {
public:
void Init();
struct FunctionResults {
// Results in registers EAX, EBX, EDX, ECX respectively
uint32_t Res[4];
};
FunctionResults RunFunction(uint32_t Function) {
LogMan::Throw::A(FunctionHandlers.find(Function) != FunctionHandlers.end(), "Don't have a CPUID handler for 0x%08x", Function);
return FunctionHandlers[Function]();
}
private:
using FunctionHandler = std::function<FunctionResults()>;
void RegisterFunction(uint32_t Function, FunctionHandler Handler) {
FunctionHandlers[Function] = Handler;
}
std::unordered_map<uint32_t, FunctionHandler> FunctionHandlers;
// Functions
FunctionResults Function_0h();
FunctionResults Function_01h();
FunctionResults Function_02h();
FunctionResults Function_04h();
FunctionResults Function_07h();
FunctionResults Function_0Dh();
FunctionResults Function_8000_0000h();
FunctionResults Function_8000_0001h();
FunctionResults Function_8000_0006h();
FunctionResults Function_8000_0007h();
FunctionResults Function_8000_0008h();
};
}

800
Source/Interface/Core/Core.cpp Normal file
@@ -0,0 +1,800 @@
#include "Common/MathUtils.h"
#include "Common/Paths.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/BlockCache.h"
#include "Interface/Core/Core.h"
#include "Interface/Core/DebugData.h"
#include "Interface/Core/OpcodeDispatcher.h"
#include "Interface/Core/Interpreter/InterpreterCore.h"
#include "Interface/Core/JIT/JITCore.h"
#include "Interface/Core/LLVMJIT/LLVMCore.h"
#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/CodeLoader.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/CPUBackend.h>
#include <FEXCore/Core/X86Enums.h>
#include <fstream>
constexpr uint64_t STACK_OFFSET = 0xc000'0000;
constexpr uint64_t FS_OFFSET = 0xb000'0000;
constexpr uint64_t FS_SIZE = 0x1000;
namespace FEXCore::CPU {
bool CreateCPUCore(FEXCore::Context::Context *CTX) {
// This should be used for generating things that are shared between threads
CTX->CPUID.Init();
return true;
}
}
namespace FEXCore::Core {
constexpr std::array<std::string_view const, 22> FlagNames = {
"CF",
"",
"PF",
"",
"AF",
"",
"ZF",
"SF",
"TF",
"IF",
"DF",
"OF",
"IOPL",
"",
"NT",
"",
"RF",
"VM",
"AC",
"VIF",
"VIP",
"ID",
};
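// Indexed by EFLAGS bit position; the empty entries are reserved bits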
std::string_view const& GetFlagName(unsigned Flag) {
return FlagNames[Flag];
}
namespace DefaultFallbackCore {
class DefaultFallbackCore final : public FEXCore::CPU::CPUBackend {
public:
explicit DefaultFallbackCore(FEXCore::Core::ThreadState *Thread)
: ThreadState {reinterpret_cast<FEXCore::Core::InternalThreadState*>(Thread)} {
}
~DefaultFallbackCore() override = default;
std::string GetName() override { return "Default Fallback"; }
void *MapRegion(void *HostPtr, uint64_t VirtualGuestPtr, uint64_t Size) override {
return HostPtr;
}
void Initialize() override {}
bool NeedsOpDispatch() override { return false; }
void *CompileCode(FEXCore::IR::IRListView<true> const *IR, FEXCore::Core::DebugData *DebugData) override {
LogMan::Msg::E("Fell back to default code handler at RIP: 0x%lx", ThreadState->State.State.rip);
return nullptr;
}
private:
FEXCore::Core::InternalThreadState *ThreadState;
};
FEXCore::CPU::CPUBackend *CPUCreationFactory(FEXCore::Context::Context* CTX, FEXCore::Core::ThreadState *Thread) {
return new DefaultFallbackCore(Thread);
}
}
}
namespace FEXCore::Context {
Context::Context()
: FrontendDecoder {this}
, SyscallHandler {this} {
FallbackCPUFactory = FEXCore::Core::DefaultFallbackCore::CPUCreationFactory;
PassManager.AddDefaultPasses();
// PassManager.AddDefaultValidationPasses();
}
bool Context::GetFilenameHash(std::string const &Filename, std::string &Hash) {
// Calculate a hash for the input file
std::ifstream Input (Filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
if (Input.is_open()) {
std::streampos Size;
Size = Input.tellg();
Input.seekg(0, std::ios::beg);
std::string Data;
Data.resize(Size);
Input.read(&Data.at(0), Size);
Input.close();
std::hash<std::string> string_hash;
Hash = std::to_string(string_hash(Data));
return true;
}
return false;
}
void Context::AddThreadRIPsToEntryList(FEXCore::Core::InternalThreadState *Thread) {
for (auto &IR : Thread->IRLists) {
EntryList.insert(IR.first);
}
}
void Context::SaveEntryList() {
std::string const &Filename = SyscallHandler.GetFilename();
std::string hash_string;
if (GetFilenameHash(Filename, hash_string)) {
auto DataPath = FEXCore::Paths::GetDataPath();
DataPath += "/EntryCache/Entries_" + hash_string;
std::ofstream Output (DataPath.c_str(), std::ios::out | std::ios::binary);
if (Output.is_open()) {
for (auto Entry : EntryList) {
Output.write(reinterpret_cast<char const*>(&Entry), sizeof(Entry));
}
Output.close();
}
}
}
void Context::LoadEntryList() {
std::string const &Filename = SyscallHandler.GetFilename();
std::string hash_string;
if (GetFilenameHash(Filename, hash_string)) {
auto DataPath = FEXCore::Paths::GetDataPath();
DataPath += "/EntryCache/Entries_" + hash_string;
std::ifstream Input (DataPath.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
if (Input.is_open()) {
std::streampos Size;
Size = Input.tellg();
Input.seekg(0, std::ios::beg);
std::string Data;
Data.resize(Size);
Input.read(&Data.at(0), Size);
Input.close();
size_t EntryCount = Size / sizeof(uint64_t);
uint64_t *Entries = reinterpret_cast<uint64_t*>(&Data.at(0));
for (size_t i = 0; i < EntryCount; ++i) {
EntryList.insert(Entries[i]);
}
}
}
}
Context::~Context() {
ShouldStop.store(true);
Pause();
{
std::lock_guard<std::mutex> lk(ThreadCreationMutex);
for (auto &Thread : Threads) {
Thread->ExecutionThread.join();
}
for (auto &Thread : Threads) {
AddThreadRIPsToEntryList(Thread);
}
for (auto &Thread : Threads) {
delete Thread;
}
Threads.clear();
}
SaveEntryList();
}
bool Context::InitCore(FEXCore::CodeLoader *Loader) {
LocalLoader = Loader;
using namespace FEXCore::Core;
FEXCore::Core::CPUState NewThreadState{};
// Initialize default CPU state
NewThreadState.rip = ~0ULL;
for (int i = 0; i < 16; ++i) {
NewThreadState.gregs[i] = 0;
}
for (int i = 0; i < 16; ++i) {
NewThreadState.xmm[i][0] = 0xDEADBEEFULL;
NewThreadState.xmm[i][1] = 0xBAD0DAD1ULL;
}
memset(NewThreadState.flags, 0, 32);
NewThreadState.gs = 0;
NewThreadState.fs = FS_OFFSET + FS_SIZE / 2;
NewThreadState.flags[1] = 1;
FEXCore::Core::InternalThreadState *Thread = CreateThread(&NewThreadState, 0, 0);
// We are the parent thread
ParentThread = Thread;
auto MemLayout = Loader->GetLayout();
uint64_t BasePtr = AlignDown(std::get<0>(MemLayout), PAGE_SIZE);
uint64_t BaseSize = AlignUp(std::get<2>(MemLayout), PAGE_SIZE);
Thread->BlockCache->HintUsedRange(BasePtr, BaseSize);
uintptr_t BaseRegion = reinterpret_cast<uintptr_t>(MapRegion(Thread, BasePtr, BaseSize, true));
auto MemoryMapperFunction = [&](uint64_t Base, uint64_t Size) -> void* {
return MapRegion(Thread, Base, Size);
};
Loader->MapMemoryRegion(MemoryMapperFunction);
// Set up all of our memory mappings
MapRegion(Thread, FS_OFFSET, FS_SIZE, true);
void *StackPointer = MapRegion(Thread, STACK_OFFSET, Loader->StackSize(), true);
Thread->State.State.gregs[X86State::REG_RSP] = Loader->SetupStack(StackPointer, STACK_OFFSET);
// Now let the code loader setup memory
auto MemoryWriterFunction = [&](void const *Data, uint64_t Addr, uint64_t Size) -> void {
// Writes the machine code to be emulated into memory
memcpy(reinterpret_cast<void*>(BaseRegion + Addr), Data, Size);
};
Loader->LoadMemory(MemoryWriterFunction);
// Set the RIP to what the code loader wants
Thread->State.State.rip = Loader->DefaultRIP();
LogMan::Msg::D("Memory Base: 0x%016lx", MemoryMapper.GetBaseOffset<uint64_t>(0));
InitializeThread(Thread);
return true;
}
void Context::WaitForIdle() {
do {
bool AllPaused = true;
{
// Grab the mutex lock so a thread doesn't try and spin up while we are waiting
for (size_t i = 0; i < Threads.size(); ++i) {
if (Threads[i]->State.RunningEvents.Running.load() || Threads[i]->State.RunningEvents.WaitingToStart.load()) {
AllPaused = false;
break;
}
}
}
if (AllPaused)
break;
PauseWait.WaitFor(std::chrono::seconds(1));
} while (true);
}
void Context::Pause() {
// Tell all the threads that they should pause
{
std::lock_guard<std::mutex> lk(ThreadCreationMutex);
for (auto &Thread : Threads) {
Thread->State.RunningEvents.ShouldPause.store(true);
}
for (auto &Thread : Threads) {
Thread->StartRunning.NotifyAll();
}
Running = true;
}
WaitForIdle();
Running = false;
}
FEXCore::Context::ExitReason Context::RunLoop(bool WaitForIdle) {
{
// Spin up all the threads
std::lock_guard<std::mutex> lk(ThreadCreationMutex);
for (auto &Thread : Threads) {
Thread->State.RunningEvents.ShouldPause.store(false);
Thread->State.RunningEvents.WaitingToStart.store(true);
}
for (auto &Thread : Threads) {
Thread->StartRunning.NotifyAll();
}
Running = true;
}
if (WaitForIdle) {
this->WaitForIdle();
return ParentThread->ExitReason;
}
return FEXCore::Context::ExitReason::EXIT_ASYNC_RUN;
}
void Context::InitializeThread(FEXCore::Core::InternalThreadState *Thread) {
Thread->CPUBackend->Initialize();
Thread->FallbackBackend->Initialize();
// Compile all of our cached entries
LogMan::Msg::D("Precompiling: %ld blocks", EntryList.size());
for (auto Entry : EntryList) {
CompileRIP(Thread, Entry);
}
// This will create the execution thread but it won't actually start executing
Thread->ExecutionThread = std::thread(&Context::ExecutionThread, this, Thread);
// Wait for the thread to have started
Thread->ThreadWaiting.Wait();
}
void Context::RunThread(FEXCore::Core::InternalThreadState *Thread) {
// Tell the thread to start executing
Thread->StartRunning.NotifyAll();
}
FEXCore::Core::InternalThreadState* Context::CreateThread(FEXCore::Core::CPUState *NewThreadState, uint64_t ParentTID, uint64_t ChildTID) {
FEXCore::Core::InternalThreadState *Thread{};
// Grab the new thread object
{
std::lock_guard<std::mutex> lk(ThreadCreationMutex);
Thread = Threads.emplace_back(new FEXCore::Core::InternalThreadState{});
Thread->State.ThreadManager.TID = ++ThreadID;
}
Thread->OpDispatcher = std::make_unique<FEXCore::IR::OpDispatchBuilder>();
Thread->BlockCache = std::make_unique<FEXCore::BlockCache>(this);
Thread->CTX = this;
// Copy over the new thread state to the new object
memcpy(&Thread->State.State, NewThreadState, sizeof(FEXCore::Core::CPUState));
// Set up the thread manager state
Thread->State.ThreadManager.parent_tid = ParentTID;
Thread->State.ThreadManager.child_tid = ChildTID;
// Create CPU backend
switch (Config.Core) {
case FEXCore::Config::CONFIG_INTERPRETER: Thread->CPUBackend.reset(FEXCore::CPU::CreateInterpreterCore(this)); break;
case FEXCore::Config::CONFIG_IRJIT: Thread->CPUBackend.reset(FEXCore::CPU::CreateJITCore(this, Thread)); break;
case FEXCore::Config::CONFIG_LLVMJIT: Thread->CPUBackend.reset(FEXCore::CPU::CreateLLVMCore(Thread)); break;
case FEXCore::Config::CONFIG_CUSTOM: Thread->CPUBackend.reset(CustomCPUFactory(this, &Thread->State)); break;
default: LogMan::Msg::A("Unknown core configuration");
}
Thread->FallbackBackend.reset(FallbackCPUFactory(this, &Thread->State));
LogMan::Throw::A(!Thread->FallbackBackend->NeedsOpDispatch(), "Fallback CPU backend must not require OpDispatch");
return Thread;
}
uintptr_t Context::AddBlockMapping(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, void *Ptr) {
auto BlockMapPtr = Thread->BlockCache->AddBlockMapping(Address, Ptr);
if (BlockMapPtr == 0) {
Thread->BlockCache->ClearCache();
BlockMapPtr = Thread->BlockCache->AddBlockMapping(Address, Ptr);
LogMan::Throw::A(BlockMapPtr, "Couldn't add mapping after clearing mapping cache");
}
return BlockMapPtr;
}
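// Illustrative consequence of the policy above (hypothetical numbers): if the
// block cache fills up after, say, 100k mappings, the entire cache is dropped
// and only the block currently being added survives; every other block is
// lazily recompiled the next time it is executed.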
uintptr_t Context::CompileBlock(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) {
void *CodePtr {nullptr};
uint8_t const *GuestCode = MemoryMapper.GetPointer<uint8_t const*>(GuestRIP);
uint64_t TotalInstructions {0};
uint64_t TotalInstructionsLength {0};
// Do we already have this in the IR cache?
auto IR = Thread->IRLists.find(GuestRIP);
FEXCore::IR::IRListView<true> *IRList {};
FEXCore::Core::DebugData *DebugData {};
if (IR == Thread->IRLists.end()) {
bool HadDispatchError {false};
[[maybe_unused]] bool HadRIPSetter {false};
Thread->OpDispatcher->BeginBlock();
if (!FrontendDecoder.DecodeInstructionsInBlock(&GuestCode[TotalInstructionsLength], GuestRIP + TotalInstructionsLength)) {
if (Config.BreakOnFrontendFailure) {
LogMan::Msg::E("Had Frontend decoder error");
ShouldStop = true;
}
return 0;
}
auto DecodedOps = FrontendDecoder.GetDecodedInsts();
for (size_t i = 0; i < DecodedOps.second; ++i) {
FEXCore::X86Tables::X86InstInfo const* TableInfo {nullptr};
FEXCore::X86Tables::DecodedInst const* DecodedInfo {nullptr};
TableInfo = DecodedOps.first->at(i).TableInfo;
DecodedInfo = &DecodedOps.first->at(i);
// if (FrontendDecoder.JumpTargets.find(DecodedInfo->PC) != FrontendDecoder.JumpTargets.end()) {
// Thread->OpDispatcher->_EndBlock(0);
// auto JumpTarget = Thread->OpDispatcher->_BeginBlock();
// Thread->OpDispatcher->Arguments.JumpTargets.emplace(DecodedInfo->PC, JumpTarget.location);
// }
// // Check our fixups to see if they still are necessary
// auto fixup = Thread->OpDispatcher->Arguments.Fixups.find(DecodedInfo->PC);
// if (fixup != Thread->OpDispatcher->Arguments.Fixups.end()) {
// IR::AlignmentType JumpTarget;
// auto it = Thread->OpDispatcher->Arguments.JumpTargets.find(DecodedInfo->PC);
// if (it != Thread->OpDispatcher->Arguments.JumpTargets.end()) {
// JumpTarget = it->second;
// }
// for (auto it : fixup->second) {
// switch (it.SourceCondJump->Op) {
// case FEXCore::IR::OP_CONDJUMP: {
// if (JumpTarget.IsInvalid()) {
// Thread->OpDispatcher->_EndBlock(0);
// JumpTarget = Thread->OpDispatcher->_BeginBlock().location;
// }
// auto CondJumpOp = it.SourceCondJump->CW<IR::IROp_CondJump>();
// CondJumpOp->Location = JumpTarget;
// break;
// }
// case FEXCore::IR::OP_JUMP: {
// if (JumpTarget.IsInvalid()) {
// Thread->OpDispatcher->_EndBlock(0);
// JumpTarget = Thread->OpDispatcher->_BeginBlock().location;
// }
// auto JumpOp = it.SourceCondJump->CW<IR::IROp_Jump>();
// JumpOp->Location = JumpTarget;
// break;
// }
// default:
// LogMan::Msg::A("Unknown fixup kind");
// break;
// }
// }
// // No longer need this fixup
// Thread->OpDispatcher->Arguments.Fixups.erase(fixup);
// }
if (TableInfo->OpcodeDispatcher) {
auto Fn = TableInfo->OpcodeDispatcher;
std::invoke(Fn, Thread->OpDispatcher, DecodedInfo);
if (Thread->OpDispatcher->HadDecodeFailure()) {
if (Config.BreakOnFrontendFailure) {
LogMan::Msg::E("Had OpDispatcher error at 0x%lx", GuestRIP);
ShouldStop = true;
}
HadDispatchError = true;
}
else {
TotalInstructionsLength += DecodedInfo->InstSize;
TotalInstructions++;
}
}
else {
// LogMan::Msg::E("Missing OpDispatcher at 0x%lx", GuestRIP);
HadDispatchError = true;
}
// If we had a dispatch error then leave early
if (HadDispatchError) {
if (TotalInstructions == 0) {
// Couldn't handle any instruction in op dispatcher
Thread->OpDispatcher->ResetWorkingList();
return 0;
}
else {
// We had some instructions. Early exit
break;
}
}
// Track this so that if we are single stepping, or the block grows too large, we still know that this block set RIP
if (!HadDispatchError && (TableInfo->Flags & X86Tables::InstFlags::FLAGS_SETS_RIP)) {
HadRIPSetter = true;
}
if (TotalInstructions >= Config.MaxInstPerBlock) {
break;
}
}
//LogMan::Throw::A(Thread->OpDispatcher->Arguments.Fixups.empty(), "Still had fixups that weren't fixed!");
if (!Thread->OpDispatcher->Information.HadUnconditionalExit)
{
Thread->OpDispatcher->EndBlock(TotalInstructionsLength);
Thread->OpDispatcher->ExitFunction();
}
// Run the passmanager over the IR from the dispatcher
PassManager.Run(Thread->OpDispatcher.get());
if (Thread->OpDispatcher->ShouldDump)
// if (GuestRIP == 0x48b680)
{
std::stringstream out;
auto NewIR = Thread->OpDispatcher->ViewIR();
FEXCore::IR::Dump(&out, &NewIR);
printf("IR 0x%lx:\n%s\n@@@@@\n", GuestRIP, out.str().c_str());
}
// Do RA on the IR right now?
// Create a copy of the IR and place it in this thread's IR cache
auto IR = Thread->IRLists.try_emplace(GuestRIP, Thread->OpDispatcher->CreateIRCopy());
Thread->OpDispatcher->ResetWorkingList();
auto Debugit = Thread->DebugData.try_emplace(GuestRIP, FEXCore::Core::DebugData{});
Debugit.first->second.GuestCodeSize = TotalInstructionsLength;
Debugit.first->second.GuestInstructionCount = TotalInstructions;
IRList = IR.first->second.get();
DebugData = &Debugit.first->second;
Thread->Stats.BlocksCompiled.fetch_add(1);
}
else {
IRList = IR->second.get();
}
// Attempt to get the CPU backend to compile this code
CodePtr = Thread->CPUBackend->CompileCode(IRList, DebugData);
if (CodePtr != nullptr) {
// The core managed to compile the code.
return AddBlockMapping(Thread, GuestRIP, CodePtr);
}
return 0;
}
void Context::ExecutionThread(FEXCore::Core::InternalThreadState *Thread) {
Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_WAITING;
Thread->ThreadWaiting.NotifyAll();
Thread->StartRunning.Wait();
if (ShouldStop.load() || Thread->State.RunningEvents.ShouldStop.load()) {
ShouldStop = true;
Thread->State.RunningEvents.ShouldStop.store(true);
Thread->State.RunningEvents.Running.store(false);
Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_SHUTDOWN;
return;
}
Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_NONE;
Thread->State.RunningEvents.Running = true;
Thread->State.RunningEvents.ShouldPause = false;
constexpr uint32_t CoreDebugLevel = 0;
while (!ShouldStop.load() && !Thread->State.RunningEvents.ShouldStop.load()) {
uint64_t GuestRIP = Thread->State.State.rip;
if (CoreDebugLevel >= 1) {
char const *Name = LocalLoader->FindSymbolNameInRange(GuestRIP);
LogMan::Msg::D(">>>>RIP: 0x%lx: '%s'", GuestRIP, Name ? Name : "<Unknown>");
}
using BlockFn = void (*)(FEXCore::Core::InternalThreadState *Thread);
if (!Thread->CPUBackend->NeedsOpDispatch()) {
BlockFn Ptr = reinterpret_cast<BlockFn>(Thread->CPUBackend->CompileCode(nullptr, nullptr));
Ptr(Thread);
}
else {
// Do we already have this block compiled?
auto it = Thread->BlockCache->FindBlock(GuestRIP);
if (it == 0) {
// If not, compile it
it = CompileBlock(Thread, GuestRIP);
}
// Did we successfully compile this block?
if (it != 0) {
// Block is compiled, run it
BlockFn Ptr = reinterpret_cast<BlockFn>(it);
Ptr(Thread);
}
else {
// We have ONE more chance to try falling back to the fallback CPU backend
// This will most likely fail since regular use won't have a fallback core configured.
// It's mainly for testing new instruction encodings
void *CodePtr = Thread->FallbackBackend->CompileCode(nullptr, nullptr);
if (CodePtr) {
BlockFn Ptr = reinterpret_cast<BlockFn>(AddBlockMapping(Thread, GuestRIP, CodePtr));
Ptr(Thread);
}
else {
// Let the frontend know that something has happened that is unhandled
Thread->State.RunningEvents.ShouldPause = true;
Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_UNKNOWNERROR;
}
}
}
// if (GuestRIP == 0x48c8dd) {
// fflush(stdout);
// __builtin_trap();
// }
if (CoreDebugLevel >= 2) {
int i = 0;
LogMan::Msg::D("\tGPR[%d]: %016lx %016lx %016lx %016lx", i, Thread->State.State.gregs[i + 0], Thread->State.State.gregs[i + 1], Thread->State.State.gregs[i + 2], Thread->State.State.gregs[i + 3]);
i += 4;
LogMan::Msg::D("\tGPR[%d]: %016lx %016lx %016lx %016lx", i, Thread->State.State.gregs[i + 0], Thread->State.State.gregs[i + 1], Thread->State.State.gregs[i + 2], Thread->State.State.gregs[i + 3]);
i += 4;
LogMan::Msg::D("\tGPR[%d]: %016lx %016lx %016lx %016lx", i, Thread->State.State.gregs[i + 0], Thread->State.State.gregs[i + 1], Thread->State.State.gregs[i + 2], Thread->State.State.gregs[i + 3]);
i += 4;
LogMan::Msg::D("\tGPR[%d]: %016lx %016lx %016lx %016lx", i, Thread->State.State.gregs[i + 0], Thread->State.State.gregs[i + 1], Thread->State.State.gregs[i + 2], Thread->State.State.gregs[i + 3]);
uint64_t PackedFlags{};
for (unsigned i = 0; i < 32; ++i) {
PackedFlags |= static_cast<uint64_t>(Thread->State.State.flags[i]) << i;
}
LogMan::Msg::D("\tFlags: %016lx", PackedFlags);
}
if (CoreDebugLevel >= 3) {
int i = 0;
LogMan::Msg::D("\tXMM[%d][0]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][0], Thread->State.State.xmm[i + 1][0], Thread->State.State.xmm[i + 2][0], Thread->State.State.xmm[i + 3][0]);
LogMan::Msg::D("\tXMM[%d][1]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][1], Thread->State.State.xmm[i + 1][1], Thread->State.State.xmm[i + 2][1], Thread->State.State.xmm[i + 3][1]);
i += 4;
LogMan::Msg::D("\tXMM[%d][0]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][0], Thread->State.State.xmm[i + 1][0], Thread->State.State.xmm[i + 2][0], Thread->State.State.xmm[i + 3][0]);
LogMan::Msg::D("\tXMM[%d][1]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][1], Thread->State.State.xmm[i + 1][1], Thread->State.State.xmm[i + 2][1], Thread->State.State.xmm[i + 3][1]);
i += 4;
LogMan::Msg::D("\tXMM[%d][0]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][0], Thread->State.State.xmm[i + 1][0], Thread->State.State.xmm[i + 2][0], Thread->State.State.xmm[i + 3][0]);
LogMan::Msg::D("\tXMM[%d][1]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][1], Thread->State.State.xmm[i + 1][1], Thread->State.State.xmm[i + 2][1], Thread->State.State.xmm[i + 3][1]);
i += 4;
LogMan::Msg::D("\tXMM[%d][0]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][0], Thread->State.State.xmm[i + 1][0], Thread->State.State.xmm[i + 2][0], Thread->State.State.xmm[i + 3][0]);
LogMan::Msg::D("\tXMM[%d][1]: %016lx %016lx %016lx %016lx", i, Thread->State.State.xmm[i + 0][1], Thread->State.State.xmm[i + 1][1], Thread->State.State.xmm[i + 2][1], Thread->State.State.xmm[i + 3][1]);
uint64_t PackedFlags{};
for (unsigned i = 0; i < 32; ++i) {
PackedFlags |= static_cast<uint64_t>(Thread->State.State.flags[i]) << i;
}
LogMan::Msg::D("\tFlags: %016lx", PackedFlags);
}
if (Thread->State.RunningEvents.ShouldStop.load()) {
// If it is the parent thread that died then just leave
// XXX: This doesn't make sense when the parent thread doesn't outlive its children
if (Thread->State.ThreadManager.GetTID() == 1) {
ShouldStop = true;
Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_SHUTDOWN;
}
break;
}
if (RunningMode == FEXCore::Context::CoreRunningMode::MODE_SINGLESTEP || Thread->State.RunningEvents.ShouldPause) {
Thread->State.RunningEvents.Running = false;
Thread->State.RunningEvents.WaitingToStart = false;
// If something previously hasn't set the exit state then set it now
if (Thread->ExitReason == FEXCore::Context::ExitReason::EXIT_NONE)
Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_DEBUG;
PauseWait.NotifyAll();
Thread->StartRunning.Wait();
// If we set it to debug then set it back to none after this
// We want to retain the state if the frontend decides to leave
if (Thread->ExitReason == FEXCore::Context::ExitReason::EXIT_DEBUG)
Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_NONE;
Thread->State.RunningEvents.Running = true;
}
}
Thread->State.RunningEvents.WaitingToStart = false;
Thread->State.RunningEvents.Running = false;
}
// Debug interface
void Context::CompileRIP(FEXCore::Core::InternalThreadState *Thread, uint64_t RIP) {
uint64_t RIPBackup = Thread->State.State.rip;
Thread->State.State.rip = RIP;
// Erase the RIP from all the storage backings if it exists
Thread->IRLists.erase(RIP);
Thread->DebugData.erase(RIP);
Thread->BlockCache->Erase(RIP);
// We don't care if compilation passes or not
CompileBlock(Thread, RIP);
Thread->State.State.rip = RIPBackup;
}
void *Context::MapRegion(FEXCore::Core::InternalThreadState *Thread, uint64_t Offset, uint64_t Size, bool Fixed) {
void *Ptr = MemoryMapper.MapRegion(Offset, Size, Fixed);
Thread->CPUBackend->MapRegion(Ptr, Offset, Size);
Thread->FallbackBackend->MapRegion(Ptr, Offset, Size);
return Ptr;
}
void Context::MirrorRegion(FEXCore::Core::InternalThreadState *Thread, void *HostPtr, uint64_t Offset, uint64_t Size) {
Thread->CPUBackend->MapRegion(HostPtr, Offset, Size);
Thread->FallbackBackend->MapRegion(HostPtr, Offset, Size);
}
void *Context::ShmBase() {
return MemoryMapper.GetMemoryBase();
}
void Context::CopyMemoryMapping([[maybe_unused]] FEXCore::Core::InternalThreadState *ParentThread, FEXCore::Core::InternalThreadState *ChildThread) {
auto Regions = MemoryMapper.MappedRegions;
for (auto const& Region : Regions) {
ChildThread->CPUBackend->MapRegion(Region.Ptr, Region.Offset, Region.Size);
ChildThread->FallbackBackend->MapRegion(Region.Ptr, Region.Offset, Region.Size);
}
}
uint64_t Context::GetThreadCount() const {
return Threads.size();
}
FEXCore::Core::RuntimeStats *Context::GetRuntimeStatsForThread(uint64_t Thread) {
return &Threads[Thread]->Stats;
}
FEXCore::Core::CPUState Context::GetCPUState() {
return ParentThread->State.State;
}
void Context::GetMemoryRegions(std::vector<FEXCore::Memory::MemRegion> *Regions) {
Regions->clear();
Regions->resize(MemoryMapper.MappedRegions.size());
memcpy(&Regions->at(0), &MemoryMapper.MappedRegions.at(0), sizeof(FEXCore::Memory::MemRegion) * MemoryMapper.MappedRegions.size());
}
bool Context::GetDebugDataForRIP(uint64_t RIP, FEXCore::Core::DebugData *Data) {
auto it = ParentThread->DebugData.find(RIP);
if (it == ParentThread->DebugData.end()) {
return false;
}
memcpy(Data, &it->second, sizeof(FEXCore::Core::DebugData));
return true;
}
bool Context::FindHostCodeForRIP(uint64_t RIP, uint8_t **Code) {
uintptr_t HostCode = ParentThread->BlockCache->FindBlock(RIP);
if (!HostCode) {
return false;
}
*Code = reinterpret_cast<uint8_t*>(HostCode);
return true;
}
// XXX:
// bool Context::FindIRForRIP(uint64_t RIP, FEXCore::IR::IntrusiveIRList **ir) {
// auto IR = ParentThread->IRLists.find(RIP);
// if (IR == ParentThread->IRLists.end()) {
// return false;
// }
// //*ir = &IR->second;
// return true;
// }
// void Context::SetIRForRIP(uint64_t RIP, FEXCore::IR::IntrusiveIRList *const ir) {
// //ParentThread->IRLists.try_emplace(RIP, *ir);
// }
FEXCore::Core::ThreadState *Context::GetThreadState() {
return &ParentThread->State;
}
}

View File

@ -0,0 +1,23 @@
#pragma once
namespace FEXCore {
class CodeLoader;
}
namespace FEXCore::Context {
struct Context;
}
namespace FEXCore::CPU {
/**
* @brief Create the CPU core backend for the context passed in
*
* @param CTX
*
* @return true if the core was able to be created
*/
bool CreateCPUCore(FEXCore::Context::Context *CTX);
bool LoadCode(FEXCore::Context::Context *CTX, FEXCore::CodeLoader *Loader);
}

View File

@ -0,0 +1,5 @@
#pragma once
#include <stdint.h>
namespace FEXCore::CPU {
}

View File

@ -0,0 +1,927 @@
#include "Interface/Context/Context.h"
#include "Interface/Core/Frontend.h"
#include "Interface/Core/InternalThreadState.h"
#include "LogManager.h"
#include <array>
#include <cstring>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Debug/X86Tables.h>
namespace FEXCore::Frontend {
using namespace FEXCore::X86Tables;
constexpr size_t MAX_INST_SIZE = 15;
static uint32_t MapModRMToReg(uint8_t REX, uint8_t bits, bool HighBits, bool HasREX, bool HasXMM, uint8_t InvalidOffset = 16) {
constexpr std::array<uint64_t, 16> GPRIndexes = {
// Classical ordering?
FEXCore::X86State::REG_RAX,
FEXCore::X86State::REG_RCX,
FEXCore::X86State::REG_RDX,
FEXCore::X86State::REG_RBX,
FEXCore::X86State::REG_RSP,
FEXCore::X86State::REG_RBP,
FEXCore::X86State::REG_RSI,
FEXCore::X86State::REG_RDI,
FEXCore::X86State::REG_R8,
FEXCore::X86State::REG_R9,
FEXCore::X86State::REG_R10,
FEXCore::X86State::REG_R11,
FEXCore::X86State::REG_R12,
FEXCore::X86State::REG_R13,
FEXCore::X86State::REG_R14,
FEXCore::X86State::REG_R15,
};
constexpr std::array<uint64_t, 16> GPR8BitHighIndexes = {
// Classical ordering?
FEXCore::X86State::REG_RAX,
FEXCore::X86State::REG_RCX,
FEXCore::X86State::REG_RDX,
FEXCore::X86State::REG_RBX,
FEXCore::X86State::REG_RAX,
FEXCore::X86State::REG_RCX,
FEXCore::X86State::REG_RDX,
FEXCore::X86State::REG_RBX,
FEXCore::X86State::REG_R8,
FEXCore::X86State::REG_R9,
FEXCore::X86State::REG_R10,
FEXCore::X86State::REG_R11,
FEXCore::X86State::REG_R12,
FEXCore::X86State::REG_R13,
FEXCore::X86State::REG_R14,
FEXCore::X86State::REG_R15,
};
constexpr std::array<uint64_t, 16> XMMIndexes = {
FEXCore::X86State::REG_XMM_0,
FEXCore::X86State::REG_XMM_1,
FEXCore::X86State::REG_XMM_2,
FEXCore::X86State::REG_XMM_3,
FEXCore::X86State::REG_XMM_4,
FEXCore::X86State::REG_XMM_5,
FEXCore::X86State::REG_XMM_6,
FEXCore::X86State::REG_XMM_7,
FEXCore::X86State::REG_XMM_8,
FEXCore::X86State::REG_XMM_9,
FEXCore::X86State::REG_XMM_10,
FEXCore::X86State::REG_XMM_11,
FEXCore::X86State::REG_XMM_12,
FEXCore::X86State::REG_XMM_13,
FEXCore::X86State::REG_XMM_14,
FEXCore::X86State::REG_XMM_15,
};
const std::array<uint64_t, 16> *GPRs = &GPRIndexes;
if (HasXMM) {
GPRs = &XMMIndexes;
}
else if (HighBits && !HasREX) {
GPRs = &GPR8BitHighIndexes;
}
uint8_t Offset = (REX << 3) | bits;
if (Offset == InvalidOffset) {
return FEXCore::X86State::REG_INVALID;
}
return (*GPRs)[Offset];
}
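// Worked example (illustrative): with the REX extension bit set (REX=1) and
// ModRM bits 0b001, the offset is (1 << 3) | 0b001 = 9, which maps to
// FEXCore::X86State::REG_R9 in GPRIndexes.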
Decoder::Decoder(FEXCore::Context::Context *ctx)
: CTX {ctx} {
DecodedBuffer.resize(DefaultDecodedBufferSize);
}
bool Decoder::DecodeInstruction(uint8_t const* InstStream, uint64_t PC) {
uint8_t InstructionSize = 0;
std::array<uint8_t, MAX_INST_SIZE> Instruction;
bool InstructionDecoded = false;
bool ErrorDuringDecoding = false;
auto ReadByte = [InstStream, &Instruction, &InstructionSize]() -> uint8_t {
uint8_t Byte = InstStream[InstructionSize];
Instruction[InstructionSize] = Byte;
InstructionSize++;
LogMan::Throw::A(InstructionSize < MAX_INST_SIZE, "Max instruction size exceeded!");
return Byte;
};
auto PeekByte = [InstStream, &InstructionSize](uint8_t Offset) -> uint8_t {
uint8_t Byte = InstStream[InstructionSize + Offset];
return Byte;
};
auto ReadData = [&ReadByte, InstStream, &InstructionSize](size_t Size) -> uint64_t {
uint64_t Res;
#define READ_DATA(x, y) \
case x: { \
y const *Data = reinterpret_cast<y const*>(&InstStream[InstructionSize]); \
Res = *Data; \
} \
break
switch (Size) {
case 0: return 0;
READ_DATA(1, uint8_t);
READ_DATA(2, uint16_t);
READ_DATA(4, uint32_t);
READ_DATA(8, uint64_t);
default:
LogMan::Msg::A("Unknown data size to read");
}
#undef READ_DATA
for(size_t i = 0; i < Size; ++i) {
ReadByte();
}
return Res;
};
auto &DecodeInst = DecodedBuffer[DecodedSize];
memset(&DecodeInst, 0, sizeof(DecodedInst));
auto DecodeModRM = [&DecodeInst](FEXCore::X86Tables::ModRMDecoded ModRM, uint8_t *Displacement) {
// Do we have an offset?
if (ModRM.mod == 0b01) {
*Displacement = 1;
}
else if (ModRM.mod == 0b10) {
*Displacement = 4;
}
else if (ModRM.mod == 0 && ModRM.rm == 0b101)
*Displacement = 4;
// Ensure this flag is set
DecodeInst.Flags |= DecodeFlags::FLAG_MODRM_PRESENT;
};
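// Illustrative example (not executed): `mov eax, [rbx+0x10]` encodes as 8B 43 10.
// ModRM = 0x43 -> mod=0b01, reg=0b000 (EAX), rm=0b011 (RBX), so DecodeModRM
// reports a 1 byte displacement, which is the trailing 0x10.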
auto DecodeSIB = [&ReadByte, &DecodeInst](FEXCore::X86Tables::ModRMDecoded ModRM, uint8_t *Displacement) -> bool {
bool HasSIB = ((ModRM.mod != 0b11) &&
(ModRM.rm == 0b100));
if (HasSIB) {
FEXCore::X86Tables::SIBDecoded SIB;
if (DecodeInst.DecodedSIB) {
SIB.Hex = DecodeInst.SIB;
}
else {
// Haven't yet grabbed SIB, pull it now
DecodeInst.SIB = ReadByte();
SIB.Hex = DecodeInst.SIB;
DecodeInst.DecodedSIB = true;
}
// Ensure this flag is set
DecodeInst.Flags |= DecodeFlags::FLAG_SIB_PRESENT;
// Determine the displacement size. Notably if mod is 0b00 and the SIB base is 0b101, aka BP or R13, then we have a 32bit displacement
if (ModRM.mod == 0b01) {
*Displacement = 1;
}
else if (ModRM.mod == 0b10) {
*Displacement = 4;
}
else if (ModRM.mod == 0b00 && ModRM.rm == 0b101) {
*Displacement = 4;
}
else if (ModRM.mod == 0b00 && ModRM.rm == 0b100 && SIB.base == 0b101) {
*Displacement = 4;
}
}
return HasSIB;
};
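// Illustrative example (not executed): `lea eax, [rbx+rcx*4+0x8]` encodes as 8D 44 8B 08.
// ModRM = 0x44 -> mod=0b01, rm=0b100, so a SIB byte follows. SIB = 0x8B ->
// scale=0b10 (*4), index=0b001 (RCX), base=0b011 (RBX), and mod=0b01 gives a
// 1 byte displacement of 0x08.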
auto NormalOp = [&DecodeModRM, &DecodeSIB, &ReadByte, &ReadData, &DecodeInst, &InstructionSize](auto &Table, auto Op) -> bool {
FEXCore::X86Tables::X86InstInfo *Info = &Table[Op];
DecodeInst.OP = Op;
DecodeInst.TableInfo = Info;
// XXX: Once we support 32bit x86 then this will be necessary to support
if (Info->Type == FEXCore::X86Tables::TYPE_LEGACY_PREFIX) {
DecodeInst.Flags |= DecodeFlags::FLAG_LEGACY_PREFIX;
LogMan::Msg::A("Legacy Prefix");
return false;
}
if (Info->Type == FEXCore::X86Tables::TYPE_UNKNOWN ||
Info->Type == FEXCore::X86Tables::TYPE_INVALID) {
LogMan::Msg::A("Invalid or Unknown instruction: %s 0x%04x 0x%lx", Info->Name, Op, DecodeInst.PC);
return false;
}
if (Info->Type >= FEXCore::X86Tables::TYPE_GROUP_1 &&
Info->Type <= FEXCore::X86Tables::TYPE_GROUP_P) {
LogMan::Msg::A("Group Ops should have been decoded before this!");
return false;
}
// New instruction size decoding
{
// Decode destinations first
uint32_t DstSizeFlag = FEXCore::X86Tables::InstFlags::GetSizeDstFlags(Info->Flags);
uint32_t SrcSizeFlag = FEXCore::X86Tables::InstFlags::GetSizeSrcFlags(Info->Flags);
if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_8BIT) {
DecodeInst.Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_8BIT);
}
else if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_16BIT) {
DecodeInst.Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_16BIT);
}
else if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_128BIT) {
DecodeInst.Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_128BIT);
}
else if (DecodeInst.Flags & DecodeFlags::FLAG_OPERAND_SIZE &&
DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_DEF) {
// See table 1-2. Operand-Size Overrides for this decoding
// If the default operating mode is 32bit and we have the operand size flag then the operating size drops to 16bit
DecodeInst.Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_16BIT);
}
else if (DecodeInst.Flags & DecodeFlags::FLAG_REX_WIDENING ||
DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BIT) {
DecodeInst.Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_64BIT);
}
else {
DecodeInst.Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_32BIT);
}
// Decode sources
if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_8BIT) {
DecodeInst.Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_8BIT);
}
else if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_16BIT) {
DecodeInst.Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_16BIT);
}
else if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_128BIT) {
DecodeInst.Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_128BIT);
}
else if (DecodeInst.Flags & DecodeFlags::FLAG_OPERAND_SIZE &&
SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_DEF) {
// See table 1-2. Operand-Size Overrides for this decoding
// If the default operating mode is 32bit and we have the operand size flag then the operating size drops to 16bit
DecodeInst.Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_16BIT);
}
else if (DecodeInst.Flags & DecodeFlags::FLAG_REX_WIDENING ||
SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BIT) {
DecodeInst.Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_64BIT);
}
else {
DecodeInst.Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_32BIT);
}
}
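// Worked examples of the size decoding above (illustrative):
//    89 C8 -> mov eax, ecx : SIZE_DEF gives the default 32bit operation
// 66 89 C8 -> mov ax, cx   : the 0x66 prefix drops SIZE_DEF down to 16bit
// 48 89 C8 -> mov rax, rcx : REX.W widens the operation to 64bit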
// Is ModRM present via explicit instruction encoding or REX?
bool HasMODRM = !!(DecodeInst.Flags & DecodeFlags::FLAG_MODRM_PRESENT);
HasMODRM |= !!(Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_MODRM);
bool HasSIB = false;
bool HasWideningDisplacement = DecodeInst.Flags & DecodeFlags::FLAG_REX_WIDENING;
bool HasNarrowingDisplacement = DecodeInst.Flags & DecodeFlags::FLAG_OPERAND_SIZE;
// This is used for ModRM register modification
// For both modrm.reg and modrm.rm (when mod == 0b11), a value >= 0b100
// changes from the expected register to the high 8bits of the lower registers
// Bit annoying to support
// In the case of no ModRM (REX in byte situation) it is unaffected
bool Is8BitSrc = (DecodeFlags::GetSizeSrcFlags(DecodeInst.Flags) == DecodeFlags::SIZE_8BIT);
bool Is8BitDest = (DecodeFlags::GetSizeDstFlags(DecodeInst.Flags) == DecodeFlags::SIZE_8BIT);
bool HasREX = !!(DecodeInst.Flags & DecodeFlags::FLAG_REX_PREFIX);
bool HasXMMSrc = !!(Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_XMM_FLAGS) && !HAS_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_SRC_GPR);
bool HasXMMDst = !!(Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_XMM_FLAGS) && !HAS_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_GPR);
bool HasHighXMM = HAS_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_HIGH_XMM_REG);
uint8_t Displacement = 0;
auto *CurrentDest = &DecodeInst.Dest;
if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_RAX) ||
HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_RDX)) {
// Some instructions hardcode their destination as RAX
CurrentDest->TypeGPR.Type = DecodedOperand::TYPE_GPR;
CurrentDest->TypeGPR.HighBits = false;
CurrentDest->TypeGPR.GPR = HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_RAX) ? FEXCore::X86State::REG_RAX : FEXCore::X86State::REG_RDX;
CurrentDest = &DecodeInst.Src1;
}
if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_REX_IN_BYTE)) {
LogMan::Throw::A(!HasMODRM, "This instruction shouldn't have ModRM!");
// If the REX is in the byte that means the lower nibble of the OP contains the destination GPR
// This also means that the destination is always a GPR on these ones
// ADDITIONALLY:
// If there is a REX prefix then that allows extended GPR usage
CurrentDest->TypeGPR.Type = DecodedOperand::TYPE_GPR;
DecodeInst.Dest.TypeGPR.HighBits = (Is8BitDest && !HasREX && (Op & 0b111) >= 0b100) || HasHighXMM;
CurrentDest->TypeGPR.GPR = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, Op & 0b111, Is8BitDest, HasREX, false);
}
if (HasMODRM) {
if (!DecodeInst.DecodedModRM) {
DecodeInst.ModRM = ReadByte();
DecodeInst.DecodedModRM = true;
}
FEXCore::X86Tables::ModRMDecoded ModRM;
ModRM.Hex = DecodeInst.ModRM;
DecodeModRM(ModRM, &Displacement);
HasSIB = DecodeSIB(ModRM, &Displacement);
}
uint8_t Bytes = Info->MoreBytes + Displacement;
if ((Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_DISPLACE_SIZE_MUL_2) && HasWideningDisplacement) {
Bytes <<= 1;
}
if ((Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_DISPLACE_SIZE_DIV_2) && HasNarrowingDisplacement) {
Bytes >>= 1;
}
auto ModRMOperand = [&](FEXCore::X86Tables::DecodedOperand &GPR, FEXCore::X86Tables::DecodedOperand &NonGPR, bool HasXMMGPR, bool HasXMMNonGPR, bool GPR8Bit, bool NonGPR8Bit) {
FEXCore::X86Tables::ModRMDecoded ModRM;
ModRM.Hex = DecodeInst.ModRM;
// Decode the GPR source first
GPR.TypeGPR.Type = DecodedOperand::TYPE_GPR;
GPR.TypeGPR.HighBits = (GPR8Bit && ModRM.reg >= 0b100 && !HasREX) || HasHighXMM;
GPR.TypeGPR.GPR = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_R ? 1 : 0, ModRM.reg, GPR8Bit, HasREX, HasXMMGPR);
// ModRM.mod == 0b11 == Register operand
// ModRM.mod != 0b11 == Memory operand
if (ModRM.mod == 0b11) {
NonGPR.TypeGPR.Type = DecodedOperand::TYPE_GPR;
NonGPR.TypeGPR.HighBits = (NonGPR8Bit && ModRM.rm >= 0b100 && !HasREX) || HasHighXMM;
NonGPR.TypeGPR.GPR = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, ModRM.rm, NonGPR8Bit, HasREX, HasXMMNonGPR);
}
else {
if (HasSIB) {
// SIB
FEXCore::X86Tables::SIBDecoded SIB;
SIB.Hex = DecodeInst.SIB;
NonGPR.TypeSIB.Type = DecodedOperand::TYPE_SIB;
NonGPR.TypeSIB.Scale = 1 << SIB.scale;
// The invalid encoding types are described at Table 1-12. "promoted unsigned is always non-zero"
NonGPR.TypeSIB.Index = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_X ? 1 : 0, SIB.index, false, false, false, 0b100);
NonGPR.TypeSIB.Base = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, SIB.base, false, false, false, ModRM.mod == 0 ? 0b101 : 16);
uint64_t Literal {0};
LogMan::Throw::A(Displacement <= 4, "Number of bytes should be <= 4 for literal src");
Literal = ReadData(Displacement);
if (Displacement == 1) {
Literal = static_cast<int8_t>(Literal);
}
Bytes -= Displacement;
NonGPR.TypeSIB.Offset = Literal;
}
else if (ModRM.mod == 0) {
// Explained in Table 1-14. "Operand Addressing Using ModRM and SIB Bytes"
LogMan::Throw::A(ModRM.rm != 0b100, "Shouldn't have hit this here");
if (ModRM.rm == 0b101) {
// 32bit Displacement
uint32_t Literal;
Literal = ReadData(4);
Bytes -= 4;
NonGPR.TypeRIPLiteral.Type = DecodedOperand::TYPE_RIP_RELATIVE;
NonGPR.TypeRIPLiteral.Literal = Literal;
}
else {
// Register-direct addressing
NonGPR.TypeGPR.Type = DecodedOperand::TYPE_GPR_DIRECT;
NonGPR.TypeGPR.GPR = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, ModRM.rm, false, false, false);
}
}
else {
uint8_t DisplacementSize = ModRM.mod == 1 ? 1 : 4;
uint32_t Literal;
Literal = ReadData(DisplacementSize);
if (DisplacementSize == 1) {
Literal = static_cast<int8_t>(Literal);
}
Bytes -= DisplacementSize;
NonGPR.TypeGPRIndirect.Type = DecodedOperand::TYPE_GPR_INDIRECT;
NonGPR.TypeGPRIndirect.GPR = MapModRMToReg(DecodeInst.Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, ModRM.rm, false, false, false);
NonGPR.TypeGPRIndirect.Displacement = Literal;
}
}
};
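// Illustrative example (not executed): `mov eax, [rip+0x1000]` encodes as 8B 05 00 10 00 00.
// ModRM = 0x05 -> mod=0b00, rm=0b101, so the lambda above decodes a
// TYPE_RIP_RELATIVE operand with a 32bit literal of 0x1000.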
if (Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_MODRM &&
Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SF_MOD_DST) {
ModRMOperand(DecodeInst.Src1, DecodeInst.Dest, HasXMMSrc, HasXMMDst, Is8BitSrc, Is8BitDest);
}
// This is almost the same as when the ModRM is the destination type
// The main difference being that Dst and Src flip which bits they use (reg<->rm)
auto *CurrentSrc = &DecodeInst.Src1;
if (Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_MODRM &&
!(Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SF_MOD_DST)) {
ModRMOperand(DecodeInst.Dest, DecodeInst.Src1, HasXMMDst, HasXMMSrc, Is8BitDest, Is8BitSrc);
CurrentSrc = &DecodeInst.Src2;
}
else if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_SRC_RAX)) {
CurrentSrc->TypeGPR.Type = DecodedOperand::TYPE_GPR;
CurrentSrc->TypeGPR.HighBits = false;
CurrentSrc->TypeGPR.GPR = FEXCore::X86State::REG_RAX;
CurrentSrc = &DecodeInst.Src2;
}
else if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_SRC_RCX)) {
CurrentSrc->TypeGPR.Type = DecodedOperand::TYPE_GPR;
CurrentSrc->TypeGPR.HighBits = false;
CurrentSrc->TypeGPR.GPR = FEXCore::X86State::REG_RCX;
CurrentSrc = &DecodeInst.Src2;
}
if (Bytes != 0) {
LogMan::Throw::A(Bytes <= 8, "Number of bytes should be <= 8 for literal src");
CurrentSrc->TypeLiteral.Size = Bytes;
uint64_t Literal {0};
Literal = ReadData(Bytes);
if (Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SRC_SEXT) {
if (Bytes == 1) {
Literal = static_cast<int8_t>(Literal);
}
else if (Bytes == 2) {
Literal = static_cast<int16_t>(Literal);
}
else {
Literal = static_cast<int32_t>(Literal);
}
}
Bytes = 0;
CurrentSrc->TypeLiteral.Type = DecodedOperand::TYPE_LITERAL;
CurrentSrc->TypeLiteral.Literal = Literal;
}
if (Bytes != 0) {
LogMan::Msg::A("Inst at 0x%lx: 0x%04x '%s' Had an instruction of size %d with %d remaining", DecodeInst.PC, DecodeInst.OP, DecodeInst.TableInfo->Name, InstructionSize, Bytes);
}
LogMan::Throw::A(Bytes == 0, "Had undecoded bytes left in the instruction encoding");
DecodeInst.InstSize = InstructionSize;
return true;
};
auto NormalOpHeader = [&ReadByte, &DecodeInst, &NormalOp](auto &Table, auto Op) -> bool {
FEXCore::X86Tables::X86InstInfo *Info = &Table[Op];
DecodeInst.OP = Op;
DecodeInst.TableInfo = Info;
// XXX: Once we support 32bit x86 then this will be necessary to support
if (Info->Type == FEXCore::X86Tables::TYPE_LEGACY_PREFIX) {
DecodeInst.Flags |= DecodeFlags::FLAG_LEGACY_PREFIX;
LogMan::Msg::A("Legacy Prefix");
return false;
}
if (Info->Type == FEXCore::X86Tables::TYPE_UNKNOWN ||
Info->Type == FEXCore::X86Tables::TYPE_INVALID) {
LogMan::Msg::A("Invalid or Unknown instruction: %s 0x%04x 0x%lx", Info->Name, Op, DecodeInst.PC);
return false;
}
if (Info->Type >= FEXCore::X86Tables::TYPE_GROUP_6 &&
Info->Type <= FEXCore::X86Tables::TYPE_GROUP_P) {
#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_6) << 5) | (prefix) << 3 | (Reg))
constexpr uint16_t PF_NONE = 0;
constexpr uint16_t PF_F3 = 1;
constexpr uint16_t PF_66 = 2;
constexpr uint16_t PF_F2 = 3;
uint16_t PrefixType = PF_NONE;
if (DecodeInst.LastEscapePrefix == 0xF3)
PrefixType = PF_F3;
else if (DecodeInst.LastEscapePrefix == 0xF2)
PrefixType = PF_F2;
else if (DecodeInst.LastEscapePrefix == 0x66)
PrefixType = PF_66;
// We have ModRM
uint8_t ModRMByte = ReadByte();
DecodeInst.ModRM = ModRMByte;
DecodeInst.DecodedModRM = true;
DecodeInst.Flags |= DecodeFlags::FLAG_MODRM_PRESENT;
FEXCore::X86Tables::ModRMDecoded ModRM;
ModRM.Hex = DecodeInst.ModRM;
uint16_t Op = OPD(Info->Type, PrefixType, ModRM.reg);
FEXCore::X86Tables::X86InstInfo *Info = &SecondInstGroupOps[Op];
#undef OPD
if (Info->Type == FEXCore::X86Tables::TYPE_SECOND_GROUP_MODRM) {
// Everything in this group is a privileged instruction aside from XGETBV
constexpr std::array<uint8_t, 8> RegToField = {
255,
0,
1,
2,
255,
255,
255,
3,
};
uint8_t Field = RegToField[ModRM.reg];
LogMan::Throw::A(Field != 255, "Invalid field selected!");
uint8_t Op = (Field << 3) | ModRM.rm;
return NormalOp(SecondModRMTableOps, Op);
}
else {
return NormalOp(SecondInstGroupOps, Op);
}
}
else if (Info->Type == FEXCore::X86Tables::TYPE_X87_TABLE_PREFIX) {
// We have ModRM
uint8_t ModRMByte = ReadByte();
DecodeInst.ModRM = ModRMByte;
DecodeInst.DecodedModRM = true;
DecodeInst.Flags |= DecodeFlags::FLAG_MODRM_PRESENT;
FEXCore::X86Tables::ModRMDecoded ModRM;
ModRM.Hex = DecodeInst.ModRM;
uint16_t X87Op = ((Op - 0xD8) << 8) | ModRMByte;
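// e.g. (illustrative) D8 C1 is FADD ST(0), ST(1): Op = 0xD8 and ModRM = 0xC1,
// so X87Op = ((0xD8 - 0xD8) << 8) | 0xC1 = 0x0C1 in X87Ops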
return NormalOp(X87Ops, X87Op);
}
else if (Info->Type >= FEXCore::X86Tables::TYPE_GROUP_1 &&
Info->Type <= FEXCore::X86Tables::TYPE_GROUP_11) {
uint8_t ModRMByte = ReadByte();
DecodeInst.ModRM = ModRMByte;
DecodeInst.DecodedModRM = true;
DecodeInst.Flags |= DecodeFlags::FLAG_MODRM_PRESENT;
FEXCore::X86Tables::ModRMDecoded ModRM;
ModRM.Hex = DecodeInst.ModRM;
#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg))
return NormalOp(PrimaryInstGroupOps, OPD(Info->Type, Info->MoreBytes, ModRM.reg));
#undef OPD
}
else if (Info->Type == FEXCore::X86Tables::TYPE_MODRM_TABLE_PREFIX) {
}
else if (Info->Type == FEXCore::X86Tables::TYPE_VEX_TABLE_PREFIX) {
uint16_t map_select = 1;
uint16_t pp = 0;
uint8_t Byte1 = ReadByte();
if (Op == 0xC5) { // Two byte VEX
pp = Byte1 & 0b11;
}
else { // 0xC4 = Three byte VEX
uint8_t Byte2 = ReadByte();
pp = Byte2 & 0b11;
map_select = Byte1 & 0b11111;
LogMan::Throw::A(map_select >= 1 && map_select <= 3, "We don't understand a map_select of: %d", map_select);
}
uint16_t VEXOp = ReadByte();
#define OPD(map_select, pp, opcode) (((map_select - 1) << 10) | (pp << 8) | (opcode))
return NormalOp(VEXTableOps, OPD(map_select, pp, VEXOp));
#undef OPD
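// Illustrative example (not executed): C5 F1 FE C2 is `vpaddd xmm0, xmm1, xmm2`.
// 0xC5 selects two byte VEX, Byte1 = 0xF1 gives pp = 0b01 (the 0x66 class) with
// the implied map_select of 1 (0F map), and VEXOp = 0xFE looks up
// OPD(1, 1, 0xFE) = 0x1FE in VEXTableOps.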
}
else if (Info->Type == FEXCore::X86Tables::TYPE_XOP_TABLE_PREFIX) {
LogMan::Msg::A("XOP and POP <modrm> aren't handled!");
uint16_t Byte1 = ReadByte();
uint16_t Byte2 = ReadByte();
uint16_t XOPOp = ReadByte();
uint16_t map_select = Byte1 & 0b11111;
LogMan::Throw::A(map_select >= 8 && map_select <= 0xA, "We don't understand a map_select of: %d", map_select);
uint16_t pp = Byte2 & 0b11;
map_select -= 8;
#define OPD(group, pp, opcode) ( (group << 10) | (pp << 8) | (opcode))
return NormalOp(XOPTableOps, OPD(map_select, pp, XOPOp));
#undef OPD
}
return NormalOp(Table, Op);
};
DecodeInst.PC = PC;
while (!InstructionDecoded && !ErrorDuringDecoding) {
uint8_t Op = ReadByte();
switch (Op) {
case 0x0F: {// Escape Op
uint8_t EscapeOp = ReadByte();
switch (EscapeOp) {
case 0x0F: { // 3DNow!
// 3DNow! Instruction Encoding: 0F 0F [ModRM] [SIB] [Displacement] [Opcode]
// Decode ModRM
uint8_t ModRMByte = ReadByte();
DecodeInst.ModRM = ModRMByte;
DecodeInst.DecodedModRM = true;
DecodeInst.Flags |= DecodeFlags::FLAG_MODRM_PRESENT;
FEXCore::X86Tables::ModRMDecoded ModRM;
ModRM.Hex = DecodeInst.ModRM;
uint8_t Displacement = 0;
DecodeModRM(ModRM, &Displacement);
DecodeSIB(ModRM, &Displacement);
// Take a peek at the op just past the displacement
uint8_t Op = PeekByte(Displacement);
if (NormalOpHeader(FEXCore::X86Tables::DDDNowOps, Op)) {
InstructionDecoded = true;
}
// Make sure to read the opcode into our internal structure
ReadByte();
break;
}
case 0x38: { // F38 Table!
constexpr uint16_t PF_38_NONE = 0;
constexpr uint16_t PF_38_66 = 1;
constexpr uint16_t PF_38_F2 = 2;
uint16_t Prefix = PF_38_NONE;
if (DecodeInst.LastEscapePrefix == 0xF2) // REPNE
Prefix = PF_38_F2;
else if (DecodeInst.LastEscapePrefix == 0x66) // Operand Size
Prefix = PF_38_66;
uint16_t Op = (Prefix << 8) | ReadByte();
if (NormalOpHeader(FEXCore::X86Tables::H0F38TableOps, Op)) {
InstructionDecoded = true;
}
break;
}
case 0x3A: { // F3A Table!
constexpr uint16_t PF_3A_NONE = 0;
constexpr uint16_t PF_3A_66 = 1;
constexpr uint16_t PF_3A_REX = (1 << 1);
uint16_t Prefix = PF_3A_NONE;
if (DecodeInst.LastEscapePrefix == 0x66) // Operand Size
Prefix = PF_3A_66;
if (DecodeInst.Flags & DecodeFlags::FLAG_REX_PREFIX)
Prefix |= PF_3A_REX;
uint16_t Op = (Prefix << 8) | ReadByte();
if (NormalOpHeader(FEXCore::X86Tables::H0F3ATableOps, Op)) {
InstructionDecoded = true;
}
break;
}
default: // Two byte table!
// x86-64 abuses three legacy prefixes to extend the table encodings
// 0x66 - Operand Size prefix
// 0xF2 - REPNE prefix
// 0xF3 - REP prefix
// If any of these three prefixes are used then it falls down the subtable
// Additionally: If you hit a run of different prefixes then only the LAST one before the escape byte works for subtable selection
if (DecodeInst.LastEscapePrefix == 0xF3) { // REP
// Remove the prefix so it doesn't affect calculations.
// This is only an escape prefix rather than a modifier now
DecodeInst.Flags &= ~DecodeFlags::FLAG_REP_PREFIX;
if (NormalOpHeader(FEXCore::X86Tables::RepModOps, EscapeOp)) {
InstructionDecoded = true;
}
}
else if (DecodeInst.LastEscapePrefix == 0xF2) { // REPNE
// Remove the prefix so it doesn't affect calculations.
// This is only an escape prefix rather than a modifier now
DecodeInst.Flags &= ~DecodeFlags::FLAG_REPNE_PREFIX;
if (NormalOpHeader(FEXCore::X86Tables::RepNEModOps, EscapeOp)) {
InstructionDecoded = true;
}
}
else if (DecodeInst.LastEscapePrefix == 0x66) { // Operand Size
// Remove the prefix so it doesn't affect calculations.
// This is only an escape prefix rather than a modifier now
DecodeInst.Flags &= ~DecodeFlags::FLAG_OPERAND_SIZE;
if (NormalOpHeader(FEXCore::X86Tables::OpSizeModOps, EscapeOp)) {
InstructionDecoded = true;
}
}
else if (NormalOpHeader(FEXCore::X86Tables::SecondBaseOps, EscapeOp)) {
InstructionDecoded = true;
}
break;
}
break;
}
case 0x66: // Operand Size prefix
DecodeInst.Flags |= DecodeFlags::FLAG_OPERAND_SIZE;
DecodeInst.LastEscapePrefix = Op;
break;
case 0x67: // Address Size override prefix
DecodeInst.Flags |= DecodeFlags::FLAG_ADDRESS_SIZE;
break;
case 0x26: // ES legacy prefix
case 0x2E: // CS legacy prefix
case 0x3E: // DS legacy prefix
// Annoyingly GCC generates NOP ops with these prefixes
// Just ignore them for now
// eg. 66 2e 0f 1f 84 00 00 00 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
break;
case 0x40: // REX - 0x40-0x4F
case 0x41:
case 0x42:
case 0x43:
case 0x44:
case 0x45:
case 0x46:
case 0x47:
case 0x48:
case 0x49:
case 0x4A:
case 0x4B:
case 0x4C:
case 0x4D:
case 0x4E:
case 0x4F: {
DecodeInst.Flags |= DecodeFlags::FLAG_REX_PREFIX;
// Widening displacement
if (Op & 0b1000)
DecodeInst.Flags |= DecodeFlags::FLAG_REX_WIDENING;
// XGPR_B bit set
if (Op & 0b0001)
DecodeInst.Flags |= DecodeFlags::FLAG_REX_XGPR_B;
// XGPR_X bit set
if (Op & 0b0010)
DecodeInst.Flags |= DecodeFlags::FLAG_REX_XGPR_X;
// XGPR_R bit set
if (Op & 0b0100)
DecodeInst.Flags |= DecodeFlags::FLAG_REX_XGPR_R;
break;
}
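// Illustrative example (not executed): 4C 89 C8 is `mov rax, r9`.
// REX = 0x4C = 0b0100'1100 sets the widening (W) and XGPR_R bits, so the
// 64bit operation reads ModRM.reg as an extended register (r9).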
case 0xF0: // LOCK prefix
DecodeInst.Flags |= DecodeFlags::FLAG_LOCK;
break;
case 0xF2: // REPNE prefix
DecodeInst.Flags |= DecodeFlags::FLAG_REPNE_PREFIX;
DecodeInst.LastEscapePrefix = Op;
break;
case 0xF3: // REP prefix
DecodeInst.Flags |= DecodeFlags::FLAG_REP_PREFIX;
DecodeInst.LastEscapePrefix = Op;
break;
case 0x64: // FS prefix
DecodeInst.Flags |= DecodeFlags::FLAG_FS_PREFIX;
break;
case 0x65: // GS prefix
DecodeInst.Flags |= DecodeFlags::FLAG_GS_PREFIX;
break;
default: { // Default base table
if (NormalOpHeader(FEXCore::X86Tables::BaseOps, Op)) {
InstructionDecoded = true;
}
else {
LogMan::Msg::E("Error during instruction decoding");
ErrorDuringDecoding = true;
}
break;
}
}
}
return !ErrorDuringDecoding;
}
bool Decoder::BlockEndCanContinuePast(FEXCore::X86Tables::DecodedInst const &Inst) {
if (!CTX->Config.Multiblock)
return false;
// Have we had a conditional branch past this PC previously?
// We can continue in this case
//
// ex.
// test eax, eax
// jne .Continue
// ud2 <--- We can continue past this instruction, which is a block ender
// .Continue:
// ...
return Inst.PC <= MaxCondBranchForward;
}
bool Decoder::BranchTargetInMultiblockRange(FEXCore::X86Tables::DecodedInst const &Inst) {
if (!CTX->Config.Multiblock)
return false;
// If the RIP setting is conditional AND within our symbol range then it can be considered for multiblock
uint64_t TargetRIP = 0;
bool Conditional = true;
switch (Inst.OP) {
case 0x70 ... 0x7F: { // Conditional JUMP
// Source is a literal
// auto RIPOffset = LoadSource(Op, Op->Src1, Op->Flags);
// auto RIPTargetConst = _Constant(Op->PC + Op->InstSize);
// Target offset is PC + InstSize + Literal
TargetRIP = Inst.PC + Inst.InstSize + Inst.Src1.TypeLiteral.Literal;
break;
}
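// e.g. (illustrative) a `jne +0x10` (75 10) at PC 0x1000 has InstSize 2 and a
// literal of 0x10, giving TargetRIP = 0x1000 + 2 + 0x10 = 0x1012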
case 0xE9:
case 0xEB: // Both are unconditional JMP instructions
TargetRIP = Inst.PC + Inst.InstSize + Inst.Src1.TypeLiteral.Literal;
Conditional = false;
break;
case 0xC2: // RET imm
case 0xC3: // RET
Conditional = false;
break;
default:
return false;
break;
}
// If the target RIP is within the symbol ranges then we are golden
if (TargetRIP > SymbolMinAddress && TargetRIP <= SymbolMaxAddress) {
// Update our conditional branch ranges before we return
if (Conditional) {
MaxCondBranchForward = std::max(MaxCondBranchForward, TargetRIP);
MaxCondBranchBackwards = std::min(MaxCondBranchBackwards, TargetRIP);
}
//JumpTargets.emplace(TargetRIP);
return true;
}
return false;
}
bool Decoder::DecodeInstructionsInBlock(uint8_t const* InstStream, uint64_t PC) {
// Reset internal state management
DecodedSize = 0;
MaxCondBranchForward = 0;
MaxCondBranchBackwards = ~0ULL;
// XXX: Load symbol data
SymbolAvailable = false;
EntryPoint = PC;
JumpTargets.clear();
bool ErrorDuringDecoding = false;
bool Done = false;
uint64_t PCOffset = 0;
// If we don't have symbols available then we become a bit optimistic about multiblock ranges
if (!SymbolAvailable) {
// If we don't have a symbol available then assume all branches are valid for multiblock
SymbolMaxAddress = ~0ULL;
SymbolMinAddress = 0;
}
// LogMan::Msg::I("============================");
// LogMan::Msg::I(">>> Started decoding at 0x%lx", PC);
// LogMan::Msg::I("============================");
while(!Done) {
ErrorDuringDecoding = !DecodeInstruction(InstStream, PC + PCOffset);
if (ErrorDuringDecoding) {
LogMan::Msg::D("Couldn't Decode something at 0x%lx, Started at 0x%lx", PC + PCOffset, PC);
break;
}
auto &DecodeInst = DecodedBuffer[DecodedSize];
++DecodedSize;
bool CanContinue = false;
if (!(DecodeInst.TableInfo->Flags &
(FEXCore::X86Tables::InstFlags::FLAGS_BLOCK_END | FEXCore::X86Tables::InstFlags::FLAGS_SETS_RIP))) {
// If this isn't a block ender then we can keep going regardless
CanContinue = true;
}
// If this is an instruction that completely ends the block then stop decoding here
// XXX: If we've had a conditional branch past this point then keep going
if (DecodeInst.TableInfo->Flags & FEXCore::X86Tables::InstFlags::FLAGS_BLOCK_END) {
CanContinue = BlockEndCanContinuePast(DecodeInst);
}
if (DecodeInst.TableInfo->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SETS_RIP) {
// If we have multiblock enabled
// If the branch target is within our multiblock range then we can keep going on
// We don't want to short circuit this since we want to calculate our ranges still
CanContinue = CanContinue | BranchTargetInMultiblockRange(DecodeInst);
}
if (!CanContinue) {
break;
}
if (DecodedSize >= CTX->Config.MaxInstPerBlock) {
break;
}
if (DecodedSize >= DecodedBuffer.size()) {
break;
}
PCOffset += DecodeInst.InstSize;
InstStream += DecodeInst.InstSize;
}
return !ErrorDuringDecoding;
}
}

View File

@ -0,0 +1,44 @@
#pragma once
#include <FEXCore/Debug/X86Tables.h>
#include <cstdint>
#include <utility>
#include <set>
#include <vector>
namespace FEXCore::Context {
struct Context;
}
namespace FEXCore::Frontend {
class Decoder {
public:
Decoder(FEXCore::Context::Context *ctx);
bool DecodeInstructionsInBlock(uint8_t const* InstStream, uint64_t PC);
std::pair<std::vector<FEXCore::X86Tables::DecodedInst>*, size_t> const GetDecodedInsts() {
return std::make_pair(&DecodedBuffer, DecodedSize);
}
std::set<uint64_t> JumpTargets;
private:
FEXCore::Context::Context *CTX;
bool DecodeInstruction(uint8_t const *InstStream, uint64_t PC);
bool BlockEndCanContinuePast(FEXCore::X86Tables::DecodedInst const &Inst);
bool BranchTargetInMultiblockRange(FEXCore::X86Tables::DecodedInst const &Inst);
static constexpr size_t DefaultDecodedBufferSize = 0x10000;
std::vector<FEXCore::X86Tables::DecodedInst> DecodedBuffer;
size_t DecodedSize {};
// This is for multiblock data tracking
bool SymbolAvailable {false};
uint64_t EntryPoint {};
uint64_t MaxCondBranchForward {};
uint64_t MaxCondBranchBackwards {~0ULL};
uint64_t SymbolMaxAddress {};
uint64_t SymbolMinAddress {~0ULL};
};
}
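// A minimal usage sketch (hypothetical, not part of this commit): decode one
// block and walk the result. Assumes `ctx` is a fully constructed
// FEXCore::Context::Context and `Code` points at readable guest memory for
// `GuestRIP`.
//
//   FEXCore::Frontend::Decoder Decoder{ctx};
//   if (Decoder.DecodeInstructionsInBlock(Code, GuestRIP)) {
//     auto Insts = Decoder.GetDecodedInsts();
//     for (size_t i = 0; i < Insts.second; ++i) {
//       auto const &Inst = Insts.first->at(i);
//       printf("0x%lx: %s (%d bytes)\n", Inst.PC, Inst.TableInfo->Name, Inst.InstSize);
//     }
//   }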

View File

@ -0,0 +1 @@
#include <FEXCore/Debug/InternalThreadState.h>

File diff suppressed because it is too large

View File

@ -0,0 +1,12 @@
#pragma once
namespace FEXCore::Context {
struct Context;
}
namespace FEXCore::CPU {
class CPUBackend;
FEXCore::CPU::CPUBackend *CreateInterpreterCore(FEXCore::Context::Context *ctx);
}

File diff suppressed because it is too large

View File

@ -0,0 +1,15 @@
#pragma once
namespace FEXCore::Context {
struct Context;
}
namespace FEXCore::Core {
struct InternalThreadState;
}
namespace FEXCore::CPU {
class CPUBackend;
FEXCore::CPU::CPUBackend *CreateJITCore(FEXCore::Context::Context *ctx, FEXCore::Core::InternalThreadState *Thread);
}

File diff suppressed because it is too large

View File

@ -0,0 +1 @@

File diff suppressed because it is too large

View File

@ -0,0 +1,11 @@
#pragma once
namespace FEXCore::Core {
struct InternalThreadState;
}
namespace FEXCore::CPU {
class CPUBackend;
FEXCore::CPU::CPUBackend *CreateLLVMCore(FEXCore::Core::InternalThreadState *Thread);
}

View File

@ -0,0 +1,62 @@
#include "LogManager.h"
#include "Common/MathUtils.h"
#include "Interface/Core/LLVMJIT/LLVMMemoryManager.h"
#include <sys/mman.h>
namespace FEXCore::CPU {
LLVMMemoryManager::LLVMMemoryManager() {
CodeMemory = reinterpret_cast<uintptr_t>(mmap(nullptr, CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
LogMan::Throw::A(CodeMemory != -1ULL, "Failed to allocate code memory");
}
LLVMMemoryManager::~LLVMMemoryManager() {
munmap(reinterpret_cast<void*>(CodeMemory), CODE_SIZE);
CodeMemory = 0;
}
llvm::JITSymbol LLVMMemoryManager::findSymbol(const std::string &Name) {
return llvm::JITSymbol(getSymbolAddress(Name), llvm::JITSymbolFlags::Exported);
}
uint8_t *LLVMMemoryManager::allocateCodeSection(uintptr_t Size, unsigned Alignment,
[[maybe_unused]] unsigned SectionID,
[[maybe_unused]] llvm::StringRef SectionName) {
size_t Base = AlignUp(AllocateOffset, Alignment);
size_t NewEnd = Base + Size;
if (NewEnd >= CODE_SIZE) {
LogMan::Msg::A("Tried allocating code and code cache is full!");
return nullptr;
}
AllocateOffset = NewEnd;
LastCodeSize = Size;
return reinterpret_cast<uint8_t*>(CodeMemory + Base);
}
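// Illustrative example of the bump allocation above (hypothetical values):
// with AllocateOffset = 0x123 and Alignment = 16, AlignUp gives Base = 0x130,
// a 0x40 byte request moves AllocateOffset to 0x170, and the section lands at
// CodeMemory + 0x130.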
uint8_t *LLVMMemoryManager::allocateDataSection(uintptr_t Size, unsigned Alignment,
[[maybe_unused]] unsigned SectionID,
[[maybe_unused]] llvm::StringRef SectionName,
[[maybe_unused]] bool IsReadOnly) {
// Put data section right after code section
size_t Base = AlignUp(AllocateOffset, Alignment);
size_t NewEnd = Base + Size;
if (NewEnd >= CODE_SIZE) {
LogMan::Msg::A("Tried allocating code and code cache is full!");
return nullptr;
}
AllocateOffset = NewEnd;
return reinterpret_cast<uint8_t*>(CodeMemory + Base);
}
bool LLVMMemoryManager::finalizeMemory([[maybe_unused]] std::string *ErrMsg) {
return true;
}
}

View File

@ -0,0 +1,34 @@
#pragma once
#include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
namespace FEXCore::CPU {
class LLVMMemoryManager final : public llvm::RTDyldMemoryManager {
public:
LLVMMemoryManager();
~LLVMMemoryManager();
// uint64_t getSymbolAddress(const std::string &Name) override;
llvm::JITSymbol findSymbol(const std::string &Name) override;
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID,
llvm::StringRef SectionName) override;
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID,
llvm::StringRef SectionName,
bool IsReadOnly) override;
bool finalizeMemory(std::string *ErrMsg) override;
size_t GetLastCodeAllocation() { return LastCodeSize; }
private:
constexpr static size_t CODE_SIZE = 128 * 1024 * 1024;
uintptr_t CodeMemory {};
size_t AllocateOffset {};
size_t LastCodeSize {};
};
}

File diff suppressed because it is too large

View File

@ -0,0 +1,317 @@
#pragma once
#include <FEXCore/IR/IntrusiveIRList.h>
#include <FEXCore/Debug/X86Tables.h>
#include <FEXCore/IR/IR.h>
#include <cstdint>
#include <functional>
namespace FEXCore::IR {
class Pass;
class PassManager;
class OpDispatchBuilder final {
friend class FEXCore::IR::Pass;
friend class FEXCore::IR::PassManager;
public:
struct {
bool HadUnconditionalExit {false};
} Information;
bool ShouldDump {false};
OpDispatchBuilder();
IRListView<false> ViewIR() { return IRListView<false>(&Data, &ListData); }
IRListView<true> *CreateIRCopy() { return new IRListView<true>(&Data, &ListData); }
void ResetWorkingList();
bool HadDecodeFailure() { return DecodeFailure; }
void BeginBlock();
void EndBlock(uint64_t RIPIncrement);
void ExitFunction();
// Dispatch builder functions
#define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op
void UnhandledOp(OpcodeArgs);
void MOVOp(OpcodeArgs);
void ALUOp(OpcodeArgs);
void INTOp(OpcodeArgs);
void SyscallOp(OpcodeArgs);
void LEAOp(OpcodeArgs);
void NOPOp(OpcodeArgs);
void RETOp(OpcodeArgs);
void SecondaryALUOp(OpcodeArgs);
void ADCOp(OpcodeArgs);
void SBBOp(OpcodeArgs);
void PUSHOp(OpcodeArgs);
void POPOp(OpcodeArgs);
void LEAVEOp(OpcodeArgs);
void CALLOp(OpcodeArgs);
void CALLAbsoluteOp(OpcodeArgs);
void CondJUMPOp(OpcodeArgs);
void JUMPOp(OpcodeArgs);
void JUMPAbsoluteOp(OpcodeArgs);
void TESTOp(OpcodeArgs);
void MOVSXDOp(OpcodeArgs);
void MOVSXOp(OpcodeArgs);
void MOVZXOp(OpcodeArgs);
void CMPOp(OpcodeArgs);
void SETccOp(OpcodeArgs);
void CQOOp(OpcodeArgs);
void CDQOp(OpcodeArgs);
void XCHGOp(OpcodeArgs);
void SAHFOp(OpcodeArgs);
void LAHFOp(OpcodeArgs);
void MOVSegOp(OpcodeArgs);
void FLAGControlOp(OpcodeArgs);
void MOVOffsetOp(OpcodeArgs);
void CMOVOp(OpcodeArgs);
void CPUIDOp(OpcodeArgs);
void SHLOp(OpcodeArgs);
void SHROp(OpcodeArgs);
void ASHROp(OpcodeArgs);
void ROROp(OpcodeArgs);
void ROLOp(OpcodeArgs);
void BTOp(OpcodeArgs);
void IMUL1SrcOp(OpcodeArgs);
void IMUL2SrcOp(OpcodeArgs);
void IMULOp(OpcodeArgs);
void STOSOp(OpcodeArgs);
void MOVSOp(OpcodeArgs);
void CMPSOp(OpcodeArgs);
void BSWAPOp(OpcodeArgs);
void RDTSCOp(OpcodeArgs);
void INCOp(OpcodeArgs);
void DECOp(OpcodeArgs);
void NEGOp(OpcodeArgs);
void DIVOp(OpcodeArgs);
void IDIVOp(OpcodeArgs);
void BSFOp(OpcodeArgs);
void BSROp(OpcodeArgs);
void CMPXCHGOp(OpcodeArgs);
void MULOp(OpcodeArgs);
void NOTOp(OpcodeArgs);
// SSE
void MOVUPSOp(OpcodeArgs);
void MOVLHPSOp(OpcodeArgs);
void MOVHPDOp(OpcodeArgs);
void VectorALUOp(OpcodeArgs);
void MOVQOp(OpcodeArgs);
void PADDQOp(OpcodeArgs);
void PSUBQOp(OpcodeArgs);
template<size_t ElementSize>
void PMINUOp(OpcodeArgs);
void PMINSWOp(OpcodeArgs);
void PMOVMSKBOp(OpcodeArgs);
void PUNPCKLOp(OpcodeArgs);
void PUNPCKHOp(OpcodeArgs);
template<size_t ElementSize, bool Low>
void PSHUFDOp(OpcodeArgs);
void PCMPEQOp(OpcodeArgs);
template<size_t ElementSize>
void PCMPGTOp(OpcodeArgs);
void MOVDOp(OpcodeArgs);
template<size_t ElementSize>
void PSRLD(OpcodeArgs);
template<size_t ElementSize, bool Scalar>
void PSLL(OpcodeArgs);
void PSRLDQ(OpcodeArgs);
void MOVDDUPOp(OpcodeArgs);
template<size_t ElementSize>
void SHUFOp(OpcodeArgs);
#undef OpcodeArgs
/**
* @name IR allocation routines
*
* @{ */
// These handlers add cost to the constructor and destructor
// If it becomes an issue then blow them away
// GCC also generates some pretty atrocious code around these
// Use Clang!
#define IROP_ALLOCATE_HELPERS
#define IROP_DISPATCH_HELPERS
#include "IRDefines.inc"
IRPair<IROp_Constant> _Constant(uint8_t Size, uint64_t Constant) {
auto Op = AllocateOp<IROp_Constant, IROps::OP_CONSTANT>();
Op.first->Constant = Constant;
Op.first->Header.Size = Size / 8;
Op.first->Header.Elements = 1;
Op.first->Header.NumArgs = 0;
Op.first->Header.HasDest = true;
return Op;
}
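// Illustrative usage sketch (hypothetical values; assumes the address is the
// first SSA argument, per the reordering wrappers below): store a 64bit
// constant to guest memory.
//   auto Addr  = _Constant(64, 0x10000);
//   auto Value = _Constant(64, 0xDEADBEEF);
//   _StoreMem(8, Addr, Value); // 8 byte store through the wrapper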
IRPair<IROp_Bfe> _Bfe(uint8_t Width, uint8_t lsb, OrderedNode *ssa0) {
return _Bfe(ssa0, Width, lsb);
}
IRPair<IROp_Bfi> _Bfi(uint8_t Width, uint8_t lsb, OrderedNode *ssa0, OrderedNode *ssa1) {
return _Bfi(ssa0, ssa1, Width, lsb);
}
IRPair<IROp_StoreMem> _StoreMem(uint8_t Size, OrderedNode *ssa0, OrderedNode *ssa1) {
return _StoreMem(ssa0, ssa1, Size);
}
IRPair<IROp_LoadMem> _LoadMem(uint8_t Size, OrderedNode *ssa0) {
return _LoadMem(ssa0, Size);
}
IRPair<IROp_StoreContext> _StoreContext(uint8_t Size, uint32_t Offset, OrderedNode *ssa0) {
return _StoreContext(ssa0, Size, Offset);
}
IRPair<IROp_Select> _Select(uint8_t Cond, OrderedNode *ssa0, OrderedNode *ssa1, OrderedNode *ssa2, OrderedNode *ssa3) {
return _Select(ssa0, ssa1, ssa2, ssa3, Cond);
}
IRPair<IROp_Sext> _Sext(uint8_t Size, OrderedNode *ssa0) {
return _Sext(ssa0, Size);
}
IRPair<IROp_Zext> _Zext(uint8_t Size, OrderedNode *ssa0) {
return _Zext(ssa0, Size);
}
IRPair<IROp_VInsElement> _VInsElement(uint8_t RegisterSize, uint8_t ElementSize, uint8_t DestIdx, uint8_t SrcIdx, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VInsElement(ssa0, ssa1, RegisterSize, ElementSize, DestIdx, SrcIdx);
}
IRPair<IROp_VAdd> _VAdd(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VAdd(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VSub> _VSub(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VSub(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VUMin> _VUMin(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VUMin(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VSMin> _VSMin(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VSMin(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VZip> _VZip(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VZip(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VZip2> _VZip2(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VZip2(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VCMPEQ> _VCMPEQ(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VCMPEQ(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VCMPGT> _VCMPGT(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VCMPGT(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VUShl> _VUShl(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VUShl(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VUShlS> _VUShlS(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VUShlS(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VUShr> _VUShr(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VUShr(ssa0, ssa1, RegisterSize, ElementSize);
}
/** @} */
bool IsValueConstant(NodeWrapper ssa, uint64_t *Constant) {
OrderedNode *RealNode = reinterpret_cast<OrderedNode*>(ssa.GetPtr(ListData.Begin()));
FEXCore::IR::IROp_Header *IROp = RealNode->Op(Data.Begin());
if (IROp->Op == OP_CONSTANT) {
auto Op = IROp->C<IR::IROp_Constant>();
*Constant = Op->Constant;
return true;
}
return false;
}
// This is fairly special in how it operates
// Since the node is decoupled from the backing op, we can swap out the backing op without much overhead
// This can potentially cause problems where multiple nodes are pointing to the same IROp
OrderedNode *ReplaceAllUsesWith(OrderedNode *Node, IROp_Header *Op) {
Node->Header.Value.SetOffset(Data.Begin(), reinterpret_cast<uintptr_t>(Op));
return Node;
}
// This is similar to the previous function except that we pass in a node
// It takes the op backing the new node and uses it to replace the op in the other node
// Again, this can cause problems where things still point at NewNode and haven't been decoupled
OrderedNode *ReplaceAllUsesWith(OrderedNode *Node, OrderedNode *NewNode) {
Node->Header.Value.NodeOffset = NewNode->Header.Value.NodeOffset;
return Node;
}
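// Illustrative sketch: this is the pattern the ConstProp pass (further down in
// this commit) uses to fold a node into a constant; the IRPair result converts
// to an OrderedNode*:
//
//   auto ConstantVal = _Constant(NewConstant);
//   ReplaceAllUsesWith(RealNode, ConstantVal);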
void Unlink(OrderedNode *Node) {
Node->Unlink(ListData.Begin());
}
void SetPackedRFLAG(bool Lower8, OrderedNode *Src);
OrderedNode *GetPackedRFLAG(bool Lower8);
void CopyData(OpDispatchBuilder const &rhs) {
LogMan::Throw::A(rhs.Data.BackingSize() <= Data.BackingSize(), "Trying to take ownership of data that is too large");
LogMan::Throw::A(rhs.ListData.BackingSize() <= ListData.BackingSize(), "Trying to take ownership of list data that is too large");
Data.CopyData(rhs.Data);
ListData.CopyData(rhs.ListData);
}
private:
void TestFunction();
bool DecodeFailure{false};
OrderedNode *LoadSource(FEXCore::X86Tables::DecodedOp const& Op, FEXCore::X86Tables::DecodedOperand const& Operand, uint32_t Flags, bool LoadData = true, bool ForceLoad = false);
OrderedNode *LoadSource_WithOpSize(FEXCore::X86Tables::DecodedOp const& Op, FEXCore::X86Tables::DecodedOperand const& Operand, uint8_t OpSize, uint32_t Flags, bool LoadData = true, bool ForceLoad = false);
void StoreResult_WithOpSize(FEXCore::X86Tables::DecodedOp Op, FEXCore::X86Tables::DecodedOperand const& Operand, OrderedNode *const Src, uint8_t OpSize);
void StoreResult(FEXCore::X86Tables::DecodedOp Op, FEXCore::X86Tables::DecodedOperand const& Operand, OrderedNode *const Src);
void StoreResult(FEXCore::X86Tables::DecodedOp Op, OrderedNode *const Src);
uint8_t GetDstSize(FEXCore::X86Tables::DecodedOp Op);
uint8_t GetSrcSize(FEXCore::X86Tables::DecodedOp Op);
template<unsigned BitOffset>
void SetRFLAG(OrderedNode *Value);
void SetRFLAG(OrderedNode *Value, unsigned BitOffset);
OrderedNode *GetRFLAG(unsigned BitOffset);
void GenerateFlags_ADC(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF);
void GenerateFlags_SBB(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF);
void GenerateFlags_SUB(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2);
void GenerateFlags_ADD(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2);
void GenerateFlags_MUL(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *High);
void GenerateFlags_UMUL(FEXCore::X86Tables::DecodedOp Op, OrderedNode *High);
void GenerateFlags_Logical(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2);
void GenerateFlags_Rotate(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2);
OrderedNode *CreateNode(IROp_Header *Op) {
uintptr_t ListBegin = ListData.Begin();
size_t Size = sizeof(OrderedNode);
void *Ptr = ListData.Allocate(Size);
OrderedNode *Node = new (Ptr) OrderedNode();
Node->Header.Value.SetOffset(Data.Begin(), reinterpret_cast<uintptr_t>(Op));
if (CurrentWriteCursor) {
CurrentWriteCursor->append(ListBegin, Node);
}
CurrentWriteCursor = Node;
return Node;
}
void SetWriteCursor(OrderedNode *Node) {
CurrentWriteCursor = Node;
}
OrderedNode *GetWriteCursor() {
return CurrentWriteCursor;
}
OrderedNode *CurrentWriteCursor = nullptr;
// These could be combined with a little bit of work to be more efficient with memory usage. It isn't a big deal right now
IntrusiveAllocator Data;
IntrusiveAllocator ListData;
};
void InstallOpcodeHandlers();
}

View File

@ -0,0 +1,230 @@
#include "Common/BitSet.h"
#include "Interface/Core/RegisterAllocation.h"
#include <LogManager.h>
#include <vector>
constexpr uint32_t INVALID_REG = ~0U;
constexpr uint32_t INVALID_CLASS = ~0U;
namespace FEXCore::RA {
struct Register {
};
struct RegisterClass {
uint32_t RegisterBase;
uint32_t NumberOfRegisters{0};
BitSet<uint32_t> Registers;
};
struct RegisterNode {
uint32_t RegisterClass;
uint32_t Register;
uint32_t InterferenceCount;
uint32_t InterferenceListSize;
uint32_t *InterferenceList;
BitSet<uint32_t> Interference;
};
static_assert(std::is_pod<RegisterNode>::value, "We want this to be POD");
struct RegisterSet {
Register *Registers;
RegisterClass *RegisterClasses;
uint32_t RegisterCount;
uint32_t ClassCount;
};
struct SpillStackUnit {
uint32_t Node;
uint32_t Class;
};
struct RegisterGraph {
RegisterSet *Set;
RegisterNode *Nodes;
uint32_t NodeCount;
uint32_t MaxNodeCount;
std::vector<SpillStackUnit> SpillStack;
};
RegisterSet *AllocateRegisterSet(uint32_t RegisterCount, uint32_t ClassCount) {
RegisterSet *Set = new RegisterSet;
Set->RegisterCount = RegisterCount;
Set->ClassCount = ClassCount;
Set->Registers = static_cast<Register*>(calloc(RegisterCount, sizeof(Register)));
Set->RegisterClasses = static_cast<RegisterClass*>(calloc(ClassCount, sizeof(RegisterClass)));
for (uint32_t i = 0; i < ClassCount; ++i) {
Set->RegisterClasses[i].Registers.Allocate(RegisterCount);
}
return Set;
}
void FreeRegisterSet(RegisterSet *Set) {
for (uint32_t i = 0; i < Set->ClassCount; ++i) {
Set->RegisterClasses[i].Registers.Free();
}
free(Set->RegisterClasses);
free(Set->Registers);
delete Set;
}
void AddRegisters(RegisterSet *Set, uint32_t Class, uint32_t RegistersBase, uint32_t RegisterCount) {
for (uint32_t i = 0; i < RegisterCount; ++i) {
Set->RegisterClasses[Class].Registers.Set(RegistersBase + i);
}
Set->RegisterClasses[Class].RegisterBase = RegistersBase;
Set->RegisterClasses[Class].NumberOfRegisters += RegisterCount;
}
RegisterGraph *AllocateRegisterGraph(RegisterSet *Set, uint32_t NodeCount) {
RegisterGraph *Graph = new RegisterGraph;
Graph->Set = Set;
Graph->NodeCount = NodeCount;
Graph->MaxNodeCount = NodeCount;
Graph->Nodes = static_cast<RegisterNode*>(calloc(NodeCount, sizeof(RegisterNode)));
// Initialize nodes
for (uint32_t i = 0; i < NodeCount; ++i) {
Graph->Nodes[i].Register = INVALID_REG;
Graph->Nodes[i].RegisterClass = INVALID_CLASS;
Graph->Nodes[i].InterferenceListSize = 32;
Graph->Nodes[i].InterferenceList = reinterpret_cast<uint32_t*>(calloc(Graph->Nodes[i].InterferenceListSize, sizeof(uint32_t)));
Graph->Nodes[i].InterferenceCount = 0;
Graph->Nodes[i].Interference.Allocate(NodeCount);
Graph->Nodes[i].Interference.Clear(NodeCount);
}
return Graph;
}
void ResetRegisterGraph(RegisterGraph *Graph, uint32_t NodeCount) {
if (NodeCount > Graph->MaxNodeCount) {
uint32_t OldNodeCount = Graph->MaxNodeCount;
Graph->NodeCount = NodeCount;
Graph->MaxNodeCount = NodeCount;
Graph->Nodes = static_cast<RegisterNode*>(realloc(Graph->Nodes, NodeCount * sizeof(RegisterNode)));
// Initialize nodes
for (uint32_t i = 0; i < OldNodeCount; ++i) {
Graph->Nodes[i].Register = INVALID_REG;
Graph->Nodes[i].RegisterClass = INVALID_CLASS;
Graph->Nodes[i].InterferenceCount = 0;
Graph->Nodes[i].Interference.Realloc(NodeCount);
Graph->Nodes[i].Interference.Clear(NodeCount);
}
for (uint32_t i = OldNodeCount; i < NodeCount; ++i) {
Graph->Nodes[i].Register = INVALID_REG;
Graph->Nodes[i].RegisterClass = INVALID_CLASS;
Graph->Nodes[i].InterferenceListSize = 32;
Graph->Nodes[i].InterferenceList = reinterpret_cast<uint32_t*>(calloc(Graph->Nodes[i].InterferenceListSize, sizeof(uint32_t)));
Graph->Nodes[i].InterferenceCount = 0;
Graph->Nodes[i].Interference.Allocate(NodeCount);
Graph->Nodes[i].Interference.Clear(NodeCount);
}
}
else {
// The node count fits in our existing allocation; just reset the nodes that are in use
Graph->NodeCount = NodeCount;
// Initialize nodes
for (uint32_t i = 0; i < NodeCount; ++i) {
Graph->Nodes[i].Register = INVALID_REG;
Graph->Nodes[i].RegisterClass = INVALID_CLASS;
Graph->Nodes[i].InterferenceCount = 0;
Graph->Nodes[i].Interference.Clear(NodeCount);
}
}
}
void FreeRegisterGraph(RegisterGraph *Graph) {
for (uint32_t i = 0; i < Graph->MaxNodeCount; ++i) {
RegisterNode *Node = &Graph->Nodes[i];
Node->InterferenceCount = 0;
Node->InterferenceListSize = 0;
free(Node->InterferenceList);
Node->Interference.Free();
}
free(Graph->Nodes);
Graph->NodeCount = 0;
Graph->MaxNodeCount = 0;
delete Graph;
}
void SetNodeClass(RegisterGraph *Graph, uint32_t Node, uint32_t Class) {
Graph->Nodes[Node].RegisterClass = Class;
}
void AddNodeInterference(RegisterGraph *Graph, uint32_t Node1, uint32_t Node2) {
auto AddInterference = [&Graph](uint32_t Node1, uint32_t Node2) {
RegisterNode *Node = &Graph->Nodes[Node1];
Node->Interference.Set(Node2);
if (Node->InterferenceListSize <= Node->InterferenceCount) {
Node->InterferenceListSize *= 2;
Node->InterferenceList = reinterpret_cast<uint32_t*>(realloc(Node->InterferenceList, Node->InterferenceListSize * sizeof(uint32_t)));
}
Node->InterferenceList[Node->InterferenceCount] = Node2;
++Node->InterferenceCount;
};
AddInterference(Node1, Node2);
AddInterference(Node2, Node1);
}
uint32_t GetNodeRegister(RegisterGraph *Graph, uint32_t Node) {
return Graph->Nodes[Node].Register;
}
static bool HasInterference(RegisterGraph *Graph, RegisterNode *Node, uint32_t Register) {
for (uint32_t i = 0; i < Node->InterferenceCount; ++i) {
RegisterNode *IntNode = &Graph->Nodes[Node->InterferenceList[i]];
if (IntNode->Register == Register) {
return true;
}
}
return false;
}
bool AllocateRegisters(RegisterGraph *Graph) {
Graph->SpillStack.clear();
for (uint32_t i = 0; i < Graph->NodeCount; ++i) {
RegisterNode *CurrentNode = &Graph->Nodes[i];
if (CurrentNode->RegisterClass == INVALID_CLASS)
continue;
uint32_t Reg = ~0U;
RegisterClass *RAClass = &Graph->Set->RegisterClasses[CurrentNode->RegisterClass];
for (uint32_t ri = 0; ri < RAClass->NumberOfRegisters; ++ri) {
if (!HasInterference(Graph, CurrentNode, RAClass->RegisterBase + ri)) {
Reg = ri;
break;
}
}
if (Reg == ~0U) {
Graph->SpillStack.emplace_back(SpillStackUnit{i, CurrentNode->RegisterClass});
}
else {
CurrentNode->Register = RAClass->RegisterBase + Reg;
}
}
if (!Graph->SpillStack.empty()) {
printf("Couldn't allocate %ld registers\n", Graph->SpillStack.size());
return false;
}
return true;
}
}

View File

@ -0,0 +1,30 @@
#pragma once
#include <stdint.h>
#include <vector>
namespace FEXCore::RA {
struct RegisterSet;
struct RegisterGraph;
using CrappyBitset = std::vector<bool>;
RegisterSet *AllocateRegisterSet(uint32_t RegisterCount, uint32_t ClassCount);
void FreeRegisterSet(RegisterSet *Set);
void AddRegisters(RegisterSet *Set, uint32_t Class, uint32_t RegistersBase, uint32_t RegisterCount);
/**
* @name Interference graph handling
* @{ */
RegisterGraph *AllocateRegisterGraph(RegisterSet *Set, uint32_t NodeCount);
void FreeRegisterGraph(RegisterGraph *Graph);
void ResetRegisterGraph(RegisterGraph *Graph, uint32_t NodeCount);
void SetNodeClass(RegisterGraph *Graph, uint32_t Node, uint32_t Class);
void AddNodeInterference(RegisterGraph *Graph, uint32_t Node1, uint32_t Node2);
uint32_t GetNodeRegister(RegisterGraph *Graph, uint32_t Node);
bool AllocateRegisters(RegisterGraph *Graph);
/** @} */
}
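// A minimal usage sketch (illustrative; register and node counts are made up):
//
//   using namespace FEXCore::RA;
//   RegisterSet *Set = AllocateRegisterSet(16, 2); // 16 registers across 2 classes
//   AddRegisters(Set, 0, 0, 8);  // class 0: registers 0-7
//   AddRegisters(Set, 1, 8, 8);  // class 1: registers 8-15
//   RegisterGraph *Graph = AllocateRegisterGraph(Set, 3); // 3 SSA nodes
//   SetNodeClass(Graph, 0, 0);
//   SetNodeClass(Graph, 1, 0);
//   AddNodeInterference(Graph, 0, 1); // nodes 0 and 1 are live at the same time
//   if (AllocateRegisters(Graph)) {
//     uint32_t Reg0 = GetNodeRegister(Graph, 0); // 0 and 1 receive distinct registers
//   }
//   FreeRegisterGraph(Graph);
//   FreeRegisterSet(Set);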

View File

@ -0,0 +1,120 @@
#ifndef NDEBUG
#include <FEXCore/Debug/X86Tables.h>
#include <cstdio>
#include <cstring>
#include <tuple>
#include <vector>
namespace FEXCore::X86Tables::X86InstDebugInfo {
void InstallDebugInfo() {
using namespace FEXCore::X86Tables;
auto NoFlags = Flags {0};
for (auto &BaseOp : BaseOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : SecondBaseOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : RepModOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : RepNEModOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : OpSizeModOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : PrimaryInstGroupOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : SecondInstGroupOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : SecondModRMTableOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : X87Ops)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : DDDNowOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : H0F38TableOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : H0F3ATableOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : VEXTableOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : VEXTableGroupOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : XOPTableOps)
BaseOp.DebugInfo = NoFlags;
for (auto &BaseOp : XOPTableGroupOps)
BaseOp.DebugInfo = NoFlags;
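// Each entry in the tables below is {first opcode, number of consecutive opcodes, flags}.
// e.g. {0x50, 8, {FLAGS_MEM_ACCESS}} marks PUSH reg (opcodes 0x50 through 0x57)
// as accessing (stack) memory.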
const std::vector<std::tuple<uint8_t, uint8_t, Flags>> BaseOpTable = {
{0x50, 8, {FLAGS_MEM_ACCESS}},
{0x58, 8, {FLAGS_MEM_ACCESS}},
{0x68, 1, {FLAGS_MEM_ACCESS}},
{0x6A, 1, {FLAGS_MEM_ACCESS}},
{0xAA, 4, {FLAGS_MEM_ACCESS}},
{0xC8, 1, {FLAGS_MEM_ACCESS}},
{0xCC, 2, {FLAGS_DEBUG}},
{0xD7, 1, {FLAGS_MEM_ACCESS}},
{0xF1, 1, {FLAGS_DEBUG}},
{0xF4, 1, {FLAGS_DEBUG}},
};
const std::vector<std::tuple<uint8_t, uint8_t, Flags>> TwoByteOpTable = {
{0x0B, 1, {FLAGS_DEBUG}},
{0x19, 7, {FLAGS_DEBUG}},
{0x28, 2, {FLAGS_MEM_ALIGN_16}},
{0x31, 1, {FLAGS_DEBUG}},
{0xA2, 1, {FLAGS_DEBUG}},
{0xA3, 1, {FLAGS_MEM_ACCESS}},
{0xAB, 1, {FLAGS_MEM_ACCESS}},
{0xB3, 1, {FLAGS_MEM_ACCESS}},
{0xBB, 1, {FLAGS_MEM_ACCESS}},
{0xFF, 1, {FLAGS_DEBUG}},
};
const std::vector<std::tuple<uint8_t, uint8_t, Flags>> PrimaryGroupOpTable = {
#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg))
{OPD(TYPE_GROUP_3, OpToIndex(0xF6), 6), 2, {FLAGS_DIVIDE}},
{OPD(TYPE_GROUP_3, OpToIndex(0xF7), 6), 2, {FLAGS_DIVIDE}},
#undef OPD
};
const std::vector<std::tuple<uint16_t, uint8_t, Flags>> SecondaryExtensionOpTable = {
#define PF_NONE 0
#define PF_F3 1
#define PF_66 2
#define PF_F2 3
#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_6) << 5) | (prefix) << 3 | (Reg))
{OPD(TYPE_GROUP_15, PF_NONE, 2), 1, {FLAGS_DEBUG}},
{OPD(TYPE_GROUP_15, PF_NONE, 3), 1, {FLAGS_DEBUG}},
#undef PF_NONE
#undef PF_F3
#undef PF_66
#undef PF_F2
#undef OPD
};
auto GenerateDebugTable = [](auto& FinalTable, auto& LocalTable) {
for (auto Op : LocalTable) {
auto OpNum = std::get<0>(Op);
auto DebugInfo = std::get<2>(Op);
for (uint8_t i = 0; i < std::get<1>(Op); ++i) {
memcpy(&FinalTable.at(OpNum+i).DebugInfo, &DebugInfo, sizeof(X86InstDebugInfo::Flags));
}
}
};
GenerateDebugTable(BaseOps, BaseOpTable);
GenerateDebugTable(SecondBaseOps, TwoByteOpTable);
GenerateDebugTable(PrimaryInstGroupOps, PrimaryGroupOpTable);
GenerateDebugTable(SecondInstGroupOps, SecondaryExtensionOpTable);
printf("Installing debug info\n");
}
}
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,243 @@
#include "LogManager.h"
#include "Interface/Context/Context.h"
#include "Interface/HLE/FileManagement.h"
#include <algorithm>
#include <cstring>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
namespace FEXCore {
class STDFD final : public FD {
public:
STDFD(FEXCore::Context::Context *ctx, int32_t fd, const char *pathname, int32_t flags, mode_t mode)
: FD (ctx, fd, pathname, flags, mode) {
}
ssize_t writev(int fd, void *iov, int iovcnt) override {
ssize_t FinalSize {};
std::string OutputString;
struct iovStruct {
uint64_t base;
size_t len;
};
iovStruct *iovObject = reinterpret_cast<iovStruct*>(iov);
for (int i = 0; i < iovcnt; ++i) {
const char *String = CTX->MemoryMapper.GetPointer<const char*>(iovObject[i].base);
for (size_t j = 0; j < iovObject[i].len; ++j) {
OutputString += String[j];
}
FinalSize += iovObject[i].len;
}
OutputString += '\0';
if (FDOffset == STDOUT_FILENO)
LogMan::Msg::OUT("[%ld] %s", FinalSize, OutputString.c_str());
else if (FDOffset == STDERR_FILENO)
LogMan::Msg::ERR("[%ld] %s", FinalSize, OutputString.c_str());
return FinalSize;
}
uint64_t write(int fd, void *buf, size_t count) override {
if (FDOffset == STDOUT_FILENO)
LogMan::Msg::OUT("%s", reinterpret_cast<char*>(buf));
else if (FDOffset == STDERR_FILENO)
LogMan::Msg::ERR("%s", reinterpret_cast<char*>(buf));
return count;
}
};
uint64_t FD::read(int fd, void *buf, size_t count) {
return ::read(HostFD, buf, count);
}
ssize_t FD::writev(int fd, void *iov, int iovcnt) {
ssize_t FinalSize {};
LogMan::Msg::I(">>> writev: %d %p %d", fd, iov, iovcnt);
for (int i = 0; i < iovcnt; ++i) {
struct iovStruct {
uint64_t base;
size_t len;
};
iovStruct *iovObject = reinterpret_cast<iovStruct*>(iov);
const char *String = CTX->MemoryMapper.GetPointer<const char*>(iovObject->base);
LogMan::Msg::I("\t0x%lx Size: 0x%zx %p", iovObject->base, iovObject->len, String);
for (size_t j = 0; j < iovObject->len; ++j) {
LogMan::Msg::I("%c", String[j]);
}
FinalSize += iovObject->len;
}
return FinalSize;
}
uint64_t FD::write(int fd, void *buf, size_t count) {
// Write to the backing host FD, not the guest-visible FD number
return ::write(HostFD, buf, count);
}
int FD::openat(int dirfd, const char *pathname, int flags, mode_t mode) {
HostFD = ::openat(dirfd, pathname, flags, mode);
return HostFD;
}
int FD::fstat(int fd, struct stat *buf) {
return ::fstat(HostFD, buf);
}
int FD::close(int fd) {
LogMan::Msg::D("Closing: %s", PathName.c_str());
return ::close(HostFD);
}
FileManager::FileManager(FEXCore::Context::Context *ctx)
: CTX {ctx} {
FDMap[CurrentFDOffset++] = new STDFD{CTX, STDIN_FILENO, "stdin", 0, 0};
FDMap[CurrentFDOffset++] = new STDFD{CTX, STDOUT_FILENO, "stdout", 0, 0};
FDMap[CurrentFDOffset++] = new STDFD{CTX, STDERR_FILENO, "stderr", 0, 0};
}
FileManager::~FileManager() {
for (auto &FD : FDMap) {
delete FD.second;
}
}
uint64_t FileManager::Read(int fd, [[maybe_unused]] void *buf, [[maybe_unused]] size_t count) {
auto FD = FDMap.find(fd);
if (FD == FDMap.end()) {
LogMan::Msg::I("XXX: Implement Read: %d", fd);
return -1;
}
return FD->second->read(fd, buf, count);
}
uint64_t FileManager::Write(int fd, void *buf, size_t count) {
auto FD = FDMap.find(fd);
if (FD == FDMap.end()) {
LogMan::Msg::I("XXX: Implement write: %d", fd);
return -1;
}
return FD->second->write(fd, buf, count);
}
uint64_t FileManager::Open(const char *pathname, [[maybe_unused]] int flags, [[maybe_unused]] uint32_t mode) {
LogMan::Msg::I("XXX: Trying to open: '%s'", pathname);
return 0;
}
uint64_t FileManager::Close(int fd) {
auto FD = FDMap.find(fd);
if (FD == FDMap.end()) {
LogMan::Msg::I("XXX: Trying to close: '%d'", fd);
return 0;
}
int Result = FD->second->close(fd);
delete FD->second;
FDMap.erase(FD);
return Result;
}
uint64_t FileManager::Stat(const char *pathname, void *buf) {
return ::stat(pathname, reinterpret_cast<struct stat*>(buf));
}
uint64_t FileManager::Fstat(int fd, void *buf) {
if (fd == STDOUT_FILENO || fd == STDERR_FILENO) {
struct stat TmpBuf;
int Result = fstat(fd, &TmpBuf);
// Blow away access times
// Causes issues with lockstep runner and file accesses
memset(&TmpBuf.st_atime, 0, sizeof(time_t));
memset(&TmpBuf.st_mtime, 0, sizeof(time_t));
memset(&TmpBuf.st_ctime, 0, sizeof(time_t));
TmpBuf.st_rdev = 0x8800 + fd;
memcpy(buf, &TmpBuf, sizeof(struct stat));
return Result;
}
else {
auto FD = FDMap.find(fd);
if (FD != FDMap.end()) {
return FD->second->fstat(fd, reinterpret_cast<struct stat*>(buf));
}
}
LogMan::Msg::D("Attempting to stat: %d", fd);
return -1LL;
}
uint64_t FileManager::Lseek(int fd, uint64_t offset, int whence) {
LogMan::Msg::E("XXX: Attempting to lseek %d 0x%lx 0x%x", fd, offset, whence);
return -1LL;
}
uint64_t FileManager::Writev(int fd, void *iov, int iovcnt) {
auto fdPtr = FDMap.find(fd);
if (fdPtr == FDMap.end()) {
LogMan::Msg::E("XXX: Trying to writev unknown fd: %d", fd);
return FDMap.find(0)->second->writev(0, iov, iovcnt);
return -1LL;
}
return fdPtr->second->writev(fd, iov, iovcnt);
}
uint64_t FileManager::Access(const char *pathname, [[maybe_unused]] int mode) {
LogMan::Msg::D("Trying to read access of: %s", pathname);
return access(pathname, mode);
}
uint64_t FileManager::Readlink(const char *pathname, char *buf, size_t bufsiz) {
LogMan::Msg::D("Attemptign to readlink: '%s'", pathname);
if (strcmp(pathname, "/proc/self/exe") == 0) {
strncpy(buf, Filename.c_str(), bufsiz);
return std::min(bufsiz, Filename.size());
}
return readlink(pathname, buf, bufsiz);
}
uint64_t FileManager::Openat([[maybe_unused]] int dirfs, const char *pathname, int flags, uint32_t mode) {
int32_t fd = CurrentFDOffset;
LogMan::Msg::D("Attempting to open '%s'", pathname);
if (!strcmp(pathname, "/dev/tty")) {
FDMap[CurrentFDOffset++] = new STDFD{CTX, STDOUT_FILENO, "/dev/tty", 0, 0};
return fd;
}
if (!strcmp(pathname, "/etc/ld.so.cache")) {
return -1;
}
auto fdPtr = new FD{CTX, fd, pathname, flags, mode};
auto Result = fdPtr->openat(dirfs, pathname, flags, mode);
if (Result == -1) {
delete fdPtr;
return -1;
}
FDMap[CurrentFDOffset++] = fdPtr;
LogMan::Msg::D("Opening: %d(%d) %s\n", fd, Result, pathname);
return fd;
}
int32_t FileManager::FindHostFD(int fd) {
auto FD = FDMap.find(fd);
if (FD == FDMap.end()) {
return -1;
}
return FD->second->GetHostFD();
}
}

View File

@ -0,0 +1,77 @@
#pragma once
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>
#include <sys/stat.h>
namespace FEXCore::Context {
struct Context;
}
namespace FEXCore {
class FD {
public:
FD() = delete;
FD(FD &&) = delete;
FD(FEXCore::Context::Context *ctx, int32_t fd, const char *pathname, int32_t flags, mode_t mode)
: CTX {ctx}
, FDOffset {fd}
, PathName {pathname}
, Flags {flags}
, Mode {mode} {
}
virtual ~FD() {}
uint64_t read(int fd, void *buf, size_t count);
virtual ssize_t writev(int fd, void *iov, int iovcnt);
virtual uint64_t write(int fd, void *buf, size_t count);
int openat(int dirfd, const char *pathname, int flags, mode_t mode);
int fstat(int fd, struct stat *buf);
int close(int fd);
int GetHostFD() const { return HostFD; }
protected:
FEXCore::Context::Context *CTX;
[[maybe_unused]] int32_t FDOffset{};
std::string PathName;
[[maybe_unused]] int32_t Flags;
[[maybe_unused]] mode_t Mode;
int32_t HostFD;
};
class FileManager final {
public:
FileManager() = delete;
FileManager(FileManager &&) = delete;
FileManager(FEXCore::Context::Context *ctx);
~FileManager();
uint64_t Read(int fd, void *buf, size_t count);
uint64_t Write(int fd, void *buf, size_t count);
uint64_t Open(const char *pathname, int flags, uint32_t mode);
uint64_t Close(int fd);
uint64_t Stat(const char *pathname, void *buf);
uint64_t Fstat(int fd, void *buf);
uint64_t Lseek(int fd, uint64_t offset, int whence);
uint64_t Writev(int fd, void *iov, int iovcnt);
uint64_t Access(const char *pathname, int mode);
uint64_t Readlink(const char *pathname, char *buf, size_t bufsiz);
uint64_t Openat(int dirfs, const char *pathname, int flags, uint32_t mode);
int32_t FindHostFD(int fd);
void SetFilename(std::string const &File) { Filename = File; }
std::string const & GetFilename() const { return Filename; }
private:
FEXCore::Context::Context *CTX;
int32_t CurrentFDOffset{0};
std::unordered_map<int32_t, FD*> FDMap;
std::string Filename;
};
}
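// A minimal usage sketch (illustrative; the path is hypothetical and ctx is an
// existing FEXCore::Context::Context*):
//
//   FEXCore::FileManager FM(ctx);
//   uint64_t fd = FM.Openat(AT_FDCWD, "/tmp/example", O_RDONLY, 0);
//   char Buf[16];
//   FM.Read(fd, Buf, sizeof(Buf)); // forwards to the tracked FD's host fd
//   FM.Close(fd);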

View File

@ -0,0 +1,464 @@
#include "Common/MathUtils.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/InternalThreadState.h"
#include "Interface/HLE/Syscalls.h"
#include "LogManager.h"
#include <FEXCore/Core/X86Enums.h>
#include <cstring>
#include <ctime>
#include <sys/mman.h>
constexpr uint64_t PAGE_SIZE = 4096;
namespace FEXCore {
void SyscallHandler::DefaultProgramBreak(FEXCore::Core::InternalThreadState *Thread, uint64_t Addr) {
DataSpaceSize = 0;
DataSpace = Addr;
DefaultProgramBreakAddress = Addr;
// Just allocate 256MB of data memory past the default program break location at this point
CTX->MapRegion(Thread, Addr, 0x1000'0000);
}
uint64_t SyscallHandler::HandleSyscall(FEXCore::Core::InternalThreadState *Thread, FEXCore::HLE::SyscallArguments *Args) {
uint64_t Result = 0;
LogMan::Msg::D("Syscall: %d", Args->Argument[0]);
switch (Args->Argument[0]) {
case SYSCALL_UNAME: {
struct _utsname {
char sysname[65];
char nodename[65];
char release[65];
char version[65];
char machine[65];
};
_utsname *Local = CTX->MemoryMapper.GetPointer<_utsname*>(Args->Argument[1]);
strcpy(Local->sysname, "Linux");
strcpy(Local->nodename, "FEXCore");
strcpy(Local->release, "5.0.0");
strcpy(Local->version, "#1");
strcpy(Local->machine, "x86_64");
Result = 0;
break;
}
// Memory management
case SYSCALL_BRK: {
LogMan::Msg::D("\tBRK: 0x%lx - 0x%lx", Args->Argument[1], DataSpace);
if (Args->Argument[1] == 0) { // Just wants to get the location of the program break atm
Result = DataSpace + DataSpaceSize;
}
else {
// Allocating out data space
uint64_t NewEnd = Args->Argument[1];
if (NewEnd < DataSpace) {
// Not allowed to move brk end below original start
// Set the size to zero
DataSpaceSize = 0;
}
else {
uint64_t NewSize = NewEnd - DataSpace;
DataSpaceSize = NewSize;
}
Result = DataSpace + DataSpaceSize;
}
break;
}
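// e.g. (illustrative): with DataSpace at 0x1000'0000, brk(0) returns
// 0x1000'0000; brk(0x1000'4000) then grows DataSpaceSize to 0x4000 and
// returns the new break of 0x1000'4000.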
case SYSCALL_MMAP: {
LogMan::Msg::D("\tMMAP(<addr> %p, <length> 0x%lx, <prot>%d, <flags>0x%x, <fd>%d, <offset>0x%lx)",
Args->Argument[1], Args->Argument[2],
Args->Argument[3], Args->Argument[4],
Args->Argument[5], Args->Argument[6]);
int Flags = Args->Argument[4];
int GuestFD = Args->Argument[5];
int HostFD = -1;
if (GuestFD != -1) {
HostFD = FM.FindHostFD(GuestFD);
}
uint64_t Base = AlignDown(LastMMAP, PAGE_SIZE);
uint64_t Size = AlignUp(Args->Argument[2], PAGE_SIZE);
uint64_t FileSizeToUse = Args->Argument[2];
uint64_t Prot = Args->Argument[3];
#ifdef DEBUG_MMAP
FileSizeToUse = Size;
Prot = PROT_READ | PROT_WRITE | PROT_EXEC;
#endif
if (Flags & MAP_FIXED) {
Base = Args->Argument[1];
void *HostPtr = CTX->MemoryMapper.GetPointer<void*>(Base);
if (!HostPtr) {
HostPtr = CTX->MapRegion(Thread, Base, Size, true);
}
else {
LogMan::Msg::D("\tMapping Fixed pointer in already mapped space: 0x%lx -> %p", Base, HostPtr);
}
if (HostFD != -1) {
#ifdef DEBUG_MMAP
// We are a file. Screw you I'm going to just memcpy you in to place
void *FileMem = mmap(nullptr, FileSizeToUse, Prot, MAP_PRIVATE, HostFD, Args->Argument[6]);
if (FileMem == MAP_FAILED) {
LogMan::Msg::A("Couldn't map file to %p\n", HostPtr);
}
memcpy(HostPtr, FileMem, FileSizeToUse);
munmap(FileMem, Size);
#else
void *FileMem = mmap(HostPtr, FileSizeToUse, Prot, MAP_PRIVATE | MAP_FIXED, HostFD, Args->Argument[6]);
if (FileMem == MAP_FAILED) {
LogMan::Msg::A("Couldn't map file to %p\n", HostPtr);
}
#endif
}
else {
LogMan::Throw::A(Args->Argument[6] == 0, "Don't understand a fixed offset mmap");
}
Result = Base;
}
else {
// XXX: MMAP should map memory regions for all threads
void *HostPtr = CTX->MapRegion(Thread, Base, Size, true);
if (HostFD != -1) {
#ifdef DEBUG_MMAP
// We are a file. Screw you I'm going to just memcpy you in to place
void *FileMem = mmap(nullptr, FileSizeToUse, Prot, MAP_PRIVATE, HostFD, Args->Argument[6]);
if (FileMem == MAP_FAILED) {
LogMan::Msg::A("Couldn't map file to %p\n", HostPtr);
}
memcpy(HostPtr, FileMem, FileSizeToUse);
munmap(FileMem, Size);
#else
void *FileMem = mmap(HostPtr, FileSizeToUse, Prot, MAP_PRIVATE | MAP_FIXED, HostFD, Args->Argument[6]);
if (FileMem == MAP_FAILED) {
LogMan::Msg::A("Couldn't map file to %p\n", HostPtr);
}
#endif
}
LastMMAP += Size;
Result = Base;
}
break;
}
case SYSCALL_MPROTECT: {
LogMan::Msg::D("\tMPROTECT: 0x%x, 0x%lx, 0x%lx", Args->Argument[1], Args->Argument[2], Args->Argument[3]);
void *HostPtr = CTX->MemoryMapper.GetPointer<void*>(Args->Argument[1]);
Result = mprotect(HostPtr, Args->Argument[2], Args->Argument[3]);
break;
}
case SYSCALL_ARCH_PRCTL: {
LogMan::Msg::D("\tPRTCL: 0x%x: 0x%lx", Args->Argument[1], Args->Argument[2]);
switch (Args->Argument[1]) {
case 0x1001: // ARCH_SET_GS
Thread->State.State.gs = Args->Argument[2];
break;
case 0x1002: // ARCH_SET_FS
Thread->State.State.fs = Args->Argument[2];
break;
case 0x1003: // ARCH_GET_FS
*CTX->MemoryMapper.GetPointer<uint64_t*>(Args->Argument[2]) = Thread->State.State.fs;
break;
case 0x1004: // ARCH_GET_GS
*CTX->MemoryMapper.GetPointer<uint64_t*>(Args->Argument[2]) = Thread->State.State.gs;
break;
default:
LogMan::Msg::E("Unknown prctl: 0x%x", Args->Argument[1]);
CTX->ShouldStop = true;
break;
}
Result = 0;
break;
}
// Thread management
case SYSCALL_GETUID:
Result = Thread->State.ThreadManager.GetUID();
break;
case SYSCALL_GETGID:
Result = Thread->State.ThreadManager.GetGID();
break;
case SYSCALL_GETEUID:
Result = Thread->State.ThreadManager.GetEUID();
break;
case SYSCALL_GETEGID:
Result = Thread->State.ThreadManager.GetEGID();
break;
case SYSCALL_GETTID:
Result = Thread->State.ThreadManager.GetTID();
break;
case SYSCALL_GETPID:
Result = Thread->State.ThreadManager.GetPID();
break;
case SYSCALL_EXIT:
LogMan::Msg::D("Thread exit with: %zd\n", Args->Argument[1]);
Thread->State.RunningEvents.ShouldStop = true;
break;
case SYSCALL_WAIT4:
LogMan::Msg::I("wait4(%lx,\n\t%lx,\n\t%lx,\n\t%lx)",
Args->Argument[1],
Args->Argument[2],
Args->Argument[3],
Args->Argument[4]);
break;
// Futexes
case SYSCALL_FUTEX: {
// 0 : uaddr
// 1 : op
// 2: val
// 3: utime
// 4: uaddr2
// 5: val3
LogMan::Msg::I("futex(%lx,\n\t%lx,\n\t%lx,\n\t%lx,\n\t%lx,\n\t%lx)",
Args->Argument[1],
Args->Argument[2],
Args->Argument[3],
Args->Argument[4],
Args->Argument[5],
Args->Argument[6]);
uint8_t Command = Args->Argument[2] & 0xF;
Result = 0;
switch (Command) {
case 0: { // WAIT
LogMan::Throw::A(!Args->Argument[4], "Can't handle timed futexes");
Futex *futex = new Futex{}; // XXX: Definitely a memory leak. When should we free this?
futex->Addr = CTX->MemoryMapper.GetPointer<std::atomic<uint32_t>*>(Args->Argument[1]);
futex->Val = Args->Argument[3];
EmplaceFutex(Args->Argument[1], futex);
{
std::unique_lock<std::mutex> lk(futex->Mutex);
futex->cv.wait(lk, [futex] { return futex->Addr->load() != futex->Val; });
}
break;
}
case 1: { // WAKE
Futex *futex = GetFutex(Args->Argument[1]);
if (!futex) {
Result = 0;
break;
}
for (uint64_t i = 0; i < Args->Argument[3]; ++i)
futex->cv.notify_one();
break;
}
default:
LogMan::Msg::A("Unknown futex command");
break;
}
break;
}
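// Illustrative pairing of the two commands above: a guest thread issuing
// FUTEX_WAIT(addr, val) blocks on futex->cv until *addr no longer equals val,
// while another thread stores a new value to *addr and issues
// FUTEX_WAKE(addr, n), firing futex->cv.notify_one() up to n times.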
case SYSCALL_CLONE: {
// 0: clone_flags
// 1: New SP
// 2: parent tidptr
// 3: child tidptr
// 4: TLS
LogMan::Msg::I("clone(%lx,\n\t%lx,\n\t%lx,\n\t%lx,\n\t%lx,\n\t%lx,\n\t%lx)",
Args->Argument[0],
Args->Argument[1],
Args->Argument[2],
Args->Argument[3],
Args->Argument[4],
Args->Argument[5]);
uint32_t Flags = Args->Argument[1];
uint64_t NewSP = Args->Argument[2];
uint64_t ParentTID = Args->Argument[3];
uint64_t ChildTID = Args->Argument[4];
uint64_t NewTLS = Args->Argument[5];
#define FLAGPRINT(x, y) if (Flags & (y)) LogMan::Msg::I("\tFlag: " #x)
FLAGPRINT(CSIGNAL, 0x000000FF);
FLAGPRINT(CLONE_VM, 0x00000100);
FLAGPRINT(CLONE_FS, 0x00000200);
FLAGPRINT(CLONE_FILES, 0x00000400);
FLAGPRINT(CLONE_SIGHAND, 0x00000800);
FLAGPRINT(CLONE_PTRACE, 0x00002000);
FLAGPRINT(CLONE_VFORK, 0x00004000);
FLAGPRINT(CLONE_PARENT, 0x00008000);
FLAGPRINT(CLONE_THREAD, 0x00010000);
FLAGPRINT(CLONE_NEWNS, 0x00020000);
FLAGPRINT(CLONE_SYSVSEM, 0x00040000);
FLAGPRINT(CLONE_SETTLS, 0x00080000);
FLAGPRINT(CLONE_PARENT_SETTID, 0x00100000);
FLAGPRINT(CLONE_CHILD_CLEARTID, 0x00200000);
FLAGPRINT(CLONE_DETACHED, 0x00400000);
FLAGPRINT(CLONE_UNTRACED, 0x00800000);
FLAGPRINT(CLONE_CHILD_SETTID, 0x01000000);
FLAGPRINT(CLONE_NEWCGROUP, 0x02000000);
FLAGPRINT(CLONE_NEWUTS, 0x04000000);
FLAGPRINT(CLONE_NEWIPC, 0x08000000);
FLAGPRINT(CLONE_NEWUSER, 0x10000000);
FLAGPRINT(CLONE_NEWPID, 0x20000000);
FLAGPRINT(CLONE_NEWNET, 0x40000000);
FLAGPRINT(CLONE_IO, 0x80000000);
#undef FLAGPRINT
FEXCore::Core::CPUState NewThreadState{};
// Clone copies the parent thread's state
memcpy(&NewThreadState, &Thread->State.State, sizeof(FEXCore::Core::CPUState));
NewThreadState.gregs[FEXCore::X86State::REG_RAX] = 0;
NewThreadState.gregs[FEXCore::X86State::REG_RBX] = 0;
NewThreadState.gregs[FEXCore::X86State::REG_RBP] = 0;
NewThreadState.gregs[FEXCore::X86State::REG_RSP] = NewSP;
NewThreadState.fs = NewTLS;
// Set us to start just after the syscall instruction
NewThreadState.rip += 2;
auto NewThread = CTX->CreateThread(&NewThreadState, ParentTID, ChildTID);
CTX->CopyMemoryMapping(Thread, NewThread);
// Sets the child TID to pointer in ParentTID
if (Flags & CLONE_PARENT_SETTID) {
uint64_t *TIDPtr = CTX->MemoryMapper.GetPointer<uint64_t*>(ParentTID);
TIDPtr[0] = NewThread->State.ThreadManager.GetTID();
}
// Sets the child TID to the pointer in ChildTID
if (Flags & CLONE_CHILD_SETTID) {
uint64_t *TIDPtr = CTX->MemoryMapper.GetPointer<uint64_t*>(ChildTID);
TIDPtr[0] = NewThread->State.ThreadManager.GetTID();
}
// When the thread exits, clear the child thread ID at ChildTID
// Additionally wakeup a futex at that address
// Address /may/ be changed with SET_TID_ADDRESS syscall
if (Flags & CLONE_CHILD_CLEARTID) {
}
CTX->InitializeThread(NewThread);
// Actually start the thread
CTX->RunThread(NewThread);
// Return the new threads TID
Result = NewThread->State.ThreadManager.GetTID();
break;
}
// File management
case SYSCALL_READ:
Result = FM.Read(Args->Argument[1],
CTX->MemoryMapper.GetPointer(Args->Argument[2]),
Args->Argument[3]);
break;
case SYSCALL_WRITE:
Result = FM.Write(Args->Argument[1],
CTX->MemoryMapper.GetPointer(Args->Argument[2]),
Args->Argument[3]);
break;
case SYSCALL_OPEN:
Result = FM.Open(CTX->MemoryMapper.GetPointer<char const*>(Args->Argument[1]),
Args->Argument[2],
Args->Argument[3]);
break;
case SYSCALL_CLOSE:
Result = FM.Close(Args->Argument[1]);
break;
case SYSCALL_STAT:
Result = FM.Stat(CTX->MemoryMapper.GetPointer<char const*>(Args->Argument[1]),
CTX->MemoryMapper.GetPointer(Args->Argument[2]));
break;
case SYSCALL_FSTAT:
Result = FM.Fstat(Args->Argument[1],
CTX->MemoryMapper.GetPointer(Args->Argument[2]));
break;
case SYSCALL_LSEEK:
Result = FM.Lseek(Args->Argument[1],
Args->Argument[2],
Args->Argument[3]);
break;
case SYSCALL_WRITEV:
Result = FM.Writev(Args->Argument[1],
CTX->MemoryMapper.GetPointer(Args->Argument[2]),
Args->Argument[3]);
break;
case SYSCALL_ACCESS:
Result = FM.Access(
CTX->MemoryMapper.GetPointer<const char*>(Args->Argument[1]),
Args->Argument[2]);
break;
case SYSCALL_READLINK:
Result = FM.Readlink(
CTX->MemoryMapper.GetPointer<const char*>(Args->Argument[1]),
CTX->MemoryMapper.GetPointer<char*>(Args->Argument[2]),
Args->Argument[3]);
break;
case SYSCALL_OPENAT:
Result = FM.Openat(
Args->Argument[1],
CTX->MemoryMapper.GetPointer<const char*>(Args->Argument[2]),
Args->Argument[3],
Args->Argument[4]);
break;
case SYSCALL_CLOCK_GETTIME: {
timespec *ClockResult = CTX->MemoryMapper.GetPointer<timespec*>(Args->Argument[2]);
Result = clock_gettime(Args->Argument[1], ClockResult);
// XXX: Debug
// memset(ClockResult, 0, sizeof(timespec));
}
break;
case SYSCALL_NANOSLEEP: {
timespec const* req = CTX->MemoryMapper.GetPointer<timespec const*>(Args->Argument[1]);
timespec *rem = CTX->MemoryMapper.GetPointer<timespec*>(Args->Argument[2]);
Result = nanosleep(req, rem);
break;
}
case SYSCALL_SET_TID_ADDRESS: {
Thread->State.ThreadManager.child_tid = Args->Argument[1];
Result = Thread->State.ThreadManager.GetTID();
break;
}
case SYSCALL_SET_ROBUST_LIST: {
Thread->State.ThreadManager.robust_list_head = Args->Argument[1];
Result = 0;
break;
}
case SYSCALL_PRLIMIT64: {
LogMan::Throw::A(Args->Argument[3] == 0, "Guest trying to set limit for %ld", Args->Argument[2]);
struct rlimit {
uint64_t rlim_cur;
uint64_t rlim_max;
};
switch (Args->Argument[2]) {
case 3: { // Stack limits
// Argument[4] is the old_limit output pointer (Argument[3], the new limit, was checked to be null above)
rlimit *old_limit = CTX->MemoryMapper.GetPointer<rlimit*>(Args->Argument[4]);
// Default stack size limit of 8MiB
old_limit->rlim_cur = 8 * 1024 * 1024;
old_limit->rlim_max = ~0ULL;
break;
}
default: LogMan::Msg::A("Unknown PRLimit: %d", Args->Argument[2]);
}
Result = 0;
break;
}
// Currently unhandled
// Return fake result
case SYSCALL_RT_SIGACTION:
case SYSCALL_RT_SIGPROCMASK:
case SYSCALL_EXIT_GROUP:
case SYSCALL_TGKILL:
case SYSCALL_MUNMAP:
Result = 0;
break;
default:
Result = -1;
LogMan::Msg::A("Unknown syscall: %d", Args->Argument[0]);
break;
}
return Result;
}
}

View File

@ -0,0 +1,106 @@
#pragma once
#include "Interface/HLE/FileManagement.h"
#include <FEXCore/HLE/SyscallHandler.h>
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <unordered_map>
namespace FEXCore::Context {
struct Context;
}
namespace FEXCore::Core {
struct InternalThreadState;
}
namespace FEXCore {
///< Enum containing all x86-64 Linux syscalls that we support
enum Syscalls {
SYSCALL_READ = 0, ///< __NR_read
SYSCALL_WRITE = 1, ///< __NR_write
SYSCALL_OPEN = 2, ///< __NR_open
SYSCALL_CLOSE = 3, ///< __NR_close
SYSCALL_STAT = 4, ///< __NR_stat
SYSCALL_FSTAT = 5, ///< __NR_fstat
SYSCALL_LSEEK = 8, ///< __NR_lseek
SYSCALL_MMAP = 9, ///< __NR_mmap
SYSCALL_MPROTECT = 10, ///< __NR_mprotect
SYSCALL_MUNMAP = 11, ///< __NR_munmap
SYSCALL_BRK = 12, ///< __NR_brk
SYSCALL_RT_SIGACTION = 13, ///< __NR_rt_sigaction
SYSCALL_RT_SIGPROCMASK = 14, ///< __NR_rt_sigprocmask
SYSCALL_WRITEV = 20, ///< __NR_writev
SYSCALL_ACCESS = 21, ///< __NR_access
SYSCALL_NANOSLEEP = 35, ///< __NR_nanosleep
SYSCALL_GETPID = 39, ///< __NR_getpid
SYSCALL_CLONE = 56, ///< __NR_clone
SYSCALL_EXIT = 60, ///< __NR_exit
SYSCALL_WAIT4 = 61, ///< __NR_wait4
SYSCALL_UNAME = 63, ///< __NR_uname
SYSCALL_READLINK = 89, ///< __NR_readlink
SYSCALL_GETUID = 102, ///< __NR_getuid
SYSCALL_GETGID = 104, ///< __NR_getgid
SYSCALL_GETEUID = 107, ///< __NR_geteuid
SYSCALL_GETEGID = 108, ///< __NR_getegid
SYSCALL_ARCH_PRCTL = 158, ///< __NR_arch_prctl
SYSCALL_GETTID = 186, ///< __NR_gettid
SYSCALL_FUTEX = 202, ///< __NR_futex
SYSCALL_SET_TID_ADDRESS = 218, ///< __NR_set_tid_address
SYSCALL_CLOCK_GETTIME = 228, ///< __NR_clock_gettime
SYSCALL_EXIT_GROUP = 231, ///< __NR_exit_group
SYSCALL_TGKILL = 234, ///< __NR_tgkill
SYSCALL_OPENAT = 257, ///< __NR_openat
SYSCALL_SET_ROBUST_LIST = 273, ///< __NR_set_robust_list
SYSCALL_PRLIMIT64 = 302, ///< __NR_prlimit64
};
struct Futex {
std::mutex Mutex;
std::condition_variable cv;
std::atomic<uint32_t> *Addr;
uint32_t Val;
};
class SyscallHandler final {
public:
SyscallHandler(FEXCore::Context::Context *ctx) : CTX {ctx}, FM {ctx} {}
uint64_t HandleSyscall(FEXCore::Core::InternalThreadState *Thread, FEXCore::HLE::SyscallArguments *Args);
// XXX: This leaks memory.
// Need to know when to delete futexes
void EmplaceFutex(uint64_t Addr, Futex *futex) {
std::scoped_lock<std::mutex> lk(FutexMutex);
Futexes[Addr] = futex;
}
Futex *GetFutex(uint64_t Addr) {
std::scoped_lock<std::mutex> lk (FutexMutex);
return Futexes[Addr];
}
void DefaultProgramBreak(FEXCore::Core::InternalThreadState *Thread, uint64_t Addr);
void SetFilename(std::string const &File) { FM.SetFilename(File); }
std::string const & GetFilename() const { return FM.GetFilename(); }
private:
FEXCore::Context::Context *CTX;
FileManager FM;
// Futex management
std::unordered_map<uint64_t, Futex*> Futexes;
std::mutex FutexMutex;
// BRK management
uint64_t DataSpace {};
uint64_t DataSpaceSize {};
uint64_t DefaultProgramBreakAddress {};
// MMap management
uint64_t LastMMAP = 0xd000'0000;
};
}

View File

@ -0,0 +1,66 @@
#include <FEXCore/IR/IR.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <sstream>
#include <string>
namespace FEXCore::IR {
#define IROP_GETNAME_IMPL
#include "IRDefines.inc"
static void PrintArg(std::stringstream *out, [[maybe_unused]] IRListView<false> const* IR, uint64_t Arg) {
*out << "0x" << std::hex << Arg;
}
static void PrintArg(std::stringstream *out, IRListView<false> const* IR, NodeWrapper Arg) {
uintptr_t Data = IR->GetData();
uintptr_t ListBegin = IR->GetListData();
OrderedNode *RealNode = reinterpret_cast<OrderedNode*>(Arg.GetPtr(ListBegin));
auto IROp = RealNode->Op(Data);
*out << "%ssa" << std::to_string(Arg.NodeOffset / sizeof(OrderedNode)) << " i" << std::dec << (IROp->Size * 8);
if (IROp->Elements > 1) {
*out << "v" << std::dec << IROp->Elements;
}
}
void Dump(std::stringstream *out, IRListView<false> const* IR) {
uintptr_t Data = IR->GetData();
uintptr_t ListBegin = IR->GetListData();
auto Begin = IR->begin();
auto End = IR->end();
while (Begin != End) {
auto Op = Begin();
OrderedNode *RealNode = reinterpret_cast<OrderedNode*>(Op->GetPtr(ListBegin));
auto IROp = RealNode->Op(Data);
auto Name = FEXCore::IR::GetName(IROp->Op);
if (IROp->HasDest) {
*out << "%ssa" << std::to_string(Op->NodeOffset / sizeof(OrderedNode)) << " i" << std::dec << (IROp->Size * 8);
if (IROp->Elements > 1) {
*out << "v" << std::dec << IROp->Elements;
}
*out << " = ";
}
*out << Name;
switch (IROp->Op) {
case IR::OP_BEGINBLOCK:
*out << " %ssa" << std::to_string(Op->ID());
break;
default: break;
}
#define IROP_ARGPRINTER_HELPER
#include "IRDefines.inc"
default: *out << "<Unknown Args>"; break;
}
*out << "\n";
++Begin;
}
}
}
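// Example of the text form this produces (illustrative):
//   %ssa2 i64 = Add %ssa0 i64, %ssa1 i64
// where %ssaN is derived from the node's offset in the intrusive list and iN
// is the op size in bits.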

526
Source/Interface/IR/IR.json Normal file
View File

@ -0,0 +1,526 @@
{
"Defines": [
"constexpr static uint8_t COND_EQ = 0",
"constexpr static uint8_t COND_NEQ = 1",
"constexpr static uint8_t COND_CS = 2",
"constexpr static uint8_t COND_CC = 3",
"constexpr static uint8_t COND_MI = 4",
"constexpr static uint8_t COND_PL = 5",
"constexpr static uint8_t COND_VS = 6",
"constexpr static uint8_t COND_VC = 7",
"constexpr static uint8_t COND_HI = 8",
"constexpr static uint8_t COND_LS = 9",
"constexpr static uint8_t COND_GE = 10",
"constexpr static uint8_t COND_LT = 11",
"constexpr static uint8_t COND_GT = 12",
"constexpr static uint8_t COND_LE = 13"
],
"Ops": {
"Dummy": {
},
"Constant": {
"HasDest": true,
"FixedDestSize": "8",
"Args": [
"uint64_t", "Constant"
]
},
"BeginBlock": {},
"EndBlock": {
"Args": [
"uint64_t", "RIPIncrement"
]
},
"Break": {
"Args": [
"uint8_t", "Reason",
"uint8_t", "Literal"
]
},
"EndFunction": {},
"ExitFunction": {},
"Jump": {
"DispatcherUnary": true,
"SSAArgs": "1"
},
"CondJump": {
"SSAArgs": "2"
},
"Mov": {
"HasDest": true,
"DestSize": "GetOpSize(ssa0)",
"SSAArgs": "1"
},
"CycleCounter": {
"HasDest": true,
"FixedDestSize": "8"
},
"LoadContext": {
"HasDest": true,
"DestSize": "Size",
"Args": [
"uint8_t", "Size",
"uint32_t", "Offset"
]
},
"StoreContext": {
"SSAArgs": "1",
"Args": [
"uint8_t", "Size",
"uint32_t", "Offset"
]
},
"LoadFlag": {
"HasDest": true,
"DestSize": "1",
"Args": [
"uint32_t", "Flag"
]
},
"StoreFlag": {
"SSAArgs": "1",
"Args": [
"uint32_t", "Flag"
]
},
"Syscall": {
"HasDest": true,
"FixedDestSize": "8",
"SSAArgs": "7"
},
"LoadMem": {
"HasDest": true,
"DestSize": "Size",
"SSAArgs": "1",
"Args": [
"uint8_t", "Size"
]
},
"StoreMem": {
"SSAArgs": "2",
"Args": [
"uint8_t", "Size"
]
},
"Add": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"Sub": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"Mul": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"UMul": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"Div": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"UDiv": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"Rem": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"URem": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"MulH": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"UMulH": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"Or": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"And": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"Xor": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"Lshl": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"Lshr": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"Ashr": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"Rol": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"Ror": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"LDiv": {
"HasDest": true,
"SSAArgs": "3"
},
"LUDiv": {
"HasDest": true,
"SSAArgs": "3"
},
"LRem": {
"HasDest": true,
"SSAArgs": "3"
},
"LURem": {
"HasDest": true,
"SSAArgs": "3"
},
"Zext": {
"HasDest": true,
"DestSize": "SrcSize / 4",
"SSAArgs": "1",
"Args": [
"uint8_t", "SrcSize"
]
},
"Sext": {
"HasDest": true,
"DestSize": "SrcSize / 4",
"SSAArgs": "1",
"Args": [
"uint8_t", "SrcSize"
]
},
"Neg": {
"HasDest": true,
"DispatcherUnary": true,
"SSAArgs": "1"
},
"Popcount": {
"HasDest": true,
"DispatcherUnary": true,
"SSAArgs": "1"
},
"FindLSB": {
"HasDest": true,
"DispatcherUnary": true,
"SSAArgs": "1"
},
"FindMSB": {
"HasDest": true,
"DispatcherUnary": true,
"SSAArgs": "1"
},
"Rev": {
"HasDest": true,
"DispatcherUnary": true,
"SSAArgs": "1"
},
"CPUID": {
"HasDest": true,
"FixedDestSize": "4",
"NumElements": "4",
"SSAArgs": "1"
},
"Bfi": {
"HasDest": true,
"DestSize": "GetOpSize(ssa0)",
"SSAArgs": "2",
"Args": [
"uint8_t", "Width",
"uint8_t", "lsb"
]
},
"Bfe": {
"HasDest": true,
"DestSize": "GetOpSize(ssa0)",
"SSAArgs": "1",
"Args": [
"uint8_t", "Width",
"uint8_t", "lsb"
]
},
"Sbfe": {
"HasDest": true,
"SSAArgs": "1",
"Args": [
"uint8_t", "Width",
"uint8_t", "lsb"
]
},
"Select": {
"HasDest": true,
"SSAArgs": "4",
"Args": [
"uint8_t", "Cond"
]
},
"CAS": {
"HasDest": true,
"DestSize": "GetOpSize(ssa0)",
"SSAArgs": "3"
},
"CreateVector2": {
"HasDest": true,
"DestSize": "GetOpSize(ssa0) * 2",
"SSAArgs": "2"
},
"CreateVector3": {
"HasDest": true,
"DestSize": "GetOpSize(ssa0) * 3",
"SSAArgs": "3"
},
"CreateVector4": {
"HasDest": true,
"DestSize": "GetOpSize(ssa0) * 4",
"SSAArgs": "4"
},
"SplatVector2": {
"HasDest": true,
"NumElements": "2",
"DestSize": "GetOpSize(ssa0) * 2",
"SSAArgs": "1"
},
"SplatVector3": {
"HasDest": true,
"NumElements": "3",
"DestSize": "GetOpSize(ssa0) * 3",
"SSAArgs": "1"
},
"SplatVector4": {
"HasDest": true,
"NumElements": "4",
"DestSize": "GetOpSize(ssa0) * 4",
"SSAArgs": "1"
},
"ExtractElement": {
"HasDest": true,
"DestSize": "GetOpSize(ssa0)",
"SSAArgs": "1",
"Args": [
"uint8_t", "Idx"
]
},
"VOr": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"VXor": {
"HasDest": true,
"Dispatcher": true,
"SSAArgs": "2"
},
"VAdd": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VSub": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VUMin": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VSMin": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VZip": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VZip2": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VCMPEQ": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VCMPGT": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VUShl": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VUShlS": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VUShr": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VInsElement": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize",
"uint8_t", "DestIdx",
"uint8_t", "SrcIdx"
]
},
"Print": {
"DispatcherUnary": true,
"SSAArgs": "1"
},
"Last": {
"Last": true,
"Args": []
}
}
}

View File

@ -0,0 +1,27 @@
#include "Interface/IR/Passes.h"
#include "Interface/IR/PassManager.h"
namespace FEXCore::IR {
void PassManager::AddDefaultPasses() {
// Passes.emplace_back(std::unique_ptr<FEXCore::IR::Pass>(CreateConstProp()));
// Passes.emplace_back(std::unique_ptr<FEXCore::IR::Pass>(CreateRedundantContextLoadElimination()));
// Passes.emplace_back(std::unique_ptr<FEXCore::IR::Pass>(CreateRedundantFlagCalculationEliminination()));
// Passes.emplace_back(std::unique_ptr<FEXCore::IR::Pass>(CreateSyscallOptimization()));
// Passes.emplace_back(std::unique_ptr<FEXCore::IR::Pass>(CreatePassDeadContextStoreElimination()));
//
// Passes.emplace_back(std::unique_ptr<FEXCore::IR::Pass>(CreateIRCompaction()));
}
void PassManager::AddDefaultValidationPasses() {
Passes.emplace_back(std::unique_ptr<FEXCore::IR::Pass>(Validation::CreateIRValidation()));
}
bool PassManager::Run(OpDispatchBuilder *Disp) {
bool Changed = false;
for (auto const &Pass : Passes) {
Changed |= Pass->Run(Disp);
}
return Changed;
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <FEXCore/IR/IntrusiveIRList.h>
#include <memory>
#include <vector>
namespace FEXCore::IR {
class OpDispatchBuilder;
class Pass {
public:
virtual ~Pass() = default;
virtual bool Run(OpDispatchBuilder *Disp) = 0;
};
class PassManager final {
public:
void AddDefaultPasses();
void AddDefaultValidationPasses();
bool Run(OpDispatchBuilder *Disp);
private:
std::vector<std::unique_ptr<Pass>> Passes;
};
}
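// A minimal usage sketch (illustrative; Disp is an OpDispatchBuilder* that
// holds the IR being transformed):
//
//   FEXCore::IR::PassManager Manager;
//   Manager.AddDefaultPasses();
//   Manager.AddDefaultValidationPasses();
//   bool Changed = Manager.Run(Disp);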

View File

@ -0,0 +1,17 @@
#pragma once
namespace FEXCore::IR {
class Pass;
FEXCore::IR::Pass* CreateConstProp();
FEXCore::IR::Pass* CreateRedundantContextLoadElimination();
FEXCore::IR::Pass* CreatePassDeadContextStoreElimination();
FEXCore::IR::Pass* CreateSyscallOptimization();
FEXCore::IR::Pass* CreateRedundantFlagCalculationEliminination();
FEXCore::IR::Pass* CreateIRCompaction();
namespace Validation {
FEXCore::IR::Pass* CreateIRValidation();
}
}

View File

@ -0,0 +1,51 @@
#include "Interface/IR/PassManager.h"
#include "Interface/Core/OpcodeDispatcher.h"
namespace FEXCore::IR {
class ConstProp final : public FEXCore::IR::Pass {
public:
bool Run(OpDispatchBuilder *Disp) override;
};
bool ConstProp::Run(OpDispatchBuilder *Disp) {
bool Changed = false;
auto CurrentIR = Disp->ViewIR();
uintptr_t ListBegin = CurrentIR.GetListData();
uintptr_t DataBegin = CurrentIR.GetData();
IR::NodeWrapperIterator Begin = CurrentIR.begin();
IR::NodeWrapperIterator End = CurrentIR.end();
while (Begin != End) {
NodeWrapper *WrapperOp = Begin();
OrderedNode *RealNode = reinterpret_cast<OrderedNode*>(WrapperOp->GetPtr(ListBegin));
FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin);
switch (IROp->Op) {
case OP_ZEXT: {
auto Op = IROp->C<IR::IROp_Zext>();
uint64_t Constant;
if (Disp->IsValueConstant(Op->Header.Args[0], &Constant)) {
uint64_t NewConstant = Constant & ((1ULL << Op->SrcSize) - 1);
auto ConstantVal = Disp->_Constant(NewConstant);
Disp->ReplaceAllUsesWith(RealNode, ConstantVal);
Changed = true;
}
break;
}
default: break;
}
++Begin;
}
return Changed;
}
FEXCore::IR::Pass* CreateConstProp() {
return new ConstProp{};
}
}
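// For example (illustrative, assuming SrcSize is in bits): a Zext with
// SrcSize = 8 applied to Constant 0x1FF folds to Constant 0xFF, and every
// user of the Zext is redirected to the new constant via ReplaceAllUsesWith.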

View File

@ -0,0 +1,116 @@
#include "Interface/IR/PassManager.h"
#include "Interface/Core/OpcodeDispatcher.h"
#include <FEXCore/Core/CoreState.h>
namespace FEXCore::IR {
class DCSE final : public FEXCore::IR::Pass {
public:
bool Run(OpDispatchBuilder *Disp) override;
};
bool DCSE::Run(OpDispatchBuilder *Disp) {
//printf("Doing DCSE run\n");
return false;
}
FEXCore::IR::Pass* CreatePassDeadContextStoreElimination() {
return new DCSE{};
}
class RCLE final : public FEXCore::IR::Pass {
public:
bool Run(OpDispatchBuilder *Disp) override;
};
static bool IsAlignedGPR(uint8_t Size, uint32_t Offset, uint8_t *greg) {
if (Size != 8) return false;
if (Offset & 0b111) return false;
if (Offset < offsetof(FEXCore::Core::CPUState, gregs[0]) || Offset > offsetof(FEXCore::Core::CPUState, gregs[15])) return false;
*greg = (Offset - offsetof(FEXCore::Core::CPUState, gregs[0])) / 8;
return true;
}
static bool IsGPR(uint32_t Offset, uint8_t *greg) {
if (Offset < offsetof(FEXCore::Core::CPUState, gregs[0]) || Offset > offsetof(FEXCore::Core::CPUState, gregs[15])) return false;
*greg = (Offset - offsetof(FEXCore::Core::CPUState, gregs[0])) / 8;
return true;
}
bool RCLE::Run(OpDispatchBuilder *Disp) {
bool Changed = false;
auto CurrentIR = Disp->ViewIR();
uintptr_t ListBegin = CurrentIR.GetListData();
uintptr_t DataBegin = CurrentIR.GetData();
IR::NodeWrapperIterator Begin = CurrentIR.begin();
IR::NodeWrapperIterator End = CurrentIR.end();
std::array<NodeWrapper*, 16> LastValidGPRStores{};
while (Begin != End) {
NodeWrapper *WrapperOp = Begin();
OrderedNode *RealNode = reinterpret_cast<OrderedNode*>(WrapperOp->GetPtr(ListBegin));
FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin);
if (IROp->Op == OP_BEGINBLOCK ||
IROp->Op == OP_ENDBLOCK ||
IROp->Op == OP_JUMP ||
IROp->Op == OP_CONDJUMP ||
IROp->Op == OP_EXITFUNCTION) {
// We don't track across block boundaries
LastValidGPRStores.fill(nullptr);
}
if (IROp->Op == OP_STORECONTEXT) {
auto Op = IROp->CW<IR::IROp_StoreContext>();
// Make sure we are within GREG state
uint8_t greg = ~0;
if (IsAlignedGPR(Op->Size, Op->Offset, &greg)) {
FEXCore::IR::IROp_Header *ArgOp = reinterpret_cast<OrderedNode*>(Op->Header.Args[0].GetPtr(ListBegin))->Op(DataBegin);
// Ensure we aren't doing a mismatched store
// XXX: We should really catch this in IR validation
if (ArgOp->Size == 8) {
LastValidGPRStores[greg] = &Op->Header.Args[0];
}
else {
LastValidGPRStores[greg] = nullptr;
}
} else if (IsGPR(Op->Offset, &greg)) {
// If we aren't overwriting the whole state then we don't want to track this value
LastValidGPRStores[greg] = nullptr;
}
}
if (IROp->Op == OP_LOADCONTEXT) {
auto Op = IROp->C<IR::IROp_LoadContext>();
// Make sure we are within GREG state
uint8_t greg = ~0;
if (IsAlignedGPR(Op->Size, Op->Offset, &greg)) {
if (LastValidGPRStores[greg] != nullptr) {
// If the last store matches this load value then we can replace the loaded value with the previous valid one
auto MovVal = Disp->_Mov(reinterpret_cast<OrderedNode*>(LastValidGPRStores[greg]->GetPtr(ListBegin)));
Disp->ReplaceAllUsesWith(RealNode, MovVal);
Changed = true;
}
} else if (IsGPR(Op->Offset, &greg)) {
// If we aren't overwriting the whole state then we don't want to track this value
LastValidGPRStores[greg] = nullptr;
}
}
++Begin;
}
return Changed;
}
FEXCore::IR::Pass* CreateRedundantContextLoadElimination() {
return new RCLE{};
}
}
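// For example (illustrative): an 8-byte StoreContext of %ssa0 to gregs[RAX]
// followed by an 8-byte LoadContext of the same offset lets the load be
// replaced with Mov %ssa0, skipping the round-trip through context memory.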

View File

@ -0,0 +1,121 @@
#include "Interface/IR/PassManager.h"
#include "Interface/Core/OpcodeDispatcher.h"
#include <cstring>
#include <map>
namespace FEXCore::IR {
class IRCompaction final : public FEXCore::IR::Pass {
public:
IRCompaction();
bool Run(OpDispatchBuilder *Disp) override;
private:
OpDispatchBuilder LocalBuilder;
std::vector<IR::NodeWrapper::NodeOffsetType> NodeLocationRemapper;
};
IRCompaction::IRCompaction() {
NodeLocationRemapper.resize(9000);
}
bool IRCompaction::Run(OpDispatchBuilder *Disp) {
auto CurrentIR = Disp->ViewIR();
auto LocalIR = LocalBuilder.ViewIR();
uint32_t NodeCount = CurrentIR.GetListSize() / sizeof(OrderedNode);
// Reset our local working list
LocalBuilder.ResetWorkingList();
if (NodeLocationRemapper.size() < NodeCount) {
NodeLocationRemapper.resize(NodeCount);
}
memset(&NodeLocationRemapper.at(0), 0xFF, NodeCount * sizeof(IR::NodeWrapper::NodeOffsetType));
uintptr_t LocalListBegin = LocalIR.GetListData();
uintptr_t LocalDataBegin = LocalIR.GetData();
uintptr_t ListBegin = CurrentIR.GetListData();
uintptr_t DataBegin = CurrentIR.GetData();
IR::NodeWrapperIterator Begin = CurrentIR.begin();
IR::NodeWrapperIterator End = CurrentIR.end();
// This compaction pass is needed to ensure correct ordering and distances between IROps
// Later on we assume that an IROp's SSA value live range is its Node locations
//
// RA distance calculation is based purely on the Node locations
// So we just need to reorder those
//
// Additionally there may be some dead ops hanging out in the IR list that are orphaned.
// These can also be dropped during this pass
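// For example (illustrative): live nodes left at sparse IDs {0, 3, 7} are
// copied to dense IDs {0, 1, 2}, and NodeLocationRemapper is then used in the
// second loop to rewrite every SSA argument to the new locations.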
while (Begin != End) {
NodeWrapper *WrapperOp = Begin();
OrderedNode *RealNode = reinterpret_cast<OrderedNode*>(WrapperOp->GetPtr(ListBegin));
FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin);
size_t OpSize = FEXCore::IR::GetSize(IROp->Op);
// Allocate the ops locally for our local dispatch
auto LocalPair = LocalBuilder.AllocateRawOp(OpSize);
IR::NodeWrapper LocalNodeWrapper = LocalPair.Node->Wrapped(LocalListBegin);
// Copy over the op
memcpy(LocalPair.first, IROp, OpSize);
// Set our map remapper to map the new location
// Even nodes that don't have a destination need to be in this map
// Need to be able to remap branch targets any other bits
NodeLocationRemapper[WrapperOp->ID()] = LocalNodeWrapper.ID();
++Begin;
}
Begin = CurrentIR.begin();
while (Begin != End) {
NodeWrapper *WrapperOp = Begin();
OrderedNode *RealNode = reinterpret_cast<OrderedNode*>(WrapperOp->GetPtr(ListBegin));
FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin);
NodeWrapper LocalNodeWrapper = NodeWrapper::WrapOffset(NodeLocationRemapper[WrapperOp->ID()] * sizeof(OrderedNode));
OrderedNode *LocalNode = reinterpret_cast<OrderedNode*>(LocalNodeWrapper.GetPtr(LocalListBegin));
FEXCore::IR::IROp_Header *LocalIROp = LocalNode->Op(LocalDataBegin);
// Now that we have the op copied over, we need to modify SSA values to point to the new correct locations
for (uint8_t i = 0; i < IROp->NumArgs; ++i) {
NodeWrapper OldArg = IROp->Args[i];
LogMan::Throw::A(NodeLocationRemapper[OldArg.ID()] != ~0U, "Tried remapping unfound node");
LocalIROp->Args[i].NodeOffset = NodeLocationRemapper[OldArg.ID()] * sizeof(OrderedNode);
}
++Begin;
}
// uintptr_t OldListSize = CurrentIR.GetListSize();
// uintptr_t OldDataSize = CurrentIR.GetDataSize();
//
// uintptr_t NewListSize = LocalIR.GetListSize();
// uintptr_t NewDataSize = LocalIR.GetDataSize();
//
// if (NewListSize < OldListSize ||
// NewDataSize < OldDataSize) {
// if (NewListSize < OldListSize) {
// LogMan::Msg::D("Shaved %ld bytes off the list size", OldListSize - NewListSize);
// }
// if (NewDataSize < OldDataSize) {
// LogMan::Msg::D("Shaved %ld bytes off the data size", OldDataSize - NewDataSize);
// }
// }
// if (NewListSize > OldListSize ||
// NewDataSize > OldDataSize) {
// LogMan::Msg::A("Whoa. Compaction made the IR a different size when it shouldn't have. 0x%lx > 0x%lx or 0x%lx > 0x%lx",NewListSize, OldListSize, NewDataSize, OldDataSize);
// }
Disp->CopyData(LocalBuilder);
return true;
}
FEXCore::IR::Pass* CreateIRCompaction() {
return new IRCompaction{};
}
}


@ -0,0 +1,153 @@
#include "Interface/IR/PassManager.h"
#include "Interface/Core/OpcodeDispatcher.h"
#include <iostream>
namespace FEXCore::IR::Validation {
struct BlockInfo {
IR::NodeWrapper *Begin;
IR::NodeWrapper *End;
bool HasExit;
std::vector<IR::NodeWrapper*> Predecessors;
std::vector<IR::NodeWrapper*> Successors;
};
class IRValidation final : public FEXCore::IR::Pass {
public:
bool Run(OpDispatchBuilder *Disp) override;
private:
std::unordered_map<IR::NodeWrapper::NodeOffsetType, BlockInfo> OffsetToBlockMap;
};
bool IRValidation::Run(OpDispatchBuilder *Disp) {
bool HadError = false;
auto CurrentIR = Disp->ViewIR();
uintptr_t ListBegin = CurrentIR.GetListData();
uintptr_t DataBegin = CurrentIR.GetData();
IR::NodeWrapperIterator Begin = CurrentIR.begin();
IR::NodeWrapperIterator End = CurrentIR.end();
bool InBlock = false;
BlockInfo *CurrentBlock {};
std::ostringstream Errors;
while (Begin != End) {
NodeWrapper *WrapperOp = Begin();
OrderedNode *RealNode = reinterpret_cast<OrderedNode*>(WrapperOp->GetPtr(ListBegin));
FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin);
uint8_t OpSize = IROp->Size;
if (IROp->HasDest) {
HadError |= OpSize == 0;
if (OpSize == 0) {
Errors << "%ssa" << WrapperOp->NodeOffset << ": Had destination but with no size" << std::endl;
}
}
switch (IROp->Op) {
case OP_BEGINBLOCK: {
HadError |= InBlock;
if (InBlock) {
Errors << "BasicBlock " << WrapperOp->NodeOffset << ": Begin in middle of block" << std::endl;
}
auto Block = OffsetToBlockMap.try_emplace(WrapperOp->NodeOffset, BlockInfo{}).first;
CurrentBlock = &Block->second;
CurrentBlock->Begin = WrapperOp;
InBlock = true;
break;
}
case OP_ENDBLOCK: {
HadError |= !InBlock;
if (!InBlock) {
Errors << "BasicBlock " << WrapperOp->NodeOffset << ": End loose without a begin" << std::endl;
}
if (CurrentBlock) {
// XXX: Enable once fallthrough is handled
// HadError |= !CurrentBlock->HasExit && CurrentBlock->Successors.size() == 0;
// if (!CurrentBlock->HasExit && CurrentBlock->Successors.size() == 0) {
// Errors << "BasicBlock " << WrapperOp->NodeOffset << ": Didn't have an exit and didn't have any successors. (Fallthrough?)" << std::endl;
// }
CurrentBlock->End = WrapperOp;
CurrentBlock = nullptr;
}
InBlock = false;
break;
}
case IR::OP_EXITFUNCTION:
case IR::OP_ENDFUNCTION: {
if (CurrentBlock) {
CurrentBlock->HasExit = true;
}
break;
}
case IR::OP_CONDJUMP: {
auto Op = IROp->C<IR::IROp_CondJump>();
auto IterLocation = NodeWrapperIterator(ListBegin, Op->Header.Args[1]);
if (CurrentBlock) {
CurrentBlock->Successors.emplace_back(IterLocation());
}
OrderedNode *TargetNode = reinterpret_cast<OrderedNode*>(IterLocation()->GetPtr(ListBegin));
FEXCore::IR::IROp_Header *TargetOp = TargetNode->Op(DataBegin);
HadError |= TargetOp->Op != OP_BEGINBLOCK;
if (TargetOp->Op != OP_BEGINBLOCK) {
Errors << "CondJump " << WrapperOp->NodeOffset << ": CondJump to Op that isn't the begining of a block" << std::endl;
}
else {
auto Block = OffsetToBlockMap.try_emplace(IterLocation()->NodeOffset, BlockInfo{}).first;
Block->second.Predecessors.emplace_back(CurrentBlock->Begin);
}
break;
}
case IR::OP_JUMP: {
auto Op = IROp->C<IR::IROp_Jump>();
auto IterLocation = NodeWrapperIterator(ListBegin, Op->Header.Args[0]);
if (CurrentBlock) {
CurrentBlock->Successors.emplace_back(IterLocation());
}
OrderedNode *TargetNode = reinterpret_cast<OrderedNode*>(IterLocation()->GetPtr(ListBegin));
FEXCore::IR::IROp_Header *TargetOp = TargetNode->Op(DataBegin);
HadError |= TargetOp->Op != OP_BEGINBLOCK;
if (TargetOp->Op != OP_BEGINBLOCK) {
Errors << "Jump " << WrapperOp->NodeOffset << ": Jump to Op that isn't the begining of a block" << std::endl;
}
else {
auto Block = OffsetToBlockMap.try_emplace(IterLocation()->NodeOffset, BlockInfo{}).first;
Block->second.Predecessors.emplace_back(CurrentBlock->Begin);
}
break;
}
default:
//LogMan::Msg::A("Unknown IR Op: %d(%s)", IROp->Op, FEXCore::IR::GetName(IROp->Op).data());
break;
}
++Begin;
}
if (HadError) {
std::stringstream Out;
FEXCore::IR::Dump(&Out, &CurrentIR);
std::cerr << Errors.str() << std::endl << Out.str() << std::endl;
}
return false;
}
FEXCore::IR::Pass* CreateIRValidation() {
return new IRValidation{};
}
}


@ -0,0 +1,66 @@
#include "Interface/IR/PassManager.h"
#include "Interface/Core/OpcodeDispatcher.h"
namespace FEXCore::IR {
class RedundantFlagCalculationEliminination final : public FEXCore::IR::Pass {
public:
bool Run(OpDispatchBuilder *Disp) override;
};
bool RedundantFlagCalculationEliminination::Run(OpDispatchBuilder *Disp) {
bool Changed = false;
auto CurrentIR = Disp->ViewIR();
uintptr_t ListBegin = CurrentIR.GetListData();
uintptr_t DataBegin = CurrentIR.GetData();
IR::NodeWrapperIterator Begin = CurrentIR.begin();
IR::NodeWrapperIterator End = CurrentIR.end();
std::array<OrderedNode*, 32> LastValidFlagStores{};
while (Begin != End) {
NodeWrapper *WrapperOp = Begin();
OrderedNode *RealNode = reinterpret_cast<OrderedNode*>(WrapperOp->GetPtr(ListBegin));
FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin);
if (IROp->Op == OP_BEGINBLOCK ||
IROp->Op == OP_ENDBLOCK ||
IROp->Op == OP_JUMP ||
IROp->Op == OP_CONDJUMP ||
IROp->Op == OP_EXITFUNCTION) {
// We don't track across block boundaries
LastValidFlagStores.fill(nullptr);
}
if (IROp->Op == OP_STOREFLAG) {
auto Op = IROp->CW<IR::IROp_StoreFlag>();
// If we had a valid flag store previously and it hasn't been touched before this new store
// then just delete the old one and let DCE take care of the rest
if (LastValidFlagStores[Op->Flag] != nullptr) {
Disp->Unlink(LastValidFlagStores[Op->Flag]);
Changed = true;
}
// Set this node as the last one valid for this flag
LastValidFlagStores[Op->Flag] = RealNode;
}
else if (IROp->Op == OP_LOADFLAG) {
auto Op = IROp->CW<IR::IROp_LoadFlag>();
// If we loaded a flag then we can't track past this
LastValidFlagStores[Op->Flag] = nullptr;
}
++Begin;
}
return Changed;
}
FEXCore::IR::Pass* CreateRedundantFlagCalculationEliminination() {
return new RedundantFlagCalculationEliminination{};
}
}


@ -0,0 +1,46 @@
#include "Interface/IR/PassManager.h"
#include "Interface/Core/OpcodeDispatcher.h"
#include "LogManager.h"
namespace FEXCore::IR {
class SyscallOptimization final : public FEXCore::IR::Pass {
public:
bool Run(OpDispatchBuilder *Disp) override;
};
bool SyscallOptimization::Run(OpDispatchBuilder *Disp) {
bool Changed = false;
auto CurrentIR = Disp->ViewIR();
uintptr_t ListBegin = CurrentIR.GetListData();
uintptr_t DataBegin = CurrentIR.GetData();
IR::NodeWrapperIterator Begin = CurrentIR.begin();
IR::NodeWrapperIterator End = CurrentIR.end();
while (Begin != End) {
NodeWrapper *WrapperOp = Begin();
OrderedNode *RealNode = reinterpret_cast<OrderedNode*>(WrapperOp->GetPtr(ListBegin));
FEXCore::IR::IROp_Header *IROp = RealNode->Op(DataBegin);
if (IROp->Op == FEXCore::IR::OP_SYSCALL) {
// Is the first argument a constant?
uint64_t Constant;
if (Disp->IsValueConstant(IROp->Args[0], &Constant)) {
// LogMan::Msg::A("Whoa. Syscall argument is constant: %ld", Constant);
Changed = true;
}
}
++Begin;
}
return Changed;
}
FEXCore::IR::Pass* CreateSyscallOptimization() {
return new SyscallOptimization{};
}
}


@ -0,0 +1,61 @@
#include "LogManager.h"
#include "Interface/Memory/MemMapper.h"
#include <algorithm>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
namespace FEXCore::Memory {
void *MemMapper::MapRegion(uint64_t Offset, size_t Size, bool Fixed) {
return MapRegion(Offset, Size, PROT_READ | PROT_WRITE, Fixed);
}
void *MemMapper::ChangeMappedRegion(uint64_t Offset, size_t Size, uint32_t Flags, bool Fixed) {
uintptr_t PtrOffset = reinterpret_cast<uintptr_t>(SHM->Object.Ptr) + Offset;
void *Ptr = mmap(reinterpret_cast<void*>(PtrOffset), Size, Flags,
MAP_POPULATE | MAP_SHARED | (Fixed ? MAP_FIXED : 0), SHM->SHMFD, Offset);
if (Ptr == MAP_FAILED) {
LogMan::Msg::A("Failed to map memory region [0x%lx, 0x%lx)", Offset, Offset + Size);
return nullptr;
}
return Ptr;
}
void *MemMapper::MapRegion(uint64_t Offset, size_t Size, uint32_t Flags, bool Fixed) {
uintptr_t PtrOffset = reinterpret_cast<uintptr_t>(SHM->Object.Ptr) + Offset;
void *Ptr = mmap(reinterpret_cast<void*>(PtrOffset), Size, Flags,
MAP_SHARED | (Fixed ? MAP_FIXED : 0), SHM->SHMFD, Offset);
if (Ptr == MAP_FAILED) {
LogMan::Msg::A("Failed to map memory region [0x%lx, 0x%lx)", Offset, Offset + Size);
return nullptr;
}
MappedRegions.emplace_back(MemRegion{Ptr, Offset, Size});
return Ptr;
}
void MemMapper::UnmapRegion(void *Ptr, size_t Size) {
auto it = std::find(MappedRegions.begin(), MappedRegions.end(), Ptr);
if (it != MappedRegions.end()) {
munmap(Ptr, Size);
MappedRegions.erase(it);
}
}
void *MemMapper::GetPointer(uint64_t Offset) {
for (auto const &Region : MappedRegions) {
if (Region.contains(Offset)) {
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(Region.Ptr) + (Offset - Region.Offset));
}
}
return nullptr;
}
}


@ -0,0 +1,44 @@
#pragma once
#include "Interface/Memory/SharedMem.h"
#include <FEXCore/Memory/MemMapper.h>
#include <stdint.h>
#include <vector>
namespace FEXCore::Context {
struct Context;
}
namespace FEXCore::Memory {
class MemMapper final {
friend struct FEXCore::Context::Context;
public:
void SetBaseRegion(FEXCore::SHM::SHMObject *NewSHM) {
SHM = reinterpret_cast<FEXCore::SHM::InternalSHMObject*>(NewSHM);
}
void *MapRegion(uint64_t Offset, size_t Size, bool Fixed = true);
void *MapRegion(uint64_t Offset, size_t Size, uint32_t Flags, bool Fixed = true);
void *ChangeMappedRegion(uint64_t Offset, size_t Size, uint32_t Flags, bool Fixed = true);
void UnmapRegion(void *Ptr, size_t Size);
void *GetMemoryBase() { return SHM->Object.Ptr; }
void *GetPointer(uint64_t Offset);
template<typename T>
T GetPointer(uint64_t Offset) {
return reinterpret_cast<T>(GetPointer(Offset));
}
template<typename T>
T GetBaseOffset(uint64_t Offset) {
return reinterpret_cast<T>((reinterpret_cast<uintptr_t>(GetMemoryBase()) + Offset));
}
private:
FEXCore::SHM::InternalSHMObject *SHM;
std::vector<FEXCore::Memory::MemRegion> MappedRegions{};
};
}
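// A minimal usage sketch (illustrative only, not part of the original header);
// assumes SHM came from FEXCore::SHM::AllocateSHMRegion and the offsets fit in
// that region:
//
//   FEXCore::Memory::MemMapper Mapper;
//   Mapper.SetBaseRegion(SHM);                        // SHM is a FEXCore::SHM::SHMObject*
//   void *Host = Mapper.MapRegion(0x1000, 0x1000);    // map one page at guest offset 0x1000
//   auto *Data = Mapper.GetPointer<uint8_t*>(0x1000); // translate guest offset to host pointer
//   Mapper.UnmapRegion(Host, 0x1000);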


@ -0,0 +1,61 @@
#include "LogManager.h"
#include "Interface/Memory/SharedMem.h"
#include <cstddef>
#include <cstdint>
#include <fcntl.h>
#include <string>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
namespace FEXCore::SHM {
void *MapRegionFlags(InternalSHMObject *SHM, size_t Offset, size_t Size, uint32_t flags, bool Fixed) {
uintptr_t PtrOffset = reinterpret_cast<uintptr_t>(SHM->Object.Ptr) + Offset;
void *Ptr = mmap(reinterpret_cast<void*>(PtrOffset), Size, flags,
MAP_PRIVATE | (Fixed ? MAP_FIXED : 0), SHM->SHMFD, Offset);
if (Ptr == MAP_FAILED) {
LogMan::Msg::A("Failed to map memory region [0x%lx, 0x%lx)", Offset, Offset + Size);
return nullptr;
}
return Ptr;
}
SHMObject *AllocateSHMRegion(size_t Size) {
InternalSHMObject *SHM = new InternalSHMObject{};
const std::string SHMName = "FEXCore" + std::to_string(getpid());
SHM->SHMFD = shm_open(SHMName.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600);
if (SHM->SHMFD == -1) {
LogMan::Msg::E("Couldn't open SHM");
goto err;
}
// Unlink the SHM file immediately so it doesn't get left around
shm_unlink(SHMName.c_str());
// Extend the SHM to the size we requested
if (ftruncate(SHM->SHMFD, Size) != 0) {
LogMan::Msg::E("Couldn't set SHM size");
goto err;
}
SHM->Object.Ptr = MapRegionFlags(SHM, 0, Size, PROT_READ | PROT_WRITE, false);
if (SHM->Object.Ptr == nullptr) {
goto err;
}
return &SHM->Object;
err:
delete SHM;
return nullptr;
}
void DestroyRegion(SHMObject *SHM) {
InternalSHMObject *Obj = reinterpret_cast<InternalSHMObject*>(SHM);
close(Obj->SHMFD);
delete Obj;
}
}


@ -0,0 +1,12 @@
#pragma once
#include <FEXCore/Memory/SharedMem.h>
#include <stddef.h>
#include <stdint.h>
namespace FEXCore::SHM {
struct InternalSHMObject {
SHMObject Object;
int SHMFD;
};
}


@ -0,0 +1,16 @@
set (NAME IRTest)
set (SRCS IRTest.cpp)
add_executable(${NAME} ${SRCS})
add_dependencies(${NAME} IR_INC)
target_link_libraries(${NAME} ${PROJECT_NAME} SonicUtils)
set (NAME LLVMIRTest)
set (SRCS LLVMIRTest.cpp)
add_executable(${NAME} ${SRCS})
add_dependencies(${NAME} IR_INC)
target_link_libraries(${NAME} ${PROJECT_NAME} SonicUtils)

Source/Test/IRTest.cpp Normal file

@ -0,0 +1,11 @@
#include <FEXCore/IR/IntrusiveIRList.h>
#include "LogManager.h"
#include <cstdio>
#include <cstdint>
#include <list>
#include <set>
int main(int argc, char **argv) {
printf("IR Test\n");
}


@ -0,0 +1,78 @@
#include <llvm-c/Core.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/InitializePasses.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/IRPrintingPasses.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Vectorize.h>
#include <chrono>
#include <cstdio>
int main(int argc, char **argv) {
printf("LLVM Test\n");
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
auto ContextRef = LLVMContextCreate();
auto Con = *llvm::unwrap(&ContextRef);
auto MainModule = new llvm::Module("Main Module", *Con);
auto IRBuilder = new llvm::IRBuilder<>(*Con);
using namespace llvm;
Type *i64 = Type::getInt64Ty(*Con);
auto FuncType = FunctionType::get(Type::getVoidTy(*Con),
{
i64,
}, false);
legacy::PassManager PM;
PassManagerBuilder PMBuilder;
PMBuilder.OptLevel = 3;
PMBuilder.populateModulePassManager(PM);
std::string Empty;
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < 10000; ++i)
{
auto Func = Function::Create(FuncType,
Function::ExternalLinkage,
Empty,
MainModule);
Func->setCallingConv(CallingConv::C);
{
auto Entry = BasicBlock::Create(*Con, Empty, Func);
IRBuilder->SetInsertPoint(Entry);
auto ExitBlock = BasicBlock::Create(*Con, Empty, Func);
IRBuilder->SetInsertPoint(ExitBlock);
IRBuilder->CreateRetVoid();
IRBuilder->SetInsertPoint(Entry);
IRBuilder->CreateBr(ExitBlock);
}
//printf("i: %d\n", i);
PM.run(*MainModule);
auto FunctionList = &MainModule->getFunctionList();
FunctionList->clear();
}
auto end = std::chrono::high_resolution_clock::now();
auto diff = end - start;
printf("Took %ld(%ldms) nanoseconds\n", diff.count(), std::chrono::duration_cast<std::chrono::milliseconds>(diff).count());
return 0;
}

docs/CPUBackends.md Normal file

@ -0,0 +1,29 @@
# FEXCore CPU Backends
---
FEXCore supports multiple CPU emulation backends, all of which ingest the IR that we have been generating.
## IR Interpreter
The first one is the easiest. This just walks the IR list and interprets the IR as it goes through it. It isn't meant to be fast and is for debugging purposes.
This is used to easily inspect what is going on with the code generation and to make sure the logic is sound. It will most likely last into perpetuity since it isn't exactly difficult to maintain and it is useful to have around.
## IR JIT
**Not yet implemented**
This is meant to be our first port of call and will serve multiple purposes. It'll be the JIT used for our runtime compilation of code.
This means it needs to be fast during compilation and have decent runtime performance.
There is a good chance that we will need to implement multiple of these depending on the host architecture, with some code reuse between them.
This JIT will also be what we use for gathering sampling data for passing off to our LLVM JIT for tiered recompilation and offline compilation later.
It should use xbyak for our x86-64 host and vixl for our AArch64 host. For other targets in the future we will see what is available.
## LLVM JIT
This is the final JIT tier, which should theoretically generate the most optimal code for us.
This *should* be used for a tiered recompiler system using sampling data from the IR JIT.
Currently it just supports being a regular JIT core. There are still some hard problems that need to be solved with this JIT since LLVM isn't quite ideal for generating code for a JIT.
# Future ideas
---
* Support a custom ABI on the LLVM JIT to generate more optimal code that is shared between the IR JIT and LLVM JIT
  * This can let us do fun things like reserve host registers for guest register state. Trivial in the IR JIT, not so much for LLVM.
  * Needs a local build of LLVM that we statically link in.
* Create an inline ASM or JIT'd dispatcher loop. This will allow our JITs to be more optimal by reserving more registers for guest state.
* WebAssembly or another browser language?
  * Might allow decent runtime performance of things emulated in a browser. Could be interesting.

docs/CustomCPUBackend.md Normal file

@ -0,0 +1,17 @@
# FEXCore custom CPU backends
---
Custom CPU backends can be useful for testing purposes or for supporting situations that FEXCore doesn't currently understand.
The FEXCore::Context namespace provides a `SetCustomCPUBackendFactory` function for providing a factory function pointer to the core. This function will be used if the `DEFAULTCORE` configuration option is set to `CUSTOM`.
If the guest code creates more threads, the factory function will be invoked to create a CPUBackend per thread. If you don't want a unique CPUBackend object per thread, that needs to be handled by the user.
It's recommended to store the pointers provided to the factory function for later use.
`FEXCore::Context::Context*` - Is a pointer to previously generated context object
`FEXCore::Core::ThreadState*` - Is a pointer to a thread's state. Lives for as long as the guest thread is alive.
To use this factory, one must derive a custom class from the provided `FEXCore::CPU::CPUBackend` class. The factory function should then return a newly allocated instance of it.
`FEXCore::CPU::CPUBackend::GetName` - Returns an `std::string` for the name of this core
`FEXCore::CPU::CPUBackend::CompileCode` - Provides the CPUBackend with an IR and DebugData (both potentially null) for compiling code. Returns a pointer, which must remain valid long term, to the piece of code that will be executed for the particular RIP.
Both IR and DebugData can be null if `NeedsOpDispatch` returns false
`FEXCore::CPU::CPUBackend::MapRegion` - This function needs to be implemented if the CPUBackend needs to map host-facing memory into the backend. Allows setting up virtual memory mapping if required
`FEXCore::CPU::CPUBackend::Initialize` - Called after the guest memory is initialized and all state is ready for the code to start initializing. Gets called just before the CPUBackend starts executing code for the first time.
`FEXCore::CPU::CPUBackend::NeedsOpDispatch` - Tells FEXCore if the backend needs the FEXCore IR and DebugData provided to it. This can be useful if FEXCore hits something it doesn't understand, but it doesn't matter since the CPUBackend can still understand it from raw x86-64 (e.g. a VM-based CPU backend).
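As a rough sketch of how these pieces fit together (the backend below is made up for illustration; only the FEXCore types and functions are real):
```
class NullBackend final : public FEXCore::CPU::CPUBackend {
public:
  std::string GetName() override { return "Null"; }
  // A real backend returns a long-lived, host-executable pointer for this RIP
  void *CompileCode(FEXCore::IR::IRListView<true> const *IR, FEXCore::Core::DebugData *DebugData) override {
    return nullptr;
  }
  void *MapRegion(void *HostPtr, uint64_t GuestPtr, uint64_t Size) override { return HostPtr; }
  bool NeedsOpDispatch() override { return true; }
};

// Install the factory; it is invoked once per guest thread
FEXCore::Context::SetCustomCPUBackendFactory(CTX,
  [](FEXCore::Context::Context *Ctx, FEXCore::Core::ThreadState *Thread) -> FEXCore::CPU::CPUBackend* {
    return new NullBackend{};
  });
```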

docs/Frontend.md Normal file

@ -0,0 +1,43 @@
# FEXCore Frontend
---
The FEXCore frontend's job is to translate an incoming x86-64 instruction stream into a more easily digested version of x86.
This effectively expands x86-64 instruction encodings to be more easily ingested later on in the process.
This ends up being essential to allowing our IR translation step to be less strenuous. It can decode a "common" expanded instruction format rather than the various encodings that x86-64 supports.
For a simple example, x86-64's primary op table has ALU ops that duplicate themselves at least six times with minor differences between each. The frontend is able to decode a large number of these ops to the "same" op that the IR translation understands more readily.
This works for most instructions that follow a common decoding scheme, although there are instructions that don't follow the rules and must be handled explicitly elsewhere.
An example of decoded instructions:
```
00 C0: add al,al
04 01: add al, 0x1
```
These two instructions have a different encoding scheme but they are just an add.
They end up decoding to a generic format with the same destination operand but different sources.
This may look subtle, but far more complex cases come up and we don't want to handle hundreds of instructions differently.
After the frontend is done decoding the instruction stream, it passes the output over to the OpDispatcher for translating to our IR.
## Multiblock
---
The Frontend has an additional duty. Since it is the main piece of code that understands the guest x86-64 code, it is also what analyzes control flow to determine if we can compile multiple blocks of guest code together.
The Frontend already has to determine if it has hit a block-ending instruction. This is anything that changes control flow. This feeds into the analysis system to look at conditional branches to see if we can keep compiling code at the target location in the same functional unit.
Short example:
```
test eax, eax
jne .Continue
ret <--- We can continue past this instruction, which is an unconditional block ender
.Continue:
```
These sorts of patterns crop up extensively in compiled code. A large number of traditional JITs will end up ending the block at any sort of conditional branch instruction.
If the analysis can determine the target conditional branch location, we can then know that the code can keep compiling past an unconditional block ender instruction.
This works for both backwards branches and forward branches.
### Additional reading
---
There are other emulators out there that implement multiblock JIT compilation with some success.
The best example of this that I know of is the [Dolphin GameCube and Wii Emulator](https://github.com/dolphin-emu/dolphin), where I implemented the initial multiblock support.
One of the major limitations with a console emulator is that you can run into infinite loops on backedges when using multiblock compilation. This is because console emulation has to be able to run an infinite loop and let interrupts or some other state change break out of it.
Luckily, since we are a userspace emulator we don't have to deal with this problem. If an application has written an infinite loop, then without another thread running it'll be a true infinite loop.
Another stroke of luck is that we are going to emulate the strong memory model of x86-64 and support true threads; this means we don't need to do any manual thread scheduling in our emulator or switch between virtual threads.

docs/IR.md Normal file

@ -0,0 +1,32 @@
# FEXCore IR
---
The IR for the FEXCore is an SSA based IR that is generated from the incoming x86-64 assembly.
SSA is quite nice to work with when translating the x86-64 code to the IR, when optimizing that code with custom optimization passes, and also passing that IR to our CPU backends.
## Emulation IR considerations
* We have explicitly sized IR variables
  * Supports traditional element sizes of 1, 2, 4, 8 bytes and some 16-byte ops
  * Supports an arbitrary number of vector elements
  * The op determines if something is float or integer based.
* Clear separation of scalar IR ops and vector IR ops
  * ex, MUL versus VMUL
* We have explicit Load/Store context IR ops
  * This allows us to have a clear separation between guest memory and tracked x86-64 state
* We have an explicit CPUID IR op
  * This allows us to return fairly complex data (4 registers of data) and also makes it easier to optimize constant CPUID functions
  * So if we const-prop the CPUID function then it'll just const-prop further along
* We have an explicit syscall op
  * The syscall op is fairly complex as well; as with CPUID, if the syscall function is const-propagated then we can directly call the syscall handler
  * Can save overhead by removing call overheads
* The IR supports branching from one block to another
  * Has a conditional branch instruction that either branches to the target branch or falls through to the next block
  * Has an unconditional branch to explicitly jump to a block instead of falling through
  * **There is a desire to follow LLVM semantics around block limitations but it isn't currently strictly enforced**
* Supports a debug ```Print``` Op for printing out values for debug viewing
* Supports explicit Load/Store memory IR ops
  * This is for accessing guest memory and will do the memory offset translation into the VM's memory space
  * This is done by just adding the VM memory base to the 64-bit address passed in (see the sketch after this list)
  * This is done in a manner that the application **can** escape from the VM and isn't meant to be safe
  * There is an option for JITs to validate the memory region prior to accessing it to ensure correctness
* IR is generated from a JSON file, making it fairly straightforward to extend.
  * Read the Python generation file to determine the extent of what it can do
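A minimal sketch of that guest-to-host translation (names here are illustrative; `MemoryBase` stands for the host pointer where guest address zero is mapped):
```
uint8_t *MemoryBase = /* host mapping of guest address 0 */;
uint64_t GuestAddr  = /* 64-bit address coming from the guest */;
uint8_t *HostAddr   = MemoryBase + GuestAddr; // LoadMem/StoreMem just add the base
// Nothing prevents a hostile GuestAddr from landing outside the mapped region,
// hence the optional validation mentioned above.
```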

docs/OpDispatcher.md Normal file

@ -0,0 +1,47 @@
# FEXCore OpDispatcher
---
The OpDispatcher is the step of the recompiler that takes the output from the Frontend and translates it to our IR.
Since the x86-64 instruction set is so large (>1000 instructions in the current FEXCore tables) we need to reduce this down to something more manageable.
We will ingest our decoded x86-64 instructions and translate them down to more basic IR operations. The number of IR ops is currently in the dozens, which is a lot easier to handle.
Once we have translated to the IR then we need to pass the IR over to optimization passes or our JIT cores.
Ex:
```
mov rax,0x1
mov rdi,0x1
mov rsi,0x20
mov rdx,0x1
syscall
hlt
```
Translates to the IR of:
```
BeginBlock
%ssa8 i32 = Constant 0x1
StoreContext 0x8, 0x8, %ssa8
%ssa64 i32 = Constant 0x1
StoreContext 0x8, 0x30, %ssa64
%ssa120 i32 = Constant 0x1f
StoreContext 0x8, 0x28, %ssa120
%ssa176 i32 = Constant 0x1
StoreContext 0x8, 0x20, %ssa176
%ssa232 i64 = LoadContext 0x8, 0x8
%ssa264 i64 = LoadContext 0x8, 0x30
%ssa296 i64 = LoadContext 0x8, 0x28
%ssa328 i64 = LoadContext 0x8, 0x20
%ssa360 i64 = LoadContext 0x8, 0x58
%ssa392 i64 = LoadContext 0x8, 0x48
%ssa424 i64 = LoadContext 0x8, 0x50
%ssa456 i64 = Syscall %ssa232, %ssa264, %ssa296, %ssa328, %ssa360, %ssa392, %ssa424
StoreContext 0x8, 0x8, %ssa456
BeginBlock
EndBlock 0x1e
ExitFunction
```
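For reference, the `LoadContext`/`StoreContext` offsets above index into the guest `CPUState` structure: `rip` sits at offset 0x0 and the `gregs` array starts at 0x8, so offset 0x8 is RAX, 0x20 is RDX, 0x28 is RSI, and 0x30 is RDI, while 0x48/0x50/0x58 are R8/R9/R10, the remaining syscall argument registers. The first operand of each op (0x8 here) is the access size in bytes.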
### Multiblock
---
An additional duty of the OpDispatcher is to handle the metadata that the Frontend provides for supporting multiblock.
The IR provides most of the functionality required for robust branching and the function creation needed to generate large blocks of code translated from x86-64 emulation.
This is required since in the ideal situation we will be doing function level translation of x86-64 guest code to our IR.
The IR is currently lacking any idea of flags or PHI nodes, which can be problematic when optimizing branch-heavy code. The good thing is that the LLVM JIT can use a mem-to-reg pass to clean up a large amount of this code.
It **will** be required to improve the IR further once the runtime JIT becomes a higher priority


@ -0,0 +1,36 @@
# FEXCore IR Optimization passes
---
**This is very much a WIP since these optimization passes aren't in code yet**
## Pass Managers
* Need Function level optimization pass manager
* Need block level optimization pass manager
### Dead Store Elimination
We need to do dead store elimination because LLVM can't always handle the elimination of our loadstores.
This is very apparent when we are doing flag calculations and LLVM isn't able to remove them.
This is mainly just an issue around the context loadstores.
We will want this more when the IRJIT comes online.
### Dead flag elimination
X86-64 is a fairly disgusting ISA in that it calculates a bunch of flags on almost all instructions.
We need to eliminate redundant flag calculations that end up being overwritten without being used.
This happens *constantly* and in most cases the flag calculation takes significantly more work than the basic op by itself.
There is a good chance that breaking the flags out to independent memory locations will make this easier, or just adding ops for flag handling.
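A trivial illustration of the pattern:
```
add rax, rbx ; calculates CF/PF/AF/ZF/SF/OF...
add rcx, rdx ; ...all of which are immediately recalculated here
jne .target  ; only ZF from the second add is ever consumed
```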
### Dead Code Elimination
There are a lot of cases that code will be generated that is immediately dead afterwards.
Flag calculation elimination will produce a lot of dead code that needs to get removed.
Additionally there are a decent number of x86-64 instructions that store their results into multiple registers, and then the next instruction overwrites one of those registers.
Multiply and Divide are big ones, since x86 calculates these at higher precision.
These rely significantly on tracking liveness between LoadContext and StoreContext ops.
### ABI register elimination pass
This one is very fun and will reduce a decent amount of work that the JIT needs to do.
When we are targeting a specific x86-64 ABI and we know that a translated block of code covers an entire function,
we can eliminate stores to context registers that the ABI defines as temporaries.
We will know exactly that these are dead and can just remove the store (and run all the passes that optimize the rest away afterwards).
### Loadstore coalescing pass
A large number of x86-64 instructions load or store registers in order from the context.
We can merge these into loadstore pair ops to improve performance, as sketched below.
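For example, in the IR syntax from the OpDispatcher docs (the paired form is hypothetical; no such op exists in the IR yet):
```
StoreContext 0x8, 0x8, %ssa8   ; RAX
StoreContext 0x8, 0x10, %ssa9  ; RBX, the adjacent context offset
; could become one paired 16-byte store at offset 0x8
```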
### Function level heuristic pass
Once we know that a function is a true full recompile we can do some additional optimizations.
Remove any final flag stores. We know that a compiler won't pass flags across a function call boundary (they don't exist in the ABI).
Remove any loadstores to the context mid-function; only do loads at the start and a final store at the end of the function, which means ops just map registers directly throughout the entire function.
### SIMD coalescing pass?
When operating on older MMX ops (64-bit SIMD), they may end up generating some independent ops that can be coalesced into a 128-bit op.


@ -0,0 +1,24 @@
#pragma once
#include <FEXCore/Core/Context.h>
#include <stdint.h>
namespace FEXCore::Config {
enum ConfigOption {
CONFIG_MULTIBLOCK,
CONFIG_MAXBLOCKINST,
CONFIG_DEFAULTCORE,
CONFIG_VIRTUALMEMSIZE,
CONFIG_SINGLESTEP,
};
enum ConfigCore {
CONFIG_INTERPRETER,
CONFIG_IRJIT,
CONFIG_LLVMJIT,
CONFIG_CUSTOM,
};
void SetConfig(FEXCore::Context::Context *CTX, ConfigOption Option, uint64_t Config);
uint64_t GetConfig(FEXCore::Context::Context *CTX, ConfigOption Option);
}


@ -0,0 +1,74 @@
#pragma once
#include <stdint.h>
#include <string>
namespace FEXCore {
namespace IR {
template<bool Copy>
class IRListView;
}
namespace Core {
struct DebugData;
}
namespace CPU {
class InterpreterCore;
class JITCore;
class LLVMCore;
class CPUBackend {
public:
virtual ~CPUBackend() = default;
/**
* @return The name of this backend
*/
virtual std::string GetName() = 0;
/**
* @brief Tells this CPUBackend to compile code for the provided IR and DebugData
*
* The returned pointer needs to be long lived and be executable in the host environment
* FEXCore's frontend will store this pointer into a cache for the current RIP when this was executed
*
* This is a thread specific compilation unit since there is one CPUBackend per guest thread
*
* If NeedsOpDispatch returns false then IR and DebugData may be null and the expectation is that the code will still compile
* FEXCore::Core::ThreadState* is valid at the time of compilation.
*
* @param IR - IR that maps to the IR for this RIP
* @param DebugData - Debug data that is available for this IR indirectly
*
* @return An executable function pointer that is theoretically compiled from this point.
* Is actually a function pointer of type `void (FEXCore::Core::ThreadState *Thread)`
*/
virtual void *CompileCode(FEXCore::IR::IRListView<true> const *IR, FEXCore::Core::DebugData *DebugData) = 0;
/**
* @brief Function for mapping memory in to the CPUBackend's visible space. Allows setting up virtual mappings if required
*
* @return Currently unused
*/
virtual void *MapRegion(void *HostPtr, uint64_t GuestPtr, uint64_t Size) = 0;
/**
* @brief This is post-setup initialization that is called just before code execution
*
* Guest memory is available at this point and ThreadState is valid
*/
virtual void Initialize() {}
/**
* @brief Lets FEXCore know if this CPUBackend needs IR and DebugData for CompileCode
*
* This is useful if the FEXCore Frontend hits an x86-64 instruction that isn't understood but can continue regardless
*
* This is useful, for example, for a VM-based CPUBackend
*
* @return true if it needs the IR
*/
virtual bool NeedsOpDispatch() = 0;
};
}
}


@ -0,0 +1,78 @@
#pragma once
#include <cstdint>
#include <functional>
namespace FEXCore {
/**
* @brief Code loader class so the CPU backend can load code in a generic fashion
*
* This class is expected to have multiple different style of code loaders
*/
class CodeLoader {
public:
/**
* @brief CPU Core uses this to choose what the stack size should be for this code
*/
virtual uint64_t StackSize() const = 0;
/**
* @brief Allows the code loader to set up the stack the way it wants
*
* @param HostPtr The host facing pointer to the base of the stack.
* Size of memory will be at least the size that StackSize() returns
*
* @param GuestPtr The guest facing memory location where the base of the stack lives
*
* @return The location that the guest stack pointer register should be set to
*
* Probably will be GuestPtr + StackSize() - <Some amount>
*/
virtual uint64_t SetupStack(void *HostPtr, uint64_t GuestPtr) const = 0;
/**
* @brief Function to return the guest RIP that the code should start out at
*/
virtual uint64_t DefaultRIP() const = 0;
using MemoryLayout = std::tuple<uint64_t, uint64_t, uint64_t>;
/**
* @brief Gets the default memory layout of the memory object being loaded
*
* This will be mapped in to the guest memory space automatically
*
* @return A MemoryLayout object describing the layout of the region
*/
virtual MemoryLayout GetLayout() const = 0;
/**
* @brief Allows the loader to map memory regions that it needs
*
* Code loader is expected to call the Mapper function with a memory offset and size for mapping
*
* @param Mapper Returns the host facing pointer for memory setup if the code loader needs to do things to it
*/
virtual void MapMemoryRegion(std::function<void*(uint64_t, uint64_t)> Mapper) {}
/**
* @brief Memory writer function for loading code in to guest memory
*
* First argument = Data to write
* Second argument = Guest memory data location
* Third argument = Guest memory size
*/
using MemoryWriter = std::function<void(void const*, uint64_t, uint64_t)>;
virtual void LoadMemory(MemoryWriter Writer) = 0;
/**
* @brief Get the final RIP we are supposed to end up on in a debugger
*
* @return When the debugger reaches this RIP then we know that we have completed
*/
virtual uint64_t GetFinalRIP() { return ~0ULL; }
virtual char const *FindSymbolNameInRange(uint64_t Address) { return nullptr; }
};
}
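// A bare-bones loader sketch (illustrative; assumes a flat blob `Code` of
// `CodeSize` bytes placed at guest address 0, and guesses that the MemoryLayout
// tuple is {base, code size, total region size}):
//
//   class FlatCodeLoader final : public FEXCore::CodeLoader {
//     uint64_t StackSize() const override { return 0x1000; }
//     uint64_t SetupStack(void *Host, uint64_t Guest) const override {
//       return Guest + StackSize() - 16;
//     }
//     uint64_t DefaultRIP() const override { return 0; }
//     MemoryLayout GetLayout() const override { return {0, CodeSize, CodeSize}; }
//     void LoadMemory(MemoryWriter Writer) override { Writer(Code, 0, CodeSize); }
//   };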


@ -0,0 +1,193 @@
#pragma once
#include <functional>
#include <stdint.h>
namespace FEXCore {
class CodeLoader;
}
namespace FEXCore::Core {
struct CPUState;
struct ThreadState;
}
namespace FEXCore::CPU {
class CPUBackend;
}
namespace FEXCore::HLE {
struct SyscallArguments;
class SyscallVisitor;
}
namespace FEXCore::SHM {
struct SHMObject;
}
namespace FEXCore::Context {
struct Context;
enum ExitReason {
EXIT_NONE,
EXIT_WAITING,
EXIT_ASYNC_RUN,
EXIT_SHUTDOWN,
EXIT_DEBUG,
EXIT_UNKNOWNERROR,
};
using CustomCPUFactoryType = std::function<FEXCore::CPU::CPUBackend* (FEXCore::Context::Context*, FEXCore::Core::ThreadState *Thread)>;
/**
* @brief This initializes internal FEXCore state that is shared between contexts and requires overhead to setup
*/
void InitializeStaticTables();
/**
* @brief [[threadsafe]] Create a new FEXCore context object
*
* This is necessary to do when running threaded contexts
*
* @return a new context object
*/
FEXCore::Context::Context *CreateNewContext();
/**
* @brief Post creation context initialization
* Once configurations have been set, do the post-creation initialization with that configuration
*
* @param CTX The context that we created
*
* @return true if we managed to initialize correctly
*/
bool InitializeContext(FEXCore::Context::Context *CTX);
/**
* @brief Destroy the context object
*
* @param CTX
*/
void DestroyContext(FEXCore::Context::Context *CTX);
/**
* @brief Adds a base pointer that the VM can use for "physical" memory backing
*
* Will be the guest's physical memory location of zero
*
* @return true on added. false when we had already added a guest memory region
*/
bool AddGuestMemoryRegion(FEXCore::Context::Context *CTX, FEXCore::SHM::SHMObject *SHM);
/**
* @brief Allows setting up in-memory code and other things prior to launching code execution
*
* @param CTX The context that we created
* @param Loader The loader that will be doing all the code loading
*
* @return true if we loaded code
*/
bool InitCore(FEXCore::Context::Context *CTX, FEXCore::CodeLoader *Loader);
void SetApplicationFile(FEXCore::Context::Context *CTX, std::string const &File);
/**
* @brief Starts running the CPU core
*
* If WaitForIdle is enabled then this call will block until the thread exits, or, if single stepping is enabled, until the core steps one instruction
*
* @param CTX The context that we created
* @param WaitForIdle Should we wait for the core to be idle or not
*
* @return The ExitReason for the parent thread. ASYNC_RUN if WaitForIdle was false
*/
ExitReason RunLoop(FEXCore::Context::Context *CTX, bool WaitForIdle);
/**
* @brief [[threadsafe]] Returns the ExitReason of the parent thread. Typically used for async result status
*
* @param CTX The context that we created
*
* @return The ExitReason for the parent thread
*/
ExitReason GetExitReason(FEXCore::Context::Context *CTX);
/**
* @brief [[threadsafe]] Checks if the Context is either done working or paused (in the case of single stepping)
*
* Use this when the context is async running to determine if it is done
*
* @param CTX the context that we created
*
* @return true if the core is done or paused
*/
bool IsDone(FEXCore::Context::Context *CTX);
/**
* @brief Gets a copy of the CPUState of the parent thread
*
* @param CTX The context that we created
* @param State The state object to populate
*/
void GetCPUState(FEXCore::Context::Context *CTX, FEXCore::Core::CPUState *State);
/**
* @brief Copies the CPUState provided to the parent thread
*
* @param CTX The context that we created
* @param State The state object to copy from
*/
void SetCPUState(FEXCore::Context::Context *CTX, FEXCore::Core::CPUState *State);
void Pause(FEXCore::Context::Context *CTX);
/**
* @brief Allows the frontend to pass in a custom CPUBackend creation factory
*
* This allows the frontend to provide its own CPU backend, typically for debugging
*
* @param CTX The context that we created
* @param Factory The factory that the context will call if the DefaultCore config is set to CUSTOM
*/
void SetCustomCPUBackendFactory(FEXCore::Context::Context *CTX, CustomCPUFactoryType Factory);
/**
* @brief Allows a custom CPUBackend creation factory for fallback routines when the main CPUBackend core can't handle an instruction
*
* This is only useful for debugging new instruction decodings that FEXCore doesn't understand
* The CPUBackend that is created from this factory must have its NeedsOpDispatch function return false
*
* @param CTX The context that we created
* @param Factory The factory that the context will call on core creation
*/
void SetFallbackCPUBackendFactory(FEXCore::Context::Context *CTX, CustomCPUFactoryType Factory);
/**
* @brief This allows a frontend core to call Syscall routines directly. Useful for debugging
*
* @param CTX The context that we created
* @param Thread The thread to run the syscall on
* @param Args The arguments to the syscall
*
* @return The value that a syscall returns
*/
uint64_t HandleSyscall(FEXCore::Context::Context *CTX, FEXCore::Core::ThreadState *Thread, FEXCore::HLE::SyscallArguments *Args);
/**
* @brief Sets up memory regions on the guest for mirroring within the guest's VM space
*
* @param VirtualAddress The address we want to set to mirror a physical memory region
* @param PhysicalAddress The physical memory region we are mapping
* @param Size Size of the region to mirror
*
* @return true when successfully mapped. false if there was an error adding
*/
bool AddVirtualMemoryMapping(FEXCore::Context::Context *CTX, uint64_t VirtualAddress, uint64_t PhysicalAddress, uint64_t Size);
/**
* @brief Allows the frontend to set a custom syscall handler
*
* Useful for debugging purposes. May not work if the syscall ID exceeds the maximum number of syscalls in the lookup table
*
* @param Syscall Which syscall ID to install a visitor to
* @param Visitor The Visitor to install
*/
void RegisterExternalSyscallVisitor(FEXCore::Context::Context *CTX, uint64_t Syscall, FEXCore::HLE::SyscallVisitor *Visitor);
}
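// A rough usage sketch of the calls above, picking up after context creation
// and configuration (error handling elided; Loader is any FEXCore::CodeLoader):
//
//   FEXCore::Context::InitializeContext(CTX);
//   FEXCore::Context::InitCore(CTX, &Loader);
//   FEXCore::Context::ExitReason Reason = FEXCore::Context::RunLoop(CTX, true);
//   FEXCore::Context::DestroyContext(CTX);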


@ -0,0 +1,39 @@
#pragma once
#include <FEXCore/HLE/Linux/ThreadManagement.h>
#include <atomic>
#include <cstddef>
#include <stdint.h>
#include <string_view>
namespace FEXCore::Core {
struct __attribute__((packed)) CPUState {
uint64_t rip; ///< Current core's RIP. May not be entirely accurate while JIT is active
uint64_t gregs[16];
uint64_t : 64;
uint64_t xmm[16][2];
uint64_t gs;
uint64_t fs;
uint8_t flags[32];
};
static_assert(offsetof(CPUState, xmm) % 16 == 0, "xmm needs to be 128bit aligned!");
struct __attribute__((packed)) ThreadState {
CPUState State{};
struct {
std::atomic_bool Running {false};
std::atomic_bool ShouldStop {false};
std::atomic_bool ShouldPause {false};
std::atomic_bool WaitingToStart {false};
} RunningEvents;
FEXCore::HLE::ThreadManagement ThreadManager;
uint8_t InternalState[0];
};
static_assert(offsetof(ThreadState, State) == 0, "CPUState must be first member in threadstate");
constexpr uint64_t PAGE_SIZE = 4096;
std::string_view const& GetFlagName(unsigned Flag);
}


@ -0,0 +1,63 @@
#pragma once
namespace FEXCore::X86State {
/**
* @name The ordering of the GPRs from name to index
* @{ */
constexpr unsigned REG_RAX = 0;
constexpr unsigned REG_RBX = 1;
constexpr unsigned REG_RCX = 2;
constexpr unsigned REG_RDX = 3;
constexpr unsigned REG_RSI = 4;
constexpr unsigned REG_RDI = 5;
constexpr unsigned REG_RBP = 6;
constexpr unsigned REG_RSP = 7;
constexpr unsigned REG_R8 = 8;
constexpr unsigned REG_R9 = 9;
constexpr unsigned REG_R10 = 10;
constexpr unsigned REG_R11 = 11;
constexpr unsigned REG_R12 = 12;
constexpr unsigned REG_R13 = 13;
constexpr unsigned REG_R14 = 14;
constexpr unsigned REG_R15 = 15;
constexpr unsigned REG_XMM_0 = 16;
constexpr unsigned REG_XMM_1 = 17;
constexpr unsigned REG_XMM_2 = 18;
constexpr unsigned REG_XMM_3 = 19;
constexpr unsigned REG_XMM_4 = 20;
constexpr unsigned REG_XMM_5 = 21;
constexpr unsigned REG_XMM_6 = 22;
constexpr unsigned REG_XMM_7 = 23;
constexpr unsigned REG_XMM_8 = 24;
constexpr unsigned REG_XMM_9 = 25;
constexpr unsigned REG_XMM_10 = 26;
constexpr unsigned REG_XMM_11 = 27;
constexpr unsigned REG_XMM_12 = 28;
constexpr unsigned REG_XMM_13 = 29;
constexpr unsigned REG_XMM_14 = 30;
constexpr unsigned REG_XMM_15 = 31;
constexpr unsigned REG_INVALID = 255;
/** @} */
/**
* @name RFLAG register bit locations
* @{ */
constexpr unsigned RFLAG_CF_LOC = 0;
constexpr unsigned RFLAG_PF_LOC = 2;
constexpr unsigned RFLAG_AF_LOC = 4;
constexpr unsigned RFLAG_ZF_LOC = 6;
constexpr unsigned RFLAG_SF_LOC = 7;
constexpr unsigned RFLAG_TF_LOC = 8;
constexpr unsigned RFLAG_IF_LOC = 9;
constexpr unsigned RFLAG_DF_LOC = 10;
constexpr unsigned RFLAG_OF_LOC = 11;
constexpr unsigned RFLAG_IOPL_LOC = 12;
constexpr unsigned RFLAG_NT_LOC = 14;
constexpr unsigned RFLAG_RF_LOC = 16;
constexpr unsigned RFLAG_VM_LOC = 17;
constexpr unsigned RFLAG_AC_LOC = 18;
constexpr unsigned RFLAG_VIF_LOC = 19;
constexpr unsigned RFLAG_VIP_LOC = 20;
constexpr unsigned RFLAG_ID_LOC = 21;
}


@ -0,0 +1,35 @@
#pragma once
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Memory/MemMapper.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <stdint.h>
#include <vector>
namespace FEXCore::Core {
struct RuntimeStats;
}
namespace FEXCore::Context {
struct Context;
namespace Debug {
void CompileRIP(FEXCore::Context::Context *CTX, uint64_t RIP);
uint64_t GetThreadCount(FEXCore::Context::Context *CTX);
FEXCore::Core::RuntimeStats *GetRuntimeStatsForThread(FEXCore::Context::Context *CTX, uint64_t Thread);
FEXCore::Core::CPUState GetCPUState(FEXCore::Context::Context *CTX);
void GetMemoryRegions(FEXCore::Context::Context *CTX, std::vector<FEXCore::Memory::MemRegion> *Regions);
bool GetDebugDataForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, FEXCore::Core::DebugData *Data);
bool FindHostCodeForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, uint8_t **Code);
// XXX:
// bool FindIRForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, FEXCore::IR::IntrusiveIRList **ir);
// void SetIRForRIP(FEXCore::Context::Context *CTX, uint64_t RIP, FEXCore::IR::IntrusiveIRList *const ir);
FEXCore::Core::ThreadState *GetThreadState(FEXCore::Context::Context *CTX);
}
}


@ -0,0 +1,65 @@
#pragma once
#include "Event.h"
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/CPUBackend.h>
#include <FEXCore/IR/IntrusiveIRList.h>
#include <map>
#include <thread>
namespace FEXCore {
class BlockCache;
}
namespace FEXCore::Context {
struct Context;
}
namespace FEXCore::IR{
class OpDispatchBuilder;
}
namespace FEXCore::Core {
struct RuntimeStats {
std::atomic_uint64_t InstructionsExecuted;
std::atomic_uint64_t BlocksCompiled;
};
/**
* @brief Contains debug data for a block of code for later debugger analysis
*
* Needs to remain around for as long as the code could be executed at least
*/
struct DebugData {
uint64_t HostCodeSize; ///< The size of the code generated in the host JIT
uint64_t GuestCodeSize; ///< The size of the guest side code
uint64_t GuestInstructionCount; ///< Number of guest instructions
uint64_t TimeSpentInCode; ///< How long this code has spent time running
uint64_t RunCount; ///< Number of times this block of code has been run
};
struct __attribute__((packed)) InternalThreadState {
FEXCore::Core::ThreadState State;
FEXCore::Context::Context *CTX;
std::thread ExecutionThread;
Event StartRunning;
Event ThreadWaiting;
std::unique_ptr<FEXCore::IR::OpDispatchBuilder> OpDispatcher;
std::unique_ptr<FEXCore::CPU::CPUBackend> CPUBackend;
std::unique_ptr<FEXCore::CPU::CPUBackend> FallbackBackend;
std::unique_ptr<FEXCore::BlockCache> BlockCache;
std::map<uint64_t, std::unique_ptr<FEXCore::IR::IRListView<true>>> IRLists;
std::map<uint64_t, FEXCore::Core::DebugData> DebugData;
RuntimeStats Stats{};
FEXCore::Context::ExitReason ExitReason {FEXCore::Context::ExitReason::EXIT_WAITING};
};
}


@ -0,0 +1,399 @@
#pragma once
#include <array>
#include <cstdint>
#include <cstring>
namespace FEXCore::IR {
///< Forward declaration of OpDispatchBuilder
class OpDispatchBuilder;
}
namespace FEXCore::X86Tables {
///< Forward declaration of X86InstInfo
struct X86InstInfo;
namespace DecodeFlags {
constexpr uint32_t FLAG_OPERAND_SIZE = (1 << 0);
constexpr uint32_t FLAG_ADDRESS_SIZE = (1 << 1);
constexpr uint32_t FLAG_LOCK = (1 << 2);
constexpr uint32_t FLAG_LEGACY_PREFIX = (1 << 3);
constexpr uint32_t FLAG_REX_PREFIX = (1 << 4);
constexpr uint32_t FLAG_MODRM_PRESENT = (1 << 5);
constexpr uint32_t FLAG_SIB_PRESENT = (1 << 6);
constexpr uint32_t FLAG_REX_WIDENING = (1 << 7);
constexpr uint32_t FLAG_REX_XGPR_B = (1 << 8);
constexpr uint32_t FLAG_REX_XGPR_X = (1 << 9);
constexpr uint32_t FLAG_REX_XGPR_R = (1 << 10);
constexpr uint32_t FLAG_FS_PREFIX = (1 << 11);
constexpr uint32_t FLAG_GS_PREFIX = (1 << 12);
constexpr uint32_t FLAG_REP_PREFIX = (1 << 13);
constexpr uint32_t FLAG_REPNE_PREFIX = (1 << 14);
// Size flags
constexpr uint32_t FLAG_SIZE_DST_OFF = 15;
constexpr uint32_t FLAG_SIZE_SRC_OFF = FLAG_SIZE_DST_OFF + 3;
constexpr uint32_t SIZE_MASK = 0b111;
constexpr uint32_t SIZE_DEF = 0b000; // This should be invalid past decoding
constexpr uint32_t SIZE_8BIT = 0b001;
constexpr uint32_t SIZE_16BIT = 0b010;
constexpr uint32_t SIZE_32BIT = 0b011;
constexpr uint32_t SIZE_64BIT = 0b100;
constexpr uint32_t SIZE_128BIT = 0b101;
constexpr uint32_t SIZE_256BIT = 0b110;
inline uint32_t GetSizeDstFlags(uint32_t Flags) { return (Flags >> FLAG_SIZE_DST_OFF) & SIZE_MASK; }
inline uint32_t GetSizeSrcFlags(uint32_t Flags) { return (Flags >> FLAG_SIZE_SRC_OFF) & SIZE_MASK; }
inline uint32_t GenSizeDstSize(uint32_t Size) { return Size << FLAG_SIZE_DST_OFF; }
inline uint32_t GenSizeSrcSize(uint32_t Size) { return Size << FLAG_SIZE_SRC_OFF; }
}
union DecodedOperand {
enum {
TYPE_NONE,
TYPE_GPR,
TYPE_GPR_DIRECT,
TYPE_GPR_INDIRECT,
TYPE_RIP_RELATIVE,
TYPE_LITERAL,
TYPE_SIB,
};
struct {
uint8_t Type;
} TypeNone;
struct {
uint8_t Type;
bool HighBits;
uint8_t GPR;
} TypeGPR;
struct {
uint8_t Type;
uint8_t GPR;
int32_t Displacement;
} TypeGPRIndirect;
struct {
uint8_t Type;
int32_t Literal;
} TypeRIPLiteral;
struct {
uint8_t Type;
uint8_t Size;
uint64_t Literal;
} TypeLiteral;
struct {
uint8_t Type;
uint8_t Index; // ~0 invalid
uint8_t Base; // ~0 invalid
uint32_t Scale : 8;
int32_t Offset;
} TypeSIB;
};
struct DecodedInst {
uint64_t PC;
uint16_t OP;
uint32_t Flags;
uint8_t ModRM;
uint8_t SIB;
uint8_t InstSize;
uint8_t LastEscapePrefix;
bool DecodedModRM;
bool DecodedSIB;
DecodedOperand Dest;
DecodedOperand Src1;
DecodedOperand Src2;
// Contains the dispatcher handler pointer
X86InstInfo const* TableInfo;
};
union ModRMDecoded {
uint8_t Hex{};
struct {
uint8_t rm : 3;
uint8_t reg : 3;
uint8_t mod : 2;
};
};
union SIBDecoded {
uint8_t Hex{};
struct {
uint8_t base : 3;
uint8_t index : 3;
uint8_t scale : 2;
};
};
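// Illustrative example (not in the original source): the ModRM byte 0xD8 is
// 0b11'011'000, so the bitfields above decode it as mod=0b11 (register-direct),
// reg=0b011, rm=0b000.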
enum InstType {
TYPE_UNKNOWN,
TYPE_LEGACY_PREFIX,
TYPE_PREFIX,
TYPE_REX_PREFIX,
TYPE_SECONDARY_TABLE_PREFIX,
TYPE_X87_TABLE_PREFIX,
TYPE_MODRM_TABLE_PREFIX,
TYPE_VEX_TABLE_PREFIX,
TYPE_XOP_TABLE_PREFIX,
TYPE_INST,
TYPE_INVALID,
TYPE_COPY_OTHER,
// Must be in order
// Groups 1, 1a, 2, 3, 4, 5, 11 are for the primary op table
// Groups 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, p are for the secondary op table
TYPE_GROUP_1,
TYPE_GROUP_1A,
TYPE_GROUP_2,
TYPE_GROUP_3,
TYPE_GROUP_4,
TYPE_GROUP_5,
TYPE_GROUP_11,
// Must be in order
// Groups 6-p Are for the secondary op table
TYPE_GROUP_6,
TYPE_GROUP_7,
TYPE_GROUP_8,
TYPE_GROUP_9,
TYPE_GROUP_10,
TYPE_GROUP_12,
TYPE_GROUP_13,
TYPE_GROUP_14,
TYPE_GROUP_15,
TYPE_GROUP_16,
TYPE_GROUP_17,
TYPE_GROUP_P,
// The secondary op extension table allows further extensions
// Group 7 allows additional extensions to this table
TYPE_SECOND_GROUP_MODRM,
// Just to make grepping easier
TYPE_3DNOW_TABLE = TYPE_INVALID,
TYPE_3DNOW_INST = TYPE_INVALID,
// Exists in the table but isn't decoded correctly
TYPE_UNDEC = TYPE_INVALID,
TYPE_MMX = TYPE_INVALID,
TYPE_X87 = TYPE_INVALID,
TYPE_PRIV = TYPE_INVALID,
TYPE_0F38_TABLE = TYPE_INVALID,
TYPE_0F3A_TABLE = TYPE_INVALID,
};
namespace InstFlags {
constexpr uint32_t FLAGS_NONE = 0;
constexpr uint32_t FLAGS_DEBUG = (1 << 1);
constexpr uint32_t FLAGS_DEBUG_MEM_ACCESS = (1 << 2);
constexpr uint32_t FLAGS_SUPPORTS_REP = (1 << 3);
constexpr uint32_t FLAGS_BLOCK_END = (1 << 4);
constexpr uint32_t FLAGS_SETS_RIP = (1 << 5);
constexpr uint32_t FLAGS_DISPLACE_SIZE_MUL_2 = (1 << 6);
constexpr uint32_t FLAGS_DISPLACE_SIZE_DIV_2 = (1 << 7);
constexpr uint32_t FLAGS_SRC_SEXT = (1 << 8);
constexpr uint32_t FLAGS_MEM_OFFSET = (1 << 9);
// Enables XMM based subflags
// Current reserved range for this SF is [10, 15]
constexpr uint32_t FLAGS_XMM_FLAGS = (1 << 10);
// Non-XMM subflags
constexpr uint32_t FLAGS_SF_DST_RAX = (1 << 11);
constexpr uint32_t FLAGS_SF_DST_RDX = (1 << 12);
constexpr uint32_t FLAGS_SF_SRC_RAX = (1 << 13);
constexpr uint32_t FLAGS_SF_SRC_RCX = (1 << 14);
constexpr uint32_t FLAGS_SF_REX_IN_BYTE = (1 << 15);
// XMM subflags
constexpr uint32_t FLAGS_SF_HIGH_XMM_REG = (1 << 11);
constexpr uint32_t FLAGS_SF_DST_GPR = (1 << 12);
constexpr uint32_t FLAGS_SF_SRC_GPR = (1 << 13);
// Enables MODRM specific subflags
// Current reserved range for this SF is [14, 17]
constexpr uint32_t FLAGS_MODRM = (1 << 16);
// With ModRM SF flag enabled
// Direction of ModRM. Dst ^ Src
// Set means destination is rm bits
// Unset means src is rm bits
constexpr uint32_t FLAGS_SF_MOD_DST = (1 << 17);
// If the instruction is restricted to mem or reg only
// 0b00 = Regular ModRM support
// 0b01 = Memory accesses only
// 0b10 = Register accesses only
// 0b11 = <Reserved>
constexpr uint32_t FLAGS_SF_MOD_MEM_ONLY = (1 << 18);
constexpr uint32_t FLAGS_SF_MOD_REG_ONLY = (1 << 19);
constexpr uint32_t FLAGS_SIZE_DST_OFF = 20;
constexpr uint32_t FLAGS_SIZE_SRC_OFF = FLAGS_SIZE_DST_OFF + 3;
constexpr uint32_t SIZE_MASK = 0b111;
constexpr uint32_t SIZE_DEF = 0b000;
constexpr uint32_t SIZE_8BIT = 0b001;
constexpr uint32_t SIZE_16BIT = 0b010;
constexpr uint32_t SIZE_32BIT = 0b011;
constexpr uint32_t SIZE_64BIT = 0b100;
constexpr uint32_t SIZE_128BIT = 0b101;
constexpr uint32_t SIZE_256BIT = 0b110;
inline uint32_t GetSizeDstFlags(uint32_t Flags) { return (Flags >> FLAGS_SIZE_DST_OFF) & SIZE_MASK; }
inline uint32_t GetSizeSrcFlags(uint32_t Flags) { return (Flags >> FLAGS_SIZE_SRC_OFF) & SIZE_MASK; }
inline uint32_t GenFlagsDstSize(uint32_t Size) { return Size << FLAGS_SIZE_DST_OFF; }
inline uint32_t GenFlagsSrcSize(uint32_t Size) { return Size << FLAGS_SIZE_SRC_OFF; }
inline uint32_t GenFlagsSameSize(uint32_t Size) {return (Size << FLAGS_SIZE_DST_OFF) | (Size << FLAGS_SIZE_SRC_OFF); }
inline uint32_t GenFlagsSizes(uint32_t Dest, uint32_t Src) {return (Dest << FLAGS_SIZE_DST_OFF) | (Src << FLAGS_SIZE_SRC_OFF); }
// If it has an xmm subflag
#define HAS_XMM_SUBFLAG(x, flag) (((x) & (FEXCore::X86Tables::InstFlags::FLAGS_XMM_FLAGS | (flag))) == (FEXCore::X86Tables::InstFlags::FLAGS_XMM_FLAGS | (flag)))
// If it has non-xmm subflag
#define HAS_NON_XMM_SUBFLAG(x, flag) (((x) & (FEXCore::X86Tables::InstFlags::FLAGS_XMM_FLAGS | (flag))) == (flag))
}
auto OpToIndex = [](uint8_t Op) constexpr -> uint8_t {
switch (Op) {
// Group 1
case 0x80: return 0;
case 0x81: return 1;
case 0x82: return 2;
case 0x83: return 3;
// Group 2
case 0xC0: return 0;
case 0xC1: return 1;
case 0xD0: return 2;
case 0xD1: return 3;
case 0xD2: return 4;
case 0xD3: return 5;
// Group 3
case 0xF6: return 0;
case 0xF7: return 1;
// Group 4
case 0xFE: return 0;
// Group 5
case 0xFF: return 0;
// Group 11
case 0xC6: return 0;
case 0xC7: return 1;
}
return 0;
};
using DecodedOp = DecodedInst const*;
using OpDispatchPtr = void (IR::OpDispatchBuilder::*)(DecodedOp);
#ifndef NDEBUG
namespace X86InstDebugInfo {
constexpr uint64_t FLAGS_MEM_ALIGN_4 = (1 << 0);
constexpr uint64_t FLAGS_MEM_ALIGN_8 = (1 << 1);
constexpr uint64_t FLAGS_MEM_ALIGN_16 = (1 << 2);
constexpr uint64_t FLAGS_MEM_ALIGN_SIZE = (1 << 3); // If instruction size changes depending on prefixes
constexpr uint64_t FLAGS_MEM_ACCESS = (1 << 4);
constexpr uint64_t FLAGS_DEBUG = (1 << 5);
constexpr uint64_t FLAGS_DIVIDE = (1 << 6);
struct Flags {
uint64_t DebugFlags;
};
void InstallDebugInfo();
}
#endif
struct X86InstInfo {
char const *Name;
InstType Type;
uint32_t Flags; ///< Must be larger than InstFlags enum
uint8_t MoreBytes;
OpDispatchPtr OpcodeDispatcher;
#ifndef NDEBUG
X86InstDebugInfo::Flags DebugInfo;
uint32_t NumUnitTestsGenerated;
#endif
bool operator==(const X86InstInfo &b) const {
if (strcmp(Name, b.Name) != 0 ||
Type != b.Type ||
Flags != b.Flags ||
MoreBytes != b.MoreBytes)
return false;
// We don't care if the opcode dispatcher differs
return true;
}
};
static_assert(std::is_pod<X86InstInfo>::value, "Pod?");
constexpr size_t MAX_PRIMARY_TABLE_SIZE = 256;
constexpr size_t MAX_SECOND_TABLE_SIZE = 256;
constexpr size_t MAX_REP_MOD_TABLE_SIZE = 256;
constexpr size_t MAX_REPNE_MOD_TABLE_SIZE = 256;
constexpr size_t MAX_OPSIZE_MOD_TABLE_SIZE = 256;
// 6 (groups) | 6 (max indexes) | 8 ops = 0b111'111'111 = 9 bits
constexpr size_t MAX_INST_GROUP_TABLE_SIZE = 512;
// 12 (groups) | 3(max indexes) | 8 ops = 0b1111'11'111 = 9 bits
constexpr size_t MAX_INST_SECOND_GROUP_TABLE_SIZE = 512;
constexpr size_t MAX_X87_TABLE_SIZE = 1 << 11;
constexpr size_t MAX_SECOND_MODRM_TABLE_SIZE = 32;
// 3 prefixes | 8 bit opcode
constexpr size_t MAX_0F_38_TABLE_SIZE = (1 << 11);
// 1 REX | 1 prefixes | 8 bit opcode
constexpr size_t MAX_0F_3A_TABLE_SIZE = (1 << 11);
constexpr size_t MAX_3DNOW_TABLE_SIZE = 256;
// VEX
// map_select(2 bits for now) | vex.pp (2 bits) | opcode (8bit)
constexpr size_t MAX_VEX_TABLE_SIZE = (1 << 13);
// VEX group ops
// group select (3 bits for now) | ModRM opcode (3 bits)
constexpr size_t MAX_VEX_GROUP_TABLE_SIZE = (1 << 7);
// XOP
// group (2 bits for now) | vex.pp (2 bits) | opcode (8bit)
constexpr size_t MAX_XOP_TABLE_SIZE = (1 << 13);
// XOP group ops
// group select (2 bits for now) | modrm opcode (3 bits)
constexpr size_t MAX_XOP_GROUP_TABLE_SIZE = (1 << 6);
extern std::array<X86InstInfo, MAX_PRIMARY_TABLE_SIZE> BaseOps;
extern std::array<X86InstInfo, MAX_SECOND_TABLE_SIZE> SecondBaseOps;
extern std::array<X86InstInfo, MAX_REP_MOD_TABLE_SIZE> RepModOps;
extern std::array<X86InstInfo, MAX_REPNE_MOD_TABLE_SIZE> RepNEModOps;
extern std::array<X86InstInfo, MAX_OPSIZE_MOD_TABLE_SIZE> OpSizeModOps;
extern std::array<X86InstInfo, MAX_INST_GROUP_TABLE_SIZE> PrimaryInstGroupOps;
extern std::array<X86InstInfo, MAX_INST_SECOND_GROUP_TABLE_SIZE> SecondInstGroupOps;
extern std::array<X86InstInfo, MAX_SECOND_MODRM_TABLE_SIZE> SecondModRMTableOps;
extern std::array<X86InstInfo, MAX_X87_TABLE_SIZE> X87Ops;
extern std::array<X86InstInfo, MAX_3DNOW_TABLE_SIZE> DDDNowOps;
extern std::array<X86InstInfo, MAX_0F_38_TABLE_SIZE> H0F38TableOps;
extern std::array<X86InstInfo, MAX_0F_3A_TABLE_SIZE> H0F3ATableOps;
// VEX
extern std::array<X86InstInfo, MAX_VEX_TABLE_SIZE> VEXTableOps;
extern std::array<X86InstInfo, MAX_VEX_GROUP_TABLE_SIZE> VEXTableGroupOps;
// XOP
extern std::array<X86InstInfo, MAX_XOP_TABLE_SIZE> XOPTableOps;
extern std::array<X86InstInfo, MAX_XOP_GROUP_TABLE_SIZE> XOPTableGroupOps;
void InitializeInfoTables();
}
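A hypothetical lookup sketch (editor's illustration): after InitializeInfoTables() populates the arrays, a single-byte opcode indexes BaseOps directly; 0x90 is x86's canonical NOP slot. PrintNop and the assumption that Name is filled in by the tables are not part of this commit:
#include <cstdio>
void PrintNop() {
  FEXCore::X86Tables::InitializeInfoTables();
  auto const &Info = FEXCore::X86Tables::BaseOps[0x90];
  printf("0x90 decodes as: %s\n", Info.Name);
}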


@ -0,0 +1,26 @@
#pragma once
#include <cstdint>
namespace FEXCore::HLE {
// XXX: This should map multiple IDs correctly
// Tracking relationships between thread IDs and such
class ThreadManagement {
public:
uint64_t GetUID() { return UID; }
uint64_t GetGID() { return GID; }
uint64_t GetEUID() { return EUID; }
uint64_t GetEGID() { return EGID; }
uint64_t GetTID() { return TID; }
uint64_t GetPID() { return PID; }
uint64_t UID{1000};
uint64_t GID{1000};
uint64_t EUID{1000};
uint64_t EGID{1000};
uint64_t TID{1};
uint64_t PID{1};
uint64_t child_tid{0};
uint64_t parent_tid{0};
uint64_t robust_list_head{0};
};
}


@ -0,0 +1,9 @@
#pragma once
#include <cstdint>
namespace FEXCore::HLE {
struct SyscallArguments {
static constexpr std::size_t MAX_ARGS = 7;
uint64_t Argument[MAX_ARGS];
};
}
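A filling sketch (editor's illustration; the slot layout is an assumption, this header only fixes the capacity at seven):
inline void FillExample() {
  FEXCore::HLE::SyscallArguments Args{};
  Args.Argument[0] = 39; // hypothetically the syscall number; 39 is getpid on x86-64
  Args.Argument[1] = 0;  // the remaining slots would carry register arguments
}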


@ -0,0 +1,49 @@
#pragma once
#include "LogManager.h"
#include <stdint.h>
namespace FEXCore::HLE {
#define INVALID_OP { LogMan::Msg::A("Tried to syscall with unknown number of registers"); return 0; }
class SyscallVisitor {
public:
SyscallVisitor(uint32_t Mask) : SyscallVisitor(Mask, false) {}
SyscallVisitor(uint32_t Mask, bool Constant) : ArgsMask { Mask }, ConstantVal { Constant } {}
/**
* @brief If this syscall returns a constant value regardless of state, then we can just read the value at compile time
* This won't happen often
*
* @return true if it is constant value
*/
bool IsConstant() { return ConstantVal; }
virtual uint64_t VisitSyscall0() INVALID_OP
virtual uint64_t VisitSyscall1(uint64_t RDI) INVALID_OP
virtual uint64_t VisitSyscall2(uint64_t RDI,
uint64_t RSI) INVALID_OP
virtual uint64_t VisitSyscall3(uint64_t RDI,
uint64_t RSI,
uint64_t RDX) INVALID_OP
virtual uint64_t VisitSyscall4(uint64_t RDI,
uint64_t RSI,
uint64_t RDX,
uint64_t R10) INVALID_OP
virtual uint64_t VisitSyscall5(uint64_t RDI,
uint64_t RSI,
uint64_t RDX,
uint64_t R10,
uint64_t R8) INVALID_OP
// This one MUST be valid
// Hard fallback if we couldn't look it up
virtual uint64_t VisitSyscall6(uint64_t RDI,
uint64_t RSI,
uint64_t RDX,
uint64_t R10,
uint64_t R8,
uint64_t R9) = 0;
private:
uint32_t ArgsMask{};
bool ConstantVal{};
};
#undef INVALID_OP
}
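As a concrete sketch (editor's illustration, not part of the commit): a constant-returning visitor only needs the mandatory six-argument overload, and can advertise itself as constant through the second constructor parameter. GetPidVisitor, the mask value 0, and the constant 1 are all hypothetical:
class GetPidVisitor final : public FEXCore::HLE::SyscallVisitor {
public:
  // Mask 0: no register arguments consumed; true: result is state-independent
  GetPidVisitor() : SyscallVisitor(0, true) {}
  uint64_t VisitSyscall0() override { return 1; }
  uint64_t VisitSyscall6(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t) override {
    return VisitSyscall0();
  }
};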

329
include/FEXCore/IR/IR.h Normal file

@ -0,0 +1,329 @@
#pragma once
#include <array>
#include <cassert>
#include <cstdint>
#include <sstream>
namespace FEXCore::IR {
/**
* @brief This is a very simple wrapper for our node pointers
*
* This is necessary to allow a few things
* - Reduce memory usage by having the pointer be a 32-bit offset rather than a whole 64-bit pointer
* - Actually use an offset from a base so we aren't storing pointers for everything
* - Makes IR list copying as cheap as a memcpy
* Downsides
* - The IR nodes have to be allocated out of a linear array of memory
* - We currently only allow a 32-bit byte offset, so with 16-byte nodes a list tops out around 268 million nodes
* - We have to have the base offset live somewhere else
* - Has to be POD and trivially copyable
* - Makes every real node access turn into a [Base + Offset] access
*/
struct NodeWrapper final {
// On x86-64 using a uint64_t type is more efficient since the addressing modes give you [<Base> + <Index> + <imm offset>]
// On AArch64 using uint32_t is just more memory efficient. 32bit or 64bit offset doesn't matter
// We use uint32_t to be more memory efficient (Cuts our node list size in half)
using NodeOffsetType = uint32_t;
NodeOffsetType NodeOffset;
static NodeWrapper WrapOffset(NodeOffsetType Offset) {
NodeWrapper Wrapped;
Wrapped.NodeOffset = Offset;
return Wrapped;
}
static NodeWrapper WrapPtr(uintptr_t Base, uintptr_t Value) {
NodeWrapper Wrapped;
Wrapped.SetOffset(Base, Value);
return Wrapped;
}
static void *UnwrapNode(uintptr_t Base, NodeWrapper Node) {
return Node.GetPtr(Base);
}
uint32_t ID() const;
explicit NodeWrapper() = default;
void *GetPtr(uintptr_t Base) { return reinterpret_cast<void*>(Base + NodeOffset); }
void const *GetPtr(uintptr_t Base) const { return reinterpret_cast<void*>(Base + NodeOffset); }
void SetOffset(uintptr_t Base, uintptr_t Value) { NodeOffset = Value - Base; }
bool operator==(NodeWrapper const &rhs) { return NodeOffset == rhs.NodeOffset; }
};
static_assert(std::is_pod<NodeWrapper>::value);
static_assert(sizeof(NodeWrapper) == sizeof(uint32_t));
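The wrapper is a plain offset computation; a round-trip sketch (editor's illustration; Base and the node address are hypothetical, <cassert> is already included above):
inline void RoundTripExample() {
  uintptr_t Base = 0x100000;  // hypothetical list base
  uintptr_t Node = Base + 64; // hypothetical node address inside the list
  NodeWrapper W = NodeWrapper::WrapPtr(Base, Node);
  assert(reinterpret_cast<uintptr_t>(W.GetPtr(Base)) == Node); // SetOffset stored Node - Base
}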
struct OrderedNodeHeader {
NodeWrapper Value;
NodeWrapper Next;
NodeWrapper Previous;
};
static_assert(sizeof(OrderedNodeHeader) == sizeof(uint32_t) * 3);
/**
* @brief This is our NodeWrapperIterator
* This stores both the memory base and the provided NodeWrapper to be able to walk the list of nodes directly
* Only the increment and decrement implementations of this class require understanding the implementation details of OrderedNode
*/
class NodeWrapperIterator final {
public:
using value_type = NodeWrapper;
using size_type = std::size_t;
using difference_type = std::ptrdiff_t;
using reference = value_type&;
using const_reference = const value_type&;
using pointer = value_type*;
using const_pointer = const value_type*;
using iterator = NodeWrapperIterator;
using const_iterator = const NodeWrapperIterator;
using reverse_iterator = iterator;
using const_reverse_iterator = const_iterator;
using iterator_category = std::bidirectional_iterator_tag;
using NodeType = NodeWrapper;
using NodePtr = NodeWrapper*;
using NodeRef = NodeWrapper&;
NodeWrapperIterator(uintptr_t Base) : BaseList {Base} {}
explicit NodeWrapperIterator(uintptr_t Base, NodeType Ptr) : BaseList {Base}, Node {Ptr} {}
bool operator==(const NodeWrapperIterator &rhs) const {
return Node.NodeOffset == rhs.Node.NodeOffset;
}
bool operator!=(const NodeWrapperIterator &rhs) const {
return !operator==(rhs);
}
NodeWrapperIterator operator++() {
OrderedNodeHeader *RealNode = reinterpret_cast<OrderedNodeHeader*>(Node.GetPtr(BaseList));
Node = RealNode->Next;
return *this;
}
NodeWrapperIterator operator--() {
OrderedNodeHeader *RealNode = reinterpret_cast<OrderedNodeHeader*>(Node.GetPtr(BaseList));
Node = RealNode->Previous;
return *this;
}
NodeRef operator*() {
return Node;
}
NodePtr operator()() {
return &Node;
}
private:
uintptr_t BaseList{};
NodeType Node{};
};
/**
* @brief The IROp_Header is a dynamically sized array
* At the end it contains a uint8_t for the number of arguments that the op has
* Then there is an unsized array of NodeWrapper arguments sized to that argument count
* The op structures that include the header must ensure that they pad themselves correctly to the number of arguments used
*/
struct IROp_Header;
/**
* @brief This is a node in our IR representation
* It is a doubly linked list node that lives in a linearly allocated node list
* The links in the nodes can live in a list independent of the IR data itself
*
* ex.
* Region1 : ... <-> <OrderedNode> <-> <OrderedNode> <-> ...
* | *<Value> |
* v v
* Region2 : <IROp>..<IROp>..<IROp>..<IROp>
*
* In this example the OrderedNodes are allocated in one linear memory region (not necessarily linked contiguously with one another)
* The second region is contiguous, but the two regions have no direct relationship with one another
*/
class OrderedNode final {
friend class NodeWrapperIterator;
friend class OrderedList;
public:
// These three values are laid out very specifically to make it fast to access the NodeWrappers specifically
OrderedNodeHeader Header;
uint32_t NumUses;
OrderedNode() = default;
/**
* @brief Appends a node to this current node
*
* Before. <Prev> <-> <Current> <-> <Next>
* After. <Prev> <-> <Current> <-> <Node> <-> <Next>
*
* @return Pointer to the node being added
*/
NodeWrapper append(uintptr_t Base, NodeWrapper Node) {
// Set Next Node's Previous to incoming node
SetPrevious(Base, Header.Next, Node);
// Set Incoming node's links to this node's links
SetPrevious(Base, Node, Wrapped(Base));
SetNext(Base, Node, Header.Next);
// Set this node's next to the incoming node
SetNext(Base, Wrapped(Base), Node);
// Return the node we are appending
return Node;
}
OrderedNode *append(uintptr_t Base, OrderedNode *Node) {
NodeWrapper WNode = Node->Wrapped(Base);
// Set Next Node's Previous to incoming node
SetPrevious(Base, Header.Next, WNode);
// Set Incoming node's links to this node's links
SetPrevious(Base, WNode, Wrapped(Base));
SetNext(Base, WNode, Header.Next);
// Set this node's next to the incoming node
SetNext(Base, Wrapped(Base), WNode);
// Return the node we are appending
return Node;
}
/**
* @brief Prepends a node to the current node
* Before. <Prev> <-> <Current> <-> <Next>
* After. <Prev> <-> <Node> <-> <Current> <-> <Next>
*
* @return Pointer to the node being added
*/
NodeWrapper prepend(uintptr_t Base, NodeWrapper Node) {
// Set the previous node's next to the incoming node
SetNext(Base, Header.Previous, Node);
// Set the incoming node's links
SetPrevious(Base, Node, Header.Previous);
SetNext(Base, Node, Wrapped(Base));
// Set the current node's link
SetPrevious(Base, Wrapped(Base), Node);
// Return the node we are prepending
return Node;
}
OrderedNode *prepend(uintptr_t Base, OrderedNode *Node) {
NodeWrapper WNode = Node->Wrapped(Base);
// Set the previous node's next to the incoming node
SetNext(Base, Header.Previous, WNode);
// Set the incoming node's links
SetPrevious(Base, WNode, Header.Previous);
SetNext(Base, WNode, Wrapped(Base));
// Set the current node's link
SetPrevious(Base, Wrapped(Base), WNode);
// Return the node we are prepending
return Node;
}
/**
* @brief Counts the remaining nodes from this point onward
*
* Doesn't seek back to the head of the list first
*
*/
size_t size(uintptr_t Base) const {
size_t Size = 1;
// Walk the list forward until we hit a sentinel
NodeWrapper Current = Header.Next;
while (Current.NodeOffset != 0) {
++Size;
OrderedNode *RealNode = reinterpret_cast<OrderedNode*>(Current.GetPtr(Base));
Current = RealNode->Header.Next;
}
return Size;
}
void Unlink(uintptr_t Base) {
// This removes the node from the list, orphaning it
// Before: <Previous> <-> <Current> <-> <Next>
// After: <Previous> <-> <Next>
SetNext(Base, Header.Previous, Header.Next);
SetPrevious(Base, Header.Next, Header.Previous);
}
IROp_Header const* Op(uintptr_t Base) const { return reinterpret_cast<IROp_Header const*>(Header.Value.GetPtr(Base)); }
IROp_Header *Op(uintptr_t Base) { return reinterpret_cast<IROp_Header*>(Header.Value.GetPtr(Base)); }
uint32_t GetUses() const { return NumUses; }
void AddUse() { ++NumUses; }
void RemoveUse() { --NumUses; }
using iterator = NodeWrapperIterator;
iterator begin(uint64_t Base) noexcept { return iterator(Base, Wrapped(Base)); }
iterator end(uint64_t Base, uint64_t End) noexcept { return iterator(Base, WrappedOffset(End)); }
NodeWrapper Wrapped(uintptr_t Base) {
NodeWrapper Tmp;
Tmp.SetOffset(Base, reinterpret_cast<uintptr_t>(this));
return Tmp;
}
private:
NodeWrapper WrappedOffset(uint32_t Offset) {
NodeWrapper Tmp;
Tmp.NodeOffset = Offset;
return Tmp;
}
static void SetPrevious(uintptr_t Base, NodeWrapper Node, NodeWrapper New) {
if (Node.NodeOffset == 0) return;
OrderedNode *RealNode = reinterpret_cast<OrderedNode*>(Node.GetPtr(Base));
RealNode->Header.Previous = New;
}
static void SetNext(uintptr_t Base, NodeWrapper Node, NodeWrapper New) {
if (Node.NodeOffset == 0) return;
OrderedNode *RealNode = reinterpret_cast<OrderedNode*>(Node.GetPtr(Base));
RealNode->Header.Next = New;
}
void SetUses(uint32_t Uses) { NumUses = Uses; }
};
static_assert(std::is_pod<OrderedNode>::value);
static_assert(std::is_trivially_copyable<OrderedNode>::value);
static_assert(offsetof(OrderedNode, Header) == 0);
static_assert(sizeof(OrderedNode) == (sizeof(OrderedNodeHeader) + sizeof(uint32_t)));
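To make the link discipline concrete, a minimal sketch (editor's illustration; AppendExample and the 16-slot buffer are hypothetical, and it needs <cstdlib>, <new> on top of what this header includes):
// Offset 0 doubles as the sentinel, so the first real node starts one slot in.
inline void AppendExample() {
  void *Buf = calloc(16, sizeof(OrderedNode));
  uintptr_t Base = reinterpret_cast<uintptr_t>(Buf);
  auto *A = new (reinterpret_cast<void*>(Base + 1 * sizeof(OrderedNode))) OrderedNode{};
  auto *B = new (reinterpret_cast<void*>(Base + 2 * sizeof(OrderedNode))) OrderedNode{};
  A->append(Base, B); // A <-> B; both outer links hit the offset-0 sentinel and are ignored
  assert(A->Header.Next.ID() == B->Wrapped(Base).ID());
  free(Buf);
}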
#define IROP_ENUM
#define IROP_STRUCTS
#define IROP_SIZES
#include "IRDefines.inc"
template <class T>
struct Wrapper final {
T *first;
OrderedNode *Node; ///< Actual node backing this IR op in the list
operator Wrapper<IROp_Header>() const { return Wrapper<IROp_Header> {reinterpret_cast<IROp_Header*>(first), Node}; }
operator OrderedNode *() { return Node; }
operator NodeWrapper () { return Node->Header.Value; }
};
template<bool>
class IRListView;
void Dump(std::stringstream *out, IRListView<false> const* IR);
inline uint32_t NodeWrapper::ID() const { return NodeOffset / sizeof(IR::OrderedNode); }
};


@ -0,0 +1,127 @@
#pragma once
#include "FEXCore/IR/IR.h"
#include "LogManager.h"
#include <cstddef>
#include <cstring>
#include <tuple>
#include <vector>
namespace FEXCore::IR {
/**
* @brief This is purely an intrusive allocator
* This doesn't support any form of ordering at all
* Just provides a chunk of memory for allocating IR nodes from
*
* Can potentially support reallocation if we are smart and make sure to invalidate anything holding a true pointer
*/
class IntrusiveAllocator final {
public:
IntrusiveAllocator() = delete;
IntrusiveAllocator(IntrusiveAllocator &&) = delete;
IntrusiveAllocator(size_t Size)
: MemorySize {Size} {
Data = reinterpret_cast<uintptr_t>(calloc(Size, 1));
}
~IntrusiveAllocator() {
free(reinterpret_cast<void*>(Data));
}
bool CheckSize(size_t Size) {
size_t NewOffset = CurrentOffset + Size;
return NewOffset <= MemorySize;
}
void *Allocate(size_t Size) {
assert(CheckSize(Size) && "Failure");
size_t NewOffset = CurrentOffset + Size;
uintptr_t NewPointer = Data + CurrentOffset;
CurrentOffset = NewOffset;
return reinterpret_cast<void*>(NewPointer);
}
size_t Size() const { return CurrentOffset; }
size_t BackingSize() const { return MemorySize; }
uintptr_t Begin() const { return Data; }
void Reset() { CurrentOffset = 0; }
void CopyData(IntrusiveAllocator const &rhs) {
CurrentOffset = rhs.CurrentOffset;
memcpy(reinterpret_cast<void*>(Data), reinterpret_cast<void*>(rhs.Data), CurrentOffset);
}
private:
size_t CurrentOffset {0};
size_t MemorySize;
uintptr_t Data;
};
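Usage is a plain bump pointer; a short sketch (editor's illustration; AllocatorExample and the one-page size are hypothetical):
inline void AllocatorExample() {
  IntrusiveAllocator Alloc(4096); // one page of backing memory
  if (Alloc.CheckSize(sizeof(OrderedNode))) {
    void *Mem = Alloc.Allocate(sizeof(OrderedNode));
    // ... construct a node in Mem ...
  }
  Alloc.Reset(); // rewinds the bump pointer; previously handed-out memory is invalidated
}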
template<bool Copy>
class IRListView final {
public:
IRListView() = delete;
IRListView(IRListView<Copy> &&) = delete;
IRListView(IntrusiveAllocator *Data, IntrusiveAllocator *List) {
DataSize = Data->Size();
ListSize = List->Size();
if (Copy) {
IRData = malloc(DataSize + ListSize);
ListData = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(IRData) + DataSize);
memcpy(IRData, reinterpret_cast<void*>(Data->Begin()), DataSize);
memcpy(ListData, reinterpret_cast<void*>(List->Begin()), ListSize);
}
else {
// We are just pointing to the data
IRData = reinterpret_cast<void*>(Data->Begin());
ListData = reinterpret_cast<void*>(List->Begin());
}
}
~IRListView() {
if (Copy) {
free(IRData);
// ListData is just offset from IRData
}
}
uintptr_t GetData() const { return reinterpret_cast<uintptr_t>(IRData); }
uintptr_t GetListData() const { return reinterpret_cast<uintptr_t>(ListData); }
size_t GetDataSize() const { return DataSize; }
size_t GetListSize() const { return ListSize; }
using iterator = NodeWrapperIterator;
iterator begin() const noexcept
{
NodeWrapper Wrapped;
Wrapped.NodeOffset = sizeof(OrderedNode);
return iterator(reinterpret_cast<uintptr_t>(ListData), Wrapped);
}
/**
* @brief This is not an iterator that you can reverse-iterate through!
*
* @return Our iterator sentinel to ensure ending correctly
*/
iterator end() const noexcept
{
NodeWrapper Wrapped;
Wrapped.NodeOffset = 0;
return iterator(reinterpret_cast<uintptr_t>(ListData), Wrapped);
}
private:
void *IRData;
void *ListData;
size_t DataSize;
size_t ListSize;
};
}
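Walking a view ties the pieces together; a sketch (editor's illustration; WalkExample is hypothetical and DataAlloc/ListAlloc are assumed to have been filled by an IR emitter):
inline void WalkExample(FEXCore::IR::IntrusiveAllocator *DataAlloc,
                        FEXCore::IR::IntrusiveAllocator *ListAlloc) {
  FEXCore::IR::IRListView<false> View(DataAlloc, ListAlloc); // non-copying view
  for (auto it = View.begin(); it != View.end(); ++it) {
    FEXCore::IR::NodeWrapper Node = *it;
    // Node.ID() names the node; Node.GetPtr(View.GetListData()) reaches the OrderedNode
  }
}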


@ -0,0 +1,15 @@
#pragma once
#include <cstddef>
#include <stdint.h>
namespace FEXCore::Memory {
struct MemRegion {
void *Ptr;
size_t Offset;
size_t Size;
bool operator==(void *rhs) const { return Ptr == rhs; }
bool contains(uint64_t Addr) const { return Addr >= Offset && Addr < (Offset + Size); }
};
}
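contains() treats Offset as the region's start address with an exclusive end; a quick sketch (editor's illustration; ContainsExample and the numbers are hypothetical, assumes <cassert>):
inline void ContainsExample() {
  FEXCore::Memory::MemRegion Region {nullptr, 0x1000, 0x2000}; // covers [0x1000, 0x3000)
  assert(Region.contains(0x2FFF));
  assert(!Region.contains(0x3000)); // the end address is exclusive
}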


@ -0,0 +1,27 @@
#pragma once
#include <stddef.h>
#include <stdint.h>
namespace FEXCore::SHM {
struct SHMObject {
void *Ptr;
uint8_t InternalState[0];
};
/**
* @brief Allocate a shared memory region that will be the base of our VM's memory
*
* @param Size The size of the SHM region
*
* @return An object representing our internal SHM state
*/
SHMObject *AllocateSHMRegion(size_t Size);
/**
* @brief Destroy the SHM region
*
* @param SHM The region previously created with AllocateSHMRegion
*/
void DestroyRegion(SHMObject *SHM);
}