Implement a W^X-compatible code path. Works on Windows and Android; should also work on iOS, but that is untested.

Block linking is disabled in this mode for now; it can be re-enabled later with some more work.

To enable W^X on platforms other than iOS, simply change PlatformIsWXExclusive.
Henrik Rydgard 2016-08-28 14:52:08 +02:00
parent daf10ec882
commit b264657d56
15 changed files with 126 additions and 32 deletions
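For reference, the core rule of the W^X path is that a code page is never writable and executable at the same time: the region stays read/write while the JIT is emitting, and gets flipped to read/execute before anything jumps into it. A minimal POSIX-only sketch of that discipline, independent of the CodeBlock helpers changed below (hypothetical function names, simplified error handling):

#include <cstdint>
#include <cstring>
#include <sys/mman.h>

// Allocate the JIT region read/write only -- never RWX on a W^X platform.
static void *AllocJitRegion(size_t size) {
    return mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
}

typedef int (*JitFunc)();

// "Emit" code while the region is writable, then drop write and gain exec.
static JitFunc FinishBlock(void *region, const uint8_t *code, size_t len) {
    memcpy(region, code, len);                     // write while the pages are RW
    mprotect(region, len, PROT_READ | PROT_EXEC);  // W^X flip: RX from here on
    // On ARM/ARM64 the instruction cache would also have to be flushed here.
    return (JitFunc)region;
}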

View File

@ -45,7 +45,7 @@ private:
virtual void PoisonMemory() = 0;
public:
CodeBlock() {}
CodeBlock() : writeStart_(nullptr) {}
virtual ~CodeBlock() { if (region) FreeCodeSpace(); }
// Call this before you generate any code.
@ -53,14 +53,46 @@ public:
region_size = size;
region = (u8*)AllocateExecutableMemory(region_size);
T::SetCodePointer(region);
// On W^X platforms, we start with writable but not executable pages.
if (PlatformIsWXExclusive()) {
ProtectMemoryPages(region, region_size, MEM_PROT_READ | MEM_PROT_WRITE);
}
}
// Always clear code space with breakpoints, so that if someone accidentally executes
// uninitialized, it just breaks into the debugger.
void ClearCodeSpace() {
if (PlatformIsWXExclusive()) {
ProtectMemoryPages(region, region_size, MEM_PROT_READ | MEM_PROT_WRITE);
} else {
ProtectMemoryPages(region, region_size, MEM_PROT_READ | MEM_PROT_WRITE | MEM_PROT_EXEC);
}
PoisonMemory();
ResetCodePtr();
ProtectMemoryPages(region, region_size, MEM_PROT_READ | MEM_PROT_WRITE | MEM_PROT_EXEC);
}
// BeginWrite/EndWrite assume that we keep appending. If you don't specify a size and we later run into an already-executable block, we're screwed.
// These CANNOT be nested.
void BeginWrite(size_t sizeEstimate = 1) {
#ifdef _DEBUG
if (writeStart_) {
PanicAlert("Can't nest BeginWrite calls");
}
#endif
// In case the last block made the current page exec/no-write, let's fix that.
if (PlatformIsWXExclusive()) {
writeStart_ = GetCodePtr();
ProtectMemoryPages(writeStart_, sizeEstimate, MEM_PROT_READ | MEM_PROT_WRITE);
}
}
void EndWrite() {
// OK, we're done. Re-protect the memory we touched.
if (PlatformIsWXExclusive()) {
const uint8_t *end = GetCodePtr();
ProtectMemoryPages(writeStart_, end, MEM_PROT_READ | MEM_PROT_EXEC);
writeStart_ = nullptr;
}
}
// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
@ -82,13 +114,15 @@ public:
return T::GetCodePointer();
}
void ResetCodePtr()
{
void ResetCodePtr() {
T::SetCodePointer(region);
}
size_t GetSpaceLeft() const {
return region_size - (T::GetCodePointer() - region);
}
private:
const uint8_t *writeStart_;
};
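Taken together, the usage pattern the jit frontends adopt later in this commit looks roughly like this (condensed from the Compile() hunks below, so not compilable on its own; on non-W^X platforms BeginWrite/EndWrite effectively do nothing):

BeginWrite();                  // W^X: make the pages we're about to emit into read/write
int block_num = blocks.AllocateBlock(em_address);
JitBlock *b = blocks.GetBlock(block_num);
DoJit(em_address, b);          // emit the block
blocks.FinalizeBlock(block_num, jo.enableBlocklink);
EndWrite();                    // W^X: re-protect everything written since BeginWrite() as read/execute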

View File

@ -170,14 +170,13 @@ void *AllocateExecutableMemory(size_t size) {
#if defined(_M_X64)
// Try to request one that is close to our memory location if we're in high memory.
// We use a dummy global variable to give us a good location to start from.
if (exec && (!map_hint))
{
if (!map_hint) {
if ((uintptr_t) &hint_location > 0xFFFFFFFFULL)
map_hint = (char*)round_page(&hint_location) - 0x20000000; // 0.5gb lower than our approximate location
else
map_hint = (char*)0x20000000; // 0.5GB mark in memory
}
else if (exec && (uintptr_t) map_hint > 0xFFFFFFFFULL)
else if ((uintptr_t) map_hint > 0xFFFFFFFFULL)
{
map_hint -= round_page(size); /* round down to the next page if we're in high memory */
}
@ -185,7 +184,7 @@ void *AllocateExecutableMemory(size_t size) {
void* ptr = mmap(map_hint, size, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_ANON | MAP_PRIVATE
#if defined(_M_X64) && defined(MAP_32BIT)
| (exec && (uintptr_t) map_hint == 0 ? MAP_32BIT : 0)
| ((uintptr_t) map_hint == 0 ? MAP_32BIT : 0)
#endif
, -1, 0);
@ -202,7 +201,7 @@ void *AllocateExecutableMemory(size_t size) {
PanicAlert("Failed to allocate executable memory\n%s", GetLastErrorMsg());
}
#if defined(_M_X64) && !defined(_WIN32)
else if (exec && (uintptr_t)map_hint <= 0xFFFFFFFF) {
else if ((uintptr_t)map_hint <= 0xFFFFFFFF) {
// Round up if we're below 32-bit mark, probably allocating sequentially.
map_hint += round_page(size);
@ -286,25 +285,43 @@ void FreeAlignedMemory(void* ptr) {
}
bool PlatformIsWXExclusive() {
// TODO: Turn on on 64-bit iOS9, respect everywhere.
// Only 64-bit iOS 9 really needs this mode, but that's most iOS devices and all future ones,
// so let's keep things the same for all of them. Even without block linking, this should still be
// much faster than the IR JIT.
#ifdef IOS
return true;
#else
// Returning true here lets you test the W^X path on Windows and other non-W^X platforms.
return false;
#endif
}
void ProtectMemoryPages(void* ptr, size_t size, uint32_t memProtFlags) {
void ProtectMemoryPages(const void* ptr, size_t size, uint32_t memProtFlags) {
INFO_LOG(JIT, "ProtectMemoryPages: %p (%d) : r%d w%d x%d", ptr, (int)size, (memProtFlags & MEM_PROT_READ) != 0, (memProtFlags & MEM_PROT_WRITE) != 0, (memProtFlags & MEM_PROT_EXEC) != 0);
if (PlatformIsWXExclusive()) {
if ((memProtFlags & (MEM_PROT_WRITE | MEM_PROT_EXEC)) == (MEM_PROT_WRITE | MEM_PROT_EXEC))
PanicAlert("Bad memory protect : W^X is in effect, can't both write and exec");
}
// Note: VirtualProtect affects the full pages containing the requested range, so no rounding is
// needed on Windows. mprotect, however, requires a page-aligned address, so the range is rounded
// out to page boundaries below.
#ifdef _WIN32
uint32_t protect = ConvertProtFlagsWin32(memProtFlags);
DWORD oldValue;
if (!VirtualProtect(ptr, size, protect, &oldValue))
if (!VirtualProtect((void *)ptr, size, protect, &oldValue))
PanicAlert("WriteProtectMemory failed!\n%s", GetLastErrorMsg());
#elif defined(__SYMBIAN32__)
// Do nothing
#else
uint32_t protect = ConvertProtFlagsUnix(memProtFlags);
mprotect(ptr, size, protect);
uint32_t page_size = GetMemoryProtectPageSize();
uintptr_t start = (uintptr_t)ptr;
uintptr_t end = (uintptr_t)ptr + size;
start &= ~(page_size - 1);
end = (end + page_size - 1) & ~(page_size - 1);
INFO_LOG(JIT, "mprotect: %p to %p", (void *)start, (void *)end);
mprotect((void *)start, end - start, protect);
#endif
}
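To make the rounding concrete (hypothetical numbers, 4 KB pages): protecting 0x100 bytes at 0x10001234 rounds start down to 0x10001000 and end up to 0x10002000, so the single containing page is re-protected with one mprotect call; a range that straddles a page boundary grows to cover both pages.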

View File

@ -40,7 +40,7 @@ bool PlatformIsWXExclusive();
void* AllocateExecutableMemory(size_t size);
void* AllocateMemoryPages(size_t size, uint32_t memProtFlags);
// Note that on platforms returning PlatformIsWXExclusive, you cannot set a page to be both writable and executable at the same time.
void ProtectMemoryPages(void* ptr, size_t size, uint32_t memProtFlags);
void ProtectMemoryPages(const void* ptr, size_t size, uint32_t memProtFlags);
inline void ProtectMemoryPages(const void *start, const void *end, uint32_t memProtFlags) {
ProtectMemoryPages((void *)start, (const uint8_t *)end - (const uint8_t *)start, memProtFlags);
}

View File

@ -43,6 +43,7 @@ void ThunkManager::Init()
#endif
AllocCodeSpace(THUNK_ARENA_SIZE);
BeginWrite();
save_regs = GetCodePtr();
#ifdef _M_X64
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
@ -94,6 +95,7 @@ void ThunkManager::Init()
MOV(32, R(RDX), M(saved_gpr_state + 4 ));
#endif
RET();
EndWrite();
}
void ThunkManager::Reset()
@ -141,8 +143,7 @@ int ThunkManager::ThunkStackOffset()
#endif
}
const void *ThunkManager::ProtectFunction(const void *function, int num_params)
{
const void *ThunkManager::ProtectFunction(const void *function, int num_params) {
std::map<const void *, const u8 *>::iterator iter;
iter = thunks.find(function);
if (iter != thunks.end())
@ -150,6 +151,7 @@ const void *ThunkManager::ProtectFunction(const void *function, int num_params)
if (!region)
PanicAlert("Trying to protect functions before the emu is started. Bad bad bad.");
BeginWrite();
const u8 *call_point = GetCodePtr();
Enter(this, true);
@ -171,6 +173,7 @@ const void *ThunkManager::ProtectFunction(const void *function, int num_params)
Leave(this, true);
RET();
EndWrite();
thunks[function] = call_point;
return (const void *)call_point;

View File

@ -73,6 +73,7 @@ using namespace ArmJitConstants;
void ArmJit::GenerateFixedCode() {
const u8 *start = AlignCodePage();
BeginWrite();
// LR == SCRATCHREG2 on ARM32 so it needs to be pushed.
restoreRoundingMode = AlignCode16(); {
@ -280,9 +281,9 @@ void ArmJit::GenerateFixedCode() {
FlushLitPool();
FlushIcache();
// Freeze the dispatcher code
const void *end = AlignCodePage();
ProtectMemoryPages(start, end, MEM_PROT_READ | MEM_PROT_EXEC);
// Let's spare the pre-generated code from unprotect-reprotect.
AlignCodePage();
EndWrite();
}
} // namespace MIPSComp

View File

@ -201,11 +201,15 @@ void ArmJit::Compile(u32 em_address) {
ClearCache();
}
BeginWrite();
int block_num = blocks.AllocateBlock(em_address);
JitBlock *b = blocks.GetBlock(block_num);
DoJit(em_address, b);
blocks.FinalizeBlock(block_num, jo.enableBlocklink);
EndWrite();
bool cleanSlate = false;
if (js.hasSetRounding && !js.lastSetRounding) {
@ -417,6 +421,9 @@ void ArmJit::LinkBlock(u8 *exitPoint, const u8 *checkedEntry) {
}
void ArmJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
if (PlatformIsWXExclusive()) {
ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_WRITE);
}
// Send anyone who tries to run this block back to the dispatcher.
// Not entirely ideal, but .. pretty good.
// I hope there's enough space...
@ -426,6 +433,9 @@ void ArmJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
emit.STR(R0, CTXREG, offsetof(MIPSState, pc));
emit.B(MIPSComp::jit->GetDispatcher());
emit.FlushIcache();
if (PlatformIsWXExclusive()) {
ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_EXEC);
}
}
bool ArmJit::ReplaceJalTo(u32 dest) {

View File

@ -95,6 +95,8 @@ using namespace Arm64JitConstants;
void Arm64Jit::GenerateFixedCode(const JitOptions &jo) {
const u8 *start = AlignCodePage();
BeginWrite();
if (jo.useStaticAlloc) {
saveStaticRegisters = AlignCode16();
STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
@ -316,9 +318,9 @@ void Arm64Jit::GenerateFixedCode(const JitOptions &jo) {
// Don't forget to zap the instruction cache! This must stay at the end of this function.
FlushIcache();
// Freeze the dispatcher code
const void *end = AlignCodePage();
ProtectMemoryPages(start, end, MEM_PROT_READ | MEM_PROT_EXEC);
// Let's spare the pre-generated code from unprotect-reprotect.
AlignCodePage();
EndWrite();
}
} // namespace MIPSComp

View File

@ -191,11 +191,15 @@ void Arm64Jit::Compile(u32 em_address) {
ClearCache();
}
BeginWrite();
int block_num = blocks.AllocateBlock(em_address);
JitBlock *b = blocks.GetBlock(block_num);
DoJit(em_address, b);
blocks.FinalizeBlock(block_num, jo.enableBlocklink);
EndWrite();
bool cleanSlate = false;
if (js.hasSetRounding && !js.lastSetRounding) {
@ -412,11 +416,19 @@ void Arm64Jit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
// Send anyone who tries to run this block back to the dispatcher.
// Not entirely ideal, but .. works.
// Spurious entrances from previously linked blocks can only come through checkedEntry
if (PlatformIsWXExclusive()) {
ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_WRITE);
}
ARM64XEmitter emit(checkedEntry);
emit.MOVI2R(SCRATCH1, originalAddress);
emit.STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, pc));
emit.B(MIPSComp::jit->GetDispatcher());
emit.FlushIcache();
if (PlatformIsWXExclusive()) {
ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_EXEC);
}
}
bool Arm64Jit::ReplaceJalTo(u32 dest) {

View File

@ -17,6 +17,7 @@
#include "Common/CPUDetect.h"
#include "Core/MIPS/JitCommon/JitState.h"
#include "Common/MemoryUtil.h"
namespace MIPSComp {
JitOptions::JitOptions() {
@ -40,7 +41,9 @@ namespace MIPSComp {
useASIMDVFPU = false; // true
// Common
enableBlocklink = true;
// We can get block linking to work with W^X by doing even more unprotect/re-protect, but let's try without first.
enableBlocklink = !PlatformIsWXExclusive();
immBranches = false;
continueBranches = false;
continueJumps = false;
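The extra unprotect/re-protect that the comment above refers to would essentially mirror what UnlinkBlock now does, applied to the exit stub being patched. A hypothetical sketch for the x86 jit, not part of this commit (the 16-byte stub size is borrowed from UnlinkBlock):

void Jit::LinkBlock(u8 *exitPoint, const u8 *checkedEntry) {
    if (PlatformIsWXExclusive()) {
        // The exit stub lives in already-finalized (RX) code, so make it writable first.
        ProtectMemoryPages(exitPoint, 16, MEM_PROT_READ | MEM_PROT_WRITE);
    }
    XEmitter emit(exitPoint);
    emit.JMP(checkedEntry, true);  // patch the exit to jump straight to the linked block
    if (PlatformIsWXExclusive()) {
        ProtectMemoryPages(exitPoint, 16, MEM_PROT_READ | MEM_PROT_EXEC);
    }
}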

View File

@ -64,6 +64,7 @@ void ImHere() {
void Jit::GenerateFixedCode(JitOptions &jo) {
const u8 *start = AlignCodePage();
BeginWrite();
restoreRoundingMode = AlignCode16(); {
STMXCSR(M(&mips_->temp));
@ -217,10 +218,9 @@ void Jit::GenerateFixedCode(JitOptions &jo) {
ABI_PopAllCalleeSavedRegsAndAdjustStack();
RET();
// Let's spare the pre-generated code from unprotect-reprotect.
endOfPregeneratedCode = AlignCodePage();
// Freeze the pre-generated code.
ProtectMemoryPages(start, endOfPregeneratedCode, MEM_PROT_READ | MEM_PROT_EXEC);
EndWrite();
}
} // namespace

View File

@ -290,16 +290,19 @@ void Jit::EatInstruction(MIPSOpcode op)
void Jit::Compile(u32 em_address)
{
PROFILE_THIS_SCOPE("jitc");
if (GetSpaceLeft() < 0x10000 || blocks.IsFull())
{
if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) {
ClearCache();
}
BeginWrite();
int block_num = blocks.AllocateBlock(em_address);
JitBlock *b = blocks.GetBlock(block_num);
DoJit(em_address, b);
blocks.FinalizeBlock(block_num, jo.enableBlocklink);
EndWrite();
bool cleanSlate = false;
if (js.hasSetRounding && !js.lastSetRounding) {
@ -513,12 +516,18 @@ void Jit::LinkBlock(u8 *exitPoint, const u8 *checkedEntry) {
}
void Jit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
if (PlatformIsWXExclusive()) {
ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_WRITE);
}
// Send anyone who tries to run this block back to the dispatcher.
// Not entirely ideal, but .. pretty good.
// Spurious entrances from previously linked blocks can only come through checkedEntry
XEmitter emit(checkedEntry);
emit.MOV(32, M(&mips_->pc), Imm32(originalAddress));
emit.JMP(MIPSComp::jit->GetDispatcher(), true);
if (PlatformIsWXExclusive()) {
ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_EXEC);
}
}
bool Jit::ReplaceJalTo(u32 dest) {

View File

@ -163,6 +163,7 @@ static const JitLookup jitLookup[] = {
JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int32_t *jittedSize) {
dec_ = &dec;
BeginWrite();
const u8 *start = AlignCode16();
bool prescaleStep = false;
@ -313,6 +314,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
*/
*jittedSize = GetCodePtr() - start;
EndWrite();
return (JittedVertexDecoder)start;
}

View File

@ -143,7 +143,7 @@ static const JitLookup jitLookup[] = {
JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int32_t *jittedSize) {
dec_ = &dec;
BeginWrite();
const u8 *start = AlignCode16();
bool prescaleStep = false;
@ -300,7 +300,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
}
*jittedSize = GetCodePtr() - start;
EndWrite();
return (JittedVertexDecoder)start;
}

View File

@ -1402,7 +1402,6 @@ std::string VertexDecoder::GetString(DebugShaderStringType stringType) {
}
}
VertexDecoderJitCache::VertexDecoderJitCache()
#ifdef ARM64
: fp(this)

View File

@ -159,7 +159,8 @@ static const JitLookup jitLookup[] = {
JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int32_t *jittedSize) {
dec_ = &dec;
const u8 *start = this->GetCodePtr();
BeginWrite();
const u8 *start = this->AlignCode16();
#ifdef _M_IX86
// Store register values
@ -270,6 +271,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
RET();
*jittedSize = GetCodePtr() - start;
EndWrite();
return (JittedVertexDecoder)start;
}