Merge pull request #17328 from hrydgard/jit-memory-refactors

Some minor memory management refactoring
This commit is contained in:
Henrik Rydgård 2023-04-24 13:07:49 +02:00 committed by GitHub
commit 22314f8cc7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 49 additions and 70 deletions

View File

@ -136,7 +136,7 @@ public:
// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
void FreeCodeSpace() {
ProtectMemoryPages(region, region_size, MEM_PROT_READ | MEM_PROT_WRITE);
FreeMemoryPages(region, region_size);
FreeExecutableMemory(region, region_size);
region = nullptr;
writableRegion = nullptr;
region_size = 0;

View File

@ -30,15 +30,11 @@ template <class T, int N>
class FixedSizeQueue {
public:
FixedSizeQueue() {
// Allocate aligned memory, just because.
//int sizeInBytes = N * sizeof(T);
//storage_ = (T *)AllocateMemoryPages(sizeInBytes);
storage_ = new T[N];
clear();
}
~FixedSizeQueue() {
// FreeMemoryPages((void *)storage_, N * sizeof(T));
delete [] storage_;
}

View File

@ -118,10 +118,11 @@ static void *SearchForFreeMem(size_t size) {
}
#endif
#if PPSSPP_PLATFORM(WINDOWS)
// This is purposely not a full wrapper for VirtualAlloc/mmap, but it
// provides exactly the primitive operations that PPSSPP needs.
void *AllocateExecutableMemory(size_t size) {
#if defined(_WIN32)
void *ptr = nullptr;
DWORD prot = PAGE_EXECUTE_READWRITE;
if (PlatformIsWXExclusive())
@ -157,11 +158,21 @@ void *AllocateExecutableMemory(size_t size) {
ptr = VirtualAlloc(0, size, MEM_RESERVE | MEM_COMMIT, prot);
#endif
}
#else
static char *map_hint = 0;
if (!ptr) {
ERROR_LOG(MEMMAP, "Failed to allocate executable memory (%d)", (int)size);
}
return ptr;
}
#else // Non-Windows platforms
void *AllocateExecutableMemory(size_t size) {
static char *map_hint = nullptr;
#if PPSSPP_ARCH(AMD64)
// Try to request one that is close to our memory location if we're in high memory.
// We use a dummy global variable to give us a good location to start from.
// TODO: Should we also do this for ARM64?
if (!map_hint) {
if ((uintptr_t) &hint_location > 0xFFFFFFFFULL)
map_hint = (char*)ppsspp_round_page(&hint_location) - 0x20000000; // 0.5gb lower than our approximate location
@ -177,20 +188,13 @@ void *AllocateExecutableMemory(size_t size) {
prot = PROT_READ | PROT_WRITE; // PROT_EXEC is added later in this case.
void* ptr = mmap(map_hint, size, prot, MAP_ANON | MAP_PRIVATE, -1, 0);
#endif /* defined(_WIN32) */
#if !defined(_WIN32)
static const void *failed_result = MAP_FAILED;
#else
static const void *failed_result = nullptr;
#endif
if (ptr == failed_result) {
if (ptr == MAP_FAILED) {
ptr = nullptr;
ERROR_LOG(MEMMAP, "Failed to allocate executable memory (%d) errno=%d", (int)size, errno);
}
#if PPSSPP_ARCH(AMD64) && !defined(_WIN32)
#if PPSSPP_ARCH(AMD64)
else if ((uintptr_t)map_hint <= 0xFFFFFFFF) {
// Round up if we're below 32-bit mark, probably allocating sequentially.
map_hint += ppsspp_round_page(size);
@ -202,9 +206,12 @@ void *AllocateExecutableMemory(size_t size) {
}
}
#endif
return ptr;
}
#endif // non-windows
void *AllocateMemoryPages(size_t size, uint32_t memProtFlags) {
#ifdef _WIN32
if (sys_info.dwPageSize == 0)
@ -268,6 +275,10 @@ void FreeMemoryPages(void *ptr, size_t size) {
#endif
}
// Frees a region of memory given its base pointer and size in bytes.
// Currently this just forwards both arguments unchanged to FreeMemoryPages;
// having a separate entry point lets callers that hold executable memory
// release it through a dedicated function.
void FreeExecutableMemory(void *ptr, size_t size) {
FreeMemoryPages(ptr, size);
}
void FreeAlignedMemory(void* ptr) {
if (!ptr)
return;

View File

@ -31,9 +31,11 @@ bool PlatformIsWXExclusive();
// Note that some platforms go through special contortions to allocate executable memory. So for memory
// that's intended for execution, allocate it first using AllocateExecutableMemory, then modify protection as desired.
// AllocateMemoryPages is simpler and more generic. Note that on W^X platforms, this will return executable but not writable
// memory!
// AllocateMemoryPages is simpler and more generic.
// Note that on W^X platforms, this will return writable memory that can later be changed to executable!
void* AllocateExecutableMemory(size_t size);
void FreeExecutableMemory(void *ptr, size_t size);
void* AllocateMemoryPages(size_t size, uint32_t memProtFlags);
// Note that on platforms returning PlatformIsWXExclusive, you cannot set a page to be both writable and executable at the same time.
bool ProtectMemoryPages(const void* ptr, size_t size, uint32_t memProtFlags);
@ -45,24 +47,24 @@ void FreeAlignedMemory(void* ptr);
int GetMemoryProtectPageSize();
// A simple buffer that bypasses the libc memory allocator. As a result the buffer is always page-aligned.
template <typename T>
class SimpleBuf {
// A buffer that uses aligned memory. Can be useful for image processing.
template <typename T, size_t A>
class AlignedVector {
public:
SimpleBuf() : buf_(0), size_(0) {}
AlignedVector() : buf_(0), size_(0) {}
SimpleBuf(size_t size) : buf_(0) {
AlignedVector(size_t size) : buf_(0) {
resize(size);
}
SimpleBuf(const SimpleBuf &o) : buf_(o.buf_), size_(o.size_) {}
AlignedVector(const AlignedVector &o) : buf_(o.buf_), size_(o.size_) {}
// Move constructor
SimpleBuf(SimpleBuf &&o) noexcept : buf_(o.buf_), size_(o.size_) { o.buf_ = nullptr; o.size_ = 0; }
AlignedVector(AlignedVector &&o) noexcept : buf_(o.buf_), size_(o.size_) { o.buf_ = nullptr; o.size_ = 0; }
~SimpleBuf() {
~AlignedVector() {
if (buf_ != 0) {
FreeMemoryPages(buf_, size_ * sizeof(T));
FreeAlignedMemory(buf_);
}
}
@ -74,9 +76,9 @@ public:
void resize(size_t size) {
if (size_ < size) {
if (buf_ != 0) {
FreeMemoryPages(buf_, size_ * sizeof(T));
FreeAlignedMemory(buf_);
}
buf_ = (T *)AllocateMemoryPages(size * sizeof(T), MEM_PROT_READ | MEM_PROT_WRITE);
buf_ = (T *)AllocateAlignedMemory(size * sizeof(T), A);
size_ = size;
}
}

View File

@ -41,9 +41,13 @@ DrawEngineCommon::DrawEngineCommon() : decoderMap_(16) {
}
transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
}
DrawEngineCommon::~DrawEngineCommon() {
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
FreeMemoryPages(transformed, TRANSFORMED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(transformedExpanded, 3 * TRANSFORMED_VERTEX_BUFFER_SIZE);
delete decJitCache_;

View File

@ -482,8 +482,8 @@ protected:
};
std::vector<VideoInfo> videos_;
SimpleBuf<u32> tmpTexBuf32_;
SimpleBuf<u32> tmpTexBufRearrange_;
AlignedVector<u32, 16> tmpTexBuf32_;
AlignedVector<u32, 16> tmpTexBufRearrange_;
TexCacheEntry *nextTexture_ = nullptr;
bool failedTexture_ = false;

View File

@ -50,5 +50,5 @@ protected:
// depending on the factor and texture sizes, these can get pretty large
// maximum is 100 MB total (for a 512 by 512 texture with scaling factor 5 and hybrid scaling)
// of course, scaling factor 5 is totally silly anyway
SimpleBuf<u32> bufDeposter, bufOutput, bufTmp1, bufTmp2, bufTmp3;
AlignedVector<u32, 16> bufDeposter, bufOutput, bufTmp1, bufTmp2, bufTmp3;
};

View File

@ -88,9 +88,6 @@ DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device,
// Allocate nicely aligned memory. Maybe graphics drivers will
// appreciate it.
// All this is a LOT of memory, need to see if we can cut down somehow.
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
indexGen.Setup(decIndex);
InitDeviceObjects();
@ -102,8 +99,6 @@ DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device,
DrawEngineD3D11::~DrawEngineD3D11() {
DestroyDeviceObjects();
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
}
void DrawEngineD3D11::InitDeviceObjects() {

View File

@ -91,11 +91,6 @@ DrawEngineDX9::DrawEngineDX9(Draw::DrawContext *draw) : draw_(draw), vai_(256),
decOptions_.expand8BitNormalsToFloat = true;
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
// Allocate nicely aligned memory. Maybe graphics drivers will
// appreciate it.
// All this is a LOT of memory, need to see if we can cut down somehow.
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
indexGen.Setup(decIndex);
@ -113,8 +108,6 @@ DrawEngineDX9::~DrawEngineDX9() {
}
DestroyDeviceObjects();
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
vertexDeclMap_.Iterate([&](const uint32_t &key, IDirect3DVertexDeclaration9 *decl) {
if (decl) {
decl->Release();

View File

@ -67,12 +67,6 @@ DrawEngineGLES::DrawEngineGLES(Draw::DrawContext *draw) : inputLayoutMap_(16), d
decOptions_.expandAllWeightsToFloat = false;
decOptions_.expand8BitNormalsToFloat = false;
// Allocate nicely aligned memory. Maybe graphics drivers will
// appreciate it.
// All this is a LOT of memory, need to see if we can cut down somehow.
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
indexGen.Setup(decIndex);
InitDeviceObjects();
@ -83,8 +77,6 @@ DrawEngineGLES::DrawEngineGLES(Draw::DrawContext *draw) : inputLayoutMap_(16), d
DrawEngineGLES::~DrawEngineGLES() {
DestroyDeviceObjects();
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
delete tessDataTransferGLES;
}

View File

@ -41,14 +41,14 @@
#define TRANSFORM_BUF_SIZE (65536 * 48)
TransformUnit::TransformUnit() {
decoded_ = (u8 *)AllocateMemoryPages(TRANSFORM_BUF_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decoded_ = (u8 *)AllocateAlignedMemory(TRANSFORM_BUF_SIZE, 16);
if (!decoded_)
return;
binner_ = new BinManager();
}
TransformUnit::~TransformUnit() {
FreeMemoryPages(decoded_, TRANSFORM_BUF_SIZE);
FreeAlignedMemory(decoded_);
delete binner_;
}
@ -57,16 +57,10 @@ bool TransformUnit::IsStarted() {
}
SoftwareDrawEngine::SoftwareDrawEngine() {
// All this is a LOT of memory, need to see if we can cut down somehow. Used for splines.
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
flushOnParams_ = false;
}
SoftwareDrawEngine::~SoftwareDrawEngine() {
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
}
SoftwareDrawEngine::~SoftwareDrawEngine() {}
void SoftwareDrawEngine::NotifyConfigChanged() {
DrawEngineCommon::NotifyConfigChanged();

View File

@ -75,11 +75,6 @@ DrawEngineVulkan::DrawEngineVulkan(Draw::DrawContext *draw)
decOptions_.alignOutputToWord = true;
#endif
// Allocate nicely aligned memory. Maybe graphics drivers will appreciate it.
// All this is a LOT of memory, need to see if we can cut down somehow.
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
indexGen.Setup(decIndex);
}
@ -214,9 +209,6 @@ void DrawEngineVulkan::InitDeviceObjects() {
}
DrawEngineVulkan::~DrawEngineVulkan() {
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
DestroyDeviceObjects();
}