Merge pull request #6148 from unknownbrackets/gpu-blocktransfer

Support memset and swizzling memcpy on framebuffers
This commit is contained in:
Henrik Rydgård 2014-05-27 17:43:22 +02:00
commit ac8c3b7892
18 changed files with 193 additions and 30 deletions

View File

@ -106,7 +106,7 @@ static int Replace_memcpy() {
u32 bytes = PARAM(2);
bool skip = false;
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
skip = gpu->UpdateMemory(destPtr, srcPtr, bytes);
skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes);
}
if (!skip && bytes != 0) {
u8 *dst = Memory::GetPointerUnchecked(destPtr);
@ -127,7 +127,7 @@ static int Replace_memcpy16() {
u32 bytes = PARAM(2) * 16;
bool skip = false;
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
skip = gpu->UpdateMemory(destPtr, srcPtr, bytes);
skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes);
}
if (!skip && bytes != 0) {
u8 *dst = Memory::GetPointerUnchecked(destPtr);
@ -142,13 +142,49 @@ static int Replace_memcpy16() {
return 10 + bytes / 4; // approximation
}
static int Replace_memcpy_swizzled() {
u32 destPtr = PARAM(0);
u32 srcPtr = PARAM(1);
u32 pitch = PARAM(2);
u32 h = PARAM(4);
if (Memory::IsVRAMAddress(srcPtr)) {
// Cheat a bit to force a download of the framebuffer.
// VRAM + 0x00400000 is simply a VRAM mirror.
gpu->PerformMemoryCopy(srcPtr ^ 0x00400000, srcPtr, pitch * h);
}
u8 *dstp = Memory::GetPointerUnchecked(destPtr);
const u8 *srcp = Memory::GetPointerUnchecked(srcPtr);
const u8 *ysrcp = srcp;
for (u32 y = 0; y < h; y += 8) {
const u8 *xsrcp = ysrcp;
for (u32 x = 0; x < pitch; x += 16) {
const u8 *src = xsrcp;
for (int n = 0; n < 8; ++n) {
memcpy(dstp, src, 16);
src += pitch;
dstp += 16;
}
xsrcp += 16;
}
ysrcp += 8 * pitch;
}
RETURN(0);
#ifndef MOBILE_DEVICE
CBreakPoints::ExecMemCheck(srcPtr, false, pitch * h, currentMIPS->pc);
CBreakPoints::ExecMemCheck(destPtr, true, pitch * h, currentMIPS->pc);
#endif
return 10 + (pitch * h) / 4; // approximation
}
static int Replace_memmove() {
u32 destPtr = PARAM(0);
u32 srcPtr = PARAM(1);
u32 bytes = PARAM(2);
bool skip = false;
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
skip = gpu->UpdateMemory(destPtr, srcPtr, bytes);
skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes);
}
if (!skip && bytes != 0) {
u8 *dst = Memory::GetPointerUnchecked(destPtr);
@ -169,8 +205,8 @@ static int Replace_memset() {
u8 value = PARAM(1);
u32 bytes = PARAM(2);
bool skip = false;
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(destPtr)) {
skip = gpu->UpdateMemory(destPtr, destPtr, bytes);
if (Memory::IsVRAMAddress(destPtr)) {
skip = gpu->PerformMemorySet(destPtr, value, bytes);
}
if (!skip) {
memset(dst, value, bytes);
@ -423,6 +459,7 @@ static const ReplacementTableEntry entries[] = {
{ "ceilf", &Replace_ceilf, 0, 0},
{ "memcpy", &Replace_memcpy, 0, 0},
{ "memcpy16", &Replace_memcpy16, 0, 0},
{ "memcpy_swizzled", &Replace_memcpy_swizzled, 0, 0},
{ "memmove", &Replace_memmove, 0, 0},
{ "memset", &Replace_memset, 0, 0},
{ "strlen", &Replace_strlen, 0, 0},
@ -523,6 +560,27 @@ void RestoreReplacedInstructions(u32 startAddr, u32 endAddr) {
replacedInstructions.erase(start, end);
}
std::map<u32, u32> SaveAndClearReplacements() {
std::map<u32, u32> saved;
for (auto it = replacedInstructions.begin(), end = replacedInstructions.end(); it != end; ++it) {
const u32 addr = it->first;
const u32 curInstr = Memory::Read_U32(addr);
if (MIPS_IS_REPLACEMENT(curInstr)) {
saved[addr] = curInstr;
Memory::Write_U32(it->second, addr);
}
}
return saved;
}
void RestoreSavedReplacements(const std::map<u32, u32> &saved) {
for (auto it = saved.begin(), end = saved.end(); it != end; ++it) {
const u32 addr = it->first;
// Just put the replacements back.
Memory::Write_U32(it->second, addr);
}
}
bool GetReplacedOpAt(u32 address, u32 *op) {
u32 instr = Memory::Read_Opcode_JIT(address).encoding;
if (MIPS_IS_REPLACEMENT(instr)) {

View File

@ -61,3 +61,7 @@ void WriteReplaceInstruction(u32 address, u64 hash, int size);
void RestoreReplacedInstruction(u32 address);
void RestoreReplacedInstructions(u32 startAddr, u32 endAddr);
bool GetReplacedOpAt(u32 address, u32 *op);
// For savestates. If you call SaveAndClearReplacements(), you must call RestoreSavedReplacements().
std::map<u32, u32> SaveAndClearReplacements();
void RestoreSavedReplacements(const std::map<u32, u32> &saved);

View File

@ -50,7 +50,7 @@ int __DmacMemcpy(u32 dst, u32 src, u32 size) {
bool skip = false;
if (Memory::IsVRAMAddress(src) || Memory::IsVRAMAddress(dst)) {
skip = gpu->UpdateMemory(dst, src, size);
skip = gpu->PerformMemoryCopy(dst, src, size);
}
if (!skip) {
Memory::Memcpy(dst, Memory::GetPointer(src), size);

View File

@ -552,7 +552,13 @@ u32 sceKernelMemset(u32 addr, u32 fillc, u32 n)
{
u8 c = fillc & 0xff;
DEBUG_LOG(SCEINTC, "sceKernelMemset(ptr = %08x, c = %02x, n = %08x)", addr, c, n);
Memory::Memset(addr, c, n);
bool skip = false;
if (Memory::IsVRAMAddress(addr)) {
skip = gpu->PerformMemorySet(addr, fillc, n);
}
if (!skip) {
Memory::Memset(addr, c, n);
}
return addr;
}
@ -562,7 +568,7 @@ u32 sceKernelMemcpy(u32 dst, u32 src, u32 size)
bool skip = false;
if (Memory::IsVRAMAddress(src) || Memory::IsVRAMAddress(dst)) {
skip = gpu->UpdateMemory(dst, src, size);
skip = gpu->PerformMemoryCopy(dst, src, size);
}
// Technically should crash if these are invalid and size > 0...

View File

@ -220,6 +220,7 @@ static const HardHashTableEntry hardcodedHashes[] = {
{ 0x6301fa5149bd973a, 120, "wcscat", },
{ 0x658b07240a690dbd, 36, "strlen", },
{ 0x66122f0ab50b2ef9, 296, "dl_write_dither_matrix_5", },
{ 0x66f7f1beccbc104a, 256, "memcpy_swizzled", }, // God Eater 2
{ 0x679e647e34ecf7f1, 132, "roundf", },
{ 0x67afe74d9ec72f52, 4380, "_strtod_r", },
{ 0x68b22c2aa4b8b915, 400, "sqrt", },
@ -738,6 +739,7 @@ skip:
bool looking = false;
bool end = false;
bool isStraightLeaf = true;
bool decreasedSp = false;
u32 addr;
u32 addrNextSym = 0;
@ -761,6 +763,8 @@ skip:
furthestBranch = 0;
looking = false;
end = false;
isStraightLeaf = false;
decreasedSp = false;
continue;
}
@ -771,6 +775,7 @@ skip:
if (target > furthestBranch) {
furthestBranch = target;
}
// j X
} else if ((op & 0xFC000000) == 0x08000000) {
u32 sureTarget = GetJumpTarget(addr);
// Check for a tail call. Might not even have a jr ra.
@ -782,6 +787,18 @@ skip:
end = true;
}
} else if (sureTarget != INVALIDTARGET && sureTarget > addr && sureTarget > furthestBranch) {
static const u32 MAX_JUMP_FORWARD = 128;
// If it's a nearby forward jump, and not a stackless leaf, assume not a tail call.
if (sureTarget <= addr + MAX_JUMP_FORWARD && decreasedSp) {
// But let's check the delay slot.
MIPSOpcode op = Memory::Read_Instruction(addr + 4);
// addiu sp, sp, +X
if ((op & 0xFFFF8000) != 0x27BD0000) {
furthestBranch = sureTarget;
continue;
}
}
// A jump later. Probably tail, but let's check if it jumps back.
u32 knownEnd = furthestBranch == 0 ? addr : furthestBranch;
u32 jumpback = ScanAheadForJumpback(sureTarget, currentFunction.start, knownEnd);
@ -806,6 +823,14 @@ skip:
end = true;
}
}
// addiu sp, sp, -X
if ((op & 0xFFFF8000) == 0x27BD8000) {
decreasedSp = true;
}
// addiu sp, sp, +X
if ((op & 0xFFFF8000) == 0x27BD0000) {
decreasedSp = false;
}
if (looking) {
if (addr >= furthestBranch) {
@ -838,6 +863,7 @@ skip:
looking = false;
end = false;
isStraightLeaf = true;
decreasedSp = false;
currentFunction.start = addr+4;
}
}

View File

@ -33,6 +33,7 @@
#include "Core/FileSystems/MetaFileSystem.h"
#include "Core/ELF/ParamSFO.h"
#include "Core/HLE/HLE.h"
#include "Core/HLE/ReplaceTables.h"
#include "Core/HLE/sceKernel.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPS.h"
@ -226,10 +227,12 @@ namespace SaveState
// Memory is a bit tricky when jit is enabled, since there's emuhacks in it.
if (MIPSComp::jit && p.mode == p.MODE_WRITE)
{
auto blocks = MIPSComp::jit->GetBlockCache();
auto saved = blocks->SaveAndClearEmuHackOps();
auto blockCache = MIPSComp::jit->GetBlockCache();
auto savedReplacements = SaveAndClearReplacements();
auto savedBlocks = blockCache->SaveAndClearEmuHackOps();
Memory::DoState(p);
blocks->RestoreSavedEmuHackOps(saved);
blockCache->RestoreSavedEmuHackOps(savedBlocks);
RestoreSavedReplacements(savedReplacements);
}
else
Memory::DoState(p);

View File

@ -1315,7 +1315,12 @@ void DIRECTX9_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationTy
framebufferManager_.UpdateFromMemory(addr, size);
}
bool DIRECTX9_GPU::UpdateMemory(u32 dest, u32 src, int size) {
bool DIRECTX9_GPU::PerformMemoryCopy(u32 dest, u32 src, int size) {
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
return false;
}
// Notification that `size` bytes at `dest` are being filled with `v`.
// This backend only issues a cache invalidation hint for the range and
// always reports the fill as not handled (returns false); `v` is not used.
bool DIRECTX9_GPU::PerformMemorySet(u32 dest, u8 v, int size) {
	const bool handled = false;
	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
	return handled;
}

View File

@ -46,7 +46,8 @@ public:
virtual void BeginFrame();
virtual void UpdateStats();
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
virtual bool UpdateMemory(u32 dest, u32 src, int size);
virtual bool PerformMemoryCopy(u32 dest, u32 src, int size);
virtual bool PerformMemorySet(u32 dest, u8 v, int size);
virtual void ClearCacheNextFrame();
virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders.

View File

@ -1,4 +1,4 @@
// Copyright (c) 2012- PPSSPP Project.
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@ -1851,7 +1851,7 @@ void FramebufferManager::UpdateFromMemory(u32 addr, int size, bool safe) {
}
}
bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size) {
bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset) {
if (!useBufferedRendering_ || updateVRAM_) {
return false;
}
@ -1882,7 +1882,7 @@ bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size) {
// TODO: Do ReadFramebufferToMemory etc where applicable.
// This will slow down MotoGP but make the hack above unnecessary.
if (dstBuffer && srcBuffer) {
if (dstBuffer && srcBuffer && !isMemset) {
if (srcBuffer == dstBuffer) {
WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst);
} else {

View File

@ -211,7 +211,7 @@ public:
}
inline bool ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const;
bool NotifyFramebufferCopy(u32 src, u32 dest, int size);
bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false);
void DestroyFramebuf(VirtualFramebuffer *vfb);

View File

@ -662,7 +662,11 @@ void GLES_GPU::ProcessEvent(GPUEvent ev) {
break;
case GPU_EVENT_FB_MEMCPY:
UpdateMemoryInternal(ev.fb_memcpy.dst, ev.fb_memcpy.src, ev.fb_memcpy.size);
PerformMemoryCopyInternal(ev.fb_memcpy.dst, ev.fb_memcpy.src, ev.fb_memcpy.size);
break;
case GPU_EVENT_FB_MEMSET:
PerformMemorySetInternal(ev.fb_memset.dst, ev.fb_memset.v, ev.fb_memset.size);
break;
default:
@ -1968,16 +1972,20 @@ void GLES_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType t
}
}
void GLES_GPU::UpdateMemoryInternal(u32 dest, u32 src, int size) {
void GLES_GPU::PerformMemoryCopyInternal(u32 dest, u32 src, int size) {
if (!framebufferManager_.NotifyFramebufferCopy(src, dest, size)) {
Memory::Memcpy(dest, Memory::GetPointer(src), size);
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
} else {
}
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
}
// Reports a fill of `size` bytes at `dest` to the framebuffer manager, using
// `dest` as both source and destination with the memset flag set.
// If the manager does not take it, the range gets a cache invalidation hint.
// The fill value `v` is not used here.
void GLES_GPU::PerformMemorySetInternal(u32 dest, u8 v, int size) {
	const bool takenByFramebuffer = framebufferManager_.NotifyFramebufferCopy(dest, dest, size, true);
	if (takenByFramebuffer) {
		return;
	}
	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
}
bool GLES_GPU::UpdateMemory(u32 dest, u32 src, int size) {
bool GLES_GPU::PerformMemoryCopy(u32 dest, u32 src, int size) {
// Track stray copies of a framebuffer in RAM. MotoGP does this.
if (framebufferManager_.MayIntersectFramebuffer(src) || framebufferManager_.MayIntersectFramebuffer(dest)) {
if (IsOnSeparateCPUThread()) {
@ -1990,7 +1998,7 @@ bool GLES_GPU::UpdateMemory(u32 dest, u32 src, int size) {
// This is a memcpy, so we need to wait for it to complete.
SyncThread();
} else {
UpdateMemoryInternal(dest, src, size);
PerformMemoryCopyInternal(dest, src, size);
}
return true;
}
@ -1999,6 +2007,30 @@ bool GLES_GPU::UpdateMemory(u32 dest, u32 src, int size) {
return false;
}
// Handles a fill of `size` bytes at `dest` with value `v`.
// Returns true when `dest` may intersect a framebuffer: memory is filled here
// and the framebuffer side is notified (directly, or via a queued event when
// IsOnSeparateCPUThread() is true). Otherwise only a cache invalidation hint
// is issued and false is returned.
bool GLES_GPU::PerformMemorySet(u32 dest, u8 v, int size) {
	if (!framebufferManager_.MayIntersectFramebuffer(dest)) {
		// Or perhaps a texture, let's invalidate.
		InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
		return false;
	}

	// This may indicate a memset, usually to 0, of a framebuffer.
	Memory::Memset(dest, v, size);

	if (!IsOnSeparateCPUThread()) {
		PerformMemorySetInternal(dest, v, size);
		return true;
	}

	GPUEvent memsetEvent(GPU_EVENT_FB_MEMSET);
	memsetEvent.fb_memset.dst = dest;
	memsetEvent.fb_memset.v = v;
	memsetEvent.fb_memset.size = size;
	ScheduleEvent(memsetEvent);
	// We don't need to wait for the framebuffer to be updated.
	return true;
}
// Forwards the request to the texture cache via textureCache_.ClearNextFrame().
// The GPU interface describes this call as causing the texture cache to be
// cleared at the start of the next frame.
void GLES_GPU::ClearCacheNextFrame() {
textureCache_.ClearNextFrame();
}

View File

@ -44,7 +44,8 @@ public:
virtual void BeginFrame();
virtual void UpdateStats();
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
virtual bool UpdateMemory(u32 dest, u32 src, int size);
virtual bool PerformMemoryCopy(u32 dest, u32 src, int size);
virtual bool PerformMemorySet(u32 dest, u8 v, int size);
virtual void ClearCacheNextFrame();
virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders.
@ -151,7 +152,8 @@ private:
void InitClearInternal();
void BeginFrameInternal();
void CopyDisplayToOutputInternal();
void UpdateMemoryInternal(u32 dest, u32 src, int size);
void PerformMemoryCopyInternal(u32 dest, u32 src, int size);
void PerformMemorySetInternal(u32 dest, u8 v, int size);
void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type);
static CommandInfo cmdInfo_[256];

View File

@ -164,6 +164,7 @@ enum GPUEventType {
GPU_EVENT_FINISH_EVENT_LOOP,
GPU_EVENT_SYNC_THREAD,
GPU_EVENT_FB_MEMCPY,
GPU_EVENT_FB_MEMSET,
};
struct GPUEvent {
@ -182,6 +183,12 @@ struct GPUEvent {
u32 src;
int size;
} fb_memcpy;
// GPU_EVENT_FB_MEMSET
struct {
u32 dst;
u8 v;
int size;
} fb_memset;
};
operator GPUEventType() const {
@ -234,7 +241,8 @@ public:
// If size = -1, invalidate everything.
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type) = 0;
// Update either RAM from VRAM, or VRAM from RAM... or even VRAM from VRAM.
virtual bool UpdateMemory(u32 dest, u32 src, int size) = 0;
virtual bool PerformMemoryCopy(u32 dest, u32 src, int size) = 0;
virtual bool PerformMemorySet(u32 dest, u8 v, int size) = 0;
// Will cause the texture cache to be cleared at the start of the next frame.
virtual void ClearCacheNextFrame() = 0;

View File

@ -657,7 +657,13 @@ void NullGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type) {
// Nothing to invalidate.
}
bool NullGPU::UpdateMemory(u32 dest, u32 src, int size) {
bool NullGPU::PerformMemoryCopy(u32 dest, u32 src, int size) {
// Nothing to update.
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
return false;
}
bool NullGPU::PerformMemorySet(u32 dest, u8 v, int size) {
// Nothing to update.
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
return false;

View File

@ -34,7 +34,8 @@ public:
virtual void CopyDisplayToOutput() {}
virtual void UpdateStats();
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
virtual bool UpdateMemory(u32 dest, u32 src, int size);
virtual bool PerformMemoryCopy(u32 dest, u32 src, int size);
virtual bool PerformMemorySet(u32 dest, u8 v, int size);
virtual void ClearCacheNextFrame() {};
virtual void DeviceLost() {}

View File

@ -851,7 +851,16 @@ void SoftGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type)
// Nothing to invalidate.
}
bool SoftGPU::UpdateMemory(u32 dest, u32 src, int size)
bool SoftGPU::PerformMemoryCopy(u32 dest, u32 src, int size)
{
// Nothing to update.
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
// Let's just be safe.
framebufferDirty_ = true;
return false;
}
bool SoftGPU::PerformMemorySet(u32 dest, u8 v, int size)
{
// Nothing to update.
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);

View File

@ -59,7 +59,8 @@ public:
virtual void CopyDisplayToOutput();
virtual void UpdateStats();
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
virtual bool UpdateMemory(u32 dest, u32 src, int size);
virtual bool PerformMemoryCopy(u32 dest, u32 src, int size);
virtual bool PerformMemorySet(u32 dest, u8 v, int size);
virtual void ClearCacheNextFrame() {};
virtual void DeviceLost() {}

View File

@ -140,6 +140,7 @@
6301fa5149bd973a:120 = wcscat
658b07240a690dbd:36 = strlen
66122f0ab50b2ef9:296 = dl_write_dither_matrix_5
66f7f1beccbc104a:256 = memcpy_swizzled
679e647e34ecf7f1:132 = roundf
67afe74d9ec72f52:4380 = _strtod_r
68b22c2aa4b8b915:400 = sqrt