Merge branch 'hrydgard:master' into feature_openxr_6dof

This commit is contained in:
Luboš Vonásek 2022-08-24 06:55:31 +02:00 committed by GitHub
commit c5cb45b1f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
117 changed files with 2706 additions and 2221 deletions

View File

@ -426,6 +426,15 @@ if(WIN32)
endif()
# JIT sources gathered into one list so it can be referenced as ${CommonJIT}
# from several places (it is used by CommonRISCV64 and by the core library below).
set(CommonJIT
Core/MIPS/JitCommon/JitCommon.cpp
Core/MIPS/JitCommon/JitCommon.h
Core/MIPS/JitCommon/JitBlockCache.cpp
Core/MIPS/JitCommon/JitBlockCache.h
Core/MIPS/JitCommon/JitState.cpp
Core/MIPS/JitCommon/JitState.h
)
set(CommonX86
Common/ABI.cpp
Common/ABI.h
@ -464,6 +473,7 @@ set(CommonMIPS
source_group(MIPS FILES ${CommonMIPS})
set(CommonRISCV64
${CommonJIT}
Common/RiscVCPUDetect.cpp
Core/MIPS/fake/FakeJit.cpp
Core/MIPS/fake/FakeJit.h
@ -1530,8 +1540,8 @@ set(GPU_SOURCES
${GPU_NEON}
GPU/Common/Draw2D.cpp
GPU/Common/Draw2D.h
GPU/Common/DepalettizeCommon.cpp
GPU/Common/DepalettizeCommon.h
GPU/Common/TextureShaderCommon.cpp
GPU/Common/TextureShaderCommon.h
GPU/Common/DepalettizeShaderCommon.cpp
GPU/Common/DepalettizeShaderCommon.h
GPU/Common/FragmentShaderGenerator.cpp
@ -1630,6 +1640,7 @@ set(GPU_SOURCES
# SHARED on ANDROID, STATIC everywhere else
add_library(${CoreLibName} ${CoreLinkType}
${CoreExtra}
${CommonJIT}
Core/Config.cpp
Core/Config.h
Core/ConfigValues.h
@ -1937,12 +1948,6 @@ add_library(${CoreLibName} ${CoreLinkType}
Core/FileLoaders/RamCachingFileLoader.h
Core/FileLoaders/RetryingFileLoader.cpp
Core/FileLoaders/RetryingFileLoader.h
Core/MIPS/JitCommon/JitCommon.cpp
Core/MIPS/JitCommon/JitCommon.h
Core/MIPS/JitCommon/JitBlockCache.cpp
Core/MIPS/JitCommon/JitBlockCache.h
Core/MIPS/JitCommon/JitState.cpp
Core/MIPS/JitCommon/JitState.h
Core/MIPS/MIPS.cpp
Core/MIPS/MIPS.h
Core/MIPS/MIPSAnalyst.cpp

View File

@ -665,7 +665,7 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
caps_.tesselationShaderSupported = false;
caps_.framebufferBlitSupported = true;
caps_.framebufferCopySupported = false;
caps_.framebufferDepthBlitSupported = true;
caps_.framebufferDepthBlitSupported = false;
caps_.framebufferStencilBlitSupported = false;
caps_.framebufferDepthCopySupported = false;
caps_.framebufferSeparateDepthCopySupported = false;

View File

@ -423,3 +423,19 @@ ShaderWriter &ShaderWriter::SampleTexture2D(const char *sampName, const char *uv
}
return *this;
}
// Emits a statement that declares a two-component float vector named szVariable
// and fills it with the dimensions of the texture texName, using the syntax of
// the shader language this writer targets.
// Returns *this so that calls can be chained.
ShaderWriter &ShaderWriter::GetTextureSize(const char *szVariable, const char *texName) {
	switch (lang_.shaderLanguage) {
	case HLSL_D3D11:
	case HLSL_D3D9:
		// Both HLSL targets emit the same code.
		// NOTE(review): GetDimensions is documented as a texture-object method of
		// Shader Model 4 and up - confirm this path is really usable under D3D9.
		F(" float2 %s; %s.GetDimensions(%s.x, %s.y);", szVariable, texName, szVariable, szVariable);
		break;
	default:
		// Note: we ignore the sampler. Make sure you bind samplers to the textures correctly.
		// textureSize() returns an ivec2, and GLSL ES performs no implicit
		// int -> float conversion, so the result is converted explicitly.
		F("vec2 %s = vec2(textureSize(%s, 0));", szVariable, texName);
		break;
	}
	return *this;
}

View File

@ -83,7 +83,8 @@ public:
void ConstFloat(const char *name, float value);
ShaderWriter &SampleTexture2D(const char *sampName, const char *uv);
ShaderWriter &SampleTexture2D(const char *texName, const char *uv);
ShaderWriter &GetTextureSize(const char *szVariable, const char *texName);
// Simple shaders with no special tricks.
void BeginVSMain(Slice<InputDef> inputs, Slice<UniformDef> uniforms, Slice<VaryingDef> varyings);
@ -93,6 +94,9 @@ public:
void EndVSMain(Slice<VaryingDef> varyings);
void EndFSMain(const char *vec4_color_variable, FSFlags flags);
// Read-only access to the shader language description (lang_) this writer uses.
const ShaderLanguageDesc &Lang() const {
return lang_;
}
void Rewind(size_t offset) {
p_ -= offset;

View File

@ -648,6 +648,7 @@ VkResult VulkanContext::CreateDevice() {
}
_dbg_assert_(found);
// TODO: A lot of these are on by default in later Vulkan versions, should check for that, technically.
extensionsLookup_.KHR_maintenance1 = EnableDeviceExtension(VK_KHR_MAINTENANCE1_EXTENSION_NAME);
extensionsLookup_.KHR_maintenance2 = EnableDeviceExtension(VK_KHR_MAINTENANCE2_EXTENSION_NAME);
extensionsLookup_.KHR_maintenance3 = EnableDeviceExtension(VK_KHR_MAINTENANCE3_EXTENSION_NAME);
@ -684,7 +685,7 @@ VkResult VulkanContext::CreateDevice() {
} else {
VulkanLoadDeviceFunctions(device_, extensionsLookup_);
}
INFO_LOG(G3D, "Device created.\n");
INFO_LOG(G3D, "Vulkan Device created");
VulkanSetAvailable(true);
VmaAllocatorCreateInfo allocatorInfo = {};

View File

@ -667,7 +667,7 @@ public:
s.magFilter = desc.magFilter == TextureFilter::LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
s.minFilter = desc.minFilter == TextureFilter::LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
s.mipmapMode = desc.mipFilter == TextureFilter::LINEAR ? VK_SAMPLER_MIPMAP_MODE_LINEAR : VK_SAMPLER_MIPMAP_MODE_NEAREST;
s.maxLod = desc.maxLod;
s.maxLod = VK_LOD_CLAMP_NONE;
VkResult res = vkCreateSampler(vulkan_->GetDevice(), &s, nullptr, &sampler_);
_assert_(VK_SUCCESS == res);
}

View File

@ -494,7 +494,6 @@ struct SamplerStateDesc {
TextureAddressMode wrapU;
TextureAddressMode wrapV;
TextureAddressMode wrapW;
float maxLod;
bool shadowCompareEnabled;
Comparison shadowCompareFunc;
BorderColor borderColor;

View File

@ -97,6 +97,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
CheckSetting(iniFile, gameID, "ZZT3SelectHack", &flags_.ZZT3SelectHack);
CheckSetting(iniFile, gameID, "AllowLargeFBTextureOffsets", &flags_.AllowLargeFBTextureOffsets);
CheckSetting(iniFile, gameID, "AtracLoopHack", &flags_.AtracLoopHack);
CheckSetting(iniFile, gameID, "DeswizzleDepth", &flags_.DeswizzleDepth);
}
void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) {

View File

@ -87,6 +87,7 @@ struct CompatFlags {
bool ZZT3SelectHack;
bool AllowLargeFBTextureOffsets;
bool AtracLoopHack;
bool DeswizzleDepth;
};
class IniFile;

View File

@ -748,7 +748,7 @@ int Config::NextValidBackend() {
if (failed.count((GPUBackend)iGPUBackend)) {
ERROR_LOG(LOADER, "Graphics backend failed for %d, trying another", iGPUBackend);
#if (PPSSPP_PLATFORM(WINDOWS) || PPSSPP_PLATFORM(ANDROID)) && !PPSSPP_PLATFORM(UWP)
#if !PPSSPP_PLATFORM(UWP)
if (!failed.count(GPUBackend::VULKAN) && VulkanMayBeAvailable()) {
return (int)GPUBackend::VULKAN;
}
@ -797,6 +797,9 @@ bool Config::IsBackendEnabled(GPUBackend backend, bool validate) {
#if PPSSPP_PLATFORM(UWP)
if (backend != GPUBackend::DIRECT3D11)
return false;
#elif PPSSPP_PLATFORM(SWITCH)
if (backend != GPUBackend::OPENGL)
return false;
#elif PPSSPP_PLATFORM(WINDOWS)
if (validate) {
if (backend == GPUBackend::DIRECT3D11 && !DoesVersionMatchWindows(6, 0, 0, 0, true))
@ -1113,15 +1116,6 @@ static ConfigSetting networkSettings[] = {
ConfigSetting(false),
};
// Compile-time choice of the default PSP model: PSP_MODEL_FAT on builds that are
// neither AMD64 nor _WIN32, PSP_MODEL_SLIM on every other build.
static int DefaultPSPModel() {
// TODO: Can probably default this on, but not sure about its memory differences.
#if !PPSSPP_ARCH(AMD64) && !defined(_WIN32)
return PSP_MODEL_FAT;
#else
return PSP_MODEL_SLIM;
#endif
}
static int DefaultSystemParamLanguage() {
int defaultLang = PSP_SYSTEMPARAM_LANGUAGE_ENGLISH;
if (g_Config.bFirstRun) {
@ -1135,7 +1129,7 @@ static int DefaultSystemParamLanguage() {
}
static ConfigSetting systemParamSettings[] = {
ReportedConfigSetting("PSPModel", &g_Config.iPSPModel, &DefaultPSPModel, true, true),
ReportedConfigSetting("PSPModel", &g_Config.iPSPModel, PSP_MODEL_SLIM, true, true),
ReportedConfigSetting("PSPFirmwareVersion", &g_Config.iFirmwareVersion, PSP_DEFAULT_FIRMWARE, true, true),
ConfigSetting("NickName", &g_Config.sNickName, "PPSSPP", true, true),
ConfigSetting("MacAddress", &g_Config.sMACAddress, "", true, true),

View File

@ -419,6 +419,7 @@ const char *MemoryExceptionTypeAsString(MemoryExceptionType type) {
case MemoryExceptionType::WRITE_WORD: return "Write Word";
case MemoryExceptionType::READ_BLOCK: return "Read Block";
case MemoryExceptionType::WRITE_BLOCK: return "Read/Write Block";
case MemoryExceptionType::ALIGNMENT: return "Alignment";
default:
return "N/A";
}
@ -486,16 +487,19 @@ void Core_ExecException(u32 address, u32 pc, ExecExceptionType type) {
e.exec_type = type;
e.address = address;
e.pc = pc;
Core_EnableStepping(true, "cpu.exception", pc);
// This just records the closest value that could be useful as reference.
e.ra = currentMIPS->r[MIPS_REG_RA];
Core_EnableStepping(true, "cpu.exception", address);
}
void Core_Break() {
void Core_Break(u32 pc) {
ERROR_LOG(CPU, "BREAK!");
ExceptionInfo &e = g_exceptionInfo;
e = {};
e.type = ExceptionType::BREAK;
e.info = "";
e.pc = pc;
if (!g_Config.bIgnoreBadMemAccess) {
Core_EnableStepping(true, "cpu.breakInstruction", currentMIPS->pc);

View File

@ -93,6 +93,7 @@ enum class MemoryExceptionType {
WRITE_WORD,
READ_BLOCK,
WRITE_BLOCK,
ALIGNMENT,
};
enum class ExecExceptionType {
JUMP,
@ -105,7 +106,7 @@ void Core_MemoryException(u32 address, u32 pc, MemoryExceptionType type);
void Core_MemoryExceptionInfo(u32 address, u32 pc, MemoryExceptionType type, std::string additionalInfo);
void Core_ExecException(u32 address, u32 pc, ExecExceptionType type);
void Core_Break();
void Core_Break(u32 pc);
// Call when loading save states, etc.
void Core_ResetException();
@ -124,6 +125,7 @@ struct ExceptionInfo {
MemoryExceptionType memory_type;
uint32_t pc;
uint32_t address;
uint32_t ra = 0;
// Reuses pc and address from memory type, where address is the failed destination.
ExecExceptionType exec_type;

View File

@ -66,7 +66,7 @@ struct CoreParameter {
bool headLess; // Try to avoid messageboxes etc
// Internal PSP rendering resolution and scale factor.
int renderScaleFactor;
int renderScaleFactor = 1;
int renderWidth;
int renderHeight;

View File

@ -99,7 +99,7 @@ protected:
void ChangeStatus(DialogStatus newStatus, int delayUs);
void ChangeStatusInit(int delayUs);
void ChangeStatusShutdown(int delayUs);
DialogStatus ReadStatus() {
DialogStatus ReadStatus() const {
return status;
}
@ -117,10 +117,10 @@ protected:
unsigned int lastButtons = 0;
unsigned int buttons = 0;
float fadeTimer;
bool isFading;
bool fadeIn;
u32 fadeValue;
float fadeTimer = 0.0f;
bool isFading = false;
bool fadeIn = false;
u32 fadeValue = 0;
ImageID okButtonImg;
ImageID cancelButtonImg;

View File

@ -59,16 +59,16 @@ private:
void CloseCurrentFile();
void WriteSfoFile();
SceUtilityGamedataInstallParam request;
SceUtilityGamedataInstallParam request{};
PSPPointer<SceUtilityGamedataInstallParam> param;
std::vector<std::string> inFileNames;
int numFiles;
int readFiles;
u64 allFilesSize; // use this to calculate progress value.
u64 allReadSize; // use this to calculate progress value.
int progressValue;
int numFiles = 0;
int readFiles = 0;
u64 allFilesSize = 0; // use this to calculate progress value.
u64 allReadSize = 0; // use this to calculate progress value.
int progressValue = 0;
int currentInputFile;
u32 currentInputBytesLeft;
int currentOutputFile;
int currentInputFile = 0;
u32 currentInputBytesLeft = 0;
int currentOutputFile = 0;
};

View File

@ -94,11 +94,11 @@ private:
u32 flag = 0;
pspMessageDialog messageDialog;
int messageDialogAddr;
pspMessageDialog messageDialog{};
int messageDialogAddr = 0;
char msgText[512];
int yesnoChoice;
int yesnoChoice = 0;
float scrollPos_ = 0.0f;
int framesUpHeld_ = 0;
int framesDownHeld_ = 0;

View File

@ -148,7 +148,6 @@ struct SceUtilityOskParams
SceUtilityOskState_le state;
// Maybe just padding?
s32_le unk_60;
};
// Internal enum, not from PSP.
@ -245,16 +244,16 @@ private:
std::string oskIntext;
std::string oskOuttext;
int selectedChar;
int selectedChar = 0;
std::u16string inputChars;
OskKeyboardDisplay currentKeyboard;
OskKeyboardLanguage currentKeyboardLanguage;
bool isCombinated;
bool isCombinated = false;
std::mutex nativeMutex_;
PSPOskNativeStatus nativeStatus_ = PSPOskNativeStatus::IDLE;
std::string nativeValue_;
int i_level; // for Korean Keyboard support
int i_value[3]; // for Korean Keyboard support
int i_level = 0; // for Korean Keyboard support
int i_value[3]{}; // for Korean Keyboard support
};

View File

@ -139,13 +139,13 @@ private:
DisplayState display = DS_NONE;
SavedataParam param;
SceUtilitySavedataParam request;
SceUtilitySavedataParam request{};
// For detecting changes made by the game.
SceUtilitySavedataParam originalRequest;
SceUtilitySavedataParam originalRequest{};
u32 requestAddr = 0;
int currentSelectedSave = 0;
int yesnoChoice;
int yesnoChoice = 0;
enum SaveIOStatus
{

View File

@ -193,16 +193,7 @@ void SaveFileInfo::DoState(PointerWrap &p)
}
}
SavedataParam::SavedataParam()
: pspParam(0)
, selectedSave(0)
, saveDataList(0)
, noSaveIcon(0)
, saveDataListCount(0)
, saveNameListDataCount(0)
{
}
SavedataParam::SavedataParam() { }
void SavedataParam::Init()
{

View File

@ -265,7 +265,6 @@ struct SceUtilitySavedataParam
// Function 22 GETSIZES
PSPPointer<PspUtilitySavedataSizeInfo> sizeInfo;
};
// Non native, this one we can reorganize as we like
@ -377,10 +376,10 @@ private:
std::set<std::string> GetSecureFileNames(const std::string &dirPath);
bool GetExpectedHash(const std::string &dirPath, const std::string &filename, u8 hash[16]);
SceUtilitySavedataParam* pspParam;
int selectedSave;
SaveFileInfo *saveDataList;
SaveFileInfo *noSaveIcon;
int saveDataListCount;
int saveNameListDataCount;
SceUtilitySavedataParam* pspParam = nullptr;
int selectedSave = 0;
SaveFileInfo *saveDataList = nullptr;
SaveFileInfo *noSaveIcon = nullptr;
int saveDataListCount = 0;
int saveNameListDataCount = 0;
};

View File

@ -1086,6 +1086,12 @@ void __KernelStartIdleThreads(SceUID moduleId)
}
}
// Checks a PC that a thread is about to start or resume at. If it is not a valid
// address, or is not 4-byte aligned, a THREAD exec exception is reported with the
// current MIPS pc as the source.
void KernelValidateThreadTarget(uint32_t pc) {
	const bool wordAligned = (pc & 3) == 0;
	if (Memory::IsValidAddress(pc) && wordAligned)
		return;

	Core_ExecException(pc, currentMIPS->pc, ExecExceptionType::THREAD);
}
bool __KernelSwitchOffThread(const char *reason)
{
if (!reason)
@ -1141,9 +1147,7 @@ bool __KernelSwitchToThread(SceUID threadID, const char *reason)
if (current && current->isRunning())
__KernelChangeReadyState(current, currentThread, true);
if (!Memory::IsValidAddress(t->context.pc)) {
Core_ExecException(t->context.pc, currentMIPS->pc, ExecExceptionType::THREAD);
}
KernelValidateThreadTarget(t->context.pc);
__KernelSwitchContext(t, reason);
return true;
@ -1471,9 +1475,7 @@ void __KernelLoadContext(PSPThreadContext *ctx, bool vfpuEnabled) {
memcpy(currentMIPS->vfpuCtrl, ctx->vfpuCtrl, sizeof(ctx->vfpuCtrl));
}
if (!Memory::IsValidAddress(ctx->pc)) {
Core_ExecException(ctx->pc, currentMIPS->pc, ExecExceptionType::THREAD);
}
KernelValidateThreadTarget(ctx->pc);
memcpy(currentMIPS->other, ctx->other, sizeof(ctx->other));
// Not locking here, we assume the jit isn't switched during execution.
@ -1924,9 +1926,7 @@ SceUID __KernelSetupRootThread(SceUID moduleID, int args, const char *argp, int
strcpy(thread->nt.name, "root");
if (!Memory::IsValidAddress(thread->context.pc)) {
Core_ExecException(thread->context.pc, currentMIPS->pc, ExecExceptionType::THREAD);
}
KernelValidateThreadTarget(thread->context.pc);
__KernelLoadContext(&thread->context, (attr & PSP_THREAD_ATTR_VFPU) != 0);
currentMIPS->r[MIPS_REG_A0] = args;
@ -2057,9 +2057,7 @@ int __KernelStartThread(SceUID threadToStartID, int argSize, u32 argBlockPtr, bo
// Smaller is better for priority. Only switch if the new thread is better.
if (cur && cur->nt.currentPriority > startThread->nt.currentPriority) {
if (!Memory::IsValidAddress(startThread->context.pc)) {
Core_ExecException(startThread->context.pc, currentMIPS->pc, ExecExceptionType::THREAD);
}
KernelValidateThreadTarget(startThread->context.pc);
__KernelChangeReadyState(cur, currentThread, true);
if (__InterruptsEnabled())
hleReSchedule("thread started");
@ -2939,9 +2937,7 @@ u32 sceKernelExtendThreadStack(u32 size, u32 entryAddr, u32 entryParameter)
Memory::Write_U32(currentMIPS->r[MIPS_REG_SP], thread->currentStack.end - 8);
Memory::Write_U32(currentMIPS->pc, thread->currentStack.end - 12);
if (!Memory::IsValidAddress(entryAddr)) {
Core_ExecException(entryAddr, currentMIPS->pc, ExecExceptionType::THREAD);
}
KernelValidateThreadTarget(entryAddr);
currentMIPS->pc = entryAddr;
currentMIPS->r[MIPS_REG_A0] = entryParameter;
@ -2975,9 +2971,7 @@ void __KernelReturnFromExtendStack()
return;
}
if (!Memory::IsValidAddress(restorePC)) {
Core_ExecException(restorePC, currentMIPS->pc, ExecExceptionType::THREAD);
}
KernelValidateThreadTarget(restorePC);
DEBUG_LOG(SCEKERNEL, "__KernelReturnFromExtendStack()");
currentMIPS->r[MIPS_REG_RA] = restoreRA;
@ -3259,9 +3253,7 @@ bool __KernelExecuteMipsCallOnCurrentThread(u32 callId, bool reschedAfter)
call->savedId = cur->currentMipscallId;
call->reschedAfter = reschedAfter;
if (!Memory::IsValidAddress(call->entryPoint)) {
Core_ExecException(call->entryPoint, currentMIPS->pc, ExecExceptionType::THREAD);
}
KernelValidateThreadTarget(call->entryPoint);
// Set up the new state
currentMIPS->pc = call->entryPoint;
@ -3312,9 +3304,7 @@ void __KernelReturnFromMipsCall()
currentMIPS->r[MIPS_REG_RA] = Memory::Read_U32(sp + MIPS_REG_RA * 4);
sp += 32 * 4;
if (!Memory::IsValidAddress(call->savedPc)) {
Core_ExecException(call->savedPc, currentMIPS->pc, ExecExceptionType::THREAD);
}
KernelValidateThreadTarget(call->savedPc);
currentMIPS->pc = call->savedPc;
// This is how we set the return value.

View File

@ -466,7 +466,7 @@ void ArmJit::Comp_Jump(MIPSOpcode op) {
u32 targetAddr = (GetCompilerPC() & 0xF0000000) | off;
// Might be a stubbed address or something?
if (!Memory::IsValidAddress(targetAddr)) {
if (!Memory::IsValidAddress(targetAddr) || (targetAddr & 3) != 0) {
if (js.nextExit == 0) {
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr);
} else {

View File

@ -481,7 +481,7 @@ void Arm64Jit::Comp_Jump(MIPSOpcode op) {
u32 targetAddr = (GetCompilerPC() & 0xF0000000) | off;
// Might be a stubbed address or something?
if (!Memory::IsValidAddress(targetAddr)) {
if (!Memory::IsValidAddress(targetAddr) || (targetAddr & 3) != 0) {
if (js.nextExit == 0) {
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr);
} else {

View File

@ -330,6 +330,8 @@ namespace MIPSComp {
ir.Write(IROp::LoadVec4, vregs[0], rs, ir.AddConstant(imm));
} else {
// Let's not even bother with "vertical" loads for now.
if (!g_Config.bFastMemory)
ir.Write({ IROp::ValidateAddress128, { 0 }, (u8)rs, 0, (u32)imm });
ir.Write(IROp::LoadFloat, vregs[0], rs, ir.AddConstant(imm));
ir.Write(IROp::LoadFloat, vregs[1], rs, ir.AddConstant(imm + 4));
ir.Write(IROp::LoadFloat, vregs[2], rs, ir.AddConstant(imm + 8));
@ -342,6 +344,8 @@ namespace MIPSComp {
ir.Write(IROp::StoreVec4, vregs[0], rs, ir.AddConstant(imm));
} else {
// Let's not even bother with "vertical" stores for now.
if (!g_Config.bFastMemory)
ir.Write({ IROp::ValidateAddress128, { 0 }, (u8)rs, 1, (u32)imm });
ir.Write(IROp::StoreFloat, vregs[0], rs, ir.AddConstant(imm));
ir.Write(IROp::StoreFloat, vregs[1], rs, ir.AddConstant(imm + 4));
ir.Write(IROp::StoreFloat, vregs[2], rs, ir.AddConstant(imm + 8));

View File

@ -260,6 +260,7 @@ void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, u32 &m
IRWriter *code = &ir;
if (!js.hadBreakpoints) {
static const IRPassFunc passes[] = {
&ApplyMemoryValidation,
&RemoveLoadStoreLeftRight,
&OptimizeFPMoves,
&PropagateConstants,

View File

@ -163,6 +163,11 @@ static const IRMeta irMeta[] = {
{ IROp::Breakpoint, "Breakpoint", "", IRFLAG_EXIT },
{ IROp::MemoryCheck, "MemoryCheck", "_GC", IRFLAG_EXIT },
{ IROp::ValidateAddress8, "ValidAddr8", "_GC", IRFLAG_EXIT },
{ IROp::ValidateAddress16, "ValidAddr16", "_GC", IRFLAG_EXIT },
{ IROp::ValidateAddress32, "ValidAddr32", "_GC", IRFLAG_EXIT },
{ IROp::ValidateAddress128, "ValidAddr128", "_GC", IRFLAG_EXIT },
{ IROp::RestoreRoundingMode, "RestoreRoundingMode", "" },
{ IROp::ApplyRoundingMode, "ApplyRoundingMode", "" },
{ IROp::UpdateRoundingMode, "UpdateRoundingMode", "" },

View File

@ -213,8 +213,15 @@ enum class IROp : u8 {
SetPCConst, // hack to make replacement know PC
CallReplacement,
Break,
// Debugging breakpoints.
Breakpoint,
MemoryCheck,
ValidateAddress8,
ValidateAddress16,
ValidateAddress32,
ValidateAddress128,
};
enum IRComparison {

View File

@ -79,6 +79,25 @@ u32 RunMemCheck(u32 pc, u32 addr) {
return coreState != CORE_RUNNING ? 1 : 0;
}
// Validates a memory access of `alignment` bytes at addr, made by the code at pc.
// An access outside valid memory reports a READ/WRITE_WORD exception (or
// READ/WRITE_BLOCK when alignment is above 4); a valid but misaligned address
// reports an ALIGNMENT exception.
// Returns 0 when the access is fine. After reporting an exception, returns 1 if
// coreState is no longer CORE_RUNNING and 0 otherwise.
template <uint32_t alignment>
u32 RunValidateAddress(u32 pc, u32 addr, u32 isWrite) {
	// Reports the exception, then tells the caller whether the core stopped running.
	const auto report = [&](MemoryExceptionType kind) -> u32 {
		Core_MemoryException(addr, pc, kind);
		return coreState != CORE_RUNNING ? 1 : 0;
	};

	if (!Memory::IsValidRange(addr, alignment)) {
		MemoryExceptionType kind;
		if (alignment > 4) {
			kind = isWrite ? MemoryExceptionType::WRITE_BLOCK : MemoryExceptionType::READ_BLOCK;
		} else {
			kind = isWrite == 1 ? MemoryExceptionType::WRITE_WORD : MemoryExceptionType::READ_WORD;
		}
		return report(kind);
	}

	// Byte accesses (alignment 1) can never be misaligned.
	const bool misaligned = alignment > 1 && (addr & (alignment - 1)) != 0;
	if (misaligned)
		return report(MemoryExceptionType::ALIGNMENT);

	return 0;
}
// We cannot use NEON on ARM32 here until we make it a hard dependency. We can, however, on ARM64.
u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
const IRInst *end = inst + count;
@ -142,6 +161,31 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
mips->r[inst->dest] = ReverseBits32(mips->r[inst->src1]);
break;
case IROp::ValidateAddress8:
if (RunValidateAddress<1>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
CoreTiming::ForceCheck();
return mips->pc;
}
break;
case IROp::ValidateAddress16:
if (RunValidateAddress<2>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
CoreTiming::ForceCheck();
return mips->pc;
}
break;
case IROp::ValidateAddress32:
if (RunValidateAddress<4>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
CoreTiming::ForceCheck();
return mips->pc;
}
break;
case IROp::ValidateAddress128:
if (RunValidateAddress<16>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
CoreTiming::ForceCheck();
return mips->pc;
}
break;
case IROp::Load8:
mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant);
break;
@ -954,7 +998,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
}
case IROp::Break:
Core_Break();
Core_Break(mips->pc);
return mips->pc + 4;
case IROp::SetCtrlVFPU:

View File

@ -227,9 +227,10 @@ void IRJit::RunLoopUntil(u64 globalticks) {
if (opcode == MIPS_EMUHACK_OPCODE) {
u32 data = inst & 0xFFFFFF;
IRBlock *block = blocks_.GetBlock(data);
u32 startPC = mips_->pc;
mips_->pc = IRInterpret(mips_, block->GetInstructions(), block->GetNumInstructions());
if (!Memory::IsValidAddress(mips_->pc)) {
Core_ExecException(mips_->pc, mips_->pc, ExecExceptionType::JUMP);
if (!Memory::IsValidAddress(mips_->pc) || (mips_->pc & 3) != 0) {
Core_ExecException(mips_->pc, startPC, ExecExceptionType::JUMP);
break;
}
} else {

View File

@ -5,6 +5,7 @@
#include "Common/BitSet.h"
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/Log.h"
#include "Core/Config.h"
#include "Core/MIPS/IR/IRInterpreter.h"
#include "Core/MIPS/IR/IRPassSimplify.h"
#include "Core/MIPS/IR/IRRegCache.h"
@ -622,6 +623,18 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts
}
break;
case IROp::ValidateAddress8:
case IROp::ValidateAddress16:
case IROp::ValidateAddress32:
case IROp::ValidateAddress128:
if (gpr.IsImm(inst.src1)) {
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + inst.constant));
} else {
gpr.MapIn(inst.src1);
goto doDefault;
}
break;
case IROp::Downcount:
case IROp::SetPCConst:
goto doDefault;
@ -1428,3 +1441,58 @@ bool MergeLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts) {
}
return logBlocks;
}
// IR pass: copies every instruction from `in` to `out`, and writes a
// ValidateAddress8/16/32/128 instruction immediately before each load or store.
// The validate instruction carries the same base register (src1) and constant
// as the access, with src2 set to 1 for stores and 0 for loads.
// When g_Config.bFastMemory is set the pass takes the DISABLE path instead
// (macro defined elsewhere). This pass always returns false.
bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &opts) {
	CONDITIONAL_DISABLE;
	if (g_Config.bFastMemory)
		DISABLE;

	// TODO: Could be smart about not double-validating an address that has a load / store, etc.
	for (IRInst inst : in.GetInstructions()) {
		IROp check = IROp::ValidateAddress8;
		bool needsCheck = true;

		switch (inst.op) {
		case IROp::Load8:
		case IROp::Load8Ext:
		case IROp::Store8:
			check = IROp::ValidateAddress8;
			break;

		case IROp::Load16:
		case IROp::Load16Ext:
		case IROp::Store16:
			check = IROp::ValidateAddress16;
			break;

		case IROp::Load32:
		case IROp::LoadFloat:
		case IROp::Store32:
		case IROp::StoreFloat:
			check = IROp::ValidateAddress32;
			break;

		case IROp::LoadVec4:
		case IROp::StoreVec4:
			check = IROp::ValidateAddress128;
			break;

		case IROp::Load32Left:
		case IROp::Load32Right:
		case IROp::Store32Left:
		case IROp::Store32Right:
			// This explicitly does not require alignment, so validate as an 8-bit operation.
			check = IROp::ValidateAddress8;
			break;

		default:
			needsCheck = false;
			break;
		}

		if (needsCheck) {
			const bool isStore =
				inst.op == IROp::Store8 || inst.op == IROp::Store16 ||
				inst.op == IROp::Store32 || inst.op == IROp::StoreFloat ||
				inst.op == IROp::StoreVec4 ||
				inst.op == IROp::Store32Left || inst.op == IROp::Store32Right;
			out.Write({ check, { 0 }, inst.src1, isStore ? (u8)1 : (u8)0, inst.constant });
		}

		// Always write out the original. We're only adding.
		out.Write(inst);
	}

	// Nothing in this pass asks for the block to be logged.
	return false;
}

View File

@ -14,3 +14,4 @@ bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool OptimizeFPMoves(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool ReorderLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool MergeLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &opts);

View File

@ -57,8 +57,7 @@
static inline void DelayBranchTo(u32 where)
{
if (!Memory::IsValidAddress(where)) {
// TODO: What about misaligned?
if (!Memory::IsValidAddress(where) || (where & 3) != 0) {
Core_ExecException(where, PC, ExecExceptionType::JUMP);
}
PC += 4;
@ -158,7 +157,7 @@ namespace MIPSInt
void Int_Break(MIPSOpcode op)
{
Reporting::ReportMessage("BREAK instruction hit");
Core_Break();
Core_Break(PC);
PC += 4;
}

View File

@ -612,7 +612,7 @@ void Jit::Comp_Jump(MIPSOpcode op) {
u32 targetAddr = (GetCompilerPC() & 0xF0000000) | off;
// Might be a stubbed address or something?
if (!Memory::IsValidAddress(targetAddr)) {
if (!Memory::IsValidAddress(targetAddr) || (targetAddr & 3) != 0) {
if (js.nextExit == 0) {
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x PC %08x LR %08x", targetAddr, GetCompilerPC(), currentMIPS->r[MIPS_REG_RA]);
} else {

View File

@ -275,7 +275,7 @@ void Jit::Compile(u32 em_address) {
ClearCache();
}
if (!Memory::IsValidAddress(em_address)) {
if (!Memory::IsValidAddress(em_address) || (em_address & 3) != 0) {
Core_ExecException(em_address, em_address, ExecExceptionType::JUMP);
return;
}
@ -672,7 +672,7 @@ static void HitInvalidBranch(uint32_t dest) {
void Jit::WriteExit(u32 destination, int exit_num) {
_dbg_assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num");
if (!Memory::IsValidAddress(destination)) {
if (!Memory::IsValidAddress(destination) || (destination & 3) != 0) {
ERROR_LOG_REPORT(JIT, "Trying to write block exit to illegal destination %08x: pc = %08x", destination, currentMIPS->pc);
MOV(32, MIPSSTATE_VAR(pc), Imm32(GetCompilerPC()));
ABI_CallFunctionC(&HitInvalidBranch, destination);
@ -721,6 +721,12 @@ void Jit::WriteExit(u32 destination, int exit_num) {
}
}
// Returns 1 if addr is a valid address and 4-byte aligned, and 0 otherwise.
// The result is an integer rather than a bool.
static u32 IsValidJumpTarget(uint32_t addr) {
	const bool usable = Memory::IsValidAddress(addr) && (addr & 3) == 0;
	return usable ? 1 : 0;
}
static void HitInvalidJumpReg(uint32_t source) {
Core_ExecException(currentMIPS->pc, source, ExecExceptionType::JUMP);
currentMIPS->pc = source + 8;
@ -762,7 +768,7 @@ void Jit::WriteExitDestInReg(X64Reg reg) {
SetJumpTarget(tooLow);
SetJumpTarget(tooHigh);
ABI_CallFunctionA((const void *)&Memory::IsValidAddress, R(reg));
ABI_CallFunctionA((const void *)&IsValidJumpTarget, R(reg));
// If we're ignoring, coreState didn't trip - so trip it now.
CMP(32, R(EAX), Imm32(0));

View File

@ -49,7 +49,8 @@ static const int VERSION = 1;
static const int MAX_MIP_LEVELS = 12; // 12 should be plenty, 8 is the max mip levels supported by the PSP.
TextureReplacer::TextureReplacer() {
none_.alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
none_.initDone_ = true;
none_.prepareDone_ = true;
}
TextureReplacer::~TextureReplacer() {
@ -373,7 +374,7 @@ u32 TextureReplacer::ComputeHash(u32 addr, int bufw, int w, int h, GETextureForm
}
}
ReplacedTexture &TextureReplacer::FindReplacement(u64 cachekey, u32 hash, int w, int h) {
ReplacedTexture &TextureReplacer::FindReplacement(u64 cachekey, u32 hash, int w, int h, double budget) {
// Only actually replace if we're replacing. We might just be saving.
if (!Enabled() || !g_Config.bReplaceTextures) {
return none_;
@ -382,13 +383,18 @@ ReplacedTexture &TextureReplacer::FindReplacement(u64 cachekey, u32 hash, int w,
ReplacementCacheKey replacementKey(cachekey, hash);
auto it = cache_.find(replacementKey);
if (it != cache_.end()) {
if (!it->second.prepareDone_ && budget > 0.0) {
// We don't do this on a thread, but we only do it while within budget.
PopulateReplacement(&it->second, cachekey, hash, w, h);
}
return it->second;
}
// Okay, let's construct the result.
ReplacedTexture &result = cache_[replacementKey];
result.alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
PopulateReplacement(&result, cachekey, hash, w, h);
if (!g_Config.bReplaceTexturesAllowLate || budget > 0.0) {
PopulateReplacement(&result, cachekey, hash, w, h);
}
return result;
}
@ -433,7 +439,7 @@ void TextureReplacer::PopulateReplacement(ReplacedTexture *result, u64 cachekey,
break;
}
result->alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
result->prepareDone_ = true;
}
enum class ReplacedImageType {
@ -815,11 +821,13 @@ bool ReplacedTexture::IsReady(double budget) {
}
// Loaded already, or not yet on a thread?
if (!levelData_.empty())
if (initDone_ && !levelData_.empty())
return true;
// Let's not even start a new texture if we're already behind.
if (budget < 0.0)
return false;
if (!prepareDone_)
return false;
if (g_Config.bReplaceTexturesAllowLate) {
if (threadWaitable_)
@ -829,10 +837,11 @@ bool ReplacedTexture::IsReady(double budget) {
if (threadWaitable_->WaitFor(budget)) {
// If we finished all the levels, we're done.
return !levelData_.empty();
return initDone_ && !levelData_.empty();
}
} else {
Prepare();
_assert_(initDone_);
return true;
}
@ -842,16 +851,19 @@ bool ReplacedTexture::IsReady(double budget) {
void ReplacedTexture::Prepare() {
std::unique_lock<std::mutex> lock(mutex_);
if (cancelPrepare_)
if (cancelPrepare_) {
initDone_ = true;
return;
}
levelData_.resize(NumLevels());
for (int i = 0; i < NumLevels(); ++i) {
levelData_.resize(levels_.size());
for (int i = 0; i < (int)levels_.size(); ++i) {
if (cancelPrepare_)
break;
PrepareData(i);
}
initDone_ = true;
if (!cancelPrepare_ && threadWaitable_)
threadWaitable_->Notify();
}
@ -975,6 +987,8 @@ bool ReplacedTexture::Load(int level, void *out, int rowPitch) {
_assert_msg_((size_t)level < levels_.size(), "Invalid miplevel");
_assert_msg_(out != nullptr && rowPitch > 0, "Invalid out/pitch");
if (!initDone_)
return false;
if (levelData_.empty())
return false;

View File

@ -118,10 +118,20 @@ struct ReplacedTexture {
~ReplacedTexture();
// True only once initDone_ is set and at least one level is present.
inline bool Valid() const {
	return initDone_ && !levels_.empty();
}
// True only once initDone_ is set and there are no levels. While initDone_ is
// still false this returns false, and so does Valid().
inline bool IsInvalid() const {
	return initDone_ && levels_.empty();
}
bool GetSize(int level, int &w, int &h) const {
if (!initDone_)
return false;
if ((size_t)level < levels_.size()) {
w = levels_[level].w;
h = levels_[level].h;
@ -131,12 +141,16 @@ struct ReplacedTexture {
}
// Number of entries in levels_, reported as 0 until initDone_ is set.
int NumLevels() const {
	return initDone_ ? (int)levels_.size() : 0;
}
Draw::DataFormat Format(int level) const {
if ((size_t)level < levels_.size()) {
return levels_[level].fmt;
if (initDone_) {
if ((size_t)level < levels_.size()) {
return levels_[level].fmt;
}
}
return Draw::DataFormat::R8G8B8A8_UNORM;
}
@ -156,11 +170,13 @@ protected:
std::vector<ReplacedTextureLevel> levels_;
std::vector<std::vector<uint8_t>> levelData_;
ReplacedTextureAlpha alphaStatus_;
ReplacedTextureAlpha alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
double lastUsed_ = 0.0;
LimitedWaitable *threadWaitable_ = nullptr;
std::mutex mutex_;
bool cancelPrepare_ = false;
bool initDone_ = false;
bool prepareDone_ = false;
friend TextureReplacer;
friend ReplacedTextureTask;
@ -196,7 +212,7 @@ public:
u32 ComputeHash(u32 addr, int bufw, int w, int h, GETextureFormat fmt, u16 maxSeenV);
ReplacedTexture &FindReplacement(u64 cachekey, u32 hash, int w, int h);
ReplacedTexture &FindReplacement(u64 cachekey, u32 hash, int w, int h, double budget);
bool FindFiltering(u64 cachekey, u32 hash, TextureFiltering *forceFiltering);
ReplacedTexture &FindNone() {
return none_;

View File

@ -1,244 +0,0 @@
// Copyright (c) 2014- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <map>
#include "Common/Log.h"
#include "Common/StringUtils.h"
#include "Common/GPU/Shader.h"
#include "Common/GPU/ShaderWriter.h"
#include "Common/Data/Convert/ColorConv.h"
#include "Core/Reporting.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/TextureCacheCommon.h"
#include "GPU/Common/DepalettizeShaderCommon.h"
#include "GPU/Common/DepalettizeCommon.h"
// Interpolants for the depal shaders: a single high-precision texcoord.
static const VaryingDef varyings[1] = {
{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
};
// Sampler names handed to the depal pipeline description in GetDepalettizeShader().
// NOTE(review): "tex" is presumably the indexed source image and "pal" the CLUT texture - confirm against the shader generator.
static const SamplerDef samplers[2] = {
{ "tex" },
{ "pal" },
};
// Only stores the draw context.
DepalShaderCache::DepalShaderCache(Draw::DrawContext *draw) : draw_(draw) { }
// Releases everything the cache owns (DeviceLost() forwards to Clear()).
DepalShaderCache::~DepalShaderCache() {
DeviceLost();
}
// Adopts the draw context to use from now on; nothing is recreated here.
void DepalShaderCache::DeviceRestore(Draw::DrawContext *draw) {
draw_ = draw;
}
// Drops every cached GPU object by clearing the cache.
void DepalShaderCache::DeviceLost() {
Clear();
}
// Returns a one-row RGBA8888 texture containing the given CLUT, creating and caching it on first use.
//
// clutFormat: palette format of rawClut; selects the 16->32 bit conversion.
// clutHash:   hash of the palette contents, combined with the format to form the cache key.
// rawClut:    palette data. For the 32-bit format it is uploaded directly, otherwise it is
//             expanded to RGBA8888 through a temporary buffer.
//
// A cache hit refreshes the entry's lastFrame so Decimate() keeps it alive.
Draw::Texture *DepalShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut) {
	u32 clutId = GetClutID(clutFormat, clutHash);

	auto oldtex = texCache_.find(clutId);
	if (oldtex != texCache_.end()) {
		oldtex->second->lastFrame = gpuStats.numFlips;
		return oldtex->second->texture;
	}

	// 256 entries for the 32-bit format, 512 for the 16-bit formats.
	int texturePixels = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512;

	DepalTexture *tex = new DepalTexture();

	Draw::TextureDesc desc{};
	desc.width = texturePixels;
	desc.height = 1;
	desc.depth = 1;
	desc.mipLevels = 1;
	desc.tag = "clut";
	desc.type = Draw::TextureType::LINEAR2D;  // TODO: Try LINEAR1D?
	desc.format = Draw::DataFormat::R8G8B8A8_UNORM;  // TODO: Also support an BGR format. We won't bother with the 16-bit formats here.

	// Large enough for 512 RGBA8888 texels.
	uint8_t convTemp[2048]{};

	switch (clutFormat) {
	case GEPaletteFormat::GE_CMODE_32BIT_ABGR8888:
		desc.initData.push_back((const uint8_t *)rawClut);
		break;
	case GEPaletteFormat::GE_CMODE_16BIT_BGR5650:
		// 5650 has no alpha bits, so it takes the 565 converter (previously swapped with 5551).
		ConvertRGB565ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels);
		desc.initData.push_back(convTemp);
		break;
	case GEPaletteFormat::GE_CMODE_16BIT_ABGR5551:
		// 5551 carries a 1-bit alpha, so it takes the 5551 converter (previously swapped with 565).
		ConvertRGBA5551ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels);
		desc.initData.push_back(convTemp);
		break;
	case GEPaletteFormat::GE_CMODE_16BIT_ABGR4444:
		ConvertRGBA4444ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels);
		desc.initData.push_back(convTemp);
		break;
	}

	tex->texture = draw_->CreateTexture(desc);
	tex->lastFrame = gpuStats.numFlips;
	texCache_[clutId] = tex;
	return tex->texture;
}
void DepalShaderCache::Clear() {
for (auto shader = cache_.begin(); shader != cache_.end(); ++shader) {
shader->second->fragShader->Release();
if (shader->second->pipeline) {
shader->second->pipeline->Release();
}
delete shader->second;
}
cache_.clear();
for (auto tex = texCache_.begin(); tex != texCache_.end(); ++tex) {
tex->second->texture->Release();
delete tex->second;
}
texCache_.clear();
if (vertexShader_) {
vertexShader_->Release();
vertexShader_ = nullptr;
}
if (nearestSampler_) {
nearestSampler_->Release();
nearestSampler_ = nullptr;
}
}
void DepalShaderCache::Decimate() {
for (auto tex = texCache_.begin(); tex != texCache_.end(); ) {
if (tex->second->lastFrame + DEPAL_TEXTURE_OLD_AGE < gpuStats.numFlips) {
tex->second->texture->Release();
delete tex->second;
texCache_.erase(tex++);
} else {
++tex;
}
}
}
// Returns the shared sampler state, creating it on first use.
// Only the wrap modes are set (clamp to edge on all axes); filtering is left at the desc defaults.
Draw::SamplerState *DepalShaderCache::GetSampler() {
	if (nearestSampler_)
		return nearestSampler_;

	Draw::SamplerStateDesc clampDesc{};
	clampDesc.wrapU = Draw::TextureAddressMode::CLAMP_TO_EDGE;
	clampDesc.wrapV = Draw::TextureAddressMode::CLAMP_TO_EDGE;
	clampDesc.wrapW = Draw::TextureAddressMode::CLAMP_TO_EDGE;
	nearestSampler_ = draw_->CreateSamplerState(clampDesc);
	return nearestSampler_;
}
// Returns the depalettize shader/pipeline for the given CLUT mode and buffer format,
// generating and caching it on first use.
//
// clutMode:    used (low 24 bits) together with pixelFormat as the cache key.
// pixelFormat: format of the buffer being sampled; also baked into the generated shader.
//
// Returns nullptr if the pipeline could not be created. Note that the shader itself is
// generated from the current gstate CLUT settings, not from clutMode
// (presumably callers pass the matching value - TODO confirm).
DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GEBufferFormat pixelFormat) {
	using namespace Draw;

	const u32 id = GenerateShaderID(clutMode, pixelFormat);

	auto shader = cache_.find(id);
	if (shader != cache_.end()) {
		// Mirror the miss path below: an entry without a pipeline is reported as a failure.
		DepalShader *cached = shader->second;
		return cached->pipeline ? cached : nullptr;
	}

	// Scratch buffer for generated source; reused for the vertex and the fragment shader.
	char *buffer = new char[4096];

	if (!vertexShader_) {
		GenerateDepalVs(buffer, draw_->GetShaderLanguageDesc());
		vertexShader_ = draw_->CreateShaderModule(ShaderStage::Vertex, draw_->GetShaderLanguageDesc().shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "depal_vs");
	}

	// TODO: Parse these out of clutMode some nice way, to become a bit more stateless.
	DepalConfig config;
	config.clutFormat = gstate.getClutPaletteFormat();
	config.startPos = gstate.getClutIndexStartPos();
	config.shift = gstate.getClutIndexShift();
	config.mask = gstate.getClutIndexMask();
	config.pixelFormat = pixelFormat;

	GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc());

	ShaderModule *fragShader = draw_->CreateShaderModule(ShaderStage::Fragment, draw_->GetShaderLanguageDesc().shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "depal_fs");

	DepalShader *depal = new DepalShader();

	// One 16-byte stream: float2 position followed by float2 texcoord.
	static const InputLayoutDesc desc = {
		{
			{ 16, false },
		},
		{
			{ 0, SEM_POSITION, DataFormat::R32G32_FLOAT, 0 },
			{ 0, SEM_TEXCOORD0, DataFormat::R32G32_FLOAT, 8 },
		},
	};
	InputLayout *inputLayout = draw_->CreateInputLayout(desc);
	BlendState *blendOff = draw_->CreateBlendState({ false, 0xF });
	DepthStencilStateDesc dsDesc{};
	DepthStencilState *noDepthStencil = draw_->CreateDepthStencilState(dsDesc);
	RasterState *rasterNoCull = draw_->CreateRasterState({});

	PipelineDesc depalPipelineDesc{
		Primitive::TRIANGLE_STRIP,   // Could have use a single triangle too (in which case we'd use LIST here) but want to be prepared to do subrectangles.
		{ vertexShader_, fragShader },
		inputLayout, noDepthStencil, blendOff, rasterNoCull, nullptr, samplers
	};

	Pipeline *pipeline = draw_->CreateGraphicsPipeline(depalPipelineDesc);

	// The state objects are no longer needed once the pipeline has been created.
	inputLayout->Release();
	blendOff->Release();
	noDepthStencil->Release();
	rasterNoCull->Release();

	_assert_(pipeline);

	depal->pipeline = pipeline;
	depal->fragShader = fragShader;
	// Keep the fragment source around for the shader debugger.
	depal->code = buffer;
	cache_[id] = depal;

	delete[] buffer;
	return depal->pipeline ? depal : nullptr;
}
// Lists the IDs of all cached depal shaders as 8-digit hex strings.
// The type argument is not consulted.
std::vector<std::string> DepalShaderCache::DebugGetShaderIDs(DebugShaderType type) {
	std::vector<std::string> result;
	for (auto it = cache_.begin(); it != cache_.end(); ++it) {
		result.emplace_back(StringFromFormat("%08x", it->first));
	}
	return result;
}
// Returns debugger text for the cached shader identified by idstr (hex, as produced by
// DebugGetShaderIDs): the id itself for SHADER_STRING_SHORT_DESC, the fragment shader
// source for SHADER_STRING_SOURCE_CODE, and an empty string for anything else,
// including unparseable or unknown ids. The type argument is not consulted.
std::string DepalShaderCache::DebugGetShaderString(std::string idstr, DebugShaderType type, DebugShaderStringType stringType) {
	uint32_t id = 0;
	// Bail out if idstr is not hex, instead of looking up an indeterminate id.
	if (sscanf(idstr.c_str(), "%08x", &id) != 1)
		return "";

	auto iter = cache_.find(id);
	if (iter == cache_.end())
		return "";

	switch (stringType) {
	case SHADER_STRING_SHORT_DESC:
		return idstr;
	case SHADER_STRING_SOURCE_CODE:
		return iter->second->code;
	default:
		return "";
	}
}

View File

@ -1,182 +0,0 @@
// Copyright (c) 2014- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include <map>
#include <vector>
#include <string>
#include "Common/CommonTypes.h"
#include "Common/GPU/Shader.h"
#include "Common/GPU/thin3d.h"
#include "GPU/ge_constants.h"
#include "GPU/Common/Draw2D.h"
#include "GPU/Common/ShaderCommon.h"
#include "GPU/Common/DepalettizeShaderCommon.h"
// One cached depalettize shader: the fragment module, the pipeline built from it,
// and the generated fragment source (kept for the shader debugger).
class DepalShader {
public:
Draw::ShaderModule *fragShader;
// May be null if pipeline creation failed.
Draw::Pipeline *pipeline;
std::string code;
};
// One cached CLUT texture plus the flip count at which it was last requested
// (used by DepalShaderCache::Decimate to evict stale entries).
class DepalTexture {
public:
Draw::Texture *texture;
int lastFrame;
};
// Caches both shaders and palette textures.
// Owns the GPU objects it creates; they are released in Clear() (also run on DeviceLost and destruction).
class DepalShaderCache {
public:
DepalShaderCache(Draw::DrawContext *draw);
~DepalShaderCache();
// Returns the cached shader/pipeline for this CLUT mode and buffer format, generating it on first use.
// Returns nullptr if the pipeline could not be created.
DepalShader *GetDepalettizeShader(uint32_t clutMode, GEBufferFormat pixelFormat);
// Returns a cached RGBA8888 texture holding the given CLUT, converting and uploading it on first use.
Draw::Texture *GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut);
// Returns the shared clamp-to-edge sampler, created on first use.
Draw::SamplerState *GetSampler();
// Releases every cached object.
void Clear();
// Evicts CLUT textures that have not been used for DEPAL_TEXTURE_OLD_AGE flips.
void Decimate();
// Shader debugger hooks.
std::vector<std::string> DebugGetShaderIDs(DebugShaderType type);
std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType);
void DeviceLost();
void DeviceRestore(Draw::DrawContext *draw);
private:
// Cache key: low 24 bits of clutMode, with the buffer format shifted into the top byte.
static uint32_t GenerateShaderID(uint32_t clutMode, GEBufferFormat pixelFormat) {
return (clutMode & 0xFFFFFF) | (pixelFormat << 24);
}
static uint32_t GetClutID(GEPaletteFormat clutFormat, uint32_t clutHash) {
// Simplistic.
return clutHash ^ (uint32_t)clutFormat;
}
Draw::DrawContext *draw_;
// Shared by all depal pipelines; created lazily.
Draw::ShaderModule *vertexShader_ = nullptr;
Draw::SamplerState *nearestSampler_ = nullptr;
// Keyed by GenerateShaderID().
std::map<u32, DepalShader *> cache_;
// Keyed by GetClutID().
std::map<u32, DepalTexture *> texCache_;
};
// TODO: Merge with DepalShaderCache?
// Draws a textured quad (as a 4-vertex strip) through a DepalShader's pipeline.
// Usage: construct, optionally ApplyBounds() to restrict the quad, then Use() and Shade().
class TextureShaderApplier {
public:
	struct Pos {
		float x;
		float y;
	};
	struct UV {
		float u;
		float v;
	};

	// bufferW/bufferH: size of the source buffer in texels, used to normalize bounds.
	// renderW/renderH: size of the viewport/scissor set up by Shade().
	// Starts out covering the whole target (-1..1) with the whole texture (0..1).
	TextureShaderApplier(Draw::DrawContext *draw, DepalShader *shader, float bufferW, float bufferH, int renderW, int renderH)
		: draw_(draw), shader_(shader), bufferW_(bufferW), bufferH_(bufferH), renderW_(renderW), renderH_(renderH) {
		static const Pos pos[4] = {
			{-1, -1 },
			{ 1, -1 },
			{-1,  1 },
			{ 1,  1 },
		};
		memcpy(pos_, pos, sizeof(pos_));

		static const UV uv[4] = {
			{ 0, 0 },
			{ 1, 0 },
			{ 0, 1 },
			{ 1, 1 },
		};
		memcpy(uv_, uv, sizeof(uv_));
	}

	// Shrinks the quad (positions and UVs) to the given texel bounds, offset by uoff/voff.
	// Does nothing when the bounds are unset.
	void ApplyBounds(const KnownVertexBounds &bounds, u32 uoff, u32 voff) {
		// If min is not < max, then we don't have values (wasn't set during decode.)
		if (bounds.minV < bounds.maxV) {
			const float invWidth = 1.0f / bufferW_;
			const float invHeight = 1.0f / bufferH_;
			// Inverse of half = double.
			const float invHalfWidth = invWidth * 2.0f;
			const float invHalfHeight = invHeight * 2.0f;

			const int u1 = bounds.minU + uoff;
			const int v1 = bounds.minV + voff;
			const int u2 = bounds.maxU + uoff;
			const int v2 = bounds.maxV + voff;

			// Texel coordinates mapped to clip space (-1..1).
			const float left = u1 * invHalfWidth - 1.0f;
			const float right = u2 * invHalfWidth - 1.0f;
			const float top = v1 * invHalfHeight - 1.0f;
			const float bottom = v2 * invHalfHeight - 1.0f;
			// Points are in triangle strip order: BL, BR, TL, TR.
			pos_[0] = Pos{ left, bottom };
			pos_[1] = Pos{ right, bottom };
			pos_[2] = Pos{ left, top };
			pos_[3] = Pos{ right, top };

			// And also the UVs, same order.
			const float uvleft = u1 * invWidth;
			const float uvright = u2 * invWidth;
			const float uvtop = v1 * invHeight;
			const float uvbottom = v2 * invHeight;
			uv_[0] = UV{ uvleft, uvbottom };
			uv_[1] = UV{ uvright, uvbottom };
			uv_[2] = UV{ uvleft, uvtop };
			uv_[3] = UV{ uvright, uvtop };

			// We need to reapply the texture next time since we cropped UV.
			gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
		}
	}

	// Binds the shader's pipeline and assembles the four vertices from pos_/uv_.
	void Use() {
		draw_->BindPipeline(shader_->pipeline);
		for (int i = 0; i < 4; i++) {
			// Pos and UV each span two consecutive floats of the vertex (x,y and u,v).
			memcpy(&verts_[i].x, &pos_[i], sizeof(Pos));
			memcpy(&verts_[i].u, &uv_[i], sizeof(UV));
		}
	}

	// Sets viewport and scissor to the full render size and draws the quad.
	void Shade() {
		Draw::Viewport vp{ 0.0f, 0.0f, (float)renderW_, (float)renderH_, 0.0f, 1.0f };
		draw_->SetViewports(1, &vp);
		draw_->SetScissorRect(0, 0, renderW_, renderH_);
		draw_->DrawUP((const uint8_t *)verts_, 4);
	}

protected:
	Draw::DrawContext *draw_;
	DepalShader *shader_;
	Pos pos_[4];
	UV uv_[4];
	Draw2DVertex verts_[4];
	float bufferW_;
	float bufferH_;
	int renderW_;
	int renderH_;
};

View File

@ -27,14 +27,12 @@
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Common/DepalettizeShaderCommon.h"
#define WRITE p+=sprintf
static const InputDef vsInputs[2] = {
{ "vec2", "a_position", Draw::SEM_POSITION, },
{ "vec2", "a_texcoord0", Draw::SEM_TEXCOORD0, },
};
// TODO: Deduplicate with DepalettizeCommon.cpp
// TODO: Deduplicate with TextureShaderCommon.cpp
static const SamplerDef samplers[2] = {
{ "tex" },
{ "pal" },
@ -44,12 +42,12 @@ static const VaryingDef varyings[1] = {
{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
};
// Uses integer instructions available since OpenGL 3.0. Suitable for ES 3.0 as well.
void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
// Uses integer instructions available since OpenGL 3.0, ES 3.0 (and 2.0 with extensions), and of course Vulkan and D3D11.
void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
const int shift = config.shift;
const int mask = config.mask;
if (config.pixelFormat == GE_FORMAT_DEPTH16) {
if (config.bufferFormat == GE_FORMAT_DEPTH16) {
DepthScaleFactors factors = GetDepthScaleFactors();
writer.ConstFloat("z_scale", factors.scale);
writer.ConstFloat("z_offset", factors.offset);
@ -71,7 +69,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con
writer.C(" vec4 color = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
int shiftedMask = mask << shift;
switch (config.pixelFormat) {
switch (config.bufferFormat) {
case GE_FORMAT_8888:
if (shiftedMask & 0xFF) writer.C(" int r = int(color.r * 255.99);\n"); else writer.C(" int r = 0;\n");
if (shiftedMask & 0xFF00) writer.C(" int g = int(color.g * 255.99);\n"); else writer.C(" int g = 0;\n");
@ -102,6 +100,17 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con
case GE_FORMAT_DEPTH16:
// Remap depth buffer.
writer.C(" float depth = (color.x - z_offset) * z_scale;\n");
if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) {
// Convert depth to 565, without going through a CLUT.
writer.C(" int idepth = int(clamp(depth, 0.0, 65535.0));\n");
writer.C(" float r = (idepth & 31) / 31.0f;\n");
writer.C(" float g = ((idepth >> 5) & 63) / 63.0f;\n");
writer.C(" float b = ((idepth >> 11) & 31) / 31.0f;\n");
writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n");
return;
}
writer.C(" int index = int(clamp(depth, 0.0, 65535.0));\n");
break;
default:
@ -128,23 +137,25 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "uv").C(";\n");
}
// FP only, to suit GL(ES) 2.0
void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
// FP only, to suit GL(ES) 2.0 and DX9
void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) {
char lookupMethod[128] = "index.r";
const int shift = config.shift;
const int mask = config.mask;
if (config.pixelFormat == GE_FORMAT_DEPTH16) {
if (config.bufferFormat == GE_FORMAT_DEPTH16) {
DepthScaleFactors factors = GetDepthScaleFactors();
writer.ConstFloat("z_scale", factors.scale);
writer.ConstFloat("z_offset", factors.offset);
}
writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
float index_multiplier = 1.0f;
// pixelformat is the format of the texture we are sampling.
bool formatOK = true;
switch (config.pixelFormat) {
switch (config.bufferFormat) {
case GE_FORMAT_8888:
if ((mask & (mask + 1)) == 0) {
// If the value has all bits contiguous (bitmask check above), we can mod by it + 1.
@ -222,6 +233,19 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c
case GE_FORMAT_DEPTH16:
{
// TODO: I think we can handle most scenarios here, but texturing from depth buffers requires an extension on ES 2.0 anyway.
// Not on D3D9 though, so this path is still relevant.
if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) {
// Convert depth to 565, without going through a CLUT.
writer.C(" float depth = (index.x - z_offset) * z_scale;\n");
writer.C(" float idepth = floor(clamp(depth, 0.0, 65535.0));\n");
writer.C(" float r = mod(idepth, 32.0) / 31.0f;\n");
writer.C(" float g = mod(floor(idepth / 32.0), 64.0) / 63.0f;\n");
writer.C(" float b = mod(floor(idepth / 2048.0), 32.0) / 31.0f;\n");
writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n");
return;
}
if (shift < 16) {
index_multiplier = 1.0f / (float)(1 << shift);
truncate_cpy(lookupMethod, "((index.x - z_offset) * z_scale)");
@ -249,7 +273,7 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c
// index_multiplier -= 0.01f / texturePixels;
if (!formatOK) {
ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x offset=%d", config.pixelFormat, shift, mask, config.startPos);
ERROR_LOG_REPORT_ONCE(depal, G3D, "%s depal unsupported: shift=%d mask=%02x offset=%d", GeBufferFormatToString(config.bufferFormat), shift, mask, config.startPos);
}
// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.
@ -258,38 +282,67 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c
char offset[128] = "";
sprintf(offset, " + %f", texel_offset);
writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
writer.F(" float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
}
void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
ShaderWriter writer(buffer, lang, ShaderStage::Fragment);
// Emits the body of the "smoothed" depal fragment shader: one color channel of "tex"
// is scaled into a CLUT index and used to sample "pal", with no integer math.
// Only 5551 and 565 buffers with a single-channel shift are supported; anything else
// trips a debug assert and leaves the channel name as "error".
void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) {
	const char *sourceChannel = "error";
	float indexMultiplier = 32.0f;

	const bool is5551 = config.bufferFormat == GE_FORMAT_5551;
	const bool is565 = config.bufferFormat == GE_FORMAT_565;
	if (is5551 || is565) {
		if (is5551) {
			_dbg_assert_(config.mask == 0x1F);
		} else {
			_dbg_assert_(config.mask == 0x1F || config.mask == 0x3F);
		}
		// Blue starts at bit 10 in 5551 and at bit 11 in 565.
		const int blueShift = is5551 ? 10 : 11;
		if (config.shift == 0) {
			sourceChannel = "r";
		} else if (config.shift == 5) {
			sourceChannel = "g";
			// Green has six bits in 565.
			if (is565) {
				indexMultiplier = 64.0f;
			}
		} else if (config.shift == blueShift) {
			sourceChannel = "b";
		} else {
			_dbg_assert_(false);
		}
	} else {
		_dbg_assert_(false);
	}

	writer.C(" float index = ").SampleTexture2D("tex", "v_texcoord").F(".%s * %0.1f;\n", sourceChannel, indexMultiplier);

	// The CLUT texture is 256 texels wide for 32-bit palettes, 512 otherwise.
	const float texturePixels = config.clutFormat == GE_CMODE_32BIT_ABGR8888 ? 256.f : 512.f;

	writer.F(" float coord = (index + 0.5) * %f;\n", 1.0 / texturePixels);
	writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
}
void GenerateDepalFs(ShaderWriter &writer, const DepalConfig &config) {
writer.DeclareSamplers(samplers);
writer.HighPrecisionFloat();
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
switch (lang.shaderLanguage) {
case HLSL_D3D9:
case GLSL_1xx:
GenerateDepalShaderFloat(writer, config, lang);
break;
case GLSL_VULKAN:
case GLSL_3xx:
case HLSL_D3D11:
GenerateDepalShader300(writer, config, lang);
break;
default:
_assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage);
if (config.smoothedDepal) {
// Handles a limited set of cases, but doesn't need any integer math so we don't
// need two variants.
GenerateDepalSmoothed(writer, config);
} else {
switch (writer.Lang().shaderLanguage) {
case HLSL_D3D9:
case GLSL_1xx:
GenerateDepalShaderFloat(writer, config);
break;
case GLSL_VULKAN:
case GLSL_3xx:
case HLSL_D3D11:
GenerateDepalShader300(writer, config);
break;
default:
_assert_msg_(false, "Shader language not supported for depal: %d", (int)writer.Lang().shaderLanguage);
}
}
writer.EndFSMain("outColor", FSFLAG_NONE);
}
// Writes a passthrough vertex shader into buffer for the given shader language:
// the texcoord is forwarded unchanged and the 2D position is emitted at z = 0, w = 1.
void GenerateDepalVs(char *buffer, const ShaderLanguageDesc &lang) {
ShaderWriter writer(buffer, lang, ShaderStage::Vertex, nullptr, 0);
writer.BeginVSMain(vsInputs, Slice<UniformDef>::empty(), varyings);
writer.C(" v_texcoord = a_texcoord0;\n");
writer.C(" gl_Position = vec4(a_position, 0.0, 1.0);\n");
writer.EndVSMain(varyings);
}
#undef WRITE

View File

@ -22,6 +22,8 @@
#include "Common/GPU/Shader.h"
#include "GPU/ge_constants.h"
class ShaderWriter;
static const int DEPAL_TEXTURE_OLD_AGE = 120;
struct DepalConfig {
@ -29,8 +31,9 @@ struct DepalConfig {
int shift;
u32 startPos;
GEPaletteFormat clutFormat;
GEBufferFormat pixelFormat;
GETextureFormat textureFormat;
GEBufferFormat bufferFormat;
bool smoothedDepal;
};
void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang);
void GenerateDepalVs(char *buffer, const ShaderLanguageDesc &lang);
void GenerateDepalFs(ShaderWriter &writer, const DepalConfig &config);

View File

@ -25,6 +25,7 @@
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/FramebufferManagerCommon.h"
#include "GPU/Common/TextureCacheCommon.h"
#include "GPU/Common/GPUStateUtils.h"
static const InputDef inputs[2] = {
{ "vec2", "a_position", Draw::SEM_POSITION },
@ -39,19 +40,88 @@ static const SamplerDef samplers[1] = {
{ "tex" },
};
void GenerateDraw2DFs(ShaderWriter &writer) {
static const UniformDef uniforms[2] = {
{ "vec2", "texSize", 0 },
{ "float", "scaleFactor", 1},
};
struct Draw2DUB {
float texSizeX;
float texSizeY;
float scaleFactor;
};
const UniformBufferDesc draw2DUBDesc{ sizeof(Draw2DUB), {
{ "texSize", -1, 0, UniformType::FLOAT2, 0 },
{ "scaleFactor", -1, 1, UniformType::FLOAT1, 0 },
} };
// Emits a fragment shader that outputs the sampled "tex" color unchanged.
// The returned info marks both the read and the written channel as color.
Draw2DPipelineInfo GenerateDraw2DCopyColorFs(ShaderWriter &writer) {
writer.DeclareSamplers(samplers);
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
writer.C(" vec4 outColor = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
writer.EndFSMain("outColor", FSFLAG_NONE);
return Draw2DPipelineInfo{
RASTER_COLOR,
RASTER_COLOR,
};
}
void GenerateDraw2DDepthFs(ShaderWriter &writer) {
// Emits a fragment shader that copies the x component of "tex" into gl_FragDepth;
// the color output is a dummy zero vector.
// The returned info marks both the read and the written channel as depth.
Draw2DPipelineInfo GenerateDraw2DCopyDepthFs(ShaderWriter &writer) {
writer.DeclareSamplers(samplers);
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_WRITEDEPTH);
writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n");
writer.C(" gl_FragDepth = ").SampleTexture2D("tex", "v_texcoord.xy").C(".x;\n");
writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH);
return Draw2DPipelineInfo{
RASTER_DEPTH,
RASTER_DEPTH,
};
}
// Emits a fragment shader that reinterprets a 565 color texel as a 16-bit value
// (r in the low 5 bits, g in the next 6, b in the top 5) and writes it to gl_FragDepth.
// Reads color, writes depth.
Draw2DPipelineInfo GenerateDraw2D565ToDepthFs(ShaderWriter &writer) {
	// Depth values are synthesized here rather than copied, so the depth scaling
	// has to be applied explicitly.
	const DepthScaleFactors scaling = GetDepthScaleFactors();

	writer.DeclareSamplers(samplers);
	writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_WRITEDEPTH);
	writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n");
	writer.C(" vec3 rgb = ").SampleTexture2D("tex", "v_texcoord.xy").C(".xyz;\n");
	writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0); \n");
	writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", scaling.scale, scaling.offset);
	writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH);

	Draw2DPipelineInfo info{
		RASTER_COLOR,
		RASTER_DEPTH,
	};
	return info;
}
// Same 565 -> depth conversion as GenerateDraw2D565ToDepthFs, but the source row is first
// remapped within strips of (4 * scaleFactor) rows using the texSize/scaleFactor uniforms.
// Reads color, writes depth.
Draw2DPipelineInfo GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) {
	// Depth values are synthesized here rather than copied, so the depth scaling
	// has to be applied explicitly.
	const DepthScaleFactors scaling = GetDepthScaleFactors();

	writer.DeclareSamplers(samplers);
	writer.BeginFSMain(uniforms, varyings, FSFLAG_WRITEDEPTH);
	writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n");

	// Work in texel space, move the row inside its strip, then go back to normalized coords.
	writer.C(" vec2 tsize = texSize;\n");
	writer.C(" vec2 coord = v_texcoord * tsize;\n");
	writer.F(" float strip = 4.0 * scaleFactor;\n");
	writer.C(" float in_strip = mod(coord.y, strip);\n");
	writer.C(" coord.y = coord.y - in_strip + strip - in_strip;\n");
	writer.C(" coord /= tsize;\n");

	writer.C(" vec3 rgb = ").SampleTexture2D("tex", "coord").C(".xyz;\n");
	writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0); \n");
	writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", scaling.scale, scaling.offset);
	writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH);

	Draw2DPipelineInfo info{
		RASTER_COLOR,
		RASTER_DEPTH
	};
	return info;
}
void GenerateDraw2DVS(ShaderWriter &writer) {
@ -63,7 +133,24 @@ void GenerateDraw2DVS(ShaderWriter &writer) {
writer.EndVSMain(varyings);
}
void FramebufferManagerCommon::Ensure2DResources() {
// Releases obj if it is set and resets the caller's pointer to null,
// so the call is safe to repeat.
template <typename T>
static void DoRelease(T *&obj) {
	if (obj != nullptr) {
		obj->Release();
		obj = nullptr;
	}
}
// Releases the shared vertex shader and both samplers and nulls the members.
void Draw2D::DeviceLost() {
DoRelease(draw2DVs_);
DoRelease(draw2DSamplerLinear_);
DoRelease(draw2DSamplerNearest_);
}
// Adopts the draw context to use after a device reset. Without this, draw_ would keep
// pointing at the previous context. Shared resources are recreated lazily by Ensure2DResources().
void Draw2D::DeviceRestore(Draw::DrawContext *draw) {
	draw_ = draw;
}
void Draw2D::Ensure2DResources() {
using namespace Draw;
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
@ -84,6 +171,7 @@ void FramebufferManagerCommon::Ensure2DResources() {
descLinear.mipFilter = TextureFilter::LINEAR;
descLinear.wrapU = TextureAddressMode::CLAMP_TO_EDGE;
descLinear.wrapV = TextureAddressMode::CLAMP_TO_EDGE;
descLinear.wrapW = TextureAddressMode::CLAMP_TO_EDGE;
draw2DSamplerLinear_ = draw_->CreateSamplerState(descLinear);
}
@ -94,20 +182,22 @@ void FramebufferManagerCommon::Ensure2DResources() {
descNearest.mipFilter = TextureFilter::NEAREST;
descNearest.wrapU = TextureAddressMode::CLAMP_TO_EDGE;
descNearest.wrapV = TextureAddressMode::CLAMP_TO_EDGE;
descNearest.wrapW = TextureAddressMode::CLAMP_TO_EDGE;
draw2DSamplerNearest_ = draw_->CreateSamplerState(descNearest);
}
}
Draw::Pipeline *FramebufferManagerCommon::Create2DPipeline(void (*generate)(ShaderWriter &)) {
Draw2DPipeline *Draw2D::Create2DPipeline(std::function<Draw2DPipelineInfo (ShaderWriter &)> generate) {
Ensure2DResources();
using namespace Draw;
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
char *fsCode = new char[4000];
ShaderWriter writer(fsCode, shaderLanguageDesc, ShaderStage::Fragment);
generate(writer);
Draw2DPipelineInfo info = generate(writer);
ShaderModule *fs = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fsCode, strlen(fsCode), "draw2d_fs");
delete[] fsCode;
_assert_(fs);
@ -123,22 +213,24 @@ Draw::Pipeline *FramebufferManagerCommon::Create2DPipeline(void (*generate)(Shad
};
InputLayout *inputLayout = draw_->CreateInputLayout(desc);
BlendState *blendOff = draw_->CreateBlendState({ false, 0xF });
BlendState *blendDiscard = draw_->CreateBlendState({ false, 0x0 });
BlendState *blend = draw_->CreateBlendState({ false, info.writeChannel == RASTER_COLOR ? 0xF : 0 });
DepthStencilState *noDepthStencil = draw_->CreateDepthStencilState(DepthStencilStateDesc{});
DepthStencilStateDesc dsDesc{};
if (info.writeChannel == RASTER_DEPTH) {
dsDesc.depthTestEnabled = true;
dsDesc.depthWriteEnabled = true;
dsDesc.depthCompare = Draw::Comparison::ALWAYS;
}
DepthStencilState *depthStencil = draw_->CreateDepthStencilState(dsDesc);
RasterState *rasterNoCull = draw_->CreateRasterState({});
DepthStencilStateDesc dsWriteDesc{};
dsWriteDesc.depthTestEnabled = true;
dsWriteDesc.depthWriteEnabled = true;
dsWriteDesc.depthCompare = Draw::Comparison::ALWAYS;
DepthStencilState *depthWriteAlways = draw_->CreateDepthStencilState(dsWriteDesc);
PipelineDesc pipelineDesc{
Primitive::TRIANGLE_STRIP,
{ draw2DVs_, fs },
inputLayout, noDepthStencil, blendOff, rasterNoCull, nullptr,
inputLayout,
depthStencil,
blend, rasterNoCull, &draw2DUBDesc,
};
Draw::Pipeline *pipeline = draw_->CreateGraphicsPipeline(pipelineDesc);
@ -146,45 +238,97 @@ Draw::Pipeline *FramebufferManagerCommon::Create2DPipeline(void (*generate)(Shad
fs->Release();
rasterNoCull->Release();
blendOff->Release();
blendDiscard->Release();
noDepthStencil->Release();
depthWriteAlways->Release();
blend->Release();
depthStencil->Release();
inputLayout->Release();
return pipeline;
return new Draw2DPipeline {
pipeline,
info,
fsCode,
};
}
void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, RasterChannel channel) {
void Draw2D::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DPipeline *pipeline, float texW, float texH, int scaleFactor) {
using namespace Draw;
Ensure2DResources();
_dbg_assert_(pipeline);
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
if (pipeline->info.writeChannel == RASTER_DEPTH) {
_dbg_assert_(draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported);
switch (channel) {
case RASTER_COLOR:
if (!draw2DPipelineColor_) {
draw2DPipelineColor_ = Create2DPipeline(&GenerateDraw2DFs);
}
draw_->BindPipeline(draw2DPipelineColor_);
break;
case RASTER_DEPTH:
if (!draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
// Can't do it
return;
}
if (!draw2DPipelineDepth_) {
draw2DPipelineDepth_ = Create2DPipeline(&GenerateDraw2DFs);
}
draw_->BindPipeline(draw2DPipelineDepth_);
break;
// We don't filter inputs when writing depth, results will be bad.
linearFilter = false;
}
Draw2DUB ub;
ub.texSizeX = tex ? tex->Width() : texW;
ub.texSizeY = tex ? tex->Height() : texH;
ub.scaleFactor = (float)scaleFactor;
draw_->BindPipeline(pipeline->pipeline);
draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub));
if (tex) {
draw_->BindTextures(TEX_SLOT_PSP_TEXTURE, 1, &tex);
}
draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, linearFilter ? &draw2DSamplerLinear_ : &draw2DSamplerNearest_);
draw_->DrawUP(verts, vertexCount);
draw_->InvalidateCachedState();
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_VERTEXSHADER_STATE);
}
// Returns the lazily created 2D pipeline for the requested shader.
// Returns nullptr when the shader needs to write depth from the fragment shader and the
// device cannot do that, or when the shader value is not recognized.
Draw2DPipeline *FramebufferManagerCommon::Get2DPipeline(Draw2DShader shader) {
	// Every shader except the plain color copy writes depth from the fragment shader.
	const bool writesDepth = shader != DRAW2D_COPY_COLOR;
	if (writesDepth && !draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
		// Can't do it
		return nullptr;
	}

	switch (shader) {
	case DRAW2D_COPY_COLOR:
		if (!draw2DPipelineColor_) {
			draw2DPipelineColor_ = draw2D_.Create2DPipeline(&GenerateDraw2DCopyColorFs);
		}
		return draw2DPipelineColor_;

	case DRAW2D_COPY_DEPTH:
		if (!draw2DPipelineDepth_) {
			draw2DPipelineDepth_ = draw2D_.Create2DPipeline(&GenerateDraw2DCopyDepthFs);
		}
		return draw2DPipelineDepth_;

	case DRAW2D_565_TO_DEPTH:
		if (!draw2DPipeline565ToDepth_) {
			draw2DPipeline565ToDepth_ = draw2D_.Create2DPipeline(&GenerateDraw2D565ToDepthFs);
		}
		return draw2DPipeline565ToDepth_;

	case DRAW2D_565_TO_DEPTH_DESWIZZLE:
		if (!draw2DPipeline565ToDepthDeswizzle_) {
			draw2DPipeline565ToDepthDeswizzle_ = draw2D_.Create2DPipeline(&GenerateDraw2D565ToDepthDeswizzleFs);
		}
		return draw2DPipeline565ToDepthDeswizzle_;
	}

	return nullptr;
}

View File

@ -1,5 +1,7 @@
#pragma once
#include "GPU/GPU.h"
// For framebuffer copies and similar things that just require passthrough.
struct Draw2DVertex {
float x;
@ -7,3 +9,60 @@ struct Draw2DVertex {
float u;
float v;
};
// Identifies the built-in 2D fragment shaders.
enum Draw2DShader {
// Color passthrough.
DRAW2D_COPY_COLOR,
// Depth passthrough.
DRAW2D_COPY_DEPTH,
// Reinterprets 565 color as 16-bit depth.
DRAW2D_565_TO_DEPTH,
// As above, with a row remap within strips.
DRAW2D_565_TO_DEPTH_DESWIZZLE,
};
// Returns the channel a 2D shader reads from: depth for the depth copy,
// color for every other shader (including unknown values).
inline RasterChannel Draw2DSourceChannel(Draw2DShader shader) {
	return shader == DRAW2D_COPY_DEPTH ? RASTER_DEPTH : RASTER_COLOR;
}
// Description of a 2D pipeline, returned by the shader generator callback that is
// passed to Draw2D::Create2DPipeline().
struct Draw2DPipelineInfo {
// NOTE(review): presumably the channel the shader samples from - confirm against the generators.
RasterChannel readChannel;
// NOTE(review): presumably the channel the shader outputs to - confirm against the generators.
RasterChannel writeChannel;
// NOTE(review): presumably set when the shader needs a second bound texture - confirm.
bool secondTexture;
};
// A 2D pipeline object together with its description and shader source text.
// Instances destroy themselves in Release(), so they must be allocated with `new`
// and must not be used after Release() returns.
struct Draw2DPipeline {
// Underlying backend pipeline. Release() dereferences it without a null check.
Draw::Pipeline *pipeline;
Draw2DPipelineInfo info;
// Freed with delete[] in Release(), so it must come from new[] (or be null).
char *code;
// Releases the backend pipeline, frees the code buffer, then deletes this object.
void Release() {
pipeline->Release();
delete[] code;
// Must stay last: no member may be touched after this line.
delete this;
}
};
class ShaderWriter;
// Helper for simple 2D textured draws (see Draw2DVertex above: framebuffer copies and
// similar passthrough work). Holds the draw context plus two sampler states and a
// vertex shader module, all of which start out null.
class Draw2D {
public:
// Stores the draw context; no other resources are created here.
Draw2D(Draw::DrawContext *draw) : draw_(draw) {}
void DeviceLost();
void DeviceRestore(Draw::DrawContext *draw);
// Builds a pipeline from a generator callback. The callback receives a ShaderWriter and
// returns the Draw2DPipelineInfo describing the result.
// NOTE(review): presumably the callback emits the fragment shader body - confirm in Draw2D.cpp.
Draw2DPipeline *Create2DPipeline(std::function<Draw2DPipelineInfo(ShaderWriter &)> generate);
// Draws vertexCount vertices from verts with the given texture and pipeline.
// texW, texH and scaleFactor default to 0.
// NOTE(review): linearFilter presumably picks between the linear and nearest samplers below - confirm.
void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DPipeline *pipeline, float texW = 0.0f, float texH = 0.0f, int scaleFactor = 0);
// NOTE(review): presumably creates the samplers and vertex shader on demand - confirm.
void Ensure2DResources();
private:
Draw::DrawContext *draw_;
Draw::SamplerState *draw2DSamplerLinear_ = nullptr;
Draw::SamplerState *draw2DSamplerNearest_ = nullptr;
Draw::ShaderModule *draw2DVs_ = nullptr;
};

View File

@ -23,8 +23,10 @@
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/GPU/ShaderWriter.h"
#include "Common/GPU/thin3d.h"
#include "Core/Compatibility.h"
#include "Core/Reporting.h"
#include "Core/Config.h"
#include "Core/System.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/ShaderUniforms.h"
@ -88,9 +90,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool doFlatShading = id.Bit(FS_BIT_FLATSHADE) && !flatBug;
bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL) && !texture3D; // combination with texture3D not supported. Enforced elsewhere too.
bool smoothedDepal = id.Bit(FS_BIT_SHADER_SMOOTHED_DEPAL);
bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE);
bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps;
bool colorToDepth = id.Bit(FS_BIT_COLOR_TO_DEPTH);
GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3);
GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2);
@ -123,7 +125,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT) || colorToDepth;
bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
if (shaderDepal && !doTexture) {
*errorString = "depal requires a texture";
@ -136,11 +138,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}
if (compat.shaderLanguage == ShaderLanguage::GLSL_VULKAN) {
if (colorToDepth) {
WRITE(p, "precision highp int;\n");
WRITE(p, "precision highp float;\n");
}
if (useDiscardStencilBugWorkaround && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n");
}
@ -293,7 +290,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, "};\n");
}
} else if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
if ((shaderDepal || colorWriteMask || colorToDepth) && gl_extensions.IsGLES) {
if ((shaderDepal || colorWriteMask) && gl_extensions.IsGLES) {
WRITE(p, "precision highp int;\n");
}
@ -461,9 +458,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, "PS_OUT main( PS_IN In ) {\n");
WRITE(p, " PS_OUT outfragment;\n");
WRITE(p, " vec4 target;\n");
if (colorToDepth) {
WRITE(p, " float gl_FragDepth;\n");
}
} else {
WRITE(p, "void main() {\n");
}
@ -599,6 +593,31 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}
}
}
} else if (shaderDepal && smoothedDepal) {
// Specific mode for Test Drive. Fixes the banding.
if (doTextureProjection) {
// We don't use textureProj because we need better control and it's probably not much of a savings anyway.
// However it is good for precision on older hardware like PowerVR.
WRITE(p, " vec2 uv = %s.xy/%s.z;\n vec2 uv_round;\n", texcoord, texcoord);
} else {
WRITE(p, " vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord);
}
// Restrictions on this are checked before setting the smoothed flag.
// Only RGB565 and RGBA5551 are supported, and only the specific shifts hitting the
// channels directly.
WRITE(p, " vec4 t = %s(tex, %s.xy);\n", compat.texture, texcoord);
WRITE(p, " uint depalShift = (u_depal_mask_shift_off_fmt >> 8) & 0xFFU;\n");
WRITE(p, " uint depalFmt = (u_depal_mask_shift_off_fmt >> 24) & 0x3U;\n");
WRITE(p, " float index0 = t.r;\n");
WRITE(p, " float mul = 32.0 / 256.0;\n");
WRITE(p, " if (depalFmt == 0) {\n"); // yes, different versions of Test Drive use different formats. Could do compile time by adding more compat flags but meh.
WRITE(p, " if (depalShift == 5) { index0 = t.g; mul = 64.0 / 256.0; }\n");
WRITE(p, " else if (depalShift == 11) { index0 = t.b; }\n");
WRITE(p, " } else {\n");
WRITE(p, " if (depalShift == 5) { index0 = t.g; }\n");
WRITE(p, " else if (depalShift == 10) { index0 = t.b; }\n");
WRITE(p, " }\n");
WRITE(p, " t = %s(pal, vec2(index0 * mul, 0.0));\n", compat.texture);
} else {
if (doTextureProjection) {
// We don't use textureProj because we need better control and it's probably not much of a savings anyway.
@ -1070,22 +1089,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, " %s = vec4(0.0, 0.0, 0.0, %s.z); // blue to alpha\n", compat.fragColor0, compat.fragColor0);
}
if (colorToDepth) {
DepthScaleFactors factors = GetDepthScaleFactors();
if (compat.bitwiseOps) {
WRITE(p, " highp float depthValue = float(int(%s.x * 31.99) | (int(%s.y * 63.99) << 5) | (int(%s.z * 31.99) << 11)) / 65535.0;\n", "v", "v", "v"); // compat.fragColor0, compat.fragColor0, compat.fragColor0);
} else {
// D3D9-compatible alternative
WRITE(p, " highp float depthValue = (floor(%s.x * 31.99) + floor(%s.y * 63.99) * 32.0 + floor(%s.z * 31.99) * 2048.0) / 65535.0;\n", "v", "v", "v"); // compat.fragColor0, compat.fragColor0, compat.fragColor0);
}
if (factors.scale != 1.0 || factors.offset != 0.0) {
WRITE(p, " gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale / 65535.0f, factors.offset);
} else {
WRITE(p, " gl_FragDepth = depthValue;\n");
}
}
if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
const double scale = DepthSliceFactor() * 65535.0;

View File

@ -41,13 +41,14 @@
#include "GPU/Common/PresentationCommon.h"
#include "GPU/Common/TextureCacheCommon.h"
#include "GPU/Common/ReinterpretFramebuffer.h"
#include "GPU/Debugger/Debugger.h"
#include "GPU/Debugger/Record.h"
#include "GPU/Debugger/Stepping.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"
FramebufferManagerCommon::FramebufferManagerCommon(Draw::DrawContext *draw)
: draw_(draw) {
: draw_(draw), draw2D_(draw_) {
presentation_ = new PresentationCommon(draw);
}
@ -105,6 +106,7 @@ void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, G
displayFramebufPtr_ = framebuf;
displayStride_ = stride;
displayFormat_ = format;
GPUDebug::NotifyDisplay(framebuf, stride, format);
GPURecord::NotifyDisplay(framebuf, stride, format);
}
@ -124,7 +126,7 @@ VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) const {
}
u32 FramebufferManagerCommon::ColorBufferByteSize(const VirtualFramebuffer *vfb) const {
return vfb->fb_stride * vfb->height * (vfb->format == GE_FORMAT_8888 ? 4 : 2);
return vfb->fb_stride * vfb->height * (vfb->fb_format == GE_FORMAT_8888 ? 4 : 2);
}
bool FramebufferManagerCommon::ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const {
@ -190,7 +192,7 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, GEBufferForma
// Unless the game is using overlapping buffers, the next buffer should be far enough away.
// This catches some cases where we can know this.
// Hmm. The problem is that we could only catch it for the first of two buffers...
const u32 bpp = fb_format == GE_FORMAT_8888 ? 4 : 2;
const u32 bpp = BufferFormatBytesPerPixel(fb_format);
int avail_height = (nearest_address - fb_address) / (fb_stride * bpp);
if (avail_height < drawing_height && avail_height == region_height) {
drawing_width = std::min(region_width, fb_stride);
@ -220,7 +222,7 @@ void GetFramebufferHeuristicInputs(FramebufferHeuristicParams *params, const GPU
params->z_stride = 0;
}
params->fmt = gstate_c.framebufFormat;
params->fb_format = gstate_c.framebufFormat;
params->isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask();
// Technically, it may write depth later, but we're trying to detect it only when it's really true.
@ -269,7 +271,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
// As there are no clear "framebuffer width" and "framebuffer height" registers,
// we need to infer the size of the current framebuffer somehow.
int drawing_width, drawing_height;
EstimateDrawingSize(params.fb_address, params.fmt, params.viewportWidth, params.viewportHeight, params.regionWidth, params.regionHeight, params.scissorWidth, params.scissorHeight, std::max(params.fb_stride, (u16)4), drawing_width, drawing_height);
EstimateDrawingSize(params.fb_address, params.fb_format, params.viewportWidth, params.viewportHeight, params.regionWidth, params.regionHeight, params.scissorWidth, params.scissorHeight, std::max(params.fb_stride, (u16)4), drawing_width, drawing_height);
gstate_c.SetCurRTOffset(0, 0);
bool vfbFormatChanged = false;
@ -280,14 +282,12 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
WARN_LOG_ONCE(color_equal_z, G3D, "Framebuffer bound with color addr == z addr, likely will not use Z in this pass: %08x", params.fb_address);
}
RasterMode mode = RASTER_MODE_NORMAL;
// Find a matching framebuffer
VirtualFramebuffer *vfb = nullptr;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *v = vfbs_[i];
const u32 bpp = v->format == GE_FORMAT_8888 ? 4 : 2;
const u32 bpp = BufferFormatBytesPerPixel(v->fb_format);
if (params.fb_address == v->fb_address) {
vfb = v;
@ -296,8 +296,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
vfb->fb_stride = params.fb_stride;
vfbFormatChanged = true;
}
if (vfb->format != params.fmt) {
vfb->format = params.fmt;
if (vfb->fb_format != params.fb_format) {
vfb->fb_format = params.fb_format;
vfbFormatChanged = true;
}
@ -318,21 +318,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
vfb->height = drawing_height;
}
break;
} else if (params.fb_address == v->z_address && params.fmt != GE_FORMAT_8888 && params.fb_stride == v->z_stride && !params.isBlending) {
// Looks like the game might be intending to use color to write directly to a Z buffer.
// This is seen in Kuroyou 2.
// Ignore this in this loop, BUT, we do a lookup in the depth tracking afterwards to
// make sure we get the latest one.
WARN_LOG_ONCE(color_matches_z, G3D, "Color framebuffer bound at %08x with likely intent to write explicit Z values using color. fmt = %s", params.fb_address, GeBufferFormatToString(params.fmt));
// Seems impractical to use the other 16-bit formats for this due to the limited control over alpha,
// so we'll simply only support 565.
if (params.fmt == GE_FORMAT_565) {
mode = RASTER_MODE_COLOR_TO_DEPTH;
break;
}
} else if (v->fb_stride == params.fb_stride && v->format == params.fmt) {
u32 v_fb_first_line_end_ptr = v->fb_address + v->fb_stride * 4; // This should be * bpp, but leaving like this until after 1.13 to be safe. The God of War games use this for shadows.
} else if (v->fb_stride == params.fb_stride && v->fb_format == params.fb_format) {
u32 v_fb_first_line_end_ptr = v->fb_address + v->fb_stride * bpp;
u32 v_fb_end_ptr = v->fb_address + v->fb_stride * v->height * bpp;
if (params.fb_address > v->fb_address && params.fb_address < v_fb_first_line_end_ptr) {
@ -347,17 +334,6 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
drawing_width += x_offset;
break;
}
} else if (params.fb_address > v->fb_address && params.fb_address < v_fb_end_ptr && PSP_CoreParameter().compat.flags().AllowLargeFBTextureOffsets) {
if (params.fb_address % params.fb_stride == v->fb_address % params.fb_stride) {
// Framebuffers are overlapping on the Y axis.
const int y_offset = (params.fb_address - v->fb_address) / (bpp * params.fb_stride);
vfb = v;
gstate_c.SetCurRTOffset(0, y_offset);
// To prevent the newSize code from being confused.
drawing_height += y_offset;
break;
}
} else {
// We ignore this match.
// TODO: We can allow X/Y overlaps too, but haven't seen any so safer to not.
@ -365,19 +341,6 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
}
}
if (mode == RASTER_MODE_COLOR_TO_DEPTH) {
// Lookup in the depth tracking to find which VFB has the latest version of this Z buffer.
// Then bind it in color-to-depth mode.
//
// We do this by having a special render mode where we take color and move to
// depth in the fragment shader, and set color writes to off.
//
// We use a special fragment shader flag to convert color to depth.
vfb = GetLatestDepthBufferAt(params.fb_address /* !!! */, params.fb_stride);
}
gstate_c.SetFramebufferRenderMode(mode);
if (vfb) {
if ((drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight)) {
// Even if it's not newly wrong, if this is larger we need to resize up.
@ -413,6 +376,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
// None found? Create one.
if (!vfb) {
gstate_c.usingDepth = false; // reset depth buffer tracking
vfb = new VirtualFramebuffer{};
vfb->fbo = nullptr;
vfb->fb_address = params.fb_address;
@ -426,9 +391,9 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
vfb->newWidth = drawing_width;
vfb->newHeight = drawing_height;
vfb->lastFrameNewSize = gpuStats.numFlips;
vfb->format = params.fmt;
vfb->drawnFormat = params.fmt;
vfb->usageFlags = FB_USAGE_RENDERTARGET;
vfb->fb_format = params.fb_format;
vfb->drawnFormat = params.fb_format;
vfb->usageFlags = FB_USAGE_RENDER_COLOR;
u32 byteSize = ColorBufferByteSize(vfb);
if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + byteSize > framebufRangeEnd_) {
@ -439,20 +404,13 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
ResizeFramebufFBO(vfb, drawing_width, drawing_height, true);
NotifyRenderFramebufferCreated(vfb);
// Looks up by z_address, so if one is found here and not have last pointers equal to this one,
// there is another one.
VirtualFramebuffer *prevDepth = GetLatestDepthBufferAt(vfb->z_address, vfb->z_stride);
// We might already want to copy depth, in case this is a temp buffer. See #7810.
if (prevDepth != vfb) {
if (!params.isClearingDepth && prevDepth) {
BlitFramebufferDepth(prevDepth, vfb);
}
}
// Note that we do not even think about depth right now. That'll be handled
// on the first depth access, which will call SetDepthFramebuffer.
CopyToColorFromOverlappingFramebuffers(vfb);
SetColorUpdated(vfb, skipDrawReason);
INFO_LOG(FRAMEBUF, "Creating FBO for %08x (z: %08x) : %d x %d x %s", vfb->fb_address, vfb->z_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->format));
INFO_LOG(FRAMEBUF, "Creating FBO for %08x (z: %08x) : %d x %d x %s", vfb->fb_address, vfb->z_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format));
vfb->last_frame_render = gpuStats.numFlips;
frameLastFramebufUsed_ = gpuStats.numFlips;
@ -486,18 +444,18 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
} else if (vfbs_[i]->z_stride != 0 && params.z_address == vfbs_[i]->z_address && params.fb_address != vfbs_[i]->fb_address && !sharingReported) {
// This happens a lot, but virtually always it's cleared.
// It's possible the other might not clear, but when every game is reported it's not useful.
if (params.isWritingDepth) {
if (params.isWritingDepth && (vfbs_[i]->usageFlags & FB_USAGE_RENDER_DEPTH)) {
WARN_LOG(SCEGE, "FBO reusing depthbuffer, c=%08x/d=%08x and c=%08x/d=%08x", params.fb_address, params.z_address, vfbs_[i]->fb_address, vfbs_[i]->z_address);
sharingReported = true;
}
}
}
// We already have it!
// We already have it!
} else if (vfb != currentRenderVfb_) {
// Use it as a render target.
DEBUG_LOG(FRAMEBUF, "Switching render target to FBO for %08x: %d x %d x %d ", vfb->fb_address, vfb->width, vfb->height, vfb->format);
vfb->usageFlags |= FB_USAGE_RENDERTARGET;
DEBUG_LOG(FRAMEBUF, "Switching render target to FBO for %08x: %d x %d x %d ", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format);
vfb->usageFlags |= FB_USAGE_RENDER_COLOR;
vfb->last_frame_render = gpuStats.numFlips;
frameLastFramebufUsed_ = gpuStats.numFlips;
vfb->dirtyAfterDisplay = true;
@ -507,18 +465,20 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
VirtualFramebuffer *prev = currentRenderVfb_;
currentRenderVfb_ = vfb;
NotifyRenderFramebufferSwitched(prev, vfb, params.isClearingDepth);
CopyToColorFromOverlappingFramebuffers(vfb);
gstate_c.usingDepth = false; // reset depth buffer tracking
} else {
// Something changed, but we still got the same framebuffer we were already rendering to.
// Might not be a lot to do here, we check in NotifyRenderFramebufferUpdated
vfb->last_frame_render = gpuStats.numFlips;
frameLastFramebufUsed_ = gpuStats.numFlips;
vfb->dirtyAfterDisplay = true;
if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
vfb->reallyDirtyAfterDisplay = true;
NotifyRenderFramebufferUpdated(vfb, vfbFormatChanged);
}
vfb->colorBindSeq = GetBindSeqCount();
vfb->depthBindSeq = GetBindSeqCount();
gstate_c.curRTWidth = vfb->width;
gstate_c.curRTHeight = vfb->height;
@ -527,6 +487,182 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
return vfb;
}
// Called on the first use of depth in a render pass.
// Does nothing when there is no current render framebuffer. Otherwise marks the current
// framebuffer as having depth rendered to it and stamps it with a fresh depth bind sequence.
// isClearingDepth: when true, the copy from overlapping buffers is skipped.
void FramebufferManagerCommon::SetDepthFrameBuffer(bool isClearingDepth) {
if (!currentRenderVfb_) {
return;
}
// If this first draw call is anything other than a clear, "resolve" the depth buffer,
// by copying from any overlapping buffers with fresher content.
if (!isClearingDepth) {
CopyToDepthFromOverlappingFramebuffers(currentRenderVfb_);
}
currentRenderVfb_->usageFlags |= FB_USAGE_RENDER_DEPTH;
// Updated only after the copy above: that copy picks sources by comparing their bind
// sequence numbers against this buffer's previous depthBindSeq.
currentRenderVfb_->depthBindSeq = GetBindSeqCount();
}
// One candidate source for an overlap copy: the framebuffer, which of its channels to
// read, and the offset of the destination relative to it.
// Ordered by the bind sequence number of the selected channel, so sorting a list of
// these puts the most recently bound source last.
struct CopySource {
	VirtualFramebuffer *vfb;
	RasterChannel channel;
	int xOffset;
	int yOffset;

	// Bind sequence number of the channel this source refers to.
	int seq() const {
		if (channel == RASTER_DEPTH) {
			return vfb->depthBindSeq;
		}
		return vfb->colorBindSeq;
	}

	bool operator < (const CopySource &other) const {
		const int mine = seq();
		const int theirs = other.seq();
		return mine < theirs;
	}
};
// Not sure if it's more profitable to always do these copies with raster (which may screw up early-Z due to explicit depth buffer write)
// or to use image copies when possible (which may make it easier for the driver to preserve early-Z, but on the other hand, will cost additional memory
// bandwidth on tilers due to the load operation, which we might otherwise be able to skip).
//
// Collects every other framebuffer that holds newer data for dest's depth memory, either as
// a depth buffer at the same address/stride or as a 565 color buffer located exactly there,
// and copies from the most recently bound one into dest's depth. Always dirties the listed
// render state at the end, whether or not a copy happened.
void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFramebuffer *dest) {
	std::vector<CopySource> candidates;
	for (VirtualFramebuffer *other : vfbs_) {
		if (other == dest)
			continue;

		const bool colorSitsOnDepth =
			other->fb_address == dest->z_address &&
			other->fb_stride == dest->z_stride &&
			other->fb_format == GE_FORMAT_565;
		if (colorSitsOnDepth) {
			// Only interesting if the source has newer data than the current buffer.
			if (other->colorBindSeq > dest->depthBindSeq) {
				candidates.push_back(CopySource{ other, RASTER_COLOR, 0, 0 });
			}
			// A color match, fresh or not, means the depth match below is not considered.
			continue;
		}

		const bool sharesDepth =
			other->z_address == dest->z_address &&
			other->z_stride == dest->z_stride &&
			other->depthBindSeq > dest->depthBindSeq;
		if (sharesDepth) {
			candidates.push_back(CopySource{ other, RASTER_DEPTH, 0, 0 });
		}
		// TODO: Do more detailed overlap checks here.
	}

	std::sort(candidates.begin(), candidates.end());

	// TODO: A full copy will overwrite anything else. So we can eliminate
	// anything that comes before such a copy.

	// For now, let's just do the last thing, if there are multiple.
	if (!candidates.empty()) {
		draw_->InvalidateCachedState();

		const CopySource &latest = candidates.back();
		if (latest.channel == RASTER_DEPTH) {
			// Good old depth->depth copy.
			BlitFramebufferDepth(latest.vfb, dest);
			gpuStats.numDepthCopies++;
			dest->last_frame_depth_updated = gpuStats.numFlips;
		} else if (latest.channel == RASTER_COLOR && draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
			VirtualFramebuffer *src = latest.vfb;
			if (src->drawnFormat != GE_FORMAT_565) {
				WARN_LOG_ONCE(not565, G3D, "Drawn fb_format of buffer at %08x not 565 as expected", src->fb_address);
			}

			// Really hate to do this, but tracking the depth swizzle state across multiple
			// copies is not easy.
			const Draw2DShader shader = PSP_CoreParameter().compat.flags().DeswizzleDepth
				? DRAW2D_565_TO_DEPTH_DESWIZZLE
				: DRAW2D_565_TO_DEPTH;

			gpuStats.numReinterpretCopies++;

			// Copying color to depth. Both rectangles use the source's render size.
			BlitUsingRaster(
				src->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight,
				dest->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight,
				false, Get2DPipeline(shader), "565_to_depth");
		}
	}

	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE);
}
// Call this after the target has been bound for rendering. For color, raster is probably always going to win over blits/copies.
//
// Looks through all other tracked framebuffers for ones that share dst's stride and format,
// start a whole number of rows away from dst, overlap it vertically, and whose color was not
// bound before dst's. Each match is drawn into dst with the color-copy pipeline, in ascending
// bind-sequence order, so the most recently bound source is drawn last.
//
// Vertical overlap is currently only considered when the AllowLargeFBTextureOffsets
// compatibility flag is set.
void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFramebuffer *dst) {
	std::vector<CopySource> sources;
	for (auto src : vfbs_) {
		// Skip the destination itself and anything whose color was bound before dst's.
		if (src == dst || src->colorBindSeq < dst->colorBindSeq)
			continue;

		if (src->fb_address == dst->fb_address && src->fb_stride == dst->fb_stride) {
			// Another render target at the exact same location but gotta be a different format, otherwise
			// it would be the same.
			_dbg_assert_(src->fb_format != dst->fb_format);
			WARN_LOG_ONCE(reint, G3D, "Reinterpret detected at %08x", src->fb_address);
			// This is where we'll do reinterprets in the future.
		} else if (src->fb_stride == dst->fb_stride && src->fb_format == dst->fb_format) {
			const u32 bytesPerPixel = BufferFormatBytesPerPixel(src->fb_format);

			// Same for both src and dest. Deliberately signed: dividing a negative byte delta
			// by an unsigned stride would convert the delta to unsigned and produce a huge
			// positive row offset instead of a negative one.
			const int strideInBytes = (int)(src->fb_stride * bytesPerPixel);
			if (strideInBytes <= 0) {
				// A zero stride describes no rows, and would divide by zero below.
				continue;
			}

			// Initially we'll only allow pure horizontal and vertical overlap,
			// to reduce the risk for false positives. We can allow diagonal overlap too if needed
			// in the future.

			// Check for potential vertical overlap, like in Juiced 2.
			const int byteDelta = (int)dst->fb_address - (int)src->fb_address;

			// TODO: Get rid of the compatibility flag check.
			if (byteDelta % strideInBytes != 0
				|| !PSP_CoreParameter().compat.flags().AllowLargeFBTextureOffsets) {
				// Buffers not stride-aligned - ignoring for now.
				continue;
			}

			// Buffers are aligned. yOffset is the src row at which dst's first row sits;
			// it is negative when dst starts before src in memory.
			const int xOffset = 0;
			const int yOffset = byteDelta / strideInBytes;

			// The copy below places src at dst row -yOffset and covers src->height rows, so
			// the two overlap exactly when -dst->height < yOffset < src->height.
			if (yOffset >= (int)src->height) {
				// Not overlapping: dst starts at or past the end of src.
				continue;
			} else if (yOffset <= -(int)dst->height) {
				// Not overlapping: src starts at or past the end of dst.
				continue;
			}

			gpuStats.numColorCopies++;
			sources.push_back(CopySource{ src, RASTER_COLOR, xOffset, yOffset });
		}
	}

	std::sort(sources.begin(), sources.end());

	draw_->InvalidateCachedState();

	for (const CopySource &source : sources) {
		VirtualFramebuffer *src = source.vfb;

		// Copy a rectangle from the original to the new buffer.

		// Yes, we mean to look at src->width/height for the dest rectangle.
		int srcWidth = src->width * src->renderScaleFactor;
		int srcHeight = src->height * src->renderScaleFactor;
		int dstWidth = src->width * dst->renderScaleFactor;
		int dstHeight = src->height * dst->renderScaleFactor;

		int dstX1 = -source.xOffset * dst->renderScaleFactor;
		int dstY1 = -source.yOffset * dst->renderScaleFactor;
		int dstX2 = dstX1 + dstWidth;
		int dstY2 = dstY1 + dstHeight;

		BlitUsingRaster(src->fbo, 0.0f, 0.0f, srcWidth, srcHeight,
			dst->fbo, dstX1, dstY1, dstX2, dstY2, false, Get2DPipeline(DRAW2D_COPY_COLOR), "copy_color");
	}
}
void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) {
// Notify the texture cache of both the color and depth buffers.
textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED);
@ -597,25 +733,10 @@ void FramebufferManagerCommon::BlitFramebufferDepth(VirtualFramebuffer *src, Vir
draw_->BlitFramebuffer(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, Draw::FB_DEPTH_BIT, Draw::FB_BLIT_NEAREST, "BlitFramebufferDepth");
RebindFramebuffer("After BlitFramebufferDepth");
} else if (useRaster) {
BlitUsingRaster(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, false, RasterChannel::RASTER_DEPTH);
BlitUsingRaster(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, false, Get2DPipeline(Draw2DShader::DRAW2D_COPY_DEPTH), "BlitDepthRaster");
}
draw_->InvalidateCachedState();
gpuStats.numDepthCopies++;
dst->last_frame_depth_updated = gpuStats.numFlips;
}
VirtualFramebuffer *FramebufferManagerCommon::GetLatestDepthBufferAt(u32 z_address, u16 z_stride) {
int maxSeq = -1;
VirtualFramebuffer *latestDepth = nullptr;
for (auto vfb : vfbs_) {
if (vfb->z_address == z_address && vfb->z_stride == z_stride && vfb->depthBindSeq > maxSeq) {
maxSeq = vfb->depthBindSeq;
latestDepth = vfb;
}
}
return latestDepth;
}
void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) {
@ -641,8 +762,8 @@ void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer
void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) {
if (vfbFormatChanged) {
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED);
if (vfb->drawnFormat != vfb->format) {
ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->format);
if (vfb->drawnFormat != vfb->fb_format) {
ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->fb_format);
}
}
@ -666,19 +787,8 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe
textureCache_->ForgetLastTexture();
shaderManager_->DirtyLastShader();
// Copy depth between the framebuffers, if the z_address is the same (checked inside.)
VirtualFramebuffer * prevDepth = GetLatestDepthBufferAt(vfb->z_address, vfb->z_stride);
// We might already want to copy depth, in case this is a temp buffer. See #7810.
if (prevDepth != vfb) {
if (!isClearingDepth && prevDepth) {
BlitFramebufferDepth(prevDepth, vfb);
}
prevDepth = vfb;
}
if (vfb->drawnFormat != vfb->format) {
ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->format);
if (vfb->drawnFormat != vfb->fb_format) {
ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->fb_format);
}
if (useBufferedRendering_) {
@ -723,9 +833,9 @@ void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int width,
// TODO: Could possibly be an offset...
VirtualFramebuffer *vfb = GetVFBAt(addr);
if (vfb) {
if (vfb->format != fmt || vfb->drawnFormat != fmt) {
DEBUG_LOG(ME, "Changing format for %08x from %d to %d", addr, vfb->drawnFormat, fmt);
vfb->format = fmt;
if (vfb->fb_format != fmt || vfb->drawnFormat != fmt) {
DEBUG_LOG(ME, "Changing fb_format for %08x from %d to %d", addr, vfb->drawnFormat, fmt);
vfb->fb_format = fmt;
vfb->drawnFormat = fmt;
// Let's count this as a "render". This will also force us to use the correct format.
@ -734,7 +844,7 @@ void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int width,
if (vfb->fb_stride < width) {
DEBUG_LOG(ME, "Changing stride for %08x from %d to %d", addr, vfb->fb_stride, width);
const int bpp = fmt == GE_FORMAT_8888 ? 4 : 2;
const int bpp = BufferFormatBytesPerPixel(fmt);
ResizeFramebufFBO(vfb, width, size / (bpp * width));
// Resizing may change the viewport/etc.
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
@ -762,7 +872,7 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) {
FlushBeforeCopy();
if (useBufferedRendering_ && vfb->fbo) {
GEBufferFormat fmt = vfb->format;
GEBufferFormat fmt = vfb->fb_format;
if (vfb->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf) {
// If we're not rendering to it, format may be wrong. Use displayFormat_ instead.
fmt = displayFormat_;
@ -770,7 +880,7 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) {
DrawPixels(vfb, 0, 0, Memory::GetPointer(addr), fmt, vfb->fb_stride, vfb->width, vfb->height);
SetColorUpdated(vfb, gstate_c.skipDrawReason);
} else {
INFO_LOG(FRAMEBUF, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format);
INFO_LOG(FRAMEBUF, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format);
DestroyFramebuf(vfb);
vfbs_.erase(vfbs_.begin() + i--);
}
@ -1045,7 +1155,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
const u32 v_addr = v->fb_address & 0x3FFFFFFF;
const u32 v_size = ColorBufferByteSize(v);
if (addr >= v_addr && addr < v_addr + v_size) {
const u32 dstBpp = v->format == GE_FORMAT_8888 ? 4 : 2;
const u32 dstBpp = BufferFormatBytesPerPixel(v->fb_format);
const u32 v_offsetX = ((addr - v_addr) / dstBpp) % v->fb_stride;
const u32 v_offsetY = ((addr - v_addr) / dstBpp) / v->fb_stride;
// We have enough space there for the display, right?
@ -1068,10 +1178,10 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
}
}
if (vfb && vfb->format != displayFormat_) {
if (vfb && vfb->fb_format != displayFormat_) {
if (vfb->last_frame_render + FBO_OLD_AGE < gpuStats.numFlips) {
// The game probably switched formats on us.
vfb->format = displayFormat_;
vfb->fb_format = displayFormat_;
} else {
vfb = 0;
}
@ -1171,7 +1281,7 @@ void FramebufferManagerCommon::DecimateFBOs() {
if (vfb != displayFramebuf_ && vfb != prevDisplayFramebuf_ && vfb != prevPrevDisplayFramebuf_) {
if (age > FBO_OLD_AGE) {
INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age);
INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format, age);
DestroyFramebuf(vfb);
vfbs_.erase(vfbs_.begin() + i--);
}
@ -1193,7 +1303,7 @@ void FramebufferManagerCommon::DecimateFBOs() {
VirtualFramebuffer *vfb = bvfbs_[i];
int age = frameLastFramebufUsed_ - vfb->last_frame_render;
if (age > FBO_OLD_AGE) {
INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age);
INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format, age);
DestroyFramebuf(vfb);
bvfbs_.erase(bvfbs_.begin() + i--);
}
@ -1269,7 +1379,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w,
shaderManager_->DirtyLastShader();
char tag[128];
size_t len = snprintf(tag, sizeof(tag), "FB_%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->format));
size_t len = snprintf(tag, sizeof(tag), "FB_%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->fb_format));
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, 1, true, tag });
if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) {
NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, ColorBufferByteSize(vfb), tag, len);
@ -1280,7 +1390,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w,
NotifyMemInfo(MemBlockFlags::ALLOC, vfb->z_address, vfb->fb_stride * vfb->height * sizeof(uint16_t), buf, len);
}
if (old.fbo) {
INFO_LOG(FRAMEBUF, "Resizing FBO for %08x : %dx%dx%s", vfb->fb_address, w, h, GeBufferFormatToString(vfb->format));
INFO_LOG(FRAMEBUF, "Resizing FBO for %08x : %dx%dx%s", vfb->fb_address, w, h, GeBufferFormatToString(vfb->fb_format));
if (vfb->fbo) {
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
if (!skipCopy) {
@ -1327,7 +1437,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
// We only remove the kernel and uncached bits when comparing.
const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
const u32 vfb_size = ColorBufferByteSize(vfb);
const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
const u32 vfb_bpp = BufferFormatBytesPerPixel(vfb->fb_format);
const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
const int vfb_byteWidth = vfb->width * vfb_bpp;
@ -1375,7 +1485,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
// Note - if we're here, we're in a memcpy, not a block transfer. Not allowing IntraVRAMBlockTransferAllowCreateFB.
// Technically, that makes BlockTransferAllowCreateFB a bit of a misnomer.
if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB) {
dstBuffer = CreateRAMFramebuffer(dst, srcBuffer->width, srcBuffer->height, srcBuffer->fb_stride, srcBuffer->format);
dstBuffer = CreateRAMFramebuffer(dst, srcBuffer->width, srcBuffer->height, srcBuffer->fb_stride, srcBuffer->fb_format);
dstY = 0;
}
}
@ -1401,7 +1511,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x (size: %x)", src, dst, size);
FlushBeforeCopy();
const u8 *srcBase = Memory::GetPointerUnchecked(src);
DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH);
DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->fb_format, dstBuffer->fb_stride, dstBuffer->width, dstH);
SetColorUpdated(dstBuffer, skipDrawReason);
RebindFramebuffer("RebindFramebuffer - Memcpy fbo upload");
// This is a memcpy, let's still copy just in case.
@ -1437,7 +1547,7 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst
VirtualFramebuffer *vfb = vfbs_[i];
const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
const u32 vfb_size = ColorBufferByteSize(vfb);
const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
const u32 vfb_bpp = BufferFormatBytesPerPixel(vfb->fb_format);
const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
const u32 vfb_byteWidth = vfb->width * vfb_bpp;
@ -1512,10 +1622,10 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst
if (bpp == 4) {
// Only one possibility unless it's doing split pixel tricks (which we could detect through stride maybe).
ramFormat = GE_FORMAT_8888;
} else if (srcBuffer->format != GE_FORMAT_8888) {
} else if (srcBuffer->fb_format != GE_FORMAT_8888) {
// We guess that the game will interpret the data the same as it was in the source of the copy.
// Seems like a likely good guess, and works in Test Drive Unlimited.
ramFormat = srcBuffer->format;
ramFormat = srcBuffer->fb_format;
} else {
// No info left - just fall back to something. But this is definitely split pixel tricks.
ramFormat = GE_FORMAT_5551;
@ -1538,7 +1648,7 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst
}
VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAddress, int width, int height, int stride, GEBufferFormat format) {
INFO_LOG(G3D, "Creating RAM framebuffer at %08x (%dx%d, stride %d, format %d)", fbAddress, width, height, stride, format);
INFO_LOG(G3D, "Creating RAM framebuffer at %08x (%dx%d, stride %d, fb_format %d)", fbAddress, width, height, stride, format);
// A target for the destination is missing - so just create one!
// Make sure this one would be found by the algorithm above so we wouldn't
@ -1559,9 +1669,9 @@ VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAd
vfb->renderHeight = (u16)(vfb->height * renderScaleFactor_);
vfb->bufferWidth = vfb->width;
vfb->bufferHeight = vfb->height;
vfb->format = format;
vfb->fb_format = format;
vfb->drawnFormat = GE_FORMAT_8888;
vfb->usageFlags = FB_USAGE_RENDERTARGET;
vfb->usageFlags = FB_USAGE_RENDER_COLOR;
SetColorUpdated(vfb, 0);
char name[64];
snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address);
@ -1585,7 +1695,7 @@ VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFram
// We maintain a separate vector of framebuffer objects for blitting.
for (VirtualFramebuffer *v : bvfbs_) {
if (v->fb_address == vfb->fb_address && v->format == vfb->format) {
if (v->fb_address == vfb->fb_address && v->fb_format == vfb->fb_format) {
if (v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) {
nvfb = v;
v->fb_stride = vfb->fb_stride;
@ -1611,10 +1721,10 @@ VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFram
nvfb->renderScaleFactor = 1; // For readbacks we resize to the original size, of course.
nvfb->bufferWidth = vfb->bufferWidth;
nvfb->bufferHeight = vfb->bufferHeight;
nvfb->format = vfb->format;
nvfb->fb_format = vfb->fb_format;
nvfb->drawnWidth = vfb->drawnWidth;
nvfb->drawnHeight = vfb->drawnHeight;
nvfb->drawnFormat = vfb->format;
nvfb->drawnFormat = vfb->fb_format;
char name[64];
snprintf(name, sizeof(name), "download_temp");
@ -1629,7 +1739,7 @@ VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFram
UpdateDownloadTempBuffer(nvfb);
}
nvfb->usageFlags |= FB_USAGE_RENDERTARGET;
nvfb->usageFlags |= FB_USAGE_RENDER_COLOR;
nvfb->last_frame_render = gpuStats.numFlips;
nvfb->dirtyAfterDisplay = true;
@ -1649,7 +1759,7 @@ void FramebufferManagerCommon::ApplyClearToMemory(int x1, int y1, int x2, int y2
}
u8 *addr = Memory::GetPointerWriteUnchecked(gstate.getFrameBufAddress());
const int bpp = gstate_c.framebufFormat == GE_FORMAT_8888 ? 4 : 2;
const int bpp = BufferFormatBytesPerPixel(gstate_c.framebufFormat);
u32 clearBits = clearColor;
if (bpp == 2) {
@ -1782,7 +1892,7 @@ bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dst
dstBasePtr, dstX, dstY, dstStride);
FlushBeforeCopy();
if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) {
const int srcBpp = srcBuffer->format == GE_FORMAT_8888 ? 4 : 2;
const int srcBpp = BufferFormatBytesPerPixel(srcBuffer->fb_format);
const float srcXFactor = (float)bpp / srcBpp;
const bool tooTall = srcY + srcHeight > srcBuffer->bufferHeight;
if (srcHeight <= 0 || (tooTall && srcY != 0)) {
@ -1833,7 +1943,7 @@ void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstS
WARN_LOG_ONCE(btu, G3D, "Block transfer upload %08x -> %08x", srcBasePtr, dstBasePtr);
FlushBeforeCopy();
const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
int dstBpp = dstBuffer->format == GE_FORMAT_8888 ? 4 : 2;
int dstBpp = BufferFormatBytesPerPixel(dstBuffer->fb_format);
float dstXFactor = (float)bpp / dstBpp;
if (dstWidth > dstBuffer->width || dstHeight > dstBuffer->height) {
// The buffer isn't big enough, and we have a clear hint of size. Resize.
@ -1846,7 +1956,7 @@ void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstS
// Resizing may change the viewport/etc.
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
}
DrawPixels(dstBuffer, static_cast<int>(dstX * dstXFactor), dstY, srcBase, dstBuffer->format, static_cast<int>(srcStride * dstXFactor), static_cast<int>(dstWidth * dstXFactor), dstHeight);
DrawPixels(dstBuffer, static_cast<int>(dstX * dstXFactor), dstY, srcBase, dstBuffer->fb_format, static_cast<int>(srcStride * dstXFactor), static_cast<int>(dstWidth * dstXFactor), dstHeight);
SetColorUpdated(dstBuffer, skipDrawReason);
RebindFramebuffer("RebindFramebuffer - NotifyBlockTransferAfter");
}
@ -1892,7 +2002,7 @@ void FramebufferManagerCommon::DestroyAllFBOs() {
prevPrevDisplayFramebuf_ = nullptr;
for (VirtualFramebuffer *vfb : vfbs_) {
INFO_LOG(FRAMEBUF, "Destroying FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format);
INFO_LOG(FRAMEBUF, "Destroying FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format);
DestroyFramebuf(vfb);
}
vfbs_.clear();
@ -1923,7 +2033,7 @@ Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u
bool z_stencil = reason == TempFBO::STENCIL;
char name[128];
snprintf(name, sizeof(name), "temp_fbo_%dx%d%s", w, h, z_stencil ? "_depth" : "");
snprintf(name, sizeof(name), "temp_fbo_%dx%d%s", w / renderScaleFactor_, h / renderScaleFactor_, z_stencil ? "_depth" : "");
Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, z_stencil, name });
if (!fbo) {
return nullptr;
@ -1946,7 +2056,7 @@ void FramebufferManagerCommon::UpdateFramebufUsage(VirtualFramebuffer *vfb) {
checkFlag(FB_USAGE_DISPLAYED_FRAMEBUFFER, vfb->last_frame_displayed);
checkFlag(FB_USAGE_TEXTURE, vfb->last_frame_used);
checkFlag(FB_USAGE_RENDERTARGET, vfb->last_frame_render);
checkFlag(FB_USAGE_RENDER_COLOR, vfb->last_frame_render);
checkFlag(FB_USAGE_CLUT, vfb->last_frame_clut);
}
@ -2136,7 +2246,7 @@ void FramebufferManagerCommon::PackFramebufferSync_(VirtualFramebuffer *vfb, int
const u32 fb_address = vfb->fb_address & 0x3FFFFFFF;
Draw::DataFormat destFormat = GEFormatToThin3D(vfb->format);
Draw::DataFormat destFormat = GEFormatToThin3D(vfb->fb_format);
const int dstBpp = (int)DataFormatSizeInBytes(destFormat);
const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp;
@ -2156,7 +2266,7 @@ void FramebufferManagerCommon::PackFramebufferSync_(VirtualFramebuffer *vfb, int
if (destPtr) {
draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, x, y, w, h, destFormat, destPtr, vfb->fb_stride, "PackFramebufferSync_");
char tag[128];
size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->format));
size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->fb_format));
NotifyMemInfo(MemBlockFlags::WRITE, fb_address + dstByteOffset, dstSize, tag, len);
} else {
ERROR_LOG(G3D, "PackFramebufferSync_: Tried to readback to bad address %08x (stride = %d)", fb_address + dstByteOffset, vfb->fb_stride);
@ -2233,7 +2343,7 @@ void FramebufferManagerCommon::FlushBeforeCopy() {
void FramebufferManagerCommon::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) {
VirtualFramebuffer *vfb = GetVFBAt(fb_address);
if (vfb && vfb->fb_stride != 0) {
const u32 bpp = vfb->drawnFormat == GE_FORMAT_8888 ? 4 : 2;
const u32 bpp = BufferFormatBytesPerPixel(vfb->drawnFormat);
int x = 0;
int y = 0;
int pixels = loadBytes / bpp;
@ -2287,7 +2397,7 @@ std::vector<FramebufferInfo> FramebufferManagerCommon::GetFramebufferList() cons
FramebufferInfo info;
info.fb_address = vfb->fb_address;
info.z_address = vfb->z_address;
info.format = vfb->format;
info.format = vfb->fb_format;
info.width = vfb->width;
info.height = vfb->height;
info.fbo = vfb->fbo;
@ -2314,22 +2424,21 @@ void FramebufferManagerCommon::DeviceLost() {
DoRelease(reinterpretFromTo_[i][j]);
}
}
DoRelease(reinterpretVBuf_);
DoRelease(reinterpretSampler_);
DoRelease(reinterpretVS_);
DoRelease(stencilUploadSampler_);
DoRelease(stencilUploadPipeline_);
DoRelease(draw2DSamplerNearest_);
DoRelease(draw2DSamplerLinear_);
DoRelease(draw2DVs_);
DoRelease(draw2DPipelineColor_);
DoRelease(draw2DPipelineDepth_);
DoRelease(draw2DPipeline565ToDepth_);
DoRelease(draw2DPipeline565ToDepthDeswizzle_);
draw2D_.DeviceLost();
draw_ = nullptr;
}
// Installs a new draw context (DeviceLost() resets draw_ to nullptr) and hands the same
// context on to the draw2D_ helper and the presentation_ object.
void FramebufferManagerCommon::DeviceRestore(Draw::DrawContext *draw) {
draw_ = draw;
draw2D_.DeviceRestore(draw_);
presentation_->DeviceRestore(draw);
}
@ -2381,7 +2490,7 @@ void FramebufferManagerCommon::DrawActiveTexture(float x, float y, float w, floa
// Rearrange to strip form.
std::swap(coord[2], coord[3]);
DrawStrip2D(nullptr, coord, 4, (flags & DRAWTEX_LINEAR) != 0, RASTER_COLOR);
draw2D_.DrawStrip2D(nullptr, coord, 4, (flags & DRAWTEX_LINEAR) != 0, Get2DPipeline(DRAW2D_COPY_COLOR));
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
}
@ -2430,8 +2539,9 @@ void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX
float srcXFactor = src->renderScaleFactor;
float srcYFactor = src->renderScaleFactor;
const int srcBpp = src->format == GE_FORMAT_8888 ? 4 : 2;
const int srcBpp = BufferFormatBytesPerPixel(src->fb_format);
if (srcBpp != bpp && bpp != 0) {
// If we do this, we're kinda in nonsense territory since the actual formats won't match (unless intentionally blitting black or white).
srcXFactor = (srcXFactor * bpp) / srcBpp;
}
int srcX1 = srcX * srcXFactor;
@ -2441,8 +2551,9 @@ void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX
float dstXFactor = dst->renderScaleFactor;
float dstYFactor = dst->renderScaleFactor;
const int dstBpp = dst->format == GE_FORMAT_8888 ? 4 : 2;
const int dstBpp = BufferFormatBytesPerPixel(dst->fb_format);
if (dstBpp != bpp && bpp != 0) {
// If we do this, we're kinda in nonsense territory since the actual formats won't match (unless intentionally blitting black or white).
dstXFactor = (dstXFactor * bpp) / dstBpp;
}
int dstX1 = dstX * dstXFactor;
@ -2475,13 +2586,14 @@ void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX
draw_->BlitFramebuffer(src->fbo, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2,
channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, Draw::FB_BLIT_NEAREST, tag);
} else {
Draw2DPipeline *pipeline = Get2DPipeline(channel == RASTER_COLOR ? DRAW2D_COPY_COLOR : DRAW2D_COPY_DEPTH);
Draw::Framebuffer *srcFBO = src->fbo;
if (src == dst) {
Draw::Framebuffer *tempFBO = GetTempFBO(TempFBO::BLIT, src->renderWidth, src->renderHeight);
BlitUsingRaster(src->fbo, srcX1, srcY1, srcX2, srcY2, tempFBO, dstX1, dstY1, dstX2, dstY2, false, channel);
BlitUsingRaster(src->fbo, srcX1, srcY1, srcX2, srcY2, tempFBO, dstX1, dstY1, dstX2, dstY2, false, pipeline, tag);
srcFBO = tempFBO;
}
BlitUsingRaster(srcFBO, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2, false, channel);
BlitUsingRaster(srcFBO, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2, false, pipeline, tag);
}
draw_->InvalidateCachedState();
@ -2489,13 +2601,14 @@ void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_RASTER_STATE);
}
// The input is raw pixel coordinates, scale not taken into account.
void FramebufferManagerCommon::BlitUsingRaster(
Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2,
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2,
bool linearFilter,
RasterChannel channel) {
Draw2DPipeline *pipeline, const char *tag) {
if (channel == RASTER_DEPTH) {
if (pipeline->info.writeChannel == RASTER_DEPTH) {
_dbg_assert_(draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported);
}
@ -2517,13 +2630,14 @@ void FramebufferManagerCommon::BlitUsingRaster(
// Unbind the texture first to avoid the D3D11 hazard check (can't set render target to things bound as textures and vice versa, not even temporarily).
draw_->BindTexture(0, nullptr);
// This will get optimized away in case it's already bound (in VK and GL at least..)
draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "BlitUsingRaster");
draw_->BindFramebufferAsTexture(src, 0, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, 0);
draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag ? tag : "BlitUsingRaster");
draw_->BindFramebufferAsTexture(src, 0, pipeline->info.readChannel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, 0);
Draw::Viewport vp{ 0.0f, 0.0f, (float)dest->Width(), (float)dest->Height(), 0.0f, 1.0f };
draw_->SetViewports(1, &vp);
draw_->SetScissorRect(0, 0, (int)dest->Width(), (int)dest->Height());
DrawStrip2D(nullptr, vtx, 4, linearFilter, channel);
draw2D_.DrawStrip2D(nullptr, vtx, 4, linearFilter, pipeline, src->Width(), src->Height(), renderScaleFactor_);
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
}

View File

@ -37,13 +37,14 @@
// Usage flags for a framebuffer. Every value is a single bit, so flags can be OR-ed
// together into one usageFlags field.
enum {
FB_USAGE_DISPLAYED_FRAMEBUFFER = 1,
// NOTE(review): FB_USAGE_RENDERTARGET and FB_USAGE_RENDER_COLOR both map to 2 here. This looks
// like the old and the new name of the same flag listed side by side - confirm whether the
// old name is meant to stay.
FB_USAGE_RENDERTARGET = 2,
FB_USAGE_RENDER_COLOR = 2,
FB_USAGE_TEXTURE = 4,
FB_USAGE_CLUT = 8,
FB_USAGE_DOWNLOAD = 16,
FB_USAGE_DOWNLOAD_CLEAR = 32,
FB_USAGE_BLUE_TO_ALPHA = 64,
FB_USAGE_FIRST_FRAME_SAVED = 128,
FB_USAGE_RENDER_DEPTH = 256,
};
enum {
@ -71,6 +72,11 @@ struct VirtualFramebuffer {
u16 fb_stride;
u16 z_stride;
// The original PSP format of the framebuffer.
// In reality they are all RGBA8888 for better quality but this is what the PSP thinks it is. This is necessary
// when we need to interpret the bits directly (depal or buffer aliasing).
GEBufferFormat fb_format;
// width/height: The detected size of the current framebuffer, in original PSP pixels.
u16 width;
u16 height;
@ -98,11 +104,6 @@ struct VirtualFramebuffer {
// The scale factor at which we are rendering (to achieve higher resolution).
u8 renderScaleFactor;
// The original PSP format of the framebuffer.
// In reality they are all RGBA8888 for better quality but this is what the PSP thinks it is. This is necessary
// when we need to interpret the bits directly (depal or buffer aliasing).
GEBufferFormat format;
// The configured buffer format at the time of the latest/current draw. This will change first, then
// if different we'll "reinterpret" the framebuffer to match 'fb_format' as needed.
GEBufferFormat drawnFormat;
@ -152,7 +153,7 @@ struct FramebufferHeuristicParams {
u32 z_address;
u16 fb_stride;
u16 z_stride;
GEBufferFormat fmt;
GEBufferFormat fb_format;
bool isClearingDepth;
bool isWritingDepth;
bool isDrawing;
@ -266,6 +267,8 @@ public:
return vfb;
}
}
void SetDepthFrameBuffer(bool isClearingDepth);
void RebindFramebuffer(const char *tag);
std::vector<FramebufferInfo> GetFramebufferList() const;
@ -288,8 +291,6 @@ public:
void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes);
void DrawFramebufferToOutput(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride);
VirtualFramebuffer *GetLatestDepthBufferAt(u32 z_address, u16 z_stride);
void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height);
size_t NumVFBs() const { return vfbs_.size(); }
@ -340,14 +341,8 @@ public:
int GetTargetBufferWidth() const { return currentRenderVfb_ ? currentRenderVfb_->bufferWidth : 480; }
int GetTargetBufferHeight() const { return currentRenderVfb_ ? currentRenderVfb_->bufferHeight : 272; }
int GetTargetStride() const { return currentRenderVfb_ ? currentRenderVfb_->fb_stride : 512; }
GEBufferFormat GetTargetFormat() const { return currentRenderVfb_ ? currentRenderVfb_->format : displayFormat_; }
GEBufferFormat GetTargetFormat() const { return currentRenderVfb_ ? currentRenderVfb_->fb_format : displayFormat_; }
void SetDepthUpdated() {
if (currentRenderVfb_) {
currentRenderVfb_->last_frame_depth_render = gpuStats.numFlips;
currentRenderVfb_->last_frame_depth_updated = gpuStats.numFlips;
}
}
void SetColorUpdated(int skipDrawReason) {
if (currentRenderVfb_) {
SetColorUpdated(currentRenderVfb_, skipDrawReason);
@ -374,15 +369,20 @@ public:
}
void ReinterpretFramebuffer(VirtualFramebuffer *vfb, GEBufferFormat oldFormat, GEBufferFormat newFormat);
Draw2D *GetDraw2D() {
return &draw2D_;
}
protected:
virtual void PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h);
void SetViewport2D(int x, int y, int w, int h);
Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height);
void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags);
void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, RasterChannel channel);
void Ensure2DResources();
Draw::Pipeline *Create2DPipeline(void (*generate)(ShaderWriter &));
Draw2DPipeline *Get2DPipeline(Draw2DShader shader);
void CopyToColorFromOverlappingFramebuffers(VirtualFramebuffer *dest);
void CopyToDepthFromOverlappingFramebuffers(VirtualFramebuffer *dest);
bool UpdateSize();
@ -394,7 +394,7 @@ protected:
void BlitUsingRaster(
Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2,
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2, bool linearFilter, RasterChannel channel);
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2, bool linearFilter, Draw2DPipeline *pipeline, const char *tag);
void CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags);
@ -426,7 +426,7 @@ protected:
dstBuffer->dirtyAfterDisplay = true;
dstBuffer->drawnWidth = dstBuffer->width;
dstBuffer->drawnHeight = dstBuffer->height;
dstBuffer->drawnFormat = dstBuffer->format;
dstBuffer->drawnFormat = dstBuffer->fb_format;
if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
dstBuffer->reallyDirtyAfterDisplay = true;
}
@ -500,10 +500,7 @@ protected:
// Thin3D stuff for reinterpreting image data between the various 16-bit formats.
// Safe, not optimal - there might be input attachment tricks, etc, but we can't use them
// since we don't want N different implementations.
Draw::Pipeline *reinterpretFromTo_[3][3]{};
Draw::ShaderModule *reinterpretVS_ = nullptr;
Draw::SamplerState *reinterpretSampler_ = nullptr;
Draw::Buffer *reinterpretVBuf_ = nullptr;
Draw2DPipeline *reinterpretFromTo_[3][3]{};
// Common implementation of stencil buffer upload. Also not 100% optimal, but not performance
// critical either.
@ -511,10 +508,11 @@ protected:
Draw::SamplerState *stencilUploadSampler_ = nullptr;
// Draw2D pipelines
Draw::Pipeline *draw2DPipelineColor_ = nullptr;
Draw::Pipeline *draw2DPipelineDepth_ = nullptr;
Draw::SamplerState *draw2DSamplerLinear_ = nullptr;
Draw::SamplerState *draw2DSamplerNearest_ = nullptr;
Draw::ShaderModule *draw2DVs_ = nullptr;
Draw2DPipeline *draw2DPipelineColor_ = nullptr;
Draw2DPipeline *draw2DPipelineDepth_ = nullptr;
Draw2DPipeline *draw2DPipeline565ToDepth_ = nullptr;
Draw2DPipeline *draw2DPipeline565ToDepthDeswizzle_ = nullptr;
Draw2D draw2D_;
// The fragment shaders are "owned" by the pipelines since they're 1:1.
};

View File

@ -557,9 +557,7 @@ DepthScaleFactors GetDepthScaleFactors() {
}
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {
bool throughmode = gstate.isModeThrough();
out.dirtyProj = false;
out.dirtyDepth = false;
out.throughMode = gstate.isModeThrough();
float renderWidthFactor, renderHeightFactor;
float renderX = 0.0f, renderY = 0.0f;
@ -610,7 +608,7 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
float offsetX = gstate.getOffsetX();
float offsetY = gstate.getOffsetY();
if (throughmode) {
if (out.throughMode) {
out.viewportX = renderX * renderWidthFactor + displayOffsetX;
out.viewportY = renderY * renderHeightFactor + displayOffsetY;
out.viewportW = curRTWidth * renderWidthFactor;
@ -647,10 +645,10 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
float right = left + vpWidth;
float bottom = top + vpHeight;
float wScale = 1.0f;
float xOffset = 0.0f;
float hScale = 1.0f;
float yOffset = 0.0f;
out.widthScale = 1.0f;
out.xOffset = 0.0f;
out.heightScale = 1.0f;
out.yOffset = 0.0f;
// If we're within the bounds, we want clipping the viewport way. So leave it be.
{
@ -678,8 +676,8 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
right = left + 1.0f;
}
wScale = vpWidth / (right - left);
xOffset = drift / (right - left);
out.widthScale = vpWidth / (right - left);
out.xOffset = drift / (right - left);
}
}
@ -707,8 +705,8 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
bottom = top + 1.0f;
}
hScale = vpHeight / (bottom - top);
yOffset = drift / (bottom - top);
out.heightScale = vpHeight / (bottom - top);
out.yOffset = drift / (bottom - top);
}
}
@ -740,13 +738,13 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
}
// Okay. So, in our shader, -1 will map to minz, and +1 will map to maxz.
float halfActualZRange = (maxz - minz) * (1.0f / 2.0f);
float zScale = halfActualZRange < std::numeric_limits<float>::epsilon() ? 1.0f : vpZScale / halfActualZRange;
out.depthScale = halfActualZRange < std::numeric_limits<float>::epsilon() ? 1.0f : vpZScale / halfActualZRange;
// This adjusts the center from halfActualZRange to vpZCenter.
float zOffset = halfActualZRange < std::numeric_limits<float>::epsilon() ? 0.0f : (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;
out.zOffset = halfActualZRange < std::numeric_limits<float>::epsilon() ? 0.0f : (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;
if (!gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
zScale = 1.0f;
zOffset = 0.0f;
out.depthScale = 1.0f;
out.zOffset = 0.0f;
out.depthRangeMin = ToScaledDepthFromIntegerScale(vpZCenter - vpZScale);
out.depthRangeMax = ToScaledDepthFromIntegerScale(vpZCenter + vpZScale);
} else {
@ -757,19 +755,27 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
// OpenGL will clamp these for us anyway, and Direct3D will error if not clamped.
out.depthRangeMin = std::max(out.depthRangeMin, 0.0f);
out.depthRangeMax = std::min(out.depthRangeMax, 1.0f);
}
}
bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale;
bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset;
bool depthChanged = gstate_c.vpDepthScale != zScale || gstate_c.vpZOffset != zOffset;
if (scaleChanged || offsetChanged || depthChanged) {
gstate_c.vpWidthScale = wScale;
gstate_c.vpHeightScale = hScale;
gstate_c.vpDepthScale = zScale;
gstate_c.vpXOffset = xOffset;
gstate_c.vpYOffset = yOffset;
gstate_c.vpZOffset = zOffset;
out.dirtyProj = true;
out.dirtyDepth = depthChanged;
// Copies the viewport scale/offset factors filled in by ConvertViewportAndScissor into the
// cached copies held in gstate_c. Through-mode results are ignored. If any of the six
// factors differs from the cached value, all six are overwritten and DIRTY_PROJMATRIX is
// raised; DIRTY_DEPTHRANGE is raised as well when the depth scale or Z offset changed.
void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor) {
	if (!vpAndScissor.throughMode) {
		const bool scaleDiffers =
			gstate_c.vpWidthScale != vpAndScissor.widthScale ||
			gstate_c.vpHeightScale != vpAndScissor.heightScale;
		const bool offsetDiffers =
			gstate_c.vpXOffset != vpAndScissor.xOffset ||
			gstate_c.vpYOffset != vpAndScissor.yOffset;
		const bool depthDiffers =
			gstate_c.vpDepthScale != vpAndScissor.depthScale ||
			gstate_c.vpZOffset != vpAndScissor.zOffset;

		// Nothing to refresh when the cache already matches.
		if (!scaleDiffers && !offsetDiffers && !depthDiffers) {
			return;
		}

		// Refresh the whole cache in one go, scales first and then offsets.
		gstate_c.vpWidthScale = vpAndScissor.widthScale;
		gstate_c.vpHeightScale = vpAndScissor.heightScale;
		gstate_c.vpDepthScale = vpAndScissor.depthScale;
		gstate_c.vpXOffset = vpAndScissor.xOffset;
		gstate_c.vpYOffset = vpAndScissor.yOffset;
		gstate_c.vpZOffset = vpAndScissor.zOffset;

		gstate_c.Dirty(DIRTY_PROJMATRIX);
		if (depthDiffers) {
			gstate_c.Dirty(DIRTY_DEPTHRANGE);
		}
	}
}
@ -1018,16 +1024,6 @@ void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead) {
return;
}
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
// Suppress color writes entirely in this mode.
maskState.applyFramebufferRead = false;
maskState.rgba[0] = false;
maskState.rgba[1] = false;
maskState.rgba[2] = false;
maskState.rgba[3] = false;
return;
}
// Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw.
uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24));

View File

@ -75,10 +75,16 @@ struct ViewportAndScissor {
float viewportH;
float depthRangeMin;
float depthRangeMax;
bool dirtyProj;
bool dirtyDepth;
float widthScale;
float heightScale;
float depthScale;
float xOffset;
float yOffset;
float zOffset;
bool throughMode;
};
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor);
float ToScaledDepthFromIntegerScale(float z);
struct DepthScaleFactors {

View File

@ -20,11 +20,7 @@ static const SamplerDef samplers[1] = {
// TODO: We could possibly have an option to preserve any extra color precision? But gonna start without it.
// Requires full size integer math. It would be possible to make a floating point-only version with lots of
// modulo and stuff, might do it one day.
void GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBufferFormat to, const ShaderLanguageDesc &lang) {
_assert_(lang.bitwiseOps);
ShaderWriter writer(buffer, lang, ShaderStage::Fragment);
Draw2DPipelineInfo GenerateReinterpretFragmentShader(ShaderWriter &writer, GEBufferFormat from, GEBufferFormat to) {
writer.HighPrecisionFloat();
writer.DeclareSamplers(samplers);
@ -70,22 +66,13 @@ void GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBuff
}
writer.EndFSMain("outColor", FSFLAG_NONE);
return Draw2DPipelineInfo{
RASTER_COLOR,
RASTER_COLOR,
};
}
// Writes the source of a vertex shader into `buffer` for the shader language described by
// `lang`. The shader takes no vertex inputs and no uniforms: it derives the position purely
// from gl_VertexIndex and outputs the matching texture coordinate through `varyings`.
// Asserts that the target language supports bitwise operations, which the index math needs.
void GenerateReinterpretVertexShader(char *buffer, const ShaderLanguageDesc &lang) {
_assert_(lang.bitwiseOps);
ShaderWriter writer(buffer, lang, ShaderStage::Vertex);
writer.BeginVSMain(Slice<InputDef>::empty(), Slice<UniformDef>::empty(), varyings);
// Bit 0 of the index selects x = -1 or 3, bit 1 selects y = -1 or 3, so indices 0..2 give
// (-1,-1), (3,-1), (-1,3): one triangle that covers the whole [-1,1] clip-space square.
writer.C(" float x = -1.0 + float((gl_VertexIndex & 1) << 2);\n");
writer.C(" float y = -1.0 + float((gl_VertexIndex & 2) << 1);\n");
// Remap clip-space [-1,1] to texture-space [0,1].
writer.C(" v_texcoord = (vec2(x, y) + vec2(1.0, 1.0)) * 0.5;\n");
writer.C(" gl_Position = vec4(x, y, 0.0, 1.0);\n");
writer.EndVSMain(varyings);
}
// Can't easily dynamically create these strings, we just pass along the pointer.
static const char *reinterpretStrings[3][3] = {
{
@ -112,7 +99,7 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G
_assert_(newFormat != oldFormat);
// The caller is responsible for updating the format.
_assert_(newFormat == vfb->format);
_assert_(newFormat == vfb->fb_format);
ShaderLanguage lang = draw_->GetShaderLanguageDesc().shaderLanguage;
@ -150,60 +137,15 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G
return;
}
if (!reinterpretVS_) {
char *vsCode = new char[4000];
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
GenerateReinterpretVertexShader(vsCode, shaderLanguageDesc);
reinterpretVS_ = draw_->CreateShaderModule(ShaderStage::Vertex, shaderLanguageDesc.shaderLanguage, (const uint8_t *)vsCode, strlen(vsCode), "reinterpret_vs");
_assert_(reinterpretVS_);
delete[] vsCode;
}
if (!reinterpretSampler_) {
Draw::SamplerStateDesc samplerDesc{};
samplerDesc.magFilter = Draw::TextureFilter::LINEAR;
samplerDesc.minFilter = Draw::TextureFilter::LINEAR;
reinterpretSampler_ = draw_->CreateSamplerState(samplerDesc);
}
if (!reinterpretVBuf_) {
reinterpretVBuf_ = draw_->CreateBuffer(12 * 3, Draw::BufferUsageFlag::DYNAMIC | Draw::BufferUsageFlag::VERTEXDATA);
}
// See if we need to create a new pipeline.
Draw::Pipeline *pipeline = reinterpretFromTo_[(int)oldFormat][(int)newFormat];
Draw2DPipeline *pipeline = reinterpretFromTo_[(int)oldFormat][(int)newFormat];
if (!pipeline) {
char *fsCode = new char[4000];
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
GenerateReinterpretFragmentShader(fsCode, oldFormat, newFormat, shaderLanguageDesc);
Draw::ShaderModule *reinterpretFS = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fsCode, strlen(fsCode), "reinterpret_fs");
_assert_(reinterpretFS);
delete[] fsCode;
pipeline = draw2D_.Create2DPipeline([=](ShaderWriter &shaderWriter) -> Draw2DPipelineInfo {
return GenerateReinterpretFragmentShader(shaderWriter, oldFormat, newFormat);
});
std::vector<Draw::ShaderModule *> shaders;
shaders.push_back(reinterpretVS_);
shaders.push_back(reinterpretFS);
using namespace Draw;
Draw::PipelineDesc desc{};
// We use a "fullscreen triangle".
// TODO: clear the stencil buffer. Hard to actually initialize it with the new alpha, though possible - let's see if
// we need it.
DepthStencilState *depth = draw_->CreateDepthStencilState({ false, false, Comparison::LESS });
BlendState *blendstateOff = draw_->CreateBlendState({ false, 0xF });
RasterState *rasterNoCull = draw_->CreateRasterState({});
// No uniforms for these, only a single texture input.
PipelineDesc pipelineDesc{ Primitive::TRIANGLE_LIST, shaders, nullptr, depth, blendstateOff, rasterNoCull, nullptr };
pipeline = draw_->CreateGraphicsPipeline(pipelineDesc);
_assert_(pipeline != nullptr);
reinterpretFromTo_[(int)oldFormat][(int)newFormat] = pipeline;
depth->Release();
blendstateOff->Release();
rasterNoCull->Release();
reinterpretFS->Release();
}
// Copy to a temp framebuffer.
@ -213,18 +155,9 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G
// itself while writing.
draw_->InvalidateCachedState();
draw_->CopyFramebufferImage(vfb->fbo, 0, 0, 0, 0, temp, 0, 0, 0, 0, vfb->renderWidth, vfb->renderHeight, 1, Draw::FBChannel::FB_COLOR_BIT, "reinterpret_prep");
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, reinterpretStrings[(int)oldFormat][(int)newFormat]);
draw_->BindPipeline(pipeline);
draw_->BindFramebufferAsTexture(temp, 0, Draw::FBChannel::FB_COLOR_BIT, 0);
draw_->BindSamplerStates(0, 1, &reinterpretSampler_);
draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight);
Draw::Viewport vp = Draw::Viewport{ 0.0f, 0.0f, (float)vfb->renderWidth, (float)vfb->renderHeight, 0.0f, 1.0f };
draw_->SetViewports(1, &vp);
// Vertex buffer not used - vertices generated in shader.
// TODO: Switch to a vertex buffer for GLES2/D3D9 compat.
draw_->BindVertexBuffers(0, 1, &reinterpretVBuf_, nullptr);
draw_->Draw(3, 0);
draw_->InvalidateCachedState();
BlitUsingRaster(temp, 0.0f, 0.0f, vfb->renderWidth, vfb->renderHeight,
vfb->fbo, 0.0f, 0.0f, vfb->renderWidth, vfb->renderHeight, false, pipeline, "reinterpret");
// Unbind.
draw_->BindTexture(0, nullptr);

View File

@ -1,11 +1,9 @@
#pragma once
#include "Common/GPU/ShaderWriter.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUCommon.h"
#include "Common/GPU/ShaderWriter.h"
#include "GPU/Common/Draw2D.h"
void GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBufferFormat to, const ShaderLanguageDesc &lang);
// Just a single one. Can probably be shared with a lot of similar use cases.
// Generates the coordinates for a fullscreen triangle.
void GenerateReinterpretVertexShader(char *buffer, const ShaderLanguageDesc &lang);
Draw2DPipelineInfo GenerateReinterpretFragmentShader(ShaderWriter &writer, GEBufferFormat from, GEBufferFormat to);

View File

@ -29,7 +29,7 @@ enum DebugShaderType {
SHADER_TYPE_GEOMETRY = 2,
SHADER_TYPE_VERTEXLOADER = 3, // Not really a shader, but might as well re-use this mechanism
SHADER_TYPE_PIPELINE = 4, // Vulkan and DX12 combines a bunch of state into pipeline objects. Might as well make them inspectable.
SHADER_TYPE_DEPAL = 5,
SHADER_TYPE_TEXTURE = 5,
SHADER_TYPE_SAMPLER = 6, // Not really a shader either. Need to rename this enum...
};

View File

@ -240,8 +240,6 @@ std::string FragmentShaderDesc(const FShaderID &id) {
if (id.Bit(FS_BIT_COLOR_AGAINST_ZERO)) desc << "ColorTest0 " << alphaTestFuncs[id.Bits(FS_BIT_COLOR_TEST_FUNC, 2)] << " "; // first 4 match;
else if (id.Bit(FS_BIT_COLOR_TEST)) desc << "ColorTest " << alphaTestFuncs[id.Bits(FS_BIT_COLOR_TEST_FUNC, 2)] << " "; // first 4 match
if (id.Bit(FS_BIT_COLOR_TO_DEPTH)) desc << "ColorToDepth ";
return desc.str();
}
@ -263,8 +261,8 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
bool useShaderDepal = gstate_c.useShaderDepal;
bool useSmoothedDepal = gstate_c.useSmoothedShaderDepal;
bool colorWriteMask = IsColorWriteMaskComplex(gstate_c.allowFramebufferRead);
bool colorToDepth = gstate_c.renderMode == RasterMode::RASTER_MODE_COLOR_TO_DEPTH;
// Note how we here recompute some of the work already done in state mapping.
// Not ideal! At least we share the code.
@ -293,11 +291,10 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
}
id.SetBit(FS_BIT_BGRA_TEXTURE, gstate_c.bgraTexture);
id.SetBit(FS_BIT_SHADER_DEPAL, useShaderDepal);
id.SetBit(FS_BIT_SHADER_SMOOTHED_DEPAL, useSmoothedDepal);
id.SetBit(FS_BIT_3D_TEXTURE, gstate_c.curTextureIs3D);
}
id.SetBit(FS_BIT_COLOR_TO_DEPTH, colorToDepth);
id.SetBit(FS_BIT_LMODE, lmode);
if (enableAlphaTest) {
// 5 bits total.

View File

@ -94,7 +94,7 @@ enum FShaderBit : uint8_t {
FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49,
FS_BIT_COLOR_WRITEMASK = 50,
FS_BIT_3D_TEXTURE = 51,
FS_BIT_COLOR_TO_DEPTH = 52,
FS_BIT_SHADER_SMOOTHED_DEPAL = 52,
};
static inline FShaderBit operator +(FShaderBit bit, int i) {

View File

@ -153,7 +153,7 @@ bool FramebufferManagerCommon::PerformStencilUpload(u32 addr, int size, StencilU
if (!src)
return false;
switch (dstBuffer->format) {
switch (dstBuffer->fb_format) {
case GE_FORMAT_565:
// Well, this doesn't make much sense.
return false;
@ -290,7 +290,7 @@ bool FramebufferManagerCommon::PerformStencilUpload(u32 addr, int size, StencilU
draw_->SetViewports(1, &viewport);
// TODO: Switch the format to a single channel format?
Draw::Texture *tex = MakePixelTexture(src, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstBuffer->height);
Draw::Texture *tex = MakePixelTexture(src, dstBuffer->fb_format, dstBuffer->fb_stride, dstBuffer->width, dstBuffer->height);
if (!tex) {
// Bad!
return false;
@ -309,10 +309,10 @@ bool FramebufferManagerCommon::PerformStencilUpload(u32 addr, int size, StencilU
continue;
}
StencilUB ub{};
if (dstBuffer->format == GE_FORMAT_4444) {
if (dstBuffer->fb_format == GE_FORMAT_4444) {
draw_->SetStencilParams(0xFF, (i << 4) | i, 0xFF);
ub.stencilValue = i * (16.0f / 255.0f);
} else if (dstBuffer->format == GE_FORMAT_5551) {
} else if (dstBuffer->fb_format == GE_FORMAT_5551) {
draw_->SetStencilParams(0xFF, 0xFF, 0xFF);
ub.stencilValue = i * (128.0f / 255.0f);
} else {

View File

@ -105,14 +105,8 @@ inline int dimHeight(u16 dim) {
// Vulkan color formats:
// TODO
TextureCacheCommon::TextureCacheCommon(Draw::DrawContext *draw)
: draw_(draw),
clutLastFormat_(0xFFFFFFFF),
clutTotalBytes_(0),
clutMaxBytes_(0),
clutRenderAddress_(0xFFFFFFFF),
clutAlphaLinear_(false),
isBgraBackend_(false) {
TextureCacheCommon::TextureCacheCommon(Draw::DrawContext *draw, Draw2D *draw2D)
: draw_(draw), draw2D_(draw2D) {
decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
// TODO: Clamp down to 256/1KB? Need to check mipmapShareClut and clamp loadclut.
@ -130,11 +124,11 @@ TextureCacheCommon::TextureCacheCommon(Draw::DrawContext *draw)
replacer_.Init();
depalShaderCache_ = new DepalShaderCache(draw);
textureShaderCache_ = new TextureShaderCache(draw, draw2D_);
}
TextureCacheCommon::~TextureCacheCommon() {
delete depalShaderCache_;
delete textureShaderCache_;
FreeAlignedMemory(clutBufConverted_);
FreeAlignedMemory(clutBufRaw_);
@ -265,10 +259,6 @@ SamplerCacheKey TextureCacheCommon::GetSamplingParams(int maxLevel, const TexCac
}
}
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
forceFiltering = TEX_FILTER_FORCE_NEAREST;
}
switch (forceFiltering) {
case TEX_FILTER_AUTO:
break;
@ -492,7 +482,14 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
int h0 = gstate.getTextureHeight(0);
int d0 = 1;
ReplacedTexture &replaced = FindReplacement(entry, w0, h0, d0);
if (replaced.Valid()) {
if (replaced.IsInvalid()) {
entry->status &= ~TexCacheEntry::STATUS_TO_REPLACE;
if (g_Config.bSaveNewTextures) {
// Load once more to actually save.
match = false;
reason = "replacing";
}
} else {
match = false;
reason = "replacing";
}
@ -515,6 +512,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
nextNeedsChange_ = false;
// Might need a rebuild if the hash fails, but that will be set later.
nextNeedsRebuild_ = false;
failedTexture_ = false;
VERBOSE_LOG(G3D, "Texture at %08x found in cache, applying", texaddr);
return entry; //Done!
} else {
@ -610,10 +608,9 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
gstate_c.curTextureHeight = h;
gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0);
failedTexture_ = false;
nextTexture_ = entry;
if (nextFramebufferTexture_) {
nextFramebufferTexture_ = nullptr; // in case it was accidentally set somehow?
}
nextFramebufferTexture_ = nullptr;
nextNeedsRehash_ = true;
// We still need to rebuild, to allocate a texture. But we'll bail early.
nextNeedsRebuild_ = true;
@ -625,35 +622,27 @@ std::vector<AttachCandidate> TextureCacheCommon::GetFramebufferCandidates(const
std::vector<AttachCandidate> candidates;
RasterChannel channel = Memory::IsDepthTexVRAMAddress(entry.addr) ? RasterChannel::RASTER_DEPTH : RasterChannel::RASTER_COLOR;
if (channel == RasterChannel::RASTER_DEPTH && !gstate_c.Supports(GPU_SUPPORTS_DEPTH_TEXTURE)) {
// Depth texture not supported. Don't try to match it, fall back to the memory behind..
return std::vector<AttachCandidate>();
}
const std::vector<VirtualFramebuffer *> &framebuffers = framebufferManager_->Framebuffers();
for (VirtualFramebuffer *framebuffer : framebuffers) {
FramebufferMatchInfo match = MatchFramebuffer(entry, framebuffer, texAddrOffset, channel);
switch (match.match) {
case FramebufferMatch::VALID:
candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel });
break;
default:
break;
FramebufferMatchInfo match{};
if (MatchFramebuffer(entry, framebuffer, texAddrOffset, RASTER_COLOR, &match)) {
candidates.push_back(AttachCandidate{ match, entry, framebuffer, RASTER_COLOR, framebuffer->colorBindSeq });
}
match = {};
if (MatchFramebuffer(entry, framebuffer, texAddrOffset, RASTER_DEPTH, &match)) {
candidates.push_back(AttachCandidate{ match, entry, framebuffer, RASTER_DEPTH, framebuffer->depthBindSeq });
}
}
if (candidates.size() > 1) {
bool depth = channel == RasterChannel::RASTER_DEPTH;
std::string cands;
for (auto &candidate : candidates) {
cands += candidate.ToString() + " ";
}
WARN_LOG_REPORT_ONCE(multifbcandidate, G3D, "GetFramebufferCandidates(%s): Multiple (%d) candidate framebuffers. First will be chosen. texaddr: %08x offset: %d (%dx%d stride %d, %s):\n%s",
depth ? "DEPTH" : "COLOR", (int)candidates.size(),
WARN_LOG_REPORT_ONCE(multifbcandidate, G3D, "GetFramebufferCandidates: Multiple (%d) candidate framebuffers. texaddr: %08x offset: %d (%dx%d stride %d, %s):\n%s",
(int)candidates.size(),
entry.addr, texAddrOffset, dimWidth(entry.dim), dimHeight(entry.dim), entry.bufw, GeTextureFormatToString(entry.format),
cands.c_str()
);
@ -677,29 +666,22 @@ int TextureCacheCommon::GetBestCandidateIndex(const std::vector<AttachCandidate>
// a comparison function.
for (int i = 0; i < (int)candidates.size(); i++) {
const AttachCandidate &candidate = candidates[i];
int relevancy = 0;
switch (candidate.match.match) {
case FramebufferMatch::VALID:
relevancy += 1000;
break;
default:
break;
}
int relevancy = candidate.seqCount;
// Bonus point for matching stride.
if (candidate.channel == RASTER_COLOR && candidate.fb->fb_stride == candidate.entry.bufw) {
relevancy += 100;
relevancy += 1000;
}
// Bonus points for no offset.
if (candidate.match.xOffset == 0 && candidate.match.yOffset == 0) {
relevancy += 10;
relevancy += 100;
}
if (candidate.channel == RASTER_COLOR && candidate.fb->last_frame_render == gpuStats.numFlips) {
relevancy += 5;
relevancy += 50;
} else if (candidate.channel == RASTER_DEPTH && candidate.fb->last_frame_depth_render == gpuStats.numFlips) {
relevancy += 5;
relevancy += 50;
}
if (relevancy > bestRelevancy) {
@ -825,7 +807,7 @@ void TextureCacheCommon::NotifyFramebuffer(VirtualFramebuffer *framebuffer, Fram
const u32 z_addr = framebuffer->z_address & ~mirrorMask; // Probably unnecessary.
const u32 fb_bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2;
const u32 fb_bpp = BufferFormatBytesPerPixel(framebuffer->fb_format);
const u32 z_bpp = 2; // No other format exists.
const u32 fb_stride = framebuffer->fb_stride;
const u32 z_stride = framebuffer->z_stride;
@ -876,12 +858,26 @@ void TextureCacheCommon::NotifyFramebuffer(VirtualFramebuffer *framebuffer, Fram
}
}
FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(
bool TextureCacheCommon::MatchFramebuffer(
const TextureDefinition &entry,
VirtualFramebuffer *framebuffer, u32 texaddrOffset, RasterChannel channel) const {
VirtualFramebuffer *framebuffer, u32 texaddrOffset, RasterChannel channel, FramebufferMatchInfo *matchInfo) const {
static const u32 MAX_SUBAREA_Y_OFFSET_SAFE = 32;
uint32_t fb_address = channel == RASTER_DEPTH ? framebuffer->z_address : framebuffer->fb_address;
uint32_t fb_stride = channel == RASTER_DEPTH ? framebuffer->z_stride : framebuffer->fb_stride;
GEBufferFormat fb_format = channel == RASTER_DEPTH ? GE_FORMAT_DEPTH16 : framebuffer->fb_format;
if (channel == RASTER_DEPTH && framebuffer->z_address == framebuffer->fb_address) {
// Try to avoid silly matches to somewhat malformed buffers.
return false;
}
switch (entry.format) {
case GE_TFMT_DXT1:
case GE_TFMT_DXT3:
case GE_TFMT_DXT5:
return false;
}
u32 addr = fb_address & 0x3FFFFFFF;
u32 texaddr = entry.addr + texaddrOffset;
@ -891,40 +887,19 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(
if (texInVRAM != fbInVRAM) {
// Shortcut. Cannot possibly be a match.
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
return false;
}
if (texInVRAM) {
const u32 mirrorMask = 0x00600000;
// This bit controls swizzle. The swizzles at 0x00200000 and 0x00600000 are designed
// to perfectly match reading depth as color (which one to use I think might be related
// to the bpp of the color format used when rendering to it).
// It's fairly unlikely that games would screw this up since the result will be garbage so
// we use it to filter out unlikely matches.
switch (entry.addr & mirrorMask) {
case 0x00000000:
case 0x00400000:
// Don't match the depth channel with these addresses when texturing.
if (channel == RasterChannel::RASTER_DEPTH) {
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
}
break;
case 0x00200000:
case 0x00600000:
// Don't match the color channel with these addresses when texturing.
if (channel == RasterChannel::RASTER_COLOR) {
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
}
break;
}
addr &= ~mirrorMask;
texaddr &= ~mirrorMask;
}
const bool noOffset = texaddr == addr;
const bool exactMatch = noOffset && entry.format < 4 && channel == RASTER_COLOR;
const u32 w = 1 << ((entry.dim >> 0) & 0xf);
const u32 h = 1 << ((entry.dim >> 8) & 0xf);
// 512 on a 272 framebuffer is sane, so let's be lenient.
@ -932,103 +907,101 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(
// If they match "exactly", it's non-CLUT and from the top left.
if (exactMatch) {
if (framebuffer->fb_stride != entry.bufw) {
WARN_LOG_ONCE(diffStrides1, G3D, "Texturing from framebuffer with different strides %d != %d", entry.bufw, framebuffer->fb_stride);
if (fb_stride != entry.bufw) {
WARN_LOG_ONCE(diffStrides1, G3D, "Texturing from framebuffer with different strides %d != %d", entry.bufw, (int)fb_stride);
}
// NOTE: This check is okay because the first texture formats are the same as the buffer formats.
if (IsTextureFormatBufferCompatible(entry.format)) {
if (TextureFormatMatchesBufferFormat(entry.format, framebuffer->format) || (framebuffer->usageFlags & FB_USAGE_BLUE_TO_ALPHA)) {
return FramebufferMatchInfo{ FramebufferMatch::VALID };
} else if (IsTextureFormat16Bit(entry.format) && IsBufferFormat16Bit(framebuffer->format)) {
WARN_LOG_ONCE(diffFormat1, G3D, "Texturing from framebuffer with reinterpretable format: %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format));
return FramebufferMatchInfo{ FramebufferMatch::VALID, 0, 0, true, TextureFormatToBufferFormat(entry.format) };
if (TextureFormatMatchesBufferFormat(entry.format, fb_format) || (framebuffer->usageFlags & FB_USAGE_BLUE_TO_ALPHA)) {
return true;
} else if (IsTextureFormat16Bit(entry.format) && IsBufferFormat16Bit(fb_format) && channel == RASTER_COLOR) {
WARN_LOG_ONCE(diffFormat1, G3D, "Texturing from framebuffer with reinterpretable fb_format: %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(fb_format));
*matchInfo = FramebufferMatchInfo{ 0, 0, true, TextureFormatToBufferFormat(entry.format) };
return true;
} else {
WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible formats %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format));
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
WARN_LOG_ONCE(diffFormat2, G3D, "Not texturing from framebuffer with incompatible formats %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(fb_format));
return false;
}
} else {
// Format incompatible, ignoring without comment. (maybe some really gnarly hacks will end up here...)
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
return false;
}
} else {
// Apply to buffered mode only.
if (!framebufferManager_->UseBufferedRendering()) {
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
return false;
}
// Check works for D16 too (???)
const bool matchingClutFormat =
(channel != RASTER_COLOR && entry.format == GE_TFMT_CLUT16) ||
(channel == RASTER_COLOR && framebuffer->format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) ||
(channel == RASTER_COLOR && framebuffer->format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16);
// To avoid ruining git blame, kept the same name as the old struct.
FramebufferMatchInfo fbInfo{ FramebufferMatch::VALID };
(fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_CLUT16) ||
(fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_5650) ||
(fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) ||
(fb_format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16);
const u32 bitOffset = (texaddr - addr) * 8;
if (bitOffset != 0) {
const u32 pixelOffset = bitOffset / std::max(1U, (u32)textureBitsPerPixel[entry.format]);
fbInfo.yOffset = entry.bufw == 0 ? 0 : pixelOffset / entry.bufw;
fbInfo.xOffset = entry.bufw == 0 ? 0 : pixelOffset % entry.bufw;
matchInfo->yOffset = entry.bufw == 0 ? 0 : pixelOffset / entry.bufw;
matchInfo->xOffset = entry.bufw == 0 ? 0 : pixelOffset % entry.bufw;
}
if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) {
if (matchInfo->yOffset + minSubareaHeight >= framebuffer->height) {
// Can't be inside the framebuffer.
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
return false;
}
if (framebuffer->fb_stride != entry.bufw) {
if (fb_stride != entry.bufw) {
if (noOffset) {
WARN_LOG_ONCE(diffStrides2, G3D, "Texturing from framebuffer (matching_clut=%s) different strides %d != %d", matchingClutFormat ? "yes" : "no", entry.bufw, framebuffer->fb_stride);
WARN_LOG_ONCE(diffStrides2, G3D, "Texturing from framebuffer (matching_clut=%s) different strides %d != %d", matchingClutFormat ? "yes" : "no", entry.bufw, fb_stride);
// Continue on with other checks.
// Not actually sure why we even try here. There's no way it'll go well if the strides are different.
} else {
// Assume any render-to-tex with different bufw + offset is a render from ram.
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
return false;
}
}
// Check if it's in bufferWidth (which might be higher than width and may indicate the framebuffer includes the data.)
if (fbInfo.xOffset >= framebuffer->bufferWidth && fbInfo.xOffset + w <= (u32)framebuffer->fb_stride) {
if (matchInfo->xOffset >= framebuffer->bufferWidth && matchInfo->xOffset + w <= (u32)fb_stride) {
// This happens in Brave Story, see #10045 - the texture is in the space between strides, with matching stride.
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
return false;
}
// Trying to play it safe. Below 0x04110000 is almost always framebuffers.
// TODO: Maybe we can reduce this check and find a better way above 0x04110000?
if (fbInfo.yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000 && !PSP_CoreParameter().compat.flags().AllowLargeFBTextureOffsets) {
WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset, framebuffer->width, framebuffer->height);
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
if (matchInfo->yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000 && !PSP_CoreParameter().compat.flags().AllowLargeFBTextureOffsets) {
WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", fb_address, matchInfo->xOffset, matchInfo->yOffset, framebuffer->width, framebuffer->height);
return false;
}
// Check for CLUT. The framebuffer is always RGB, but it can be interpreted as a CLUT texture.
// 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture.
if (matchingClutFormat) {
if (!noOffset) {
WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset);
WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer (%s) using %s with offset at %08x +%dx%d", channel == RASTER_DEPTH ? "DEPTH" : "COLOR", GeTextureFormatToString(entry.format), fb_address, matchInfo->xOffset, matchInfo->yOffset);
}
fbInfo.match = FramebufferMatch::VALID; // We check the format again later, no need to return a special value here.
return fbInfo;
return true;
} else if (IsClutFormat((GETextureFormat)(entry.format)) || IsDXTFormat((GETextureFormat)(entry.format))) {
WARN_LOG_ONCE(fourEightBit, G3D, "%s format not supported when texturing from framebuffer of format %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format));
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
WARN_LOG_ONCE(fourEightBit, G3D, "%s fb_format not supported when texturing from framebuffer of format %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(fb_format));
return false;
}
// This is either normal or we failed to generate a shader to depalettize
if ((int)framebuffer->format == (int)entry.format || matchingClutFormat) {
if ((int)framebuffer->format != (int)entry.format) {
if ((int)fb_format == (int)entry.format || matchingClutFormat) {
if ((int)fb_format != (int)entry.format) {
WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with different formats %s != %s at %08x",
GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format), fb_address);
return fbInfo;
GeTextureFormatToString(entry.format), GeBufferFormatToString(fb_format), fb_address);
return true;
} else {
WARN_LOG_ONCE(subarea, G3D, "Texturing from framebuffer at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset);
return fbInfo;
WARN_LOG_ONCE(subarea, G3D, "Texturing from framebuffer at %08x +%dx%d", fb_address, matchInfo->xOffset, matchInfo->yOffset);
return true;
}
} else {
WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible format %s != %s at %08x",
GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format), fb_address);
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
GeTextureFormatToString(entry.format), GeBufferFormatToString(fb_format), fb_address);
return false;
}
}
}
@ -1038,18 +1011,20 @@ void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate)
FramebufferMatchInfo fbInfo = candidate.match;
if (candidate.match.reinterpret) {
GEBufferFormat oldFormat = candidate.fb->format;
candidate.fb->format = candidate.match.reinterpretTo;
GEBufferFormat oldFormat = candidate.fb->fb_format;
candidate.fb->fb_format = candidate.match.reinterpretTo;
framebufferManager_->ReinterpretFramebuffer(candidate.fb, oldFormat, candidate.match.reinterpretTo);
}
_dbg_assert_msg_(framebuffer != nullptr, "Framebuffer must not be null.");
framebuffer->usageFlags |= FB_USAGE_TEXTURE;
if (framebufferManager_->UseBufferedRendering()) {
// Keep the framebuffer alive.
framebuffer->last_frame_used = gpuStats.numFlips;
// Keep the framebuffer alive.
framebuffer->last_frame_used = gpuStats.numFlips;
nextFramebufferTextureChannel_ = RASTER_COLOR;
if (framebufferManager_->UseBufferedRendering()) {
// We need to force it, since we may have set it on a texture before attaching.
gstate_c.curTextureWidth = framebuffer->bufferWidth;
gstate_c.curTextureHeight = framebuffer->bufferHeight;
@ -1068,7 +1043,15 @@ void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate)
gstate_c.SetNeedShaderTexclamp(true);
}
nextFramebufferTexture_ = framebuffer;
if (candidate.channel == RASTER_DEPTH && !gstate_c.Supports(GPU_SUPPORTS_DEPTH_TEXTURE)) {
// Flag to bind a null texture if we can't support depth textures.
// Should only happen on old OpenGL.
nextFramebufferTexture_ = nullptr;
failedTexture_ = true;
} else {
nextFramebufferTexture_ = framebuffer;
nextFramebufferTextureChannel_ = candidate.channel;
}
nextTexture_ = nullptr;
} else {
if (framebuffer->fbo) {
@ -1168,7 +1151,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
const std::vector<VirtualFramebuffer *> &framebuffers = framebufferManager_->Framebuffers();
for (VirtualFramebuffer *framebuffer : framebuffers) {
const u32 fb_address = framebuffer->fb_address & 0x3FFFFFFF;
const u32 bpp = framebuffer->drawnFormat == GE_FORMAT_8888 ? 4 : 2;
const u32 bpp = BufferFormatBytesPerPixel(framebuffer->drawnFormat);
u32 offset = clutFramebufAddr - fb_address;
// Is this inside the framebuffer at all?
@ -1327,22 +1310,22 @@ ReplacedTexture &TextureCacheCommon::FindReplacement(TexCacheEntry *entry, int &
constexpr double MAX_BUDGET_PER_TEX = 0.25 / 60.0;
double replaceStart = time_now_d();
double budget = std::min(MAX_BUDGET_PER_TEX, replacementFrameBudget_ - replacementTimeThisFrame_);
u64 cachekey = replacer_.Enabled() ? entry->CacheKey() : 0;
ReplacedTexture &replaced = replacer_.FindReplacement(cachekey, entry->fullhash, w, h);
if (replaced.IsReady(std::min(MAX_BUDGET_PER_TEX, replacementFrameBudget_ - replacementTimeThisFrame_))) {
ReplacedTexture &replaced = replacer_.FindReplacement(cachekey, entry->fullhash, w, h, budget);
if (replaced.IsReady(budget)) {
if (replaced.GetSize(0, w, h)) {
replacementTimeThisFrame_ += time_now_d() - replaceStart;
// Consider it already "scaled" and remove any delayed replace flag.
// Consider it already "scaled."
entry->status |= TexCacheEntry::STATUS_IS_SCALED;
entry->status &= ~TexCacheEntry::STATUS_TO_REPLACE;
return replaced;
}
} else if (replaced.Valid()) {
// Remove the flag, even if it was invalid.
entry->status &= ~TexCacheEntry::STATUS_TO_REPLACE;
} else if (!replaced.IsInvalid()) {
entry->status |= TexCacheEntry::STATUS_TO_REPLACE;
}
replacementTimeThisFrame_ += time_now_d() - replaceStart;
return replacer_.FindNone();
return replaced;
}
// This is only used in the GLES backend, where we don't point these to video memory.
@ -1692,7 +1675,9 @@ CheckAlphaResult TextureCacheCommon::ReadIndexedTex(u8 *out, int outPitch, int l
texptr = (u8 *)tmpTexBuf32_.data();
}
const bool mipmapShareClut = gstate.isClutSharedForMipmaps();
// Misshitsu no Sacrifice has separate CLUT data, this is a hack to allow it.
// Normally separate CLUTs are not allowed for 8-bit or higher indices.
const bool mipmapShareClut = gstate.isClutSharedForMipmaps() || gstate.getClutLoadBlocks() != 0x40;
const int clutSharingOffset = mipmapShareClut ? 0 : (level & 1) * 256;
GEPaletteFormat palFormat = (GEPaletteFormat)gstate.getClutPaletteFormat();
@ -1779,10 +1764,12 @@ void TextureCacheCommon::ApplyTexture() {
if (!entry) {
// Maybe we bound a framebuffer?
InvalidateLastTexture();
if (nextFramebufferTexture_) {
bool depth = Memory::IsDepthTexVRAMAddress(gstate.getTextureAddress(0));
if (failedTexture_) {
// Backends should handle this by binding a black texture with 0 alpha.
BindTexture(nullptr);
} else if (nextFramebufferTexture_) {
// ApplyTextureFrameBuffer is responsible for setting SetTextureFullAlpha.
ApplyTextureFramebuffer(nextFramebufferTexture_, gstate.getTextureFormat(), depth ? RASTER_DEPTH : RASTER_COLOR);
ApplyTextureFramebuffer(nextFramebufferTexture_, gstate.getTextureFormat(), nextFramebufferTextureChannel_);
nextFramebufferTexture_ = nullptr;
}
@ -1845,18 +1832,69 @@ void TextureCacheCommon::ApplyTexture() {
gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0);
}
// Decides whether a texture of format texFormat can be read from a framebuffer
// of format bufferFormat through the depalettize ("depal") path.
//
// Returns true for:
//  - CLUT16 on top of any 16-bit buffer format (4444, 565, 5551, DEPTH16),
//  - CLUT32 on top of an 8888 buffer,
//  - the special case of 5650 on top of DEPTH16.
// Any other CLUT/buffer pairing is logged as invalid and rejected; non-CLUT
// formats other than the 5650/DEPTH16 case are rejected silently.
bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) {
	if (!IsClutFormat(texFormat)) {
		// "Depal" is also applied to 565, which is occasionally used to read depth buffers as 565 (#15491).
		return texFormat == GE_TFMT_5650 && bufferFormat == GE_FORMAT_DEPTH16;
	}

	const bool bufferIs16Bit =
		bufferFormat == GE_FORMAT_4444 ||
		bufferFormat == GE_FORMAT_565 ||
		bufferFormat == GE_FORMAT_5551 ||
		bufferFormat == GE_FORMAT_DEPTH16;
	const bool bufferIs32Bit = bufferFormat == GE_FORMAT_8888;

	// The CLUT index width has to agree with the width of the buffer's pixels.
	const bool widthsAgree =
		(bufferIs16Bit && texFormat == GE_TFMT_CLUT16) ||
		(bufferIs32Bit && texFormat == GE_TFMT_CLUT32);
	if (widthsAgree) {
		return true;
	}

	WARN_LOG(G3D, "Invalid CLUT/framebuffer combination: %s vs %s", GeTextureFormatToString(texFormat), GeBufferFormatToString(bufferFormat));
	return false;
}
// When the palette has been detected as a smooth ramp, the lookup can be interpolated
// for higher color precision. That is only safe if the CLUT index shift/mask selects
// exactly one color channel of the framebuffer format; anything else may mean the game
// is doing something different, and interpolating would be wrong.
// Great enhancement for Test Drive.
//
// gstate:            GE state providing the CLUT index start position, shift and mask.
// framebufferFormat: format of the framebuffer being depalettized (only 565 and 5551 qualify).
// rampLength:        length of the detected ramp in the CLUT.
// Returns true if smoothed (interpolated) depal may be used.
//
// NOTE(review): the mask is accepted when it is <= rampLength. If rampLength is an entry
// count, a mask equal to it would index one past the ramp - confirm whether < is intended.
static bool CanUseSmoothDepal(const GPUgstate &gstate, GEBufferFormat framebufferFormat, int rampLength) {
	// The ramp must begin at the very start of the CLUT.
	if (gstate.getClutIndexStartPos() != 0) {
		return false;
	}

	const auto indexMask = gstate.getClutIndexMask();
	if (indexMask > rampLength) {
		return false;
	}

	const auto indexShift = gstate.getClutIndexShift();

	if (framebufferFormat == GE_FORMAT_565) {
		// 5-bit channels at shifts 0 and 11, the 6-bit channel at shift 5.
		if (indexShift == 0 || indexShift == 11) {
			return indexMask == 0x1F;
		}
		if (indexShift == 5) {
			return indexMask == 0x3F;
		}
		return false;
	}

	if (framebufferFormat == GE_FORMAT_5551) {
		// Three 5-bit color channels at shifts 0, 5 and 10.
		if (indexShift == 0 || indexShift == 5 || indexShift == 10) {
			return indexMask == 0x1F;
		}
		return false;
	}

	// No other framebuffer format is eligible.
	return false;
}
void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel) {
DepalShader *depalShader = nullptr;
Draw2DPipeline *textureShader = nullptr;
uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
bool need_depalettize = IsClutFormat(texFormat);
bool depth = channel == RASTER_DEPTH;
bool need_depalettize = CanDepalettize(texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth && !gstate_c.curTextureIs3D;
// TODO: Implement shader depal in the fragment shader generator for D3D11 at least.
if (!draw_->GetDeviceCaps().fragmentShaderInt32Supported) {
useShaderDepal = false;
depth = false; // Can't support this
}
switch (draw_->GetShaderLanguageDesc().shaderLanguage) {
@ -1868,13 +1906,18 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
break;
}
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
ClutTexture clutTexture{};
bool smoothedDepal = false;
if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) {
clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
smoothedDepal = CanUseSmoothDepal(gstate, framebuffer->drawnFormat, clutTexture.rampLength);
if (useShaderDepal) {
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
// Very icky conflation here of native and thin3d rendering. This will need careful work per backend in BindAsClutTexture.
Draw::Texture *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
BindAsClutTexture(clutTexture);
BindAsClutTexture(clutTexture.texture);
framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
// Vulkan needs to do some extra work here to pick out the native handle from Draw.
@ -1888,7 +1931,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
// Since we started/ended render passes, might need these.
gstate_c.Dirty(DIRTY_DEPAL);
gstate_c.SetUseShaderDepal(true);
gstate_c.SetUseShaderDepal(true, smoothedDepal);
gstate_c.depalFramebufferFormat = framebuffer->drawnFormat;
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
@ -1900,13 +1944,13 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
return;
}
depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
gstate_c.SetUseShaderDepal(false);
textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat, smoothedDepal);
gstate_c.SetUseShaderDepal(false, false);
}
if (depalShader) {
if (textureShader) {
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
Draw::Texture *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
ClutTexture clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, framebuffer->renderWidth, framebuffer->renderHeight);
draw_->BindTexture(0, nullptr);
draw_->BindTexture(1, nullptr);
@ -1916,17 +1960,17 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
Draw::Viewport vp{ 0.0f, 0.0f, (float)framebuffer->renderWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f };
draw_->SetViewports(1, &vp);
TextureShaderApplier shaderApply(draw_, depalShader, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight);
shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset);
shaderApply.Use();
draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0);
draw_->BindTexture(1, clutTexture);
Draw::SamplerState *nearest = depalShaderCache_->GetSampler();
draw_->BindTexture(1, clutTexture.texture);
Draw::SamplerState *nearest = textureShaderCache_->GetSampler(false);
Draw::SamplerState *clutSampler = textureShaderCache_->GetSampler(smoothedDepal);
draw_->BindSamplerStates(0, 1, &nearest);
draw_->BindSamplerStates(1, 1, &nearest);
draw_->BindSamplerStates(1, 1, &clutSampler);
textureShaderCache_->ApplyShader(textureShader,
framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight,
gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset);
shaderApply.Shade();
draw_->BindTexture(0, nullptr);
framebufferManager_->RebindFramebuffer("ApplyTextureFramebuffer");
@ -1946,7 +1990,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
BoundFramebufferTexture();
gstate_c.SetUseShaderDepal(false);
gstate_c.SetUseShaderDepal(false, false);
gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
}
@ -1958,7 +2002,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
}
void TextureCacheCommon::Clear(bool delete_them) {
depalShaderCache_->Clear();
textureShaderCache_->Clear();
ForgetLastTexture();
for (TexCache::iterator iter = cache_.begin(); iter != cache_.end(); ++iter) {
@ -2158,8 +2202,8 @@ void TextureCacheCommon::ClearNextFrame() {
clearCacheNextFrame_ = true;
}
std::string AttachCandidate::ToString() {
return StringFromFormat("[C:%08x/%d Z:%08x/%d X:%d Y:%d reint: %s]", this->fb->fb_address, this->fb->fb_stride, this->fb->z_address, this->fb->z_stride, this->match.xOffset, this->match.yOffset, this->match.reinterpret ? "true" : "false");
std::string AttachCandidate::ToString() const {
return StringFromFormat("[%s seq:%d C:%08x/%d Z:%08x/%d X:%d Y:%d reint: %s]", this->channel == RASTER_COLOR ? "COLOR" : "DEPTH", this->seqCount, this->fb->fb_address, this->fb->fb_stride, this->fb->z_address, this->fb->z_stride, this->match.xOffset, this->match.yOffset, this->match.reinterpret ? "true" : "false");
}
bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEntry *entry) {
@ -2261,11 +2305,6 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt
plan.scaleFactor = 1;
}
// Don't upscale textures in color-to-depth mode.
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
plan.scaleFactor = 1;
}
if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) != 0 && plan.scaleFactor != 1 && plan.slowScaler) {
// Remember for later that we /wanted/ to scale this texture.
entry->status |= TexCacheEntry::STATUS_TO_SCALE;
@ -2374,7 +2413,7 @@ void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, i
}
}
if (replacer_.Enabled()) {
if (replacer_.Enabled() && replaced.IsInvalid()) {
ReplacedTextureDecodeInfo replacedInfo;
replacedInfo.cachekey = entry.CacheKey();
replacedInfo.hash = entry.fullhash;
@ -2405,5 +2444,5 @@ CheckAlphaResult TextureCacheCommon::CheckCLUTAlpha(const uint8_t *pixelData, GE
}
void TextureCacheCommon::StartFrame() {
depalShaderCache_->Decimate();
textureShaderCache_->Decimate();
}

View File

@ -29,7 +29,9 @@
#include "GPU/Common/GPUDebugInterface.h"
#include "GPU/Common/TextureDecoder.h"
#include "GPU/Common/TextureScalerCommon.h"
#include "GPU/Common/DepalettizeCommon.h"
#include "GPU/Common/TextureShaderCommon.h"
class Draw2D;
enum FramebufferNotification {
NOTIFY_FB_CREATED,
@ -206,15 +208,7 @@ typedef std::map<u64, std::unique_ptr<TexCacheEntry>> TexCache;
#undef IGNORE
#endif
enum class FramebufferMatch {
// Valid, exact match.
VALID = 0,
// Not a match, remove if currently attached.
NO_MATCH,
};
struct FramebufferMatchInfo {
FramebufferMatch match;
u32 xOffset;
u32 yOffset;
bool reinterpret;
@ -226,8 +220,9 @@ struct AttachCandidate {
TextureDefinition entry;
VirtualFramebuffer *fb;
RasterChannel channel;
int seqCount;
std::string ToString();
std::string ToString() const;
};
class FramebufferManagerCommon;
@ -278,7 +273,7 @@ struct BuildTexturePlan {
class TextureCacheCommon {
public:
TextureCacheCommon(Draw::DrawContext *draw);
TextureCacheCommon(Draw::DrawContext *draw, Draw2D *draw2D);
virtual ~TextureCacheCommon();
void LoadClut(u32 clutAddr, u32 loadBytes);
@ -298,7 +293,7 @@ public:
void InvalidateAll(GPUInvalidationType type);
void ClearNextFrame();
DepalShaderCache *GetDepalShaderCache() { return depalShaderCache_; }
TextureShaderCache *GetTextureShaderCache() { return textureShaderCache_; }
virtual void ForgetLastTexture() = 0;
virtual void InvalidateLastTexture() = 0;
@ -365,7 +360,7 @@ protected:
SamplerCacheKey GetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight);
void UpdateMaxSeenV(TexCacheEntry *entry, bool throughMode);
FramebufferMatchInfo MatchFramebuffer(const TextureDefinition &entry, VirtualFramebuffer *framebuffer, u32 texaddrOffset, RasterChannel channel) const;
bool MatchFramebuffer(const TextureDefinition &entry, VirtualFramebuffer *framebuffer, u32 texaddrOffset, RasterChannel channel, FramebufferMatchInfo *matchInfo) const;
std::vector<AttachCandidate> GetFramebufferCandidates(const TextureDefinition &entry, u32 texAddrOffset);
int GetBestCandidateIndex(const std::vector<AttachCandidate> &candidates);
@ -407,10 +402,12 @@ protected:
}
Draw::DrawContext *draw_;
Draw2D *draw2D_;
TextureReplacer replacer_;
TextureScalerCommon scaler_;
FramebufferManagerCommon *framebufferManager_;
DepalShaderCache *depalShaderCache_;
TextureShaderCache *textureShaderCache_;
ShaderManagerCommon *shaderManager_;
bool clearCacheNextFrame_ = false;
@ -440,7 +437,9 @@ protected:
SimpleBuf<u32> tmpTexBufRearrange_;
TexCacheEntry *nextTexture_ = nullptr;
bool failedTexture_ = false;
VirtualFramebuffer *nextFramebufferTexture_ = nullptr;
RasterChannel nextFramebufferTextureChannel_ = RASTER_COLOR;
u32 clutHash_ = 0;
@ -449,13 +448,13 @@ protected:
u32 *clutBufConverted_;
// This is the active one.
u32 *clutBuf_;
u32 clutLastFormat_;
u32 clutTotalBytes_;
u32 clutMaxBytes_;
u32 clutRenderAddress_;
u32 clutLastFormat_ = 0xFFFFFFFF;
u32 clutTotalBytes_ = 0;
u32 clutMaxBytes_ = 0;
u32 clutRenderAddress_ = 0xFFFFFFFF;
u32 clutRenderOffset_;
// True if the clut is just alpha values in the same order (RGBA4444-bit only.)
bool clutAlphaLinear_;
bool clutAlphaLinear_ = false;
u16 clutAlphaLinearColor_;
int standardScaleFactor_;
@ -466,7 +465,7 @@ protected:
bool nextNeedsChange_;
bool nextNeedsRebuild_;
bool isBgraBackend_;
bool isBgraBackend_ = false;
u32 expandClut_[256];
};

View File

@ -0,0 +1,295 @@
// Copyright (c) 2014- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <map>
#include "Common/Log.h"
#include "Common/StringUtils.h"
#include "Common/GPU/Shader.h"
#include "Common/GPU/ShaderWriter.h"
#include "Common/Data/Convert/ColorConv.h"
#include "Core/Reporting.h"
#include "GPU/Common/Draw2D.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/TextureCacheCommon.h"
#include "GPU/Common/TextureShaderCommon.h"
#include "GPU/Common/DepalettizeShaderCommon.h"
// Varying table: one highp vec2 named "v_texcoord", bound to Draw::SEM_TEXCOORD0.
// NOTE(review): nothing in this file (as shown) references `varyings` or `samplers`;
// confirm whether they are still needed or are leftovers.
static const VaryingDef varyings[1] = {
{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
};
// Sampler table: slot 0 is "tex", slot 1 is "pal".
static const SamplerDef samplers[2] = {
{ "tex" },
{ "pal" },
};
// Only remembers the draw context and the Draw2D helper; nothing is created here.
TextureShaderCache::TextureShaderCache(Draw::DrawContext *draw, Draw2D *draw2D)
	: draw_(draw),
	  draw2D_(draw2D) {
}
// Releases every cached pipeline, CLUT texture and sampler.
TextureShaderCache::~TextureShaderCache() {
	// Same work DeviceLost() performs: drop everything we own.
	Clear();
}
// Adopts the draw context to use from now on.
// NOTE(review): draw2D_ is not updated here - confirm it remains valid across a device loss/restore.
void TextureShaderCache::DeviceRestore(Draw::DrawContext *draw) {
draw_ = draw;
}
// Drops all cached pipelines, CLUT textures and samplers by delegating to Clear().
void TextureShaderCache::DeviceLost() {
Clear();
}
// Returns the cached palette (CLUT) texture for the given format/hash, creating it on a miss.
//
// clutFormat: palette format of the data in rawClut.
// clutHash:   hash of the palette contents; XORed with the format to form the cache key.
// rawClut:    raw palette data. Read as 256 32-bit entries for GE_CMODE_32BIT_ABGR8888,
//             otherwise as 512 16-bit entries that are expanded to RGBA8888.
//
// The result is a copy of the cache entry. The texture pointer remains owned by this cache
// (it is released in Clear()/Decimate()), so callers must not release it.
// rampLength in the result is the number of leading entries scanned before stopping: it
// includes the first entry where any channel decreases, or is the full entry count if
// no channel ever decreases.
ClutTexture TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut) {
	// Simplistic, but works well enough.
	u32 clutId = clutHash ^ (uint32_t)clutFormat;

	auto oldtex = texCache_.find(clutId);
	if (oldtex != texCache_.end()) {
		// Refresh the age so Decimate() keeps it around.
		oldtex->second->lastFrame = gpuStats.numFlips;
		return *oldtex->second;
	}

	int maxClutEntries = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512;

	ClutTexture *tex = new ClutTexture();

	Draw::TextureDesc desc{};
	desc.width = maxClutEntries;
	desc.height = 1;
	desc.depth = 1;
	desc.mipLevels = 1;
	desc.tag = "clut";
	desc.type = Draw::TextureType::LINEAR2D;  // TODO: Try LINEAR1D?
	desc.format = Draw::DataFormat::R8G8B8A8_UNORM;  // TODO: Also support a BGR format. We won't bother with the 16-bit formats here.

	// 512 entries * 4 bytes. The converters below write this buffer through a u32 pointer,
	// so make sure it is aligned for that rather than relying on where the stack puts it.
	alignas(uint32_t) uint8_t convTemp[2048]{};

	switch (clutFormat) {
	case GEPaletteFormat::GE_CMODE_32BIT_ABGR8888:
		// Already in the target layout, upload straight from the raw palette.
		desc.initData.push_back((const uint8_t *)rawClut);
		break;
	case GEPaletteFormat::GE_CMODE_16BIT_BGR5650:
		ConvertRGB565ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries);
		desc.initData.push_back(convTemp);
		break;
	case GEPaletteFormat::GE_CMODE_16BIT_ABGR5551:
		ConvertRGBA5551ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries);
		desc.initData.push_back(convTemp);
		break;
	case GEPaletteFormat::GE_CMODE_16BIT_ABGR4444:
		ConvertRGBA4444ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries);
		desc.initData.push_back(convTemp);
		break;
	}

	int lastR = 0;
	int lastG = 0;
	int lastB = 0;
	int lastA = 0;

	int rampLength = 0;
	// Quick check for how many continuously growing entries we have at the start.
	// Bilinearly filtering CLUTs only really makes sense for this kind of ramp.
	for (int i = 0; i < maxClutEntries; i++) {
		rampLength = i + 1;
		int r = desc.initData[0][i * 4];
		int g = desc.initData[0][i * 4 + 1];
		int b = desc.initData[0][i * 4 + 2];
		int a = desc.initData[0][i * 4 + 3];
		if (r < lastR || g < lastG || b < lastB || a < lastA) {
			break;
		} else {
			lastR = r;
			lastG = g;
			lastB = b;
			lastA = a;
		}
	}

	// convTemp must stay alive until here: desc.initData only holds a pointer to it.
	tex->texture = draw_->CreateTexture(desc);
	tex->lastFrame = gpuStats.numFlips;
	tex->rampLength = rampLength;
	texCache_[clutId] = tex;
	return *tex;
}
void TextureShaderCache::Clear() {
for (auto shader = depalCache_.begin(); shader != depalCache_.end(); ++shader) {
if (shader->second->pipeline) {
shader->second->pipeline->Release();
}
delete shader->second;
}
depalCache_.clear();
for (auto tex = texCache_.begin(); tex != texCache_.end(); ++tex) {
tex->second->texture->Release();
delete tex->second;
}
texCache_.clear();
if (nearestSampler_) {
nearestSampler_->Release();
nearestSampler_ = nullptr;
}
if (linearSampler_) {
linearSampler_->Release();
linearSampler_ = nullptr;
}
}
// Returns the shared clamp-to-edge sampler, creating it on first request.
// linearFilter selects the variant with LINEAR mag/min filters; otherwise the sampler
// is created with the filter fields left at their SamplerStateDesc defaults.
Draw::SamplerState *TextureShaderCache::GetSampler(bool linearFilter) {
	Draw::SamplerState *&cached = linearFilter ? linearSampler_ : nearestSampler_;
	if (cached) {
		return cached;
	}

	Draw::SamplerStateDesc samplerDesc{};
	if (linearFilter) {
		samplerDesc.magFilter = Draw::TextureFilter::LINEAR;
		samplerDesc.minFilter = Draw::TextureFilter::LINEAR;
	}
	samplerDesc.wrapU = Draw::TextureAddressMode::CLAMP_TO_EDGE;
	samplerDesc.wrapV = Draw::TextureAddressMode::CLAMP_TO_EDGE;
	samplerDesc.wrapW = Draw::TextureAddressMode::CLAMP_TO_EDGE;

	cached = draw_->CreateSamplerState(samplerDesc);
	return cached;
}
// Evicts CLUT textures that have not been requested for more than DEPAL_TEXTURE_OLD_AGE flips.
// Depal pipelines and samplers are left alone.
void TextureShaderCache::Decimate() {
	for (auto tex = texCache_.begin(); tex != texCache_.end(); ) {
		if (tex->second->lastFrame + DEPAL_TEXTURE_OLD_AGE < gpuStats.numFlips) {
			// GetClutTexture() stores the CreateTexture() result unchecked, so it may be null.
			if (tex->second->texture) {
				tex->second->texture->Release();
			}
			delete tex->second;
			// erase() hands back the next valid iterator.
			tex = texCache_.erase(tex);
		} else {
			++tex;
		}
	}
}
// Returns the cached depalettize pipeline for this combination of CLUT mode, texture format,
// buffer format and smoothing, generating it on a miss.
//
// clutMode:      low 24 bits are used as part of the cache key.
// textureFormat: format the framebuffer is being read as.
// bufferFormat:  format of the source buffer; GE_FORMAT_DEPTH16 selects a depth-source pipeline.
// smoothedDepal: passed into the shader config, so it must be part of the cache key too.
//
// Returns nullptr if the underlying pipeline could not be created (on both the first call and
// later cache hits). The entry stays cached, so creation is not retried.
Draw2DPipeline *TextureShaderCache::GetDepalettizeShader(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat, bool smoothedDepal) {
	using namespace Draw;

	// Generate an ID for depal shaders.
	// Layout: [0..23] clutMode, [24..26] textureFormat, [27] smoothedDepal, [28..31] bufferFormat.
	// NOTE(review): assumes textureFormat < 8 here so bit 27 is free - confirm against callers.
	const u32 id = (clutMode & 0xFFFFFF) | ((u32)textureFormat << 24) | ((u32)smoothedDepal << 27) | ((u32)bufferFormat << 28);

	auto shader = depalCache_.find(id);
	if (shader != depalCache_.end()) {
		// Match the miss path below: an entry whose pipeline failed to build is reported as null.
		return shader->second->pipeline ? shader->second : nullptr;
	}

	// TODO: Parse these out of clutMode some nice way, to become a bit more stateless.
	DepalConfig config;
	config.clutFormat = gstate.getClutPaletteFormat();
	config.startPos = gstate.getClutIndexStartPos();
	config.shift = gstate.getClutIndexShift();
	config.mask = gstate.getClutIndexMask();
	config.bufferFormat = bufferFormat;
	config.textureFormat = textureFormat;
	config.smoothedDepal = smoothedDepal;

	// config is captured by copy, so the lambda does not depend on this stack frame.
	Draw2DPipeline *ts = draw2D_->Create2DPipeline([=](ShaderWriter &writer) -> Draw2DPipelineInfo {
		GenerateDepalFs(writer, config);
		return Draw2DPipelineInfo{
			config.bufferFormat == GE_FORMAT_DEPTH16 ? RASTER_DEPTH : RASTER_COLOR,
			RASTER_COLOR,
		};
	});

	depalCache_[id] = ts;

	return ts->pipeline ? ts : nullptr;
}
// Lists the id of every cached depal pipeline as an 8-digit hex string, in map order.
// The type argument is not used.
std::vector<std::string> TextureShaderCache::DebugGetShaderIDs(DebugShaderType type) {
	std::vector<std::string> result;
	for (auto it = depalCache_.begin(); it != depalCache_.end(); ++it) {
		result.push_back(StringFromFormat("%08x", it->first));
	}
	return result;
}
// Returns debug text for the depal pipeline identified by idstr (hex, as produced by
// DebugGetShaderIDs).
//
// SHADER_STRING_SHORT_DESC yields idstr itself, SHADER_STRING_SOURCE_CODE yields the stored
// shader code. Returns an empty string for an unparsable id, an unknown id, or any other
// stringType. The type argument is not used.
std::string TextureShaderCache::DebugGetShaderString(std::string idstr, DebugShaderType type, DebugShaderStringType stringType) {
	uint32_t id = 0;
	// Don't look up an uninitialized id if the string isn't valid hex.
	if (sscanf(idstr.c_str(), "%08x", &id) != 1)
		return "";

	auto iter = depalCache_.find(id);
	if (iter == depalCache_.end())
		return "";
	switch (stringType) {
	case SHADER_STRING_SHORT_DESC:
		return idstr;
	case SHADER_STRING_SOURCE_CODE:
		return iter->second->code;
	default:
		return "";
	}
}
// Draws a 4-vertex quad with the given pipeline into the currently bound target.
//
// bufferW/bufferH: buffer size used to normalize the bounds into clip space and UVs.
// renderW/renderH: size used for the viewport and scissor rectangle.
// bounds:          known vertex UV bounds; when minV < maxV the quad is cropped to them.
// uoff/voff:       offsets added to the bounds before normalizing.
void TextureShaderCache::ApplyShader(Draw2DPipeline *pipeline, float bufferW, float bufferH, int renderW, int renderH, const KnownVertexBounds &bounds, u32 uoff, u32 voff) {
	// Start with a quad covering the whole target, UVs spanning 0..1.
	Draw2DVertex verts[4] = {
		{ -1, -1, 0, 0 },
		{  1, -1, 1, 0 },
		{ -1,  1, 0, 1 },
		{  1,  1, 1, 1 },
	};

	// If min is not < max, then we don't have values (wasn't set during decode.)
	const bool haveBounds = bounds.minV < bounds.maxV;
	if (haveBounds) {
		const float invWidth = 1.0f / bufferW;
		const float invHeight = 1.0f / bufferH;
		// Inverse of half = double.
		const float invHalfWidth = invWidth * 2.0f;
		const float invHalfHeight = invHeight * 2.0f;

		// Offset bounds, in texels.
		const int u1 = bounds.minU + uoff;
		const int v1 = bounds.minV + voff;
		const int u2 = bounds.maxU + uoff;
		const int v2 = bounds.maxV + voff;

		// Positions in -1..1 clip space.
		const float x0 = u1 * invHalfWidth - 1.0f;
		const float x1 = u2 * invHalfWidth - 1.0f;
		const float y0 = v1 * invHalfHeight - 1.0f;
		const float y1 = v2 * invHalfHeight - 1.0f;

		// Matching texture coordinates in 0..1.
		const float s0 = u1 * invWidth;
		const float s1 = u2 * invWidth;
		const float t0 = v1 * invHeight;
		const float t1 = v2 * invHeight;

		// Vertex order: (left, maxV), (right, maxV), (left, minV), (right, minV).
		verts[0] = Draw2DVertex{ x0, y1, s0, t1 };
		verts[1] = Draw2DVertex{ x1, y1, s1, t1 };
		verts[2] = Draw2DVertex{ x0, y0, s0, t0 };
		verts[3] = Draw2DVertex{ x1, y0, s1, t0 };

		// We need to reapply the texture next time since we cropped UV.
		gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
	}

	Draw::Viewport vp{ 0.0f, 0.0f, (float)renderW, (float)renderH, 0.0f, 1.0f };

	draw_->BindPipeline(pipeline->pipeline);
	draw_->SetViewports(1, &vp);
	draw_->SetScissorRect(0, 0, renderW, renderH);
	draw_->DrawUP((const uint8_t *)verts, 4);
}

View File

@ -0,0 +1,69 @@
// Copyright (c) 2014- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include <map>
#include <vector>
#include <string>
#include "Common/CommonTypes.h"
#include "Common/GPU/Shader.h"
#include "Common/GPU/thin3d.h"
#include "GPU/ge_constants.h"
#include "GPU/Common/Draw2D.h"
#include "GPU/Common/ShaderCommon.h"
#include "GPU/Common/DepalettizeShaderCommon.h"
// A palette (CLUT) texture together with the bookkeeping TextureShaderCache keeps for it.
// Members are default-initialized so a plain `ClutTexture t;` never carries garbage.
class ClutTexture {
public:
	// The uploaded palette texture. May be null if creation failed.
	Draw::Texture *texture = nullptr;
	// gpuStats.numFlips value when this entry was last requested; used for eviction.
	int lastFrame = 0;
	// Number of leading palette entries scanned by the monotonic-ramp check.
	int rampLength = 0;
};
// For CLUT depal shaders, and other pre-bind texture shaders.
// Caches both shaders and palette textures.
//
// The cache owns everything stored in depalCache_ / texCache_ as well as both samplers and
// releases them in Clear() (also run by DeviceLost() and the destructor). Pointers handed out
// by the getters stay owned by the cache.
class TextureShaderCache {
public:
	TextureShaderCache(Draw::DrawContext *draw, Draw2D *draw2D);
	~TextureShaderCache();

	// Owns raw resources that are released in the destructor, so copying would double-release.
	TextureShaderCache(const TextureShaderCache &) = delete;
	TextureShaderCache &operator=(const TextureShaderCache &) = delete;

	// Returns the depal pipeline for the given parameters, or nullptr if it could not be built.
	Draw2DPipeline *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat, bool smoothedDepal);
	// Returns (creating if needed) the palette texture for the given format/hash.
	ClutTexture GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut);

	// Returns the shared clamp-to-edge sampler; linearFilter selects the LINEAR-filtered one.
	Draw::SamplerState *GetSampler(bool linearFilter);

	// Draws a quad with the given pipeline, cropped to bounds when they are valid.
	void ApplyShader(Draw2DPipeline *pipeline, float bufferW, float bufferH, int renderW, int renderH, const KnownVertexBounds &bounds, u32 uoff, u32 voff);

	// Releases everything the cache owns.
	void Clear();
	// Evicts palette textures that have not been requested recently.
	void Decimate();

	std::vector<std::string> DebugGetShaderIDs(DebugShaderType type);
	std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType);

	void DeviceLost();
	void DeviceRestore(Draw::DrawContext *draw);

private:
	Draw::DrawContext *draw_;
	// Created lazily by GetSampler().
	Draw::SamplerState *nearestSampler_ = nullptr;
	Draw::SamplerState *linearSampler_ = nullptr;
	Draw2D *draw2D_;

	// Keyed by the shader id built in GetDepalettizeShader().
	std::map<u32, Draw2DPipeline *> depalCache_;
	// Keyed by clutHash ^ clutFormat.
	std::map<u32, ClutTexture *> texCache_;
};

View File

@ -617,12 +617,13 @@ rotateVBO:
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
vpAndScissor);
UpdateCachedViewportState(vpAndScissor);
}
int maxIndex = indexGen.MaxIndex();
SoftwareTransform swTransform(params);
const Lin::Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
const Lin::Vec3 trans(gstate_c.vpXOffset, -gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
const Lin::Vec3 scale(gstate_c.vpWidthScale, -gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
swTransform.SetProjMatrix(gstate.projMatrix, gstate_c.vpWidth < 0, gstate_c.vpHeight < 0, trans, scale);
@ -693,9 +694,6 @@ rotateVBO:
if (gstate.isClearModeAlphaMask()) clearFlag |= Draw::FBChannel::FB_STENCIL_BIT;
if (gstate.isClearModeDepthMask()) clearFlag |= Draw::FBChannel::FB_DEPTH_BIT;
if (clearFlag & Draw::FBChannel::FB_DEPTH_BIT) {
framebufferManager_->SetDepthUpdated();
}
if (clearFlag & Draw::FBChannel::FB_COLOR_BIT) {
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
}

View File

@ -35,7 +35,6 @@
#include "GPU/GeDisasm.h"
#include "GPU/Common/FramebufferManagerCommon.h"
#include "GPU/Debugger/Debugger.h"
#include "GPU/D3D11/ShaderManagerD3D11.h"
#include "GPU/D3D11/GPU_D3D11.h"
#include "GPU/D3D11/FramebufferManagerD3D11.h"
@ -60,7 +59,7 @@ GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
shaderManagerD3D11_ = new ShaderManagerD3D11(draw, device_, context_, featureLevel);
framebufferManagerD3D11_ = new FramebufferManagerD3D11(draw);
framebufferManager_ = framebufferManagerD3D11_;
textureCacheD3D11_ = new TextureCacheD3D11(draw);
textureCacheD3D11_ = new TextureCacheD3D11(draw, framebufferManager_->GetDraw2D());
textureCache_ = textureCacheD3D11_;
drawEngineCommon_ = &drawEngine_;
shaderManager_ = shaderManagerD3D11_;
@ -239,13 +238,6 @@ void GPU_D3D11::BeginFrame() {
gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX);
}
void GPU_D3D11::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
// TODO: Some games like Spongebob - Yellow Avenger, never change framebuffer, they blit to it.
// So breaking on frames doesn't work. Might want to move this to sceDisplay vsync.
GPUDebug::NotifyDisplay(framebuf, stride, format);
framebufferManagerD3D11_->SetDisplayFramebuffer(framebuf, stride, format);
}
void GPU_D3D11::CopyDisplayToOutput(bool reallyDirty) {
// Flush anything left over.
drawEngine_.Flush();
@ -332,8 +324,8 @@ std::vector<std::string> GPU_D3D11::DebugGetShaderIDs(DebugShaderType type) {
switch (type) {
case SHADER_TYPE_VERTEXLOADER:
return drawEngine_.DebugGetVertexLoaderIDs();
case SHADER_TYPE_DEPAL:
return textureCache_->GetDepalShaderCache()->DebugGetShaderIDs(type);
case SHADER_TYPE_TEXTURE:
return textureCache_->GetTextureShaderCache()->DebugGetShaderIDs(type);
default:
return shaderManagerD3D11_->DebugGetShaderIDs(type);
}
@ -343,8 +335,8 @@ std::string GPU_D3D11::DebugGetShaderString(std::string id, DebugShaderType type
switch (type) {
case SHADER_TYPE_VERTEXLOADER:
return drawEngine_.DebugGetVertexLoaderString(id, stringType);
case SHADER_TYPE_DEPAL:
return textureCache_->GetDepalShaderCache()->DebugGetShaderString(id, type, stringType);
case SHADER_TYPE_TEXTURE:
return textureCache_->GetTextureShaderCache()->DebugGetShaderString(id, type, stringType);
default:
return shaderManagerD3D11_->DebugGetShaderString(id, type, stringType);
}

View File

@ -23,7 +23,7 @@
#include "GPU/GPUCommon.h"
#include "GPU/D3D11/DrawEngineD3D11.h"
#include "GPU/Common/DepalettizeCommon.h"
#include "GPU/Common/TextureShaderCommon.h"
#include "GPU/Common/VertexDecoderCommon.h"
class FramebufferManagerD3D11;
@ -41,7 +41,6 @@ public:
void ExecuteOp(u32 op, u32 diff) override;
void ReapplyGfxState() override;
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
void GetStats(char *buffer, size_t bufsize) override;
void ClearCacheNextFrame() override;
void DeviceLost() override; // Only happens on Android. Drop all textures and shaders.

View File

@ -293,21 +293,11 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
GenericStencilFuncState stencilState;
ConvertStencilFuncState(stencilState);
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
// Enforce plain depth writing.
keys_.depthStencil.value = 0;
keys_.depthStencil.depthTestEnable = true;
keys_.depthStencil.depthWriteEnable = true;
keys_.depthStencil.stencilTestEnable = false;
keys_.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;
} else if (gstate.isModeClear()) {
if (gstate.isModeClear()) {
keys_.depthStencil.value = 0;
keys_.depthStencil.depthTestEnable = true;
keys_.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;
keys_.depthStencil.depthWriteEnable = gstate.isClearModeDepthMask();
if (gstate.isClearModeDepthMask()) {
framebufferManager_->SetDepthUpdated();
}
// Stencil Test
bool alphaMask = gstate.isClearModeAlphaMask();
@ -336,9 +326,6 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
keys_.depthStencil.depthTestEnable = true;
keys_.depthStencil.depthCompareOp = compareOps[gstate.getDepthTestFunction()];
keys_.depthStencil.depthWriteEnable = gstate.isDepthWriteEnabled();
if (gstate.isDepthWriteEnabled()) {
framebufferManager_->SetDepthUpdated();
}
} else {
keys_.depthStencil.depthTestEnable = false;
keys_.depthStencil.depthWriteEnable = false;
@ -387,15 +374,13 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
vpAndScissor);
UpdateCachedViewportState(vpAndScissor);
float depthMin = vpAndScissor.depthRangeMin;
float depthMax = vpAndScissor.depthRangeMax;
if (depthMin < 0.0f) depthMin = 0.0f;
if (depthMax > 1.0f) depthMax = 1.0f;
if (vpAndScissor.dirtyDepth) {
gstate_c.Dirty(DIRTY_DEPTHRANGE);
}
Draw::Viewport &vp = dynState_.viewport;
vp.TopLeftX = vpAndScissor.viewportX;
@ -405,10 +390,6 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
vp.MinDepth = depthMin;
vp.MaxDepth = depthMax;
if (vpAndScissor.dirtyProj) {
gstate_c.Dirty(DIRTY_PROJMATRIX);
}
D3D11_RECT &scissor = dynState_.scissor;
scissor.left = vpAndScissor.scissorX;
scissor.top = vpAndScissor.scissorY;

View File

@ -31,7 +31,7 @@
#include "GPU/D3D11/TextureCacheD3D11.h"
#include "GPU/D3D11/FramebufferManagerD3D11.h"
#include "GPU/D3D11/ShaderManagerD3D11.h"
#include "GPU/Common/DepalettizeCommon.h"
#include "GPU/Common/TextureShaderCommon.h"
#include "GPU/D3D11/D3D11Util.h"
#include "GPU/Common/FramebufferManagerCommon.h"
#include "GPU/Common/TextureDecoder.h"
@ -127,8 +127,8 @@ ID3D11SamplerState *SamplerCacheD3D11::GetOrCreateSampler(ID3D11Device *device,
return sampler;
}
TextureCacheD3D11::TextureCacheD3D11(Draw::DrawContext *draw)
: TextureCacheCommon(draw) {
TextureCacheD3D11::TextureCacheD3D11(Draw::DrawContext *draw, Draw2D *draw2D)
: TextureCacheCommon(draw, draw2D) {
device_ = (ID3D11Device *)draw->GetNativeObject(Draw::NativeObject::DEVICE);
context_ = (ID3D11DeviceContext *)draw->GetNativeObject(Draw::NativeObject::CONTEXT);
@ -236,6 +236,11 @@ void TextureCacheD3D11::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBa
}
void TextureCacheD3D11::BindTexture(TexCacheEntry *entry) {
if (!entry) {
ID3D11ShaderResourceView *textureView = nullptr;
context_->PSSetShaderResources(0, 1, &textureView);
return;
}
ID3D11ShaderResourceView *textureView = DxView(entry);
if (textureView != lastBoundTexture) {
context_->PSSetShaderResources(0, 1, &textureView);
@ -460,6 +465,8 @@ bool TextureCacheD3D11::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE);
// We may have blitted to a temp FBO.
framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug");
if (!retval)
ERROR_LOG(G3D, "Failed to get debug texture: copy to memory failed");
return retval;
} else {
return false;

View File

@ -28,7 +28,7 @@
struct VirtualFramebuffer;
class FramebufferManagerD3D11;
class DepalShaderCache;
class TextureShaderCache;
class ShaderManagerD3D11;
class SamplerCacheD3D11 {
@ -43,7 +43,7 @@ private:
class TextureCacheD3D11 : public TextureCacheCommon {
public:
TextureCacheD3D11(Draw::DrawContext *draw);
TextureCacheD3D11(Draw::DrawContext *draw, Draw2D *draw2D);
~TextureCacheD3D11();
void StartFrame() override;

View File

@ -154,6 +154,15 @@ void NotifyDisplay(u32 framebuf, u32 stride, int format) {
}
}
// If the debugger is active and the pending break is VSYNC, convert it into a break on the
// next op. Does nothing otherwise.
void NotifyBeginFrame() {
	const bool waitingForVsync = active && breakNext == BreakNext::VSYNC;
	if (waitingForVsync) {
		// Just start stepping as soon as we can once the vblank finishes.
		breakNext = BreakNext::OP;
	}
}
// Returns the current value of the primsThisFrame counter.
int PrimsThisFrame() {
return primsThisFrame;
}

View File

@ -28,6 +28,7 @@ enum class BreakNext {
TEX,
NONTEX,
FRAME,
VSYNC,
PRIM,
CURVE,
COUNT,
@ -43,6 +44,7 @@ void SetBreakCount(int c, bool relative = false);
bool NotifyCommand(u32 pc);
void NotifyDraw();
void NotifyDisplay(u32 framebuf, u32 stride, int format);
void NotifyBeginFrame();
int PrimsThisFrame();
int PrimsLastFrame();

View File

@ -50,6 +50,7 @@ namespace GPURecord {
static bool active = false;
static bool nextFrame = false;
static int flipLastAction = -1;
static int flipFinishAt = -1;
static std::function<void(const Path &)> writeCallback;
static std::vector<u8> pushbuf;
@ -145,6 +146,7 @@ static void BeginRecording() {
lastTextures.clear();
lastRenderTargets.clear();
flipLastAction = gpuStats.numFlips;
flipFinishAt = -1;
u32 ptr = (u32)pushbuf.size();
u32 sz = 512 * 4;
@ -454,7 +456,9 @@ static void EmitTransfer(u32 op) {
static void EmitClut(u32 op) {
u32 addr = gstate.getClutAddress();
u32 bytes = (op & 0x3F) * 32;
// Actually should only be 0x3F, but we allow enhanced CLUTs. See #15727.
u32 blocks = (op & 0x7F) == 0x40 ? 0x40 : (op & 0x3F);
u32 bytes = blocks * 32;
bytes = Memory::ValidSize(addr, bytes);
if (bytes != 0) {
@ -492,6 +496,7 @@ bool Activate() {
if (!nextFrame) {
nextFrame = true;
flipLastAction = gpuStats.numFlips;
flipFinishAt = -1;
return true;
}
return false;
@ -510,6 +515,7 @@ static void FinishRecording() {
NOTICE_LOG(SYSTEM, "Recording finished");
active = false;
flipLastAction = gpuStats.numFlips;
flipFinishAt = -1;
if (writeCallback)
writeCallback(filename);
@ -671,10 +677,10 @@ void NotifyDisplay(u32 framebuf, int stride, int fmt) {
}
}
void NotifyFrame() {
void NotifyBeginFrame() {
const bool noDisplayAction = flipLastAction + 4 < gpuStats.numFlips;
// We do this only to catch things that don't call NotifyFrame.
if (active && HasDrawCommands() && noDisplayAction) {
// We do this only to catch things that don't call NotifyDisplay.
if (active && HasDrawCommands() && (noDisplayAction || gpuStats.numFlips == flipFinishAt)) {
NOTICE_LOG(SYSTEM, "Recording complete on frame");
struct DisplayBufData {
@ -698,6 +704,8 @@ void NotifyFrame() {
if (nextFrame && (gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0 && noDisplayAction) {
NOTICE_LOG(SYSTEM, "Recording starting on frame...");
BeginRecording();
// If we began on a BeginFrame, end on a BeginFrame.
flipFinishAt = gpuStats.numFlips + 1;
}
}

View File

@ -36,7 +36,7 @@ void NotifyMemcpy(u32 dest, u32 src, u32 sz);
void NotifyMemset(u32 dest, int v, u32 sz);
void NotifyUpload(u32 dest, u32 sz);
void NotifyDisplay(u32 addr, int stride, int fmt);
void NotifyFrame();
void NotifyBeginFrame();
void NotifyCPU();
};

View File

@ -582,18 +582,17 @@ rotateVBO:
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
vpAndScissor);
UpdateCachedViewportState(vpAndScissor);
}
int maxIndex = indexGen.MaxIndex();
SoftwareTransform swTransform(params);
// Half pixel offset hack.
float xScale = gstate_c.vpWidth < 0 ? -1.0f : 1.0f;
float xOffset = -1.0f / gstate_c.curRTRenderWidth;
float yScale = gstate_c.vpHeight > 0 ? -1.0f : 1.0f;
float yOffset = 1.0f / gstate_c.curRTRenderHeight;
const Lin::Vec3 trans(gstate_c.vpXOffset * xScale + xOffset, gstate_c.vpYOffset * yScale + yOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
const Lin::Vec3 trans(gstate_c.vpXOffset + xOffset, -gstate_c.vpYOffset + yOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
const Lin::Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
swTransform.SetProjMatrix(gstate.projMatrix, gstate_c.vpWidth < 0, gstate_c.vpHeight > 0, trans, scale);
@ -640,9 +639,6 @@ rotateVBO:
if (gstate.isClearModeAlphaMask()) mask |= D3DCLEAR_STENCIL;
if (gstate.isClearModeDepthMask()) mask |= D3DCLEAR_ZBUFFER;
if (mask & D3DCLEAR_ZBUFFER) {
framebufferManager_->SetDepthUpdated();
}
if (mask & D3DCLEAR_TARGET) {
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
}

View File

@ -140,7 +140,7 @@
}
const u32 fb_address = vfb->fb_address & 0x3FFFFFFF;
const int dstBpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
const int dstBpp = vfb->fb_format == GE_FORMAT_8888 ? 4 : 2;
// We always need to convert from the framebuffer native format.
// Right now that's always 8888.
@ -163,7 +163,7 @@
// TODO: Handle the other formats? We don't currently create them, I think.
const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp;
// Pixel size always 4 here because we always request BGRA8888.
ConvertFromBGRA8888(Memory::GetPointerWrite(fb_address + dstByteOffset), (u8 *)locked.pBits, vfb->fb_stride, locked.Pitch / 4, w, h, vfb->format);
ConvertFromBGRA8888(Memory::GetPointerWrite(fb_address + dstByteOffset), (u8 *)locked.pBits, vfb->fb_stride, locked.Pitch / 4, w, h, vfb->fb_format);
offscreen->UnlockRect();
} else {
ERROR_LOG_REPORT(G3D, "Unable to lock rect from %08x: %d,%d %dx%d of %dx%d", fb_address, (int)rect.left, (int)rect.top, (int)rect.right, (int)rect.bottom, vfb->renderWidth, vfb->renderHeight);

View File

@ -38,7 +38,6 @@
#include "GPU/GeDisasm.h"
#include "GPU/Common/FramebufferManagerCommon.h"
#include "GPU/Debugger/Debugger.h"
#include "GPU/Directx9/ShaderManagerDX9.h"
#include "GPU/Directx9/GPU_DX9.h"
#include "GPU/Directx9/FramebufferManagerDX9.h"
@ -58,7 +57,7 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
shaderManagerDX9_ = new ShaderManagerDX9(draw, device_);
framebufferManagerDX9_ = new FramebufferManagerDX9(draw);
framebufferManager_ = framebufferManagerDX9_;
textureCacheDX9_ = new TextureCacheDX9(draw);
textureCacheDX9_ = new TextureCacheDX9(draw, framebufferManager_->GetDraw2D());
textureCache_ = textureCacheDX9_;
drawEngineCommon_ = &drawEngine_;
shaderManager_ = shaderManagerDX9_;
@ -160,6 +159,7 @@ void GPU_DX9::CheckGPUFeatures() {
u32 features = 0;
features |= GPU_SUPPORTS_16BIT_FORMATS;
features |= GPU_SUPPORTS_BLEND_MINMAX;
features |= GPU_SUPPORTS_DEPTH_TEXTURE;
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;
// Accurate depth is required because the Direct3D API does not support inverse Z.
@ -285,11 +285,6 @@ void GPU_DX9::BeginFrame() {
framebufferManager_->BeginFrame();
}
void GPU_DX9::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
GPUDebug::NotifyDisplay(framebuf, stride, format);
framebufferManagerDX9_->SetDisplayFramebuffer(framebuf, stride, format);
}
void GPU_DX9::CopyDisplayToOutput(bool reallyDirty) {
dxstate.depthWrite.set(true);
dxstate.colorMask.set(0xF);
@ -374,8 +369,8 @@ std::vector<std::string> GPU_DX9::DebugGetShaderIDs(DebugShaderType type) {
switch (type) {
case SHADER_TYPE_VERTEXLOADER:
return drawEngine_.DebugGetVertexLoaderIDs();
case SHADER_TYPE_DEPAL:
return textureCache_->GetDepalShaderCache()->DebugGetShaderIDs(type);
case SHADER_TYPE_TEXTURE:
return textureCache_->GetTextureShaderCache()->DebugGetShaderIDs(type);
default:
return shaderManagerDX9_->DebugGetShaderIDs(type);
}
@ -385,8 +380,8 @@ std::string GPU_DX9::DebugGetShaderString(std::string id, DebugShaderType type,
switch (type) {
case SHADER_TYPE_VERTEXLOADER:
return drawEngine_.DebugGetVertexLoaderString(id, stringType);
case SHADER_TYPE_DEPAL:
return textureCache_->GetDepalShaderCache()->DebugGetShaderString(id, type, stringType);
case SHADER_TYPE_TEXTURE:
return textureCache_->GetTextureShaderCache()->DebugGetShaderString(id, type, stringType);
default:
return shaderManagerDX9_->DebugGetShaderString(id, type, stringType);
}

View File

@ -23,7 +23,7 @@
#include "GPU/GPUCommon.h"
#include "GPU/Directx9/FramebufferManagerDX9.h"
#include "GPU/Directx9/DrawEngineDX9.h"
#include "GPU/Common/DepalettizeCommon.h"
#include "GPU/Common/TextureShaderCommon.h"
#include "GPU/Common/VertexDecoderCommon.h"
class ShaderManagerDX9;
@ -40,7 +40,6 @@ public:
void ExecuteOp(u32 op, u32 diff) override;
void ReapplyGfxState() override;
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
void GetStats(char *buffer, size_t bufsize) override;
void ClearCacheNextFrame() override;
void DeviceLost() override; // Only happens on Android. Drop all textures and shaders.

View File

@ -211,21 +211,11 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
ConvertStencilFuncState(stencilState);
// Set Stencil/Depth
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
// Enforce plain depth writing.
dxstate.depthTest.enable();
dxstate.depthFunc.set(D3DCMP_ALWAYS);
dxstate.depthWrite.set(true);
dxstate.stencilTest.disable();
} else if (gstate.isModeClear()) {
if (gstate.isModeClear()) {
// Depth Test
dxstate.depthTest.enable();
dxstate.depthFunc.set(D3DCMP_ALWAYS);
dxstate.depthWrite.set(gstate.isClearModeDepthMask());
if (gstate.isClearModeDepthMask()) {
framebufferManager_->SetDepthUpdated();
}
// Stencil Test
bool alphaMask = gstate.isClearModeAlphaMask();
@ -246,9 +236,6 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
dxstate.depthTest.enable();
dxstate.depthFunc.set(ztests[gstate.getDepthTestFunction()]);
dxstate.depthWrite.set(gstate.isDepthWriteEnabled());
if (gstate.isDepthWriteEnabled()) {
framebufferManager_->SetDepthUpdated();
}
} else {
dxstate.depthTest.disable();
}
@ -273,6 +260,7 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
vpAndScissor);
UpdateCachedViewportState(vpAndScissor);
dxstate.scissorTest.enable();
dxstate.scissorRect.set(vpAndScissor.scissorX, vpAndScissor.scissorY, vpAndScissor.scissorX + vpAndScissor.scissorW, vpAndScissor.scissorY + vpAndScissor.scissorH);
@ -281,12 +269,6 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
float depthMax = vpAndScissor.depthRangeMax;
dxstate.viewport.set(vpAndScissor.viewportX, vpAndScissor.viewportY, vpAndScissor.viewportW, vpAndScissor.viewportH, depthMin, depthMax);
if (vpAndScissor.dirtyProj) {
gstate_c.Dirty(DIRTY_PROJMATRIX);
}
if (vpAndScissor.dirtyDepth) {
gstate_c.Dirty(DIRTY_DEPTHRANGE);
}
}
gstate_c.Clean(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_BLEND_STATE);

View File

@ -28,7 +28,7 @@
#include "GPU/Directx9/FramebufferManagerDX9.h"
#include "GPU/Directx9/ShaderManagerDX9.h"
#include "Common/GPU/D3D9/D3D9StateCache.h"
#include "GPU/Common/DepalettizeCommon.h"
#include "GPU/Common/TextureShaderCommon.h"
#include "GPU/Common/FramebufferManagerCommon.h"
#include "GPU/Common/TextureDecoder.h"
#include "Core/Config.h"
@ -59,8 +59,8 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = {
D3DDECL_END()
};
TextureCacheDX9::TextureCacheDX9(Draw::DrawContext *draw)
: TextureCacheCommon(draw) {
TextureCacheDX9::TextureCacheDX9(Draw::DrawContext *draw, Draw2D *draw2D)
: TextureCacheCommon(draw, draw2D) {
lastBoundTexture = INVALID_TEX;
isBgraBackend_ = true;
@ -204,6 +204,10 @@ void TextureCacheDX9::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase
}
void TextureCacheDX9::BindTexture(TexCacheEntry *entry) {
if (!entry) {
device_->SetTexture(0, nullptr);
return;
}
LPDIRECT3DBASETEXTURE9 texture = DxTex(entry);
if (texture != lastBoundTexture) {
device_->SetTexture(0, texture);
@ -215,7 +219,7 @@ void TextureCacheDX9::BindTexture(TexCacheEntry *entry) {
}
void TextureCacheDX9::Unbind() {
device_->SetTexture(0, NULL);
device_->SetTexture(0, nullptr);
InvalidateLastTexture();
}

View File

@ -24,14 +24,14 @@
#include "GPU/Common/TextureCacheCommon.h"
struct VirtualFramebuffer;
class DepalShaderCache;
class TextureShaderCache;
class FramebufferManagerDX9;
class ShaderManagerDX9;
class TextureCacheDX9 : public TextureCacheCommon {
public:
TextureCacheDX9(Draw::DrawContext *draw);
TextureCacheDX9(Draw::DrawContext *draw, Draw2D *draw2D);
~TextureCacheDX9();
void StartFrame() override;

View File

@ -364,6 +364,7 @@ void DrawEngineGLES::DoFlush() {
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
vpAndScissor);
UpdateCachedViewportState(vpAndScissor);
}
int maxIndex = indexGen.MaxIndex();
@ -423,9 +424,6 @@ void DrawEngineGLES::DoFlush() {
bool colorMask = gstate.isClearModeColorMask();
bool alphaMask = gstate.isClearModeAlphaMask();
bool depthMask = gstate.isClearModeDepthMask();
if (depthMask) {
framebufferManager_->SetDepthUpdated();
}
GLbitfield target = 0;
// Without this, we will clear RGB when clearing stencil, which breaks games.

View File

@ -36,7 +36,6 @@
#include "GPU/ge_constants.h"
#include "GPU/GeDisasm.h"
#include "GPU/Common/FramebufferManagerCommon.h"
#include "GPU/Debugger/Debugger.h"
#include "GPU/GLES/ShaderManagerGLES.h"
#include "GPU/GLES/GPU_GLES.h"
#include "GPU/GLES/FramebufferManagerGLES.h"
@ -60,7 +59,7 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
shaderManagerGL_ = new ShaderManagerGLES(draw);
framebufferManagerGL_ = new FramebufferManagerGLES(draw);
framebufferManager_ = framebufferManagerGL_;
textureCacheGL_ = new TextureCacheGLES(draw);
textureCacheGL_ = new TextureCacheGLES(draw, framebufferManager_->GetDraw2D());
textureCache_ = textureCacheGL_;
drawEngineCommon_ = &drawEngine_;
shaderManager_ = shaderManagerGL_;
@ -360,11 +359,6 @@ void GPU_GLES::BeginFrame() {
framebufferManagerGL_->BeginFrame();
}
void GPU_GLES::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
GPUDebug::NotifyDisplay(framebuf, stride, format);
framebufferManagerGL_->SetDisplayFramebuffer(framebuf, stride, format);
}
void GPU_GLES::CopyDisplayToOutput(bool reallyDirty) {
// Flush anything left over.
framebufferManagerGL_->RebindFramebuffer("RebindFramebuffer - CopyDisplayToOutput");
@ -453,8 +447,8 @@ std::vector<std::string> GPU_GLES::DebugGetShaderIDs(DebugShaderType type) {
switch (type) {
case SHADER_TYPE_VERTEXLOADER:
return drawEngine_.DebugGetVertexLoaderIDs();
case SHADER_TYPE_DEPAL:
return textureCache_->GetDepalShaderCache()->DebugGetShaderIDs(type);
case SHADER_TYPE_TEXTURE:
return textureCache_->GetTextureShaderCache()->DebugGetShaderIDs(type);
default:
return shaderManagerGL_->DebugGetShaderIDs(type);
}
@ -464,8 +458,8 @@ std::string GPU_GLES::DebugGetShaderString(std::string id, DebugShaderType type,
switch (type) {
case SHADER_TYPE_VERTEXLOADER:
return drawEngine_.DebugGetVertexLoaderString(id, stringType);
case SHADER_TYPE_DEPAL:
return textureCache_->GetDepalShaderCache()->DebugGetShaderString(id, type, stringType);
case SHADER_TYPE_TEXTURE:
return textureCache_->GetTextureShaderCache()->DebugGetShaderString(id, type, stringType);
default:
return shaderManagerGL_->DebugGetShaderString(id, type, stringType);
}

View File

@ -23,7 +23,7 @@
#include "Common/File/Path.h"
#include "GPU/GPUCommon.h"
#include "GPU/Common/DepalettizeCommon.h"
#include "GPU/Common/TextureShaderCommon.h"
#include "GPU/GLES/FramebufferManagerGLES.h"
#include "GPU/GLES/DrawEngineGLES.h"
#include "GPU/GLES/FragmentTestCacheGLES.h"
@ -47,7 +47,6 @@ public:
void ExecuteOp(u32 op, u32 diff) override;
void ReapplyGfxState() override;
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
void GetStats(char *buffer, size_t bufsize) override;
void ClearCacheNextFrame() override;

View File

@ -251,24 +251,14 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
GenericStencilFuncState stencilState;
ConvertStencilFuncState(stencilState);
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
// Enforce plain depth writing.
renderManager->SetStencilDisabled();
renderManager->SetDepth(true, true, GL_ALWAYS);
} else if (gstate.isModeClear()) {
if (gstate.isModeClear()) {
// Depth Test
if (gstate.isClearModeDepthMask()) {
framebufferManager_->SetDepthUpdated();
}
renderManager->SetStencilFunc(gstate.isClearModeAlphaMask(), GL_ALWAYS, 0xFF, 0xFF);
renderManager->SetStencilOp(stencilState.writeMask, GL_REPLACE, GL_REPLACE, GL_REPLACE);
renderManager->SetDepth(true, gstate.isClearModeDepthMask() ? true : false, GL_ALWAYS);
} else {
// Depth Test
renderManager->SetDepth(gstate.isDepthTestEnabled(), gstate.isDepthWriteEnabled(), compareOps[gstate.getDepthTestFunction()]);
if (gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled()) {
framebufferManager_->SetDepthUpdated();
}
// Stencil Test
if (stencilState.enabled) {
@ -286,19 +276,13 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
vpAndScissor);
UpdateCachedViewportState(vpAndScissor);
renderManager->SetScissor(GLRect2D{ vpAndScissor.scissorX, vpAndScissor.scissorY, vpAndScissor.scissorW, vpAndScissor.scissorH });
renderManager->SetViewport({
vpAndScissor.viewportX, vpAndScissor.viewportY,
vpAndScissor.viewportW, vpAndScissor.viewportH,
vpAndScissor.depthRangeMin, vpAndScissor.depthRangeMax });
if (vpAndScissor.dirtyProj) {
gstate_c.Dirty(DIRTY_PROJMATRIX);
}
if (vpAndScissor.dirtyDepth) {
gstate_c.Dirty(DIRTY_DEPTHRANGE);
}
}
}

View File

@ -36,7 +36,7 @@
#include "GPU/GLES/TextureCacheGLES.h"
#include "GPU/GLES/FramebufferManagerGLES.h"
#include "GPU/Common/FragmentShaderGenerator.h"
#include "GPU/Common/DepalettizeCommon.h"
#include "GPU/Common/TextureShaderCommon.h"
#include "GPU/GLES/ShaderManagerGLES.h"
#include "GPU/GLES/DrawEngineGLES.h"
#include "GPU/Common/TextureDecoder.h"
@ -45,8 +45,8 @@
#include <emmintrin.h>
#endif
TextureCacheGLES::TextureCacheGLES(Draw::DrawContext *draw)
: TextureCacheCommon(draw) {
TextureCacheGLES::TextureCacheGLES(Draw::DrawContext *draw, Draw2D *draw2D)
: TextureCacheCommon(draw, draw2D) {
render_ = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
nextTexture_ = nullptr;
@ -225,7 +225,7 @@ void TextureCacheGLES::BindTexture(TexCacheEntry *entry) {
int maxLevel = (entry->status & TexCacheEntry::STATUS_NO_MIPS) ? 0 : entry->maxLevel;
SamplerCacheKey samplerKey = GetSamplingParams(maxLevel, entry);
ApplySamplingParams(samplerKey);
gstate_c.SetUseShaderDepal(false);
gstate_c.SetUseShaderDepal(false, false);
}
void TextureCacheGLES::Unbind() {
@ -382,19 +382,8 @@ Draw::DataFormat TextureCacheGLES::GetDestFormat(GETextureFormat format, GEPalet
}
bool TextureCacheGLES::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) {
GPUgstate saved;
if (level != 0) {
saved = gstate;
// The way we set textures is a bit complex. Let's just override level 0.
gstate.texsize[0] = gstate.texsize[level];
gstate.texaddr[0] = gstate.texaddr[level];
gstate.texbufwidth[0] = gstate.texbufwidth[level];
}
InvalidateLastTexture();
SetTexture();
if (!nextTexture_) {
if (nextFramebufferTexture_) {
VirtualFramebuffer *vfb = nextFramebufferTexture_;
@ -427,10 +416,6 @@ bool TextureCacheGLES::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level)
int w = gstate.getTextureWidth(level);
int h = gstate.getTextureHeight(level);
if (level != 0) {
gstate = saved;
}
bool result = entry->textureName != nullptr;
if (result) {
buffer.Allocate(w, h, GE_FORMAT_8888, false);
@ -445,7 +430,7 @@ bool TextureCacheGLES::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level)
}
void TextureCacheGLES::DeviceLost() {
depalShaderCache_->DeviceLost();
textureShaderCache_->DeviceLost();
Clear(false);
draw_ = nullptr;
render_ = nullptr;
@ -454,5 +439,5 @@ void TextureCacheGLES::DeviceLost() {
void TextureCacheGLES::DeviceRestore(Draw::DrawContext *draw) {
draw_ = draw;
render_ = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
depalShaderCache_->DeviceRestore(draw);
textureShaderCache_->DeviceRestore(draw);
}

View File

@ -27,22 +27,22 @@
struct VirtualFramebuffer;
class FramebufferManagerGLES;
class DepalShaderCache;
class TextureShaderCache;
class ShaderManagerGLES;
class DrawEngineGLES;
class GLRTexture;
class TextureCacheGLES : public TextureCacheCommon {
public:
TextureCacheGLES(Draw::DrawContext *draw);
TextureCacheGLES(Draw::DrawContext *draw, Draw2D *draw2D);
~TextureCacheGLES();
void Clear(bool delete_them) override;
void StartFrame() override;
void SetFramebufferManager(FramebufferManagerGLES *fbManager);
void SetDepalShaderCache(DepalShaderCache *dpCache) {
depalShaderCache_ = dpCache;
void SetDepalShaderCache(TextureShaderCache *dpCache) {
textureShaderCache_ = dpCache;
}
void SetDrawEngine(DrawEngineGLES *td) {
drawEngine_ = td;

View File

@ -24,11 +24,6 @@ class GPUInterface;
class GPUDebugInterface;
class GraphicsContext;
enum RasterMode {
RASTER_MODE_NORMAL = 0,
RASTER_MODE_COLOR_TO_DEPTH = 1,
};
// PSP rasterization has two outputs, color and depth. Stencil is packed
// into the alpha channel of color (if exists), so possibly RASTER_COLOR
// should be named RASTER_COLOR_STENCIL but it gets kinda hard to read.
@ -89,6 +84,8 @@ struct GPUStatistics {
numUploads = 0;
numClears = 0;
numDepthCopies = 0;
numReinterpretCopies = 0;
numColorCopies = 0;
msProcessingDisplayLists = 0;
vertexGPUCycles = 0;
otherGPUCycles = 0;
@ -115,6 +112,8 @@ struct GPUStatistics {
int numUploads;
int numClears;
int numDepthCopies;
int numReinterpretCopies;
int numColorCopies;
double msProcessingDisplayLists;
int vertexGPUCycles;
int otherGPUCycles;

View File

@ -338,7 +338,7 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="..\ext\xbrz\xbrz.h" />
<ClInclude Include="Common\DepalettizeCommon.h" />
<ClInclude Include="Common\TextureShaderCommon.h" />
<ClInclude Include="Common\Draw2D.h" />
<ClInclude Include="Common\ReinterpretFramebuffer.h" />
<ClInclude Include="Common\DepalettizeShaderCommon.h" />
@ -452,7 +452,7 @@
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\ext\xbrz\xbrz.cpp" />
<ClCompile Include="Common\DepalettizeCommon.cpp" />
<ClCompile Include="Common\TextureShaderCommon.cpp" />
<ClCompile Include="Common\Draw2D.cpp" />
<ClCompile Include="Common\ReinterpretFramebuffer.cpp" />
<ClCompile Include="Common\DepalettizeShaderCommon.cpp" />

View File

@ -255,7 +255,7 @@
<ClInclude Include="Common\Draw2D.h">
<Filter>Common</Filter>
</ClInclude>
<ClInclude Include="Common\DepalettizeCommon.h">
<ClInclude Include="Common\TextureShaderCommon.h">
<Filter>Common</Filter>
</ClInclude>
</ItemGroup>
@ -503,7 +503,7 @@
<ClCompile Include="Common\Draw2D.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="Common\DepalettizeCommon.cpp">
<ClCompile Include="Common\TextureShaderCommon.cpp">
<Filter>Common</Filter>
</ClCompile>
</ItemGroup>

View File

@ -1109,7 +1109,8 @@ void GPUCommon::BeginFrame() {
} else if (dumpThisFrame_) {
dumpThisFrame_ = false;
}
GPURecord::NotifyFrame();
GPUDebug::NotifyBeginFrame();
GPURecord::NotifyBeginFrame();
}
void GPUCommon::SlowRunLoop(DisplayList &list)
@ -1624,6 +1625,21 @@ void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_CULLRANGE);
}
// Records that the current draw uses the depth buffer of vfb, the first time that
// happens while gstate_c.usingDepth is still false.
//
// A draw counts as using depth when depth testing is enabled or when it is a
// clear-mode draw with the depth mask set. In that case the per-framebuffer
// "last frame" counters are stamped with the current flip count and the
// framebuffer manager is asked to set up the depth framebuffer.
void GPUCommon::CheckDepthUsage(VirtualFramebuffer *vfb) {
	// The flag is already set, so there is nothing new to record.
	if (gstate_c.usingDepth)
		return;

	const bool depthClear = gstate.isModeClear() && gstate.isClearModeDepthMask();
	const bool touchesDepth = gstate.isDepthTestEnabled() || depthClear;
	if (!touchesDepth)
		return;

	gstate_c.usingDepth = true;
	gstate_c.clearingDepth = depthClear;

	// Depth was rendered with this flip; it only counts as updated if it is written.
	vfb->last_frame_depth_render = gpuStats.numFlips;
	const bool writesDepth = depthClear || gstate.isDepthWriteEnabled();
	if (writesDepth)
		vfb->last_frame_depth_updated = gpuStats.numFlips;

	framebufferManager_->SetDepthFrameBuffer(depthClear);
}
void GPUCommon::Execute_Prim(u32 op, u32 diff) {
// This drives all drawing. All other state we just buffer up, then we apply it only
@ -1685,6 +1701,8 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
return;
}
CheckDepthUsage(vfb);
const void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
const void *inds = nullptr;
u32 vertexType = gstate.vertType;
@ -1883,12 +1901,14 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
gstate_c.framebufFormat = gstate.FrameBufFormat();
// This also make skipping drawing very effective.
framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
// TODO: Should this eat some cycles? Probably yes. Not sure if important.
return;
}
CheckDepthUsage(vfb);
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
return;
@ -1953,12 +1973,14 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) {
gstate_c.framebufFormat = gstate.FrameBufFormat();
// This also make skipping drawing very effective.
framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
// TODO: Should this eat some cycles? Probably yes. Not sure if important.
return;
}
CheckDepthUsage(vfb);
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
return;
@ -2686,7 +2708,8 @@ void GPUCommon::ResetListState(int listID, DisplayListState state) {
GPUDebugOp GPUCommon::DissassembleOp(u32 pc, u32 op) {
char buffer[1024];
GeDisassembleOp(pc, op, Memory::Read_U32(pc - 4), buffer, sizeof(buffer));
u32 prev = Memory::IsValidAddress(pc - 4) ? Memory::ReadUnchecked_U32(pc - 4) : 0;
GeDisassembleOp(pc, op, prev, buffer, sizeof(buffer));
GPUDebugOp info;
info.pc = pc;
@ -2744,6 +2767,10 @@ void GPUCommon::SetCmdValue(u32 op) {
downcount = 0;
}
// Common implementation: passes the display framebuffer address, stride and
// format straight through to the framebuffer manager.
void GPUCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
framebufferManager_->SetDisplayFramebuffer(framebuf, stride, format);
}
void GPUCommon::DoBlockTransfer(u32 skipDrawReason) {
// TODO: This is used a lot to copy data around between render targets and textures,
// and also to quickly load textures from RAM to VRAM. So we should do checks like the following:
@ -3041,7 +3068,8 @@ size_t GPUCommon::FormatGPUStatsCommon(char *buffer, size_t size) {
"Vertices: %d cached: %d uncached: %d\n"
"FBOs active: %d (evaluations: %d)\n"
"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB\n"
"Readbacks: %d, uploads: %d, depth copies: %d\n"
"Readbacks: %d, uploads: %d\n"
"Copies: depth %d, color %d, reinterpret: %d\n"
"GPU cycles executed: %d (%f per vertex)\n",
gpuStats.msProcessingDisplayLists * 1000.0f,
gpuStats.numDrawCalls,
@ -3062,6 +3090,8 @@ size_t GPUCommon::FormatGPUStatsCommon(char *buffer, size_t size) {
gpuStats.numReadbacks,
gpuStats.numUploads,
gpuStats.numDepthCopies,
gpuStats.numColorCopies,
gpuStats.numReinterpretCopies,
gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
vertexAverageCycles
);

View File

@ -15,6 +15,8 @@ class FramebufferManagerCommon;
class TextureCacheCommon;
class DrawEngineCommon;
class GraphicsContext;
struct VirtualFramebuffer;
namespace Draw {
class DrawContext;
}
@ -115,6 +117,7 @@ public:
u32 Break(int mode) override;
void ReapplyGfxState() override;
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
void CopyDisplayToOutput(bool reallyDirty) override = 0;
void InitClear() override = 0;
bool PerformMemoryCopy(u32 dest, u32 src, int size) override;
@ -282,17 +285,11 @@ protected:
void SlowRunLoop(DisplayList &list);
void UpdatePC(u32 currentPC, u32 newPC);
void UpdateState(GPURunState state);
void PopDLQueue();
void CheckDrawSync();
int GetNextListIndex();
virtual void FastLoadBoneMatrix(u32 target);
void FastLoadBoneMatrix(u32 target);
// TODO: Unify this.
virtual void FinishDeferred() {}
void DoBlockTransfer(u32 skipDrawReason);
void DoExecuteCall(u32 target);
void AdvanceVerts(u32 vertType, int count, int bytesRead) {
if ((vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
int indexShift = ((vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT) - 1;
@ -362,6 +359,13 @@ protected:
private:
void FlushImm();
void CheckDepthUsage(VirtualFramebuffer *vfb);
void DoBlockTransfer(u32 skipDrawReason);
void DoExecuteCall(u32 target);
void PopDLQueue();
void CheckDrawSync();
int GetNextListIndex();
// Debug stats.
double timeSteppingStarted_;
double timeSpentStepping_;

View File

@ -300,8 +300,14 @@ struct GPUgstate {
bool isTextureFormatIndexed() const { return (texformat & 4) != 0; } // GE_TFMT_CLUT4 - GE_TFMT_CLUT32 are 0b1xx.
int getTextureEnvColRGB() const { return texenvcolor & 0x00FFFFFF; }
u32 getClutAddress() const { return (clutaddr & 0x00FFFFF0) | ((clutaddrupper << 8) & 0x0F000000); }
int getClutLoadBytes() const { return (loadclut & 0x7F) * 32; }
int getClutLoadBlocks() const { return (loadclut & 0x7F); }
int getClutLoadBytes() const { return getClutLoadBlocks() * 32; }
int getClutLoadBlocks() const {
	// Only the low 7 bits of the register take part in the decision.
	const int requested = loadclut & 0x7F;
	// The PSP only supports counts up to 0x3F. Misshitsu no Sacrifice has extra
	// color data (see #15727), and since 0x40 would otherwise mask down to 0
	// (a no-op load), exactly 0x40 is let through unchanged.
	return requested == 0x40 ? 0x40 : (requested & 0x3F);
}
GEPaletteFormat getClutPaletteFormat() const { return static_cast<GEPaletteFormat>(clutformat & 3); }
int getClutIndexShift() const { return (clutformat >> 2) & 0x1F; }
int getClutIndexMask() const { return (clutformat >> 8) & 0xFF; }
@ -523,9 +529,10 @@ struct GPUStateCache {
bool IsDirty(u64 what) const {
return (dirty & what) != 0ULL;
}
void SetUseShaderDepal(bool depal) {
// Stores the shader depal flags and marks the fragment shader state dirty when
// either of them differs from the cached value.
//   depal    - new value for useShaderDepal.
//   smoothed - new value for useSmoothedShaderDepal.
void SetUseShaderDepal(bool depal, bool smoothed) {
	// Both flags must be compared: checking only `depal` would drop a call that
	// keeps depal the same but changes `smoothed`, leaving a stale
	// useSmoothedShaderDepal and no dirty flag.
	if (depal != useShaderDepal || smoothed != useSmoothedShaderDepal) {
		useShaderDepal = depal;
		useSmoothedShaderDepal = smoothed;
		Dirty(DIRTY_FRAGMENTSHADER_STATE);
	}
}
@ -555,14 +562,6 @@ struct GPUStateCache {
Dirty(DIRTY_FRAGMENTSHADER_STATE | (is3D ? DIRTY_MIPBIAS : 0));
}
}
void SetFramebufferRenderMode(RasterMode mode) {
if (mode != renderMode) {
// This mode modifies the fragment shader to write depth, the depth state to write without testing, and the blend state to write nothing to color.
// So we need to re-evaluate those states.
Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_TEXTURE_PARAMS);
renderMode = mode;
}
}
u32 featureFlags;
@ -572,6 +571,9 @@ struct GPUStateCache {
uint64_t dirty;
bool usingDepth; // For deferred depth copies.
bool clearingDepth;
bool textureFullAlpha;
bool vertexFullAlpha;
@ -613,9 +615,6 @@ struct GPUStateCache {
// We detect this case and go into a special drawing mode.
bool blueToAlpha;
// Some games try to write to the Z buffer using color. Catch that and actually do the writes to the Z buffer instead.
RasterMode renderMode;
// TODO: These should be accessed from the current VFB object directly.
u32 curRTWidth;
u32 curRTHeight;
@ -637,6 +636,7 @@ struct GPUStateCache {
int spline_num_points_u;
bool useShaderDepal;
bool useSmoothedShaderDepal;
GEBufferFormat depalFramebufferFormat;
u32 getRelativeAddress(u32 data) const;

View File

@ -6,6 +6,7 @@ const char *GeBufferFormatToString(GEBufferFormat fmt) {
case GE_FORMAT_5551: return "5551";
case GE_FORMAT_565: return "565";
case GE_FORMAT_8888: return "8888";
case GE_FORMAT_DEPTH16: return "DEPTH16";
default: return "N/A";
}
}

View File

@ -210,6 +210,9 @@ void BinManager::UpdateState() {
}
if (HasDirty(SoftDirty::BINNER_OVERLAP)) {
// This is a good place to record any dependencies for block transfer overlap.
MarkPendingReads(state);
// Disallow threads when rendering to the target, even offset.
bool selfRender = HasTextureWrite(state);
int newMaxTasks = selfRender ? 1 : g_threadManager.GetNumLooperThreads();
@ -251,6 +254,34 @@ bool BinManager::HasTextureWrite(const RasterizerState &state) {
return false;
}
// Records, per texture level, the memory range the given rasterizer state will
// sample from, keyed by the level's texture address in pendingReads_.
// An existing entry is only grown (never shrunk) when the new extent is larger.
void BinManager::MarkPendingReads(const Rasterizer::RasterizerState &state) {
	// Without texturing there are no texture reads to track.
	if (!state.enableTextures)
		return;

	const uint8_t bitsPerTexel = textureBitsPerPixel[state.samplerID.texfmt];
	for (int level = 0; level <= state.maxTexLevel; ++level) {
		const uint32_t addr = state.texaddr[level];
		uint32_t strideBytes = (state.texbufw[level] * bitsPerTexel) / 8;
		uint32_t widthBytes = (state.samplerID.cached.sizes[level].w * bitsPerTexel) / 8;
		uint32_t height = state.samplerID.cached.sizes[level].h;

		auto found = pendingReads_.find(addr);
		if (found == pendingReads_.end()) {
			// First read from this address: create the entry from scratch.
			auto &fresh = pendingReads_[addr];
			fresh.base = addr;
			fresh.strideBytes = strideBytes;
			fresh.widthBytes = widthBytes;
			fresh.height = height;
			continue;
		}

		// Extent = full rows up to the last one, plus the used part of the last row.
		auto &known = found->second;
		const uint32_t newExtent = strideBytes * (height - 1) + widthBytes;
		const uint32_t oldExtent = known.strideBytes * (known.height - 1) + known.widthBytes;
		if (oldExtent >= newExtent)
			continue;

		known.strideBytes = std::max(known.strideBytes, strideBytes);
		known.widthBytes = std::max(known.widthBytes, widthBytes);
		known.height = std::max(known.height, height);
	}
}
inline void BinDirtyRange::Expand(uint32_t newBase, uint32_t bpp, uint32_t stride, DrawingCoords &tl, DrawingCoords &br) {
const uint32_t w = br.x - tl.x + 1;
const uint32_t h = br.y - tl.y + 1;
@ -465,9 +496,10 @@ void BinManager::Flush(const char *reason) {
for (auto &pending : pendingWrites_)
pending.base = 0;
pendingOverlap_ = false;
pendingReads_.clear();
// We'll need to set the pending writes again, since we just flushed it.
dirty_ |= SoftDirty::BINNER_RANGE;
// We'll need to set the pending writes and reads again, since we just flushed it.
dirty_ |= SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP;
if (coreCollectDebugStats) {
double et = time_now_d();
@ -486,7 +518,7 @@ bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, ui
// Ignore mirrors for overlap detection.
start &= 0x0FFFFFFF & ~0x00600000;
uint32_t size = stride * h;
uint32_t size = stride * (h - 1) + w;
for (const auto &range : pendingWrites_) {
if (range.base == 0 || range.strideBytes == 0)
continue;
@ -512,6 +544,28 @@ bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, ui
return false;
}
// Returns true when the queried region (start, stride, w, h) overlaps any
// range recorded in pendingReads_. The test is conservative: gaps between
// rows of a recorded range are treated as part of the range.
bool BinManager::HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h) {
	// For VRAM, mirrors are ignored; otherwise only the regular RAM mirrors are.
	const uint32_t mirrorMask = Memory::IsVRAMAddress(start) ? (0x0FFFFFFF & ~0x00600000) : 0x3FFFFFFF;
	start &= mirrorMask;

	// Full rows up to the last one, plus the used width of the last row.
	const uint32_t size = stride * (h - 1) + w;
	for (const auto &entry : pendingReads_) {
		const auto &pending = entry.second;
		// Stride gaps are uncommon with reads, so the whole span counts as read.
		const auto pendingEnd = pending.base + pending.height * pending.strideBytes;
		if (start < pendingEnd && start + size > pending.base)
			return true;
	}

	return false;
}
void BinManager::GetStats(char *buffer, size_t bufsize) {
double allTotal = 0.0;
double slowestTotalTime = 0.0;

View File

@ -198,6 +198,8 @@ public:
void Drain();
void Flush(const char *reason);
bool HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);
// Assumes you've also checked for a write (writes are partial so are automatically reads.)
bool HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);
void GetStats(char *buffer, size_t bufsize);
void ResetStats();
@ -252,6 +254,8 @@ private:
BinWaitable *waitable_ = nullptr;
BinDirtyRange pendingWrites_[2]{};
std::unordered_map<uint32_t, BinDirtyRange> pendingReads_;
bool pendingOverlap_ = false;
std::unordered_map<const char *, double> flushReasonTimes_;
@ -262,6 +266,7 @@ private:
int enqueues_ = 0;
int mostThreads_ = 0;
void MarkPendingReads(const Rasterizer::RasterizerState &state);
bool HasTextureWrite(const Rasterizer::RasterizerState &state);
BinCoords Scissor(BinCoords range);
BinCoords Range(const VertexData &v0, const VertexData &v1, const VertexData &v2);

View File

@ -1408,7 +1408,7 @@ bool GetCurrentTexture(GPUDebugBuffer &buffer, int level)
SamplerID id;
ComputeSamplerID(&id);
id.cached.clut = (const u8 *)clut;
id.cached.clut = clut;
Sampler::FetchFunc sampler = Sampler::GetFetchFunc(id);

View File

@ -52,7 +52,7 @@
const int FB_WIDTH = 480;
const int FB_HEIGHT = 272;
u32 clut[4096];
uint8_t clut[1024];
FormatBuffer fb;
FormatBuffer depthbuf;
@ -201,14 +201,14 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = {
{ GE_CMD_CLUTFORMAT, 0, SoftDirty::SAMPLER_BASIC },
// Morph weights. TODO: Remove precomputation?
{ GE_CMD_MORPHWEIGHT0, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT1, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT2, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT3, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT4, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT5, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT6, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT7, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT0, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT1, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT2, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT3, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT4, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT5, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT6, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
{ GE_CMD_MORPHWEIGHT7, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
// No state or flushing required for patch parameters, currently.
{ GE_CMD_PATCHDIVISION },
@ -787,8 +787,8 @@ void SoftGPU::Execute_BlockTransferStart(u32 op, u32 diff) {
const uint32_t dstSize = height * dstStride * bpp;
// Need to flush both source and target, so we overwrite properly.
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", src, srcStride, width * bpp, height);
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", dst, dstStride, width * bpp, height);
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", false, src, srcStride, width * bpp, height);
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", true, dst, dstStride, width * bpp, height);
DEBUG_LOG(G3D, "Block transfer: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);
@ -971,10 +971,13 @@ void SoftGPU::Execute_Spline(u32 op, u32 diff) {
void SoftGPU::Execute_LoadClut(u32 op, u32 diff) {
u32 clutAddr = gstate.getClutAddress();
u32 clutTotalBytes = gstate.getClutLoadBytes();
// Avoid the hack in getClutLoadBytes() to inaccurately allow more palette data.
u32 clutTotalBytes = (gstate.getClutLoadBlocks() & 0x3F) * 32;
if (clutTotalBytes > 1024)
clutTotalBytes = 1024;
// Might be copying drawing into the CLUT, so flush.
drawEngine_->transformUnit.FlushIfOverlap("loadclut", clutAddr, clutTotalBytes, clutTotalBytes, 1);
drawEngine_->transformUnit.FlushIfOverlap("loadclut", false, clutAddr, clutTotalBytes, clutTotalBytes, 1);
bool changed = false;
if (Memory::IsValidAddress(clutAddr)) {

View File

@ -216,7 +216,7 @@ private:
};
// TODO: These shouldn't be global.
extern u32 clut[4096];
extern uint8_t clut[1024];
extern FormatBuffer fb;
extern FormatBuffer depthbuf;

View File

@ -797,9 +797,11 @@ void TransformUnit::GetStats(char *buffer, size_t bufsize) {
binner_->GetStats(buffer, bufsize);
}
void TransformUnit::FlushIfOverlap(const char *reason, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h) {
// Flushes queued work when the memory region addr/stride/w/h overlaps something the binner still has pending.
// reason: label passed through to Flush().
// modifying: when true, pending reads of the region are checked in addition to pending writes.
void TransformUnit::FlushIfOverlap(const char *reason, bool modifying, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h) {
// A pending write to the region always triggers a flush, whether or not the caller modifies it.
if (binner_->HasPendingWrite(addr, stride, w, h))
Flush(reason);
// Pending reads are only consulted when the caller is modifying the region.
if (modifying && binner_->HasPendingRead(addr, stride, w, h))
Flush(reason);
}
void TransformUnit::NotifyClutUpdate(const void *src) {

View File

@ -123,7 +123,7 @@ public:
bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices);
void Flush(const char *reason);
void FlushIfOverlap(const char *reason, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h);
void FlushIfOverlap(const char *reason, bool modifying, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h);
void NotifyClutUpdate(const void *src);
void GetStats(char *buffer, size_t bufsize);

View File

@ -185,8 +185,8 @@ void DrawEngineVulkan::InitDeviceObjects() {
samp.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samp.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
samp.flags = 0;
samp.magFilter = VK_FILTER_NEAREST;
samp.minFilter = VK_FILTER_NEAREST;
samp.magFilter = VK_FILTER_LINEAR;
samp.minFilter = VK_FILTER_LINEAR;
res = vkCreateSampler(device, &samp, nullptr, &samplerSecondary_);
_dbg_assert_(VK_SUCCESS == res);
res = vkCreateSampler(device, &samp, nullptr, &nullSampler_);
@ -856,6 +856,7 @@ void DrawEngineVulkan::DoFlush() {
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
vpAndScissor);
UpdateCachedViewportState(vpAndScissor);
}
int maxIndex = indexGen.MaxIndex();

View File

@ -218,7 +218,7 @@ private:
// Secondary texture for shader blending
VkImageView boundSecondary_ = VK_NULL_HANDLE;
VkImageView boundDepal_ = VK_NULL_HANDLE;
VkSampler samplerSecondary_ = VK_NULL_HANDLE; // This one is actually never used since we use fetch.
VkSampler samplerSecondary_ = VK_NULL_HANDLE; // This one is actually never used since we use fetch (except in SmoothedDepal mode for Test Drive).
PrehashMap<VertexArrayInfoVulkan *, nullptr> vai_;
VulkanPushBuffer *vertexCache_;

View File

@ -75,7 +75,4 @@ void FramebufferManagerVulkan::NotifyClear(bool clearColor, bool clearAlpha, boo
if (clearColor || clearAlpha) {
SetColorUpdated(gstate_c.skipDrawReason);
}
if (clearDepth) {
SetDepthUpdated();
}
}

View File

@ -37,7 +37,6 @@
#include "GPU/ge_constants.h"
#include "GPU/GeDisasm.h"
#include "GPU/Common/FramebufferManagerCommon.h"
#include "GPU/Debugger/Debugger.h"
#include "GPU/Vulkan/ShaderManagerVulkan.h"
#include "GPU/Vulkan/GPU_Vulkan.h"
#include "GPU/Vulkan/FramebufferManagerVulkan.h"
@ -63,7 +62,7 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
pipelineManager_ = new PipelineManagerVulkan(vulkan);
framebufferManagerVulkan_ = new FramebufferManagerVulkan(draw);
framebufferManager_ = framebufferManagerVulkan_;
textureCacheVulkan_ = new TextureCacheVulkan(draw, vulkan);
textureCacheVulkan_ = new TextureCacheVulkan(draw, framebufferManager_->GetDraw2D(), vulkan);
textureCache_ = textureCacheVulkan_;
drawEngineCommon_ = &drawEngine_;
shaderManager_ = shaderManagerVulkan_;
@ -431,11 +430,6 @@ void GPU_Vulkan::InitClear() {
}
}
// Reports the display framebuffer parameters to GPUDebug, then forwards them to the framebuffer manager.
void GPU_Vulkan::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
GPUDebug::NotifyDisplay(framebuf, stride, format);
framebufferManager_->SetDisplayFramebuffer(framebuf, stride, format);
}
void GPU_Vulkan::CopyDisplayToOutput(bool reallyDirty) {
// Flush anything left over.
drawEngine_.Flush();
@ -604,9 +598,8 @@ std::vector<std::string> GPU_Vulkan::DebugGetShaderIDs(DebugShaderType type) {
return drawEngine_.DebugGetVertexLoaderIDs();
} else if (type == SHADER_TYPE_PIPELINE) {
return pipelineManager_->DebugGetObjectIDs(type);
} else if (type == SHADER_TYPE_DEPAL) {
///...
return std::vector<std::string>();
} else if (type == SHADER_TYPE_TEXTURE) {
return textureCache_->GetTextureShaderCache()->DebugGetShaderIDs(type);
} else if (type == SHADER_TYPE_VERTEX || type == SHADER_TYPE_FRAGMENT) {
return shaderManagerVulkan_->DebugGetShaderIDs(type);
} else if (type == SHADER_TYPE_SAMPLER) {
@ -621,8 +614,8 @@ std::string GPU_Vulkan::DebugGetShaderString(std::string id, DebugShaderType typ
return drawEngine_.DebugGetVertexLoaderString(id, stringType);
} else if (type == SHADER_TYPE_PIPELINE) {
return pipelineManager_->DebugGetObjectString(id, type, stringType);
} else if (type == SHADER_TYPE_DEPAL) {
return "";
} else if (type == SHADER_TYPE_TEXTURE) {
return textureCache_->GetTextureShaderCache()->DebugGetShaderString(id, type, stringType);
} else if (type == SHADER_TYPE_SAMPLER) {
return textureCacheVulkan_->DebugGetSamplerString(id, stringType);
} else if (type == SHADER_TYPE_VERTEX || type == SHADER_TYPE_FRAGMENT) {

View File

@ -25,7 +25,7 @@
#include "GPU/GPUCommon.h"
#include "GPU/Vulkan/DrawEngineVulkan.h"
#include "GPU/Vulkan/PipelineManagerVulkan.h"
#include "GPU/Common/DepalettizeCommon.h"
#include "GPU/Common/TextureShaderCommon.h"
class FramebufferManagerVulkan;
class ShaderManagerVulkan;
@ -50,7 +50,6 @@ public:
void PreExecuteOp(u32 op, u32 diff) override;
void ExecuteOp(u32 op, u32 diff) override;
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
void GetStats(char *buffer, size_t bufsize) override;
void ClearCacheNextFrame() override;
void DeviceLost() override; // Only happens on Android. Drop all textures and shaders.

View File

@ -250,20 +250,10 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
GenericStencilFuncState stencilState;
ConvertStencilFuncState(stencilState);
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
// Enforce plain depth writing.
key.depthTestEnable = true;
key.depthWriteEnable = true;
key.stencilTestEnable = false;
key.depthCompareOp = VK_COMPARE_OP_ALWAYS;
key.depthClampEnable = false;
} else if (gstate.isModeClear()) {
if (gstate.isModeClear()) {
key.depthTestEnable = true;
key.depthCompareOp = VK_COMPARE_OP_ALWAYS;
key.depthWriteEnable = gstate.isClearModeDepthMask();
if (gstate.isClearModeDepthMask()) {
fbManager.SetDepthUpdated();
}
// Stencil Test
bool alphaMask = gstate.isClearModeAlphaMask();
@ -294,9 +284,6 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
key.depthTestEnable = true;
key.depthCompareOp = compareOps[gstate.getDepthTestFunction()];
key.depthWriteEnable = gstate.isDepthWriteEnabled();
if (gstate.isDepthWriteEnabled()) {
fbManager.SetDepthUpdated();
}
} else {
key.depthTestEnable = false;
key.depthWriteEnable = false;
@ -331,15 +318,13 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
fbManager.GetRenderWidth(), fbManager.GetRenderHeight(),
fbManager.GetTargetBufferWidth(), fbManager.GetTargetBufferHeight(),
vpAndScissor);
UpdateCachedViewportState(vpAndScissor);
float depthMin = vpAndScissor.depthRangeMin;
float depthMax = vpAndScissor.depthRangeMax;
if (depthMin < 0.0f) depthMin = 0.0f;
if (depthMax > 1.0f) depthMax = 1.0f;
if (vpAndScissor.dirtyDepth) {
gstate_c.Dirty(DIRTY_DEPTHRANGE);
}
VkViewport &vp = dynState.viewport;
vp.x = vpAndScissor.viewportX;
@ -349,10 +334,6 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
vp.minDepth = vpAndScissor.depthRangeMin;
vp.maxDepth = vpAndScissor.depthRangeMax;
if (vpAndScissor.dirtyProj) {
gstate_c.Dirty(DIRTY_PROJMATRIX);
}
ScissorRect &scissor = dynState.scissor;
scissor.x = vpAndScissor.scissorX;
scissor.y = vpAndScissor.scissorY;

Some files were not shown because too many files have changed in this diff Show More