mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 05:19:56 +00:00
Merge branch 'hrydgard:master' into feature_openxr_6dof
This commit is contained in:
commit
c5cb45b1f7
@ -426,6 +426,15 @@ if(WIN32)
|
||||
endif()
|
||||
|
||||
|
||||
set(CommonJIT
|
||||
Core/MIPS/JitCommon/JitCommon.cpp
|
||||
Core/MIPS/JitCommon/JitCommon.h
|
||||
Core/MIPS/JitCommon/JitBlockCache.cpp
|
||||
Core/MIPS/JitCommon/JitBlockCache.h
|
||||
Core/MIPS/JitCommon/JitState.cpp
|
||||
Core/MIPS/JitCommon/JitState.h
|
||||
)
|
||||
|
||||
set(CommonX86
|
||||
Common/ABI.cpp
|
||||
Common/ABI.h
|
||||
@ -464,6 +473,7 @@ set(CommonMIPS
|
||||
source_group(MIPS FILES ${CommonMIPS})
|
||||
|
||||
set(CommonRISCV64
|
||||
${CommonJIT}
|
||||
Common/RiscVCPUDetect.cpp
|
||||
Core/MIPS/fake/FakeJit.cpp
|
||||
Core/MIPS/fake/FakeJit.h
|
||||
@ -1530,8 +1540,8 @@ set(GPU_SOURCES
|
||||
${GPU_NEON}
|
||||
GPU/Common/Draw2D.cpp
|
||||
GPU/Common/Draw2D.h
|
||||
GPU/Common/DepalettizeCommon.cpp
|
||||
GPU/Common/DepalettizeCommon.h
|
||||
GPU/Common/TextureShaderCommon.cpp
|
||||
GPU/Common/TextureShaderCommon.h
|
||||
GPU/Common/DepalettizeShaderCommon.cpp
|
||||
GPU/Common/DepalettizeShaderCommon.h
|
||||
GPU/Common/FragmentShaderGenerator.cpp
|
||||
@ -1630,6 +1640,7 @@ set(GPU_SOURCES
|
||||
# SHARED on ANDROID, STATIC everywhere else
|
||||
add_library(${CoreLibName} ${CoreLinkType}
|
||||
${CoreExtra}
|
||||
${CommonJIT}
|
||||
Core/Config.cpp
|
||||
Core/Config.h
|
||||
Core/ConfigValues.h
|
||||
@ -1937,12 +1948,6 @@ add_library(${CoreLibName} ${CoreLinkType}
|
||||
Core/FileLoaders/RamCachingFileLoader.h
|
||||
Core/FileLoaders/RetryingFileLoader.cpp
|
||||
Core/FileLoaders/RetryingFileLoader.h
|
||||
Core/MIPS/JitCommon/JitCommon.cpp
|
||||
Core/MIPS/JitCommon/JitCommon.h
|
||||
Core/MIPS/JitCommon/JitBlockCache.cpp
|
||||
Core/MIPS/JitCommon/JitBlockCache.h
|
||||
Core/MIPS/JitCommon/JitState.cpp
|
||||
Core/MIPS/JitCommon/JitState.h
|
||||
Core/MIPS/MIPS.cpp
|
||||
Core/MIPS/MIPS.h
|
||||
Core/MIPS/MIPSAnalyst.cpp
|
||||
|
@ -665,7 +665,7 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
|
||||
caps_.tesselationShaderSupported = false;
|
||||
caps_.framebufferBlitSupported = true;
|
||||
caps_.framebufferCopySupported = false;
|
||||
caps_.framebufferDepthBlitSupported = true;
|
||||
caps_.framebufferDepthBlitSupported = false;
|
||||
caps_.framebufferStencilBlitSupported = false;
|
||||
caps_.framebufferDepthCopySupported = false;
|
||||
caps_.framebufferSeparateDepthCopySupported = false;
|
||||
|
@ -423,3 +423,19 @@ ShaderWriter &ShaderWriter::SampleTexture2D(const char *sampName, const char *uv
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Emits shader source that declares a two-component float variable named
// szVariable and fills it with the dimensions of the texture texName, using
// the syntax of the currently selected shader language.
// Returns *this so calls can be chained.
ShaderWriter &ShaderWriter::GetTextureSize(const char *szVariable, const char *texName) {
	switch (lang_.shaderLanguage) {
	case HLSL_D3D11:
	case HLSL_D3D9:
		// Both HLSL flavors emit the same statement.
		// NOTE(review): Texture2D::GetDimensions is documented as a shader model 4+ method;
		// confirm it is actually usable for HLSL_D3D9 targets.
		F(" float2 %s; %s.GetDimensions(%s.x, %s.y);", szVariable, texName, szVariable, szVariable);
		break;
	default:
		// Note: we ignore the sampler. make sure you bound samplers to the textures correctly.
		// textureSize() yields an ivec2. GLSL ES has no implicit int->float conversions,
		// so convert explicitly to keep the generated code valid on every GLSL dialect.
		F("vec2 %s = vec2(textureSize(%s, 0));", szVariable, texName);
		break;
	}
	return *this;
}
|
||||
|
@ -83,7 +83,8 @@ public:
|
||||
|
||||
void ConstFloat(const char *name, float value);
|
||||
|
||||
ShaderWriter &SampleTexture2D(const char *sampName, const char *uv);
|
||||
ShaderWriter &SampleTexture2D(const char *texName, const char *uv);
|
||||
ShaderWriter &GetTextureSize(const char *szVariable, const char *texName);
|
||||
|
||||
// Simple shaders with no special tricks.
|
||||
void BeginVSMain(Slice<InputDef> inputs, Slice<UniformDef> uniforms, Slice<VaryingDef> varyings);
|
||||
@ -93,6 +94,9 @@ public:
|
||||
void EndVSMain(Slice<VaryingDef> varyings);
|
||||
void EndFSMain(const char *vec4_color_variable, FSFlags flags);
|
||||
|
||||
const ShaderLanguageDesc &Lang() const {
|
||||
return lang_;
|
||||
}
|
||||
|
||||
void Rewind(size_t offset) {
|
||||
p_ -= offset;
|
||||
|
@ -648,6 +648,7 @@ VkResult VulkanContext::CreateDevice() {
|
||||
}
|
||||
_dbg_assert_(found);
|
||||
|
||||
// TODO: A lot of these are on by default in later Vulkan versions, should check for that, technically.
|
||||
extensionsLookup_.KHR_maintenance1 = EnableDeviceExtension(VK_KHR_MAINTENANCE1_EXTENSION_NAME);
|
||||
extensionsLookup_.KHR_maintenance2 = EnableDeviceExtension(VK_KHR_MAINTENANCE2_EXTENSION_NAME);
|
||||
extensionsLookup_.KHR_maintenance3 = EnableDeviceExtension(VK_KHR_MAINTENANCE3_EXTENSION_NAME);
|
||||
@ -684,7 +685,7 @@ VkResult VulkanContext::CreateDevice() {
|
||||
} else {
|
||||
VulkanLoadDeviceFunctions(device_, extensionsLookup_);
|
||||
}
|
||||
INFO_LOG(G3D, "Device created.\n");
|
||||
INFO_LOG(G3D, "Vulkan Device created");
|
||||
VulkanSetAvailable(true);
|
||||
|
||||
VmaAllocatorCreateInfo allocatorInfo = {};
|
||||
|
@ -667,7 +667,7 @@ public:
|
||||
s.magFilter = desc.magFilter == TextureFilter::LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
|
||||
s.minFilter = desc.minFilter == TextureFilter::LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
|
||||
s.mipmapMode = desc.mipFilter == TextureFilter::LINEAR ? VK_SAMPLER_MIPMAP_MODE_LINEAR : VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
s.maxLod = desc.maxLod;
|
||||
s.maxLod = VK_LOD_CLAMP_NONE;
|
||||
VkResult res = vkCreateSampler(vulkan_->GetDevice(), &s, nullptr, &sampler_);
|
||||
_assert_(VK_SUCCESS == res);
|
||||
}
|
||||
|
@ -494,7 +494,6 @@ struct SamplerStateDesc {
|
||||
TextureAddressMode wrapU;
|
||||
TextureAddressMode wrapV;
|
||||
TextureAddressMode wrapW;
|
||||
float maxLod;
|
||||
bool shadowCompareEnabled;
|
||||
Comparison shadowCompareFunc;
|
||||
BorderColor borderColor;
|
||||
|
@ -97,6 +97,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
|
||||
CheckSetting(iniFile, gameID, "ZZT3SelectHack", &flags_.ZZT3SelectHack);
|
||||
CheckSetting(iniFile, gameID, "AllowLargeFBTextureOffsets", &flags_.AllowLargeFBTextureOffsets);
|
||||
CheckSetting(iniFile, gameID, "AtracLoopHack", &flags_.AtracLoopHack);
|
||||
CheckSetting(iniFile, gameID, "DeswizzleDepth", &flags_.DeswizzleDepth);
|
||||
}
|
||||
|
||||
void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) {
|
||||
|
@ -87,6 +87,7 @@ struct CompatFlags {
|
||||
bool ZZT3SelectHack;
|
||||
bool AllowLargeFBTextureOffsets;
|
||||
bool AtracLoopHack;
|
||||
bool DeswizzleDepth;
|
||||
};
|
||||
|
||||
class IniFile;
|
||||
|
@ -748,7 +748,7 @@ int Config::NextValidBackend() {
|
||||
if (failed.count((GPUBackend)iGPUBackend)) {
|
||||
ERROR_LOG(LOADER, "Graphics backend failed for %d, trying another", iGPUBackend);
|
||||
|
||||
#if (PPSSPP_PLATFORM(WINDOWS) || PPSSPP_PLATFORM(ANDROID)) && !PPSSPP_PLATFORM(UWP)
|
||||
#if !PPSSPP_PLATFORM(UWP)
|
||||
if (!failed.count(GPUBackend::VULKAN) && VulkanMayBeAvailable()) {
|
||||
return (int)GPUBackend::VULKAN;
|
||||
}
|
||||
@ -797,6 +797,9 @@ bool Config::IsBackendEnabled(GPUBackend backend, bool validate) {
|
||||
#if PPSSPP_PLATFORM(UWP)
|
||||
if (backend != GPUBackend::DIRECT3D11)
|
||||
return false;
|
||||
#elif PPSSPP_PLATFORM(SWITCH)
|
||||
if (backend != GPUBackend::OPENGL)
|
||||
return false;
|
||||
#elif PPSSPP_PLATFORM(WINDOWS)
|
||||
if (validate) {
|
||||
if (backend == GPUBackend::DIRECT3D11 && !DoesVersionMatchWindows(6, 0, 0, 0, true))
|
||||
@ -1113,15 +1116,6 @@ static ConfigSetting networkSettings[] = {
|
||||
ConfigSetting(false),
|
||||
};
|
||||
|
||||
static int DefaultPSPModel() {
|
||||
// TODO: Can probably default this on, but not sure about its memory differences.
|
||||
#if !PPSSPP_ARCH(AMD64) && !defined(_WIN32)
|
||||
return PSP_MODEL_FAT;
|
||||
#else
|
||||
return PSP_MODEL_SLIM;
|
||||
#endif
|
||||
}
|
||||
|
||||
static int DefaultSystemParamLanguage() {
|
||||
int defaultLang = PSP_SYSTEMPARAM_LANGUAGE_ENGLISH;
|
||||
if (g_Config.bFirstRun) {
|
||||
@ -1135,7 +1129,7 @@ static int DefaultSystemParamLanguage() {
|
||||
}
|
||||
|
||||
static ConfigSetting systemParamSettings[] = {
|
||||
ReportedConfigSetting("PSPModel", &g_Config.iPSPModel, &DefaultPSPModel, true, true),
|
||||
ReportedConfigSetting("PSPModel", &g_Config.iPSPModel, PSP_MODEL_SLIM, true, true),
|
||||
ReportedConfigSetting("PSPFirmwareVersion", &g_Config.iFirmwareVersion, PSP_DEFAULT_FIRMWARE, true, true),
|
||||
ConfigSetting("NickName", &g_Config.sNickName, "PPSSPP", true, true),
|
||||
ConfigSetting("MacAddress", &g_Config.sMACAddress, "", true, true),
|
||||
|
@ -419,6 +419,7 @@ const char *MemoryExceptionTypeAsString(MemoryExceptionType type) {
|
||||
case MemoryExceptionType::WRITE_WORD: return "Write Word";
|
||||
case MemoryExceptionType::READ_BLOCK: return "Read Block";
|
||||
case MemoryExceptionType::WRITE_BLOCK: return "Read/Write Block";
|
||||
case MemoryExceptionType::ALIGNMENT: return "Alignment";
|
||||
default:
|
||||
return "N/A";
|
||||
}
|
||||
@ -486,16 +487,19 @@ void Core_ExecException(u32 address, u32 pc, ExecExceptionType type) {
|
||||
e.exec_type = type;
|
||||
e.address = address;
|
||||
e.pc = pc;
|
||||
Core_EnableStepping(true, "cpu.exception", pc);
|
||||
// This just records the closest value that could be useful as reference.
|
||||
e.ra = currentMIPS->r[MIPS_REG_RA];
|
||||
Core_EnableStepping(true, "cpu.exception", address);
|
||||
}
|
||||
|
||||
void Core_Break() {
|
||||
void Core_Break(u32 pc) {
|
||||
ERROR_LOG(CPU, "BREAK!");
|
||||
|
||||
ExceptionInfo &e = g_exceptionInfo;
|
||||
e = {};
|
||||
e.type = ExceptionType::BREAK;
|
||||
e.info = "";
|
||||
e.pc = pc;
|
||||
|
||||
if (!g_Config.bIgnoreBadMemAccess) {
|
||||
Core_EnableStepping(true, "cpu.breakInstruction", currentMIPS->pc);
|
||||
|
@ -93,6 +93,7 @@ enum class MemoryExceptionType {
|
||||
WRITE_WORD,
|
||||
READ_BLOCK,
|
||||
WRITE_BLOCK,
|
||||
ALIGNMENT,
|
||||
};
|
||||
enum class ExecExceptionType {
|
||||
JUMP,
|
||||
@ -105,7 +106,7 @@ void Core_MemoryException(u32 address, u32 pc, MemoryExceptionType type);
|
||||
void Core_MemoryExceptionInfo(u32 address, u32 pc, MemoryExceptionType type, std::string additionalInfo);
|
||||
|
||||
void Core_ExecException(u32 address, u32 pc, ExecExceptionType type);
|
||||
void Core_Break();
|
||||
void Core_Break(u32 pc);
|
||||
// Call when loading save states, etc.
|
||||
void Core_ResetException();
|
||||
|
||||
@ -124,6 +125,7 @@ struct ExceptionInfo {
|
||||
MemoryExceptionType memory_type;
|
||||
uint32_t pc;
|
||||
uint32_t address;
|
||||
uint32_t ra = 0;
|
||||
|
||||
// Reuses pc and address from memory type, where address is the failed destination.
|
||||
ExecExceptionType exec_type;
|
||||
|
@ -66,7 +66,7 @@ struct CoreParameter {
|
||||
bool headLess; // Try to avoid messageboxes etc
|
||||
|
||||
// Internal PSP rendering resolution and scale factor.
|
||||
int renderScaleFactor;
|
||||
int renderScaleFactor = 1;
|
||||
int renderWidth;
|
||||
int renderHeight;
|
||||
|
||||
|
@ -99,7 +99,7 @@ protected:
|
||||
void ChangeStatus(DialogStatus newStatus, int delayUs);
|
||||
void ChangeStatusInit(int delayUs);
|
||||
void ChangeStatusShutdown(int delayUs);
|
||||
DialogStatus ReadStatus() {
|
||||
DialogStatus ReadStatus() const {
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -117,10 +117,10 @@ protected:
|
||||
unsigned int lastButtons = 0;
|
||||
unsigned int buttons = 0;
|
||||
|
||||
float fadeTimer;
|
||||
bool isFading;
|
||||
bool fadeIn;
|
||||
u32 fadeValue;
|
||||
float fadeTimer = 0.0f;
|
||||
bool isFading = false;
|
||||
bool fadeIn = false;
|
||||
u32 fadeValue = 0;
|
||||
|
||||
ImageID okButtonImg;
|
||||
ImageID cancelButtonImg;
|
||||
|
@ -59,16 +59,16 @@ private:
|
||||
void CloseCurrentFile();
|
||||
void WriteSfoFile();
|
||||
|
||||
SceUtilityGamedataInstallParam request;
|
||||
SceUtilityGamedataInstallParam request{};
|
||||
PSPPointer<SceUtilityGamedataInstallParam> param;
|
||||
std::vector<std::string> inFileNames;
|
||||
int numFiles;
|
||||
int readFiles;
|
||||
u64 allFilesSize; // use this to calculate progress value.
|
||||
u64 allReadSize; // use this to calculate progress value.
|
||||
int progressValue;
|
||||
int numFiles = 0;
|
||||
int readFiles = 0;
|
||||
u64 allFilesSize = 0; // use this to calculate progress value.
|
||||
u64 allReadSize = 0; // use this to calculate progress value.
|
||||
int progressValue = 0;
|
||||
|
||||
int currentInputFile;
|
||||
u32 currentInputBytesLeft;
|
||||
int currentOutputFile;
|
||||
int currentInputFile = 0;
|
||||
u32 currentInputBytesLeft = 0;
|
||||
int currentOutputFile = 0;
|
||||
};
|
||||
|
@ -94,11 +94,11 @@ private:
|
||||
|
||||
u32 flag = 0;
|
||||
|
||||
pspMessageDialog messageDialog;
|
||||
int messageDialogAddr;
|
||||
pspMessageDialog messageDialog{};
|
||||
int messageDialogAddr = 0;
|
||||
|
||||
char msgText[512];
|
||||
int yesnoChoice;
|
||||
int yesnoChoice = 0;
|
||||
float scrollPos_ = 0.0f;
|
||||
int framesUpHeld_ = 0;
|
||||
int framesDownHeld_ = 0;
|
||||
|
@ -148,7 +148,6 @@ struct SceUtilityOskParams
|
||||
SceUtilityOskState_le state;
|
||||
// Maybe just padding?
|
||||
s32_le unk_60;
|
||||
|
||||
};
|
||||
|
||||
// Internal enum, not from PSP.
|
||||
@ -245,16 +244,16 @@ private:
|
||||
std::string oskIntext;
|
||||
std::string oskOuttext;
|
||||
|
||||
int selectedChar;
|
||||
int selectedChar = 0;
|
||||
std::u16string inputChars;
|
||||
OskKeyboardDisplay currentKeyboard;
|
||||
OskKeyboardLanguage currentKeyboardLanguage;
|
||||
bool isCombinated;
|
||||
bool isCombinated = false;
|
||||
|
||||
std::mutex nativeMutex_;
|
||||
PSPOskNativeStatus nativeStatus_ = PSPOskNativeStatus::IDLE;
|
||||
std::string nativeValue_;
|
||||
|
||||
int i_level; // for Korean Keyboard support
|
||||
int i_value[3]; // for Korean Keyboard support
|
||||
int i_level = 0; // for Korean Keyboard support
|
||||
int i_value[3]{}; // for Korean Keyboard support
|
||||
};
|
||||
|
@ -139,13 +139,13 @@ private:
|
||||
DisplayState display = DS_NONE;
|
||||
|
||||
SavedataParam param;
|
||||
SceUtilitySavedataParam request;
|
||||
SceUtilitySavedataParam request{};
|
||||
// For detecting changes made by the game.
|
||||
SceUtilitySavedataParam originalRequest;
|
||||
SceUtilitySavedataParam originalRequest{};
|
||||
u32 requestAddr = 0;
|
||||
int currentSelectedSave = 0;
|
||||
|
||||
int yesnoChoice;
|
||||
int yesnoChoice = 0;
|
||||
|
||||
enum SaveIOStatus
|
||||
{
|
||||
|
@ -193,16 +193,7 @@ void SaveFileInfo::DoState(PointerWrap &p)
|
||||
}
|
||||
}
|
||||
|
||||
SavedataParam::SavedataParam()
|
||||
: pspParam(0)
|
||||
, selectedSave(0)
|
||||
, saveDataList(0)
|
||||
, noSaveIcon(0)
|
||||
, saveDataListCount(0)
|
||||
, saveNameListDataCount(0)
|
||||
{
|
||||
|
||||
}
|
||||
SavedataParam::SavedataParam() { }
|
||||
|
||||
void SavedataParam::Init()
|
||||
{
|
||||
|
@ -265,7 +265,6 @@ struct SceUtilitySavedataParam
|
||||
|
||||
// Function 22 GETSIZES
|
||||
PSPPointer<PspUtilitySavedataSizeInfo> sizeInfo;
|
||||
|
||||
};
|
||||
|
||||
// Non native, this one we can reorganize as we like
|
||||
@ -377,10 +376,10 @@ private:
|
||||
std::set<std::string> GetSecureFileNames(const std::string &dirPath);
|
||||
bool GetExpectedHash(const std::string &dirPath, const std::string &filename, u8 hash[16]);
|
||||
|
||||
SceUtilitySavedataParam* pspParam;
|
||||
int selectedSave;
|
||||
SaveFileInfo *saveDataList;
|
||||
SaveFileInfo *noSaveIcon;
|
||||
int saveDataListCount;
|
||||
int saveNameListDataCount;
|
||||
SceUtilitySavedataParam* pspParam = nullptr;
|
||||
int selectedSave = 0;
|
||||
SaveFileInfo *saveDataList = nullptr;
|
||||
SaveFileInfo *noSaveIcon = nullptr;
|
||||
int saveDataListCount = 0;
|
||||
int saveNameListDataCount = 0;
|
||||
};
|
||||
|
@ -1086,6 +1086,12 @@ void __KernelStartIdleThreads(SceUID moduleId)
|
||||
}
|
||||
}
|
||||
|
||||
void KernelValidateThreadTarget(uint32_t pc) {
|
||||
if (!Memory::IsValidAddress(pc) || (pc & 3) != 0) {
|
||||
Core_ExecException(pc, currentMIPS->pc, ExecExceptionType::THREAD);
|
||||
}
|
||||
}
|
||||
|
||||
bool __KernelSwitchOffThread(const char *reason)
|
||||
{
|
||||
if (!reason)
|
||||
@ -1141,9 +1147,7 @@ bool __KernelSwitchToThread(SceUID threadID, const char *reason)
|
||||
if (current && current->isRunning())
|
||||
__KernelChangeReadyState(current, currentThread, true);
|
||||
|
||||
if (!Memory::IsValidAddress(t->context.pc)) {
|
||||
Core_ExecException(t->context.pc, currentMIPS->pc, ExecExceptionType::THREAD);
|
||||
}
|
||||
KernelValidateThreadTarget(t->context.pc);
|
||||
|
||||
__KernelSwitchContext(t, reason);
|
||||
return true;
|
||||
@ -1471,9 +1475,7 @@ void __KernelLoadContext(PSPThreadContext *ctx, bool vfpuEnabled) {
|
||||
memcpy(currentMIPS->vfpuCtrl, ctx->vfpuCtrl, sizeof(ctx->vfpuCtrl));
|
||||
}
|
||||
|
||||
if (!Memory::IsValidAddress(ctx->pc)) {
|
||||
Core_ExecException(ctx->pc, currentMIPS->pc, ExecExceptionType::THREAD);
|
||||
}
|
||||
KernelValidateThreadTarget(ctx->pc);
|
||||
|
||||
memcpy(currentMIPS->other, ctx->other, sizeof(ctx->other));
|
||||
// Not locking here, we assume the jit isn't switched during execution.
|
||||
@ -1924,9 +1926,7 @@ SceUID __KernelSetupRootThread(SceUID moduleID, int args, const char *argp, int
|
||||
|
||||
strcpy(thread->nt.name, "root");
|
||||
|
||||
if (!Memory::IsValidAddress(thread->context.pc)) {
|
||||
Core_ExecException(thread->context.pc, currentMIPS->pc, ExecExceptionType::THREAD);
|
||||
}
|
||||
KernelValidateThreadTarget(thread->context.pc);
|
||||
|
||||
__KernelLoadContext(&thread->context, (attr & PSP_THREAD_ATTR_VFPU) != 0);
|
||||
currentMIPS->r[MIPS_REG_A0] = args;
|
||||
@ -2057,9 +2057,7 @@ int __KernelStartThread(SceUID threadToStartID, int argSize, u32 argBlockPtr, bo
|
||||
|
||||
// Smaller is better for priority. Only switch if the new thread is better.
|
||||
if (cur && cur->nt.currentPriority > startThread->nt.currentPriority) {
|
||||
if (!Memory::IsValidAddress(startThread->context.pc)) {
|
||||
Core_ExecException(startThread->context.pc, currentMIPS->pc, ExecExceptionType::THREAD);
|
||||
}
|
||||
KernelValidateThreadTarget(startThread->context.pc);
|
||||
__KernelChangeReadyState(cur, currentThread, true);
|
||||
if (__InterruptsEnabled())
|
||||
hleReSchedule("thread started");
|
||||
@ -2939,9 +2937,7 @@ u32 sceKernelExtendThreadStack(u32 size, u32 entryAddr, u32 entryParameter)
|
||||
Memory::Write_U32(currentMIPS->r[MIPS_REG_SP], thread->currentStack.end - 8);
|
||||
Memory::Write_U32(currentMIPS->pc, thread->currentStack.end - 12);
|
||||
|
||||
if (!Memory::IsValidAddress(entryAddr)) {
|
||||
Core_ExecException(entryAddr, currentMIPS->pc, ExecExceptionType::THREAD);
|
||||
}
|
||||
KernelValidateThreadTarget(entryAddr);
|
||||
|
||||
currentMIPS->pc = entryAddr;
|
||||
currentMIPS->r[MIPS_REG_A0] = entryParameter;
|
||||
@ -2975,9 +2971,7 @@ void __KernelReturnFromExtendStack()
|
||||
return;
|
||||
}
|
||||
|
||||
if (!Memory::IsValidAddress(restorePC)) {
|
||||
Core_ExecException(restorePC, currentMIPS->pc, ExecExceptionType::THREAD);
|
||||
}
|
||||
KernelValidateThreadTarget(restorePC);
|
||||
|
||||
DEBUG_LOG(SCEKERNEL, "__KernelReturnFromExtendStack()");
|
||||
currentMIPS->r[MIPS_REG_RA] = restoreRA;
|
||||
@ -3259,9 +3253,7 @@ bool __KernelExecuteMipsCallOnCurrentThread(u32 callId, bool reschedAfter)
|
||||
call->savedId = cur->currentMipscallId;
|
||||
call->reschedAfter = reschedAfter;
|
||||
|
||||
if (!Memory::IsValidAddress(call->entryPoint)) {
|
||||
Core_ExecException(call->entryPoint, currentMIPS->pc, ExecExceptionType::THREAD);
|
||||
}
|
||||
KernelValidateThreadTarget(call->entryPoint);
|
||||
|
||||
// Set up the new state
|
||||
currentMIPS->pc = call->entryPoint;
|
||||
@ -3312,9 +3304,7 @@ void __KernelReturnFromMipsCall()
|
||||
currentMIPS->r[MIPS_REG_RA] = Memory::Read_U32(sp + MIPS_REG_RA * 4);
|
||||
sp += 32 * 4;
|
||||
|
||||
if (!Memory::IsValidAddress(call->savedPc)) {
|
||||
Core_ExecException(call->savedPc, currentMIPS->pc, ExecExceptionType::THREAD);
|
||||
}
|
||||
KernelValidateThreadTarget(call->savedPc);
|
||||
|
||||
currentMIPS->pc = call->savedPc;
|
||||
// This is how we set the return value.
|
||||
|
@ -466,7 +466,7 @@ void ArmJit::Comp_Jump(MIPSOpcode op) {
|
||||
u32 targetAddr = (GetCompilerPC() & 0xF0000000) | off;
|
||||
|
||||
// Might be a stubbed address or something?
|
||||
if (!Memory::IsValidAddress(targetAddr)) {
|
||||
if (!Memory::IsValidAddress(targetAddr) || (targetAddr & 3) != 0) {
|
||||
if (js.nextExit == 0) {
|
||||
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr);
|
||||
} else {
|
||||
|
@ -481,7 +481,7 @@ void Arm64Jit::Comp_Jump(MIPSOpcode op) {
|
||||
u32 targetAddr = (GetCompilerPC() & 0xF0000000) | off;
|
||||
|
||||
// Might be a stubbed address or something?
|
||||
if (!Memory::IsValidAddress(targetAddr)) {
|
||||
if (!Memory::IsValidAddress(targetAddr) || (targetAddr & 3) != 0) {
|
||||
if (js.nextExit == 0) {
|
||||
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr);
|
||||
} else {
|
||||
|
@ -330,6 +330,8 @@ namespace MIPSComp {
|
||||
ir.Write(IROp::LoadVec4, vregs[0], rs, ir.AddConstant(imm));
|
||||
} else {
|
||||
// Let's not even bother with "vertical" loads for now.
|
||||
if (!g_Config.bFastMemory)
|
||||
ir.Write({ IROp::ValidateAddress128, { 0 }, (u8)rs, 0, (u32)imm });
|
||||
ir.Write(IROp::LoadFloat, vregs[0], rs, ir.AddConstant(imm));
|
||||
ir.Write(IROp::LoadFloat, vregs[1], rs, ir.AddConstant(imm + 4));
|
||||
ir.Write(IROp::LoadFloat, vregs[2], rs, ir.AddConstant(imm + 8));
|
||||
@ -342,6 +344,8 @@ namespace MIPSComp {
|
||||
ir.Write(IROp::StoreVec4, vregs[0], rs, ir.AddConstant(imm));
|
||||
} else {
|
||||
// Let's not even bother with "vertical" stores for now.
|
||||
if (!g_Config.bFastMemory)
|
||||
ir.Write({ IROp::ValidateAddress128, { 0 }, (u8)rs, 1, (u32)imm });
|
||||
ir.Write(IROp::StoreFloat, vregs[0], rs, ir.AddConstant(imm));
|
||||
ir.Write(IROp::StoreFloat, vregs[1], rs, ir.AddConstant(imm + 4));
|
||||
ir.Write(IROp::StoreFloat, vregs[2], rs, ir.AddConstant(imm + 8));
|
||||
|
@ -260,6 +260,7 @@ void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, u32 &m
|
||||
IRWriter *code = &ir;
|
||||
if (!js.hadBreakpoints) {
|
||||
static const IRPassFunc passes[] = {
|
||||
&ApplyMemoryValidation,
|
||||
&RemoveLoadStoreLeftRight,
|
||||
&OptimizeFPMoves,
|
||||
&PropagateConstants,
|
||||
|
@ -163,6 +163,11 @@ static const IRMeta irMeta[] = {
|
||||
{ IROp::Breakpoint, "Breakpoint", "", IRFLAG_EXIT },
|
||||
{ IROp::MemoryCheck, "MemoryCheck", "_GC", IRFLAG_EXIT },
|
||||
|
||||
{ IROp::ValidateAddress8, "ValidAddr8", "_GC", IRFLAG_EXIT },
|
||||
{ IROp::ValidateAddress16, "ValidAddr16", "_GC", IRFLAG_EXIT },
|
||||
{ IROp::ValidateAddress32, "ValidAddr32", "_GC", IRFLAG_EXIT },
|
||||
{ IROp::ValidateAddress128, "ValidAddr128", "_GC", IRFLAG_EXIT },
|
||||
|
||||
{ IROp::RestoreRoundingMode, "RestoreRoundingMode", "" },
|
||||
{ IROp::ApplyRoundingMode, "ApplyRoundingMode", "" },
|
||||
{ IROp::UpdateRoundingMode, "UpdateRoundingMode", "" },
|
||||
|
@ -213,8 +213,15 @@ enum class IROp : u8 {
|
||||
SetPCConst, // hack to make replacement know PC
|
||||
CallReplacement,
|
||||
Break,
|
||||
|
||||
// Debugging breakpoints.
|
||||
Breakpoint,
|
||||
MemoryCheck,
|
||||
|
||||
ValidateAddress8,
|
||||
ValidateAddress16,
|
||||
ValidateAddress32,
|
||||
ValidateAddress128,
|
||||
};
|
||||
|
||||
enum IRComparison {
|
||||
|
@ -79,6 +79,25 @@ u32 RunMemCheck(u32 pc, u32 addr) {
|
||||
return coreState != CORE_RUNNING ? 1 : 0;
|
||||
}
|
||||
|
||||
template <uint32_t alignment>
|
||||
u32 RunValidateAddress(u32 pc, u32 addr, u32 isWrite) {
|
||||
const auto toss = [&](MemoryExceptionType t) {
|
||||
Core_MemoryException(addr, pc, t);
|
||||
return coreState != CORE_RUNNING ? 1 : 0;
|
||||
};
|
||||
|
||||
if (!Memory::IsValidRange(addr, alignment)) {
|
||||
MemoryExceptionType t = isWrite == 1 ? MemoryExceptionType::WRITE_WORD : MemoryExceptionType::READ_WORD;
|
||||
if (alignment > 4)
|
||||
t = isWrite ? MemoryExceptionType::WRITE_BLOCK : MemoryExceptionType::READ_BLOCK;
|
||||
return toss(t);
|
||||
}
|
||||
if (alignment > 1 && (addr & (alignment - 1)) != 0) {
|
||||
return toss(MemoryExceptionType::ALIGNMENT);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// We cannot use NEON on ARM32 here until we make it a hard dependency. We can, however, on ARM64.
|
||||
u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
|
||||
const IRInst *end = inst + count;
|
||||
@ -142,6 +161,31 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
|
||||
mips->r[inst->dest] = ReverseBits32(mips->r[inst->src1]);
|
||||
break;
|
||||
|
||||
case IROp::ValidateAddress8:
|
||||
if (RunValidateAddress<1>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
|
||||
CoreTiming::ForceCheck();
|
||||
return mips->pc;
|
||||
}
|
||||
break;
|
||||
case IROp::ValidateAddress16:
|
||||
if (RunValidateAddress<2>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
|
||||
CoreTiming::ForceCheck();
|
||||
return mips->pc;
|
||||
}
|
||||
break;
|
||||
case IROp::ValidateAddress32:
|
||||
if (RunValidateAddress<4>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
|
||||
CoreTiming::ForceCheck();
|
||||
return mips->pc;
|
||||
}
|
||||
break;
|
||||
case IROp::ValidateAddress128:
|
||||
if (RunValidateAddress<16>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
|
||||
CoreTiming::ForceCheck();
|
||||
return mips->pc;
|
||||
}
|
||||
break;
|
||||
|
||||
case IROp::Load8:
|
||||
mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant);
|
||||
break;
|
||||
@ -954,7 +998,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
|
||||
}
|
||||
|
||||
case IROp::Break:
|
||||
Core_Break();
|
||||
Core_Break(mips->pc);
|
||||
return mips->pc + 4;
|
||||
|
||||
case IROp::SetCtrlVFPU:
|
||||
|
@ -227,9 +227,10 @@ void IRJit::RunLoopUntil(u64 globalticks) {
|
||||
if (opcode == MIPS_EMUHACK_OPCODE) {
|
||||
u32 data = inst & 0xFFFFFF;
|
||||
IRBlock *block = blocks_.GetBlock(data);
|
||||
u32 startPC = mips_->pc;
|
||||
mips_->pc = IRInterpret(mips_, block->GetInstructions(), block->GetNumInstructions());
|
||||
if (!Memory::IsValidAddress(mips_->pc)) {
|
||||
Core_ExecException(mips_->pc, mips_->pc, ExecExceptionType::JUMP);
|
||||
if (!Memory::IsValidAddress(mips_->pc) || (mips_->pc & 3) != 0) {
|
||||
Core_ExecException(mips_->pc, startPC, ExecExceptionType::JUMP);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/Data/Convert/SmallDataConvert.h"
|
||||
#include "Common/Log.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/MIPS/IR/IRInterpreter.h"
|
||||
#include "Core/MIPS/IR/IRPassSimplify.h"
|
||||
#include "Core/MIPS/IR/IRRegCache.h"
|
||||
@ -622,6 +623,18 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts
|
||||
}
|
||||
break;
|
||||
|
||||
case IROp::ValidateAddress8:
|
||||
case IROp::ValidateAddress16:
|
||||
case IROp::ValidateAddress32:
|
||||
case IROp::ValidateAddress128:
|
||||
if (gpr.IsImm(inst.src1)) {
|
||||
out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + inst.constant));
|
||||
} else {
|
||||
gpr.MapIn(inst.src1);
|
||||
goto doDefault;
|
||||
}
|
||||
break;
|
||||
|
||||
case IROp::Downcount:
|
||||
case IROp::SetPCConst:
|
||||
goto doDefault;
|
||||
@ -1428,3 +1441,58 @@ bool MergeLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts) {
|
||||
}
|
||||
return logBlocks;
|
||||
}
|
||||
|
||||
bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &opts) {
|
||||
CONDITIONAL_DISABLE;
|
||||
if (g_Config.bFastMemory)
|
||||
DISABLE;
|
||||
|
||||
const auto addValidate = [&out](IROp validate, const IRInst &inst, bool isStore) {
|
||||
out.Write({ validate, { 0 }, inst.src1, isStore ? (u8)1 : (u8)0, inst.constant });
|
||||
};
|
||||
|
||||
// TODO: Could be smart about not double-validating an address that has a load / store, etc.
|
||||
bool logBlocks = false;
|
||||
for (IRInst inst : in.GetInstructions()) {
|
||||
switch (inst.op) {
|
||||
case IROp::Load8:
|
||||
case IROp::Load8Ext:
|
||||
case IROp::Store8:
|
||||
addValidate(IROp::ValidateAddress8, inst, inst.op == IROp::Store8);
|
||||
break;
|
||||
|
||||
case IROp::Load16:
|
||||
case IROp::Load16Ext:
|
||||
case IROp::Store16:
|
||||
addValidate(IROp::ValidateAddress16, inst, inst.op == IROp::Store16);
|
||||
break;
|
||||
|
||||
case IROp::Load32:
|
||||
case IROp::LoadFloat:
|
||||
case IROp::Store32:
|
||||
case IROp::StoreFloat:
|
||||
addValidate(IROp::ValidateAddress32, inst, inst.op == IROp::Store32 || inst.op == IROp::StoreFloat);
|
||||
break;
|
||||
|
||||
case IROp::LoadVec4:
|
||||
case IROp::StoreVec4:
|
||||
addValidate(IROp::ValidateAddress128, inst, inst.op == IROp::StoreVec4);
|
||||
break;
|
||||
|
||||
case IROp::Load32Left:
|
||||
case IROp::Load32Right:
|
||||
case IROp::Store32Left:
|
||||
case IROp::Store32Right:
|
||||
// This explicitly does not require alignment, so validate as an 8-bit operation.
|
||||
addValidate(IROp::ValidateAddress8, inst, inst.op == IROp::Store32Left || inst.op == IROp::Store32Right);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// Always write out the original. We're only adding.
|
||||
out.Write(inst);
|
||||
}
|
||||
return logBlocks;
|
||||
}
|
||||
|
@ -14,3 +14,4 @@ bool ThreeOpToTwoOp(const IRWriter &in, IRWriter &out, const IROptions &opts);
|
||||
bool OptimizeFPMoves(const IRWriter &in, IRWriter &out, const IROptions &opts);
|
||||
bool ReorderLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts);
|
||||
bool MergeLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts);
|
||||
bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &opts);
|
||||
|
@ -57,8 +57,7 @@
|
||||
|
||||
static inline void DelayBranchTo(u32 where)
|
||||
{
|
||||
if (!Memory::IsValidAddress(where)) {
|
||||
// TODO: What about misaligned?
|
||||
if (!Memory::IsValidAddress(where) || (where & 3) != 0) {
|
||||
Core_ExecException(where, PC, ExecExceptionType::JUMP);
|
||||
}
|
||||
PC += 4;
|
||||
@ -158,7 +157,7 @@ namespace MIPSInt
|
||||
void Int_Break(MIPSOpcode op)
|
||||
{
|
||||
Reporting::ReportMessage("BREAK instruction hit");
|
||||
Core_Break();
|
||||
Core_Break(PC);
|
||||
PC += 4;
|
||||
}
|
||||
|
||||
|
@ -612,7 +612,7 @@ void Jit::Comp_Jump(MIPSOpcode op) {
|
||||
u32 targetAddr = (GetCompilerPC() & 0xF0000000) | off;
|
||||
|
||||
// Might be a stubbed address or something?
|
||||
if (!Memory::IsValidAddress(targetAddr)) {
|
||||
if (!Memory::IsValidAddress(targetAddr) || (targetAddr & 3) != 0) {
|
||||
if (js.nextExit == 0) {
|
||||
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x PC %08x LR %08x", targetAddr, GetCompilerPC(), currentMIPS->r[MIPS_REG_RA]);
|
||||
} else {
|
||||
|
@ -275,7 +275,7 @@ void Jit::Compile(u32 em_address) {
|
||||
ClearCache();
|
||||
}
|
||||
|
||||
if (!Memory::IsValidAddress(em_address)) {
|
||||
if (!Memory::IsValidAddress(em_address) || (em_address & 3) != 0) {
|
||||
Core_ExecException(em_address, em_address, ExecExceptionType::JUMP);
|
||||
return;
|
||||
}
|
||||
@ -672,7 +672,7 @@ static void HitInvalidBranch(uint32_t dest) {
|
||||
void Jit::WriteExit(u32 destination, int exit_num) {
|
||||
_dbg_assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num");
|
||||
|
||||
if (!Memory::IsValidAddress(destination)) {
|
||||
if (!Memory::IsValidAddress(destination) || (destination & 3) != 0) {
|
||||
ERROR_LOG_REPORT(JIT, "Trying to write block exit to illegal destination %08x: pc = %08x", destination, currentMIPS->pc);
|
||||
MOV(32, MIPSSTATE_VAR(pc), Imm32(GetCompilerPC()));
|
||||
ABI_CallFunctionC(&HitInvalidBranch, destination);
|
||||
@ -721,6 +721,12 @@ void Jit::WriteExit(u32 destination, int exit_num) {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns 1 if addr is both a valid memory address and 4-byte aligned, 0 otherwise.
// The result is an integer rather than a bool because it is called from generated
// code, which compares the returned register against zero.
static u32 IsValidJumpTarget(uint32_t addr) {
	const bool usable = Memory::IsValidAddress(addr) && (addr & 3) == 0;
	return usable ? 1 : 0;
}
|
||||
|
||||
static void HitInvalidJumpReg(uint32_t source) {
|
||||
Core_ExecException(currentMIPS->pc, source, ExecExceptionType::JUMP);
|
||||
currentMIPS->pc = source + 8;
|
||||
@ -762,7 +768,7 @@ void Jit::WriteExitDestInReg(X64Reg reg) {
|
||||
SetJumpTarget(tooLow);
|
||||
SetJumpTarget(tooHigh);
|
||||
|
||||
ABI_CallFunctionA((const void *)&Memory::IsValidAddress, R(reg));
|
||||
ABI_CallFunctionA((const void *)&IsValidJumpTarget, R(reg));
|
||||
|
||||
// If we're ignoring, coreState didn't trip - so trip it now.
|
||||
CMP(32, R(EAX), Imm32(0));
|
||||
|
@ -49,7 +49,8 @@ static const int VERSION = 1;
|
||||
static const int MAX_MIP_LEVELS = 12; // 12 should be plenty, 8 is the max mip levels supported by the PSP.
|
||||
|
||||
TextureReplacer::TextureReplacer() {
|
||||
none_.alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
|
||||
none_.initDone_ = true;
|
||||
none_.prepareDone_ = true;
|
||||
}
|
||||
|
||||
TextureReplacer::~TextureReplacer() {
|
||||
@ -373,7 +374,7 @@ u32 TextureReplacer::ComputeHash(u32 addr, int bufw, int w, int h, GETextureForm
|
||||
}
|
||||
}
|
||||
|
||||
ReplacedTexture &TextureReplacer::FindReplacement(u64 cachekey, u32 hash, int w, int h) {
|
||||
ReplacedTexture &TextureReplacer::FindReplacement(u64 cachekey, u32 hash, int w, int h, double budget) {
|
||||
// Only actually replace if we're replacing. We might just be saving.
|
||||
if (!Enabled() || !g_Config.bReplaceTextures) {
|
||||
return none_;
|
||||
@ -382,13 +383,18 @@ ReplacedTexture &TextureReplacer::FindReplacement(u64 cachekey, u32 hash, int w,
|
||||
ReplacementCacheKey replacementKey(cachekey, hash);
|
||||
auto it = cache_.find(replacementKey);
|
||||
if (it != cache_.end()) {
|
||||
if (!it->second.prepareDone_ && budget > 0.0) {
|
||||
// We don't do this on a thread, but we only do it while within budget.
|
||||
PopulateReplacement(&it->second, cachekey, hash, w, h);
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
// Okay, let's construct the result.
|
||||
ReplacedTexture &result = cache_[replacementKey];
|
||||
result.alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
|
||||
PopulateReplacement(&result, cachekey, hash, w, h);
|
||||
if (!g_Config.bReplaceTexturesAllowLate || budget > 0.0) {
|
||||
PopulateReplacement(&result, cachekey, hash, w, h);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -433,7 +439,7 @@ void TextureReplacer::PopulateReplacement(ReplacedTexture *result, u64 cachekey,
|
||||
break;
|
||||
}
|
||||
|
||||
result->alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
|
||||
result->prepareDone_ = true;
|
||||
}
|
||||
|
||||
enum class ReplacedImageType {
|
||||
@ -815,11 +821,13 @@ bool ReplacedTexture::IsReady(double budget) {
|
||||
}
|
||||
|
||||
// Loaded already, or not yet on a thread?
|
||||
if (!levelData_.empty())
|
||||
if (initDone_ && !levelData_.empty())
|
||||
return true;
|
||||
// Let's not even start a new texture if we're already behind.
|
||||
if (budget < 0.0)
|
||||
return false;
|
||||
if (!prepareDone_)
|
||||
return false;
|
||||
|
||||
if (g_Config.bReplaceTexturesAllowLate) {
|
||||
if (threadWaitable_)
|
||||
@ -829,10 +837,11 @@ bool ReplacedTexture::IsReady(double budget) {
|
||||
|
||||
if (threadWaitable_->WaitFor(budget)) {
|
||||
// If we finished all the levels, we're done.
|
||||
return !levelData_.empty();
|
||||
return initDone_ && !levelData_.empty();
|
||||
}
|
||||
} else {
|
||||
Prepare();
|
||||
_assert_(initDone_);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -842,16 +851,19 @@ bool ReplacedTexture::IsReady(double budget) {
|
||||
|
||||
void ReplacedTexture::Prepare() {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
if (cancelPrepare_)
|
||||
if (cancelPrepare_) {
|
||||
initDone_ = true;
|
||||
return;
|
||||
}
|
||||
|
||||
levelData_.resize(NumLevels());
|
||||
for (int i = 0; i < NumLevels(); ++i) {
|
||||
levelData_.resize(levels_.size());
|
||||
for (int i = 0; i < (int)levels_.size(); ++i) {
|
||||
if (cancelPrepare_)
|
||||
break;
|
||||
PrepareData(i);
|
||||
}
|
||||
|
||||
initDone_ = true;
|
||||
if (!cancelPrepare_ && threadWaitable_)
|
||||
threadWaitable_->Notify();
|
||||
}
|
||||
@ -975,6 +987,8 @@ bool ReplacedTexture::Load(int level, void *out, int rowPitch) {
|
||||
_assert_msg_((size_t)level < levels_.size(), "Invalid miplevel");
|
||||
_assert_msg_(out != nullptr && rowPitch > 0, "Invalid out/pitch");
|
||||
|
||||
if (!initDone_)
|
||||
return false;
|
||||
if (levelData_.empty())
|
||||
return false;
|
||||
|
||||
|
@ -118,10 +118,20 @@ struct ReplacedTexture {
|
||||
~ReplacedTexture();
|
||||
|
||||
inline bool Valid() const {
|
||||
if (!initDone_)
|
||||
return false;
|
||||
return !levels_.empty();
|
||||
}
|
||||
|
||||
inline bool IsInvalid() const {
|
||||
if (!initDone_)
|
||||
return false;
|
||||
return levels_.empty();
|
||||
}
|
||||
|
||||
bool GetSize(int level, int &w, int &h) const {
|
||||
if (!initDone_)
|
||||
return false;
|
||||
if ((size_t)level < levels_.size()) {
|
||||
w = levels_[level].w;
|
||||
h = levels_[level].h;
|
||||
@ -131,12 +141,16 @@ struct ReplacedTexture {
|
||||
}
|
||||
|
||||
int NumLevels() const {
|
||||
if (!initDone_)
|
||||
return 0;
|
||||
return (int)levels_.size();
|
||||
}
|
||||
|
||||
Draw::DataFormat Format(int level) const {
|
||||
if ((size_t)level < levels_.size()) {
|
||||
return levels_[level].fmt;
|
||||
if (initDone_) {
|
||||
if ((size_t)level < levels_.size()) {
|
||||
return levels_[level].fmt;
|
||||
}
|
||||
}
|
||||
return Draw::DataFormat::R8G8B8A8_UNORM;
|
||||
}
|
||||
@ -156,11 +170,13 @@ protected:
|
||||
|
||||
std::vector<ReplacedTextureLevel> levels_;
|
||||
std::vector<std::vector<uint8_t>> levelData_;
|
||||
ReplacedTextureAlpha alphaStatus_;
|
||||
ReplacedTextureAlpha alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
|
||||
double lastUsed_ = 0.0;
|
||||
LimitedWaitable *threadWaitable_ = nullptr;
|
||||
std::mutex mutex_;
|
||||
bool cancelPrepare_ = false;
|
||||
bool initDone_ = false;
|
||||
bool prepareDone_ = false;
|
||||
|
||||
friend TextureReplacer;
|
||||
friend ReplacedTextureTask;
|
||||
@ -196,7 +212,7 @@ public:
|
||||
|
||||
u32 ComputeHash(u32 addr, int bufw, int w, int h, GETextureFormat fmt, u16 maxSeenV);
|
||||
|
||||
ReplacedTexture &FindReplacement(u64 cachekey, u32 hash, int w, int h);
|
||||
ReplacedTexture &FindReplacement(u64 cachekey, u32 hash, int w, int h, double budget);
|
||||
bool FindFiltering(u64 cachekey, u32 hash, TextureFiltering *forceFiltering);
|
||||
ReplacedTexture &FindNone() {
|
||||
return none_;
|
||||
|
@ -1,244 +0,0 @@
|
||||
// Copyright (c) 2014- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "Common/Log.h"
|
||||
#include "Common/StringUtils.h"
|
||||
#include "Common/GPU/Shader.h"
|
||||
#include "Common/GPU/ShaderWriter.h"
|
||||
#include "Common/Data/Convert/ColorConv.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "GPU/Common/DrawEngineCommon.h"
|
||||
#include "GPU/Common/TextureCacheCommon.h"
|
||||
#include "GPU/Common/DepalettizeShaderCommon.h"
|
||||
#include "GPU/Common/DepalettizeCommon.h"
|
||||
|
||||
static const VaryingDef varyings[1] = {
|
||||
{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
|
||||
};
|
||||
|
||||
static const SamplerDef samplers[2] = {
|
||||
{ "tex" },
|
||||
{ "pal" },
|
||||
};
|
||||
|
||||
DepalShaderCache::DepalShaderCache(Draw::DrawContext *draw) : draw_(draw) { }
|
||||
|
||||
DepalShaderCache::~DepalShaderCache() {
|
||||
DeviceLost();
|
||||
}
|
||||
|
||||
void DepalShaderCache::DeviceRestore(Draw::DrawContext *draw) {
|
||||
draw_ = draw;
|
||||
}
|
||||
|
||||
void DepalShaderCache::DeviceLost() {
|
||||
Clear();
|
||||
}
|
||||
|
||||
// Returns the palette (CLUT) texture for the given format and hash, creating and
// caching it on a miss. rawClut is only read on a miss. 16-bit palettes are expanded
// to RGBA8888 before upload; 32-bit palettes are uploaded as-is.
Draw::Texture *DepalShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut) {
	u32 clutId = GetClutID(clutFormat, clutHash);

	auto oldtex = texCache_.find(clutId);
	if (oldtex != texCache_.end()) {
		// Mark as recently used so Decimate() keeps it.
		oldtex->second->lastFrame = gpuStats.numFlips;
		return oldtex->second->texture;
	}

	int texturePixels = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512;

	DepalTexture *tex = new DepalTexture();

	Draw::TextureDesc desc{};
	desc.width = texturePixels;
	desc.height = 1;
	desc.depth = 1;
	desc.mipLevels = 1;
	desc.tag = "clut";
	desc.type = Draw::TextureType::LINEAR2D;  // TODO: Try LINEAR1D?
	desc.format = Draw::DataFormat::R8G8B8A8_UNORM;  // TODO: Also support an BGR format. We won't bother with the 16-bit formats here.

	// Large enough for the biggest converted palette: 512 entries * 4 bytes.
	uint8_t convTemp[2048]{};

	// Each 16-bit palette format must go through the converter matching its bit layout.
	switch (clutFormat) {
	case GEPaletteFormat::GE_CMODE_32BIT_ABGR8888:
		desc.initData.push_back((const uint8_t *)rawClut);
		break;
	case GEPaletteFormat::GE_CMODE_16BIT_BGR5650:
		ConvertRGB565ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels);
		desc.initData.push_back(convTemp);
		break;
	case GEPaletteFormat::GE_CMODE_16BIT_ABGR5551:
		ConvertRGBA5551ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels);
		desc.initData.push_back(convTemp);
		break;
	case GEPaletteFormat::GE_CMODE_16BIT_ABGR4444:
		ConvertRGBA4444ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, texturePixels);
		desc.initData.push_back(convTemp);
		break;
	}

	tex->texture = draw_->CreateTexture(desc);
	tex->lastFrame = gpuStats.numFlips;

	texCache_[clutId] = tex;
	return tex->texture;
}
|
||||
|
||||
void DepalShaderCache::Clear() {
|
||||
for (auto shader = cache_.begin(); shader != cache_.end(); ++shader) {
|
||||
shader->second->fragShader->Release();
|
||||
if (shader->second->pipeline) {
|
||||
shader->second->pipeline->Release();
|
||||
}
|
||||
delete shader->second;
|
||||
}
|
||||
cache_.clear();
|
||||
for (auto tex = texCache_.begin(); tex != texCache_.end(); ++tex) {
|
||||
tex->second->texture->Release();
|
||||
delete tex->second;
|
||||
}
|
||||
texCache_.clear();
|
||||
if (vertexShader_) {
|
||||
vertexShader_->Release();
|
||||
vertexShader_ = nullptr;
|
||||
}
|
||||
if (nearestSampler_) {
|
||||
nearestSampler_->Release();
|
||||
nearestSampler_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void DepalShaderCache::Decimate() {
|
||||
for (auto tex = texCache_.begin(); tex != texCache_.end(); ) {
|
||||
if (tex->second->lastFrame + DEPAL_TEXTURE_OLD_AGE < gpuStats.numFlips) {
|
||||
tex->second->texture->Release();
|
||||
delete tex->second;
|
||||
texCache_.erase(tex++);
|
||||
} else {
|
||||
++tex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the shared sampler, creating it on first use. It clamps on all three
// axes; every other field is left at the SamplerStateDesc defaults.
Draw::SamplerState *DepalShaderCache::GetSampler() {
	if (nearestSampler_)
		return nearestSampler_;

	Draw::SamplerStateDesc samplerDesc{};
	samplerDesc.wrapU = Draw::TextureAddressMode::CLAMP_TO_EDGE;
	samplerDesc.wrapV = Draw::TextureAddressMode::CLAMP_TO_EDGE;
	samplerDesc.wrapW = Draw::TextureAddressMode::CLAMP_TO_EDGE;
	nearestSampler_ = draw_->CreateSamplerState(samplerDesc);
	return nearestSampler_;
}
|
||||
|
||||
// Returns the depalettization shader for (clutMode, pixelFormat), generating and
// caching it on a miss. The shared vertex shader is created on the first miss.
// Returns nullptr when no pipeline could be created for this combination; the
// failed entry stays cached so creation is not retried on every call.
DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GEBufferFormat pixelFormat) {
	using namespace Draw;

	u32 id = GenerateShaderID(clutMode, pixelFormat);

	auto shader = cache_.find(id);
	if (shader != cache_.end()) {
		// Report a cached failure the same way the creation path below does.
		DepalShader *depal = shader->second;
		return depal->pipeline ? depal : nullptr;
	}

	// Scratch buffer for generated shader source, reused for the VS and the FS.
	char *buffer = new char[4096];

	if (!vertexShader_) {
		GenerateDepalVs(buffer, draw_->GetShaderLanguageDesc());
		vertexShader_ = draw_->CreateShaderModule(ShaderStage::Vertex, draw_->GetShaderLanguageDesc().shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "depal_vs");
	}

	// TODO: Parse these out of clutMode some nice way, to become a bit more stateless.
	DepalConfig config;
	config.clutFormat = gstate.getClutPaletteFormat();
	config.startPos = gstate.getClutIndexStartPos();
	config.shift = gstate.getClutIndexShift();
	config.mask = gstate.getClutIndexMask();
	config.pixelFormat = pixelFormat;

	GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc());

	ShaderModule *fragShader = draw_->CreateShaderModule(ShaderStage::Fragment, draw_->GetShaderLanguageDesc().shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "depal_fs");

	DepalShader *depal = new DepalShader();

	// 16-byte vertices: float2 position at offset 0, float2 texcoord at offset 8.
	static const InputLayoutDesc desc = {
		{
			{ 16, false },
		},
		{
			{ 0, SEM_POSITION, DataFormat::R32G32_FLOAT, 0 },
			{ 0, SEM_TEXCOORD0, DataFormat::R32G32_FLOAT, 8 },
		},
	};
	InputLayout *inputLayout = draw_->CreateInputLayout(desc);
	BlendState *blendOff = draw_->CreateBlendState({ false, 0xF });
	DepthStencilStateDesc dsDesc{};
	DepthStencilState *noDepthStencil = draw_->CreateDepthStencilState(dsDesc);
	RasterState *rasterNoCull = draw_->CreateRasterState({});

	PipelineDesc depalPipelineDesc{
		Primitive::TRIANGLE_STRIP,  // Could have use a single triangle too (in which case we'd use LIST here) but want to be prepared to do subrectangles.
		{ vertexShader_, fragShader },
		inputLayout, noDepthStencil, blendOff, rasterNoCull, nullptr, samplers
	};

	Pipeline *pipeline = draw_->CreateGraphicsPipeline(depalPipelineDesc);

	// The state objects are released right after pipeline creation; this function keeps no reference to them.
	inputLayout->Release();
	blendOff->Release();
	noDepthStencil->Release();
	rasterNoCull->Release();

	_assert_(pipeline);

	depal->pipeline = pipeline;
	depal->fragShader = fragShader;
	// Keep the fragment shader source for the debug UI.
	depal->code = buffer;
	cache_[id] = depal;

	delete[] buffer;
	return depal->pipeline ? depal : nullptr;
}
|
||||
|
||||
// Lists the IDs of all cached depal shaders as 8-digit hex strings.
// The type argument is not used.
std::vector<std::string> DepalShaderCache::DebugGetShaderIDs(DebugShaderType type) {
	std::vector<std::string> result;
	for (auto entry = cache_.begin(); entry != cache_.end(); ++entry) {
		result.push_back(StringFromFormat("%08x", entry->first));
	}
	return result;
}
|
||||
|
||||
std::string DepalShaderCache::DebugGetShaderString(std::string idstr, DebugShaderType type, DebugShaderStringType stringType) {
|
||||
uint32_t id;
|
||||
sscanf(idstr.c_str(), "%08x", &id);
|
||||
auto iter = cache_.find(id);
|
||||
if (iter == cache_.end())
|
||||
return "";
|
||||
switch (stringType) {
|
||||
case SHADER_STRING_SHORT_DESC:
|
||||
return idstr;
|
||||
case SHADER_STRING_SOURCE_CODE:
|
||||
return iter->second->code;
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
@ -1,182 +0,0 @@
|
||||
// Copyright (c) 2014- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/GPU/Shader.h"
|
||||
#include "Common/GPU/thin3d.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "GPU/Common/Draw2D.h"
|
||||
#include "GPU/Common/ShaderCommon.h"
|
||||
#include "GPU/Common/DepalettizeShaderCommon.h"
|
||||
|
||||
class DepalShader {
|
||||
public:
|
||||
Draw::ShaderModule *fragShader;
|
||||
Draw::Pipeline *pipeline;
|
||||
std::string code;
|
||||
};
|
||||
|
||||
class DepalTexture {
|
||||
public:
|
||||
Draw::Texture *texture;
|
||||
int lastFrame;
|
||||
};
|
||||
|
||||
// Caches both shaders and palette textures.
|
||||
class DepalShaderCache {
|
||||
public:
|
||||
DepalShaderCache(Draw::DrawContext *draw);
|
||||
~DepalShaderCache();
|
||||
|
||||
// This also uploads the palette and binds the correct texture.
|
||||
DepalShader *GetDepalettizeShader(uint32_t clutMode, GEBufferFormat pixelFormat);
|
||||
Draw::Texture *GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut);
|
||||
|
||||
Draw::SamplerState *GetSampler();
|
||||
|
||||
void Clear();
|
||||
void Decimate();
|
||||
std::vector<std::string> DebugGetShaderIDs(DebugShaderType type);
|
||||
std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType);
|
||||
|
||||
void DeviceLost();
|
||||
void DeviceRestore(Draw::DrawContext *draw);
|
||||
|
||||
private:
|
||||
static uint32_t GenerateShaderID(uint32_t clutMode, GEBufferFormat pixelFormat) {
|
||||
return (clutMode & 0xFFFFFF) | (pixelFormat << 24);
|
||||
}
|
||||
|
||||
static uint32_t GetClutID(GEPaletteFormat clutFormat, uint32_t clutHash) {
|
||||
// Simplistic.
|
||||
return clutHash ^ (uint32_t)clutFormat;
|
||||
}
|
||||
|
||||
Draw::DrawContext *draw_;
|
||||
Draw::ShaderModule *vertexShader_ = nullptr;
|
||||
Draw::SamplerState *nearestSampler_ = nullptr;
|
||||
|
||||
std::map<u32, DepalShader *> cache_;
|
||||
std::map<u32, DepalTexture *> texCache_;
|
||||
};
|
||||
|
||||
// TODO: Merge with DepalShaderCache?
|
||||
class TextureShaderApplier {
|
||||
public:
|
||||
struct Pos {
|
||||
float x;
|
||||
float y;
|
||||
};
|
||||
struct UV {
|
||||
float u;
|
||||
float v;
|
||||
};
|
||||
|
||||
TextureShaderApplier(Draw::DrawContext *draw, DepalShader *shader, float bufferW, float bufferH, int renderW, int renderH)
|
||||
: draw_(draw), shader_(shader), bufferW_(bufferW), bufferH_(bufferH), renderW_(renderW), renderH_(renderH) {
|
||||
static const Pos pos[4] = {
|
||||
{-1, -1 },
|
||||
{ 1, -1 },
|
||||
{-1, 1 },
|
||||
{ 1, 1 },
|
||||
};
|
||||
memcpy(pos_, pos, sizeof(pos_));
|
||||
|
||||
static const UV uv[4] = {
|
||||
{ 0, 0 },
|
||||
{ 1, 0 },
|
||||
{ 0, 1 },
|
||||
{ 1, 1 },
|
||||
};
|
||||
memcpy(uv_, uv, sizeof(uv_));
|
||||
}
|
||||
|
||||
void ApplyBounds(const KnownVertexBounds &bounds, u32 uoff, u32 voff) {
|
||||
// If min is not < max, then we don't have values (wasn't set during decode.)
|
||||
if (bounds.minV < bounds.maxV) {
|
||||
const float invWidth = 1.0f / bufferW_;
|
||||
const float invHeight = 1.0f / bufferH_;
|
||||
// Inverse of half = double.
|
||||
const float invHalfWidth = invWidth * 2.0f;
|
||||
const float invHalfHeight = invHeight * 2.0f;
|
||||
|
||||
const int u1 = bounds.minU + uoff;
|
||||
const int v1 = bounds.minV + voff;
|
||||
const int u2 = bounds.maxU + uoff;
|
||||
const int v2 = bounds.maxV + voff;
|
||||
|
||||
const float left = u1 * invHalfWidth - 1.0f;
|
||||
const float right = u2 * invHalfWidth - 1.0f;
|
||||
const float top = v1 * invHalfHeight - 1.0f;
|
||||
const float bottom = v2 * invHalfHeight - 1.0f;
|
||||
// Points are: BL, BR, TR, TL.
|
||||
pos_[0] = Pos{ left, bottom };
|
||||
pos_[1] = Pos{ right, bottom };
|
||||
pos_[2] = Pos{ left, top };
|
||||
pos_[3] = Pos{ right, top };
|
||||
|
||||
// And also the UVs, same order.
|
||||
const float uvleft = u1 * invWidth;
|
||||
const float uvright = u2 * invWidth;
|
||||
const float uvtop = v1 * invHeight;
|
||||
const float uvbottom = v2 * invHeight;
|
||||
uv_[0] = UV{ uvleft, uvbottom };
|
||||
uv_[1] = UV{ uvright, uvbottom };
|
||||
uv_[2] = UV{ uvleft, uvtop };
|
||||
uv_[3] = UV{ uvright, uvtop };
|
||||
|
||||
// We need to reapply the texture next time since we cropped UV.
|
||||
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
|
||||
}
|
||||
}
|
||||
|
||||
void Use() {
|
||||
draw_->BindPipeline(shader_->pipeline);
|
||||
struct SimpleVertex {
|
||||
float pos[2];
|
||||
float uv[2];
|
||||
};
|
||||
for (int i = 0; i < 4; i++) {
|
||||
memcpy(&verts_[i].x, &pos_[i], sizeof(Pos));
|
||||
memcpy(&verts_[i].u, &uv_[i], sizeof(UV));
|
||||
}
|
||||
}
|
||||
|
||||
void Shade() {
|
||||
Draw::Viewport vp{ 0.0f, 0.0f, (float)renderW_, (float)renderH_, 0.0f, 1.0f };
|
||||
draw_->SetViewports(1, &vp);
|
||||
draw_->SetScissorRect(0, 0, renderW_, renderH_);
|
||||
draw_->DrawUP((const uint8_t *)verts_, 4);
|
||||
}
|
||||
|
||||
protected:
|
||||
Draw::DrawContext *draw_;
|
||||
DepalShader *shader_;
|
||||
Pos pos_[4];
|
||||
UV uv_[4];
|
||||
Draw2DVertex verts_[4];
|
||||
float bufferW_;
|
||||
float bufferH_;
|
||||
int renderW_;
|
||||
int renderH_;
|
||||
};
|
@ -27,14 +27,12 @@
|
||||
#include "GPU/Common/GPUStateUtils.h"
|
||||
#include "GPU/Common/DepalettizeShaderCommon.h"
|
||||
|
||||
#define WRITE p+=sprintf
|
||||
|
||||
static const InputDef vsInputs[2] = {
|
||||
{ "vec2", "a_position", Draw::SEM_POSITION, },
|
||||
{ "vec2", "a_texcoord0", Draw::SEM_TEXCOORD0, },
|
||||
};
|
||||
|
||||
// TODO: Deduplicate with DepalettizeCommon.cpp
|
||||
// TODO: Deduplicate with TextureShaderCommon.cpp
|
||||
static const SamplerDef samplers[2] = {
|
||||
{ "tex" },
|
||||
{ "pal" },
|
||||
@ -44,12 +42,12 @@ static const VaryingDef varyings[1] = {
|
||||
{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
|
||||
};
|
||||
|
||||
// Uses integer instructions available since OpenGL 3.0. Suitable for ES 3.0 as well.
|
||||
void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
|
||||
// Uses integer instructions available since OpenGL 3.0, ES 3.0 (and 2.0 with extensions), and of course Vulkan and D3D11.
|
||||
void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
|
||||
const int shift = config.shift;
|
||||
const int mask = config.mask;
|
||||
|
||||
if (config.pixelFormat == GE_FORMAT_DEPTH16) {
|
||||
if (config.bufferFormat == GE_FORMAT_DEPTH16) {
|
||||
DepthScaleFactors factors = GetDepthScaleFactors();
|
||||
writer.ConstFloat("z_scale", factors.scale);
|
||||
writer.ConstFloat("z_offset", factors.offset);
|
||||
@ -71,7 +69,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con
|
||||
writer.C(" vec4 color = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
|
||||
|
||||
int shiftedMask = mask << shift;
|
||||
switch (config.pixelFormat) {
|
||||
switch (config.bufferFormat) {
|
||||
case GE_FORMAT_8888:
|
||||
if (shiftedMask & 0xFF) writer.C(" int r = int(color.r * 255.99);\n"); else writer.C(" int r = 0;\n");
|
||||
if (shiftedMask & 0xFF00) writer.C(" int g = int(color.g * 255.99);\n"); else writer.C(" int g = 0;\n");
|
||||
@ -102,6 +100,17 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con
|
||||
case GE_FORMAT_DEPTH16:
|
||||
// Remap depth buffer.
|
||||
writer.C(" float depth = (color.x - z_offset) * z_scale;\n");
|
||||
|
||||
if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) {
|
||||
// Convert depth to 565, without going through a CLUT.
|
||||
writer.C(" int idepth = int(clamp(depth, 0.0, 65535.0));\n");
|
||||
writer.C(" float r = (idepth & 31) / 31.0f;\n");
|
||||
writer.C(" float g = ((idepth >> 5) & 63) / 63.0f;\n");
|
||||
writer.C(" float b = ((idepth >> 11) & 31) / 31.0f;\n");
|
||||
writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n");
|
||||
return;
|
||||
}
|
||||
|
||||
writer.C(" int index = int(clamp(depth, 0.0, 65535.0));\n");
|
||||
break;
|
||||
default:
|
||||
@ -128,23 +137,25 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con
|
||||
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "uv").C(";\n");
|
||||
}
|
||||
|
||||
// FP only, to suit GL(ES) 2.0
|
||||
void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
|
||||
// FP only, to suit GL(ES) 2.0 and DX9
|
||||
void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) {
|
||||
char lookupMethod[128] = "index.r";
|
||||
|
||||
const int shift = config.shift;
|
||||
const int mask = config.mask;
|
||||
|
||||
if (config.pixelFormat == GE_FORMAT_DEPTH16) {
|
||||
if (config.bufferFormat == GE_FORMAT_DEPTH16) {
|
||||
DepthScaleFactors factors = GetDepthScaleFactors();
|
||||
writer.ConstFloat("z_scale", factors.scale);
|
||||
writer.ConstFloat("z_offset", factors.offset);
|
||||
}
|
||||
|
||||
writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
|
||||
|
||||
float index_multiplier = 1.0f;
|
||||
// pixelformat is the format of the texture we are sampling.
|
||||
bool formatOK = true;
|
||||
switch (config.pixelFormat) {
|
||||
switch (config.bufferFormat) {
|
||||
case GE_FORMAT_8888:
|
||||
if ((mask & (mask + 1)) == 0) {
|
||||
// If the value has all bits contiguous (bitmask check above), we can mod by it + 1.
|
||||
@ -222,6 +233,19 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c
|
||||
case GE_FORMAT_DEPTH16:
|
||||
{
|
||||
// TODO: I think we can handle most scenarios here, but texturing from depth buffers requires an extension on ES 2.0 anyway.
|
||||
// Not on D3D9 though, so this path is still relevant.
|
||||
|
||||
if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) {
|
||||
// Convert depth to 565, without going through a CLUT.
|
||||
writer.C(" float depth = (index.x - z_offset) * z_scale;\n");
|
||||
writer.C(" float idepth = floor(clamp(depth, 0.0, 65535.0));\n");
|
||||
writer.C(" float r = mod(idepth, 32.0) / 31.0f;\n");
|
||||
writer.C(" float g = mod(floor(idepth / 32.0), 64.0) / 63.0f;\n");
|
||||
writer.C(" float b = mod(floor(idepth / 2048.0), 32.0) / 31.0f;\n");
|
||||
writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (shift < 16) {
|
||||
index_multiplier = 1.0f / (float)(1 << shift);
|
||||
truncate_cpy(lookupMethod, "((index.x - z_offset) * z_scale)");
|
||||
@ -249,7 +273,7 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c
|
||||
// index_multiplier -= 0.01f / texturePixels;
|
||||
|
||||
if (!formatOK) {
|
||||
ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x offset=%d", config.pixelFormat, shift, mask, config.startPos);
|
||||
ERROR_LOG_REPORT_ONCE(depal, G3D, "%s depal unsupported: shift=%d mask=%02x offset=%d", GeBufferFormatToString(config.bufferFormat), shift, mask, config.startPos);
|
||||
}
|
||||
|
||||
// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.
|
||||
@ -258,38 +282,67 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c
|
||||
char offset[128] = "";
|
||||
sprintf(offset, " + %f", texel_offset);
|
||||
|
||||
writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
|
||||
writer.F(" float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
|
||||
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
|
||||
}
|
||||
|
||||
void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
|
||||
ShaderWriter writer(buffer, lang, ShaderStage::Fragment);
|
||||
// Emits the body of a float-only depal fragment shader: one color channel of the
// source texel is scaled into a palette index, and the palette texture is sampled
// at that index. Only 5551 and 565 buffer formats with a shift matching one
// channel are handled; anything else trips a debug assert and leaves the
// placeholder channel name in place.
void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) {
	const char *sourceChannel = "error";
	float indexMultiplier = 32.0f;
	const int shift = config.shift;

	if (config.bufferFormat == GE_FORMAT_5551) {
		_dbg_assert_(config.mask == 0x1F);
		if (shift == 0) {
			sourceChannel = "r";
		} else if (shift == 5) {
			sourceChannel = "g";
		} else if (shift == 10) {
			sourceChannel = "b";
		} else {
			_dbg_assert_(false);
		}
	} else if (config.bufferFormat == GE_FORMAT_565) {
		_dbg_assert_(config.mask == 0x1F || config.mask == 0x3F);
		if (shift == 0) {
			sourceChannel = "r";
		} else if (shift == 5) {
			// Green gets the larger multiplier in 565.
			sourceChannel = "g";
			indexMultiplier = 64.0f;
		} else if (shift == 11) {
			sourceChannel = "b";
		} else {
			_dbg_assert_(false);
		}
	} else {
		_dbg_assert_(false);
	}

	writer.C(" float index = ").SampleTexture2D("tex", "v_texcoord").F(".%s * %0.1f;\n", sourceChannel, indexMultiplier);

	// The divisor is 256 for 32-bit CLUTs and 512 for every other CLUT format.
	const float texturePixels = (config.clutFormat != GE_CMODE_32BIT_ABGR8888) ? 512.f : 256.f;

	writer.F(" float coord = (index + 0.5) * %f;\n", 1.0 / texturePixels);
	writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
}
|
||||
|
||||
void GenerateDepalFs(ShaderWriter &writer, const DepalConfig &config) {
|
||||
writer.DeclareSamplers(samplers);
|
||||
writer.HighPrecisionFloat();
|
||||
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
|
||||
switch (lang.shaderLanguage) {
|
||||
case HLSL_D3D9:
|
||||
case GLSL_1xx:
|
||||
GenerateDepalShaderFloat(writer, config, lang);
|
||||
break;
|
||||
case GLSL_VULKAN:
|
||||
case GLSL_3xx:
|
||||
case HLSL_D3D11:
|
||||
GenerateDepalShader300(writer, config, lang);
|
||||
break;
|
||||
default:
|
||||
_assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage);
|
||||
if (config.smoothedDepal) {
|
||||
// Handles a limited set of cases, but doesn't need any integer math so we don't
|
||||
// need two variants.
|
||||
GenerateDepalSmoothed(writer, config);
|
||||
} else {
|
||||
switch (writer.Lang().shaderLanguage) {
|
||||
case HLSL_D3D9:
|
||||
case GLSL_1xx:
|
||||
GenerateDepalShaderFloat(writer, config);
|
||||
break;
|
||||
case GLSL_VULKAN:
|
||||
case GLSL_3xx:
|
||||
case HLSL_D3D11:
|
||||
GenerateDepalShader300(writer, config);
|
||||
break;
|
||||
default:
|
||||
_assert_msg_(false, "Shader language not supported for depal: %d", (int)writer.Lang().shaderLanguage);
|
||||
}
|
||||
}
|
||||
writer.EndFSMain("outColor", FSFLAG_NONE);
|
||||
}
|
||||
|
||||
void GenerateDepalVs(char *buffer, const ShaderLanguageDesc &lang) {
|
||||
ShaderWriter writer(buffer, lang, ShaderStage::Vertex, nullptr, 0);
|
||||
writer.BeginVSMain(vsInputs, Slice<UniformDef>::empty(), varyings);
|
||||
writer.C(" v_texcoord = a_texcoord0;\n");
|
||||
writer.C(" gl_Position = vec4(a_position, 0.0, 1.0);\n");
|
||||
writer.EndVSMain(varyings);
|
||||
}
|
||||
|
||||
#undef WRITE
|
||||
|
@ -22,6 +22,8 @@
|
||||
#include "Common/GPU/Shader.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
|
||||
class ShaderWriter;
|
||||
|
||||
static const int DEPAL_TEXTURE_OLD_AGE = 120;
|
||||
|
||||
struct DepalConfig {
|
||||
@ -29,8 +31,9 @@ struct DepalConfig {
|
||||
int shift;
|
||||
u32 startPos;
|
||||
GEPaletteFormat clutFormat;
|
||||
GEBufferFormat pixelFormat;
|
||||
GETextureFormat textureFormat;
|
||||
GEBufferFormat bufferFormat;
|
||||
bool smoothedDepal;
|
||||
};
|
||||
|
||||
void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang);
|
||||
void GenerateDepalVs(char *buffer, const ShaderLanguageDesc &lang);
|
||||
void GenerateDepalFs(ShaderWriter &writer, const DepalConfig &config);
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "GPU/Common/DrawEngineCommon.h"
|
||||
#include "GPU/Common/FramebufferManagerCommon.h"
|
||||
#include "GPU/Common/TextureCacheCommon.h"
|
||||
#include "GPU/Common/GPUStateUtils.h"
|
||||
|
||||
static const InputDef inputs[2] = {
|
||||
{ "vec2", "a_position", Draw::SEM_POSITION },
|
||||
@ -39,19 +40,88 @@ static const SamplerDef samplers[1] = {
|
||||
{ "tex" },
|
||||
};
|
||||
|
||||
void GenerateDraw2DFs(ShaderWriter &writer) {
|
||||
// Uniform declarations handed to BeginFSMain by the Draw2D fragment shaders
// that reference them in their shader text: "texSize" (vec2) and
// "scaleFactor" (float).
static const UniformDef uniforms[2] = {
	{ "vec2", "texSize", 0 },
	{ "float", "scaleFactor", 1 },
};
|
||||
|
||||
struct Draw2DUB {
|
||||
float texSizeX;
|
||||
float texSizeY;
|
||||
float scaleFactor;
|
||||
};
|
||||
|
||||
const UniformBufferDesc draw2DUBDesc{ sizeof(Draw2DUB), {
|
||||
{ "texSize", -1, 0, UniformType::FLOAT2, 0 },
|
||||
{ "scaleFactor", -1, 1, UniformType::FLOAT1, 0 },
|
||||
} };
|
||||
|
||||
|
||||
// Fragment shader generator for a straight color copy: samples "tex" at the
// interpolated texcoord and outputs that value unchanged.
// The returned info declares color as both the channel read and the channel written.
Draw2DPipelineInfo GenerateDraw2DCopyColorFs(ShaderWriter &writer) {
	writer.DeclareSamplers(samplers);

	writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_NONE);
	writer.C(" vec4 outColor = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
	writer.EndFSMain("outColor", FSFLAG_NONE);

	// readChannel, writeChannel.
	Draw2DPipelineInfo info{ RASTER_COLOR, RASTER_COLOR };
	return info;
}
|
||||
|
||||
void GenerateDraw2DDepthFs(ShaderWriter &writer) {
|
||||
// Fragment shader generator for a depth copy: the x component sampled from
// "tex" is written to gl_FragDepth, while the color output is all zeros.
// The returned info declares depth as both the channel read and the channel written.
Draw2DPipelineInfo GenerateDraw2DCopyDepthFs(ShaderWriter &writer) {
	writer.DeclareSamplers(samplers);

	writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_WRITEDEPTH);
	writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n");
	writer.C(" gl_FragDepth = ").SampleTexture2D("tex", "v_texcoord.xy").C(".x;\n");
	writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH);

	// readChannel, writeChannel.
	Draw2DPipelineInfo info{ RASTER_DEPTH, RASTER_DEPTH };
	return info;
}
|
||||
|
||||
// Fragment shader generator that turns a sampled color into a depth value.
// The shader repacks the color as a 5-6-5 integer (x in the low 5 bits, y in
// the next 6, z in the top 5) and writes it to gl_FragDepth after applying
// the current depth scale factors. Color output is all zeros.
// The returned info declares a color read and a depth write.
Draw2DPipelineInfo GenerateDraw2D565ToDepthFs(ShaderWriter &writer) {
	// Unlike a plain depth copy, new depth values are generated here, so the
	// depth scaling has to be applied. The factors are baked into the shader text.
	const DepthScaleFactors depthScale = GetDepthScaleFactors();

	writer.DeclareSamplers(samplers);
	writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_WRITEDEPTH);
	writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n");
	writer.C(" vec3 rgb = ").SampleTexture2D("tex", "v_texcoord.xy").C(".xyz;\n");
	writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0); \n");
	writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", depthScale.scale, depthScale.offset);
	writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH);

	// readChannel, writeChannel.
	Draw2DPipelineInfo info{ RASTER_COLOR, RASTER_DEPTH };
	return info;
}
|
||||
|
||||
// Same 5-6-5 color-to-depth conversion as GenerateDraw2D565ToDepthFs, but the
// sample position is remapped first: the texcoord is converted to texels using
// the texSize uniform, its y is reflected within each strip of
// (4.0 * scaleFactor) rows, and the result is converted back to a texcoord.
// The returned info declares a color read and a depth write.
Draw2DPipelineInfo GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) {
	// New depth values are generated here, so the depth scaling has to be
	// applied. The factors are baked into the shader text.
	const DepthScaleFactors depthScale = GetDepthScaleFactors();

	writer.DeclareSamplers(samplers);
	writer.BeginFSMain(uniforms, varyings, FSFLAG_WRITEDEPTH);
	writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n");

	// Remap the sampling coordinate.
	writer.C(" vec2 tsize = texSize;\n");
	writer.C(" vec2 coord = v_texcoord * tsize;\n");
	writer.F(" float strip = 4.0 * scaleFactor;\n");
	writer.C(" float in_strip = mod(coord.y, strip);\n");
	writer.C(" coord.y = coord.y - in_strip + strip - in_strip;\n");
	writer.C(" coord /= tsize;\n");

	// Sample, repack as 5-6-5 and emit the scaled depth.
	writer.C(" vec3 rgb = ").SampleTexture2D("tex", "coord").C(".xyz;\n");
	writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0); \n");
	writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", depthScale.scale, depthScale.offset);
	writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH);

	// readChannel, writeChannel.
	Draw2DPipelineInfo info{ RASTER_COLOR, RASTER_DEPTH };
	return info;
}
|
||||
|
||||
void GenerateDraw2DVS(ShaderWriter &writer) {
|
||||
@ -63,7 +133,24 @@ void GenerateDraw2DVS(ShaderWriter &writer) {
|
||||
writer.EndVSMain(varyings);
|
||||
}
|
||||
|
||||
void FramebufferManagerCommon::Ensure2DResources() {
|
||||
// Calls Release() on `obj` if it is non-null, then leaves the pointer null so
// the same object cannot be released twice through it.
template <typename T>
static void DoRelease(T *&obj) {
	if (obj != nullptr) {
		obj->Release();
		obj = nullptr;
	}
}
|
||||
|
||||
// Drops the vertex shader and both sampler states held by this object.
// Each pointer is null afterwards.
void Draw2D::DeviceLost() {
	// Shader module first, then the two samplers.
	DoRelease(draw2DVs_);

	DoRelease(draw2DSamplerLinear_);
	DoRelease(draw2DSamplerNearest_);
}
|
||||
|
||||
// Called with the draw context to use after a device restore.
// Stores the context so that resources created from now on go through it;
// previously the argument was ignored and draw_ kept pointing at whatever
// context this object was constructed with.
// The resources released in DeviceLost() are not recreated here.
void Draw2D::DeviceRestore(Draw::DrawContext *draw) {
	draw_ = draw;
}
|
||||
|
||||
void Draw2D::Ensure2DResources() {
|
||||
using namespace Draw;
|
||||
|
||||
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
|
||||
@ -84,6 +171,7 @@ void FramebufferManagerCommon::Ensure2DResources() {
|
||||
descLinear.mipFilter = TextureFilter::LINEAR;
|
||||
descLinear.wrapU = TextureAddressMode::CLAMP_TO_EDGE;
|
||||
descLinear.wrapV = TextureAddressMode::CLAMP_TO_EDGE;
|
||||
descLinear.wrapW = TextureAddressMode::CLAMP_TO_EDGE;
|
||||
draw2DSamplerLinear_ = draw_->CreateSamplerState(descLinear);
|
||||
}
|
||||
|
||||
@ -94,20 +182,22 @@ void FramebufferManagerCommon::Ensure2DResources() {
|
||||
descNearest.mipFilter = TextureFilter::NEAREST;
|
||||
descNearest.wrapU = TextureAddressMode::CLAMP_TO_EDGE;
|
||||
descNearest.wrapV = TextureAddressMode::CLAMP_TO_EDGE;
|
||||
descNearest.wrapW = TextureAddressMode::CLAMP_TO_EDGE;
|
||||
draw2DSamplerNearest_ = draw_->CreateSamplerState(descNearest);
|
||||
}
|
||||
}
|
||||
|
||||
Draw::Pipeline *FramebufferManagerCommon::Create2DPipeline(void (*generate)(ShaderWriter &)) {
|
||||
Draw2DPipeline *Draw2D::Create2DPipeline(std::function<Draw2DPipelineInfo (ShaderWriter &)> generate) {
|
||||
Ensure2DResources();
|
||||
|
||||
using namespace Draw;
|
||||
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
|
||||
|
||||
char *fsCode = new char[4000];
|
||||
ShaderWriter writer(fsCode, shaderLanguageDesc, ShaderStage::Fragment);
|
||||
generate(writer);
|
||||
Draw2DPipelineInfo info = generate(writer);
|
||||
|
||||
ShaderModule *fs = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fsCode, strlen(fsCode), "draw2d_fs");
|
||||
delete[] fsCode;
|
||||
|
||||
_assert_(fs);
|
||||
|
||||
@ -123,22 +213,24 @@ Draw::Pipeline *FramebufferManagerCommon::Create2DPipeline(void (*generate)(Shad
|
||||
};
|
||||
InputLayout *inputLayout = draw_->CreateInputLayout(desc);
|
||||
|
||||
BlendState *blendOff = draw_->CreateBlendState({ false, 0xF });
|
||||
BlendState *blendDiscard = draw_->CreateBlendState({ false, 0x0 });
|
||||
BlendState *blend = draw_->CreateBlendState({ false, info.writeChannel == RASTER_COLOR ? 0xF : 0 });
|
||||
|
||||
DepthStencilState *noDepthStencil = draw_->CreateDepthStencilState(DepthStencilStateDesc{});
|
||||
DepthStencilStateDesc dsDesc{};
|
||||
if (info.writeChannel == RASTER_DEPTH) {
|
||||
dsDesc.depthTestEnabled = true;
|
||||
dsDesc.depthWriteEnabled = true;
|
||||
dsDesc.depthCompare = Draw::Comparison::ALWAYS;
|
||||
}
|
||||
|
||||
DepthStencilState *depthStencil = draw_->CreateDepthStencilState(dsDesc);
|
||||
RasterState *rasterNoCull = draw_->CreateRasterState({});
|
||||
|
||||
DepthStencilStateDesc dsWriteDesc{};
|
||||
dsWriteDesc.depthTestEnabled = true;
|
||||
dsWriteDesc.depthWriteEnabled = true;
|
||||
dsWriteDesc.depthCompare = Draw::Comparison::ALWAYS;
|
||||
DepthStencilState *depthWriteAlways = draw_->CreateDepthStencilState(dsWriteDesc);
|
||||
|
||||
PipelineDesc pipelineDesc{
|
||||
Primitive::TRIANGLE_STRIP,
|
||||
{ draw2DVs_, fs },
|
||||
inputLayout, noDepthStencil, blendOff, rasterNoCull, nullptr,
|
||||
inputLayout,
|
||||
depthStencil,
|
||||
blend, rasterNoCull, &draw2DUBDesc,
|
||||
};
|
||||
|
||||
Draw::Pipeline *pipeline = draw_->CreateGraphicsPipeline(pipelineDesc);
|
||||
@ -146,45 +238,97 @@ Draw::Pipeline *FramebufferManagerCommon::Create2DPipeline(void (*generate)(Shad
|
||||
fs->Release();
|
||||
|
||||
rasterNoCull->Release();
|
||||
blendOff->Release();
|
||||
blendDiscard->Release();
|
||||
noDepthStencil->Release();
|
||||
depthWriteAlways->Release();
|
||||
blend->Release();
|
||||
depthStencil->Release();
|
||||
inputLayout->Release();
|
||||
|
||||
return pipeline;
|
||||
return new Draw2DPipeline {
|
||||
pipeline,
|
||||
info,
|
||||
fsCode,
|
||||
};
|
||||
}
|
||||
|
||||
void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, RasterChannel channel) {
|
||||
|
||||
void Draw2D::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DPipeline *pipeline, float texW, float texH, int scaleFactor) {
|
||||
using namespace Draw;
|
||||
|
||||
Ensure2DResources();
|
||||
_dbg_assert_(pipeline);
|
||||
|
||||
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
|
||||
if (pipeline->info.writeChannel == RASTER_DEPTH) {
|
||||
_dbg_assert_(draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported);
|
||||
|
||||
switch (channel) {
|
||||
case RASTER_COLOR:
|
||||
if (!draw2DPipelineColor_) {
|
||||
draw2DPipelineColor_ = Create2DPipeline(&GenerateDraw2DFs);
|
||||
}
|
||||
draw_->BindPipeline(draw2DPipelineColor_);
|
||||
break;
|
||||
|
||||
case RASTER_DEPTH:
|
||||
if (!draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
|
||||
// Can't do it
|
||||
return;
|
||||
}
|
||||
if (!draw2DPipelineDepth_) {
|
||||
draw2DPipelineDepth_ = Create2DPipeline(&GenerateDraw2DFs);
|
||||
}
|
||||
draw_->BindPipeline(draw2DPipelineDepth_);
|
||||
break;
|
||||
// We don't filter inputs when writing depth, results will be bad.
|
||||
linearFilter = false;
|
||||
}
|
||||
|
||||
Draw2DUB ub;
|
||||
ub.texSizeX = tex ? tex->Width() : texW;
|
||||
ub.texSizeY = tex ? tex->Height() : texH;
|
||||
ub.scaleFactor = (float)scaleFactor;
|
||||
|
||||
draw_->BindPipeline(pipeline->pipeline);
|
||||
draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub));
|
||||
|
||||
if (tex) {
|
||||
draw_->BindTextures(TEX_SLOT_PSP_TEXTURE, 1, &tex);
|
||||
}
|
||||
draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, linearFilter ? &draw2DSamplerLinear_ : &draw2DSamplerNearest_);
|
||||
draw_->DrawUP(verts, vertexCount);
|
||||
|
||||
draw_->InvalidateCachedState();
|
||||
|
||||
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_VERTEXSHADER_STATE);
|
||||
}
|
||||
|
||||
// Returns the pipeline for `shader`, creating and caching it on first request.
//
// Returns nullptr when `shader` is not one of the known Draw2DShader values, or
// when a depth-writing shader is requested but the device caps report that
// fragment shaders cannot write depth.
Draw2DPipeline *FramebufferManagerCommon::Get2DPipeline(Draw2DShader shader) {
	Draw2DPipeline **cached = nullptr;  // Member that caches the pipeline for this shader.
	Draw2DPipelineInfo (*generate)(ShaderWriter &) = nullptr;  // Fragment shader generator.
	bool writesDepth = true;  // Everything except the color copy writes depth.

	switch (shader) {
	case DRAW2D_COPY_COLOR:
		cached = &draw2DPipelineColor_;
		generate = &GenerateDraw2DCopyColorFs;
		writesDepth = false;
		break;

	case DRAW2D_COPY_DEPTH:
		cached = &draw2DPipelineDepth_;
		generate = &GenerateDraw2DCopyDepthFs;
		break;

	case DRAW2D_565_TO_DEPTH:
		cached = &draw2DPipeline565ToDepth_;
		generate = &GenerateDraw2D565ToDepthFs;
		break;

	case DRAW2D_565_TO_DEPTH_DESWIZZLE:
		cached = &draw2DPipeline565ToDepthDeswizzle_;
		generate = &GenerateDraw2D565ToDepthDeswizzleFs;
		break;
	}

	if (!cached) {
		// Unknown shader value.
		return nullptr;
	}
	if (writesDepth && !draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
		// Can't do it
		return nullptr;
	}

	if (!*cached) {
		*cached = draw2D_.Create2DPipeline(generate);
	}
	return *cached;
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "GPU/GPU.h"
|
||||
|
||||
// For framebuffer copies and similar things that just require passthrough.
|
||||
struct Draw2DVertex {
|
||||
float x;
|
||||
@ -7,3 +9,60 @@ struct Draw2DVertex {
|
||||
float u;
|
||||
float v;
|
||||
};
|
||||
|
||||
// Identifies one of the built-in Draw2D fragment shaders.
enum Draw2DShader {
	DRAW2D_COPY_COLOR,              // Color copy.
	DRAW2D_COPY_DEPTH,              // Depth copy.
	DRAW2D_565_TO_DEPTH,            // 5-6-5 color converted to depth.
	DRAW2D_565_TO_DEPTH_DESWIZZLE,  // As above, with the deswizzle variant of the shader.
};
|
||||
|
||||
// Returns the channel a Draw2D shader reads from: depth for
// DRAW2D_COPY_DEPTH, color for every other value (including unknown ones).
inline RasterChannel Draw2DSourceChannel(Draw2DShader shader) {
	return shader == DRAW2D_COPY_DEPTH ? RASTER_DEPTH : RASTER_COLOR;
}
|
||||
|
||||
struct Draw2DPipelineInfo {
|
||||
RasterChannel readChannel;
|
||||
RasterChannel writeChannel;
|
||||
bool secondTexture;
|
||||
};
|
||||
|
||||
struct Draw2DPipeline {
|
||||
Draw::Pipeline *pipeline;
|
||||
Draw2DPipelineInfo info;
|
||||
char *code;
|
||||
void Release() {
|
||||
pipeline->Release();
|
||||
delete[] code;
|
||||
delete this;
|
||||
}
|
||||
};
|
||||
|
||||
class ShaderWriter;
|
||||
|
||||
class Draw2D {
|
||||
public:
|
||||
Draw2D(Draw::DrawContext *draw) : draw_(draw) {}
|
||||
void DeviceLost();
|
||||
void DeviceRestore(Draw::DrawContext *draw);
|
||||
|
||||
Draw2DPipeline *Create2DPipeline(std::function<Draw2DPipelineInfo(ShaderWriter &)> generate);
|
||||
|
||||
void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DPipeline *pipeline, float texW = 0.0f, float texH = 0.0f, int scaleFactor = 0);
|
||||
void Ensure2DResources();
|
||||
|
||||
private:
|
||||
Draw::DrawContext *draw_;
|
||||
|
||||
Draw::SamplerState *draw2DSamplerLinear_ = nullptr;
|
||||
Draw::SamplerState *draw2DSamplerNearest_ = nullptr;
|
||||
Draw::ShaderModule *draw2DVs_ = nullptr;
|
||||
};
|
||||
|
@ -23,8 +23,10 @@
|
||||
#include "Common/GPU/OpenGL/GLFeatures.h"
|
||||
#include "Common/GPU/ShaderWriter.h"
|
||||
#include "Common/GPU/thin3d.h"
|
||||
#include "Core/Compatibility.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/System.h"
|
||||
#include "GPU/Common/GPUStateUtils.h"
|
||||
#include "GPU/Common/ShaderId.h"
|
||||
#include "GPU/Common/ShaderUniforms.h"
|
||||
@ -88,9 +90,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
|
||||
bool doFlatShading = id.Bit(FS_BIT_FLATSHADE) && !flatBug;
|
||||
bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL) && !texture3D; // combination with texture3D not supported. Enforced elsewhere too.
|
||||
bool smoothedDepal = id.Bit(FS_BIT_SHADER_SMOOTHED_DEPAL);
|
||||
bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE);
|
||||
bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps;
|
||||
bool colorToDepth = id.Bit(FS_BIT_COLOR_TO_DEPTH);
|
||||
|
||||
GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3);
|
||||
GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2);
|
||||
@ -123,7 +125,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
|
||||
|
||||
bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
|
||||
bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT) || colorToDepth;
|
||||
bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
|
||||
|
||||
if (shaderDepal && !doTexture) {
|
||||
*errorString = "depal requires a texture";
|
||||
@ -136,11 +138,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
}
|
||||
|
||||
if (compat.shaderLanguage == ShaderLanguage::GLSL_VULKAN) {
|
||||
if (colorToDepth) {
|
||||
WRITE(p, "precision highp int;\n");
|
||||
WRITE(p, "precision highp float;\n");
|
||||
}
|
||||
|
||||
if (useDiscardStencilBugWorkaround && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
|
||||
WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n");
|
||||
}
|
||||
@ -293,7 +290,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
WRITE(p, "};\n");
|
||||
}
|
||||
} else if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
|
||||
if ((shaderDepal || colorWriteMask || colorToDepth) && gl_extensions.IsGLES) {
|
||||
if ((shaderDepal || colorWriteMask) && gl_extensions.IsGLES) {
|
||||
WRITE(p, "precision highp int;\n");
|
||||
}
|
||||
|
||||
@ -461,9 +458,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
WRITE(p, "PS_OUT main( PS_IN In ) {\n");
|
||||
WRITE(p, " PS_OUT outfragment;\n");
|
||||
WRITE(p, " vec4 target;\n");
|
||||
if (colorToDepth) {
|
||||
WRITE(p, " float gl_FragDepth;\n");
|
||||
}
|
||||
} else {
|
||||
WRITE(p, "void main() {\n");
|
||||
}
|
||||
@ -599,6 +593,31 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (shaderDepal && smoothedDepal) {
|
||||
// Specific mode for Test Drive. Fixes the banding.
|
||||
if (doTextureProjection) {
|
||||
// We don't use textureProj because we need better control and it's probably not much of a savings anyway.
|
||||
// However it is good for precision on older hardware like PowerVR.
|
||||
WRITE(p, " vec2 uv = %s.xy/%s.z;\n vec2 uv_round;\n", texcoord, texcoord);
|
||||
} else {
|
||||
WRITE(p, " vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord);
|
||||
}
|
||||
// Restrictions on this are checked before setting the smoothed flag.
|
||||
// Only RGB565 and RGBA5551 are supported, and only the specific shifts hitting the
|
||||
// channels directly.
|
||||
WRITE(p, " vec4 t = %s(tex, %s.xy);\n", compat.texture, texcoord);
|
||||
WRITE(p, " uint depalShift = (u_depal_mask_shift_off_fmt >> 8) & 0xFFU;\n");
|
||||
WRITE(p, " uint depalFmt = (u_depal_mask_shift_off_fmt >> 24) & 0x3U;\n");
|
||||
WRITE(p, " float index0 = t.r;\n");
|
||||
WRITE(p, " float mul = 32.0 / 256.0;\n");
|
||||
WRITE(p, " if (depalFmt == 0) {\n"); // yes, different versions of Test Drive use different formats. Could do compile time by adding more compat flags but meh.
|
||||
WRITE(p, " if (depalShift == 5) { index0 = t.g; mul = 64.0 / 256.0; }\n");
|
||||
WRITE(p, " else if (depalShift == 11) { index0 = t.b; }\n");
|
||||
WRITE(p, " } else {\n");
|
||||
WRITE(p, " if (depalShift == 5) { index0 = t.g; }\n");
|
||||
WRITE(p, " else if (depalShift == 10) { index0 = t.b; }\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " t = %s(pal, vec2(index0 * mul, 0.0));\n", compat.texture);
|
||||
} else {
|
||||
if (doTextureProjection) {
|
||||
// We don't use textureProj because we need better control and it's probably not much of a savings anyway.
|
||||
@ -1070,22 +1089,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
WRITE(p, " %s = vec4(0.0, 0.0, 0.0, %s.z); // blue to alpha\n", compat.fragColor0, compat.fragColor0);
|
||||
}
|
||||
|
||||
if (colorToDepth) {
|
||||
DepthScaleFactors factors = GetDepthScaleFactors();
|
||||
|
||||
if (compat.bitwiseOps) {
|
||||
WRITE(p, " highp float depthValue = float(int(%s.x * 31.99) | (int(%s.y * 63.99) << 5) | (int(%s.z * 31.99) << 11)) / 65535.0;\n", "v", "v", "v"); // compat.fragColor0, compat.fragColor0, compat.fragColor0);
|
||||
} else {
|
||||
// D3D9-compatible alternative
|
||||
WRITE(p, " highp float depthValue = (floor(%s.x * 31.99) + floor(%s.y * 63.99) * 32.0 + floor(%s.z * 31.99) * 2048.0) / 65535.0;\n", "v", "v", "v"); // compat.fragColor0, compat.fragColor0, compat.fragColor0);
|
||||
}
|
||||
if (factors.scale != 1.0 || factors.offset != 0.0) {
|
||||
WRITE(p, " gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale / 65535.0f, factors.offset);
|
||||
} else {
|
||||
WRITE(p, " gl_FragDepth = depthValue;\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
|
||||
const double scale = DepthSliceFactor() * 65535.0;
|
||||
|
||||
|
@ -41,13 +41,14 @@
|
||||
#include "GPU/Common/PresentationCommon.h"
|
||||
#include "GPU/Common/TextureCacheCommon.h"
|
||||
#include "GPU/Common/ReinterpretFramebuffer.h"
|
||||
#include "GPU/Debugger/Debugger.h"
|
||||
#include "GPU/Debugger/Record.h"
|
||||
#include "GPU/Debugger/Stepping.h"
|
||||
#include "GPU/GPUInterface.h"
|
||||
#include "GPU/GPUState.h"
|
||||
|
||||
FramebufferManagerCommon::FramebufferManagerCommon(Draw::DrawContext *draw)
|
||||
: draw_(draw) {
|
||||
: draw_(draw), draw2D_(draw_) {
|
||||
presentation_ = new PresentationCommon(draw);
|
||||
}
|
||||
|
||||
@ -105,6 +106,7 @@ void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, G
|
||||
displayFramebufPtr_ = framebuf;
|
||||
displayStride_ = stride;
|
||||
displayFormat_ = format;
|
||||
GPUDebug::NotifyDisplay(framebuf, stride, format);
|
||||
GPURecord::NotifyDisplay(framebuf, stride, format);
|
||||
}
|
||||
|
||||
@ -124,7 +126,7 @@ VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) const {
|
||||
}
|
||||
|
||||
u32 FramebufferManagerCommon::ColorBufferByteSize(const VirtualFramebuffer *vfb) const {
|
||||
return vfb->fb_stride * vfb->height * (vfb->format == GE_FORMAT_8888 ? 4 : 2);
|
||||
return vfb->fb_stride * vfb->height * (vfb->fb_format == GE_FORMAT_8888 ? 4 : 2);
|
||||
}
|
||||
|
||||
bool FramebufferManagerCommon::ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const {
|
||||
@ -190,7 +192,7 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, GEBufferForma
|
||||
// Unless the game is using overlapping buffers, the next buffer should be far enough away.
|
||||
// This catches some cases where we can know this.
|
||||
// Hmm. The problem is that we could only catch it for the first of two buffers...
|
||||
const u32 bpp = fb_format == GE_FORMAT_8888 ? 4 : 2;
|
||||
const u32 bpp = BufferFormatBytesPerPixel(fb_format);
|
||||
int avail_height = (nearest_address - fb_address) / (fb_stride * bpp);
|
||||
if (avail_height < drawing_height && avail_height == region_height) {
|
||||
drawing_width = std::min(region_width, fb_stride);
|
||||
@ -220,7 +222,7 @@ void GetFramebufferHeuristicInputs(FramebufferHeuristicParams *params, const GPU
|
||||
params->z_stride = 0;
|
||||
}
|
||||
|
||||
params->fmt = gstate_c.framebufFormat;
|
||||
params->fb_format = gstate_c.framebufFormat;
|
||||
|
||||
params->isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask();
|
||||
// Technically, it may write depth later, but we're trying to detect it only when it's really true.
|
||||
@ -269,7 +271,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
||||
// As there are no clear "framebuffer width" and "framebuffer height" registers,
|
||||
// we need to infer the size of the current framebuffer somehow.
|
||||
int drawing_width, drawing_height;
|
||||
EstimateDrawingSize(params.fb_address, params.fmt, params.viewportWidth, params.viewportHeight, params.regionWidth, params.regionHeight, params.scissorWidth, params.scissorHeight, std::max(params.fb_stride, (u16)4), drawing_width, drawing_height);
|
||||
EstimateDrawingSize(params.fb_address, params.fb_format, params.viewportWidth, params.viewportHeight, params.regionWidth, params.regionHeight, params.scissorWidth, params.scissorHeight, std::max(params.fb_stride, (u16)4), drawing_width, drawing_height);
|
||||
|
||||
gstate_c.SetCurRTOffset(0, 0);
|
||||
bool vfbFormatChanged = false;
|
||||
@ -280,14 +282,12 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
||||
WARN_LOG_ONCE(color_equal_z, G3D, "Framebuffer bound with color addr == z addr, likely will not use Z in this pass: %08x", params.fb_address);
|
||||
}
|
||||
|
||||
RasterMode mode = RASTER_MODE_NORMAL;
|
||||
|
||||
// Find a matching framebuffer
|
||||
VirtualFramebuffer *vfb = nullptr;
|
||||
for (size_t i = 0; i < vfbs_.size(); ++i) {
|
||||
VirtualFramebuffer *v = vfbs_[i];
|
||||
|
||||
const u32 bpp = v->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
const u32 bpp = BufferFormatBytesPerPixel(v->fb_format);
|
||||
|
||||
if (params.fb_address == v->fb_address) {
|
||||
vfb = v;
|
||||
@ -296,8 +296,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
||||
vfb->fb_stride = params.fb_stride;
|
||||
vfbFormatChanged = true;
|
||||
}
|
||||
if (vfb->format != params.fmt) {
|
||||
vfb->format = params.fmt;
|
||||
if (vfb->fb_format != params.fb_format) {
|
||||
vfb->fb_format = params.fb_format;
|
||||
vfbFormatChanged = true;
|
||||
}
|
||||
|
||||
@ -318,21 +318,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
||||
vfb->height = drawing_height;
|
||||
}
|
||||
break;
|
||||
} else if (params.fb_address == v->z_address && params.fmt != GE_FORMAT_8888 && params.fb_stride == v->z_stride && !params.isBlending) {
|
||||
// Looks like the game might be intending to use color to write directly to a Z buffer.
|
||||
// This is seen in Kuroyou 2.
|
||||
|
||||
// Ignore this in this loop, BUT, we do a lookup in the depth tracking afterwards to
|
||||
// make sure we get the latest one.
|
||||
WARN_LOG_ONCE(color_matches_z, G3D, "Color framebuffer bound at %08x with likely intent to write explicit Z values using color. fmt = %s", params.fb_address, GeBufferFormatToString(params.fmt));
|
||||
// Seems impractical to use the other 16-bit formats for this due to the limited control over alpha,
|
||||
// so we'll simply only support 565.
|
||||
if (params.fmt == GE_FORMAT_565) {
|
||||
mode = RASTER_MODE_COLOR_TO_DEPTH;
|
||||
break;
|
||||
}
|
||||
} else if (v->fb_stride == params.fb_stride && v->format == params.fmt) {
|
||||
u32 v_fb_first_line_end_ptr = v->fb_address + v->fb_stride * 4; // This should be * bpp, but leaving like this until after 1.13 to be safe. The God of War games use this for shadows.
|
||||
} else if (v->fb_stride == params.fb_stride && v->fb_format == params.fb_format) {
|
||||
u32 v_fb_first_line_end_ptr = v->fb_address + v->fb_stride * bpp;
|
||||
u32 v_fb_end_ptr = v->fb_address + v->fb_stride * v->height * bpp;
|
||||
|
||||
if (params.fb_address > v->fb_address && params.fb_address < v_fb_first_line_end_ptr) {
|
||||
@ -347,17 +334,6 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
||||
drawing_width += x_offset;
|
||||
break;
|
||||
}
|
||||
} else if (params.fb_address > v->fb_address && params.fb_address < v_fb_end_ptr && PSP_CoreParameter().compat.flags().AllowLargeFBTextureOffsets) {
|
||||
if (params.fb_address % params.fb_stride == v->fb_address % params.fb_stride) {
|
||||
// Framebuffers are overlapping on the Y axis.
|
||||
const int y_offset = (params.fb_address - v->fb_address) / (bpp * params.fb_stride);
|
||||
|
||||
vfb = v;
|
||||
gstate_c.SetCurRTOffset(0, y_offset);
|
||||
// To prevent the newSize code from being confused.
|
||||
drawing_height += y_offset;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// We ignore this match.
|
||||
// TODO: We can allow X/Y overlaps too, but haven't seen any so safer to not.
|
||||
@ -365,19 +341,6 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
||||
}
|
||||
}
|
||||
|
||||
if (mode == RASTER_MODE_COLOR_TO_DEPTH) {
|
||||
// Lookup in the depth tracking to find which VFB has the latest version of this Z buffer.
|
||||
// Then bind it in color-to-depth mode.
|
||||
//
|
||||
// We do this by having a special render mode where we take color and move to
|
||||
// depth in the fragment shader, and set color writes to off.
|
||||
//
|
||||
// We use a special fragment shader flag to convert color to depth.
|
||||
vfb = GetLatestDepthBufferAt(params.fb_address /* !!! */, params.fb_stride);
|
||||
}
|
||||
|
||||
gstate_c.SetFramebufferRenderMode(mode);
|
||||
|
||||
if (vfb) {
|
||||
if ((drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight)) {
|
||||
// Even if it's not newly wrong, if this is larger we need to resize up.
|
||||
@ -413,6 +376,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
||||
|
||||
// None found? Create one.
|
||||
if (!vfb) {
|
||||
gstate_c.usingDepth = false; // reset depth buffer tracking
|
||||
|
||||
vfb = new VirtualFramebuffer{};
|
||||
vfb->fbo = nullptr;
|
||||
vfb->fb_address = params.fb_address;
|
||||
@ -426,9 +391,9 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
||||
vfb->newWidth = drawing_width;
|
||||
vfb->newHeight = drawing_height;
|
||||
vfb->lastFrameNewSize = gpuStats.numFlips;
|
||||
vfb->format = params.fmt;
|
||||
vfb->drawnFormat = params.fmt;
|
||||
vfb->usageFlags = FB_USAGE_RENDERTARGET;
|
||||
vfb->fb_format = params.fb_format;
|
||||
vfb->drawnFormat = params.fb_format;
|
||||
vfb->usageFlags = FB_USAGE_RENDER_COLOR;
|
||||
|
||||
u32 byteSize = ColorBufferByteSize(vfb);
|
||||
if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + byteSize > framebufRangeEnd_) {
|
||||
@ -439,20 +404,13 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
||||
ResizeFramebufFBO(vfb, drawing_width, drawing_height, true);
|
||||
NotifyRenderFramebufferCreated(vfb);
|
||||
|
||||
// Looks up by z_address, so if one is found here and not have last pointers equal to this one,
|
||||
// there is another one.
|
||||
VirtualFramebuffer *prevDepth = GetLatestDepthBufferAt(vfb->z_address, vfb->z_stride);
|
||||
|
||||
// We might already want to copy depth, in case this is a temp buffer. See #7810.
|
||||
if (prevDepth != vfb) {
|
||||
if (!params.isClearingDepth && prevDepth) {
|
||||
BlitFramebufferDepth(prevDepth, vfb);
|
||||
}
|
||||
}
|
||||
// Note that we do not even think about depth right now. That'll be handled
|
||||
// on the first depth access, which will call SetDepthFramebuffer.
|
||||
|
||||
CopyToColorFromOverlappingFramebuffers(vfb);
|
||||
SetColorUpdated(vfb, skipDrawReason);
|
||||
|
||||
INFO_LOG(FRAMEBUF, "Creating FBO for %08x (z: %08x) : %d x %d x %s", vfb->fb_address, vfb->z_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->format));
|
||||
INFO_LOG(FRAMEBUF, "Creating FBO for %08x (z: %08x) : %d x %d x %s", vfb->fb_address, vfb->z_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format));
|
||||
|
||||
vfb->last_frame_render = gpuStats.numFlips;
|
||||
frameLastFramebufUsed_ = gpuStats.numFlips;
|
||||
@ -486,18 +444,18 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
||||
} else if (vfbs_[i]->z_stride != 0 && params.z_address == vfbs_[i]->z_address && params.fb_address != vfbs_[i]->fb_address && !sharingReported) {
|
||||
// This happens a lot, but virtually always it's cleared.
|
||||
// It's possible the other might not clear, but when every game is reported it's not useful.
|
||||
if (params.isWritingDepth) {
|
||||
if (params.isWritingDepth && (vfbs_[i]->usageFlags & FB_USAGE_RENDER_DEPTH)) {
|
||||
WARN_LOG(SCEGE, "FBO reusing depthbuffer, c=%08x/d=%08x and c=%08x/d=%08x", params.fb_address, params.z_address, vfbs_[i]->fb_address, vfbs_[i]->z_address);
|
||||
sharingReported = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We already have it!
|
||||
// We already have it!
|
||||
} else if (vfb != currentRenderVfb_) {
|
||||
// Use it as a render target.
|
||||
DEBUG_LOG(FRAMEBUF, "Switching render target to FBO for %08x: %d x %d x %d ", vfb->fb_address, vfb->width, vfb->height, vfb->format);
|
||||
vfb->usageFlags |= FB_USAGE_RENDERTARGET;
|
||||
DEBUG_LOG(FRAMEBUF, "Switching render target to FBO for %08x: %d x %d x %d ", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format);
|
||||
vfb->usageFlags |= FB_USAGE_RENDER_COLOR;
|
||||
vfb->last_frame_render = gpuStats.numFlips;
|
||||
frameLastFramebufUsed_ = gpuStats.numFlips;
|
||||
vfb->dirtyAfterDisplay = true;
|
||||
@ -507,18 +465,20 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
||||
VirtualFramebuffer *prev = currentRenderVfb_;
|
||||
currentRenderVfb_ = vfb;
|
||||
NotifyRenderFramebufferSwitched(prev, vfb, params.isClearingDepth);
|
||||
CopyToColorFromOverlappingFramebuffers(vfb);
|
||||
gstate_c.usingDepth = false; // reset depth buffer tracking
|
||||
} else {
|
||||
// Something changed, but we still got the same framebuffer we were already rendering to.
|
||||
// Might not be a lot to do here, we check in NotifyRenderFramebufferUpdated
|
||||
vfb->last_frame_render = gpuStats.numFlips;
|
||||
frameLastFramebufUsed_ = gpuStats.numFlips;
|
||||
vfb->dirtyAfterDisplay = true;
|
||||
if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
|
||||
vfb->reallyDirtyAfterDisplay = true;
|
||||
|
||||
NotifyRenderFramebufferUpdated(vfb, vfbFormatChanged);
|
||||
}
|
||||
|
||||
vfb->colorBindSeq = GetBindSeqCount();
|
||||
vfb->depthBindSeq = GetBindSeqCount();
|
||||
|
||||
gstate_c.curRTWidth = vfb->width;
|
||||
gstate_c.curRTHeight = vfb->height;
|
||||
@ -527,6 +487,182 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
||||
return vfb;
|
||||
}
|
||||
|
||||
// Called on the first use of depth in a render pass.
|
||||
void FramebufferManagerCommon::SetDepthFrameBuffer(bool isClearingDepth) {
|
||||
if (!currentRenderVfb_) {
|
||||
return;
|
||||
}
|
||||
|
||||
// If this first draw call is anything other than a clear, "resolve" the depth buffer,
|
||||
// by copying from any overlapping buffers with fresher content.
|
||||
if (!isClearingDepth) {
|
||||
CopyToDepthFromOverlappingFramebuffers(currentRenderVfb_);
|
||||
}
|
||||
|
||||
currentRenderVfb_->usageFlags |= FB_USAGE_RENDER_DEPTH;
|
||||
currentRenderVfb_->depthBindSeq = GetBindSeqCount();
|
||||
}
|
||||
|
||||
struct CopySource {
|
||||
VirtualFramebuffer *vfb;
|
||||
RasterChannel channel;
|
||||
int xOffset;
|
||||
int yOffset;
|
||||
|
||||
int seq() const {
|
||||
return channel == RASTER_DEPTH ? vfb->depthBindSeq : vfb->colorBindSeq;
|
||||
}
|
||||
|
||||
bool operator < (const CopySource &other) const {
|
||||
return seq() < other.seq();
|
||||
}
|
||||
};
|
||||
|
||||
// Not sure if it's more profitable to always do these copies with raster (which may screw up early-Z due to explicit depth buffer write)
|
||||
// or to use image copies when possible (which may make it easier for the driver to preserve early-Z, but on the other hand, will cost additional memory
|
||||
// bandwidth on tilers due to the load operation, which we might otherwise be able to skip).
|
||||
void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFramebuffer *dest) {
|
||||
std::vector<CopySource> sources;
|
||||
for (auto src : vfbs_) {
|
||||
if (src == dest)
|
||||
continue;
|
||||
|
||||
if (src->fb_address == dest->z_address && src->fb_stride == dest->z_stride && src->fb_format == GE_FORMAT_565) {
|
||||
if (src->colorBindSeq > dest->depthBindSeq) {
|
||||
// Source has newer data than the current buffer, use it.
|
||||
sources.push_back(CopySource{ src, RASTER_COLOR, 0, 0 });
|
||||
}
|
||||
} else if (src->z_address == dest->z_address && src->z_stride == dest->z_stride && src->depthBindSeq > dest->depthBindSeq) {
|
||||
sources.push_back(CopySource{ src, RASTER_DEPTH, 0, 0 });
|
||||
} else {
|
||||
// TODO: Do more detailed overlap checks here.
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(sources.begin(), sources.end());
|
||||
|
||||
// TODO: A full copy will overwrite anything else. So we can eliminate
|
||||
// anything that comes before such a copy.
|
||||
|
||||
// For now, let's just do the last thing, if there are multiple.
|
||||
|
||||
// for (auto &source : sources) {
|
||||
if (!sources.empty()) {
|
||||
draw_->InvalidateCachedState();
|
||||
|
||||
auto &source = sources.back();
|
||||
if (source.channel == RASTER_DEPTH) {
|
||||
// Good old depth->depth copy.
|
||||
BlitFramebufferDepth(source.vfb, dest);
|
||||
gpuStats.numDepthCopies++;
|
||||
dest->last_frame_depth_updated = gpuStats.numFlips;
|
||||
} else if (source.channel == RASTER_COLOR && draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
|
||||
VirtualFramebuffer *src = source.vfb;
|
||||
if (src->drawnFormat != GE_FORMAT_565) {
|
||||
WARN_LOG_ONCE(not565, G3D, "Drawn fb_format of buffer at %08x not 565 as expected", src->fb_address);
|
||||
}
|
||||
|
||||
// Really hate to do this, but tracking the depth swizzle state across multiple
|
||||
// copies is not easy.
|
||||
Draw2DShader shader = DRAW2D_565_TO_DEPTH;
|
||||
if (PSP_CoreParameter().compat.flags().DeswizzleDepth) {
|
||||
shader = DRAW2D_565_TO_DEPTH_DESWIZZLE;
|
||||
}
|
||||
|
||||
gpuStats.numReinterpretCopies++;
|
||||
|
||||
// Copying color to depth.
|
||||
BlitUsingRaster(
|
||||
src->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight,
|
||||
dest->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight,
|
||||
false, Get2DPipeline(shader), "565_to_depth");
|
||||
}
|
||||
}
|
||||
|
||||
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE);
|
||||
}
|
||||
|
||||
// Call this after the target has been bound for rendering. For color, raster is probably always going to win over blits/copies.
|
||||
void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFramebuffer *dst) {
|
||||
std::vector<CopySource> sources;
|
||||
for (auto src : vfbs_) {
|
||||
// Discard old and equal potential inputs.
|
||||
if (src == dst || src->colorBindSeq < dst->colorBindSeq)
|
||||
continue;
|
||||
|
||||
if (src->fb_address == dst->fb_address && src->fb_stride == dst->fb_stride) {
|
||||
// Another render target at the exact same location but gotta be a different format, otherwise
|
||||
// it would be the same.
|
||||
_dbg_assert_(src->fb_format != dst->fb_format);
|
||||
WARN_LOG_ONCE(reint, G3D, "Reinterpret detected at %08x", src->fb_address);
|
||||
// This is where we'll do reinterprets in the future.
|
||||
} else if (src->fb_stride == dst->fb_stride && src->fb_format == dst->fb_format) {
|
||||
u32 bytesPerPixel = BufferFormatBytesPerPixel(src->fb_format);
|
||||
|
||||
u32 strideInBytes = src->fb_stride * bytesPerPixel; // Same for both src and dest
|
||||
|
||||
u32 srcColorStart = src->fb_address;
|
||||
u32 srcFirstLineEnd = src->fb_address + strideInBytes;
|
||||
u32 srcColorEnd = strideInBytes * src->height;
|
||||
|
||||
u32 dstColorStart = dst->fb_address;
|
||||
u32 dstFirstLineEnd = dst->fb_address + strideInBytes;
|
||||
u32 dstColorEnd = strideInBytes * dst->height;
|
||||
|
||||
// Initially we'll only allow pure horizontal and vertical overlap,
|
||||
// to reduce the risk for false positives. We can allow diagonal overlap too if needed
|
||||
// in the future.
|
||||
|
||||
// Check for potential vertical overlap, like in Juiced 2.
|
||||
int xOffset = 0;
|
||||
int yOffset = 0;
|
||||
|
||||
// TODO: Get rid of the compatibility flag check.
|
||||
if ((dstColorStart - srcColorStart) % strideInBytes == 0
|
||||
&& PSP_CoreParameter().compat.flags().AllowLargeFBTextureOffsets) {
|
||||
// Buffers are aligned.
|
||||
yOffset = ((int)dstColorStart - (int)srcColorStart) / strideInBytes;
|
||||
if (yOffset <= -(int)src->height) {
|
||||
// Not overlapping
|
||||
continue;
|
||||
} else if (yOffset >= dst->height) {
|
||||
// Not overlapping
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// Buffers not stride-aligned - ignoring for now.
|
||||
continue;
|
||||
}
|
||||
gpuStats.numColorCopies++;
|
||||
sources.push_back(CopySource{ src, RASTER_COLOR, xOffset, yOffset });
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(sources.begin(), sources.end());
|
||||
|
||||
draw_->InvalidateCachedState();
|
||||
|
||||
for (const CopySource &source : sources) {
|
||||
VirtualFramebuffer *src = source.vfb;
|
||||
|
||||
// Copy a rectangle from the original to the new buffer.
|
||||
// Yes, we mean to look at src->width/height for the dest rectangle.
|
||||
int srcWidth = src->width * src->renderScaleFactor;
|
||||
int srcHeight = src->height * src->renderScaleFactor;
|
||||
int dstWidth = src->width * dst->renderScaleFactor;
|
||||
int dstHeight = src->height * dst->renderScaleFactor;
|
||||
|
||||
int dstX1 = -source.xOffset * dst->renderScaleFactor;
|
||||
int dstY1 = -source.yOffset * dst->renderScaleFactor;
|
||||
int dstX2 = dstX1 + dstWidth;
|
||||
int dstY2 = dstY1 + dstHeight;
|
||||
|
||||
BlitUsingRaster(src->fbo, 0.0f, 0.0f, srcWidth, srcHeight,
|
||||
dst->fbo, dstX1, dstY1, dstX2, dstY2, false, Get2DPipeline(DRAW2D_COPY_COLOR), "copy_color");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) {
|
||||
// Notify the texture cache of both the color and depth buffers.
|
||||
textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED);
|
||||
@ -597,25 +733,10 @@ void FramebufferManagerCommon::BlitFramebufferDepth(VirtualFramebuffer *src, Vir
|
||||
draw_->BlitFramebuffer(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, Draw::FB_DEPTH_BIT, Draw::FB_BLIT_NEAREST, "BlitFramebufferDepth");
|
||||
RebindFramebuffer("After BlitFramebufferDepth");
|
||||
} else if (useRaster) {
|
||||
BlitUsingRaster(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, false, RasterChannel::RASTER_DEPTH);
|
||||
BlitUsingRaster(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, false, Get2DPipeline(Draw2DShader::DRAW2D_COPY_DEPTH), "BlitDepthRaster");
|
||||
}
|
||||
|
||||
draw_->InvalidateCachedState();
|
||||
|
||||
gpuStats.numDepthCopies++;
|
||||
dst->last_frame_depth_updated = gpuStats.numFlips;
|
||||
}
|
||||
|
||||
// Scans all tracked framebuffers for those using the given depth address and stride,
// and returns the one with the highest depthBindSeq. Returns nullptr when none qualifies.
VirtualFramebuffer *FramebufferManagerCommon::GetLatestDepthBufferAt(u32 z_address, u16 z_stride) {
	VirtualFramebuffer *best = nullptr;
	int bestSeq = -1;
	for (VirtualFramebuffer *candidate : vfbs_) {
		// Only buffers at exactly this depth location are of interest.
		if (candidate->z_address != z_address || candidate->z_stride != z_stride)
			continue;
		if (candidate->depthBindSeq > bestSeq) {
			bestSeq = candidate->depthBindSeq;
			best = candidate;
		}
	}
	return best;
}
|
||||
|
||||
void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) {
|
||||
@ -641,8 +762,8 @@ void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer
|
||||
void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) {
|
||||
if (vfbFormatChanged) {
|
||||
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED);
|
||||
if (vfb->drawnFormat != vfb->format) {
|
||||
ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->format);
|
||||
if (vfb->drawnFormat != vfb->fb_format) {
|
||||
ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->fb_format);
|
||||
}
|
||||
}
|
||||
|
||||
@ -666,19 +787,8 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe
|
||||
textureCache_->ForgetLastTexture();
|
||||
shaderManager_->DirtyLastShader();
|
||||
|
||||
// Copy depth between the framebuffers, if the z_address is the same (checked inside.)
|
||||
VirtualFramebuffer * prevDepth = GetLatestDepthBufferAt(vfb->z_address, vfb->z_stride);
|
||||
|
||||
// We might already want to copy depth, in case this is a temp buffer. See #7810.
|
||||
if (prevDepth != vfb) {
|
||||
if (!isClearingDepth && prevDepth) {
|
||||
BlitFramebufferDepth(prevDepth, vfb);
|
||||
}
|
||||
prevDepth = vfb;
|
||||
}
|
||||
|
||||
if (vfb->drawnFormat != vfb->format) {
|
||||
ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->format);
|
||||
if (vfb->drawnFormat != vfb->fb_format) {
|
||||
ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->fb_format);
|
||||
}
|
||||
|
||||
if (useBufferedRendering_) {
|
||||
@ -723,9 +833,9 @@ void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int width,
|
||||
// TODO: Could possibly be an offset...
|
||||
VirtualFramebuffer *vfb = GetVFBAt(addr);
|
||||
if (vfb) {
|
||||
if (vfb->format != fmt || vfb->drawnFormat != fmt) {
|
||||
DEBUG_LOG(ME, "Changing format for %08x from %d to %d", addr, vfb->drawnFormat, fmt);
|
||||
vfb->format = fmt;
|
||||
if (vfb->fb_format != fmt || vfb->drawnFormat != fmt) {
|
||||
DEBUG_LOG(ME, "Changing fb_format for %08x from %d to %d", addr, vfb->drawnFormat, fmt);
|
||||
vfb->fb_format = fmt;
|
||||
vfb->drawnFormat = fmt;
|
||||
|
||||
// Let's count this as a "render". This will also force us to use the correct format.
|
||||
@ -734,7 +844,7 @@ void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int width,
|
||||
|
||||
if (vfb->fb_stride < width) {
|
||||
DEBUG_LOG(ME, "Changing stride for %08x from %d to %d", addr, vfb->fb_stride, width);
|
||||
const int bpp = fmt == GE_FORMAT_8888 ? 4 : 2;
|
||||
const int bpp = BufferFormatBytesPerPixel(fmt);
|
||||
ResizeFramebufFBO(vfb, width, size / (bpp * width));
|
||||
// Resizing may change the viewport/etc.
|
||||
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
|
||||
@ -762,7 +872,7 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) {
|
||||
FlushBeforeCopy();
|
||||
|
||||
if (useBufferedRendering_ && vfb->fbo) {
|
||||
GEBufferFormat fmt = vfb->format;
|
||||
GEBufferFormat fmt = vfb->fb_format;
|
||||
if (vfb->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf) {
|
||||
// If we're not rendering to it, format may be wrong. Use displayFormat_ instead.
|
||||
fmt = displayFormat_;
|
||||
@ -770,7 +880,7 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) {
|
||||
DrawPixels(vfb, 0, 0, Memory::GetPointer(addr), fmt, vfb->fb_stride, vfb->width, vfb->height);
|
||||
SetColorUpdated(vfb, gstate_c.skipDrawReason);
|
||||
} else {
|
||||
INFO_LOG(FRAMEBUF, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format);
|
||||
INFO_LOG(FRAMEBUF, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format);
|
||||
DestroyFramebuf(vfb);
|
||||
vfbs_.erase(vfbs_.begin() + i--);
|
||||
}
|
||||
@ -1045,7 +1155,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
|
||||
const u32 v_addr = v->fb_address & 0x3FFFFFFF;
|
||||
const u32 v_size = ColorBufferByteSize(v);
|
||||
if (addr >= v_addr && addr < v_addr + v_size) {
|
||||
const u32 dstBpp = v->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
const u32 dstBpp = BufferFormatBytesPerPixel(v->fb_format);
|
||||
const u32 v_offsetX = ((addr - v_addr) / dstBpp) % v->fb_stride;
|
||||
const u32 v_offsetY = ((addr - v_addr) / dstBpp) / v->fb_stride;
|
||||
// We have enough space there for the display, right?
|
||||
@ -1068,10 +1178,10 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
|
||||
}
|
||||
}
|
||||
|
||||
if (vfb && vfb->format != displayFormat_) {
|
||||
if (vfb && vfb->fb_format != displayFormat_) {
|
||||
if (vfb->last_frame_render + FBO_OLD_AGE < gpuStats.numFlips) {
|
||||
// The game probably switched formats on us.
|
||||
vfb->format = displayFormat_;
|
||||
vfb->fb_format = displayFormat_;
|
||||
} else {
|
||||
vfb = 0;
|
||||
}
|
||||
@ -1171,7 +1281,7 @@ void FramebufferManagerCommon::DecimateFBOs() {
|
||||
|
||||
if (vfb != displayFramebuf_ && vfb != prevDisplayFramebuf_ && vfb != prevPrevDisplayFramebuf_) {
|
||||
if (age > FBO_OLD_AGE) {
|
||||
INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age);
|
||||
INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format, age);
|
||||
DestroyFramebuf(vfb);
|
||||
vfbs_.erase(vfbs_.begin() + i--);
|
||||
}
|
||||
@ -1193,7 +1303,7 @@ void FramebufferManagerCommon::DecimateFBOs() {
|
||||
VirtualFramebuffer *vfb = bvfbs_[i];
|
||||
int age = frameLastFramebufUsed_ - vfb->last_frame_render;
|
||||
if (age > FBO_OLD_AGE) {
|
||||
INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age);
|
||||
INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format, age);
|
||||
DestroyFramebuf(vfb);
|
||||
bvfbs_.erase(bvfbs_.begin() + i--);
|
||||
}
|
||||
@ -1269,7 +1379,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w,
|
||||
|
||||
shaderManager_->DirtyLastShader();
|
||||
char tag[128];
|
||||
size_t len = snprintf(tag, sizeof(tag), "FB_%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->format));
|
||||
size_t len = snprintf(tag, sizeof(tag), "FB_%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->fb_format));
|
||||
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, 1, true, tag });
|
||||
if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) {
|
||||
NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, ColorBufferByteSize(vfb), tag, len);
|
||||
@ -1280,7 +1390,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w,
|
||||
NotifyMemInfo(MemBlockFlags::ALLOC, vfb->z_address, vfb->fb_stride * vfb->height * sizeof(uint16_t), buf, len);
|
||||
}
|
||||
if (old.fbo) {
|
||||
INFO_LOG(FRAMEBUF, "Resizing FBO for %08x : %dx%dx%s", vfb->fb_address, w, h, GeBufferFormatToString(vfb->format));
|
||||
INFO_LOG(FRAMEBUF, "Resizing FBO for %08x : %dx%dx%s", vfb->fb_address, w, h, GeBufferFormatToString(vfb->fb_format));
|
||||
if (vfb->fbo) {
|
||||
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
|
||||
if (!skipCopy) {
|
||||
@ -1327,7 +1437,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
|
||||
// We only remove the kernel and uncached bits when comparing.
|
||||
const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
|
||||
const u32 vfb_size = ColorBufferByteSize(vfb);
|
||||
const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
const u32 vfb_bpp = BufferFormatBytesPerPixel(vfb->fb_format);
|
||||
const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
|
||||
const int vfb_byteWidth = vfb->width * vfb_bpp;
|
||||
|
||||
@ -1375,7 +1485,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
|
||||
// Note - if we're here, we're in a memcpy, not a block transfer. Not allowing IntraVRAMBlockTransferAllowCreateFB.
|
||||
// Technically, that makes BlockTransferAllowCreateFB a bit of a misnomer.
|
||||
if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB) {
|
||||
dstBuffer = CreateRAMFramebuffer(dst, srcBuffer->width, srcBuffer->height, srcBuffer->fb_stride, srcBuffer->format);
|
||||
dstBuffer = CreateRAMFramebuffer(dst, srcBuffer->width, srcBuffer->height, srcBuffer->fb_stride, srcBuffer->fb_format);
|
||||
dstY = 0;
|
||||
}
|
||||
}
|
||||
@ -1401,7 +1511,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
|
||||
WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x (size: %x)", src, dst, size);
|
||||
FlushBeforeCopy();
|
||||
const u8 *srcBase = Memory::GetPointerUnchecked(src);
|
||||
DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH);
|
||||
DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->fb_format, dstBuffer->fb_stride, dstBuffer->width, dstH);
|
||||
SetColorUpdated(dstBuffer, skipDrawReason);
|
||||
RebindFramebuffer("RebindFramebuffer - Memcpy fbo upload");
|
||||
// This is a memcpy, let's still copy just in case.
|
||||
@ -1437,7 +1547,7 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst
|
||||
VirtualFramebuffer *vfb = vfbs_[i];
|
||||
const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
|
||||
const u32 vfb_size = ColorBufferByteSize(vfb);
|
||||
const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
const u32 vfb_bpp = BufferFormatBytesPerPixel(vfb->fb_format);
|
||||
const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
|
||||
const u32 vfb_byteWidth = vfb->width * vfb_bpp;
|
||||
|
||||
@ -1512,10 +1622,10 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst
|
||||
if (bpp == 4) {
|
||||
// Only one possibility unless it's doing split pixel tricks (which we could detect through stride maybe).
|
||||
ramFormat = GE_FORMAT_8888;
|
||||
} else if (srcBuffer->format != GE_FORMAT_8888) {
|
||||
} else if (srcBuffer->fb_format != GE_FORMAT_8888) {
|
||||
// We guess that the game will interpret the data the same as it was in the source of the copy.
|
||||
// Seems like a likely good guess, and works in Test Drive Unlimited.
|
||||
ramFormat = srcBuffer->format;
|
||||
ramFormat = srcBuffer->fb_format;
|
||||
} else {
|
||||
// No info left - just fall back to something. But this is definitely split pixel tricks.
|
||||
ramFormat = GE_FORMAT_5551;
|
||||
@ -1538,7 +1648,7 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst
|
||||
}
|
||||
|
||||
VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAddress, int width, int height, int stride, GEBufferFormat format) {
|
||||
INFO_LOG(G3D, "Creating RAM framebuffer at %08x (%dx%d, stride %d, format %d)", fbAddress, width, height, stride, format);
|
||||
INFO_LOG(G3D, "Creating RAM framebuffer at %08x (%dx%d, stride %d, fb_format %d)", fbAddress, width, height, stride, format);
|
||||
|
||||
// A target for the destination is missing - so just create one!
|
||||
// Make sure this one would be found by the algorithm above so we wouldn't
|
||||
@ -1559,9 +1669,9 @@ VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAd
|
||||
vfb->renderHeight = (u16)(vfb->height * renderScaleFactor_);
|
||||
vfb->bufferWidth = vfb->width;
|
||||
vfb->bufferHeight = vfb->height;
|
||||
vfb->format = format;
|
||||
vfb->fb_format = format;
|
||||
vfb->drawnFormat = GE_FORMAT_8888;
|
||||
vfb->usageFlags = FB_USAGE_RENDERTARGET;
|
||||
vfb->usageFlags = FB_USAGE_RENDER_COLOR;
|
||||
SetColorUpdated(vfb, 0);
|
||||
char name[64];
|
||||
snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address);
|
||||
@ -1585,7 +1695,7 @@ VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFram
|
||||
|
||||
// We maintain a separate vector of framebuffer objects for blitting.
|
||||
for (VirtualFramebuffer *v : bvfbs_) {
|
||||
if (v->fb_address == vfb->fb_address && v->format == vfb->format) {
|
||||
if (v->fb_address == vfb->fb_address && v->fb_format == vfb->fb_format) {
|
||||
if (v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) {
|
||||
nvfb = v;
|
||||
v->fb_stride = vfb->fb_stride;
|
||||
@ -1611,10 +1721,10 @@ VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFram
|
||||
nvfb->renderScaleFactor = 1; // For readbacks we resize to the original size, of course.
|
||||
nvfb->bufferWidth = vfb->bufferWidth;
|
||||
nvfb->bufferHeight = vfb->bufferHeight;
|
||||
nvfb->format = vfb->format;
|
||||
nvfb->fb_format = vfb->fb_format;
|
||||
nvfb->drawnWidth = vfb->drawnWidth;
|
||||
nvfb->drawnHeight = vfb->drawnHeight;
|
||||
nvfb->drawnFormat = vfb->format;
|
||||
nvfb->drawnFormat = vfb->fb_format;
|
||||
|
||||
char name[64];
|
||||
snprintf(name, sizeof(name), "download_temp");
|
||||
@ -1629,7 +1739,7 @@ VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFram
|
||||
UpdateDownloadTempBuffer(nvfb);
|
||||
}
|
||||
|
||||
nvfb->usageFlags |= FB_USAGE_RENDERTARGET;
|
||||
nvfb->usageFlags |= FB_USAGE_RENDER_COLOR;
|
||||
nvfb->last_frame_render = gpuStats.numFlips;
|
||||
nvfb->dirtyAfterDisplay = true;
|
||||
|
||||
@ -1649,7 +1759,7 @@ void FramebufferManagerCommon::ApplyClearToMemory(int x1, int y1, int x2, int y2
|
||||
}
|
||||
|
||||
u8 *addr = Memory::GetPointerWriteUnchecked(gstate.getFrameBufAddress());
|
||||
const int bpp = gstate_c.framebufFormat == GE_FORMAT_8888 ? 4 : 2;
|
||||
const int bpp = BufferFormatBytesPerPixel(gstate_c.framebufFormat);
|
||||
|
||||
u32 clearBits = clearColor;
|
||||
if (bpp == 2) {
|
||||
@ -1782,7 +1892,7 @@ bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dst
|
||||
dstBasePtr, dstX, dstY, dstStride);
|
||||
FlushBeforeCopy();
|
||||
if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) {
|
||||
const int srcBpp = srcBuffer->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
const int srcBpp = BufferFormatBytesPerPixel(srcBuffer->fb_format);
|
||||
const float srcXFactor = (float)bpp / srcBpp;
|
||||
const bool tooTall = srcY + srcHeight > srcBuffer->bufferHeight;
|
||||
if (srcHeight <= 0 || (tooTall && srcY != 0)) {
|
||||
@ -1833,7 +1943,7 @@ void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstS
|
||||
WARN_LOG_ONCE(btu, G3D, "Block transfer upload %08x -> %08x", srcBasePtr, dstBasePtr);
|
||||
FlushBeforeCopy();
|
||||
const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
|
||||
int dstBpp = dstBuffer->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
int dstBpp = BufferFormatBytesPerPixel(dstBuffer->fb_format);
|
||||
float dstXFactor = (float)bpp / dstBpp;
|
||||
if (dstWidth > dstBuffer->width || dstHeight > dstBuffer->height) {
|
||||
// The buffer isn't big enough, and we have a clear hint of size. Resize.
|
||||
@ -1846,7 +1956,7 @@ void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstS
|
||||
// Resizing may change the viewport/etc.
|
||||
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
|
||||
}
|
||||
DrawPixels(dstBuffer, static_cast<int>(dstX * dstXFactor), dstY, srcBase, dstBuffer->format, static_cast<int>(srcStride * dstXFactor), static_cast<int>(dstWidth * dstXFactor), dstHeight);
|
||||
DrawPixels(dstBuffer, static_cast<int>(dstX * dstXFactor), dstY, srcBase, dstBuffer->fb_format, static_cast<int>(srcStride * dstXFactor), static_cast<int>(dstWidth * dstXFactor), dstHeight);
|
||||
SetColorUpdated(dstBuffer, skipDrawReason);
|
||||
RebindFramebuffer("RebindFramebuffer - NotifyBlockTransferAfter");
|
||||
}
|
||||
@ -1892,7 +2002,7 @@ void FramebufferManagerCommon::DestroyAllFBOs() {
|
||||
prevPrevDisplayFramebuf_ = nullptr;
|
||||
|
||||
for (VirtualFramebuffer *vfb : vfbs_) {
|
||||
INFO_LOG(FRAMEBUF, "Destroying FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format);
|
||||
INFO_LOG(FRAMEBUF, "Destroying FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format);
|
||||
DestroyFramebuf(vfb);
|
||||
}
|
||||
vfbs_.clear();
|
||||
@ -1923,7 +2033,7 @@ Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u
|
||||
|
||||
bool z_stencil = reason == TempFBO::STENCIL;
|
||||
char name[128];
|
||||
snprintf(name, sizeof(name), "temp_fbo_%dx%d%s", w, h, z_stencil ? "_depth" : "");
|
||||
snprintf(name, sizeof(name), "temp_fbo_%dx%d%s", w / renderScaleFactor_, h / renderScaleFactor_, z_stencil ? "_depth" : "");
|
||||
Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, z_stencil, name });
|
||||
if (!fbo) {
|
||||
return nullptr;
|
||||
@ -1946,7 +2056,7 @@ void FramebufferManagerCommon::UpdateFramebufUsage(VirtualFramebuffer *vfb) {
|
||||
|
||||
checkFlag(FB_USAGE_DISPLAYED_FRAMEBUFFER, vfb->last_frame_displayed);
|
||||
checkFlag(FB_USAGE_TEXTURE, vfb->last_frame_used);
|
||||
checkFlag(FB_USAGE_RENDERTARGET, vfb->last_frame_render);
|
||||
checkFlag(FB_USAGE_RENDER_COLOR, vfb->last_frame_render);
|
||||
checkFlag(FB_USAGE_CLUT, vfb->last_frame_clut);
|
||||
}
|
||||
|
||||
@ -2136,7 +2246,7 @@ void FramebufferManagerCommon::PackFramebufferSync_(VirtualFramebuffer *vfb, int
|
||||
|
||||
const u32 fb_address = vfb->fb_address & 0x3FFFFFFF;
|
||||
|
||||
Draw::DataFormat destFormat = GEFormatToThin3D(vfb->format);
|
||||
Draw::DataFormat destFormat = GEFormatToThin3D(vfb->fb_format);
|
||||
const int dstBpp = (int)DataFormatSizeInBytes(destFormat);
|
||||
|
||||
const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp;
|
||||
@ -2156,7 +2266,7 @@ void FramebufferManagerCommon::PackFramebufferSync_(VirtualFramebuffer *vfb, int
|
||||
if (destPtr) {
|
||||
draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, x, y, w, h, destFormat, destPtr, vfb->fb_stride, "PackFramebufferSync_");
|
||||
char tag[128];
|
||||
size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->format));
|
||||
size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->fb_format));
|
||||
NotifyMemInfo(MemBlockFlags::WRITE, fb_address + dstByteOffset, dstSize, tag, len);
|
||||
} else {
|
||||
ERROR_LOG(G3D, "PackFramebufferSync_: Tried to readback to bad address %08x (stride = %d)", fb_address + dstByteOffset, vfb->fb_stride);
|
||||
@ -2233,7 +2343,7 @@ void FramebufferManagerCommon::FlushBeforeCopy() {
|
||||
void FramebufferManagerCommon::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) {
|
||||
VirtualFramebuffer *vfb = GetVFBAt(fb_address);
|
||||
if (vfb && vfb->fb_stride != 0) {
|
||||
const u32 bpp = vfb->drawnFormat == GE_FORMAT_8888 ? 4 : 2;
|
||||
const u32 bpp = BufferFormatBytesPerPixel(vfb->drawnFormat);
|
||||
int x = 0;
|
||||
int y = 0;
|
||||
int pixels = loadBytes / bpp;
|
||||
@ -2287,7 +2397,7 @@ std::vector<FramebufferInfo> FramebufferManagerCommon::GetFramebufferList() cons
|
||||
FramebufferInfo info;
|
||||
info.fb_address = vfb->fb_address;
|
||||
info.z_address = vfb->z_address;
|
||||
info.format = vfb->format;
|
||||
info.format = vfb->fb_format;
|
||||
info.width = vfb->width;
|
||||
info.height = vfb->height;
|
||||
info.fbo = vfb->fbo;
|
||||
@ -2314,22 +2424,21 @@ void FramebufferManagerCommon::DeviceLost() {
|
||||
DoRelease(reinterpretFromTo_[i][j]);
|
||||
}
|
||||
}
|
||||
DoRelease(reinterpretVBuf_);
|
||||
DoRelease(reinterpretSampler_);
|
||||
DoRelease(reinterpretVS_);
|
||||
DoRelease(stencilUploadSampler_);
|
||||
DoRelease(stencilUploadPipeline_);
|
||||
DoRelease(draw2DSamplerNearest_);
|
||||
DoRelease(draw2DSamplerLinear_);
|
||||
DoRelease(draw2DVs_);
|
||||
DoRelease(draw2DPipelineColor_);
|
||||
DoRelease(draw2DPipelineDepth_);
|
||||
DoRelease(draw2DPipeline565ToDepth_);
|
||||
DoRelease(draw2DPipeline565ToDepthDeswizzle_);
|
||||
|
||||
draw2D_.DeviceLost();
|
||||
|
||||
draw_ = nullptr;
|
||||
}
|
||||
|
||||
void FramebufferManagerCommon::DeviceRestore(Draw::DrawContext *draw) {
|
||||
draw_ = draw;
|
||||
draw2D_.DeviceRestore(draw_);
|
||||
presentation_->DeviceRestore(draw);
|
||||
}
|
||||
|
||||
@ -2381,7 +2490,7 @@ void FramebufferManagerCommon::DrawActiveTexture(float x, float y, float w, floa
|
||||
// Rearrange to strip form.
|
||||
std::swap(coord[2], coord[3]);
|
||||
|
||||
DrawStrip2D(nullptr, coord, 4, (flags & DRAWTEX_LINEAR) != 0, RASTER_COLOR);
|
||||
draw2D_.DrawStrip2D(nullptr, coord, 4, (flags & DRAWTEX_LINEAR) != 0, Get2DPipeline(DRAW2D_COPY_COLOR));
|
||||
|
||||
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
|
||||
}
|
||||
@ -2430,8 +2539,9 @@ void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX
|
||||
|
||||
float srcXFactor = src->renderScaleFactor;
|
||||
float srcYFactor = src->renderScaleFactor;
|
||||
const int srcBpp = src->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
const int srcBpp = BufferFormatBytesPerPixel(src->fb_format);
|
||||
if (srcBpp != bpp && bpp != 0) {
|
||||
// If we do this, we're kinda in nonsense territory since the actual formats won't match (unless intentionally blitting black or white).
|
||||
srcXFactor = (srcXFactor * bpp) / srcBpp;
|
||||
}
|
||||
int srcX1 = srcX * srcXFactor;
|
||||
@ -2441,8 +2551,9 @@ void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX
|
||||
|
||||
float dstXFactor = dst->renderScaleFactor;
|
||||
float dstYFactor = dst->renderScaleFactor;
|
||||
const int dstBpp = dst->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
const int dstBpp = BufferFormatBytesPerPixel(dst->fb_format);
|
||||
if (dstBpp != bpp && bpp != 0) {
|
||||
// If we do this, we're kinda in nonsense territory since the actual formats won't match (unless intentionally blitting black or white).
|
||||
dstXFactor = (dstXFactor * bpp) / dstBpp;
|
||||
}
|
||||
int dstX1 = dstX * dstXFactor;
|
||||
@ -2475,13 +2586,14 @@ void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX
|
||||
draw_->BlitFramebuffer(src->fbo, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2,
|
||||
channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, Draw::FB_BLIT_NEAREST, tag);
|
||||
} else {
|
||||
Draw2DPipeline *pipeline = Get2DPipeline(channel == RASTER_COLOR ? DRAW2D_COPY_COLOR : DRAW2D_COPY_DEPTH);
|
||||
Draw::Framebuffer *srcFBO = src->fbo;
|
||||
if (src == dst) {
|
||||
Draw::Framebuffer *tempFBO = GetTempFBO(TempFBO::BLIT, src->renderWidth, src->renderHeight);
|
||||
BlitUsingRaster(src->fbo, srcX1, srcY1, srcX2, srcY2, tempFBO, dstX1, dstY1, dstX2, dstY2, false, channel);
|
||||
BlitUsingRaster(src->fbo, srcX1, srcY1, srcX2, srcY2, tempFBO, dstX1, dstY1, dstX2, dstY2, false, pipeline, tag);
|
||||
srcFBO = tempFBO;
|
||||
}
|
||||
BlitUsingRaster(srcFBO, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2, false, channel);
|
||||
BlitUsingRaster(srcFBO, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2, false, pipeline, tag);
|
||||
}
|
||||
|
||||
draw_->InvalidateCachedState();
|
||||
@ -2489,13 +2601,14 @@ void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX
|
||||
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_RASTER_STATE);
|
||||
}
|
||||
|
||||
// The input is raw pixel coordinates, scale not taken into account.
|
||||
void FramebufferManagerCommon::BlitUsingRaster(
|
||||
Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2,
|
||||
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2,
|
||||
bool linearFilter,
|
||||
RasterChannel channel) {
|
||||
Draw2DPipeline *pipeline, const char *tag) {
|
||||
|
||||
if (channel == RASTER_DEPTH) {
|
||||
if (pipeline->info.writeChannel == RASTER_DEPTH) {
|
||||
_dbg_assert_(draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported);
|
||||
}
|
||||
|
||||
@ -2517,13 +2630,14 @@ void FramebufferManagerCommon::BlitUsingRaster(
|
||||
// Unbind the texture first to avoid the D3D11 hazard check (can't set render target to things bound as textures and vice versa, not even temporarily).
|
||||
draw_->BindTexture(0, nullptr);
|
||||
// This will get optimized away in case it's already bound (in VK and GL at least..)
|
||||
draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "BlitUsingRaster");
|
||||
draw_->BindFramebufferAsTexture(src, 0, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, 0);
|
||||
draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag ? tag : "BlitUsingRaster");
|
||||
draw_->BindFramebufferAsTexture(src, 0, pipeline->info.readChannel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, 0);
|
||||
|
||||
Draw::Viewport vp{ 0.0f, 0.0f, (float)dest->Width(), (float)dest->Height(), 0.0f, 1.0f };
|
||||
draw_->SetViewports(1, &vp);
|
||||
draw_->SetScissorRect(0, 0, (int)dest->Width(), (int)dest->Height());
|
||||
DrawStrip2D(nullptr, vtx, 4, linearFilter, channel);
|
||||
|
||||
draw2D_.DrawStrip2D(nullptr, vtx, 4, linearFilter, pipeline, src->Width(), src->Height(), renderScaleFactor_);
|
||||
|
||||
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
|
||||
}
|
||||
|
@ -37,13 +37,14 @@
|
||||
|
||||
enum {
|
||||
FB_USAGE_DISPLAYED_FRAMEBUFFER = 1,
|
||||
FB_USAGE_RENDERTARGET = 2,
|
||||
FB_USAGE_RENDER_COLOR = 2,
|
||||
FB_USAGE_TEXTURE = 4,
|
||||
FB_USAGE_CLUT = 8,
|
||||
FB_USAGE_DOWNLOAD = 16,
|
||||
FB_USAGE_DOWNLOAD_CLEAR = 32,
|
||||
FB_USAGE_BLUE_TO_ALPHA = 64,
|
||||
FB_USAGE_FIRST_FRAME_SAVED = 128,
|
||||
FB_USAGE_RENDER_DEPTH = 256,
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -71,6 +72,11 @@ struct VirtualFramebuffer {
|
||||
u16 fb_stride;
|
||||
u16 z_stride;
|
||||
|
||||
// The original PSP format of the framebuffer.
|
||||
// In reality they are all RGBA8888 for better quality but this is what the PSP thinks it is. This is necessary
|
||||
// when we need to interpret the bits directly (depal or buffer aliasing).
|
||||
GEBufferFormat fb_format;
|
||||
|
||||
// width/height: The detected size of the current framebuffer, in original PSP pixels.
|
||||
u16 width;
|
||||
u16 height;
|
||||
@ -98,11 +104,6 @@ struct VirtualFramebuffer {
|
||||
// The scale factor at which we are rendering (to achieve higher resolution).
|
||||
u8 renderScaleFactor;
|
||||
|
||||
// The original PSP format of the framebuffer.
|
||||
// In reality they are all RGBA8888 for better quality but this is what the PSP thinks it is. This is necessary
|
||||
// when we need to interpret the bits directly (depal or buffer aliasing).
|
||||
GEBufferFormat format;
|
||||
|
||||
// The configured buffer format at the time of the latest/current draw. This will change first, then
|
||||
// if different we'll "reinterpret" the framebuffer to match 'format' as needed.
|
||||
GEBufferFormat drawnFormat;
|
||||
@ -152,7 +153,7 @@ struct FramebufferHeuristicParams {
|
||||
u32 z_address;
|
||||
u16 fb_stride;
|
||||
u16 z_stride;
|
||||
GEBufferFormat fmt;
|
||||
GEBufferFormat fb_format;
|
||||
bool isClearingDepth;
|
||||
bool isWritingDepth;
|
||||
bool isDrawing;
|
||||
@ -266,6 +267,8 @@ public:
|
||||
return vfb;
|
||||
}
|
||||
}
|
||||
void SetDepthFrameBuffer(bool isClearingDepth);
|
||||
|
||||
void RebindFramebuffer(const char *tag);
|
||||
std::vector<FramebufferInfo> GetFramebufferList() const;
|
||||
|
||||
@ -288,8 +291,6 @@ public:
|
||||
void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes);
|
||||
void DrawFramebufferToOutput(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride);
|
||||
|
||||
VirtualFramebuffer *GetLatestDepthBufferAt(u32 z_address, u16 z_stride);
|
||||
|
||||
void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height);
|
||||
|
||||
size_t NumVFBs() const { return vfbs_.size(); }
|
||||
@ -340,14 +341,8 @@ public:
|
||||
int GetTargetBufferWidth() const { return currentRenderVfb_ ? currentRenderVfb_->bufferWidth : 480; }
|
||||
int GetTargetBufferHeight() const { return currentRenderVfb_ ? currentRenderVfb_->bufferHeight : 272; }
|
||||
int GetTargetStride() const { return currentRenderVfb_ ? currentRenderVfb_->fb_stride : 512; }
|
||||
GEBufferFormat GetTargetFormat() const { return currentRenderVfb_ ? currentRenderVfb_->format : displayFormat_; }
|
||||
GEBufferFormat GetTargetFormat() const { return currentRenderVfb_ ? currentRenderVfb_->fb_format : displayFormat_; }
|
||||
|
||||
void SetDepthUpdated() {
|
||||
if (currentRenderVfb_) {
|
||||
currentRenderVfb_->last_frame_depth_render = gpuStats.numFlips;
|
||||
currentRenderVfb_->last_frame_depth_updated = gpuStats.numFlips;
|
||||
}
|
||||
}
|
||||
void SetColorUpdated(int skipDrawReason) {
|
||||
if (currentRenderVfb_) {
|
||||
SetColorUpdated(currentRenderVfb_, skipDrawReason);
|
||||
@ -374,15 +369,20 @@ public:
|
||||
}
|
||||
void ReinterpretFramebuffer(VirtualFramebuffer *vfb, GEBufferFormat oldFormat, GEBufferFormat newFormat);
|
||||
|
||||
Draw2D *GetDraw2D() {
|
||||
return &draw2D_;
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h);
|
||||
void SetViewport2D(int x, int y, int w, int h);
|
||||
Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height);
|
||||
void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags);
|
||||
|
||||
void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, RasterChannel channel);
|
||||
void Ensure2DResources();
|
||||
Draw::Pipeline *Create2DPipeline(void (*generate)(ShaderWriter &));
|
||||
Draw2DPipeline *Get2DPipeline(Draw2DShader shader);
|
||||
|
||||
void CopyToColorFromOverlappingFramebuffers(VirtualFramebuffer *dest);
|
||||
void CopyToDepthFromOverlappingFramebuffers(VirtualFramebuffer *dest);
|
||||
|
||||
bool UpdateSize();
|
||||
|
||||
@ -394,7 +394,7 @@ protected:
|
||||
|
||||
void BlitUsingRaster(
|
||||
Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2,
|
||||
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2, bool linearFilter, RasterChannel channel);
|
||||
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2, bool linearFilter, Draw2DPipeline *pipeline, const char *tag);
|
||||
|
||||
void CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags);
|
||||
|
||||
@ -426,7 +426,7 @@ protected:
|
||||
dstBuffer->dirtyAfterDisplay = true;
|
||||
dstBuffer->drawnWidth = dstBuffer->width;
|
||||
dstBuffer->drawnHeight = dstBuffer->height;
|
||||
dstBuffer->drawnFormat = dstBuffer->format;
|
||||
dstBuffer->drawnFormat = dstBuffer->fb_format;
|
||||
if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
|
||||
dstBuffer->reallyDirtyAfterDisplay = true;
|
||||
}
|
||||
@ -500,10 +500,7 @@ protected:
|
||||
// Thin3D stuff for reinterpreting image data between the various 16-bit formats.
|
||||
// Safe, not optimal - there might be input attachment tricks, etc, but we can't use them
|
||||
// since we don't want N different implementations.
|
||||
Draw::Pipeline *reinterpretFromTo_[3][3]{};
|
||||
Draw::ShaderModule *reinterpretVS_ = nullptr;
|
||||
Draw::SamplerState *reinterpretSampler_ = nullptr;
|
||||
Draw::Buffer *reinterpretVBuf_ = nullptr;
|
||||
Draw2DPipeline *reinterpretFromTo_[3][3]{};
|
||||
|
||||
// Common implementation of stencil buffer upload. Also not 100% optimal, but not performance
|
||||
// critical either.
|
||||
@ -511,10 +508,11 @@ protected:
|
||||
Draw::SamplerState *stencilUploadSampler_ = nullptr;
|
||||
|
||||
// Draw2D pipelines
|
||||
Draw::Pipeline *draw2DPipelineColor_ = nullptr;
|
||||
Draw::Pipeline *draw2DPipelineDepth_ = nullptr;
|
||||
Draw::SamplerState *draw2DSamplerLinear_ = nullptr;
|
||||
Draw::SamplerState *draw2DSamplerNearest_ = nullptr;
|
||||
Draw::ShaderModule *draw2DVs_ = nullptr;
|
||||
Draw2DPipeline *draw2DPipelineColor_ = nullptr;
|
||||
Draw2DPipeline *draw2DPipelineDepth_ = nullptr;
|
||||
Draw2DPipeline *draw2DPipeline565ToDepth_ = nullptr;
|
||||
Draw2DPipeline *draw2DPipeline565ToDepthDeswizzle_ = nullptr;
|
||||
|
||||
Draw2D draw2D_;
|
||||
// The fragment shaders are "owned" by the pipelines since they're 1:1.
|
||||
};
|
||||
|
@ -557,9 +557,7 @@ DepthScaleFactors GetDepthScaleFactors() {
|
||||
}
|
||||
|
||||
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {
|
||||
bool throughmode = gstate.isModeThrough();
|
||||
out.dirtyProj = false;
|
||||
out.dirtyDepth = false;
|
||||
out.throughMode = gstate.isModeThrough();
|
||||
|
||||
float renderWidthFactor, renderHeightFactor;
|
||||
float renderX = 0.0f, renderY = 0.0f;
|
||||
@ -610,7 +608,7 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
|
||||
float offsetX = gstate.getOffsetX();
|
||||
float offsetY = gstate.getOffsetY();
|
||||
|
||||
if (throughmode) {
|
||||
if (out.throughMode) {
|
||||
out.viewportX = renderX * renderWidthFactor + displayOffsetX;
|
||||
out.viewportY = renderY * renderHeightFactor + displayOffsetY;
|
||||
out.viewportW = curRTWidth * renderWidthFactor;
|
||||
@ -647,10 +645,10 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
|
||||
float right = left + vpWidth;
|
||||
float bottom = top + vpHeight;
|
||||
|
||||
float wScale = 1.0f;
|
||||
float xOffset = 0.0f;
|
||||
float hScale = 1.0f;
|
||||
float yOffset = 0.0f;
|
||||
out.widthScale = 1.0f;
|
||||
out.xOffset = 0.0f;
|
||||
out.heightScale = 1.0f;
|
||||
out.yOffset = 0.0f;
|
||||
|
||||
// If we're within the bounds, we want clipping the viewport way. So leave it be.
|
||||
{
|
||||
@ -678,8 +676,8 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
|
||||
right = left + 1.0f;
|
||||
}
|
||||
|
||||
wScale = vpWidth / (right - left);
|
||||
xOffset = drift / (right - left);
|
||||
out.widthScale = vpWidth / (right - left);
|
||||
out.xOffset = drift / (right - left);
|
||||
}
|
||||
}
|
||||
|
||||
@ -707,8 +705,8 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
|
||||
bottom = top + 1.0f;
|
||||
}
|
||||
|
||||
hScale = vpHeight / (bottom - top);
|
||||
yOffset = drift / (bottom - top);
|
||||
out.heightScale = vpHeight / (bottom - top);
|
||||
out.yOffset = drift / (bottom - top);
|
||||
}
|
||||
}
|
||||
|
||||
@ -740,13 +738,13 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
|
||||
}
|
||||
// Okay. So, in our shader, -1 will map to minz, and +1 will map to maxz.
|
||||
float halfActualZRange = (maxz - minz) * (1.0f / 2.0f);
|
||||
float zScale = halfActualZRange < std::numeric_limits<float>::epsilon() ? 1.0f : vpZScale / halfActualZRange;
|
||||
out.depthScale = halfActualZRange < std::numeric_limits<float>::epsilon() ? 1.0f : vpZScale / halfActualZRange;
|
||||
// This adjusts the center from halfActualZRange to vpZCenter.
|
||||
float zOffset = halfActualZRange < std::numeric_limits<float>::epsilon() ? 0.0f : (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;
|
||||
out.zOffset = halfActualZRange < std::numeric_limits<float>::epsilon() ? 0.0f : (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;
|
||||
|
||||
if (!gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
|
||||
zScale = 1.0f;
|
||||
zOffset = 0.0f;
|
||||
out.depthScale = 1.0f;
|
||||
out.zOffset = 0.0f;
|
||||
out.depthRangeMin = ToScaledDepthFromIntegerScale(vpZCenter - vpZScale);
|
||||
out.depthRangeMax = ToScaledDepthFromIntegerScale(vpZCenter + vpZScale);
|
||||
} else {
|
||||
@ -757,19 +755,27 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
|
||||
// OpenGL will clamp these for us anyway, and Direct3D will error if not clamped.
|
||||
out.depthRangeMin = std::max(out.depthRangeMin, 0.0f);
|
||||
out.depthRangeMax = std::min(out.depthRangeMax, 1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale;
|
||||
bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset;
|
||||
bool depthChanged = gstate_c.vpDepthScale != zScale || gstate_c.vpZOffset != zOffset;
|
||||
if (scaleChanged || offsetChanged || depthChanged) {
|
||||
gstate_c.vpWidthScale = wScale;
|
||||
gstate_c.vpHeightScale = hScale;
|
||||
gstate_c.vpDepthScale = zScale;
|
||||
gstate_c.vpXOffset = xOffset;
|
||||
gstate_c.vpYOffset = yOffset;
|
||||
gstate_c.vpZOffset = zOffset;
|
||||
out.dirtyProj = true;
|
||||
out.dirtyDepth = depthChanged;
|
||||
void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor) {
|
||||
if (vpAndScissor.throughMode)
|
||||
return;
|
||||
|
||||
bool scaleChanged = gstate_c.vpWidthScale != vpAndScissor.widthScale || gstate_c.vpHeightScale != vpAndScissor.heightScale;
|
||||
bool offsetChanged = gstate_c.vpXOffset != vpAndScissor.xOffset || gstate_c.vpYOffset != vpAndScissor.yOffset;
|
||||
bool depthChanged = gstate_c.vpDepthScale != vpAndScissor.depthScale || gstate_c.vpZOffset != vpAndScissor.zOffset;
|
||||
if (scaleChanged || offsetChanged || depthChanged) {
|
||||
gstate_c.vpWidthScale = vpAndScissor.widthScale;
|
||||
gstate_c.vpHeightScale = vpAndScissor.heightScale;
|
||||
gstate_c.vpDepthScale = vpAndScissor.depthScale;
|
||||
gstate_c.vpXOffset = vpAndScissor.xOffset;
|
||||
gstate_c.vpYOffset = vpAndScissor.yOffset;
|
||||
gstate_c.vpZOffset = vpAndScissor.zOffset;
|
||||
|
||||
gstate_c.Dirty(DIRTY_PROJMATRIX);
|
||||
if (depthChanged) {
|
||||
gstate_c.Dirty(DIRTY_DEPTHRANGE);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1018,16 +1024,6 @@ void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
|
||||
// Suppress color writes entirely in this mode.
|
||||
maskState.applyFramebufferRead = false;
|
||||
maskState.rgba[0] = false;
|
||||
maskState.rgba[1] = false;
|
||||
maskState.rgba[2] = false;
|
||||
maskState.rgba[3] = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw.
|
||||
uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24));
|
||||
|
||||
|
@ -75,10 +75,16 @@ struct ViewportAndScissor {
|
||||
float viewportH;
|
||||
float depthRangeMin;
|
||||
float depthRangeMax;
|
||||
bool dirtyProj;
|
||||
bool dirtyDepth;
|
||||
float widthScale;
|
||||
float heightScale;
|
||||
float depthScale;
|
||||
float xOffset;
|
||||
float yOffset;
|
||||
float zOffset;
|
||||
bool throughMode;
|
||||
};
|
||||
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
|
||||
void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor);
|
||||
float ToScaledDepthFromIntegerScale(float z);
|
||||
|
||||
struct DepthScaleFactors {
|
||||
|
@ -20,11 +20,7 @@ static const SamplerDef samplers[1] = {
|
||||
// TODO: We could possibly have an option to preserve any extra color precision? But gonna start without it.
|
||||
// Requires full size integer math. It would be possible to make a floating point-only version with lots of
|
||||
// modulo and stuff, might do it one day.
|
||||
void GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBufferFormat to, const ShaderLanguageDesc &lang) {
|
||||
_assert_(lang.bitwiseOps);
|
||||
|
||||
ShaderWriter writer(buffer, lang, ShaderStage::Fragment);
|
||||
|
||||
Draw2DPipelineInfo GenerateReinterpretFragmentShader(ShaderWriter &writer, GEBufferFormat from, GEBufferFormat to) {
|
||||
writer.HighPrecisionFloat();
|
||||
|
||||
writer.DeclareSamplers(samplers);
|
||||
@ -70,22 +66,13 @@ void GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBuff
|
||||
}
|
||||
|
||||
writer.EndFSMain("outColor", FSFLAG_NONE);
|
||||
|
||||
return Draw2DPipelineInfo{
|
||||
RASTER_COLOR,
|
||||
RASTER_COLOR,
|
||||
};
|
||||
}
|
||||
|
||||
void GenerateReinterpretVertexShader(char *buffer, const ShaderLanguageDesc &lang) {
|
||||
_assert_(lang.bitwiseOps);
|
||||
ShaderWriter writer(buffer, lang, ShaderStage::Vertex);
|
||||
|
||||
writer.BeginVSMain(Slice<InputDef>::empty(), Slice<UniformDef>::empty(), varyings);
|
||||
|
||||
writer.C(" float x = -1.0 + float((gl_VertexIndex & 1) << 2);\n");
|
||||
writer.C(" float y = -1.0 + float((gl_VertexIndex & 2) << 1);\n");
|
||||
writer.C(" v_texcoord = (vec2(x, y) + vec2(1.0, 1.0)) * 0.5;\n");
|
||||
writer.C(" gl_Position = vec4(x, y, 0.0, 1.0);\n");
|
||||
writer.EndVSMain(varyings);
|
||||
}
|
||||
|
||||
|
||||
// Can't easily dynamically create these strings, we just pass along the pointer.
|
||||
static const char *reinterpretStrings[3][3] = {
|
||||
{
|
||||
@ -112,7 +99,7 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G
|
||||
|
||||
_assert_(newFormat != oldFormat);
|
||||
// The caller is responsible for updating the format.
|
||||
_assert_(newFormat == vfb->format);
|
||||
_assert_(newFormat == vfb->fb_format);
|
||||
|
||||
ShaderLanguage lang = draw_->GetShaderLanguageDesc().shaderLanguage;
|
||||
|
||||
@ -150,60 +137,15 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G
|
||||
return;
|
||||
}
|
||||
|
||||
if (!reinterpretVS_) {
|
||||
char *vsCode = new char[4000];
|
||||
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
|
||||
GenerateReinterpretVertexShader(vsCode, shaderLanguageDesc);
|
||||
reinterpretVS_ = draw_->CreateShaderModule(ShaderStage::Vertex, shaderLanguageDesc.shaderLanguage, (const uint8_t *)vsCode, strlen(vsCode), "reinterpret_vs");
|
||||
_assert_(reinterpretVS_);
|
||||
delete[] vsCode;
|
||||
}
|
||||
|
||||
if (!reinterpretSampler_) {
|
||||
Draw::SamplerStateDesc samplerDesc{};
|
||||
samplerDesc.magFilter = Draw::TextureFilter::LINEAR;
|
||||
samplerDesc.minFilter = Draw::TextureFilter::LINEAR;
|
||||
reinterpretSampler_ = draw_->CreateSamplerState(samplerDesc);
|
||||
}
|
||||
|
||||
if (!reinterpretVBuf_) {
|
||||
reinterpretVBuf_ = draw_->CreateBuffer(12 * 3, Draw::BufferUsageFlag::DYNAMIC | Draw::BufferUsageFlag::VERTEXDATA);
|
||||
}
|
||||
|
||||
// See if we need to create a new pipeline.
|
||||
|
||||
Draw::Pipeline *pipeline = reinterpretFromTo_[(int)oldFormat][(int)newFormat];
|
||||
Draw2DPipeline *pipeline = reinterpretFromTo_[(int)oldFormat][(int)newFormat];
|
||||
if (!pipeline) {
|
||||
char *fsCode = new char[4000];
|
||||
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
|
||||
GenerateReinterpretFragmentShader(fsCode, oldFormat, newFormat, shaderLanguageDesc);
|
||||
Draw::ShaderModule *reinterpretFS = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fsCode, strlen(fsCode), "reinterpret_fs");
|
||||
_assert_(reinterpretFS);
|
||||
delete[] fsCode;
|
||||
pipeline = draw2D_.Create2DPipeline([=](ShaderWriter &shaderWriter) -> Draw2DPipelineInfo {
|
||||
return GenerateReinterpretFragmentShader(shaderWriter, oldFormat, newFormat);
|
||||
});
|
||||
|
||||
std::vector<Draw::ShaderModule *> shaders;
|
||||
shaders.push_back(reinterpretVS_);
|
||||
shaders.push_back(reinterpretFS);
|
||||
|
||||
using namespace Draw;
|
||||
Draw::PipelineDesc desc{};
|
||||
// We use a "fullscreen triangle".
|
||||
// TODO: clear the stencil buffer. Hard to actually initialize it with the new alpha, though possible - let's see if
|
||||
// we need it.
|
||||
DepthStencilState *depth = draw_->CreateDepthStencilState({ false, false, Comparison::LESS });
|
||||
BlendState *blendstateOff = draw_->CreateBlendState({ false, 0xF });
|
||||
RasterState *rasterNoCull = draw_->CreateRasterState({});
|
||||
|
||||
// No uniforms for these, only a single texture input.
|
||||
PipelineDesc pipelineDesc{ Primitive::TRIANGLE_LIST, shaders, nullptr, depth, blendstateOff, rasterNoCull, nullptr };
|
||||
pipeline = draw_->CreateGraphicsPipeline(pipelineDesc);
|
||||
_assert_(pipeline != nullptr);
|
||||
reinterpretFromTo_[(int)oldFormat][(int)newFormat] = pipeline;
|
||||
|
||||
depth->Release();
|
||||
blendstateOff->Release();
|
||||
rasterNoCull->Release();
|
||||
reinterpretFS->Release();
|
||||
}
|
||||
|
||||
// Copy to a temp framebuffer.
|
||||
@ -213,18 +155,9 @@ void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, G
|
||||
// itself while writing.
|
||||
draw_->InvalidateCachedState();
|
||||
draw_->CopyFramebufferImage(vfb->fbo, 0, 0, 0, 0, temp, 0, 0, 0, 0, vfb->renderWidth, vfb->renderHeight, 1, Draw::FBChannel::FB_COLOR_BIT, "reinterpret_prep");
|
||||
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, reinterpretStrings[(int)oldFormat][(int)newFormat]);
|
||||
draw_->BindPipeline(pipeline);
|
||||
draw_->BindFramebufferAsTexture(temp, 0, Draw::FBChannel::FB_COLOR_BIT, 0);
|
||||
draw_->BindSamplerStates(0, 1, &reinterpretSampler_);
|
||||
draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight);
|
||||
Draw::Viewport vp = Draw::Viewport{ 0.0f, 0.0f, (float)vfb->renderWidth, (float)vfb->renderHeight, 0.0f, 1.0f };
|
||||
draw_->SetViewports(1, &vp);
|
||||
// Vertex buffer not used - vertices generated in shader.
|
||||
// TODO: Switch to a vertex buffer for GLES2/D3D9 compat.
|
||||
draw_->BindVertexBuffers(0, 1, &reinterpretVBuf_, nullptr);
|
||||
draw_->Draw(3, 0);
|
||||
draw_->InvalidateCachedState();
|
||||
|
||||
BlitUsingRaster(temp, 0.0f, 0.0f, vfb->renderWidth, vfb->renderHeight,
|
||||
vfb->fbo, 0.0f, 0.0f, vfb->renderWidth, vfb->renderHeight, false, pipeline, "reinterpret");
|
||||
|
||||
// Unbind.
|
||||
draw_->BindTexture(0, nullptr);
|
||||
|
@ -1,11 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include "Common/GPU/ShaderWriter.h"
|
||||
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "GPU/GPUCommon.h"
|
||||
#include "Common/GPU/ShaderWriter.h"
|
||||
#include "GPU/Common/Draw2D.h"
|
||||
|
||||
void GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBufferFormat to, const ShaderLanguageDesc &lang);
|
||||
|
||||
// Just a single one. Can probably be shared with a lot of similar use cases.
|
||||
// Generates the coordinates for a fullscreen triangle.
|
||||
void GenerateReinterpretVertexShader(char *buffer, const ShaderLanguageDesc &lang);
|
||||
Draw2DPipelineInfo GenerateReinterpretFragmentShader(ShaderWriter &writer, GEBufferFormat from, GEBufferFormat to);
|
||||
|
@ -29,7 +29,7 @@ enum DebugShaderType {
|
||||
SHADER_TYPE_GEOMETRY = 2,
|
||||
SHADER_TYPE_VERTEXLOADER = 3, // Not really a shader, but might as well re-use this mechanism
|
||||
SHADER_TYPE_PIPELINE = 4, // Vulkan and DX12 combines a bunch of state into pipeline objects. Might as well make them inspectable.
|
||||
SHADER_TYPE_DEPAL = 5,
|
||||
SHADER_TYPE_TEXTURE = 5,
|
||||
SHADER_TYPE_SAMPLER = 6, // Not really a shader either. Need to rename this enum...
|
||||
};
|
||||
|
||||
|
@ -240,8 +240,6 @@ std::string FragmentShaderDesc(const FShaderID &id) {
|
||||
if (id.Bit(FS_BIT_COLOR_AGAINST_ZERO)) desc << "ColorTest0 " << alphaTestFuncs[id.Bits(FS_BIT_COLOR_TEST_FUNC, 2)] << " "; // first 4 match;
|
||||
else if (id.Bit(FS_BIT_COLOR_TEST)) desc << "ColorTest " << alphaTestFuncs[id.Bits(FS_BIT_COLOR_TEST_FUNC, 2)] << " "; // first 4 match
|
||||
|
||||
if (id.Bit(FS_BIT_COLOR_TO_DEPTH)) desc << "ColorToDepth ";
|
||||
|
||||
return desc.str();
|
||||
}
|
||||
|
||||
@ -263,8 +261,8 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
|
||||
bool doTextureAlpha = gstate.isTextureAlphaUsed();
|
||||
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
|
||||
bool useShaderDepal = gstate_c.useShaderDepal;
|
||||
bool useSmoothedDepal = gstate_c.useSmoothedShaderDepal;
|
||||
bool colorWriteMask = IsColorWriteMaskComplex(gstate_c.allowFramebufferRead);
|
||||
bool colorToDepth = gstate_c.renderMode == RasterMode::RASTER_MODE_COLOR_TO_DEPTH;
|
||||
|
||||
// Note how we here recompute some of the work already done in state mapping.
|
||||
// Not ideal! At least we share the code.
|
||||
@ -293,11 +291,10 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
|
||||
}
|
||||
id.SetBit(FS_BIT_BGRA_TEXTURE, gstate_c.bgraTexture);
|
||||
id.SetBit(FS_BIT_SHADER_DEPAL, useShaderDepal);
|
||||
id.SetBit(FS_BIT_SHADER_SMOOTHED_DEPAL, useSmoothedDepal);
|
||||
id.SetBit(FS_BIT_3D_TEXTURE, gstate_c.curTextureIs3D);
|
||||
}
|
||||
|
||||
id.SetBit(FS_BIT_COLOR_TO_DEPTH, colorToDepth);
|
||||
|
||||
id.SetBit(FS_BIT_LMODE, lmode);
|
||||
if (enableAlphaTest) {
|
||||
// 5 bits total.
|
||||
|
@ -94,7 +94,7 @@ enum FShaderBit : uint8_t {
|
||||
FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49,
|
||||
FS_BIT_COLOR_WRITEMASK = 50,
|
||||
FS_BIT_3D_TEXTURE = 51,
|
||||
FS_BIT_COLOR_TO_DEPTH = 52,
|
||||
FS_BIT_SHADER_SMOOTHED_DEPAL = 52,
|
||||
};
|
||||
|
||||
static inline FShaderBit operator +(FShaderBit bit, int i) {
|
||||
|
@ -153,7 +153,7 @@ bool FramebufferManagerCommon::PerformStencilUpload(u32 addr, int size, StencilU
|
||||
if (!src)
|
||||
return false;
|
||||
|
||||
switch (dstBuffer->format) {
|
||||
switch (dstBuffer->fb_format) {
|
||||
case GE_FORMAT_565:
|
||||
// Well, this doesn't make much sense.
|
||||
return false;
|
||||
@ -290,7 +290,7 @@ bool FramebufferManagerCommon::PerformStencilUpload(u32 addr, int size, StencilU
|
||||
draw_->SetViewports(1, &viewport);
|
||||
|
||||
// TODO: Switch the format to a single channel format?
|
||||
Draw::Texture *tex = MakePixelTexture(src, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstBuffer->height);
|
||||
Draw::Texture *tex = MakePixelTexture(src, dstBuffer->fb_format, dstBuffer->fb_stride, dstBuffer->width, dstBuffer->height);
|
||||
if (!tex) {
|
||||
// Bad!
|
||||
return false;
|
||||
@ -309,10 +309,10 @@ bool FramebufferManagerCommon::PerformStencilUpload(u32 addr, int size, StencilU
|
||||
continue;
|
||||
}
|
||||
StencilUB ub{};
|
||||
if (dstBuffer->format == GE_FORMAT_4444) {
|
||||
if (dstBuffer->fb_format == GE_FORMAT_4444) {
|
||||
draw_->SetStencilParams(0xFF, (i << 4) | i, 0xFF);
|
||||
ub.stencilValue = i * (16.0f / 255.0f);
|
||||
} else if (dstBuffer->format == GE_FORMAT_5551) {
|
||||
} else if (dstBuffer->fb_format == GE_FORMAT_5551) {
|
||||
draw_->SetStencilParams(0xFF, 0xFF, 0xFF);
|
||||
ub.stencilValue = i * (128.0f / 255.0f);
|
||||
} else {
|
||||
|
@ -105,14 +105,8 @@ inline int dimHeight(u16 dim) {
|
||||
|
||||
// Vulkan color formats:
|
||||
// TODO
|
||||
TextureCacheCommon::TextureCacheCommon(Draw::DrawContext *draw)
|
||||
: draw_(draw),
|
||||
clutLastFormat_(0xFFFFFFFF),
|
||||
clutTotalBytes_(0),
|
||||
clutMaxBytes_(0),
|
||||
clutRenderAddress_(0xFFFFFFFF),
|
||||
clutAlphaLinear_(false),
|
||||
isBgraBackend_(false) {
|
||||
TextureCacheCommon::TextureCacheCommon(Draw::DrawContext *draw, Draw2D *draw2D)
|
||||
: draw_(draw), draw2D_(draw2D) {
|
||||
decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
|
||||
|
||||
// TODO: Clamp down to 256/1KB? Need to check mipmapShareClut and clamp loadclut.
|
||||
@ -130,11 +124,11 @@ TextureCacheCommon::TextureCacheCommon(Draw::DrawContext *draw)
|
||||
|
||||
replacer_.Init();
|
||||
|
||||
depalShaderCache_ = new DepalShaderCache(draw);
|
||||
textureShaderCache_ = new TextureShaderCache(draw, draw2D_);
|
||||
}
|
||||
|
||||
TextureCacheCommon::~TextureCacheCommon() {
|
||||
delete depalShaderCache_;
|
||||
delete textureShaderCache_;
|
||||
|
||||
FreeAlignedMemory(clutBufConverted_);
|
||||
FreeAlignedMemory(clutBufRaw_);
|
||||
@ -265,10 +259,6 @@ SamplerCacheKey TextureCacheCommon::GetSamplingParams(int maxLevel, const TexCac
|
||||
}
|
||||
}
|
||||
|
||||
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
|
||||
forceFiltering = TEX_FILTER_FORCE_NEAREST;
|
||||
}
|
||||
|
||||
switch (forceFiltering) {
|
||||
case TEX_FILTER_AUTO:
|
||||
break;
|
||||
@ -492,7 +482,14 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
|
||||
int h0 = gstate.getTextureHeight(0);
|
||||
int d0 = 1;
|
||||
ReplacedTexture &replaced = FindReplacement(entry, w0, h0, d0);
|
||||
if (replaced.Valid()) {
|
||||
if (replaced.IsInvalid()) {
|
||||
entry->status &= ~TexCacheEntry::STATUS_TO_REPLACE;
|
||||
if (g_Config.bSaveNewTextures) {
|
||||
// Load once more to actually save.
|
||||
match = false;
|
||||
reason = "replacing";
|
||||
}
|
||||
} else {
|
||||
match = false;
|
||||
reason = "replacing";
|
||||
}
|
||||
@ -515,6 +512,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
|
||||
nextNeedsChange_ = false;
|
||||
// Might need a rebuild if the hash fails, but that will be set later.
|
||||
nextNeedsRebuild_ = false;
|
||||
failedTexture_ = false;
|
||||
VERBOSE_LOG(G3D, "Texture at %08x found in cache, applying", texaddr);
|
||||
return entry; //Done!
|
||||
} else {
|
||||
@ -610,10 +608,9 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
|
||||
gstate_c.curTextureHeight = h;
|
||||
gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0);
|
||||
|
||||
failedTexture_ = false;
|
||||
nextTexture_ = entry;
|
||||
if (nextFramebufferTexture_) {
|
||||
nextFramebufferTexture_ = nullptr; // in case it was accidentally set somehow?
|
||||
}
|
||||
nextFramebufferTexture_ = nullptr;
|
||||
nextNeedsRehash_ = true;
|
||||
// We still need to rebuild, to allocate a texture. But we'll bail early.
|
||||
nextNeedsRebuild_ = true;
|
||||
@ -625,35 +622,27 @@ std::vector<AttachCandidate> TextureCacheCommon::GetFramebufferCandidates(const
|
||||
|
||||
std::vector<AttachCandidate> candidates;
|
||||
|
||||
RasterChannel channel = Memory::IsDepthTexVRAMAddress(entry.addr) ? RasterChannel::RASTER_DEPTH : RasterChannel::RASTER_COLOR;
|
||||
if (channel == RasterChannel::RASTER_DEPTH && !gstate_c.Supports(GPU_SUPPORTS_DEPTH_TEXTURE)) {
|
||||
// Depth texture not supported. Don't try to match it, fall back to the memory behind..
|
||||
return std::vector<AttachCandidate>();
|
||||
}
|
||||
|
||||
const std::vector<VirtualFramebuffer *> &framebuffers = framebufferManager_->Framebuffers();
|
||||
|
||||
for (VirtualFramebuffer *framebuffer : framebuffers) {
|
||||
FramebufferMatchInfo match = MatchFramebuffer(entry, framebuffer, texAddrOffset, channel);
|
||||
switch (match.match) {
|
||||
case FramebufferMatch::VALID:
|
||||
candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel });
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
FramebufferMatchInfo match{};
|
||||
if (MatchFramebuffer(entry, framebuffer, texAddrOffset, RASTER_COLOR, &match)) {
|
||||
candidates.push_back(AttachCandidate{ match, entry, framebuffer, RASTER_COLOR, framebuffer->colorBindSeq });
|
||||
}
|
||||
match = {};
|
||||
if (MatchFramebuffer(entry, framebuffer, texAddrOffset, RASTER_DEPTH, &match)) {
|
||||
candidates.push_back(AttachCandidate{ match, entry, framebuffer, RASTER_DEPTH, framebuffer->depthBindSeq });
|
||||
}
|
||||
}
|
||||
|
||||
if (candidates.size() > 1) {
|
||||
bool depth = channel == RasterChannel::RASTER_DEPTH;
|
||||
|
||||
std::string cands;
|
||||
for (auto &candidate : candidates) {
|
||||
cands += candidate.ToString() + " ";
|
||||
}
|
||||
|
||||
WARN_LOG_REPORT_ONCE(multifbcandidate, G3D, "GetFramebufferCandidates(%s): Multiple (%d) candidate framebuffers. First will be chosen. texaddr: %08x offset: %d (%dx%d stride %d, %s):\n%s",
|
||||
depth ? "DEPTH" : "COLOR", (int)candidates.size(),
|
||||
WARN_LOG_REPORT_ONCE(multifbcandidate, G3D, "GetFramebufferCandidates: Multiple (%d) candidate framebuffers. texaddr: %08x offset: %d (%dx%d stride %d, %s):\n%s",
|
||||
(int)candidates.size(),
|
||||
entry.addr, texAddrOffset, dimWidth(entry.dim), dimHeight(entry.dim), entry.bufw, GeTextureFormatToString(entry.format),
|
||||
cands.c_str()
|
||||
);
|
||||
@ -677,29 +666,22 @@ int TextureCacheCommon::GetBestCandidateIndex(const std::vector<AttachCandidate>
|
||||
// a comparison function.
|
||||
for (int i = 0; i < (int)candidates.size(); i++) {
|
||||
const AttachCandidate &candidate = candidates[i];
|
||||
int relevancy = 0;
|
||||
switch (candidate.match.match) {
|
||||
case FramebufferMatch::VALID:
|
||||
relevancy += 1000;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
int relevancy = candidate.seqCount;
|
||||
|
||||
// Bonus point for matching stride.
|
||||
if (candidate.channel == RASTER_COLOR && candidate.fb->fb_stride == candidate.entry.bufw) {
|
||||
relevancy += 100;
|
||||
relevancy += 1000;
|
||||
}
|
||||
|
||||
// Bonus points for no offset.
|
||||
if (candidate.match.xOffset == 0 && candidate.match.yOffset == 0) {
|
||||
relevancy += 10;
|
||||
relevancy += 100;
|
||||
}
|
||||
|
||||
if (candidate.channel == RASTER_COLOR && candidate.fb->last_frame_render == gpuStats.numFlips) {
|
||||
relevancy += 5;
|
||||
relevancy += 50;
|
||||
} else if (candidate.channel == RASTER_DEPTH && candidate.fb->last_frame_depth_render == gpuStats.numFlips) {
|
||||
relevancy += 5;
|
||||
relevancy += 50;
|
||||
}
|
||||
|
||||
if (relevancy > bestRelevancy) {
|
||||
@ -825,7 +807,7 @@ void TextureCacheCommon::NotifyFramebuffer(VirtualFramebuffer *framebuffer, Fram
|
||||
|
||||
const u32 z_addr = framebuffer->z_address & ~mirrorMask; // Probably unnecessary.
|
||||
|
||||
const u32 fb_bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
const u32 fb_bpp = BufferFormatBytesPerPixel(framebuffer->fb_format);
|
||||
const u32 z_bpp = 2; // No other format exists.
|
||||
const u32 fb_stride = framebuffer->fb_stride;
|
||||
const u32 z_stride = framebuffer->z_stride;
|
||||
@ -876,12 +858,26 @@ void TextureCacheCommon::NotifyFramebuffer(VirtualFramebuffer *framebuffer, Fram
|
||||
}
|
||||
}
|
||||
|
||||
FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(
|
||||
bool TextureCacheCommon::MatchFramebuffer(
|
||||
const TextureDefinition &entry,
|
||||
VirtualFramebuffer *framebuffer, u32 texaddrOffset, RasterChannel channel) const {
|
||||
VirtualFramebuffer *framebuffer, u32 texaddrOffset, RasterChannel channel, FramebufferMatchInfo *matchInfo) const {
|
||||
static const u32 MAX_SUBAREA_Y_OFFSET_SAFE = 32;
|
||||
|
||||
uint32_t fb_address = channel == RASTER_DEPTH ? framebuffer->z_address : framebuffer->fb_address;
|
||||
uint32_t fb_stride = channel == RASTER_DEPTH ? framebuffer->z_stride : framebuffer->fb_stride;
|
||||
GEBufferFormat fb_format = channel == RASTER_DEPTH ? GE_FORMAT_DEPTH16 : framebuffer->fb_format;
|
||||
|
||||
if (channel == RASTER_DEPTH && framebuffer->z_address == framebuffer->fb_address) {
|
||||
// Try to avoid silly matches to somewhat malformed buffers.
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (entry.format) {
|
||||
case GE_TFMT_DXT1:
|
||||
case GE_TFMT_DXT3:
|
||||
case GE_TFMT_DXT5:
|
||||
return false;
|
||||
}
|
||||
|
||||
u32 addr = fb_address & 0x3FFFFFFF;
|
||||
u32 texaddr = entry.addr + texaddrOffset;
|
||||
@ -891,40 +887,19 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(
|
||||
|
||||
if (texInVRAM != fbInVRAM) {
|
||||
// Shortcut. Cannot possibly be a match.
|
||||
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
|
||||
return false;
|
||||
}
|
||||
|
||||
if (texInVRAM) {
|
||||
const u32 mirrorMask = 0x00600000;
|
||||
|
||||
// This bit controls swizzle. The swizzles at 0x00200000 and 0x00600000 are designed
|
||||
// to perfectly match reading depth as color (which one to use I think might be related
|
||||
// to the bpp of the color format used when rendering to it).
|
||||
// It's fairly unlikely that games would screw this up since the result will be garbage so
|
||||
// we use it to filter out unlikely matches.
|
||||
switch (entry.addr & mirrorMask) {
|
||||
case 0x00000000:
|
||||
case 0x00400000:
|
||||
// Don't match the depth channel with these addresses when texturing.
|
||||
if (channel == RasterChannel::RASTER_DEPTH) {
|
||||
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
|
||||
}
|
||||
break;
|
||||
case 0x00200000:
|
||||
case 0x00600000:
|
||||
// Don't match the color channel with these addresses when texturing.
|
||||
if (channel == RasterChannel::RASTER_COLOR) {
|
||||
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
addr &= ~mirrorMask;
|
||||
texaddr &= ~mirrorMask;
|
||||
}
|
||||
|
||||
const bool noOffset = texaddr == addr;
|
||||
const bool exactMatch = noOffset && entry.format < 4 && channel == RASTER_COLOR;
|
||||
|
||||
const u32 w = 1 << ((entry.dim >> 0) & 0xf);
|
||||
const u32 h = 1 << ((entry.dim >> 8) & 0xf);
|
||||
// 512 on a 272 framebuffer is sane, so let's be lenient.
|
||||
@ -932,103 +907,101 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(
|
||||
|
||||
// If they match "exactly", it's non-CLUT and from the top left.
|
||||
if (exactMatch) {
|
||||
if (framebuffer->fb_stride != entry.bufw) {
|
||||
WARN_LOG_ONCE(diffStrides1, G3D, "Texturing from framebuffer with different strides %d != %d", entry.bufw, framebuffer->fb_stride);
|
||||
if (fb_stride != entry.bufw) {
|
||||
WARN_LOG_ONCE(diffStrides1, G3D, "Texturing from framebuffer with different strides %d != %d", entry.bufw, (int)fb_stride);
|
||||
}
|
||||
// NOTE: This check is okay because the first texture formats are the same as the buffer formats.
|
||||
if (IsTextureFormatBufferCompatible(entry.format)) {
|
||||
if (TextureFormatMatchesBufferFormat(entry.format, framebuffer->format) || (framebuffer->usageFlags & FB_USAGE_BLUE_TO_ALPHA)) {
|
||||
return FramebufferMatchInfo{ FramebufferMatch::VALID };
|
||||
} else if (IsTextureFormat16Bit(entry.format) && IsBufferFormat16Bit(framebuffer->format)) {
|
||||
WARN_LOG_ONCE(diffFormat1, G3D, "Texturing from framebuffer with reinterpretable format: %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format));
|
||||
return FramebufferMatchInfo{ FramebufferMatch::VALID, 0, 0, true, TextureFormatToBufferFormat(entry.format) };
|
||||
if (TextureFormatMatchesBufferFormat(entry.format, fb_format) || (framebuffer->usageFlags & FB_USAGE_BLUE_TO_ALPHA)) {
|
||||
return true;
|
||||
} else if (IsTextureFormat16Bit(entry.format) && IsBufferFormat16Bit(fb_format) && channel == RASTER_COLOR) {
|
||||
WARN_LOG_ONCE(diffFormat1, G3D, "Texturing from framebuffer with reinterpretable fb_format: %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(fb_format));
|
||||
*matchInfo = FramebufferMatchInfo{ 0, 0, true, TextureFormatToBufferFormat(entry.format) };
|
||||
return true;
|
||||
} else {
|
||||
WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible formats %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format));
|
||||
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
|
||||
WARN_LOG_ONCE(diffFormat2, G3D, "Not texturing from framebuffer with incompatible formats %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(fb_format));
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// Format incompatible, ignoring without comment. (maybe some really gnarly hacks will end up here...)
|
||||
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// Apply to buffered mode only.
|
||||
if (!framebufferManager_->UseBufferedRendering()) {
|
||||
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check works for D16 too (???)
|
||||
const bool matchingClutFormat =
|
||||
(channel != RASTER_COLOR && entry.format == GE_TFMT_CLUT16) ||
|
||||
(channel == RASTER_COLOR && framebuffer->format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) ||
|
||||
(channel == RASTER_COLOR && framebuffer->format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16);
|
||||
|
||||
// To avoid ruining git blame, kept the same name as the old struct.
|
||||
FramebufferMatchInfo fbInfo{ FramebufferMatch::VALID };
|
||||
(fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_CLUT16) ||
|
||||
(fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_5650) ||
|
||||
(fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) ||
|
||||
(fb_format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16);
|
||||
|
||||
const u32 bitOffset = (texaddr - addr) * 8;
|
||||
if (bitOffset != 0) {
|
||||
const u32 pixelOffset = bitOffset / std::max(1U, (u32)textureBitsPerPixel[entry.format]);
|
||||
|
||||
fbInfo.yOffset = entry.bufw == 0 ? 0 : pixelOffset / entry.bufw;
|
||||
fbInfo.xOffset = entry.bufw == 0 ? 0 : pixelOffset % entry.bufw;
|
||||
matchInfo->yOffset = entry.bufw == 0 ? 0 : pixelOffset / entry.bufw;
|
||||
matchInfo->xOffset = entry.bufw == 0 ? 0 : pixelOffset % entry.bufw;
|
||||
}
|
||||
|
||||
if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) {
|
||||
if (matchInfo->yOffset + minSubareaHeight >= framebuffer->height) {
|
||||
// Can't be inside the framebuffer.
|
||||
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
|
||||
return false;
|
||||
}
|
||||
|
||||
if (framebuffer->fb_stride != entry.bufw) {
|
||||
if (fb_stride != entry.bufw) {
|
||||
if (noOffset) {
|
||||
WARN_LOG_ONCE(diffStrides2, G3D, "Texturing from framebuffer (matching_clut=%s) different strides %d != %d", matchingClutFormat ? "yes" : "no", entry.bufw, framebuffer->fb_stride);
|
||||
WARN_LOG_ONCE(diffStrides2, G3D, "Texturing from framebuffer (matching_clut=%s) different strides %d != %d", matchingClutFormat ? "yes" : "no", entry.bufw, fb_stride);
|
||||
// Continue on with other checks.
|
||||
// Not actually sure why we even try here. There's no way it'll go well if the strides are different.
|
||||
} else {
|
||||
// Assume any render-to-tex with different bufw + offset is a render from ram.
|
||||
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if it's in bufferWidth (which might be higher than width and may indicate the framebuffer includes the data.)
|
||||
if (fbInfo.xOffset >= framebuffer->bufferWidth && fbInfo.xOffset + w <= (u32)framebuffer->fb_stride) {
|
||||
if (matchInfo->xOffset >= framebuffer->bufferWidth && matchInfo->xOffset + w <= (u32)fb_stride) {
|
||||
// This happens in Brave Story, see #10045 - the texture is in the space between strides, with matching stride.
|
||||
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
|
||||
return false;
|
||||
}
|
||||
|
||||
// Trying to play it safe. Below 0x04110000 is almost always framebuffers.
|
||||
// TODO: Maybe we can reduce this check and find a better way above 0x04110000?
|
||||
if (fbInfo.yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000 && !PSP_CoreParameter().compat.flags().AllowLargeFBTextureOffsets) {
|
||||
WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset, framebuffer->width, framebuffer->height);
|
||||
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
|
||||
if (matchInfo->yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000 && !PSP_CoreParameter().compat.flags().AllowLargeFBTextureOffsets) {
|
||||
WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", fb_address, matchInfo->xOffset, matchInfo->yOffset, framebuffer->width, framebuffer->height);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for CLUT. The framebuffer is always RGB, but it can be interpreted as a CLUT texture.
|
||||
// 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture.
|
||||
if (matchingClutFormat) {
|
||||
if (!noOffset) {
|
||||
WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset);
|
||||
WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer (%s) using %s with offset at %08x +%dx%d", channel == RASTER_DEPTH ? "DEPTH" : "COLOR", GeTextureFormatToString(entry.format), fb_address, matchInfo->xOffset, matchInfo->yOffset);
|
||||
}
|
||||
fbInfo.match = FramebufferMatch::VALID; // We check the format again later, no need to return a special value here.
|
||||
return fbInfo;
|
||||
return true;
|
||||
} else if (IsClutFormat((GETextureFormat)(entry.format)) || IsDXTFormat((GETextureFormat)(entry.format))) {
|
||||
WARN_LOG_ONCE(fourEightBit, G3D, "%s format not supported when texturing from framebuffer of format %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format));
|
||||
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
|
||||
WARN_LOG_ONCE(fourEightBit, G3D, "%s fb_format not supported when texturing from framebuffer of format %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(fb_format));
|
||||
return false;
|
||||
}
|
||||
|
||||
// This is either normal or we failed to generate a shader to depalettize
|
||||
if ((int)framebuffer->format == (int)entry.format || matchingClutFormat) {
|
||||
if ((int)framebuffer->format != (int)entry.format) {
|
||||
if ((int)fb_format == (int)entry.format || matchingClutFormat) {
|
||||
if ((int)fb_format != (int)entry.format) {
|
||||
WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with different formats %s != %s at %08x",
|
||||
GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format), fb_address);
|
||||
return fbInfo;
|
||||
GeTextureFormatToString(entry.format), GeBufferFormatToString(fb_format), fb_address);
|
||||
return true;
|
||||
} else {
|
||||
WARN_LOG_ONCE(subarea, G3D, "Texturing from framebuffer at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset);
|
||||
return fbInfo;
|
||||
WARN_LOG_ONCE(subarea, G3D, "Texturing from framebuffer at %08x +%dx%d", fb_address, matchInfo->xOffset, matchInfo->yOffset);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible format %s != %s at %08x",
|
||||
GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format), fb_address);
|
||||
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
|
||||
GeTextureFormatToString(entry.format), GeBufferFormatToString(fb_format), fb_address);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1038,18 +1011,20 @@ void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate)
|
||||
FramebufferMatchInfo fbInfo = candidate.match;
|
||||
|
||||
if (candidate.match.reinterpret) {
|
||||
GEBufferFormat oldFormat = candidate.fb->format;
|
||||
candidate.fb->format = candidate.match.reinterpretTo;
|
||||
GEBufferFormat oldFormat = candidate.fb->fb_format;
|
||||
candidate.fb->fb_format = candidate.match.reinterpretTo;
|
||||
framebufferManager_->ReinterpretFramebuffer(candidate.fb, oldFormat, candidate.match.reinterpretTo);
|
||||
}
|
||||
|
||||
_dbg_assert_msg_(framebuffer != nullptr, "Framebuffer must not be null.");
|
||||
|
||||
framebuffer->usageFlags |= FB_USAGE_TEXTURE;
|
||||
if (framebufferManager_->UseBufferedRendering()) {
|
||||
// Keep the framebuffer alive.
|
||||
framebuffer->last_frame_used = gpuStats.numFlips;
|
||||
// Keep the framebuffer alive.
|
||||
framebuffer->last_frame_used = gpuStats.numFlips;
|
||||
|
||||
nextFramebufferTextureChannel_ = RASTER_COLOR;
|
||||
|
||||
if (framebufferManager_->UseBufferedRendering()) {
|
||||
// We need to force it, since we may have set it on a texture before attaching.
|
||||
gstate_c.curTextureWidth = framebuffer->bufferWidth;
|
||||
gstate_c.curTextureHeight = framebuffer->bufferHeight;
|
||||
@ -1068,7 +1043,15 @@ void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate)
|
||||
gstate_c.SetNeedShaderTexclamp(true);
|
||||
}
|
||||
|
||||
nextFramebufferTexture_ = framebuffer;
|
||||
if (candidate.channel == RASTER_DEPTH && !gstate_c.Supports(GPU_SUPPORTS_DEPTH_TEXTURE)) {
|
||||
// Flag to bind a null texture if we can't support depth textures.
|
||||
// Should only happen on old OpenGL.
|
||||
nextFramebufferTexture_ = nullptr;
|
||||
failedTexture_ = true;
|
||||
} else {
|
||||
nextFramebufferTexture_ = framebuffer;
|
||||
nextFramebufferTextureChannel_ = candidate.channel;
|
||||
}
|
||||
nextTexture_ = nullptr;
|
||||
} else {
|
||||
if (framebuffer->fbo) {
|
||||
@ -1168,7 +1151,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
|
||||
const std::vector<VirtualFramebuffer *> &framebuffers = framebufferManager_->Framebuffers();
|
||||
for (VirtualFramebuffer *framebuffer : framebuffers) {
|
||||
const u32 fb_address = framebuffer->fb_address & 0x3FFFFFFF;
|
||||
const u32 bpp = framebuffer->drawnFormat == GE_FORMAT_8888 ? 4 : 2;
|
||||
const u32 bpp = BufferFormatBytesPerPixel(framebuffer->drawnFormat);
|
||||
u32 offset = clutFramebufAddr - fb_address;
|
||||
|
||||
// Is this inside the framebuffer at all?
|
||||
@ -1327,22 +1310,22 @@ ReplacedTexture &TextureCacheCommon::FindReplacement(TexCacheEntry *entry, int &
|
||||
constexpr double MAX_BUDGET_PER_TEX = 0.25 / 60.0;
|
||||
|
||||
double replaceStart = time_now_d();
|
||||
double budget = std::min(MAX_BUDGET_PER_TEX, replacementFrameBudget_ - replacementTimeThisFrame_);
|
||||
u64 cachekey = replacer_.Enabled() ? entry->CacheKey() : 0;
|
||||
ReplacedTexture &replaced = replacer_.FindReplacement(cachekey, entry->fullhash, w, h);
|
||||
if (replaced.IsReady(std::min(MAX_BUDGET_PER_TEX, replacementFrameBudget_ - replacementTimeThisFrame_))) {
|
||||
ReplacedTexture &replaced = replacer_.FindReplacement(cachekey, entry->fullhash, w, h, budget);
|
||||
if (replaced.IsReady(budget)) {
|
||||
if (replaced.GetSize(0, w, h)) {
|
||||
replacementTimeThisFrame_ += time_now_d() - replaceStart;
|
||||
|
||||
// Consider it already "scaled" and remove any delayed replace flag.
|
||||
// Consider it already "scaled."
|
||||
entry->status |= TexCacheEntry::STATUS_IS_SCALED;
|
||||
entry->status &= ~TexCacheEntry::STATUS_TO_REPLACE;
|
||||
return replaced;
|
||||
}
|
||||
} else if (replaced.Valid()) {
|
||||
|
||||
// Remove the flag, even if it was invalid.
|
||||
entry->status &= ~TexCacheEntry::STATUS_TO_REPLACE;
|
||||
} else if (!replaced.IsInvalid()) {
|
||||
entry->status |= TexCacheEntry::STATUS_TO_REPLACE;
|
||||
}
|
||||
replacementTimeThisFrame_ += time_now_d() - replaceStart;
|
||||
return replacer_.FindNone();
|
||||
return replaced;
|
||||
}
|
||||
|
||||
// This is only used in the GLES backend, where we don't point these to video memory.
|
||||
@ -1692,7 +1675,9 @@ CheckAlphaResult TextureCacheCommon::ReadIndexedTex(u8 *out, int outPitch, int l
|
||||
texptr = (u8 *)tmpTexBuf32_.data();
|
||||
}
|
||||
|
||||
const bool mipmapShareClut = gstate.isClutSharedForMipmaps();
|
||||
// Misshitsu no Sacrifice has separate CLUT data, this is a hack to allow it.
|
||||
// Normally separate CLUTs are not allowed for 8-bit or higher indices.
|
||||
const bool mipmapShareClut = gstate.isClutSharedForMipmaps() || gstate.getClutLoadBlocks() != 0x40;
|
||||
const int clutSharingOffset = mipmapShareClut ? 0 : (level & 1) * 256;
|
||||
|
||||
GEPaletteFormat palFormat = (GEPaletteFormat)gstate.getClutPaletteFormat();
|
||||
@ -1779,10 +1764,12 @@ void TextureCacheCommon::ApplyTexture() {
|
||||
if (!entry) {
|
||||
// Maybe we bound a framebuffer?
|
||||
InvalidateLastTexture();
|
||||
if (nextFramebufferTexture_) {
|
||||
bool depth = Memory::IsDepthTexVRAMAddress(gstate.getTextureAddress(0));
|
||||
if (failedTexture_) {
|
||||
// Backends should handle this by binding a black texture with 0 alpha.
|
||||
BindTexture(nullptr);
|
||||
} else if (nextFramebufferTexture_) {
|
||||
// ApplyTextureFrameBuffer is responsible for setting SetTextureFullAlpha.
|
||||
ApplyTextureFramebuffer(nextFramebufferTexture_, gstate.getTextureFormat(), depth ? RASTER_DEPTH : RASTER_COLOR);
|
||||
ApplyTextureFramebuffer(nextFramebufferTexture_, gstate.getTextureFormat(), nextFramebufferTextureChannel_);
|
||||
nextFramebufferTexture_ = nullptr;
|
||||
}
|
||||
|
||||
@ -1845,18 +1832,69 @@ void TextureCacheCommon::ApplyTexture() {
|
||||
gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0);
|
||||
}
|
||||
|
||||
// Reports whether a texture of format texFormat may be read from a framebuffer of
// format bufferFormat via the depalettize path.
//
// Accepted pairings:
//   - GE_TFMT_CLUT16 with a 4444, 565, 5551 or DEPTH16 buffer
//   - GE_TFMT_CLUT32 with an 8888 buffer
//   - GE_TFMT_5650 with a DEPTH16 buffer (not a CLUT format, see below)
//
// Every other pairing involving a CLUT texture format is logged as a warning and
// rejected. Non-CLUT pairings other than the one listed are rejected silently.
bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) {
	if (!IsClutFormat(texFormat)) {
		// We can also "depal" 565 format, this is used to read depth buffers as 565 on occasion (#15491).
		return texFormat == GE_TFMT_5650 && bufferFormat == GE_FORMAT_DEPTH16;
	}

	// CLUT texture: the index width has to line up with the buffer's pixel format.
	bool compatible = false;
	switch (bufferFormat) {
	case GE_FORMAT_8888:
		compatible = (texFormat == GE_TFMT_CLUT32);
		break;
	case GE_FORMAT_4444:
	case GE_FORMAT_565:
	case GE_FORMAT_5551:
	case GE_FORMAT_DEPTH16:
		compatible = (texFormat == GE_TFMT_CLUT16);
		break;
	default:
		break;
	}

	if (compatible) {
		return true;
	}

	WARN_LOG(G3D, "Invalid CLUT/framebuffer combination: %s vs %s", GeTextureFormatToString(texFormat), GeBufferFormatToString(bufferFormat));
	return false;
}
|
||||
|
||||
// If the palette is detected as a smooth ramp, we can interpolate for higher color precision.
|
||||
// But we only do it if the mask/shift exactly matches a color channel, else something different might be going
|
||||
// on and we definitely don't want to interpolate.
|
||||
// Great enhancement for Test Drive.
|
||||
static bool CanUseSmoothDepal(const GPUgstate &gstate, GEBufferFormat framebufferFormat, int rampLength) {
|
||||
if (gstate.getClutIndexStartPos() == 0 &&
|
||||
gstate.getClutIndexMask() <= rampLength) {
|
||||
switch (framebufferFormat) {
|
||||
case GE_FORMAT_565:
|
||||
if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 11) {
|
||||
return gstate.getClutIndexMask() == 0x1F;
|
||||
} else if (gstate.getClutIndexShift() == 5) {
|
||||
return gstate.getClutIndexMask() == 0x3F;
|
||||
}
|
||||
break;
|
||||
case GE_FORMAT_5551:
|
||||
if (gstate.getClutIndexShift() == 0 || gstate.getClutIndexShift() == 5 || gstate.getClutIndexShift() == 10) {
|
||||
return gstate.getClutIndexMask() == 0x1F;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel) {
|
||||
DepalShader *depalShader = nullptr;
|
||||
Draw2DPipeline *textureShader = nullptr;
|
||||
uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
|
||||
|
||||
bool need_depalettize = IsClutFormat(texFormat);
|
||||
bool depth = channel == RASTER_DEPTH;
|
||||
bool need_depalettize = CanDepalettize(texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
|
||||
bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth && !gstate_c.curTextureIs3D;
|
||||
|
||||
// TODO: Implement shader depal in the fragment shader generator for D3D11 at least.
|
||||
if (!draw_->GetDeviceCaps().fragmentShaderInt32Supported) {
|
||||
useShaderDepal = false;
|
||||
depth = false; // Can't support this
|
||||
}
|
||||
|
||||
switch (draw_->GetShaderLanguageDesc().shaderLanguage) {
|
||||
@ -1868,13 +1906,18 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
|
||||
break;
|
||||
}
|
||||
|
||||
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
|
||||
ClutTexture clutTexture{};
|
||||
bool smoothedDepal = false;
|
||||
|
||||
if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) {
|
||||
clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
|
||||
smoothedDepal = CanUseSmoothDepal(gstate, framebuffer->drawnFormat, clutTexture.rampLength);
|
||||
|
||||
if (useShaderDepal) {
|
||||
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
|
||||
|
||||
// Very icky conflation here of native and thin3d rendering. This will need careful work per backend in BindAsClutTexture.
|
||||
Draw::Texture *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
|
||||
BindAsClutTexture(clutTexture);
|
||||
BindAsClutTexture(clutTexture.texture);
|
||||
|
||||
framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
|
||||
// Vulkan needs to do some extra work here to pick out the native handle from Draw.
|
||||
@ -1888,7 +1931,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
|
||||
|
||||
// Since we started/ended render passes, might need these.
|
||||
gstate_c.Dirty(DIRTY_DEPAL);
|
||||
gstate_c.SetUseShaderDepal(true);
|
||||
|
||||
gstate_c.SetUseShaderDepal(true, smoothedDepal);
|
||||
gstate_c.depalFramebufferFormat = framebuffer->drawnFormat;
|
||||
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
|
||||
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
|
||||
@ -1900,13 +1944,13 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
|
||||
return;
|
||||
}
|
||||
|
||||
depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
|
||||
gstate_c.SetUseShaderDepal(false);
|
||||
textureShader = textureShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat, smoothedDepal);
|
||||
gstate_c.SetUseShaderDepal(false, false);
|
||||
}
|
||||
|
||||
if (depalShader) {
|
||||
if (textureShader) {
|
||||
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
|
||||
Draw::Texture *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
|
||||
ClutTexture clutTexture = textureShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBufRaw_);
|
||||
Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, framebuffer->renderWidth, framebuffer->renderHeight);
|
||||
draw_->BindTexture(0, nullptr);
|
||||
draw_->BindTexture(1, nullptr);
|
||||
@ -1916,17 +1960,17 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
|
||||
Draw::Viewport vp{ 0.0f, 0.0f, (float)framebuffer->renderWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f };
|
||||
draw_->SetViewports(1, &vp);
|
||||
|
||||
TextureShaderApplier shaderApply(draw_, depalShader, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight);
|
||||
shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset);
|
||||
shaderApply.Use();
|
||||
|
||||
draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0);
|
||||
draw_->BindTexture(1, clutTexture);
|
||||
Draw::SamplerState *nearest = depalShaderCache_->GetSampler();
|
||||
draw_->BindTexture(1, clutTexture.texture);
|
||||
Draw::SamplerState *nearest = textureShaderCache_->GetSampler(false);
|
||||
Draw::SamplerState *clutSampler = textureShaderCache_->GetSampler(smoothedDepal);
|
||||
draw_->BindSamplerStates(0, 1, &nearest);
|
||||
draw_->BindSamplerStates(1, 1, &nearest);
|
||||
draw_->BindSamplerStates(1, 1, &clutSampler);
|
||||
|
||||
textureShaderCache_->ApplyShader(textureShader,
|
||||
framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight,
|
||||
gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset);
|
||||
|
||||
shaderApply.Shade();
|
||||
draw_->BindTexture(0, nullptr);
|
||||
framebufferManager_->RebindFramebuffer("ApplyTextureFramebuffer");
|
||||
|
||||
@ -1946,7 +1990,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
|
||||
framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
|
||||
BoundFramebufferTexture();
|
||||
|
||||
gstate_c.SetUseShaderDepal(false);
|
||||
gstate_c.SetUseShaderDepal(false, false);
|
||||
gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
|
||||
}
|
||||
|
||||
@ -1958,7 +2002,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
|
||||
}
|
||||
|
||||
void TextureCacheCommon::Clear(bool delete_them) {
|
||||
depalShaderCache_->Clear();
|
||||
textureShaderCache_->Clear();
|
||||
|
||||
ForgetLastTexture();
|
||||
for (TexCache::iterator iter = cache_.begin(); iter != cache_.end(); ++iter) {
|
||||
@ -2158,8 +2202,8 @@ void TextureCacheCommon::ClearNextFrame() {
|
||||
clearCacheNextFrame_ = true;
|
||||
}
|
||||
|
||||
std::string AttachCandidate::ToString() {
|
||||
return StringFromFormat("[C:%08x/%d Z:%08x/%d X:%d Y:%d reint: %s]", this->fb->fb_address, this->fb->fb_stride, this->fb->z_address, this->fb->z_stride, this->match.xOffset, this->match.yOffset, this->match.reinterpret ? "true" : "false");
|
||||
std::string AttachCandidate::ToString() const {
|
||||
return StringFromFormat("[%s seq:%d C:%08x/%d Z:%08x/%d X:%d Y:%d reint: %s]", this->channel == RASTER_COLOR ? "COLOR" : "DEPTH", this->seqCount, this->fb->fb_address, this->fb->fb_stride, this->fb->z_address, this->fb->z_stride, this->match.xOffset, this->match.yOffset, this->match.reinterpret ? "true" : "false");
|
||||
}
|
||||
|
||||
bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEntry *entry) {
|
||||
@ -2261,11 +2305,6 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt
|
||||
plan.scaleFactor = 1;
|
||||
}
|
||||
|
||||
// Don't upscale textures in color-to-depth mode.
|
||||
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
|
||||
plan.scaleFactor = 1;
|
||||
}
|
||||
|
||||
if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) != 0 && plan.scaleFactor != 1 && plan.slowScaler) {
|
||||
// Remember for later that we /wanted/ to scale this texture.
|
||||
entry->status |= TexCacheEntry::STATUS_TO_SCALE;
|
||||
@ -2374,7 +2413,7 @@ void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, i
|
||||
}
|
||||
}
|
||||
|
||||
if (replacer_.Enabled()) {
|
||||
if (replacer_.Enabled() && replaced.IsInvalid()) {
|
||||
ReplacedTextureDecodeInfo replacedInfo;
|
||||
replacedInfo.cachekey = entry.CacheKey();
|
||||
replacedInfo.hash = entry.fullhash;
|
||||
@ -2405,5 +2444,5 @@ CheckAlphaResult TextureCacheCommon::CheckCLUTAlpha(const uint8_t *pixelData, GE
|
||||
}
|
||||
|
||||
void TextureCacheCommon::StartFrame() {
|
||||
depalShaderCache_->Decimate();
|
||||
textureShaderCache_->Decimate();
|
||||
}
|
||||
|
@ -29,7 +29,9 @@
|
||||
#include "GPU/Common/GPUDebugInterface.h"
|
||||
#include "GPU/Common/TextureDecoder.h"
|
||||
#include "GPU/Common/TextureScalerCommon.h"
|
||||
#include "GPU/Common/DepalettizeCommon.h"
|
||||
#include "GPU/Common/TextureShaderCommon.h"
|
||||
|
||||
class Draw2D;
|
||||
|
||||
enum FramebufferNotification {
|
||||
NOTIFY_FB_CREATED,
|
||||
@ -206,15 +208,7 @@ typedef std::map<u64, std::unique_ptr<TexCacheEntry>> TexCache;
|
||||
#undef IGNORE
|
||||
#endif
|
||||
|
||||
enum class FramebufferMatch {
|
||||
// Valid, exact match.
|
||||
VALID = 0,
|
||||
// Not a match, remove if currently attached.
|
||||
NO_MATCH,
|
||||
};
|
||||
|
||||
struct FramebufferMatchInfo {
|
||||
FramebufferMatch match;
|
||||
u32 xOffset;
|
||||
u32 yOffset;
|
||||
bool reinterpret;
|
||||
@ -226,8 +220,9 @@ struct AttachCandidate {
|
||||
TextureDefinition entry;
|
||||
VirtualFramebuffer *fb;
|
||||
RasterChannel channel;
|
||||
int seqCount;
|
||||
|
||||
std::string ToString();
|
||||
std::string ToString() const;
|
||||
};
|
||||
|
||||
class FramebufferManagerCommon;
|
||||
@ -278,7 +273,7 @@ struct BuildTexturePlan {
|
||||
|
||||
class TextureCacheCommon {
|
||||
public:
|
||||
TextureCacheCommon(Draw::DrawContext *draw);
|
||||
TextureCacheCommon(Draw::DrawContext *draw, Draw2D *draw2D);
|
||||
virtual ~TextureCacheCommon();
|
||||
|
||||
void LoadClut(u32 clutAddr, u32 loadBytes);
|
||||
@ -298,7 +293,7 @@ public:
|
||||
void InvalidateAll(GPUInvalidationType type);
|
||||
void ClearNextFrame();
|
||||
|
||||
DepalShaderCache *GetDepalShaderCache() { return depalShaderCache_; }
|
||||
TextureShaderCache *GetTextureShaderCache() { return textureShaderCache_; }
|
||||
|
||||
virtual void ForgetLastTexture() = 0;
|
||||
virtual void InvalidateLastTexture() = 0;
|
||||
@ -365,7 +360,7 @@ protected:
|
||||
SamplerCacheKey GetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight);
|
||||
void UpdateMaxSeenV(TexCacheEntry *entry, bool throughMode);
|
||||
|
||||
FramebufferMatchInfo MatchFramebuffer(const TextureDefinition &entry, VirtualFramebuffer *framebuffer, u32 texaddrOffset, RasterChannel channel) const;
|
||||
bool MatchFramebuffer(const TextureDefinition &entry, VirtualFramebuffer *framebuffer, u32 texaddrOffset, RasterChannel channel, FramebufferMatchInfo *matchInfo) const;
|
||||
|
||||
std::vector<AttachCandidate> GetFramebufferCandidates(const TextureDefinition &entry, u32 texAddrOffset);
|
||||
int GetBestCandidateIndex(const std::vector<AttachCandidate> &candidates);
|
||||
@ -407,10 +402,12 @@ protected:
|
||||
}
|
||||
|
||||
Draw::DrawContext *draw_;
|
||||
Draw2D *draw2D_;
|
||||
|
||||
TextureReplacer replacer_;
|
||||
TextureScalerCommon scaler_;
|
||||
FramebufferManagerCommon *framebufferManager_;
|
||||
DepalShaderCache *depalShaderCache_;
|
||||
TextureShaderCache *textureShaderCache_;
|
||||
ShaderManagerCommon *shaderManager_;
|
||||
|
||||
bool clearCacheNextFrame_ = false;
|
||||
@ -440,7 +437,9 @@ protected:
|
||||
SimpleBuf<u32> tmpTexBufRearrange_;
|
||||
|
||||
TexCacheEntry *nextTexture_ = nullptr;
|
||||
bool failedTexture_ = false;
|
||||
VirtualFramebuffer *nextFramebufferTexture_ = nullptr;
|
||||
RasterChannel nextFramebufferTextureChannel_ = RASTER_COLOR;
|
||||
|
||||
u32 clutHash_ = 0;
|
||||
|
||||
@ -449,13 +448,13 @@ protected:
|
||||
u32 *clutBufConverted_;
|
||||
// This is the active one.
|
||||
u32 *clutBuf_;
|
||||
u32 clutLastFormat_;
|
||||
u32 clutTotalBytes_;
|
||||
u32 clutMaxBytes_;
|
||||
u32 clutRenderAddress_;
|
||||
u32 clutLastFormat_ = 0xFFFFFFFF;
|
||||
u32 clutTotalBytes_ = 0;
|
||||
u32 clutMaxBytes_ = 0;
|
||||
u32 clutRenderAddress_ = 0xFFFFFFFF;
|
||||
u32 clutRenderOffset_;
|
||||
// True if the clut is just alpha values in the same order (RGBA4444-bit only.)
|
||||
bool clutAlphaLinear_;
|
||||
bool clutAlphaLinear_ = false;
|
||||
u16 clutAlphaLinearColor_;
|
||||
|
||||
int standardScaleFactor_;
|
||||
@ -466,7 +465,7 @@ protected:
|
||||
bool nextNeedsChange_;
|
||||
bool nextNeedsRebuild_;
|
||||
|
||||
bool isBgraBackend_;
|
||||
bool isBgraBackend_ = false;
|
||||
|
||||
u32 expandClut_[256];
|
||||
};
|
||||
|
295
GPU/Common/TextureShaderCommon.cpp
Normal file
295
GPU/Common/TextureShaderCommon.cpp
Normal file
@ -0,0 +1,295 @@
|
||||
// Copyright (c) 2014- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "Common/Log.h"
|
||||
#include "Common/StringUtils.h"
|
||||
#include "Common/GPU/Shader.h"
|
||||
#include "Common/GPU/ShaderWriter.h"
|
||||
#include "Common/Data/Convert/ColorConv.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "GPU/Common/Draw2D.h"
|
||||
#include "GPU/Common/DrawEngineCommon.h"
|
||||
#include "GPU/Common/TextureCacheCommon.h"
|
||||
#include "GPU/Common/TextureShaderCommon.h"
|
||||
#include "GPU/Common/DepalettizeShaderCommon.h"
|
||||
|
||||
static const VaryingDef varyings[1] = {
|
||||
{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
|
||||
};
|
||||
|
||||
static const SamplerDef samplers[2] = {
|
||||
{ "tex" },
|
||||
{ "pal" },
|
||||
};
|
||||
|
||||
// Only stores the draw context and the Draw2D helper; samplers, CLUT textures and
// pipelines are created on demand by the Get* methods.
TextureShaderCache::TextureShaderCache(Draw::DrawContext *draw, Draw2D *draw2D)
	: draw_(draw),
	  draw2D_(draw2D) {
}
|
||||
|
||||
// Tears down everything the cache holds. DeviceLost() does nothing but call Clear(),
// so Clear() is invoked directly here.
TextureShaderCache::~TextureShaderCache() {
	Clear();
}
|
||||
|
||||
// Adopts the new draw context after a device restore. Nothing is recreated here;
// cached objects are rebuilt the next time they are requested.
void TextureShaderCache::DeviceRestore(Draw::DrawContext *draw) {
	this->draw_ = draw;
}
|
||||
|
||||
void TextureShaderCache::DeviceLost() {
|
||||
Clear();
|
||||
}
|
||||
|
||||
// Returns the palette (CLUT) texture for the given palette, creating and caching it on a miss.
//
// clutFormat: palette format of rawClut.
// clutHash:   hash of the palette contents; XORed with the format to form the cache key.
// rawClut:    raw palette data, only read on a cache miss.
//
// The result is returned by value. rampLength is the 1-based index of the first entry
// where any channel is lower than in the previous entry, or the full entry count if
// the palette never decreases. If texture creation fails, the returned texture pointer
// is null and nothing is cached.
ClutTexture TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut) {
	// Simplistic, but works well enough.
	const u32 clutId = clutHash ^ (uint32_t)clutFormat;

	auto oldtex = texCache_.find(clutId);
	if (oldtex != texCache_.end()) {
		oldtex->second->lastFrame = gpuStats.numFlips;
		return *oldtex->second;
	}

	const int maxClutEntries = clutFormat == GE_CMODE_32BIT_ABGR8888 ? 256 : 512;

	Draw::TextureDesc desc{};
	desc.width = maxClutEntries;
	desc.height = 1;
	desc.depth = 1;
	desc.mipLevels = 1;
	desc.tag = "clut";
	desc.type = Draw::TextureType::LINEAR2D;  // TODO: Try LINEAR1D?
	desc.format = Draw::DataFormat::R8G8B8A8_UNORM;  // TODO: Also support an BGR format. We won't bother with the 16-bit formats here.

	// Scratch space for expanding 16-bit palettes (512 entries * 4 bytes).
	// Aligned for u32 because the converters write to it through a u32 pointer.
	alignas(u32) uint8_t convTemp[2048]{};

	switch (clutFormat) {
	case GEPaletteFormat::GE_CMODE_32BIT_ABGR8888:
		desc.initData.push_back((const uint8_t *)rawClut);
		break;
	case GEPaletteFormat::GE_CMODE_16BIT_BGR5650:
		ConvertRGB565ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries);
		desc.initData.push_back(convTemp);
		break;
	case GEPaletteFormat::GE_CMODE_16BIT_ABGR5551:
		ConvertRGBA5551ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries);
		desc.initData.push_back(convTemp);
		break;
	case GEPaletteFormat::GE_CMODE_16BIT_ABGR4444:
		ConvertRGBA4444ToRGBA8888((u32 *)convTemp, (const u16 *)rawClut, maxClutEntries);
		desc.initData.push_back(convTemp);
		break;
	}

	if (desc.initData.empty()) {
		// Not one of the four palette formats handled above. There is no pixel data to
		// scan or upload, so bail out instead of indexing an empty vector below.
		return ClutTexture{};
	}

	const uint8_t *pixels = desc.initData[0];

	int lastR = 0;
	int lastG = 0;
	int lastB = 0;
	int lastA = 0;

	int rampLength = 0;
	// Quick check for how many continuously growing entries we have at the start.
	// Bilinearly filtering CLUTs only really makes sense for this kind of ramp.
	for (int i = 0; i < maxClutEntries; i++) {
		rampLength = i + 1;
		const int r = pixels[i * 4];
		const int g = pixels[i * 4 + 1];
		const int b = pixels[i * 4 + 2];
		const int a = pixels[i * 4 + 3];
		if (r < lastR || g < lastG || b < lastB || a < lastA) {
			break;
		}
		lastR = r;
		lastG = g;
		lastB = b;
		lastA = a;
	}

	ClutTexture result{};
	result.texture = draw_->CreateTexture(desc);
	result.lastFrame = gpuStats.numFlips;
	result.rampLength = rampLength;

	if (!result.texture) {
		// Do not cache a failed creation: cached entries are expected to hold a
		// valid texture when they are later released.
		return result;
	}

	texCache_[clutId] = new ClutTexture(result);
	return result;
}
|
||||
|
||||
void TextureShaderCache::Clear() {
|
||||
for (auto shader = depalCache_.begin(); shader != depalCache_.end(); ++shader) {
|
||||
if (shader->second->pipeline) {
|
||||
shader->second->pipeline->Release();
|
||||
}
|
||||
delete shader->second;
|
||||
}
|
||||
depalCache_.clear();
|
||||
for (auto tex = texCache_.begin(); tex != texCache_.end(); ++tex) {
|
||||
tex->second->texture->Release();
|
||||
delete tex->second;
|
||||
}
|
||||
texCache_.clear();
|
||||
if (nearestSampler_) {
|
||||
nearestSampler_->Release();
|
||||
nearestSampler_ = nullptr;
|
||||
}
|
||||
if (linearSampler_) {
|
||||
linearSampler_->Release();
|
||||
linearSampler_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the shared clamp-to-edge sampler, creating it on first use.
// linearFilter selects the LINEAR min/mag sampler; otherwise the sampler keeps the
// filter values of a value-initialized SamplerStateDesc.
Draw::SamplerState *TextureShaderCache::GetSampler(bool linearFilter) {
	Draw::SamplerState *&cached = linearFilter ? linearSampler_ : nearestSampler_;
	if (cached) {
		return cached;
	}

	Draw::SamplerStateDesc samplerDesc{};
	if (linearFilter) {
		samplerDesc.magFilter = Draw::TextureFilter::LINEAR;
		samplerDesc.minFilter = Draw::TextureFilter::LINEAR;
	}
	samplerDesc.wrapU = Draw::TextureAddressMode::CLAMP_TO_EDGE;
	samplerDesc.wrapV = Draw::TextureAddressMode::CLAMP_TO_EDGE;
	samplerDesc.wrapW = Draw::TextureAddressMode::CLAMP_TO_EDGE;

	cached = draw_->CreateSamplerState(samplerDesc);
	return cached;
}
|
||||
|
||||
void TextureShaderCache::Decimate() {
|
||||
for (auto tex = texCache_.begin(); tex != texCache_.end(); ) {
|
||||
if (tex->second->lastFrame + DEPAL_TEXTURE_OLD_AGE < gpuStats.numFlips) {
|
||||
tex->second->texture->Release();
|
||||
delete tex->second;
|
||||
texCache_.erase(tex++);
|
||||
} else {
|
||||
++tex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the depalettize pipeline for the given state, generating and caching it on a miss.
//
// clutMode:      raw CLUT mode register value; its low 24 bits go into the cache key.
// textureFormat: format of the texture being read.
// bufferFormat:  format of the framebuffer being sampled.
// smoothedDepal: passed through to the shader generator as config.smoothedDepal.
//
// Returns nullptr if the pipeline could not be created. The failed entry stays cached so
// creation is not retried on every call.
Draw2DPipeline *TextureShaderCache::GetDepalettizeShader(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat, bool smoothedDepal) {
	using namespace Draw;

	// Generate an ID for depal shaders. smoothedDepal changes the generated shader, so it
	// must be part of the key; it takes the top bit.
	// NOTE(review): assumes bufferFormat fits in 3 bits so bit 31 is free - confirm.
	u32 id = (clutMode & 0xFFFFFF) | (textureFormat << 24) | (bufferFormat << 28);
	if (smoothedDepal) {
		id |= 0x80000000u;
	}

	auto shader = depalCache_.find(id);
	if (shader != depalCache_.end()) {
		// Match the creation path below: a cached entry without a pipeline is a failure.
		Draw2DPipeline *cached = shader->second;
		return cached->pipeline ? cached : nullptr;
	}

	// TODO: Parse these out of clutMode some nice way, to become a bit more stateless.
	DepalConfig config;
	config.clutFormat = gstate.getClutPaletteFormat();
	config.startPos = gstate.getClutIndexStartPos();
	config.shift = gstate.getClutIndexShift();
	config.mask = gstate.getClutIndexMask();
	config.bufferFormat = bufferFormat;
	config.textureFormat = textureFormat;
	config.smoothedDepal = smoothedDepal;

	Draw2DPipeline *ts = draw2D_->Create2DPipeline([config](ShaderWriter &writer) -> Draw2DPipelineInfo {
		GenerateDepalFs(writer, config);
		return Draw2DPipelineInfo{
			config.bufferFormat == GE_FORMAT_DEPTH16 ? RASTER_DEPTH : RASTER_COLOR,
			RASTER_COLOR,
		};
	});

	depalCache_[id] = ts;

	return ts->pipeline ? ts : nullptr;
}
|
||||
|
||||
// Lists the cache key of every cached depal pipeline as an 8-digit hex string.
// The `type` argument is not consulted.
std::vector<std::string> TextureShaderCache::DebugGetShaderIDs(DebugShaderType type) {
	std::vector<std::string> result;
	for (auto it = depalCache_.begin(); it != depalCache_.end(); ++it) {
		result.push_back(StringFromFormat("%08x", it->first));
	}
	return result;
}
|
||||
|
||||
std::string TextureShaderCache::DebugGetShaderString(std::string idstr, DebugShaderType type, DebugShaderStringType stringType) {
|
||||
uint32_t id;
|
||||
sscanf(idstr.c_str(), "%08x", &id);
|
||||
auto iter = depalCache_.find(id);
|
||||
if (iter == depalCache_.end())
|
||||
return "";
|
||||
switch (stringType) {
|
||||
case SHADER_STRING_SHORT_DESC:
|
||||
return idstr;
|
||||
case SHADER_STRING_SOURCE_CODE:
|
||||
return iter->second->code;
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
// Binds `pipeline` and draws a four-vertex quad with it.
//
// pipeline:        pipeline to use; if it or its backend pipeline is null, nothing is drawn.
// bufferW/bufferH: size used to normalize the bounds into clip-space positions and UVs.
// renderW/renderH: size of the viewport and scissor rectangle that get set.
// bounds:          known vertex bounds; when minV < maxV the quad is cropped to them.
// uoff/voff:       offsets added to the bounds before normalizing.
//
// Without usable bounds the quad covers the whole [-1, 1] range with UVs in [0, 1].
void TextureShaderCache::ApplyShader(Draw2DPipeline *pipeline, float bufferW, float bufferH, int renderW, int renderH, const KnownVertexBounds &bounds, u32 uoff, u32 voff) {
	if (!pipeline || !pipeline->pipeline) {
		// Pipeline creation can fail and leave a null backend pipeline; binding it
		// would dereference null, so skip the draw entirely.
		return;
	}

	Draw2DVertex verts[4] = {
		{-1, -1, 0, 0 },
		{ 1, -1, 1, 0 },
		{-1,  1, 0, 1 },
		{ 1,  1, 1, 1 },
	};

	// If min is not < max, then we don't have values (wasn't set during decode.)
	if (bounds.minV < bounds.maxV) {
		const float invWidth = 1.0f / bufferW;
		const float invHeight = 1.0f / bufferH;
		// Inverse of half = double.
		const float invHalfWidth = invWidth * 2.0f;
		const float invHalfHeight = invHeight * 2.0f;

		const int u1 = bounds.minU + uoff;
		const int v1 = bounds.minV + voff;
		const int u2 = bounds.maxU + uoff;
		const int v2 = bounds.maxV + voff;

		const float left = u1 * invHalfWidth - 1.0f;
		const float right = u2 * invHalfWidth - 1.0f;
		const float top = v1 * invHalfHeight - 1.0f;
		const float bottom = v2 * invHalfHeight - 1.0f;

		const float uvleft = u1 * invWidth;
		const float uvright = u2 * invWidth;
		const float uvtop = v1 * invHeight;
		const float uvbottom = v2 * invHeight;

		// Points are: BL, BR, TL, TR - the same order as the default quad above.
		verts[0] = Draw2DVertex{ left, bottom, uvleft, uvbottom };
		verts[1] = Draw2DVertex{ right, bottom, uvright, uvbottom };
		verts[2] = Draw2DVertex{ left, top, uvleft, uvtop };
		verts[3] = Draw2DVertex{ right, top, uvright, uvtop };

		// We need to reapply the texture next time since we cropped UV.
		gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
	}

	Draw::Viewport vp{ 0.0f, 0.0f, (float)renderW, (float)renderH, 0.0f, 1.0f };
	draw_->BindPipeline(pipeline->pipeline);
	draw_->SetViewports(1, &vp);
	draw_->SetScissorRect(0, 0, renderW, renderH);
	draw_->DrawUP((const uint8_t *)verts, 4);
}
|
69
GPU/Common/TextureShaderCommon.h
Normal file
69
GPU/Common/TextureShaderCommon.h
Normal file
@ -0,0 +1,69 @@
|
||||
// Copyright (c) 2014- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/GPU/Shader.h"
|
||||
#include "Common/GPU/thin3d.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "GPU/Common/Draw2D.h"
|
||||
#include "GPU/Common/ShaderCommon.h"
|
||||
#include "GPU/Common/DepalettizeShaderCommon.h"
|
||||
|
||||
class ClutTexture {
|
||||
public:
|
||||
Draw::Texture *texture;
|
||||
int lastFrame;
|
||||
int rampLength;
|
||||
};
|
||||
|
||||
// For CLUT depal shaders, and other pre-bind texture shaders.
|
||||
// Caches both shaders and palette textures.
|
||||
class TextureShaderCache {
|
||||
public:
|
||||
TextureShaderCache(Draw::DrawContext *draw, Draw2D *draw2D);
|
||||
~TextureShaderCache();
|
||||
|
||||
Draw2DPipeline *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat, bool smoothedDepal);
|
||||
ClutTexture GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut);
|
||||
|
||||
Draw::SamplerState *GetSampler(bool linearFilter);
|
||||
|
||||
void ApplyShader(Draw2DPipeline *pipeline, float bufferW, float bufferH, int renderW, int renderH, const KnownVertexBounds &bounds, u32 uoff, u32 voff);
|
||||
|
||||
void Clear();
|
||||
void Decimate();
|
||||
std::vector<std::string> DebugGetShaderIDs(DebugShaderType type);
|
||||
std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType);
|
||||
|
||||
void DeviceLost();
|
||||
void DeviceRestore(Draw::DrawContext *draw);
|
||||
|
||||
private:
|
||||
Draw::DrawContext *draw_;
|
||||
Draw::SamplerState *nearestSampler_ = nullptr;
|
||||
Draw::SamplerState *linearSampler_ = nullptr;
|
||||
Draw2D *draw2D_;
|
||||
|
||||
std::map<u32, Draw2DPipeline *> depalCache_;
|
||||
std::map<u32, ClutTexture *> texCache_;
|
||||
};
|
@ -617,12 +617,13 @@ rotateVBO:
|
||||
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
|
||||
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
|
||||
vpAndScissor);
|
||||
UpdateCachedViewportState(vpAndScissor);
|
||||
}
|
||||
|
||||
int maxIndex = indexGen.MaxIndex();
|
||||
SoftwareTransform swTransform(params);
|
||||
|
||||
const Lin::Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
|
||||
const Lin::Vec3 trans(gstate_c.vpXOffset, -gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
|
||||
const Lin::Vec3 scale(gstate_c.vpWidthScale, -gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
|
||||
swTransform.SetProjMatrix(gstate.projMatrix, gstate_c.vpWidth < 0, gstate_c.vpHeight < 0, trans, scale);
|
||||
|
||||
@ -693,9 +694,6 @@ rotateVBO:
|
||||
if (gstate.isClearModeAlphaMask()) clearFlag |= Draw::FBChannel::FB_STENCIL_BIT;
|
||||
if (gstate.isClearModeDepthMask()) clearFlag |= Draw::FBChannel::FB_DEPTH_BIT;
|
||||
|
||||
if (clearFlag & Draw::FBChannel::FB_DEPTH_BIT) {
|
||||
framebufferManager_->SetDepthUpdated();
|
||||
}
|
||||
if (clearFlag & Draw::FBChannel::FB_COLOR_BIT) {
|
||||
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
|
||||
}
|
||||
|
@ -35,7 +35,6 @@
|
||||
#include "GPU/GeDisasm.h"
|
||||
|
||||
#include "GPU/Common/FramebufferManagerCommon.h"
|
||||
#include "GPU/Debugger/Debugger.h"
|
||||
#include "GPU/D3D11/ShaderManagerD3D11.h"
|
||||
#include "GPU/D3D11/GPU_D3D11.h"
|
||||
#include "GPU/D3D11/FramebufferManagerD3D11.h"
|
||||
@ -60,7 +59,7 @@ GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
||||
shaderManagerD3D11_ = new ShaderManagerD3D11(draw, device_, context_, featureLevel);
|
||||
framebufferManagerD3D11_ = new FramebufferManagerD3D11(draw);
|
||||
framebufferManager_ = framebufferManagerD3D11_;
|
||||
textureCacheD3D11_ = new TextureCacheD3D11(draw);
|
||||
textureCacheD3D11_ = new TextureCacheD3D11(draw, framebufferManager_->GetDraw2D());
|
||||
textureCache_ = textureCacheD3D11_;
|
||||
drawEngineCommon_ = &drawEngine_;
|
||||
shaderManager_ = shaderManagerD3D11_;
|
||||
@ -239,13 +238,6 @@ void GPU_D3D11::BeginFrame() {
|
||||
gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX);
|
||||
}
|
||||
|
||||
void GPU_D3D11::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
|
||||
// TODO: Some games like Spongebob - Yellow Avenger, never change framebuffer, they blit to it.
|
||||
// So breaking on frames doesn't work. Might want to move this to sceDisplay vsync.
|
||||
GPUDebug::NotifyDisplay(framebuf, stride, format);
|
||||
framebufferManagerD3D11_->SetDisplayFramebuffer(framebuf, stride, format);
|
||||
}
|
||||
|
||||
void GPU_D3D11::CopyDisplayToOutput(bool reallyDirty) {
|
||||
// Flush anything left over.
|
||||
drawEngine_.Flush();
|
||||
@ -332,8 +324,8 @@ std::vector<std::string> GPU_D3D11::DebugGetShaderIDs(DebugShaderType type) {
|
||||
switch (type) {
|
||||
case SHADER_TYPE_VERTEXLOADER:
|
||||
return drawEngine_.DebugGetVertexLoaderIDs();
|
||||
case SHADER_TYPE_DEPAL:
|
||||
return textureCache_->GetDepalShaderCache()->DebugGetShaderIDs(type);
|
||||
case SHADER_TYPE_TEXTURE:
|
||||
return textureCache_->GetTextureShaderCache()->DebugGetShaderIDs(type);
|
||||
default:
|
||||
return shaderManagerD3D11_->DebugGetShaderIDs(type);
|
||||
}
|
||||
@ -343,8 +335,8 @@ std::string GPU_D3D11::DebugGetShaderString(std::string id, DebugShaderType type
|
||||
switch (type) {
|
||||
case SHADER_TYPE_VERTEXLOADER:
|
||||
return drawEngine_.DebugGetVertexLoaderString(id, stringType);
|
||||
case SHADER_TYPE_DEPAL:
|
||||
return textureCache_->GetDepalShaderCache()->DebugGetShaderString(id, type, stringType);
|
||||
case SHADER_TYPE_TEXTURE:
|
||||
return textureCache_->GetTextureShaderCache()->DebugGetShaderString(id, type, stringType);
|
||||
default:
|
||||
return shaderManagerD3D11_->DebugGetShaderString(id, type, stringType);
|
||||
}
|
||||
|
@ -23,7 +23,7 @@
|
||||
|
||||
#include "GPU/GPUCommon.h"
|
||||
#include "GPU/D3D11/DrawEngineD3D11.h"
|
||||
#include "GPU/Common/DepalettizeCommon.h"
|
||||
#include "GPU/Common/TextureShaderCommon.h"
|
||||
#include "GPU/Common/VertexDecoderCommon.h"
|
||||
|
||||
class FramebufferManagerD3D11;
|
||||
@ -41,7 +41,6 @@ public:
|
||||
void ExecuteOp(u32 op, u32 diff) override;
|
||||
|
||||
void ReapplyGfxState() override;
|
||||
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
|
||||
void GetStats(char *buffer, size_t bufsize) override;
|
||||
void ClearCacheNextFrame() override;
|
||||
void DeviceLost() override; // Only happens on Android. Drop all textures and shaders.
|
||||
|
@ -293,21 +293,11 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
|
||||
GenericStencilFuncState stencilState;
|
||||
ConvertStencilFuncState(stencilState);
|
||||
|
||||
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
|
||||
// Enforce plain depth writing.
|
||||
keys_.depthStencil.value = 0;
|
||||
keys_.depthStencil.depthTestEnable = true;
|
||||
keys_.depthStencil.depthWriteEnable = true;
|
||||
keys_.depthStencil.stencilTestEnable = false;
|
||||
keys_.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;
|
||||
} else if (gstate.isModeClear()) {
|
||||
if (gstate.isModeClear()) {
|
||||
keys_.depthStencil.value = 0;
|
||||
keys_.depthStencil.depthTestEnable = true;
|
||||
keys_.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;
|
||||
keys_.depthStencil.depthWriteEnable = gstate.isClearModeDepthMask();
|
||||
if (gstate.isClearModeDepthMask()) {
|
||||
framebufferManager_->SetDepthUpdated();
|
||||
}
|
||||
|
||||
// Stencil Test
|
||||
bool alphaMask = gstate.isClearModeAlphaMask();
|
||||
@ -336,9 +326,6 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
|
||||
keys_.depthStencil.depthTestEnable = true;
|
||||
keys_.depthStencil.depthCompareOp = compareOps[gstate.getDepthTestFunction()];
|
||||
keys_.depthStencil.depthWriteEnable = gstate.isDepthWriteEnabled();
|
||||
if (gstate.isDepthWriteEnabled()) {
|
||||
framebufferManager_->SetDepthUpdated();
|
||||
}
|
||||
} else {
|
||||
keys_.depthStencil.depthTestEnable = false;
|
||||
keys_.depthStencil.depthWriteEnable = false;
|
||||
@ -387,15 +374,13 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
|
||||
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
|
||||
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
|
||||
vpAndScissor);
|
||||
UpdateCachedViewportState(vpAndScissor);
|
||||
|
||||
float depthMin = vpAndScissor.depthRangeMin;
|
||||
float depthMax = vpAndScissor.depthRangeMax;
|
||||
|
||||
if (depthMin < 0.0f) depthMin = 0.0f;
|
||||
if (depthMax > 1.0f) depthMax = 1.0f;
|
||||
if (vpAndScissor.dirtyDepth) {
|
||||
gstate_c.Dirty(DIRTY_DEPTHRANGE);
|
||||
}
|
||||
|
||||
Draw::Viewport &vp = dynState_.viewport;
|
||||
vp.TopLeftX = vpAndScissor.viewportX;
|
||||
@ -405,10 +390,6 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
|
||||
vp.MinDepth = depthMin;
|
||||
vp.MaxDepth = depthMax;
|
||||
|
||||
if (vpAndScissor.dirtyProj) {
|
||||
gstate_c.Dirty(DIRTY_PROJMATRIX);
|
||||
}
|
||||
|
||||
D3D11_RECT &scissor = dynState_.scissor;
|
||||
scissor.left = vpAndScissor.scissorX;
|
||||
scissor.top = vpAndScissor.scissorY;
|
||||
|
@ -31,7 +31,7 @@
|
||||
#include "GPU/D3D11/TextureCacheD3D11.h"
|
||||
#include "GPU/D3D11/FramebufferManagerD3D11.h"
|
||||
#include "GPU/D3D11/ShaderManagerD3D11.h"
|
||||
#include "GPU/Common/DepalettizeCommon.h"
|
||||
#include "GPU/Common/TextureShaderCommon.h"
|
||||
#include "GPU/D3D11/D3D11Util.h"
|
||||
#include "GPU/Common/FramebufferManagerCommon.h"
|
||||
#include "GPU/Common/TextureDecoder.h"
|
||||
@ -127,8 +127,8 @@ ID3D11SamplerState *SamplerCacheD3D11::GetOrCreateSampler(ID3D11Device *device,
|
||||
return sampler;
|
||||
}
|
||||
|
||||
TextureCacheD3D11::TextureCacheD3D11(Draw::DrawContext *draw)
|
||||
: TextureCacheCommon(draw) {
|
||||
TextureCacheD3D11::TextureCacheD3D11(Draw::DrawContext *draw, Draw2D *draw2D)
|
||||
: TextureCacheCommon(draw, draw2D) {
|
||||
device_ = (ID3D11Device *)draw->GetNativeObject(Draw::NativeObject::DEVICE);
|
||||
context_ = (ID3D11DeviceContext *)draw->GetNativeObject(Draw::NativeObject::CONTEXT);
|
||||
|
||||
@ -236,6 +236,11 @@ void TextureCacheD3D11::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBa
|
||||
}
|
||||
|
||||
void TextureCacheD3D11::BindTexture(TexCacheEntry *entry) {
|
||||
if (!entry) {
|
||||
ID3D11ShaderResourceView *textureView = nullptr;
|
||||
context_->PSSetShaderResources(0, 1, &textureView);
|
||||
return;
|
||||
}
|
||||
ID3D11ShaderResourceView *textureView = DxView(entry);
|
||||
if (textureView != lastBoundTexture) {
|
||||
context_->PSSetShaderResources(0, 1, &textureView);
|
||||
@ -460,6 +465,8 @@ bool TextureCacheD3D11::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level
|
||||
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE);
|
||||
// We may have blitted to a temp FBO.
|
||||
framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug");
|
||||
if (!retval)
|
||||
ERROR_LOG(G3D, "Failed to get debug texture: copy to memory failed");
|
||||
return retval;
|
||||
} else {
|
||||
return false;
|
||||
|
@ -28,7 +28,7 @@
|
||||
struct VirtualFramebuffer;
|
||||
|
||||
class FramebufferManagerD3D11;
|
||||
class DepalShaderCache;
|
||||
class TextureShaderCache;
|
||||
class ShaderManagerD3D11;
|
||||
|
||||
class SamplerCacheD3D11 {
|
||||
@ -43,7 +43,7 @@ private:
|
||||
|
||||
class TextureCacheD3D11 : public TextureCacheCommon {
|
||||
public:
|
||||
TextureCacheD3D11(Draw::DrawContext *draw);
|
||||
TextureCacheD3D11(Draw::DrawContext *draw, Draw2D *draw2D);
|
||||
~TextureCacheD3D11();
|
||||
|
||||
void StartFrame() override;
|
||||
|
@ -154,6 +154,15 @@ void NotifyDisplay(u32 framebuf, u32 stride, int format) {
|
||||
}
|
||||
}
|
||||
|
||||
void NotifyBeginFrame() {
|
||||
if (!active)
|
||||
return;
|
||||
if (breakNext == BreakNext::VSYNC) {
|
||||
// Just start stepping as soon as we can once the vblank finishes.
|
||||
breakNext = BreakNext::OP;
|
||||
}
|
||||
}
|
||||
|
||||
int PrimsThisFrame() {
|
||||
return primsThisFrame;
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ enum class BreakNext {
|
||||
TEX,
|
||||
NONTEX,
|
||||
FRAME,
|
||||
VSYNC,
|
||||
PRIM,
|
||||
CURVE,
|
||||
COUNT,
|
||||
@ -43,6 +44,7 @@ void SetBreakCount(int c, bool relative = false);
|
||||
bool NotifyCommand(u32 pc);
|
||||
void NotifyDraw();
|
||||
void NotifyDisplay(u32 framebuf, u32 stride, int format);
|
||||
void NotifyBeginFrame();
|
||||
|
||||
int PrimsThisFrame();
|
||||
int PrimsLastFrame();
|
||||
|
@ -50,6 +50,7 @@ namespace GPURecord {
|
||||
static bool active = false;
|
||||
static bool nextFrame = false;
|
||||
static int flipLastAction = -1;
|
||||
static int flipFinishAt = -1;
|
||||
static std::function<void(const Path &)> writeCallback;
|
||||
|
||||
static std::vector<u8> pushbuf;
|
||||
@ -145,6 +146,7 @@ static void BeginRecording() {
|
||||
lastTextures.clear();
|
||||
lastRenderTargets.clear();
|
||||
flipLastAction = gpuStats.numFlips;
|
||||
flipFinishAt = -1;
|
||||
|
||||
u32 ptr = (u32)pushbuf.size();
|
||||
u32 sz = 512 * 4;
|
||||
@ -454,7 +456,9 @@ static void EmitTransfer(u32 op) {
|
||||
|
||||
static void EmitClut(u32 op) {
|
||||
u32 addr = gstate.getClutAddress();
|
||||
u32 bytes = (op & 0x3F) * 32;
|
||||
// Actually should only be 0x3F, but we allow enhanced CLUTs. See #15727.
|
||||
u32 blocks = (op & 0x7F) == 0x40 ? 0x40 : (op & 0x3F);
|
||||
u32 bytes = blocks * 32;
|
||||
bytes = Memory::ValidSize(addr, bytes);
|
||||
|
||||
if (bytes != 0) {
|
||||
@ -492,6 +496,7 @@ bool Activate() {
|
||||
if (!nextFrame) {
|
||||
nextFrame = true;
|
||||
flipLastAction = gpuStats.numFlips;
|
||||
flipFinishAt = -1;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -510,6 +515,7 @@ static void FinishRecording() {
|
||||
NOTICE_LOG(SYSTEM, "Recording finished");
|
||||
active = false;
|
||||
flipLastAction = gpuStats.numFlips;
|
||||
flipFinishAt = -1;
|
||||
|
||||
if (writeCallback)
|
||||
writeCallback(filename);
|
||||
@ -671,10 +677,10 @@ void NotifyDisplay(u32 framebuf, int stride, int fmt) {
|
||||
}
|
||||
}
|
||||
|
||||
void NotifyFrame() {
|
||||
void NotifyBeginFrame() {
|
||||
const bool noDisplayAction = flipLastAction + 4 < gpuStats.numFlips;
|
||||
// We do this only to catch things that don't call NotifyFrame.
|
||||
if (active && HasDrawCommands() && noDisplayAction) {
|
||||
// We do this only to catch things that don't call NotifyDisplay.
|
||||
if (active && HasDrawCommands() && (noDisplayAction || gpuStats.numFlips == flipFinishAt)) {
|
||||
NOTICE_LOG(SYSTEM, "Recording complete on frame");
|
||||
|
||||
struct DisplayBufData {
|
||||
@ -698,6 +704,8 @@ void NotifyFrame() {
|
||||
if (nextFrame && (gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0 && noDisplayAction) {
|
||||
NOTICE_LOG(SYSTEM, "Recording starting on frame...");
|
||||
BeginRecording();
|
||||
// If we began on a BeginFrame, end on a BeginFrame.
|
||||
flipFinishAt = gpuStats.numFlips + 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@ void NotifyMemcpy(u32 dest, u32 src, u32 sz);
|
||||
void NotifyMemset(u32 dest, int v, u32 sz);
|
||||
void NotifyUpload(u32 dest, u32 sz);
|
||||
void NotifyDisplay(u32 addr, int stride, int fmt);
|
||||
void NotifyFrame();
|
||||
void NotifyBeginFrame();
|
||||
void NotifyCPU();
|
||||
|
||||
};
|
||||
|
@ -582,18 +582,17 @@ rotateVBO:
|
||||
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
|
||||
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
|
||||
vpAndScissor);
|
||||
UpdateCachedViewportState(vpAndScissor);
|
||||
}
|
||||
|
||||
int maxIndex = indexGen.MaxIndex();
|
||||
SoftwareTransform swTransform(params);
|
||||
|
||||
// Half pixel offset hack.
|
||||
float xScale = gstate_c.vpWidth < 0 ? -1.0f : 1.0f;
|
||||
float xOffset = -1.0f / gstate_c.curRTRenderWidth;
|
||||
float yScale = gstate_c.vpHeight > 0 ? -1.0f : 1.0f;
|
||||
float yOffset = 1.0f / gstate_c.curRTRenderHeight;
|
||||
|
||||
const Lin::Vec3 trans(gstate_c.vpXOffset * xScale + xOffset, gstate_c.vpYOffset * yScale + yOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
|
||||
const Lin::Vec3 trans(gstate_c.vpXOffset + xOffset, -gstate_c.vpYOffset + yOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
|
||||
const Lin::Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
|
||||
swTransform.SetProjMatrix(gstate.projMatrix, gstate_c.vpWidth < 0, gstate_c.vpHeight > 0, trans, scale);
|
||||
|
||||
@ -640,9 +639,6 @@ rotateVBO:
|
||||
if (gstate.isClearModeAlphaMask()) mask |= D3DCLEAR_STENCIL;
|
||||
if (gstate.isClearModeDepthMask()) mask |= D3DCLEAR_ZBUFFER;
|
||||
|
||||
if (mask & D3DCLEAR_ZBUFFER) {
|
||||
framebufferManager_->SetDepthUpdated();
|
||||
}
|
||||
if (mask & D3DCLEAR_TARGET) {
|
||||
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
|
||||
}
|
||||
|
@ -140,7 +140,7 @@
|
||||
}
|
||||
|
||||
const u32 fb_address = vfb->fb_address & 0x3FFFFFFF;
|
||||
const int dstBpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
const int dstBpp = vfb->fb_format == GE_FORMAT_8888 ? 4 : 2;
|
||||
|
||||
// We always need to convert from the framebuffer native format.
|
||||
// Right now that's always 8888.
|
||||
@ -163,7 +163,7 @@
|
||||
// TODO: Handle the other formats? We don't currently create them, I think.
|
||||
const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp;
|
||||
// Pixel size always 4 here because we always request BGRA8888.
|
||||
ConvertFromBGRA8888(Memory::GetPointerWrite(fb_address + dstByteOffset), (u8 *)locked.pBits, vfb->fb_stride, locked.Pitch / 4, w, h, vfb->format);
|
||||
ConvertFromBGRA8888(Memory::GetPointerWrite(fb_address + dstByteOffset), (u8 *)locked.pBits, vfb->fb_stride, locked.Pitch / 4, w, h, vfb->fb_format);
|
||||
offscreen->UnlockRect();
|
||||
} else {
|
||||
ERROR_LOG_REPORT(G3D, "Unable to lock rect from %08x: %d,%d %dx%d of %dx%d", fb_address, (int)rect.left, (int)rect.top, (int)rect.right, (int)rect.bottom, vfb->renderWidth, vfb->renderHeight);
|
||||
|
@ -38,7 +38,6 @@
|
||||
#include "GPU/GeDisasm.h"
|
||||
|
||||
#include "GPU/Common/FramebufferManagerCommon.h"
|
||||
#include "GPU/Debugger/Debugger.h"
|
||||
#include "GPU/Directx9/ShaderManagerDX9.h"
|
||||
#include "GPU/Directx9/GPU_DX9.h"
|
||||
#include "GPU/Directx9/FramebufferManagerDX9.h"
|
||||
@ -58,7 +57,7 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
||||
shaderManagerDX9_ = new ShaderManagerDX9(draw, device_);
|
||||
framebufferManagerDX9_ = new FramebufferManagerDX9(draw);
|
||||
framebufferManager_ = framebufferManagerDX9_;
|
||||
textureCacheDX9_ = new TextureCacheDX9(draw);
|
||||
textureCacheDX9_ = new TextureCacheDX9(draw, framebufferManager_->GetDraw2D());
|
||||
textureCache_ = textureCacheDX9_;
|
||||
drawEngineCommon_ = &drawEngine_;
|
||||
shaderManager_ = shaderManagerDX9_;
|
||||
@ -160,6 +159,7 @@ void GPU_DX9::CheckGPUFeatures() {
|
||||
u32 features = 0;
|
||||
features |= GPU_SUPPORTS_16BIT_FORMATS;
|
||||
features |= GPU_SUPPORTS_BLEND_MINMAX;
|
||||
features |= GPU_SUPPORTS_DEPTH_TEXTURE;
|
||||
features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL;
|
||||
|
||||
// Accurate depth is required because the Direct3D API does not support inverse Z.
|
||||
@ -285,11 +285,6 @@ void GPU_DX9::BeginFrame() {
|
||||
framebufferManager_->BeginFrame();
|
||||
}
|
||||
|
||||
void GPU_DX9::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
|
||||
GPUDebug::NotifyDisplay(framebuf, stride, format);
|
||||
framebufferManagerDX9_->SetDisplayFramebuffer(framebuf, stride, format);
|
||||
}
|
||||
|
||||
void GPU_DX9::CopyDisplayToOutput(bool reallyDirty) {
|
||||
dxstate.depthWrite.set(true);
|
||||
dxstate.colorMask.set(0xF);
|
||||
@ -374,8 +369,8 @@ std::vector<std::string> GPU_DX9::DebugGetShaderIDs(DebugShaderType type) {
|
||||
switch (type) {
|
||||
case SHADER_TYPE_VERTEXLOADER:
|
||||
return drawEngine_.DebugGetVertexLoaderIDs();
|
||||
case SHADER_TYPE_DEPAL:
|
||||
return textureCache_->GetDepalShaderCache()->DebugGetShaderIDs(type);
|
||||
case SHADER_TYPE_TEXTURE:
|
||||
return textureCache_->GetTextureShaderCache()->DebugGetShaderIDs(type);
|
||||
default:
|
||||
return shaderManagerDX9_->DebugGetShaderIDs(type);
|
||||
}
|
||||
@ -385,8 +380,8 @@ std::string GPU_DX9::DebugGetShaderString(std::string id, DebugShaderType type,
|
||||
switch (type) {
|
||||
case SHADER_TYPE_VERTEXLOADER:
|
||||
return drawEngine_.DebugGetVertexLoaderString(id, stringType);
|
||||
case SHADER_TYPE_DEPAL:
|
||||
return textureCache_->GetDepalShaderCache()->DebugGetShaderString(id, type, stringType);
|
||||
case SHADER_TYPE_TEXTURE:
|
||||
return textureCache_->GetTextureShaderCache()->DebugGetShaderString(id, type, stringType);
|
||||
default:
|
||||
return shaderManagerDX9_->DebugGetShaderString(id, type, stringType);
|
||||
}
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include "GPU/GPUCommon.h"
|
||||
#include "GPU/Directx9/FramebufferManagerDX9.h"
|
||||
#include "GPU/Directx9/DrawEngineDX9.h"
|
||||
#include "GPU/Common/DepalettizeCommon.h"
|
||||
#include "GPU/Common/TextureShaderCommon.h"
|
||||
#include "GPU/Common/VertexDecoderCommon.h"
|
||||
|
||||
class ShaderManagerDX9;
|
||||
@ -40,7 +40,6 @@ public:
|
||||
void ExecuteOp(u32 op, u32 diff) override;
|
||||
|
||||
void ReapplyGfxState() override;
|
||||
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
|
||||
void GetStats(char *buffer, size_t bufsize) override;
|
||||
void ClearCacheNextFrame() override;
|
||||
void DeviceLost() override; // Only happens on Android. Drop all textures and shaders.
|
||||
|
@ -211,21 +211,11 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
|
||||
ConvertStencilFuncState(stencilState);
|
||||
|
||||
// Set Stencil/Depth
|
||||
|
||||
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
|
||||
// Enforce plain depth writing.
|
||||
dxstate.depthTest.enable();
|
||||
dxstate.depthFunc.set(D3DCMP_ALWAYS);
|
||||
dxstate.depthWrite.set(true);
|
||||
dxstate.stencilTest.disable();
|
||||
} else if (gstate.isModeClear()) {
|
||||
if (gstate.isModeClear()) {
|
||||
// Depth Test
|
||||
dxstate.depthTest.enable();
|
||||
dxstate.depthFunc.set(D3DCMP_ALWAYS);
|
||||
dxstate.depthWrite.set(gstate.isClearModeDepthMask());
|
||||
if (gstate.isClearModeDepthMask()) {
|
||||
framebufferManager_->SetDepthUpdated();
|
||||
}
|
||||
|
||||
// Stencil Test
|
||||
bool alphaMask = gstate.isClearModeAlphaMask();
|
||||
@ -246,9 +236,6 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
|
||||
dxstate.depthTest.enable();
|
||||
dxstate.depthFunc.set(ztests[gstate.getDepthTestFunction()]);
|
||||
dxstate.depthWrite.set(gstate.isDepthWriteEnabled());
|
||||
if (gstate.isDepthWriteEnabled()) {
|
||||
framebufferManager_->SetDepthUpdated();
|
||||
}
|
||||
} else {
|
||||
dxstate.depthTest.disable();
|
||||
}
|
||||
@ -273,6 +260,7 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
|
||||
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
|
||||
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
|
||||
vpAndScissor);
|
||||
UpdateCachedViewportState(vpAndScissor);
|
||||
|
||||
dxstate.scissorTest.enable();
|
||||
dxstate.scissorRect.set(vpAndScissor.scissorX, vpAndScissor.scissorY, vpAndScissor.scissorX + vpAndScissor.scissorW, vpAndScissor.scissorY + vpAndScissor.scissorH);
|
||||
@ -281,12 +269,6 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
|
||||
float depthMax = vpAndScissor.depthRangeMax;
|
||||
|
||||
dxstate.viewport.set(vpAndScissor.viewportX, vpAndScissor.viewportY, vpAndScissor.viewportW, vpAndScissor.viewportH, depthMin, depthMax);
|
||||
if (vpAndScissor.dirtyProj) {
|
||||
gstate_c.Dirty(DIRTY_PROJMATRIX);
|
||||
}
|
||||
if (vpAndScissor.dirtyDepth) {
|
||||
gstate_c.Dirty(DIRTY_DEPTHRANGE);
|
||||
}
|
||||
}
|
||||
|
||||
gstate_c.Clean(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_BLEND_STATE);
|
||||
|
@ -28,7 +28,7 @@
|
||||
#include "GPU/Directx9/FramebufferManagerDX9.h"
|
||||
#include "GPU/Directx9/ShaderManagerDX9.h"
|
||||
#include "Common/GPU/D3D9/D3D9StateCache.h"
|
||||
#include "GPU/Common/DepalettizeCommon.h"
|
||||
#include "GPU/Common/TextureShaderCommon.h"
|
||||
#include "GPU/Common/FramebufferManagerCommon.h"
|
||||
#include "GPU/Common/TextureDecoder.h"
|
||||
#include "Core/Config.h"
|
||||
@ -59,8 +59,8 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = {
|
||||
D3DDECL_END()
|
||||
};
|
||||
|
||||
TextureCacheDX9::TextureCacheDX9(Draw::DrawContext *draw)
|
||||
: TextureCacheCommon(draw) {
|
||||
TextureCacheDX9::TextureCacheDX9(Draw::DrawContext *draw, Draw2D *draw2D)
|
||||
: TextureCacheCommon(draw, draw2D) {
|
||||
lastBoundTexture = INVALID_TEX;
|
||||
isBgraBackend_ = true;
|
||||
|
||||
@ -204,6 +204,10 @@ void TextureCacheDX9::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase
|
||||
}
|
||||
|
||||
void TextureCacheDX9::BindTexture(TexCacheEntry *entry) {
|
||||
if (!entry) {
|
||||
device_->SetTexture(0, nullptr);
|
||||
return;
|
||||
}
|
||||
LPDIRECT3DBASETEXTURE9 texture = DxTex(entry);
|
||||
if (texture != lastBoundTexture) {
|
||||
device_->SetTexture(0, texture);
|
||||
@ -215,7 +219,7 @@ void TextureCacheDX9::BindTexture(TexCacheEntry *entry) {
|
||||
}
|
||||
|
||||
void TextureCacheDX9::Unbind() {
|
||||
device_->SetTexture(0, NULL);
|
||||
device_->SetTexture(0, nullptr);
|
||||
InvalidateLastTexture();
|
||||
}
|
||||
|
||||
|
@ -24,14 +24,14 @@
|
||||
#include "GPU/Common/TextureCacheCommon.h"
|
||||
|
||||
struct VirtualFramebuffer;
|
||||
class DepalShaderCache;
|
||||
class TextureShaderCache;
|
||||
|
||||
class FramebufferManagerDX9;
|
||||
class ShaderManagerDX9;
|
||||
|
||||
class TextureCacheDX9 : public TextureCacheCommon {
|
||||
public:
|
||||
TextureCacheDX9(Draw::DrawContext *draw);
|
||||
TextureCacheDX9(Draw::DrawContext *draw, Draw2D *draw2D);
|
||||
~TextureCacheDX9();
|
||||
|
||||
void StartFrame() override;
|
||||
|
@ -364,6 +364,7 @@ void DrawEngineGLES::DoFlush() {
|
||||
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
|
||||
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
|
||||
vpAndScissor);
|
||||
UpdateCachedViewportState(vpAndScissor);
|
||||
}
|
||||
|
||||
int maxIndex = indexGen.MaxIndex();
|
||||
@ -423,9 +424,6 @@ void DrawEngineGLES::DoFlush() {
|
||||
bool colorMask = gstate.isClearModeColorMask();
|
||||
bool alphaMask = gstate.isClearModeAlphaMask();
|
||||
bool depthMask = gstate.isClearModeDepthMask();
|
||||
if (depthMask) {
|
||||
framebufferManager_->SetDepthUpdated();
|
||||
}
|
||||
|
||||
GLbitfield target = 0;
|
||||
// Without this, we will clear RGB when clearing stencil, which breaks games.
|
||||
|
@ -36,7 +36,6 @@
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "GPU/GeDisasm.h"
|
||||
#include "GPU/Common/FramebufferManagerCommon.h"
|
||||
#include "GPU/Debugger/Debugger.h"
|
||||
#include "GPU/GLES/ShaderManagerGLES.h"
|
||||
#include "GPU/GLES/GPU_GLES.h"
|
||||
#include "GPU/GLES/FramebufferManagerGLES.h"
|
||||
@ -60,7 +59,7 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
||||
shaderManagerGL_ = new ShaderManagerGLES(draw);
|
||||
framebufferManagerGL_ = new FramebufferManagerGLES(draw);
|
||||
framebufferManager_ = framebufferManagerGL_;
|
||||
textureCacheGL_ = new TextureCacheGLES(draw);
|
||||
textureCacheGL_ = new TextureCacheGLES(draw, framebufferManager_->GetDraw2D());
|
||||
textureCache_ = textureCacheGL_;
|
||||
drawEngineCommon_ = &drawEngine_;
|
||||
shaderManager_ = shaderManagerGL_;
|
||||
@ -360,11 +359,6 @@ void GPU_GLES::BeginFrame() {
|
||||
framebufferManagerGL_->BeginFrame();
|
||||
}
|
||||
|
||||
void GPU_GLES::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
|
||||
GPUDebug::NotifyDisplay(framebuf, stride, format);
|
||||
framebufferManagerGL_->SetDisplayFramebuffer(framebuf, stride, format);
|
||||
}
|
||||
|
||||
void GPU_GLES::CopyDisplayToOutput(bool reallyDirty) {
|
||||
// Flush anything left over.
|
||||
framebufferManagerGL_->RebindFramebuffer("RebindFramebuffer - CopyDisplayToOutput");
|
||||
@ -453,8 +447,8 @@ std::vector<std::string> GPU_GLES::DebugGetShaderIDs(DebugShaderType type) {
|
||||
switch (type) {
|
||||
case SHADER_TYPE_VERTEXLOADER:
|
||||
return drawEngine_.DebugGetVertexLoaderIDs();
|
||||
case SHADER_TYPE_DEPAL:
|
||||
return textureCache_->GetDepalShaderCache()->DebugGetShaderIDs(type);
|
||||
case SHADER_TYPE_TEXTURE:
|
||||
return textureCache_->GetTextureShaderCache()->DebugGetShaderIDs(type);
|
||||
default:
|
||||
return shaderManagerGL_->DebugGetShaderIDs(type);
|
||||
}
|
||||
@ -464,8 +458,8 @@ std::string GPU_GLES::DebugGetShaderString(std::string id, DebugShaderType type,
|
||||
switch (type) {
|
||||
case SHADER_TYPE_VERTEXLOADER:
|
||||
return drawEngine_.DebugGetVertexLoaderString(id, stringType);
|
||||
case SHADER_TYPE_DEPAL:
|
||||
return textureCache_->GetDepalShaderCache()->DebugGetShaderString(id, type, stringType);
|
||||
case SHADER_TYPE_TEXTURE:
|
||||
return textureCache_->GetTextureShaderCache()->DebugGetShaderString(id, type, stringType);
|
||||
default:
|
||||
return shaderManagerGL_->DebugGetShaderString(id, type, stringType);
|
||||
}
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include "Common/File/Path.h"
|
||||
|
||||
#include "GPU/GPUCommon.h"
|
||||
#include "GPU/Common/DepalettizeCommon.h"
|
||||
#include "GPU/Common/TextureShaderCommon.h"
|
||||
#include "GPU/GLES/FramebufferManagerGLES.h"
|
||||
#include "GPU/GLES/DrawEngineGLES.h"
|
||||
#include "GPU/GLES/FragmentTestCacheGLES.h"
|
||||
@ -47,7 +47,6 @@ public:
|
||||
void ExecuteOp(u32 op, u32 diff) override;
|
||||
|
||||
void ReapplyGfxState() override;
|
||||
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
|
||||
void GetStats(char *buffer, size_t bufsize) override;
|
||||
|
||||
void ClearCacheNextFrame() override;
|
||||
|
@ -251,24 +251,14 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
|
||||
GenericStencilFuncState stencilState;
|
||||
ConvertStencilFuncState(stencilState);
|
||||
|
||||
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
|
||||
// Enforce plain depth writing.
|
||||
renderManager->SetStencilDisabled();
|
||||
renderManager->SetDepth(true, true, GL_ALWAYS);
|
||||
} else if (gstate.isModeClear()) {
|
||||
if (gstate.isModeClear()) {
|
||||
// Depth Test
|
||||
if (gstate.isClearModeDepthMask()) {
|
||||
framebufferManager_->SetDepthUpdated();
|
||||
}
|
||||
renderManager->SetStencilFunc(gstate.isClearModeAlphaMask(), GL_ALWAYS, 0xFF, 0xFF);
|
||||
renderManager->SetStencilOp(stencilState.writeMask, GL_REPLACE, GL_REPLACE, GL_REPLACE);
|
||||
renderManager->SetDepth(true, gstate.isClearModeDepthMask() ? true : false, GL_ALWAYS);
|
||||
} else {
|
||||
// Depth Test
|
||||
renderManager->SetDepth(gstate.isDepthTestEnabled(), gstate.isDepthWriteEnabled(), compareOps[gstate.getDepthTestFunction()]);
|
||||
if (gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled()) {
|
||||
framebufferManager_->SetDepthUpdated();
|
||||
}
|
||||
|
||||
// Stencil Test
|
||||
if (stencilState.enabled) {
|
||||
@ -286,19 +276,13 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
|
||||
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
|
||||
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
|
||||
vpAndScissor);
|
||||
UpdateCachedViewportState(vpAndScissor);
|
||||
|
||||
renderManager->SetScissor(GLRect2D{ vpAndScissor.scissorX, vpAndScissor.scissorY, vpAndScissor.scissorW, vpAndScissor.scissorH });
|
||||
renderManager->SetViewport({
|
||||
vpAndScissor.viewportX, vpAndScissor.viewportY,
|
||||
vpAndScissor.viewportW, vpAndScissor.viewportH,
|
||||
vpAndScissor.depthRangeMin, vpAndScissor.depthRangeMax });
|
||||
|
||||
if (vpAndScissor.dirtyProj) {
|
||||
gstate_c.Dirty(DIRTY_PROJMATRIX);
|
||||
}
|
||||
if (vpAndScissor.dirtyDepth) {
|
||||
gstate_c.Dirty(DIRTY_DEPTHRANGE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@
|
||||
#include "GPU/GLES/TextureCacheGLES.h"
|
||||
#include "GPU/GLES/FramebufferManagerGLES.h"
|
||||
#include "GPU/Common/FragmentShaderGenerator.h"
|
||||
#include "GPU/Common/DepalettizeCommon.h"
|
||||
#include "GPU/Common/TextureShaderCommon.h"
|
||||
#include "GPU/GLES/ShaderManagerGLES.h"
|
||||
#include "GPU/GLES/DrawEngineGLES.h"
|
||||
#include "GPU/Common/TextureDecoder.h"
|
||||
@ -45,8 +45,8 @@
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
TextureCacheGLES::TextureCacheGLES(Draw::DrawContext *draw)
|
||||
: TextureCacheCommon(draw) {
|
||||
TextureCacheGLES::TextureCacheGLES(Draw::DrawContext *draw, Draw2D *draw2D)
|
||||
: TextureCacheCommon(draw, draw2D) {
|
||||
render_ = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
|
||||
|
||||
nextTexture_ = nullptr;
|
||||
@ -225,7 +225,7 @@ void TextureCacheGLES::BindTexture(TexCacheEntry *entry) {
|
||||
int maxLevel = (entry->status & TexCacheEntry::STATUS_NO_MIPS) ? 0 : entry->maxLevel;
|
||||
SamplerCacheKey samplerKey = GetSamplingParams(maxLevel, entry);
|
||||
ApplySamplingParams(samplerKey);
|
||||
gstate_c.SetUseShaderDepal(false);
|
||||
gstate_c.SetUseShaderDepal(false, false);
|
||||
}
|
||||
|
||||
void TextureCacheGLES::Unbind() {
|
||||
@ -382,19 +382,8 @@ Draw::DataFormat TextureCacheGLES::GetDestFormat(GETextureFormat format, GEPalet
|
||||
}
|
||||
|
||||
bool TextureCacheGLES::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) {
|
||||
GPUgstate saved;
|
||||
if (level != 0) {
|
||||
saved = gstate;
|
||||
|
||||
// The way we set textures is a bit complex. Let's just override level 0.
|
||||
gstate.texsize[0] = gstate.texsize[level];
|
||||
gstate.texaddr[0] = gstate.texaddr[level];
|
||||
gstate.texbufwidth[0] = gstate.texbufwidth[level];
|
||||
}
|
||||
|
||||
InvalidateLastTexture();
|
||||
SetTexture();
|
||||
|
||||
if (!nextTexture_) {
|
||||
if (nextFramebufferTexture_) {
|
||||
VirtualFramebuffer *vfb = nextFramebufferTexture_;
|
||||
@ -427,10 +416,6 @@ bool TextureCacheGLES::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level)
|
||||
int w = gstate.getTextureWidth(level);
|
||||
int h = gstate.getTextureHeight(level);
|
||||
|
||||
if (level != 0) {
|
||||
gstate = saved;
|
||||
}
|
||||
|
||||
bool result = entry->textureName != nullptr;
|
||||
if (result) {
|
||||
buffer.Allocate(w, h, GE_FORMAT_8888, false);
|
||||
@ -445,7 +430,7 @@ bool TextureCacheGLES::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level)
|
||||
}
|
||||
|
||||
void TextureCacheGLES::DeviceLost() {
|
||||
depalShaderCache_->DeviceLost();
|
||||
textureShaderCache_->DeviceLost();
|
||||
Clear(false);
|
||||
draw_ = nullptr;
|
||||
render_ = nullptr;
|
||||
@ -454,5 +439,5 @@ void TextureCacheGLES::DeviceLost() {
|
||||
void TextureCacheGLES::DeviceRestore(Draw::DrawContext *draw) {
|
||||
draw_ = draw;
|
||||
render_ = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
|
||||
depalShaderCache_->DeviceRestore(draw);
|
||||
textureShaderCache_->DeviceRestore(draw);
|
||||
}
|
||||
|
@ -27,22 +27,22 @@
|
||||
|
||||
struct VirtualFramebuffer;
|
||||
class FramebufferManagerGLES;
|
||||
class DepalShaderCache;
|
||||
class TextureShaderCache;
|
||||
class ShaderManagerGLES;
|
||||
class DrawEngineGLES;
|
||||
class GLRTexture;
|
||||
|
||||
class TextureCacheGLES : public TextureCacheCommon {
|
||||
public:
|
||||
TextureCacheGLES(Draw::DrawContext *draw);
|
||||
TextureCacheGLES(Draw::DrawContext *draw, Draw2D *draw2D);
|
||||
~TextureCacheGLES();
|
||||
|
||||
void Clear(bool delete_them) override;
|
||||
void StartFrame() override;
|
||||
|
||||
void SetFramebufferManager(FramebufferManagerGLES *fbManager);
|
||||
void SetDepalShaderCache(DepalShaderCache *dpCache) {
|
||||
depalShaderCache_ = dpCache;
|
||||
void SetDepalShaderCache(TextureShaderCache *dpCache) {
|
||||
textureShaderCache_ = dpCache;
|
||||
}
|
||||
void SetDrawEngine(DrawEngineGLES *td) {
|
||||
drawEngine_ = td;
|
||||
|
@ -24,11 +24,6 @@ class GPUInterface;
|
||||
class GPUDebugInterface;
|
||||
class GraphicsContext;
|
||||
|
||||
enum RasterMode {
|
||||
RASTER_MODE_NORMAL = 0,
|
||||
RASTER_MODE_COLOR_TO_DEPTH = 1,
|
||||
};
|
||||
|
||||
// PSP rasterization has two outputs, color and depth. Stencil is packed
|
||||
// into the alpha channel of color (if exists), so possibly RASTER_COLOR
|
||||
// should be named RASTER_COLOR_STENCIL but it gets kinda hard to read.
|
||||
@ -89,6 +84,8 @@ struct GPUStatistics {
|
||||
numUploads = 0;
|
||||
numClears = 0;
|
||||
numDepthCopies = 0;
|
||||
numReinterpretCopies = 0;
|
||||
numColorCopies = 0;
|
||||
msProcessingDisplayLists = 0;
|
||||
vertexGPUCycles = 0;
|
||||
otherGPUCycles = 0;
|
||||
@ -115,6 +112,8 @@ struct GPUStatistics {
|
||||
int numUploads;
|
||||
int numClears;
|
||||
int numDepthCopies;
|
||||
int numReinterpretCopies;
|
||||
int numColorCopies;
|
||||
double msProcessingDisplayLists;
|
||||
int vertexGPUCycles;
|
||||
int otherGPUCycles;
|
||||
|
@ -338,7 +338,7 @@
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\ext\xbrz\xbrz.h" />
|
||||
<ClInclude Include="Common\DepalettizeCommon.h" />
|
||||
<ClInclude Include="Common\TextureShaderCommon.h" />
|
||||
<ClInclude Include="Common\Draw2D.h" />
|
||||
<ClInclude Include="Common\ReinterpretFramebuffer.h" />
|
||||
<ClInclude Include="Common\DepalettizeShaderCommon.h" />
|
||||
@ -452,7 +452,7 @@
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\ext\xbrz\xbrz.cpp" />
|
||||
<ClCompile Include="Common\DepalettizeCommon.cpp" />
|
||||
<ClCompile Include="Common\TextureShaderCommon.cpp" />
|
||||
<ClCompile Include="Common\Draw2D.cpp" />
|
||||
<ClCompile Include="Common\ReinterpretFramebuffer.cpp" />
|
||||
<ClCompile Include="Common\DepalettizeShaderCommon.cpp" />
|
||||
|
@ -255,7 +255,7 @@
|
||||
<ClInclude Include="Common\Draw2D.h">
|
||||
<Filter>Common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Common\DepalettizeCommon.h">
|
||||
<ClInclude Include="Common\TextureShaderCommon.h">
|
||||
<Filter>Common</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
@ -503,7 +503,7 @@
|
||||
<ClCompile Include="Common\Draw2D.cpp">
|
||||
<Filter>Common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Common\DepalettizeCommon.cpp">
|
||||
<ClCompile Include="Common\TextureShaderCommon.cpp">
|
||||
<Filter>Common</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
|
@ -1109,7 +1109,8 @@ void GPUCommon::BeginFrame() {
|
||||
} else if (dumpThisFrame_) {
|
||||
dumpThisFrame_ = false;
|
||||
}
|
||||
GPURecord::NotifyFrame();
|
||||
GPUDebug::NotifyBeginFrame();
|
||||
GPURecord::NotifyBeginFrame();
|
||||
}
|
||||
|
||||
void GPUCommon::SlowRunLoop(DisplayList &list)
|
||||
@ -1624,6 +1625,21 @@ void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) {
|
||||
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_CULLRANGE);
|
||||
}
|
||||
|
||||
void GPUCommon::CheckDepthUsage(VirtualFramebuffer *vfb) {
|
||||
if (!gstate_c.usingDepth) {
|
||||
bool isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask();
|
||||
|
||||
if ((gstate.isDepthTestEnabled() || isClearingDepth)) {
|
||||
gstate_c.usingDepth = true;
|
||||
gstate_c.clearingDepth = isClearingDepth;
|
||||
vfb->last_frame_depth_render = gpuStats.numFlips;
|
||||
if (isClearingDepth || gstate.isDepthWriteEnabled()) {
|
||||
vfb->last_frame_depth_updated = gpuStats.numFlips;
|
||||
}
|
||||
framebufferManager_->SetDepthFrameBuffer(isClearingDepth);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GPUCommon::Execute_Prim(u32 op, u32 diff) {
|
||||
// This drives all drawing. All other state we just buffer up, then we apply it only
|
||||
@ -1685,6 +1701,8 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
|
||||
return;
|
||||
}
|
||||
|
||||
CheckDepthUsage(vfb);
|
||||
|
||||
const void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
|
||||
const void *inds = nullptr;
|
||||
u32 vertexType = gstate.vertType;
|
||||
@ -1883,12 +1901,14 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
|
||||
gstate_c.framebufFormat = gstate.FrameBufFormat();
|
||||
|
||||
// This also make skipping drawing very effective.
|
||||
framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
|
||||
VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
|
||||
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
|
||||
// TODO: Should this eat some cycles? Probably yes. Not sure if important.
|
||||
return;
|
||||
}
|
||||
|
||||
CheckDepthUsage(vfb);
|
||||
|
||||
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
|
||||
ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
|
||||
return;
|
||||
@ -1953,12 +1973,14 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) {
|
||||
gstate_c.framebufFormat = gstate.FrameBufFormat();
|
||||
|
||||
// This also make skipping drawing very effective.
|
||||
framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
|
||||
VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
|
||||
if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
|
||||
// TODO: Should this eat some cycles? Probably yes. Not sure if important.
|
||||
return;
|
||||
}
|
||||
|
||||
CheckDepthUsage(vfb);
|
||||
|
||||
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
|
||||
ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
|
||||
return;
|
||||
@ -2686,7 +2708,8 @@ void GPUCommon::ResetListState(int listID, DisplayListState state) {
|
||||
|
||||
GPUDebugOp GPUCommon::DissassembleOp(u32 pc, u32 op) {
|
||||
char buffer[1024];
|
||||
GeDisassembleOp(pc, op, Memory::Read_U32(pc - 4), buffer, sizeof(buffer));
|
||||
u32 prev = Memory::IsValidAddress(pc - 4) ? Memory::ReadUnchecked_U32(pc - 4) : 0;
|
||||
GeDisassembleOp(pc, op, prev, buffer, sizeof(buffer));
|
||||
|
||||
GPUDebugOp info;
|
||||
info.pc = pc;
|
||||
@ -2744,6 +2767,10 @@ void GPUCommon::SetCmdValue(u32 op) {
|
||||
downcount = 0;
|
||||
}
|
||||
|
||||
void GPUCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
|
||||
framebufferManager_->SetDisplayFramebuffer(framebuf, stride, format);
|
||||
}
|
||||
|
||||
void GPUCommon::DoBlockTransfer(u32 skipDrawReason) {
|
||||
// TODO: This is used a lot to copy data around between render targets and textures,
|
||||
// and also to quickly load textures from RAM to VRAM. So we should do checks like the following:
|
||||
@ -3041,7 +3068,8 @@ size_t GPUCommon::FormatGPUStatsCommon(char *buffer, size_t size) {
|
||||
"Vertices: %d cached: %d uncached: %d\n"
|
||||
"FBOs active: %d (evaluations: %d)\n"
|
||||
"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB\n"
|
||||
"Readbacks: %d, uploads: %d, depth copies: %d\n"
|
||||
"Readbacks: %d, uploads: %d\n"
|
||||
"Copies: depth %d, color %d, reinterpret: %d\n"
|
||||
"GPU cycles executed: %d (%f per vertex)\n",
|
||||
gpuStats.msProcessingDisplayLists * 1000.0f,
|
||||
gpuStats.numDrawCalls,
|
||||
@ -3062,6 +3090,8 @@ size_t GPUCommon::FormatGPUStatsCommon(char *buffer, size_t size) {
|
||||
gpuStats.numReadbacks,
|
||||
gpuStats.numUploads,
|
||||
gpuStats.numDepthCopies,
|
||||
gpuStats.numColorCopies,
|
||||
gpuStats.numReinterpretCopies,
|
||||
gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
|
||||
vertexAverageCycles
|
||||
);
|
||||
|
@ -15,6 +15,8 @@ class FramebufferManagerCommon;
|
||||
class TextureCacheCommon;
|
||||
class DrawEngineCommon;
|
||||
class GraphicsContext;
|
||||
struct VirtualFramebuffer;
|
||||
|
||||
namespace Draw {
|
||||
class DrawContext;
|
||||
}
|
||||
@ -115,6 +117,7 @@ public:
|
||||
u32 Break(int mode) override;
|
||||
void ReapplyGfxState() override;
|
||||
|
||||
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
|
||||
void CopyDisplayToOutput(bool reallyDirty) override = 0;
|
||||
void InitClear() override = 0;
|
||||
bool PerformMemoryCopy(u32 dest, u32 src, int size) override;
|
||||
@ -282,17 +285,11 @@ protected:
|
||||
void SlowRunLoop(DisplayList &list);
|
||||
void UpdatePC(u32 currentPC, u32 newPC);
|
||||
void UpdateState(GPURunState state);
|
||||
void PopDLQueue();
|
||||
void CheckDrawSync();
|
||||
int GetNextListIndex();
|
||||
virtual void FastLoadBoneMatrix(u32 target);
|
||||
void FastLoadBoneMatrix(u32 target);
|
||||
|
||||
// TODO: Unify this.
|
||||
virtual void FinishDeferred() {}
|
||||
|
||||
void DoBlockTransfer(u32 skipDrawReason);
|
||||
void DoExecuteCall(u32 target);
|
||||
|
||||
void AdvanceVerts(u32 vertType, int count, int bytesRead) {
|
||||
if ((vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
|
||||
int indexShift = ((vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT) - 1;
|
||||
@ -362,6 +359,13 @@ protected:
|
||||
|
||||
private:
|
||||
void FlushImm();
|
||||
void CheckDepthUsage(VirtualFramebuffer *vfb);
|
||||
void DoBlockTransfer(u32 skipDrawReason);
|
||||
void DoExecuteCall(u32 target);
|
||||
void PopDLQueue();
|
||||
void CheckDrawSync();
|
||||
int GetNextListIndex();
|
||||
|
||||
// Debug stats.
|
||||
double timeSteppingStarted_;
|
||||
double timeSpentStepping_;
|
||||
|
@ -300,8 +300,14 @@ struct GPUgstate {
|
||||
bool isTextureFormatIndexed() const { return (texformat & 4) != 0; } // GE_TFMT_CLUT4 - GE_TFMT_CLUT32 are 0b1xx.
|
||||
int getTextureEnvColRGB() const { return texenvcolor & 0x00FFFFFF; }
|
||||
u32 getClutAddress() const { return (clutaddr & 0x00FFFFF0) | ((clutaddrupper << 8) & 0x0F000000); }
|
||||
int getClutLoadBytes() const { return (loadclut & 0x7F) * 32; }
|
||||
int getClutLoadBlocks() const { return (loadclut & 0x7F); }
|
||||
int getClutLoadBytes() const { return getClutLoadBlocks() * 32; }
|
||||
int getClutLoadBlocks() const {
|
||||
// The PSP only supports 0x3F, but Misshitsu no Sacrifice has extra color data (see #15727.)
|
||||
// 0x40 would be 0, which would be a no-op, so we allow it.
|
||||
if ((loadclut & 0x7F) == 0x40)
|
||||
return 0x40;
|
||||
return loadclut & 0x3F;
|
||||
}
|
||||
GEPaletteFormat getClutPaletteFormat() const { return static_cast<GEPaletteFormat>(clutformat & 3); }
|
||||
int getClutIndexShift() const { return (clutformat >> 2) & 0x1F; }
|
||||
int getClutIndexMask() const { return (clutformat >> 8) & 0xFF; }
|
||||
@ -523,9 +529,10 @@ struct GPUStateCache {
|
||||
bool IsDirty(u64 what) const {
|
||||
return (dirty & what) != 0ULL;
|
||||
}
|
||||
void SetUseShaderDepal(bool depal) {
|
||||
void SetUseShaderDepal(bool depal, bool smoothed) {
|
||||
if (depal != useShaderDepal) {
|
||||
useShaderDepal = depal;
|
||||
useSmoothedShaderDepal = smoothed;
|
||||
Dirty(DIRTY_FRAGMENTSHADER_STATE);
|
||||
}
|
||||
}
|
||||
@ -555,14 +562,6 @@ struct GPUStateCache {
|
||||
Dirty(DIRTY_FRAGMENTSHADER_STATE | (is3D ? DIRTY_MIPBIAS : 0));
|
||||
}
|
||||
}
|
||||
void SetFramebufferRenderMode(RasterMode mode) {
|
||||
if (mode != renderMode) {
|
||||
// This mode modifies the fragment shader to write depth, the depth state to write without testing, and the blend state to write nothing to color.
|
||||
// So we need to re-evaluate those states.
|
||||
Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_TEXTURE_PARAMS);
|
||||
renderMode = mode;
|
||||
}
|
||||
}
|
||||
|
||||
u32 featureFlags;
|
||||
|
||||
@ -572,6 +571,9 @@ struct GPUStateCache {
|
||||
|
||||
uint64_t dirty;
|
||||
|
||||
bool usingDepth; // For deferred depth copies.
|
||||
bool clearingDepth;
|
||||
|
||||
bool textureFullAlpha;
|
||||
bool vertexFullAlpha;
|
||||
|
||||
@ -613,9 +615,6 @@ struct GPUStateCache {
|
||||
// We detect this case and go into a special drawing mode.
|
||||
bool blueToAlpha;
|
||||
|
||||
// Some games try to write to the Z buffer using color. Catch that and actually do the writes to the Z buffer instead.
|
||||
RasterMode renderMode;
|
||||
|
||||
// TODO: These should be accessed from the current VFB object directly.
|
||||
u32 curRTWidth;
|
||||
u32 curRTHeight;
|
||||
@ -637,6 +636,7 @@ struct GPUStateCache {
|
||||
int spline_num_points_u;
|
||||
|
||||
bool useShaderDepal;
|
||||
bool useSmoothedShaderDepal;
|
||||
GEBufferFormat depalFramebufferFormat;
|
||||
|
||||
u32 getRelativeAddress(u32 data) const;
|
||||
|
@ -6,6 +6,7 @@ const char *GeBufferFormatToString(GEBufferFormat fmt) {
|
||||
case GE_FORMAT_5551: return "5551";
|
||||
case GE_FORMAT_565: return "565";
|
||||
case GE_FORMAT_8888: return "8888";
|
||||
case GE_FORMAT_DEPTH16: return "DEPTH16";
|
||||
default: return "N/A";
|
||||
}
|
||||
}
|
||||
|
@ -210,6 +210,9 @@ void BinManager::UpdateState() {
|
||||
}
|
||||
|
||||
if (HasDirty(SoftDirty::BINNER_OVERLAP)) {
|
||||
// This is a good place to record any dependencies for block transfer overlap.
|
||||
MarkPendingReads(state);
|
||||
|
||||
// Disallow threads when rendering to the target, even offset.
|
||||
bool selfRender = HasTextureWrite(state);
|
||||
int newMaxTasks = selfRender ? 1 : g_threadManager.GetNumLooperThreads();
|
||||
@ -251,6 +254,34 @@ bool BinManager::HasTextureWrite(const RasterizerState &state) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Records, per texture level used by `state`, the byte range the rasterizer will
// sample from, keyed by the level's texture address in pendingReads_.
// Does nothing when texturing is disabled.
// NOTE(review): the address is stored as-is, while HasPendingRead() strips mirror
// bits from the address it is queried with - confirm texaddr is already normalized.
void BinManager::MarkPendingReads(const Rasterizer::RasterizerState &state) {
	if (!state.enableTextures)
		return;

	const uint8_t bitsPerTexel = textureBitsPerPixel[state.samplerID.texfmt];
	for (int level = 0; level <= state.maxTexLevel; ++level) {
		const auto levelAddr = state.texaddr[level];
		uint32_t levelStride = (state.texbufw[level] * bitsPerTexel) / 8;
		uint32_t levelWidth = (state.samplerID.cached.sizes[level].w * bitsPerTexel) / 8;
		uint32_t levelHeight = state.samplerID.cached.sizes[level].h;

		auto found = pendingReads_.find(levelAddr);
		if (found == pendingReads_.end()) {
			// First time this address is seen since the last flush: start a new range.
			BinDirtyRange &fresh = pendingReads_[levelAddr];
			fresh.base = levelAddr;
			fresh.strideBytes = levelStride;
			fresh.widthBytes = levelWidth;
			fresh.height = levelHeight;
			continue;
		}

		// Already tracked: only grow the range when the new span reaches further.
		BinDirtyRange &tracked = found->second;
		const uint32_t wantedBytes = levelStride * (levelHeight - 1) + levelWidth;
		const uint32_t trackedBytes = tracked.strideBytes * (tracked.height - 1) + tracked.widthBytes;
		if (trackedBytes < wantedBytes) {
			tracked.strideBytes = std::max(tracked.strideBytes, levelStride);
			tracked.widthBytes = std::max(tracked.widthBytes, levelWidth);
			tracked.height = std::max(tracked.height, levelHeight);
		}
	}
}
|
||||
|
||||
inline void BinDirtyRange::Expand(uint32_t newBase, uint32_t bpp, uint32_t stride, DrawingCoords &tl, DrawingCoords &br) {
|
||||
const uint32_t w = br.x - tl.x + 1;
|
||||
const uint32_t h = br.y - tl.y + 1;
|
||||
@ -465,9 +496,10 @@ void BinManager::Flush(const char *reason) {
|
||||
for (auto &pending : pendingWrites_)
|
||||
pending.base = 0;
|
||||
pendingOverlap_ = false;
|
||||
pendingReads_.clear();
|
||||
|
||||
// We'll need to set the pending writes again, since we just flushed it.
|
||||
dirty_ |= SoftDirty::BINNER_RANGE;
|
||||
// We'll need to set the pending writes and reads again, since we just flushed it.
|
||||
dirty_ |= SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP;
|
||||
|
||||
if (coreCollectDebugStats) {
|
||||
double et = time_now_d();
|
||||
@ -486,7 +518,7 @@ bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, ui
|
||||
// Ignore mirrors for overlap detection.
|
||||
start &= 0x0FFFFFFF & ~0x00600000;
|
||||
|
||||
uint32_t size = stride * h;
|
||||
uint32_t size = stride * (h - 1) + w;
|
||||
for (const auto &range : pendingWrites_) {
|
||||
if (range.base == 0 || range.strideBytes == 0)
|
||||
continue;
|
||||
@ -512,6 +544,28 @@ bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, ui
|
||||
return false;
|
||||
}
|
||||
|
||||
bool BinManager::HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h) {
|
||||
if (Memory::IsVRAMAddress(start)) {
|
||||
// Ignore VRAM mirrors.
|
||||
start &= 0x0FFFFFFF & ~0x00600000;
|
||||
} else {
|
||||
// Ignore only regular RAM mirrors.
|
||||
start &= 0x3FFFFFFF;
|
||||
}
|
||||
|
||||
uint32_t size = stride * (h - 1) + w;
|
||||
for (const auto &pair : pendingReads_) {
|
||||
const auto &range = pair.second;
|
||||
if (start >= range.base + range.height * range.strideBytes || start + size <= range.base)
|
||||
continue;
|
||||
|
||||
// Stride gaps are uncommon with reads, so don't bother.
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void BinManager::GetStats(char *buffer, size_t bufsize) {
|
||||
double allTotal = 0.0;
|
||||
double slowestTotalTime = 0.0;
|
||||
|
@ -198,6 +198,8 @@ public:
|
||||
void Drain();
|
||||
void Flush(const char *reason);
|
||||
bool HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);
|
||||
// Assumes you've also checked for a write (writes are partial so are automatically reads.)
|
||||
bool HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);
|
||||
|
||||
void GetStats(char *buffer, size_t bufsize);
|
||||
void ResetStats();
|
||||
@ -252,6 +254,8 @@ private:
|
||||
BinWaitable *waitable_ = nullptr;
|
||||
|
||||
BinDirtyRange pendingWrites_[2]{};
|
||||
std::unordered_map<uint32_t, BinDirtyRange> pendingReads_;
|
||||
|
||||
bool pendingOverlap_ = false;
|
||||
|
||||
std::unordered_map<const char *, double> flushReasonTimes_;
|
||||
@ -262,6 +266,7 @@ private:
|
||||
int enqueues_ = 0;
|
||||
int mostThreads_ = 0;
|
||||
|
||||
void MarkPendingReads(const Rasterizer::RasterizerState &state);
|
||||
bool HasTextureWrite(const Rasterizer::RasterizerState &state);
|
||||
BinCoords Scissor(BinCoords range);
|
||||
BinCoords Range(const VertexData &v0, const VertexData &v1, const VertexData &v2);
|
||||
|
@ -1408,7 +1408,7 @@ bool GetCurrentTexture(GPUDebugBuffer &buffer, int level)
|
||||
|
||||
SamplerID id;
|
||||
ComputeSamplerID(&id);
|
||||
id.cached.clut = (const u8 *)clut;
|
||||
id.cached.clut = clut;
|
||||
|
||||
Sampler::FetchFunc sampler = Sampler::GetFetchFunc(id);
|
||||
|
||||
|
@ -52,7 +52,7 @@
|
||||
const int FB_WIDTH = 480;
|
||||
const int FB_HEIGHT = 272;
|
||||
|
||||
u32 clut[4096];
|
||||
uint8_t clut[1024];
|
||||
FormatBuffer fb;
|
||||
FormatBuffer depthbuf;
|
||||
|
||||
@ -201,14 +201,14 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = {
|
||||
{ GE_CMD_CLUTFORMAT, 0, SoftDirty::SAMPLER_BASIC },
|
||||
|
||||
// Morph weights. TODO: Remove precomputation?
|
||||
{ GE_CMD_MORPHWEIGHT0, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT1, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT2, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT3, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT4, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT5, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT6, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT7, 0, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT0, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT1, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT2, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT3, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT4, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT5, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT6, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
{ GE_CMD_MORPHWEIGHT7, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },
|
||||
|
||||
// No state or flushing required for patch parameters, currently.
|
||||
{ GE_CMD_PATCHDIVISION },
|
||||
@ -787,8 +787,8 @@ void SoftGPU::Execute_BlockTransferStart(u32 op, u32 diff) {
|
||||
const uint32_t dstSize = height * dstStride * bpp;
|
||||
|
||||
// Need to flush both source and target, so we overwrite properly.
|
||||
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", src, srcStride, width * bpp, height);
|
||||
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", dst, dstStride, width * bpp, height);
|
||||
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", false, src, srcStride, width * bpp, height);
|
||||
drawEngine_->transformUnit.FlushIfOverlap("blockxfer", true, dst, dstStride, width * bpp, height);
|
||||
|
||||
DEBUG_LOG(G3D, "Block transfer: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);
|
||||
|
||||
@ -971,10 +971,13 @@ void SoftGPU::Execute_Spline(u32 op, u32 diff) {
|
||||
|
||||
void SoftGPU::Execute_LoadClut(u32 op, u32 diff) {
|
||||
u32 clutAddr = gstate.getClutAddress();
|
||||
u32 clutTotalBytes = gstate.getClutLoadBytes();
|
||||
// Avoid the hack in getClutLoadBytes() to inaccurately allow more palette data.
|
||||
u32 clutTotalBytes = (gstate.getClutLoadBlocks() & 0x3F) * 32;
|
||||
if (clutTotalBytes > 1024)
|
||||
clutTotalBytes = 1024;
|
||||
|
||||
// Might be copying drawing into the CLUT, so flush.
|
||||
drawEngine_->transformUnit.FlushIfOverlap("loadclut", clutAddr, clutTotalBytes, clutTotalBytes, 1);
|
||||
drawEngine_->transformUnit.FlushIfOverlap("loadclut", false, clutAddr, clutTotalBytes, clutTotalBytes, 1);
|
||||
|
||||
bool changed = false;
|
||||
if (Memory::IsValidAddress(clutAddr)) {
|
||||
|
@ -216,7 +216,7 @@ private:
|
||||
};
|
||||
|
||||
// TODO: These shouldn't be global.
|
||||
extern u32 clut[4096];
|
||||
extern uint8_t clut[1024];
|
||||
extern FormatBuffer fb;
|
||||
extern FormatBuffer depthbuf;
|
||||
|
||||
|
@ -797,9 +797,11 @@ void TransformUnit::GetStats(char *buffer, size_t bufsize) {
|
||||
binner_->GetStats(buffer, bufsize);
|
||||
}
|
||||
|
||||
void TransformUnit::FlushIfOverlap(const char *reason, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h) {
|
||||
void TransformUnit::FlushIfOverlap(const char *reason, bool modifying, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h) {
|
||||
if (binner_->HasPendingWrite(addr, stride, w, h))
|
||||
Flush(reason);
|
||||
if (modifying && binner_->HasPendingRead(addr, stride, w, h))
|
||||
Flush(reason);
|
||||
}
|
||||
|
||||
void TransformUnit::NotifyClutUpdate(const void *src) {
|
||||
|
@ -123,7 +123,7 @@ public:
|
||||
bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices);
|
||||
|
||||
void Flush(const char *reason);
|
||||
void FlushIfOverlap(const char *reason, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h);
|
||||
void FlushIfOverlap(const char *reason, bool modifying, uint32_t addr, uint32_t stride, uint32_t w, uint32_t h);
|
||||
void NotifyClutUpdate(const void *src);
|
||||
|
||||
void GetStats(char *buffer, size_t bufsize);
|
||||
|
@ -185,8 +185,8 @@ void DrawEngineVulkan::InitDeviceObjects() {
|
||||
samp.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
|
||||
samp.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
samp.flags = 0;
|
||||
samp.magFilter = VK_FILTER_NEAREST;
|
||||
samp.minFilter = VK_FILTER_NEAREST;
|
||||
samp.magFilter = VK_FILTER_LINEAR;
|
||||
samp.minFilter = VK_FILTER_LINEAR;
|
||||
res = vkCreateSampler(device, &samp, nullptr, &samplerSecondary_);
|
||||
_dbg_assert_(VK_SUCCESS == res);
|
||||
res = vkCreateSampler(device, &samp, nullptr, &nullSampler_);
|
||||
@ -856,6 +856,7 @@ void DrawEngineVulkan::DoFlush() {
|
||||
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
|
||||
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
|
||||
vpAndScissor);
|
||||
UpdateCachedViewportState(vpAndScissor);
|
||||
}
|
||||
|
||||
int maxIndex = indexGen.MaxIndex();
|
||||
|
@ -218,7 +218,7 @@ private:
|
||||
// Secondary texture for shader blending
|
||||
VkImageView boundSecondary_ = VK_NULL_HANDLE;
|
||||
VkImageView boundDepal_ = VK_NULL_HANDLE;
|
||||
VkSampler samplerSecondary_ = VK_NULL_HANDLE; // This one is actually never used since we use fetch.
|
||||
VkSampler samplerSecondary_ = VK_NULL_HANDLE; // This one is actually never used since we use fetch (except in SmoothedDepal mode for Test Drive).
|
||||
|
||||
PrehashMap<VertexArrayInfoVulkan *, nullptr> vai_;
|
||||
VulkanPushBuffer *vertexCache_;
|
||||
|
@ -75,7 +75,4 @@ void FramebufferManagerVulkan::NotifyClear(bool clearColor, bool clearAlpha, boo
|
||||
if (clearColor || clearAlpha) {
|
||||
SetColorUpdated(gstate_c.skipDrawReason);
|
||||
}
|
||||
if (clearDepth) {
|
||||
SetDepthUpdated();
|
||||
}
|
||||
}
|
||||
|
@ -37,7 +37,6 @@
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "GPU/GeDisasm.h"
|
||||
#include "GPU/Common/FramebufferManagerCommon.h"
|
||||
#include "GPU/Debugger/Debugger.h"
|
||||
#include "GPU/Vulkan/ShaderManagerVulkan.h"
|
||||
#include "GPU/Vulkan/GPU_Vulkan.h"
|
||||
#include "GPU/Vulkan/FramebufferManagerVulkan.h"
|
||||
@ -63,7 +62,7 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
||||
pipelineManager_ = new PipelineManagerVulkan(vulkan);
|
||||
framebufferManagerVulkan_ = new FramebufferManagerVulkan(draw);
|
||||
framebufferManager_ = framebufferManagerVulkan_;
|
||||
textureCacheVulkan_ = new TextureCacheVulkan(draw, vulkan);
|
||||
textureCacheVulkan_ = new TextureCacheVulkan(draw, framebufferManager_->GetDraw2D(), vulkan);
|
||||
textureCache_ = textureCacheVulkan_;
|
||||
drawEngineCommon_ = &drawEngine_;
|
||||
shaderManager_ = shaderManagerVulkan_;
|
||||
@ -431,11 +430,6 @@ void GPU_Vulkan::InitClear() {
|
||||
}
|
||||
}
|
||||
|
||||
void GPU_Vulkan::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
|
||||
GPUDebug::NotifyDisplay(framebuf, stride, format);
|
||||
framebufferManager_->SetDisplayFramebuffer(framebuf, stride, format);
|
||||
}
|
||||
|
||||
void GPU_Vulkan::CopyDisplayToOutput(bool reallyDirty) {
|
||||
// Flush anything left over.
|
||||
drawEngine_.Flush();
|
||||
@ -604,9 +598,8 @@ std::vector<std::string> GPU_Vulkan::DebugGetShaderIDs(DebugShaderType type) {
|
||||
return drawEngine_.DebugGetVertexLoaderIDs();
|
||||
} else if (type == SHADER_TYPE_PIPELINE) {
|
||||
return pipelineManager_->DebugGetObjectIDs(type);
|
||||
} else if (type == SHADER_TYPE_DEPAL) {
|
||||
///...
|
||||
return std::vector<std::string>();
|
||||
} else if (type == SHADER_TYPE_TEXTURE) {
|
||||
return textureCache_->GetTextureShaderCache()->DebugGetShaderIDs(type);
|
||||
} else if (type == SHADER_TYPE_VERTEX || type == SHADER_TYPE_FRAGMENT) {
|
||||
return shaderManagerVulkan_->DebugGetShaderIDs(type);
|
||||
} else if (type == SHADER_TYPE_SAMPLER) {
|
||||
@ -621,8 +614,8 @@ std::string GPU_Vulkan::DebugGetShaderString(std::string id, DebugShaderType typ
|
||||
return drawEngine_.DebugGetVertexLoaderString(id, stringType);
|
||||
} else if (type == SHADER_TYPE_PIPELINE) {
|
||||
return pipelineManager_->DebugGetObjectString(id, type, stringType);
|
||||
} else if (type == SHADER_TYPE_DEPAL) {
|
||||
return "";
|
||||
} else if (type == SHADER_TYPE_TEXTURE) {
|
||||
return textureCache_->GetTextureShaderCache()->DebugGetShaderString(id, type, stringType);
|
||||
} else if (type == SHADER_TYPE_SAMPLER) {
|
||||
return textureCacheVulkan_->DebugGetSamplerString(id, stringType);
|
||||
} else if (type == SHADER_TYPE_VERTEX || type == SHADER_TYPE_FRAGMENT) {
|
||||
|
@ -25,7 +25,7 @@
|
||||
#include "GPU/GPUCommon.h"
|
||||
#include "GPU/Vulkan/DrawEngineVulkan.h"
|
||||
#include "GPU/Vulkan/PipelineManagerVulkan.h"
|
||||
#include "GPU/Common/DepalettizeCommon.h"
|
||||
#include "GPU/Common/TextureShaderCommon.h"
|
||||
|
||||
class FramebufferManagerVulkan;
|
||||
class ShaderManagerVulkan;
|
||||
@ -50,7 +50,6 @@ public:
|
||||
void PreExecuteOp(u32 op, u32 diff) override;
|
||||
void ExecuteOp(u32 op, u32 diff) override;
|
||||
|
||||
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override;
|
||||
void GetStats(char *buffer, size_t bufsize) override;
|
||||
void ClearCacheNextFrame() override;
|
||||
void DeviceLost() override; // Only happens on Android. Drop all textures and shaders.
|
||||
|
@ -250,20 +250,10 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
|
||||
GenericStencilFuncState stencilState;
|
||||
ConvertStencilFuncState(stencilState);
|
||||
|
||||
if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) {
|
||||
// Enforce plain depth writing.
|
||||
key.depthTestEnable = true;
|
||||
key.depthWriteEnable = true;
|
||||
key.stencilTestEnable = false;
|
||||
key.depthCompareOp = VK_COMPARE_OP_ALWAYS;
|
||||
key.depthClampEnable = false;
|
||||
} else if (gstate.isModeClear()) {
|
||||
if (gstate.isModeClear()) {
|
||||
key.depthTestEnable = true;
|
||||
key.depthCompareOp = VK_COMPARE_OP_ALWAYS;
|
||||
key.depthWriteEnable = gstate.isClearModeDepthMask();
|
||||
if (gstate.isClearModeDepthMask()) {
|
||||
fbManager.SetDepthUpdated();
|
||||
}
|
||||
|
||||
// Stencil Test
|
||||
bool alphaMask = gstate.isClearModeAlphaMask();
|
||||
@ -294,9 +284,6 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
|
||||
key.depthTestEnable = true;
|
||||
key.depthCompareOp = compareOps[gstate.getDepthTestFunction()];
|
||||
key.depthWriteEnable = gstate.isDepthWriteEnabled();
|
||||
if (gstate.isDepthWriteEnabled()) {
|
||||
fbManager.SetDepthUpdated();
|
||||
}
|
||||
} else {
|
||||
key.depthTestEnable = false;
|
||||
key.depthWriteEnable = false;
|
||||
@ -331,15 +318,13 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
|
||||
fbManager.GetRenderWidth(), fbManager.GetRenderHeight(),
|
||||
fbManager.GetTargetBufferWidth(), fbManager.GetTargetBufferHeight(),
|
||||
vpAndScissor);
|
||||
UpdateCachedViewportState(vpAndScissor);
|
||||
|
||||
float depthMin = vpAndScissor.depthRangeMin;
|
||||
float depthMax = vpAndScissor.depthRangeMax;
|
||||
|
||||
if (depthMin < 0.0f) depthMin = 0.0f;
|
||||
if (depthMax > 1.0f) depthMax = 1.0f;
|
||||
if (vpAndScissor.dirtyDepth) {
|
||||
gstate_c.Dirty(DIRTY_DEPTHRANGE);
|
||||
}
|
||||
|
||||
VkViewport &vp = dynState.viewport;
|
||||
vp.x = vpAndScissor.viewportX;
|
||||
@ -349,10 +334,6 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
|
||||
vp.minDepth = vpAndScissor.depthRangeMin;
|
||||
vp.maxDepth = vpAndScissor.depthRangeMax;
|
||||
|
||||
if (vpAndScissor.dirtyProj) {
|
||||
gstate_c.Dirty(DIRTY_PROJMATRIX);
|
||||
}
|
||||
|
||||
ScissorRect &scissor = dynState.scissor;
|
||||
scissor.x = vpAndScissor.scissorX;
|
||||
scissor.y = vpAndScissor.scissorY;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user