Merge branch 'master' into compat_openxr_gta

This commit is contained in:
Lubos 2022-09-23 14:16:58 +02:00
commit adffbb2ea7
36 changed files with 402 additions and 261 deletions

View File

@ -30,16 +30,14 @@ static void MergeRenderAreaRectInto(VkRect2D *dest, VkRect2D &src) {
// We need to take the "max" of the features used in the two render passes.
RenderPassType MergeRPTypes(RenderPassType a, RenderPassType b) {
// Either both are backbuffer type, or neither are.
_dbg_assert_((a == RP_TYPE_BACKBUFFER) == (b == RP_TYPE_BACKBUFFER));
if (a == b) {
// Trivial merging case.
// These can't merge with other renderpasses
if (a == RP_TYPE_BACKBUFFER || b == RP_TYPE_BACKBUFFER) {
_dbg_assert_(a == b);
return a;
} else if (a == RP_TYPE_COLOR_DEPTH && b == RP_TYPE_COLOR_DEPTH_INPUT) {
return RP_TYPE_COLOR_DEPTH_INPUT;
} else if (a == RP_TYPE_COLOR_DEPTH_INPUT && b == RP_TYPE_COLOR_DEPTH) {
return RP_TYPE_COLOR_DEPTH_INPUT;
}
return a;
// The rest we can just OR together to get the maximum feature set.
return (RenderPassType)((u32)a | (u32)b);
}
void VulkanQueueRunner::CreateDeviceObjects() {
@ -326,29 +324,33 @@ static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) {
// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) {
bool selfDependency = rpType == RP_TYPE_COLOR_DEPTH_INPUT;
VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) {
bool selfDependency = rpType == RP_TYPE_COLOR_INPUT || rpType == RP_TYPE_COLOR_DEPTH_INPUT;
bool isBackbuffer = rpType == RP_TYPE_BACKBUFFER;
bool hasDepth = rpType == RP_TYPE_BACKBUFFER || rpType == RP_TYPE_COLOR_DEPTH || rpType == RP_TYPE_COLOR_DEPTH_INPUT;
VkAttachmentDescription attachments[2] = {};
attachments[0].format = rpType == RP_TYPE_BACKBUFFER ? vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM;
attachments[0].format = isBackbuffer ? vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM;
attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[0].loadOp = ConvertLoadAction(key.colorLoadAction);
attachments[0].storeOp = ConvertStoreAction(key.colorStoreAction);
attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[0].initialLayout = rpType == RP_TYPE_BACKBUFFER ? VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
attachments[0].finalLayout = rpType == RP_TYPE_BACKBUFFER ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
attachments[0].initialLayout = isBackbuffer ? VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
attachments[0].finalLayout = isBackbuffer ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
attachments[0].flags = 0;
attachments[1].format = vulkan->GetDeviceInfo().preferredDepthStencilFormat;
attachments[1].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[1].loadOp = ConvertLoadAction(key.depthLoadAction);
attachments[1].storeOp = ConvertStoreAction(key.depthStoreAction);
attachments[1].stencilLoadOp = ConvertLoadAction(key.stencilLoadAction);
attachments[1].stencilStoreOp = ConvertStoreAction(key.stencilStoreAction);
attachments[1].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[1].flags = 0;
if (hasDepth) {
attachments[1].format = vulkan->GetDeviceInfo().preferredDepthStencilFormat;
attachments[1].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[1].loadOp = ConvertLoadAction(key.depthLoadAction);
attachments[1].storeOp = ConvertStoreAction(key.depthStoreAction);
attachments[1].stencilLoadOp = ConvertLoadAction(key.stencilLoadAction);
attachments[1].stencilStoreOp = ConvertStoreAction(key.stencilStoreAction);
attachments[1].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[1].flags = 0;
}
VkAttachmentReference color_reference{};
color_reference.attachment = 0;
@ -371,7 +373,9 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &color_reference;
subpass.pResolveAttachments = nullptr;
subpass.pDepthStencilAttachment = &depth_reference;
if (hasDepth) {
subpass.pDepthStencilAttachment = &depth_reference;
}
subpass.preserveAttachmentCount = 0;
subpass.pPreserveAttachments = nullptr;
@ -380,12 +384,12 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
size_t numDeps = 0;
VkRenderPassCreateInfo rp{ VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO };
rp.attachmentCount = 2;
rp.attachmentCount = hasDepth ? 2 : 1;
rp.pAttachments = attachments;
rp.subpassCount = 1;
rp.pSubpasses = &subpass;
if (rpType == RP_TYPE_BACKBUFFER) {
if (isBackbuffer) {
deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL;
deps[numDeps].dstSubpass = 0;
deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
@ -393,7 +397,6 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
deps[numDeps].srcAccessMask = 0;
deps[numDeps].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
numDeps++;
rp.dependencyCount = 1;
}
if (selfDependency) {
@ -424,7 +427,7 @@ VkRenderPass VKRRenderPass::Get(VulkanContext *vulkan, RenderPassType rpType) {
// practical later when referring to it. Could change to on-demand if it feels motivated
// but I think the render pass objects are cheap.
if (!pass[(int)rpType]) {
pass[(int)rpType] = CreateRP(vulkan, key_, (RenderPassType)rpType);
pass[(int)rpType] = CreateRenderPass(vulkan, key_, (RenderPassType)rpType);
}
return pass[(int)rpType];
}
@ -873,8 +876,10 @@ std::string VulkanQueueRunner::StepToString(const VKRStep &step) const {
const char *renderCmd;
switch (step.render.renderPassType) {
case RP_TYPE_BACKBUFFER: renderCmd = "BACKBUF"; break;
case RP_TYPE_COLOR_DEPTH: renderCmd = "RENDER"; break;
case RP_TYPE_COLOR_DEPTH_INPUT: renderCmd = "RENDER_INPUT"; break;
case RP_TYPE_COLOR: renderCmd = "RENDER"; break;
case RP_TYPE_COLOR_DEPTH: renderCmd = "RENDER_DEPTH"; break;
case RP_TYPE_COLOR_INPUT: renderCmd = "RENDER_INPUT"; break;
case RP_TYPE_COLOR_DEPTH_INPUT: renderCmd = "RENDER_DEPTH_INPUT"; break;
default: renderCmd = "N/A";
}
snprintf(buffer, sizeof(buffer), "%s %s (draws: %d, %dx%d/%dx%d, fb: %p, )", renderCmd, step.tag, step.render.numDraws, actual_w, actual_h, w, h, step.render.framebuffer);
@ -1153,7 +1158,7 @@ void TransitionToOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayout
srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
default:
_dbg_assert_msg_(false, "GetRenderPass: Unexpected color layout %d", (int)colorLayout);
_dbg_assert_msg_(false, "TransitionToOptimal: Unexpected color layout %d", (int)colorLayout);
break;
}
recordBarrier->TransitionImage(
@ -1189,7 +1194,7 @@ void TransitionToOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayout
srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
default:
_dbg_assert_msg_(false, "GetRenderPass: Unexpected depth layout %d", (int)depthStencilLayout);
_dbg_assert_msg_(false, "TransitionToOptimal: Unexpected depth layout %d", (int)depthStencilLayout);
break;
}
recordBarrier->TransitionImage(
@ -1236,7 +1241,7 @@ void TransitionFromOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayou
// Nothing to do.
break;
default:
_dbg_assert_msg_(false, "GetRenderPass: Unexpected final color layout %d", (int)colorLayout);
_dbg_assert_msg_(false, "TransitionFromOptimal: Unexpected final color layout %d", (int)colorLayout);
break;
}
barrier[0].oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
@ -1275,7 +1280,7 @@ void TransitionFromOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayou
// Nothing to do.
break;
default:
_dbg_assert_msg_(false, "GetRenderPass: Unexpected final depth layout %d", (int)depthStencilLayout);
_dbg_assert_msg_(false, "TransitionFromOptimal: Unexpected final depth layout %d", (int)depthStencilLayout);
break;
}
barrier[barrierCount].oldLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

View File

@ -43,18 +43,24 @@ enum class VKRRenderCommand : uint8_t {
enum class PipelineFlags {
NONE = 0,
USES_LINES = (1 << 2),
USES_BLEND_CONSTANT = (1 << 3),
USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer.
USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth or stencil buffers.
USES_INPUT_ATTACHMENT = (1 << 5),
};
ENUM_CLASS_BITOPS(PipelineFlags);
// Pipelines need to be created for the right type of render pass.
enum RenderPassType {
RP_TYPE_BACKBUFFER,
// These four are organized so that bit 0 is DEPTH and bit 1 is INPUT, so
// they can be OR-ed together in MergeRPTypes.
RP_TYPE_COLOR,
RP_TYPE_COLOR_DEPTH,
RP_TYPE_COLOR_INPUT,
RP_TYPE_COLOR_DEPTH_INPUT,
// This is the odd one out, and gets special handling in MergeRPTypes.
RP_TYPE_BACKBUFFER, // For the backbuffer we can always use CLEAR/DONT_CARE, so bandwidth cost for a depth channel is negligible.
// Later will add pure-color render passes.
RP_TYPE_COUNT,
};

View File

@ -158,33 +158,37 @@ VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRe
// We create the actual framebuffer objects on demand, because some combinations might not make sense.
}
VkFramebuffer VKRFramebuffer::Get(VKRRenderPass *compatibleRenderPass, RenderPassType renderPassType) {
if (framebuf[(int)renderPassType]) {
return framebuf[(int)renderPassType];
VkFramebuffer VKRFramebuffer::Get(VKRRenderPass *compatibleRenderPass, RenderPassType rpType) {
if (framebuf[(int)rpType]) {
return framebuf[(int)rpType];
}
VkFramebufferCreateInfo fbci{ VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
VkImageView views[2]{};
fbci.renderPass = compatibleRenderPass->Get(vulkan_, renderPassType);
fbci.attachmentCount = 2;
fbci.pAttachments = views;
bool hasDepth = rpType == RP_TYPE_BACKBUFFER || rpType == RP_TYPE_COLOR_DEPTH || rpType == RP_TYPE_COLOR_DEPTH_INPUT;
views[0] = color.imageView;
views[1] = depth.imageView;
if (hasDepth) {
views[1] = depth.imageView;
}
fbci.renderPass = compatibleRenderPass->Get(vulkan_, rpType);
fbci.attachmentCount = hasDepth ? 2 : 1;
fbci.pAttachments = views;
fbci.width = width;
fbci.height = height;
fbci.layers = 1;
VkResult res = vkCreateFramebuffer(vulkan_->GetDevice(), &fbci, nullptr, &framebuf[(int)renderPassType]);
VkResult res = vkCreateFramebuffer(vulkan_->GetDevice(), &fbci, nullptr, &framebuf[(int)rpType]);
_assert_(res == VK_SUCCESS);
if (!tag_.empty() && vulkan_->Extensions().EXT_debug_utils) {
vulkan_->SetDebugName(color.image, VK_OBJECT_TYPE_IMAGE, StringFromFormat("fb_color_%s", tag_.c_str()).c_str());
vulkan_->SetDebugName(depth.image, VK_OBJECT_TYPE_IMAGE, StringFromFormat("fb_depth_%s", tag_.c_str()).c_str());
vulkan_->SetDebugName(framebuf[(int)renderPassType], VK_OBJECT_TYPE_FRAMEBUFFER, StringFromFormat("fb_%s", tag_.c_str()).c_str());
vulkan_->SetDebugName(framebuf[(int)rpType], VK_OBJECT_TYPE_FRAMEBUFFER, StringFromFormat("fb_%s", tag_.c_str()).c_str());
}
return framebuf[(int)renderPassType];
return framebuf[(int)rpType];
}
VKRFramebuffer::~VKRFramebuffer() {
@ -656,15 +660,16 @@ void VulkanRenderManager::EndCurRenderStep() {
curRenderStep_->render.colorLoad, curRenderStep_->render.depthLoad, curRenderStep_->render.stencilLoad,
curRenderStep_->render.colorStore, curRenderStep_->render.depthStore, curRenderStep_->render.stencilStore,
};
RenderPassType rpType = RP_TYPE_COLOR_DEPTH;
// Save the accumulated pipeline flags so we can use that to configure the render pass.
// We'll often be able to avoid loading/saving the depth/stencil buffer.
curRenderStep_->render.pipelineFlags = curPipelineFlags_;
bool depthStencil = (curPipelineFlags_ & PipelineFlags::USES_DEPTH_STENCIL) != 0;
RenderPassType rpType = depthStencil ? RP_TYPE_COLOR_DEPTH : RP_TYPE_COLOR;
if (!curRenderStep_->render.framebuffer) {
rpType = RP_TYPE_BACKBUFFER;
} else if (curPipelineFlags_ & PipelineFlags::USES_INPUT_ATTACHMENT) {
// Not allowed on backbuffers.
rpType = RP_TYPE_COLOR_DEPTH_INPUT;
rpType = depthStencil ? RP_TYPE_COLOR_DEPTH_INPUT : RP_TYPE_COLOR_INPUT;
}
// TODO: Also add render pass types for depth/stencil-less.
@ -714,9 +719,11 @@ void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRR
}
if (depth == VKRRenderPassLoadAction::CLEAR) {
clearMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
curPipelineFlags_ |= PipelineFlags::USES_DEPTH_STENCIL;
}
if (stencil == VKRRenderPassLoadAction::CLEAR) {
clearMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
curPipelineFlags_ |= PipelineFlags::USES_DEPTH_STENCIL;
}
// If we need a clear and the previous step has commands already, it's best to just add a clear and keep going.
@ -997,6 +1004,10 @@ void VulkanRenderManager::Clear(uint32_t clearColor, float clearZ, int clearSten
curRenderStep_->render.depthLoad = (clearMask & VK_IMAGE_ASPECT_DEPTH_BIT) ? VKRRenderPassLoadAction::CLEAR : VKRRenderPassLoadAction::KEEP;
curRenderStep_->render.stencilLoad = (clearMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? VKRRenderPassLoadAction::CLEAR : VKRRenderPassLoadAction::KEEP;
if (clearMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
curPipelineFlags_ |= PipelineFlags::USES_DEPTH_STENCIL;
}
// In case there were commands already.
curRenderStep_->render.numDraws = 0;
RemoveDrawCommands(&curRenderStep_->commands);
@ -1269,7 +1280,10 @@ void VulkanRenderManager::Run(int frame) {
BeginSubmitFrame(frame);
FrameData &frameData = frameData_[frame];
queueRunner_.PreprocessSteps(frameData_[frame].steps);
queueRunner_.PreprocessSteps(frameData.steps);
// Likely during shutdown, happens in headless.
if (frameData.steps.empty() && !frameData.hasAcquired)
frameData.skipSwap = true;
//queueRunner_.LogSteps(stepsOnThread, false);
queueRunner_.RunSteps(frameData, frameDataShared_);

View File

@ -1056,6 +1056,7 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char
if (depth->info.depthTestEnable || depth->info.stencilTestEnable) {
pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL;
}
// TODO: We need code to set USES_BLEND_CONSTANT here too, if we're ever gonna use those in thin3d code.
VKPipeline *pipeline = new VKPipeline(vulkan_, desc.uniformDesc ? desc.uniformDesc->uniformBufferSize : 16 * sizeof(float), pipelineFlags, tag);

View File

@ -294,7 +294,8 @@ bool StartVRRender() {
// Decide if the scene is 3D or not
if (g_Config.bEnableVR && !VR_GetConfig(VR_CONFIG_FORCE_2D) && (VR_GetConfig(VR_CONFIG_3D_GEOMETRY_COUNT) > 15)) {
VR_SetConfig(VR_CONFIG_MODE, g_Config.bEnableStereo ? VR_MODE_STEREO_6DOF : VR_MODE_MONO_6DOF);
bool stereo = VR_GetConfig(VR_CONFIG_6DOF_PRECISE) && g_Config.bEnableStereo;
VR_SetConfig(VR_CONFIG_MODE, stereo ? VR_MODE_STEREO_6DOF : VR_MODE_MONO_6DOF);
} else {
VR_SetConfig(VR_CONFIG_MODE, VR_MODE_FLAT_SCREEN);
}

View File

@ -359,13 +359,17 @@ void VR_FinishFrame( engine_t* engine ) {
for (int eye = 0; eye < ovrMaxNumEyes; eye++) {
int imageLayer = engine->appState.Renderer.Multiview ? eye : 0;
ovrFramebuffer* frameBuffer = &engine->appState.Renderer.FrameBuffer[0];
if ((vrMode != VR_MODE_MONO_6DOF) && !engine->appState.Renderer.Multiview) {
frameBuffer = &engine->appState.Renderer.FrameBuffer[eye];
XrPosef pose = invViewTransform[0];
if (vrMode != VR_MODE_MONO_6DOF) {
if (!engine->appState.Renderer.Multiview) {
frameBuffer = &engine->appState.Renderer.FrameBuffer[eye];
}
pose = invViewTransform[eye];
}
memset(&projection_layer_elements[eye], 0, sizeof(XrCompositionLayerProjectionView));
projection_layer_elements[eye].type = XR_TYPE_COMPOSITION_LAYER_PROJECTION_VIEW;
projection_layer_elements[eye].pose = invViewTransform[eye];
projection_layer_elements[eye].pose = pose;
projection_layer_elements[eye].fov = fov;
memset(&projection_layer_elements[eye].subImage, 0, sizeof(XrSwapchainSubImage));
@ -502,9 +506,16 @@ ovrMatrix4f VR_GetMatrix( VRMatrix matrix ) {
output.M[2][3] -= hmdposition.z * (vrConfig[VR_CONFIG_MIRROR_AXIS_Z] ? -1.0f : 1.0f) * scale;
}
if (vrConfig[VR_CONFIG_6DOF_PRECISE] && (matrix == VR_VIEW_MATRIX_RIGHT_EYE)) {
output.M[0][3] += (invViewTransform[1].position.x - invViewTransform[0].position.x) * scale;
output.M[1][3] += (invViewTransform[1].position.y - invViewTransform[0].position.y) * scale;
output.M[2][3] += (invViewTransform[1].position.z - invViewTransform[0].position.z) * scale;
float dx = fabs(invViewTransform[1].position.x - invViewTransform[0].position.x);
float dy = fabs(invViewTransform[1].position.y - invViewTransform[0].position.y);
float dz = fabs(invViewTransform[1].position.z - invViewTransform[0].position.z);
float ipd = sqrt(dx * dx + dy * dy + dz * dz);
XrVector3f separation = {ipd * scale, 0.0f, 0.0f};
separation = XrQuaternionf_Rotate(invView.orientation, separation);
separation = XrVector3f_ScalarMultiply(separation, vrConfig[VR_CONFIG_MIRROR_AXIS_Z] ? -1.0f : 1.0f);
output.M[0][3] -= separation.x;
output.M[1][3] -= separation.y;
output.M[2][3] -= separation.z;
}
} else {
assert(false);

View File

@ -109,7 +109,6 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
CheckSetting(iniFile, gameID, "SplitFramebufferMargin", &flags_.SplitFramebufferMargin);
CheckSetting(iniFile, gameID, "ForceLowerResolutionForEffectsOn", &flags_.ForceLowerResolutionForEffectsOn);
CheckSetting(iniFile, gameID, "AllowDownloadCLUT", &flags_.AllowDownloadCLUT);
CheckSetting(iniFile, gameID, "UploadDepthForCLUTTextures", &flags_.UploadDepthForCLUTTextures);
}
void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) {

View File

@ -89,7 +89,6 @@ struct CompatFlags {
bool SplitFramebufferMargin;
bool ForceLowerResolutionForEffectsOn;
bool AllowDownloadCLUT;
bool UploadDepthForCLUTTextures;
};
struct VRCompat {

View File

@ -2174,68 +2174,89 @@ int sceKernelDeleteTlspl(SceUID uid)
return error;
}
int sceKernelGetTlsAddr(SceUID uid)
{
// TODO: Allocate downward if PSP_TLSPL_ATTR_HIGHMEM?
DEBUG_LOG(SCEKERNEL, "sceKernelGetTlsAddr(%08x)", uid);
// Search state handed to FindTLSByIndex while visiting TLSPL kernel objects.
struct FindTLSByIndexArg {
// Index to look for; compared against each candidate's ntls.index.
int index;
// Receives the matching TLSPL; stays nullptr when no candidate matches.
TLSPL *result = nullptr;
};
// Visitor passed to kernelObjects.Iterate<TLSPL>() to locate a TLSPL by its ntls.index.
// When the candidate's index equals state->index, the candidate is stored in
// state->result and false is returned; for any other candidate it returns true
// and leaves state untouched.
// NOTE(review): returning false presumably tells Iterate to stop early - confirm.
static bool FindTLSByIndex(TLSPL *possible, FindTLSByIndexArg *state) {
	const bool isMatch = possible->ntls.index == state->index;
	if (isMatch)
		state->result = possible;
	return !isMatch;
}
int sceKernelGetTlsAddr(SceUID uid) {
if (!__KernelIsDispatchEnabled() || __IsInInterrupt())
return 0;
return hleLogWarning(SCEKERNEL, 0, "dispatch disabled");
u32 error;
TLSPL *tls = kernelObjects.Get<TLSPL>(uid, error);
if (tls)
{
SceUID threadID = __KernelGetCurThread();
int allocBlock = -1;
bool needsClear = false;
if (!tls) {
if (uid < 0)
return hleLogError(SCEKERNEL, 0, "tlspl not found");
// If the thread already has one, return it.
// There's this weird behavior where it looks up by index. Maybe we shouldn't use uids...
if (!tlsplUsedIndexes[(uid >> 3) & 15])
return hleLogError(SCEKERNEL, 0, "tlspl not found");
FindTLSByIndexArg state;
state.index = (uid >> 3) & 15;
kernelObjects.Iterate<TLSPL>(&FindTLSByIndex, &state);
if (!state.result)
return hleLogError(SCEKERNEL, 0, "tlspl not found");
tls = state.result;
}
SceUID threadID = __KernelGetCurThread();
int allocBlock = -1;
bool needsClear = false;
// If the thread already has one, return it.
for (size_t i = 0; i < tls->ntls.totalBlocks && allocBlock == -1; ++i)
{
if (tls->usage[i] == threadID)
allocBlock = (int) i;
}
if (allocBlock == -1)
{
for (size_t i = 0; i < tls->ntls.totalBlocks && allocBlock == -1; ++i)
{
if (tls->usage[i] == threadID)
allocBlock = (int) i;
// The PSP doesn't give the same block out twice in a row, even if freed.
if (tls->usage[tls->next] == 0)
allocBlock = tls->next;
tls->next = (tls->next + 1) % tls->ntls.totalBlocks;
}
if (allocBlock == -1)
if (allocBlock != -1)
{
for (size_t i = 0; i < tls->ntls.totalBlocks && allocBlock == -1; ++i)
{
// The PSP doesn't give the same block out twice in a row, even if freed.
if (tls->usage[tls->next] == 0)
allocBlock = tls->next;
tls->next = (tls->next + 1) % tls->ntls.totalBlocks;
}
if (allocBlock != -1)
{
tls->usage[allocBlock] = threadID;
tlsplThreadEndChecks.insert(std::make_pair(threadID, uid));
--tls->ntls.freeBlocks;
needsClear = true;
}
tls->usage[allocBlock] = threadID;
tlsplThreadEndChecks.insert(std::make_pair(threadID, uid));
--tls->ntls.freeBlocks;
needsClear = true;
}
if (allocBlock == -1)
{
tls->waitingThreads.push_back(threadID);
__KernelWaitCurThread(WAITTYPE_TLSPL, uid, 1, 0, false, "allocate tls");
return 0;
}
u32 alignedSize = (tls->ntls.blockSize + tls->alignment - 1) & ~(tls->alignment - 1);
u32 allocAddress = tls->address + allocBlock * alignedSize;
NotifyMemInfo(MemBlockFlags::SUB_ALLOC, allocAddress, tls->ntls.blockSize, "TlsAddr");
// We clear the blocks upon first allocation (and also when they are freed, both are necessary.)
if (needsClear) {
Memory::Memset(allocAddress, 0, tls->ntls.blockSize, "TlsAddr");
}
return allocAddress;
}
else
return 0;
if (allocBlock == -1)
{
tls->waitingThreads.push_back(threadID);
__KernelWaitCurThread(WAITTYPE_TLSPL, uid, 1, 0, false, "allocate tls");
return hleLogDebug(SCEKERNEL, 0, "waiting for tls alloc");
}
u32 alignedSize = (tls->ntls.blockSize + tls->alignment - 1) & ~(tls->alignment - 1);
u32 allocAddress = tls->address + allocBlock * alignedSize;
NotifyMemInfo(MemBlockFlags::SUB_ALLOC, allocAddress, tls->ntls.blockSize, "TlsAddr");
// We clear the blocks upon first allocation (and also when they are freed, both are necessary.)
if (needsClear) {
Memory::Memset(allocAddress, 0, tls->ntls.blockSize, "TlsAddr");
}
return hleLogDebug(SCEKERNEL, allocAddress);
}
// Parameters are an educated guess.

View File

@ -70,6 +70,23 @@ Draw2DPipelineInfo GenerateDraw2DCopyColorFs(ShaderWriter &writer) {
};
}
// Emits the fragment shader for the "draw2d_copy_color_rect2lin" 2D pipeline into `writer`.
// The generated shader converts v_texcoord to pixel units using texSize / scaleFactor and
// then derives the sample position from pixels.x alone: u is pixels.x wrapped to the row
// width (tSize.x) and v is the number of whole rows pixels.x spans. pixels.y is not read.
// Returns the pipeline name together with two RASTER_COLOR fields.
// NOTE(review): the two RASTER_COLOR values are presumably the read and write channels -
// confirm against the Draw2DPipelineInfo declaration.
Draw2DPipelineInfo GenerateDraw2DCopyColorRect2LinFs(ShaderWriter &writer) {
writer.DeclareSamplers(samplers);
writer.BeginFSMain(g_draw2Duniforms, varyings, FSFLAG_NONE);
// texSize divided by scaleFactor; used both as the pixel scale and as the row width.
writer.C(" vec2 tSize = texSize / scaleFactor;\n");
writer.C(" vec2 pixels = v_texcoord * tSize;\n");
// Column within the row.
writer.C(" float u = mod(pixels.x, tSize.x);\n");
// Row number, obtained from the same x coordinate.
writer.C(" float v = floor(pixels.x / tSize.x);\n");
// Sample at (u, v), normalized back by tSize.
writer.C(" vec4 outColor = ").SampleTexture2D("tex", "vec2(u, v) / tSize").C(";\n");
writer.EndFSMain("outColor", FSFLAG_NONE);
return Draw2DPipelineInfo{
"draw2d_copy_color_rect2lin",
RASTER_COLOR,
RASTER_COLOR,
};
}
Draw2DPipelineInfo GenerateDraw2DCopyDepthFs(ShaderWriter &writer) {
writer.DeclareSamplers(samplers);
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings, FSFLAG_WRITEDEPTH);
@ -318,6 +335,13 @@ Draw2DPipeline *FramebufferManagerCommon::Get2DPipeline(Draw2DShader shader) {
pipeline = draw2DPipelineColor_;
break;
case DRAW2D_COPY_COLOR_RECT2LIN:
if (!draw2DPipelineColorRect2Lin_) {
draw2DPipelineColorRect2Lin_ = draw2D_.Create2DPipeline(&GenerateDraw2DCopyColorRect2LinFs);
}
pipeline = draw2DPipelineColorRect2Lin_;
break;
case DRAW2D_COPY_DEPTH:
if (!draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
// Can't do it

View File

@ -16,6 +16,7 @@ enum Draw2DShader {
DRAW2D_COPY_DEPTH,
DRAW2D_565_TO_DEPTH,
DRAW2D_565_TO_DEPTH_DESWIZZLE,
DRAW2D_COPY_COLOR_RECT2LIN,
};
inline RasterChannel Draw2DSourceChannel(Draw2DShader shader) {

View File

@ -147,6 +147,8 @@ protected:
bool useHWTransform_ = false;
bool useHWTessellation_ = false;
// Used to prevent unnecessary flushing in softgpu.
bool flushOnParams_ = true;
// Vertex collector buffers
u8 *decoded = nullptr;

View File

@ -547,27 +547,26 @@ void FramebufferManagerCommon::SetDepthFrameBuffer(bool isClearingDepth) {
return;
}
// First time use of this framebuffer's depth buffer.
bool newlyUsingDepth = (currentRenderVfb_->usageFlags & FB_USAGE_RENDER_DEPTH) == 0;
currentRenderVfb_->usageFlags |= FB_USAGE_RENDER_DEPTH;
// If this first draw call is anything other than a clear, "resolve" the depth buffer,
// by copying from any overlapping buffers with fresher content.
if (!isClearingDepth) {
if (!isClearingDepth && useBufferedRendering_) {
CopyToDepthFromOverlappingFramebuffers(currentRenderVfb_);
// Special compatibility trick for Burnout Dominator lens flares. Not sure how to best generalize this. See issue #11100
if (PSP_CoreParameter().compat.flags().UploadDepthForCLUTTextures && (currentRenderVfb_->usageFlags & FB_USAGE_CLUT) != 0) {
// Set the flag, then upload memory contents to depth channel.
// Need to upload the first line of depth buffers, for Burnout Dominator lens flares. See issue #11100 and comments to #16081.
// Might make this more generic and upload the whole depth buffer if we find it's needed for something.
if (newlyUsingDepth) {
// Sanity check the depth buffer pointer.
if (currentRenderVfb_->z_address != 0 && currentRenderVfb_->z_address != currentRenderVfb_->fb_address) {
if (Memory::IsValidRange(currentRenderVfb_->z_address, currentRenderVfb_->width * 2)) {
const u16 *src = (const u16 *)Memory::GetPointerUnchecked(currentRenderVfb_->z_address);
DrawPixels(currentRenderVfb_, 0, 0, (const u8 *)src, GE_FORMAT_DEPTH16, currentRenderVfb_->z_stride, currentRenderVfb_->width, currentRenderVfb_->height, RASTER_DEPTH, "Depth Upload");
}
if (Memory::IsValidRange(currentRenderVfb_->z_address, currentRenderVfb_->width * 2)) {
const u16 *src = (const u16 *)Memory::GetPointerUnchecked(currentRenderVfb_->z_address);
DrawPixels(currentRenderVfb_, 0, 0, (const u8 *)src, GE_FORMAT_DEPTH16, currentRenderVfb_->z_stride, currentRenderVfb_->width, currentRenderVfb_->height, RASTER_DEPTH, "Depth Upload");
}
}
}
// First time use of this framebuffer's depth buffer.
currentRenderVfb_->usageFlags |= FB_USAGE_RENDER_DEPTH;
currentRenderVfb_->depthBindSeq = GetBindSeqCount();
}
@ -647,7 +646,7 @@ void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFra
}
}
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE);
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
// Can't easily dynamically create these strings, we just pass along the pointer.
@ -915,7 +914,7 @@ void FramebufferManagerCommon::BlitFramebufferDepth(VirtualFramebuffer *src, Vir
// Some GPUs can copy depth but only if stencil gets to come along for the ride. We only want to use this if there is no blit functionality.
if (useCopy) {
draw_->CopyFramebufferImage(src->fbo, 0, 0, 0, 0, dst->fbo, 0, 0, 0, 0, w, h, 1, Draw::FB_DEPTH_BIT, "BlitFramebufferDepth");
draw_->CopyFramebufferImage(src->fbo, 0, 0, 0, 0, dst->fbo, 0, 0, 0, 0, w, h, 1, Draw::FB_DEPTH_BIT, "CopyFramebufferDepth");
RebindFramebuffer("After BlitFramebufferDepth");
} else if (useBlit) {
// We'll accept whether we get a separate depth blit or not...
@ -1021,7 +1020,7 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size) {
// TODO: Could go through all FBOs, but probably not important?
// TODO: Could also check for inner changes, but video is most important.
// TODO: This shouldn't care if it's a display framebuf or not, should work exactly the same.
bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr();
bool isDisplayBuf = addr == CurrentDisplayFramebufAddr() || addr == PrevDisplayFramebufAddr();
// TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help.
if (!Memory::IsValidAddress(displayFramebufPtr_))
return;
@ -1097,7 +1096,7 @@ void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int
pixelsTex->Release();
draw_->InvalidateCachedState();
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
}
@ -1540,7 +1539,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w,
if (creating) {
WARN_LOG(FRAMEBUF, "Creating %s FBO at %08x/%d %dx%d (force=%d)", GeBufferFormatToString(vfb->fb_format), vfb->fb_address, vfb->fb_stride, vfb->bufferWidth, vfb->bufferHeight, (int)force);
} else {
WARN_LOG(FRAMEBUF, "Resizing %s FBO at %08x/%d from %dx%d to %dx%d (force=%d)", GeBufferFormatToString(vfb->fb_format), vfb->fb_address, vfb->fb_stride, old.bufferWidth, old.bufferHeight, vfb->bufferWidth, vfb->bufferHeight, (int)force);
WARN_LOG(FRAMEBUF, "Resizing %s FBO at %08x/%d from %dx%d to %dx%d (force=%d, skipCopy=%d)", GeBufferFormatToString(vfb->fb_format), vfb->fb_address, vfb->fb_stride, old.bufferWidth, old.bufferHeight, vfb->bufferWidth, vfb->bufferHeight, (int)force, (int)skipCopy);
}
// During hardware rendering, we always render at full color depth even if the game wouldn't on real hardware.
@ -1578,8 +1577,10 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w,
if (vfb->fbo) {
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
if (!skipCopy) {
BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min((u16)oldWidth, std::min(vfb->bufferWidth, vfb->width)), std::min((u16)oldHeight, std::min(vfb->height, vfb->bufferHeight)), 0, RASTER_COLOR, "Blit_ResizeFramebufFBO");
// Depth copying is handled by deferred copies later.
BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min((u16)oldWidth, std::min(vfb->bufferWidth, vfb->width)), std::min((u16)oldHeight, std::min(vfb->height, vfb->bufferHeight)), 0, RASTER_COLOR, "BlitColor_ResizeFramebufFBO");
}
if (vfb->usageFlags & FB_USAGE_RENDER_DEPTH) {
BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min((u16)oldWidth, std::min(vfb->bufferWidth, vfb->width)), std::min((u16)oldHeight, std::min(vfb->height, vfb->bufferHeight)), 0, RASTER_DEPTH, "BlitDepth_ResizeFramebufFBO");
}
}
fbosToDelete_.push_back(old.fbo);
@ -2182,7 +2183,7 @@ void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstS
// We may still do a partial block draw below if this doesn't pass.
if (!useBufferedRendering_ && dstStride >= 480 && width >= 480 && height == 272) {
bool isPrevDisplayBuffer = PrevDisplayFramebufAddr() == dstBasePtr;
bool isDisplayBuffer = DisplayFramebufAddr() == dstBasePtr;
bool isDisplayBuffer = CurrentDisplayFramebufAddr() == dstBasePtr;
if (isPrevDisplayBuffer || isDisplayBuffer) {
FlushBeforeCopy();
DrawFramebufferToOutput(Memory::GetPointerUnchecked(dstBasePtr), dstStride, displayFormat_);
@ -2214,8 +2215,9 @@ void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstS
int dstBpp = BufferFormatBytesPerPixel(dstRect.vfb->fb_format);
float dstXFactor = (float)bpp / dstBpp;
if (dstRect.w_bytes / bpp > dstRect.vfb->width || dstRect.h > dstRect.vfb->height) {
// The buffer isn't big enough, and we have a clear hint of size. Resize.
// The buffer isn't big enough, and we have a clear hint of size. Resize.
// This happens in Valkyrie Profile when uploading video at the ending.
// Also happens to the CLUT framebuffer in the Burnout Dominator lens flare effect. See #16075
ResizeFramebufFBO(dstRect.vfb, dstRect.w_bytes / bpp, dstRect.h, false, true);
// Make sure we don't flop back and forth.
dstRect.vfb->newWidth = std::max(dstRect.w_bytes / bpp, (int)dstRect.vfb->width);
@ -2357,8 +2359,8 @@ void FramebufferManagerCommon::ShowScreenResolution() {
// * Save state screenshots(could probably be async but need to manage the stall.)
bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxScaleFactor) {
VirtualFramebuffer *vfb = currentRenderVfb_;
if (!vfb) {
vfb = GetVFBAt(fb_address);
if (!vfb || vfb->fb_address != fb_address) {
vfb = ResolveVFB(fb_address, fb_stride, format);
}
if (!vfb) {
@ -2701,6 +2703,7 @@ void FramebufferManagerCommon::DeviceLost() {
DoRelease(stencilUploadSampler_);
DoRelease(stencilUploadPipeline_);
DoRelease(draw2DPipelineColor_);
DoRelease(draw2DPipelineColorRect2Lin_);
DoRelease(draw2DPipelineDepth_);
DoRelease(draw2DPipeline565ToDepth_);
DoRelease(draw2DPipeline565ToDepthDeswizzle_);
@ -2766,7 +2769,7 @@ void FramebufferManagerCommon::DrawActiveTexture(float x, float y, float w, floa
draw2D_.DrawStrip2D(nullptr, coord, 4, (flags & DRAWTEX_LINEAR) != 0, Get2DPipeline((flags & DRAWTEX_DEPTH) ? DRAW2D_COPY_DEPTH : DRAW2D_COPY_COLOR));
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, RasterChannel channel, const char *tag) {
@ -2779,6 +2782,11 @@ void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX
return;
}
if (channel == RASTER_DEPTH && !draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
// Can't do anything :(
return;
}
// Perform a little bit of clipping first.
// Block transfer coords are unsigned so I don't think we need to clip on the left side.. Although there are
// other uses for BlitFramebuffer.
@ -2870,7 +2878,7 @@ void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX
draw_->InvalidateCachedState();
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
// The input is raw pixel coordinates, scale not taken into account.
@ -2906,7 +2914,7 @@ void FramebufferManagerCommon::BlitUsingRaster(
draw2D_.Blit(pipeline, srcX1, srcY1, srcX2, srcY2, destX1, destY1, destX2, destY2, (float)srcW, (float)srcH, (float)destW, (float)destH, linearFilter, scaleFactor);
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
VirtualFramebuffer *FramebufferManagerCommon::ResolveFramebufferColorToFormat(VirtualFramebuffer *src, GEBufferFormat newFormat) {

View File

@ -335,15 +335,18 @@ public:
// Returns fb_address of prevDisplayFramebuf_, or 0 when that pointer is null.
u32 PrevDisplayFramebufAddr() const {
	if (!prevDisplayFramebuf_)
		return 0;
	return prevDisplayFramebuf_->fb_address;
}
u32 DisplayFramebufAddr() const {
u32 CurrentDisplayFramebufAddr() const {
return displayFramebuf_ ? displayFramebuf_->fb_address : 0;
}
u32 DisplayFramebufAddr() const {
return displayFramebufPtr_;
}
u32 DisplayFramebufStride() const {
return displayFramebuf_ ? displayStride_ : 0;
return displayStride_;
}
GEBufferFormat DisplayFramebufFormat() const {
return displayFramebuf_ ? displayFormat_ : GE_FORMAT_INVALID;
return displayFormat_;
}
bool UseBufferedRendering() const {
@ -566,6 +569,7 @@ protected:
// Draw2D pipelines
Draw2DPipeline *draw2DPipelineColor_ = nullptr;
Draw2DPipeline *draw2DPipelineColorRect2Lin_ = nullptr;
Draw2DPipeline *draw2DPipelineDepth_ = nullptr;
Draw2DPipeline *draw2DPipeline565ToDepth_ = nullptr;
Draw2DPipeline *draw2DPipeline565ToDepthDeswizzle_ = nullptr;

View File

@ -109,6 +109,10 @@ enum : uint64_t {
DIRTY_VERTEXSHADER_STATE = 1ULL << 47,
DIRTY_FRAGMENTSHADER_STATE = 1ULL << 48,
// Everything that's not uniforms. Use this after using thin3d.
// TODO: Should we also add DIRTY_FRAMEBUF here? It kinda generally takes care of itself.
DIRTY_ALL_RENDER_STATE = DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS,
DIRTY_ALL = 0xFFFFFFFFFFFFFFFF
};

View File

@ -790,13 +790,13 @@ void SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *&inds,
float yoff = addWidth.y * dy;
// bottom right
trans[0].CopyFromWithOffset(transVtx2, xoff, yoff);
trans[0].CopyFromWithOffset(transVtx2, xoff * transVtx2.pos_w, yoff * transVtx2.pos_w);
// top right
trans[1].CopyFromWithOffset(transVtx1, xoff, yoff);
trans[1].CopyFromWithOffset(transVtx1, xoff * transVtx1.pos_w, yoff * transVtx1.pos_w);
// top left
trans[2].CopyFromWithOffset(transVtx1, -xoff, -yoff);
trans[2].CopyFromWithOffset(transVtx1, -xoff * transVtx1.pos_w, -yoff * transVtx1.pos_w);
// bottom left
trans[3].CopyFromWithOffset(transVtx2, -xoff, -yoff);
trans[3].CopyFromWithOffset(transVtx2, -xoff * transVtx2.pos_w, -yoff * transVtx2.pos_w);
// Triangle: BR-TR-TL
indsOut[0] = i * 2 + 0;
@ -835,17 +835,17 @@ void SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *&inds,
// bottom right
trans[0] = transVtxBL;
trans[0].x += addWidth.x * dx;
trans[0].y += addWidth.y * dy;
trans[0].u += addWidth.x * du;
trans[0].v += addWidth.y * dv;
trans[0].x += addWidth.x * dx * trans[0].pos_w;
trans[0].y += addWidth.y * dy * trans[0].pos_w;
trans[0].u += addWidth.x * du * trans[0].uv_w;
trans[0].v += addWidth.y * dv * trans[0].uv_w;
// top right
trans[1] = transVtxTL;
trans[1].x += addWidth.x * dx;
trans[1].y += addWidth.y * dy;
trans[1].u += addWidth.x * du;
trans[1].v += addWidth.y * dv;
trans[1].x += addWidth.x * dx * trans[1].pos_w;
trans[1].y += addWidth.y * dy * trans[1].pos_w;
trans[1].u += addWidth.x * du * trans[1].uv_w;
trans[1].v += addWidth.y * dv * trans[1].uv_w;
// top left
trans[2] = transVtxTL;

View File

@ -577,7 +577,8 @@ void DrawEngineCommon::SubmitCurve(const void *control_points, const void *indic
if (output.count)
DispatchSubmitPrim(output.vertices, output.indices, PatchPrimToPrim(surface.primType), output.count, vertTypeID, gstate.getCullMode(), &generatedBytesRead);
DispatchFlush();
if (flushOnParams_)
DispatchFlush();
if (origVertType & GE_VTYPE_TC_MASK) {
gstate_c.uv = prevUVScale;

View File

@ -186,13 +186,9 @@ bool FramebufferManagerCommon::PerformStencilUpload(u32 addr, int size, StencilU
// Otherwise, we can skip alpha in many cases, in which case we don't even use a shader.
if (flags & StencilUpload::IGNORE_ALPHA) {
shaderManager_->DirtyLastShader();
if (dstBuffer->fbo) {
draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "PerformStencilUpload_Clear");
}
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_DEPTHSTENCIL_STATE);
return true;
}
}
@ -333,6 +329,6 @@ bool FramebufferManagerCommon::PerformStencilUpload(u32 addr, int size, StencilU
tex->Release();
draw_->InvalidateCachedState();
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
return true;
}

View File

@ -418,10 +418,13 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
// Should probably revisit how this works..
gstate_c.SetNeedShaderTexclamp(false);
gstate_c.skipDrawReason &= ~SKIPDRAW_BAD_FB_TEXTURE;
if (gstate_c.bgraTexture != isBgraBackend_) {
bool isBgraTexture = isBgraBackend_ && !hasClutGPU;
if (gstate_c.bgraTexture != isBgraTexture) {
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
gstate_c.bgraTexture = isBgraBackend_;
gstate_c.bgraTexture = isBgraTexture;
if (entryIter != cache_.end()) {
entry = entryIter->second.get();
@ -1015,7 +1018,8 @@ bool TextureCacheCommon::MatchFramebuffer(
return false;
}
if (fb_stride_in_bytes != tex_stride_in_bytes) {
// Note the check for texHeight - we really don't care about a stride mismatch if texHeight == 1.
if (fb_stride_in_bytes != tex_stride_in_bytes && texHeight > 1) {
// Probably irrelevant. Although, as we shall see soon, there are exceptions.
// Burnout Dominator lens flare trick special case.
if (fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT8 && texWidth == 4 && texHeight == 1) {
@ -1205,6 +1209,8 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
clutRenderOffset_ = MAX_CLUT_OFFSET;
const std::vector<VirtualFramebuffer *> &framebuffers = framebufferManager_->Framebuffers();
u32 bestClutAddress = 0xFFFFFFFF;
VirtualFramebuffer *chosenFramebuffer = nullptr;
for (VirtualFramebuffer *framebuffer : framebuffers) {
const u32 fb_address = framebuffer->fb_address & 0x3FFFFFFF;
@ -1231,7 +1237,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
WARN_LOG_N_TIMES(clutfb, 5, G3D, "Detected LoadCLUT(%d bytes) from framebuffer %08x (%s), byte offset %d", loadBytes, fb_address, GeBufferFormatToString(framebuffer->fb_format), offset);
framebuffer->last_frame_clut = gpuStats.numFlips;
framebuffer->usageFlags |= FB_USAGE_CLUT;
clutRenderAddress_ = framebuffer->fb_address;
bestClutAddress = framebuffer->fb_address;
clutRenderOffset_ = (u32)offset;
chosenFramebuffer = framebuffer;
if (offset == 0) {
@ -1242,7 +1248,9 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
}
}
if (chosenFramebuffer) {
if (chosenFramebuffer && chosenFramebuffer->fbo) {
clutRenderAddress_ = bestClutAddress;
if (!dynamicClutTemp_) {
Draw::FramebufferDesc desc{};
desc.width = 512;
@ -1256,11 +1264,12 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
dynamicClutTemp_ = draw_->CreateFramebuffer(desc);
}
// Download the pixels to our temp clut, scaling down if needed.
// Copy the pixels to our temp clut, scaling down if needed and wrapping.
// TODO: Take the clutRenderOffset_ into account here.
framebufferManager_->BlitUsingRaster(
chosenFramebuffer->fbo, 0.0f, 0.0f, 512.0f * chosenFramebuffer->renderScaleFactor, 1.0f,
dynamicClutTemp_, 0.0f, 0.0f, 512.0f, 1.0f,
false, 1.0f, framebufferManager_->Get2DPipeline(DRAW2D_COPY_COLOR), "copy_clut_to_temp");
false, chosenFramebuffer->renderScaleFactor, framebufferManager_->Get2DPipeline(DRAW2D_COPY_COLOR_RECT2LIN), "copy_clut_to_temp");
clutRenderFormat_ = chosenFramebuffer->fb_format;
}
NotifyMemInfo(MemBlockFlags::ALLOC, clutAddr, loadBytes, "CLUT");
@ -2091,7 +2100,6 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
mode = ShaderDepalMode::SMOOTHED;
}
// Since we started/ended render passes, might need these.
gstate_c.Dirty(DIRTY_DEPAL);
gstate_c.SetUseShaderDepal(mode);
gstate_c.depalFramebufferFormat = framebuffer->fb_format;
@ -2189,8 +2197,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
SamplerCacheKey samplerKey = GetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight);
ApplySamplingParams(samplerKey);
// Since we started/ended render passes, might need these.
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
// Since we've drawn using thin3d, might need these.
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
// Applies depal to a normal (non-framebuffer) texture, pre-decoded to CLUT8 format.
@ -2281,8 +2289,8 @@ void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) {
SamplerCacheKey samplerKey = GetFramebufferSamplingParams(texWidth, texHeight);
ApplySamplingParams(samplerKey);
// Since we started/ended render passes, might need these.
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
// Since we've drawn using thin3d, might need these.
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
void TextureCacheCommon::Clear(bool delete_them) {
@ -2630,14 +2638,32 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt
}
}
if (isPPGETexture) {
plan.replaced = &replacer_.FindNone();
plan.replaceValid = false;
bool canReplace = !isPPGETexture;
if (entry->status & TexCacheEntry::TexStatus::STATUS_CLUT_GPU) {
_dbg_assert_(entry->format == GE_TFMT_CLUT4 || entry->format == GE_TFMT_CLUT8);
plan.decodeToClut8 = true;
// We only support 1 mip level when doing CLUT on GPU for now.
// Supporting more would be possible, just not very interesting until we need it.
plan.levelsToCreate = 1;
plan.levelsToLoad = 1;
plan.maxPossibleLevels = 1;
plan.scaleFactor = 1;
plan.saveTexture = false; // Can't yet save these properly.
canReplace = false;
} else {
plan.decodeToClut8 = false;
}
if (canReplace) {
plan.replaced = &FindReplacement(entry, plan.w, plan.h, plan.depth);
plan.replaceValid = plan.replaced->Valid();
} else {
plan.replaced = &replacer_.FindNone();
plan.replaceValid = false;
}
// NOTE! Last chance to change scale factor here!
plan.saveTexture = false;
if (plan.replaceValid) {
// We're replacing, so we won't scale.
@ -2648,7 +2674,7 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt
// But, we still need to create the texture at a larger size.
plan.replaced->GetSize(0, plan.createW, plan.createH);
} else {
if (replacer_.Enabled() && !plan.replaceValid && plan.depth == 1) {
if (replacer_.Enabled() && !plan.replaceValid && plan.depth == 1 && canReplace) {
ReplacedTextureDecodeInfo replacedInfo;
// TODO: Do we handle the race where a replacement becomes valid AFTER this but before we save?
replacedInfo.cachekey = entry->CacheKey();
@ -2673,27 +2699,12 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt
plan.levelsToLoad = 1;
}
if (plan.isVideo || plan.depth != 1) {
if (plan.isVideo || plan.depth != 1 || plan.decodeToClut8) {
plan.maxPossibleLevels = 1;
} else {
plan.maxPossibleLevels = log2i(std::min(plan.createW, plan.createH)) + 1;
}
if (entry->status & TexCacheEntry::TexStatus::STATUS_CLUT_GPU) {
_dbg_assert_(entry->format == GE_TFMT_CLUT4 || entry->format == GE_TFMT_CLUT8);
plan.decodeToClut8 = true;
// We only support 1 mip level when doing CLUT on GPU for now.
// Supporting more would be possible, just not very interesting until we need it.
plan.levelsToCreate = 1;
plan.levelsToLoad = 1;
plan.maxPossibleLevels = 1;
plan.scaleFactor = 1;
plan.saveTexture = false; // Can't yet save these properly.
// TODO: Also forcibly disable replacement, or check that the replacement is a 8-bit paletted texture.
} else {
plan.decodeToClut8 = false;
}
if (plan.levelsToCreate == 1) {
entry->status |= TexCacheEntry::STATUS_NO_MIPS;
} else {

View File

@ -1128,7 +1128,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
}
} else {
if (hasTexcoord) {
WRITE(p, " %sv_texcoord = vec3(texcoord.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n", compat.vsOutPrefix);
if (doBezier || doSpline)
WRITE(p, " %sv_texcoord = vec3(tess.tex.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n", compat.vsOutPrefix);
else
WRITE(p, " %sv_texcoord = vec3(texcoord.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n", compat.vsOutPrefix);
} else {
WRITE(p, " %sv_texcoord = vec3(u_uvscaleoffset.zw, 0.0);\n", compat.vsOutPrefix);
}
@ -1140,26 +1143,36 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
std::string temp_tc;
switch (uvProjMode) {
case GE_PROJMAP_POSITION: // Use model space XYZ as source
temp_tc = "vec4(position, 1.0)";
if (doBezier || doSpline)
temp_tc = "vec4(tess.pos, 1.0)";
else
temp_tc = "vec4(position, 1.0)";
break;
case GE_PROJMAP_UV: // Use unscaled UV as source
{
// prescale is false here.
if (hasTexcoord) {
temp_tc = "vec4(texcoord.xy, 0.0, 1.0)";
if (doBezier || doSpline)
temp_tc = "vec4(tess.tex.xy, 0.0, 1.0)";
else
temp_tc = "vec4(texcoord.xy, 0.0, 1.0)";
} else {
temp_tc = "vec4(0.0, 0.0, 0.0, 1.0)";
}
}
break;
case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized transformed normal as source
if (hasNormal)
if ((doBezier || doSpline) && hasNormalTess)
temp_tc = StringFromFormat("length(tess.nrm) == 0.0 ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(normalize(%stess.nrm), 1.0)", flipNormalTess ? "-" : "");
else if (hasNormal)
temp_tc = StringFromFormat("length(normal) == 0.0 ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(normalize(%snormal), 1.0)", flipNormal ? "-" : "");
else
temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)";
break;
case GE_PROJMAP_NORMAL: // Use non-normalized transformed normal as source
if (hasNormal)
if ((doBezier || doSpline) && hasNormalTess)
temp_tc = flipNormalTess ? "vec4(-tess.nrm, 1.0)" : "vec4(tess.nrm, 1.0)";
else if (hasNormal)
temp_tc = flipNormal ? "vec4(-normal, 1.0)" : "vec4(normal, 1.0)";
else
temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)";
@ -1189,37 +1202,34 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " %sv_fogdepth = (viewPos.z + u_fogcoef.x) * u_fogcoef.y;\n", compat.vsOutPrefix);
}
if (clipClampedDepth || (vertexRangeCulling && !IsVRBuild())) {
WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n");
}
if (clipClampedDepth) {
const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]";
const char *clip1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]";
WRITE(p, " mediump float integerZ = projPos.z * u_depthRange.x + u_depthRange.y;\n");
// This should clip against minz, but only when it's above zero.
if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
// On OpenGL/GLES, these values account for the -1 -> 1 range.
WRITE(p, " if (u_depthRange.y - u_depthRange.x >= 1.0) {\n");
WRITE(p, " %sgl_ClipDistance%s = outPos.w + outPos.z;\n", compat.vsOutPrefix, clip0);
} else {
// Everywhere else, it's 0 -> 1, simpler.
WRITE(p, " if (u_depthRange.y >= 1.0) {\n");
WRITE(p, " %sgl_ClipDistance%s = outPos.z;\n", compat.vsOutPrefix, clip0);
}
WRITE(p, " %sgl_ClipDistance%s = integerZ;\n", compat.vsOutPrefix, clip0);
WRITE(p, " } else {\n");
WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip0);
WRITE(p, " }\n");
// This is similar, but for maxz when it's below 65535.0. -1/0 don't matter here.
WRITE(p, " if (u_depthRange.x + u_depthRange.y <= 65534.0) {\n");
WRITE(p, " %sgl_ClipDistance%s = 65535.0 - integerZ;\n", compat.vsOutPrefix, clip1);
WRITE(p, " %sgl_ClipDistance%s = outPos.w - outPos.z;\n", compat.vsOutPrefix, clip1);
WRITE(p, " } else {\n");
WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip1);
WRITE(p, " }\n");
}
if (vertexRangeCulling && !IsVRBuild()) {
WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n");
WRITE(p, " float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");
// Vertex range culling doesn't happen when Z clips, note sign of w is important.
WRITE(p, " if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n");

View File

@ -500,7 +500,8 @@ void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *p
prevSizeU = size_u;
prevSizeV = size_v;
if (!data_tex[0])
data_tex[0] = renderManager_->CreateTexture(GL_TEXTURE_2D, size_u * 3, size_v, 1, 1);
renderManager_->DeleteTexture(data_tex[0]);
data_tex[0] = renderManager_->CreateTexture(GL_TEXTURE_2D, size_u * 3, size_v, 1, 1);
renderManager_->TextureImage(data_tex[0], 0, size_u * 3, size_v, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false);
renderManager_->FinalizeTexture(data_tex[0], 0, false);
}
@ -518,7 +519,8 @@ void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *p
if (prevSizeWU < weights.size_u) {
prevSizeWU = weights.size_u;
if (!data_tex[1])
data_tex[1] = renderManager_->CreateTexture(GL_TEXTURE_2D, weights.size_u * 2, 1, 1, 1);
renderManager_->DeleteTexture(data_tex[1]);
data_tex[1] = renderManager_->CreateTexture(GL_TEXTURE_2D, weights.size_u * 2, 1, 1, 1);
renderManager_->TextureImage(data_tex[1], 0, weights.size_u * 2, 1, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false);
renderManager_->FinalizeTexture(data_tex[1], 0, false);
}
@ -529,7 +531,8 @@ void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *p
if (prevSizeWV < weights.size_v) {
prevSizeWV = weights.size_v;
if (!data_tex[2])
data_tex[2] = renderManager_->CreateTexture(GL_TEXTURE_2D, weights.size_v * 2, 1, 1, 1);
renderManager_->DeleteTexture(data_tex[2]);
data_tex[2] = renderManager_->CreateTexture(GL_TEXTURE_2D, weights.size_v * 2, 1, 1, 1);
renderManager_->TextureImage(data_tex[2], 0, weights.size_v * 2, 1, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false);
renderManager_->FinalizeTexture(data_tex[2], 0, false);
}

View File

@ -1942,7 +1942,8 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
}
// Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already.
drawEngineCommon_->DispatchFlush();
if (flushOnParams_)
drawEngineCommon_->DispatchFlush();
Spline::BezierSurface surface;
surface.tess_u = gstate.getPatchDivisionU();
@ -2014,7 +2015,8 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) {
}
// Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already.
drawEngineCommon_->DispatchFlush();
if (flushOnParams_)
drawEngineCommon_->DispatchFlush();
Spline::SplineSurface surface;
surface.tess_u = gstate.getPatchDivisionU();

View File

@ -469,12 +469,12 @@ struct UVScale {
// Might want to move this mechanism into the backend later.
enum {
GPU_SUPPORTS_DUALSOURCE_BLEND = FLAG_BIT(0),
// Free bit: 1
GPU_SUPPORTS_GLSL_330 = FLAG_BIT(2),
// Free bits: 1-2
GPU_SUPPORTS_VS_RANGE_CULLING = FLAG_BIT(3),
GPU_SUPPORTS_BLEND_MINMAX = FLAG_BIT(4),
GPU_SUPPORTS_LOGIC_OP = FLAG_BIT(5),
GPU_USE_DEPTH_RANGE_HACK = FLAG_BIT(6),
// Free bit: 7
GPU_SUPPORTS_ANISOTROPY = FLAG_BIT(8),
GPU_USE_CLEAR_RAM_HACK = FLAG_BIT(9),
GPU_SUPPORTS_INSTANCE_RENDERING = FLAG_BIT(10),
@ -485,8 +485,7 @@ enum {
// Free bit: 15
GPU_SUPPORTS_DEPTH_TEXTURE = FLAG_BIT(16),
GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17),
GPU_SUPPORTS_FRAGMENT_SHADER_INTERLOCK = FLAG_BIT(18),
// Free bits: 19
// Free bits: 18-19
GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20),
GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21),
GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT = FLAG_BIT(22),

View File

@ -197,16 +197,16 @@ void BinManager::UpdateState(bool throughMode) {
Flush("tex");
// Okay, now update what's pending.
constexpr uint32_t mirrorMask = 0x0FFFFFFF & ~0x00600000;
const uint32_t bpp = state.pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2;
pendingWrites_[0].Expand(gstate.getFrameBufAddress() & mirrorMask, bpp, gstate.FrameBufStride(), scissorTL, scissorBR);
if (state.pixelID.depthWrite)
pendingWrites_[1].Expand(gstate.getDepthBufAddress() & mirrorMask, 2, gstate.DepthBufStride(), scissorTL, scissorBR);
MarkPendingWrites(state);
ClearDirty(SoftDirty::BINNER_RANGE);
} else if (pendingOverlap_) {
if (HasTextureWrite(state))
if (HasTextureWrite(state)) {
Flush("tex");
// We need the pending writes set, which flushing cleared. Set them again.
MarkPendingWrites(state);
}
}
if (HasDirty(SoftDirty::BINNER_OVERLAP)) {
@ -282,6 +282,17 @@ void BinManager::MarkPendingReads(const Rasterizer::RasterizerState &state) {
}
}
// Expands the pending write ranges using the current scissor rectangle:
// pendingWrites_[0] for the color buffer, and pendingWrites_[1] for the depth
// buffer when state.pixelID.depthWrite is set.
void BinManager::MarkPendingWrites(const Rasterizer::RasterizerState &state) {
	constexpr uint32_t mirrorMask = 0x0FFFFFFF & ~0x00600000;

	// The bottom right corner is the smaller of scissor and region on each axis.
	const auto clampedX2 = std::min(gstate.getScissorX2(), gstate.getRegionX2());
	const auto clampedY2 = std::min(gstate.getScissorY2(), gstate.getRegionY2());
	DrawingCoords topLeft(gstate.getScissorX1(), gstate.getScissorY1());
	DrawingCoords bottomRight(clampedX2, clampedY2);

	// GE_FORMAT_8888 uses 4 bytes per pixel, all other formats are treated as 2.
	uint32_t colorBpp = 2;
	if (state.pixelID.FBFormat() == GE_FORMAT_8888)
		colorBpp = 4;
	const uint32_t colorAddr = gstate.getFrameBufAddress() & mirrorMask;
	pendingWrites_[0].Expand(colorAddr, colorBpp, gstate.FrameBufStride(), topLeft, bottomRight);

	// Depth is only marked when the draw can write it; it is tracked at 2 bytes per pixel.
	if (state.pixelID.depthWrite) {
		const uint32_t depthAddr = gstate.getDepthBufAddress() & mirrorMask;
		pendingWrites_[1].Expand(depthAddr, 2, gstate.DepthBufStride(), topLeft, bottomRight);
	}
}
inline void BinDirtyRange::Expand(uint32_t newBase, uint32_t bpp, uint32_t stride, DrawingCoords &tl, DrawingCoords &br) {
const uint32_t w = br.x - tl.x + 1;
const uint32_t h = br.y - tl.y + 1;

View File

@ -267,6 +267,7 @@ private:
int mostThreads_ = 0;
void MarkPendingReads(const Rasterizer::RasterizerState &state);
void MarkPendingWrites(const Rasterizer::RasterizerState &state);
bool HasTextureWrite(const Rasterizer::RasterizerState &state);
BinCoords Scissor(BinCoords range);
BinCoords Range(const VertexData &v0, const VertexData &v1, const VertexData &v2);

View File

@ -1136,13 +1136,20 @@ void DrawPoint(const VertexData &v0, const BinCoords &range, const RasterizerSta
}
void ClearRectangle(const VertexData &v0, const VertexData &v1, const BinCoords &range, const RasterizerState &state) {
DrawingCoords pprime = TransformUnit::ScreenToDrawing(range.x1, range.y1);
DrawingCoords pend = TransformUnit::ScreenToDrawing(range.x2, range.y2);
int entireX1 = std::min(v0.screenpos.x, v1.screenpos.x);
int entireY1 = std::min(v0.screenpos.y, v1.screenpos.y);
int entireX2 = std::max(v0.screenpos.x, v1.screenpos.x) - 1;
int entireY2 = std::max(v0.screenpos.y, v1.screenpos.y) - 1;
int minX = std::max(entireX1, range.x1) | (SCREEN_SCALE_FACTOR / 2 - 1);
int minY = std::max(entireY1, range.y1) | (SCREEN_SCALE_FACTOR / 2 - 1);
int maxX = std::min(entireX2, range.x2);
int maxY = std::min(entireY2, range.y2);
const DrawingCoords pprime = TransformUnit::ScreenToDrawing(minX, minY);
const DrawingCoords pend = TransformUnit::ScreenToDrawing(maxX, maxY);
auto &pixelID = state.pixelID;
auto &samplerID = state.samplerID;
// Min and max are in PSP fixed point screen coordinates, 16 here is for the 4 subpixel bits.
const int w = (range.x2 - range.x1 + 1) / SCREEN_SCALE_FACTOR;
const int w = pend.x - pprime.x + 1;
if (w <= 0)
return;

View File

@ -93,7 +93,10 @@ static inline bool AlphaTestIsNeedless(const PixelFuncID &pixelID) {
case GE_COMP_NOTEQUAL:
case GE_COMP_GREATER:
case GE_COMP_GEQUAL:
return pixelID.alphaBlend && pixelID.alphaTestRef == 0 && !pixelID.hasAlphaTestMask;
if (pixelID.alphaTestRef != 0 || pixelID.hasAlphaTestMask)
return false;
// DrawSinglePixel5551 assumes it can take the src color directly if full alpha.
return pixelID.alphaBlend && pixelID.AlphaBlendSrc() == PixelBlendFactor::SRCALPHA && pixelID.AlphaBlendDst() == PixelBlendFactor::INVSRCALPHA;
}
return false;

View File

@ -490,14 +490,16 @@ void SoftGPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat for
DSStretch g_DarkStalkerStretch;
void SoftGPU::ConvertTextureDescFrom16(Draw::TextureDesc &desc, int srcwidth, int srcheight, u8 *overrideData) {
void SoftGPU::ConvertTextureDescFrom16(Draw::TextureDesc &desc, int srcwidth, int srcheight, const uint16_t *overrideData) {
// TODO: This should probably be converted in a shader instead..
fbTexBuffer_.resize(srcwidth * srcheight);
FormatBuffer displayBuffer;
displayBuffer.data = overrideData ? overrideData : Memory::GetPointerWrite(displayFramebuf_);
const uint16_t *displayBuffer = overrideData;
if (!displayBuffer)
displayBuffer = (const uint16_t *)Memory::GetPointer(displayFramebuf_);
for (int y = 0; y < srcheight; ++y) {
u32 *buf_line = &fbTexBuffer_[y * srcwidth];
const u16 *fb_line = &displayBuffer.as16[y * displayStride_];
const u16 *fb_line = &displayBuffer[y * displayStride_];
switch (displayFormat_) {
case GE_FORMAT_565:
@ -557,7 +559,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
bool hasPostShader = presentation_ && presentation_->HasPostShader();
if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && displayFormat_ == GE_FORMAT_5551 && g_DarkStalkerStretch != DSStretch::Off) {
u8 *data = Memory::GetPointerWrite(0x04088000);
const u8 *data = Memory::GetPointerWrite(0x04088000);
bool fillDesc = true;
if (draw_->GetDataFormatSupport(Draw::DataFormat::A1B5G5R5_UNORM_PACK16) & Draw::FMT_TEXTURE) {
// The perfect one.
@ -567,7 +569,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16;
outputFlags |= OutputFlags::RB_SWIZZLE;
} else {
ConvertTextureDescFrom16(desc, srcwidth, srcheight, data);
ConvertTextureDescFrom16(desc, srcwidth, srcheight, (const uint16_t *)data);
fillDesc = false;
}
if (fillDesc) {
@ -586,13 +588,13 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
hasImage = false;
u1 = 1.0f;
} else if (displayFormat_ == GE_FORMAT_8888) {
u8 *data = Memory::GetPointerWrite(displayFramebuf_);
const u8 *data = Memory::GetPointer(displayFramebuf_);
desc.width = displayStride_ == 0 ? srcwidth : displayStride_;
desc.height = srcheight;
desc.initData.push_back(data);
desc.format = Draw::DataFormat::R8G8B8A8_UNORM;
} else if (displayFormat_ == GE_FORMAT_5551) {
const u8 *data = Memory::GetPointerWrite(displayFramebuf_);
const u8 *data = Memory::GetPointer(displayFramebuf_);
bool fillDesc = true;
if (draw_->GetDataFormatSupport(Draw::DataFormat::A1B5G5R5_UNORM_PACK16) & Draw::FMT_TEXTURE) {
// The perfect one.
@ -1247,18 +1249,19 @@ bool SoftGPU::GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferT
int stride = gstate.FrameBufStride();
DrawingCoords size = GetTargetSize(stride);
GEBufferFormat fmt = gstate.FrameBufFormat();
const u8 *src = fb.data;
if (type == GPU_DBG_FRAMEBUF_DISPLAY) {
size.x = 480;
size.y = 272;
stride = displayStride_;
fmt = displayFormat_;
src = Memory::GetPointer(displayFramebuf_);
}
buffer.Allocate(size.x, size.y, fmt);
const int depth = fmt == GE_FORMAT_8888 ? 4 : 2;
const u8 *src = fb.data;
u8 *dst = buffer.GetData();
const int byteWidth = size.x * depth;
for (int16_t y = 0; y < size.y; ++y) {

View File

@ -64,17 +64,17 @@ enum class SoftDirty : uint64_t {
PIXEL_DITHER = 1ULL << 3,
PIXEL_WRITEMASK = 1ULL << 4,
PIXEL_CACHED = 1ULL << 5,
PIXEL_ALL = 63ULL << 0,
PIXEL_ALL = 0b111111ULL << 0,
SAMPLER_BASIC = 1ULL << 6,
SAMPLER_TEXLIST = 1ULL << 7,
SAMPLER_CLUT = 1ULL << 8,
SAMPLER_ALL = 7ULL << 6,
SAMPLER_ALL = 0b111ULL << 6,
RAST_BASIC = 1ULL << 9,
RAST_TEX = 1ULL << 10,
RAST_OFFSET = 1ULL << 11,
RAST_ALL = 7ULL << 9,
RAST_ALL = 0b111ULL << 9,
LIGHT_BASIC = 1ULL << 12,
LIGHT_MATERIAL = 1ULL << 13,
@ -82,13 +82,13 @@ enum class SoftDirty : uint64_t {
LIGHT_1 = 1ULL << 15,
LIGHT_2 = 1ULL << 16,
LIGHT_3 = 1ULL << 17,
LIGHT_ALL = 63ULL << 12,
LIGHT_ALL = 0b111111ULL << 12,
TRANSFORM_BASIC = 1ULL << 18,
TRANSFORM_MATRIX = 1ULL << 19,
TRANSFORM_VIEWPORT = 1ULL << 20,
TRANSFORM_FOG = 1ULL << 21,
TRANSFORM_ALL = 31ULL << 18,
TRANSFORM_ALL = 0b1111ULL << 18,
BINNER_RANGE = 1ULL << 22,
BINNER_OVERLAP = 1ULL << 23,
@ -194,7 +194,7 @@ public:
protected:
void FastRunLoop(DisplayList &list) override;
void CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight);
void ConvertTextureDescFrom16(Draw::TextureDesc &desc, int srcwidth, int srcheight, u8 *overrideData = nullptr);
void ConvertTextureDescFrom16(Draw::TextureDesc &desc, int srcwidth, int srcheight, const uint16_t *overrideData = nullptr);
private:
void MarkDirty(uint32_t addr, uint32_t stride, uint32_t height, GEBufferFormat fmt, SoftGPUVRAMDirty value);

View File

@ -54,6 +54,7 @@ SoftwareDrawEngine::SoftwareDrawEngine() {
// All this is a LOT of memory, need to see if we can cut down somehow. Used for splines.
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
flushOnParams_ = false;
}
SoftwareDrawEngine::~SoftwareDrawEngine() {

View File

@ -170,8 +170,8 @@ static std::string CutFromMain(std::string str) {
}
static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, VkPipelineCache pipelineCache,
VkPipelineLayout layout, PipelineFlags pipelineFlags, const VulkanPipelineRasterStateKey &key,
const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform, u32 variantBitmask) {
VkPipelineLayout layout, PipelineFlags pipelineFlags, const VulkanPipelineRasterStateKey &key,
const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform, u32 variantBitmask) {
VulkanPipeline *vulkanPipeline = new VulkanPipeline();
VKRGraphicsPipelineDesc *desc = &vulkanPipeline->desc;
desc->pipelineCache = pipelineCache;
@ -221,7 +221,7 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager,
VkDynamicState *dynamicStates = &desc->dynamicStates[0];
int numDyn = 0;
if (key.blendEnable &&
(UsesBlendConstant(key.srcAlpha) || UsesBlendConstant(key.srcColor) || UsesBlendConstant(key.destAlpha) || UsesBlendConstant(key.destColor))) {
(UsesBlendConstant(key.srcAlpha) || UsesBlendConstant(key.srcColor) || UsesBlendConstant(key.destAlpha) || UsesBlendConstant(key.destColor))) {
dynamicStates[numDyn++] = VK_DYNAMIC_STATE_BLEND_CONSTANTS;
useBlendConstant = true;
}
@ -232,12 +232,12 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager,
dynamicStates[numDyn++] = VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK;
dynamicStates[numDyn++] = VK_DYNAMIC_STATE_STENCIL_REFERENCE;
}
VkPipelineDynamicStateCreateInfo &ds = desc->ds;
ds.flags = 0;
ds.pDynamicStates = dynamicStates;
ds.dynamicStateCount = numDyn;
VkPipelineRasterizationStateCreateInfo &rs = desc->rs;
rs.flags = 0;
rs.depthBiasEnable = false;
@ -299,10 +299,9 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager,
VKRGraphicsPipeline *pipeline = renderManager->CreateGraphicsPipeline(desc, variantBitmask, "game");
vulkanPipeline->pipeline = pipeline;
if (useBlendConstant)
if (useBlendConstant) {
pipelineFlags |= PipelineFlags::USES_BLEND_CONSTANT;
if (key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST || key.topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP)
pipelineFlags |= PipelineFlags::USES_LINES;
}
if (dss.depthTestEnable || dss.stencilTestEnable) {
pipelineFlags |= PipelineFlags::USES_DEPTH_STENCIL;
}

View File

@ -58,7 +58,6 @@ struct VulkanPipeline {
PipelineFlags pipelineFlags; // PipelineFlags enum above.
bool UsesBlendConstant() const { return (pipelineFlags & PipelineFlags::USES_BLEND_CONSTANT) != 0; }
bool UsesLines() const { return (pipelineFlags & PipelineFlags::USES_LINES) != 0; }
bool UsesDepthStencil() const { return (pipelineFlags & PipelineFlags::USES_DEPTH_STENCIL) != 0; }
bool UsesInputAttachment() const { return (pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT) != 0; }

View File

@ -134,7 +134,7 @@ private:
int textureLevel_ = 0;
bool showClut_ = false;
bool forceOpaque_ = false;
bool autoFlush_ = false;
bool autoFlush_ = true;
// The most recent primary/framebuffer and texture buffers.
const GPUDebugBuffer *primaryBuffer_ = nullptr;
const GPUDebugBuffer *secondBuffer_ = nullptr;

View File

@ -520,6 +520,14 @@ ULES01086 = true
# LEGO Batman: The Videogame
ULUS10380 = true
ULES01151 = true
# Burnout Dominator
ULUS10236 = true
ULES00750 = true
ULJM05242 = true
ULJM05371 = true
NPJH50304 = true
ULES00703 = true
# TODO: There are many more.
[RequireBlockTransfer]
@ -1278,16 +1286,3 @@ ULJM05738 = true
[AllowDownloadCLUT]
# Temporary compatibility option, while working on the GPU CLUT-from-framebuffer path.
# Not required for any games now that it works, but might be useful for development.
[UploadDepthForCLUTTextures]
# Burnout Dominator - lens flare effect (issue #11100)
# We need a preinitialized depth buffer
ULUS10236 = true
ULES00703 = true
# Need for Speed - Shift (same as Burnout Dominator)
ULUS10462 = true
ULES01275 = true
ULJM05494 = true
NPJH50143 = true
ULJM05738 = true

View File

@ -46,7 +46,7 @@ void HeadlessHost::SendDebugScreenshot(const u8 *pixbuf, u32 w, u32 h) {
const static u32 FRAME_HEIGHT = 272;
GPUDebugBuffer buffer;
gpuDebug->GetCurrentFramebuffer(buffer, GPU_DBG_FRAMEBUF_RENDER);
gpuDebug->GetCurrentFramebuffer(buffer, GPU_DBG_FRAMEBUF_DISPLAY);
const std::vector<u32> pixels = TranslateDebugBufferToCompare(&buffer, 512, 272);
ScreenshotComparer comparer(pixels, FRAME_STRIDE, FRAME_WIDTH, FRAME_HEIGHT);

View File

@ -147,8 +147,10 @@ tests_good = [
"gpu/commands/blend",
"gpu/commands/blend565",
"gpu/commands/blocktransfer",
"gpu/commands/cull",
"gpu/commands/fog",
"gpu/commands/material",
"gpu/complex/complex",
"gpu/displaylist/alignment",
"gpu/dither/dither",
"gpu/filtering/mipmaplinear",
@ -159,6 +161,7 @@ tests_good = [
"gpu/ge/queue",
"gpu/primitives/indices",
"gpu/primitives/invalidprim",
"gpu/primitives/points",
"gpu/primitives/trianglefan",
"gpu/primitives/trianglestrip",
"gpu/primitives/triangles",
@ -181,6 +184,7 @@ tests_good = [
"gpu/texfunc/replace",
"gpu/textures/mipmap",
"gpu/textures/rotate",
"gpu/vertices/colors",
"hash/hash",
"hle/check_not_used_uids",
"intr/intr",
@ -387,9 +391,7 @@ tests_next = [
"font/shadowglyphimageclip",
"font/shadowinfo",
"gpu/clipping/guardband",
"gpu/commands/cull",
"gpu/commands/light",
"gpu/complex/complex",
"gpu/depth/precision",
"gpu/displaylist/state",
"gpu/filtering/linear",
@ -404,7 +406,6 @@ tests_next = [
"gpu/primitives/immediate",
"gpu/primitives/lines",
"gpu/primitives/linestrip",
"gpu/primitives/points",
"gpu/primitives/rectangles",
"gpu/primitives/spline",
"gpu/reflection/reflection",
@ -415,7 +416,6 @@ tests_next = [
"gpu/simple/simple",
"gpu/textures/size",
"gpu/triangle/triangle",
"gpu/vertices/colors",
"gpu/vertices/texcoords",
"intr/registersub",
"intr/releasesub",