TextureCache: Implement deferred/batched EFB copies

This commit is contained in:
Stenzek 2018-11-03 00:17:00 +10:00
parent 710b893b91
commit 8e2c063d62
29 changed files with 314 additions and 141 deletions

View File

@ -139,6 +139,7 @@ const ConfigInfo<bool> GFX_HACK_SKIP_XFB_COPY_TO_RAM{{System::GFX, "Hacks", "XFB
true};
const ConfigInfo<bool> GFX_HACK_DISABLE_COPY_TO_VRAM{{System::GFX, "Hacks", "DisableCopyToVRAM"},
false};
const ConfigInfo<bool> GFX_HACK_DEFER_EFB_COPIES{{System::GFX, "Hacks", "DeferEFBCopies"}, true};
const ConfigInfo<bool> GFX_HACK_IMMEDIATE_XFB{{System::GFX, "Hacks", "ImmediateXFBEnable"}, false};
const ConfigInfo<bool> GFX_HACK_COPY_EFB_SCALED{{System::GFX, "Hacks", "EFBScaledCopy"}, true};
const ConfigInfo<bool> GFX_HACK_EFB_EMULATE_FORMAT_CHANGES{

View File

@ -107,6 +107,7 @@ extern const ConfigInfo<bool> GFX_HACK_FORCE_PROGRESSIVE;
extern const ConfigInfo<bool> GFX_HACK_SKIP_EFB_COPY_TO_RAM;
extern const ConfigInfo<bool> GFX_HACK_SKIP_XFB_COPY_TO_RAM;
extern const ConfigInfo<bool> GFX_HACK_DISABLE_COPY_TO_VRAM;
extern const ConfigInfo<bool> GFX_HACK_DEFER_EFB_COPIES;
extern const ConfigInfo<bool> GFX_HACK_IMMEDIATE_XFB;
extern const ConfigInfo<bool> GFX_HACK_COPY_EFB_SCALED;
extern const ConfigInfo<bool> GFX_HACK_EFB_EMULATE_FORMAT_CHANGES;

View File

@ -114,6 +114,7 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location)
Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM.location,
Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM.location,
Config::GFX_HACK_DISABLE_COPY_TO_VRAM.location,
Config::GFX_HACK_DEFER_EFB_COPIES.location,
Config::GFX_HACK_IMMEDIATE_XFB.location,
Config::GFX_HACK_COPY_EFB_SCALED.location,
Config::GFX_HACK_EFB_EMULATE_FORMAT_CHANGES.location,

View File

@ -46,10 +46,13 @@ void HacksWidget::CreateWidgets()
Config::GFX_HACK_EFB_EMULATE_FORMAT_CHANGES, true);
m_store_efb_copies = new GraphicsBool(tr("Store EFB Copies to Texture Only"),
Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM);
m_defer_efb_copies =
new GraphicsBool(tr("Defer EFB Copies to RAM"), Config::GFX_HACK_DEFER_EFB_COPIES);
efb_layout->addWidget(m_skip_efb_cpu, 0, 0);
efb_layout->addWidget(m_ignore_format_changes, 0, 1);
efb_layout->addWidget(m_store_efb_copies, 1, 0);
efb_layout->addWidget(m_defer_efb_copies, 1, 1);
// Texture Cache
auto* texture_cache_box = new QGroupBox(tr("Texture Cache"));
@ -109,6 +112,8 @@ void HacksWidget::CreateWidgets()
main_layout->addStretch();
setLayout(main_layout);
UpdateDeferEFBCopiesEnabled();
}
void HacksWidget::OnBackendChanged(const QString& backend_name)
@ -129,6 +134,10 @@ void HacksWidget::OnBackendChanged(const QString& backend_name)
void HacksWidget::ConnectWidgets()
{
connect(m_accuracy, &QSlider::valueChanged, [this](int) { SaveSettings(); });
connect(m_store_efb_copies, &QCheckBox::stateChanged,
[this](int) { UpdateDeferEFBCopiesEnabled(); });
connect(m_store_xfb_copies, &QCheckBox::stateChanged,
[this](int) { UpdateDeferEFBCopiesEnabled(); });
}
void HacksWidget::LoadSettings()
@ -202,6 +211,11 @@ void HacksWidget::AddDescriptions()
"in a small number of games.\n\nEnabled = EFB Copies to Texture\nDisabled = EFB Copies to "
"RAM "
"(and Texture)\n\nIf unsure, leave this checked.");
static const char TR_DEFER_EFB_COPIES_DESCRIPTION[] = QT_TR_NOOP(
"Waits until the game synchronizes with the emulated GPU before writing the contents of EFB "
"copies to RAM. Reduces the overhead of EFB RAM copies, provides a performance boost in many "
"games, at the risk of breaking those which do not safely synchronize with the emulated "
"GPU.\n\nIf unsure, leave this checked.");
static const char TR_ACCUARCY_DESCRIPTION[] = QT_TR_NOOP(
"The \"Safe\" setting eliminates the likelihood of the GPU missing texture updates "
"from RAM.\nLower accuracies cause in-game text to appear garbled in certain "
@ -240,6 +254,7 @@ void HacksWidget::AddDescriptions()
AddDescription(m_skip_efb_cpu, TR_SKIP_EFB_CPU_ACCESS_DESCRIPTION);
AddDescription(m_ignore_format_changes, TR_IGNORE_FORMAT_CHANGE_DESCRIPTION);
AddDescription(m_store_efb_copies, TR_STORE_EFB_TO_TEXTURE_DESCRIPTION);
AddDescription(m_defer_efb_copies, TR_DEFER_EFB_COPIES_DESCRIPTION);
AddDescription(m_accuracy, TR_ACCUARCY_DESCRIPTION);
AddDescription(m_store_xfb_copies, TR_STORE_XFB_TO_TEXTURE_DESCRIPTION);
AddDescription(m_immediate_xfb, TR_IMMEDIATE_XFB_DESCRIPTION);
@ -248,3 +263,11 @@ void HacksWidget::AddDescriptions()
AddDescription(m_disable_bounding_box, TR_DISABLE_BOUNDINGBOX_DESCRIPTION);
AddDescription(m_vertex_rounding, TR_VERTEX_ROUNDING_DESCRIPTION);
}
void HacksWidget::UpdateDeferEFBCopiesEnabled()
{
// We disable the checkbox for defer EFB copies when both EFB and XFB copies to texture are
// enabled.
const bool can_defer = m_store_efb_copies->isChecked() && m_store_xfb_copies->isChecked();
m_defer_efb_copies->setEnabled(!can_defer);
}

View File

@ -42,8 +42,11 @@ private:
QCheckBox* m_fast_depth_calculation;
QCheckBox* m_disable_bounding_box;
QCheckBox* m_vertex_rounding;
QCheckBox* m_defer_efb_copies;
void CreateWidgets();
void ConnectWidgets();
void AddDescriptions();
void UpdateDeferEFBCopiesEnabled();
};

View File

@ -46,13 +46,8 @@ PSTextureEncoder::~PSTextureEncoder() = default;
void PSTextureEncoder::Init()
{
// TODO: Move this to a constant somewhere in common.
TextureConfig encoding_texture_config(EFB_WIDTH * 4, 1024, 1, 1, 1, AbstractTextureFormat::BGRA8,
true);
m_encoding_render_texture = g_renderer->CreateTexture(encoding_texture_config);
m_encoding_readback_texture =
g_renderer->CreateStagingTexture(StagingTextureType::Readback, encoding_texture_config);
ASSERT(m_encoding_render_texture && m_encoding_readback_texture);
m_encoding_render_texture = g_renderer->CreateTexture(TextureCache::GetEncodingTextureConfig());
ASSERT(m_encoding_render_texture);
// Create constant buffer for uploading data to shaders
D3D11_BUFFER_DESC bd = CD3D11_BUFFER_DESC(sizeof(EFBEncodeParams), D3D11_BIND_CONSTANT_BUFFER);
@ -71,9 +66,9 @@ void PSTextureEncoder::Shutdown()
}
void PSTextureEncoder::Encode(
u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y,
u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma,
bool clamp_top, bool clamp_bottom,
AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients)
{
// Resolve MSAA targets before copying.
@ -133,14 +128,7 @@ void PSTextureEncoder::Encode(
// Copy to staging buffer
MathUtil::Rectangle<int> copy_rect(0, 0, words_per_row, num_blocks_y);
m_encoding_readback_texture->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0,
copy_rect);
m_encoding_readback_texture->Flush();
if (m_encoding_readback_texture->Map())
{
m_encoding_readback_texture->ReadTexels(copy_rect, dst, memory_stride);
m_encoding_readback_texture->Unmap();
}
dst->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect);
}
g_renderer->RestoreAPIState();

View File

@ -38,9 +38,9 @@ public:
void Init();
void Shutdown();
void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
void Encode(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients);
private:
@ -48,7 +48,6 @@ private:
ID3D11Buffer* m_encode_params = nullptr;
std::unique_ptr<AbstractTexture> m_encoding_render_texture;
std::unique_ptr<AbstractStagingTexture> m_encoding_readback_texture;
std::map<EFBCopyParams, ID3D11PixelShader*> m_encoding_shaders;
};
}

View File

@ -31,8 +31,8 @@ namespace DX11
{
static std::unique_ptr<PSTextureEncoder> g_encoder;
void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
void TextureCache::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients)

View File

@ -22,18 +22,11 @@ public:
~TextureCache();
private:
u64 EncodeToRamFromTexture(u32 address, void* source_texture, u32 SourceW, u32 SourceH,
bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf,
const EFBRectangle& source)
{
return 0;
};
void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette,
TLUTFormat format) override;
void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override;

View File

@ -25,8 +25,8 @@ public:
{
}
void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override
{

View File

@ -159,8 +159,8 @@ void main()
//#define TIME_TEXTURE_DECODING 1
void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
void TextureCache::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients)

View File

@ -63,8 +63,8 @@ private:
void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette,
TLUTFormat format) override;
void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override;

View File

@ -46,7 +46,6 @@ struct EncodingProgram
std::map<EFBCopyParams, EncodingProgram> s_encoding_programs;
std::unique_ptr<AbstractTexture> s_encoding_render_texture;
std::unique_ptr<AbstractStagingTexture> s_encoding_readback_texture;
const int renderBufferWidth = EFB_WIDTH * 4;
const int renderBufferHeight = 1024;
@ -93,16 +92,11 @@ static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params)
void Init()
{
TextureConfig config(renderBufferWidth, renderBufferHeight, 1, 1, 1, AbstractTextureFormat::BGRA8,
true);
s_encoding_render_texture = g_renderer->CreateTexture(config);
s_encoding_readback_texture =
g_renderer->CreateStagingTexture(StagingTextureType::Readback, config);
s_encoding_render_texture = g_renderer->CreateTexture(TextureCache::GetEncodingTextureConfig());
}
void Shutdown()
{
s_encoding_readback_texture.reset();
s_encoding_render_texture.reset();
for (auto& program : s_encoding_programs)
@ -112,8 +106,9 @@ void Shutdown()
// dst_line_size, writeStride in bytes
static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line_size,
u32 dstHeight, u32 writeStride, bool linearFilter, float y_scale)
static void EncodeToRamUsingShader(GLuint srcTexture, AbstractStagingTexture* destAddr,
u32 dst_line_size, u32 dstHeight, u32 writeStride,
bool linearFilter, float y_scale)
{
FramebufferManager::SetFramebuffer(
static_cast<OGLTexture*>(s_encoding_render_texture.get())->GetFramebuffer());
@ -137,15 +132,14 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
MathUtil::Rectangle<int> copy_rect(0, 0, dst_line_size / 4, dstHeight);
s_encoding_readback_texture->CopyFromTexture(s_encoding_render_texture.get(), copy_rect, 0, 0,
copy_rect);
s_encoding_readback_texture->ReadTexels(copy_rect, destAddr, writeStride);
destAddr->CopyFromTexture(s_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect);
}
void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
float gamma, float clamp_top, float clamp_bottom,
void EncodeToRamFromTexture(AbstractStagingTexture* dest, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y,
u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
float y_scale, float gamma, float clamp_top, float clamp_bottom,
const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients)
{
g_renderer->ResetAPIState();
@ -165,7 +159,7 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ
FramebufferManager::ResolveAndGetDepthTarget(src_rect) :
FramebufferManager::ResolveAndGetRenderTarget(src_rect);
EncodeToRamUsingShader(read_texture, dest_ptr, bytes_per_row, num_blocks_y, memory_stride,
EncodeToRamUsingShader(read_texture, dest, bytes_per_row, num_blocks_y, memory_stride,
scale_by_half && !params.depth, y_scale);
g_renderer->RestoreAPIState();

View File

@ -10,6 +10,9 @@
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VideoCommon.h"
struct EFBCopyParams;
class AbstractStagingTexture;
namespace OGL
{
// Converts textures between formats using shaders
@ -21,7 +24,7 @@ void Shutdown();
// returns size of the encoded data (in bytes)
void EncodeToRamFromTexture(
u8* dest_ptr, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
AbstractStagingTexture* dest, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
float y_scale, float gamma, float clamp_top, float clamp_bottom,
const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients);

View File

@ -57,6 +57,8 @@ public:
void Unmap() override;
void Flush() override;
void SetMapStride(size_t stride) { m_map_stride = stride; }
private:
std::vector<u8> m_data;
};

View File

@ -16,8 +16,8 @@ public:
TLUTFormat format) override
{
}
void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override
{

View File

@ -5,12 +5,14 @@
#include "VideoBackends/Software/TextureEncoder.h"
#include "Common/Align.h"
#include "Common/Assert.h"
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
#include "Common/MsgHandler.h"
#include "Common/Swap.h"
#include "VideoBackends/Software/EfbInterface.h"
#include "VideoBackends/Software/SWTexture.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/LookUpTables.h"
@ -1468,18 +1470,26 @@ void EncodeEfbCopy(u8* dst, const EFBCopyParams& params, u32 native_width, u32 b
}
}
void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
float y_scale, float gamma)
void Encode(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma)
{
// HACK: Override the memory stride for this staging texture with new copy stride.
// This is required because the texture encoder assumes that we're writing directly to memory,
// and each row is tightly packed with no padding, whereas our encoding abstract texture has
// a width of 2560. When we copy the texture back later on, it'll use the tightly packed stride.
ASSERT(memory_stride <= (dst->GetConfig().width * dst->GetTexelSize()));
static_cast<SW::SWStagingTexture*>(dst)->SetMapStride(memory_stride);
if (params.copy_format == EFBCopyFormat::XFB)
{
EfbInterface::EncodeXFB(dst, native_width, src_rect, y_scale, gamma);
EfbInterface::EncodeXFB(reinterpret_cast<u8*>(dst->GetMappedPointer()), native_width, src_rect,
y_scale, gamma);
}
else
{
EncodeEfbCopy(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect,
scale_by_half);
EncodeEfbCopy(reinterpret_cast<u8*>(dst->GetMappedPointer()), params, native_width,
bytes_per_row, num_blocks_y, memory_stride, src_rect, scale_by_half);
}
}
}

View File

@ -10,7 +10,7 @@
namespace TextureEncoder
{
void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
float y_scale, float gamma);
void Encode(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma);
}

View File

@ -98,8 +98,8 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
}
void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
void TextureCache::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients)

View File

@ -36,8 +36,8 @@ public:
void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette,
TLUTFormat format) override;
void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override;

View File

@ -207,10 +207,10 @@ void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
}
void TextureConverter::EncodeTextureToMemory(
VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients)
VkImageView src_texture, AbstractStagingTexture* dest, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top,
bool clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients)
{
VkShaderModule shader = GetEncodingShader(params);
if (shader == VK_NULL_HANDLE)
@ -273,9 +273,7 @@ void TextureConverter::EncodeTextureToMemory(
draw.EndRenderPass();
MathUtil::Rectangle<int> copy_rect(0, 0, render_width, render_height);
m_encoding_readback_texture->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0,
copy_rect);
m_encoding_readback_texture->ReadTexels(copy_rect, dest_ptr, memory_stride);
dest->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect);
}
bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format)
@ -610,14 +608,8 @@ VkShaderModule TextureConverter::GetEncodingShader(const EFBCopyParams& params)
bool TextureConverter::CreateEncodingTexture()
{
TextureConfig config(ENCODING_TEXTURE_WIDTH, ENCODING_TEXTURE_HEIGHT, 1, 1, 1,
ENCODING_TEXTURE_FORMAT, true);
m_encoding_render_texture = g_renderer->CreateTexture(config);
m_encoding_readback_texture =
g_renderer->CreateStagingTexture(StagingTextureType::Readback, config);
return m_encoding_render_texture && m_encoding_readback_texture;
m_encoding_render_texture = g_renderer->CreateTexture(TextureCache::GetEncodingTextureConfig());
return m_encoding_render_texture != nullptr;
}
bool TextureConverter::CreateDecodingTexture()

View File

@ -21,7 +21,6 @@ class AbstractStagingTexture;
namespace Vulkan
{
class StagingTexture2D;
class Texture2D;
class VKTexture;
@ -38,14 +37,12 @@ public:
TextureCache::TCacheEntry* src_entry, const void* palette,
TLUTFormat palette_format);
// Uses an encoding shader to copy src_texture to dest_ptr.
// NOTE: Executes the current command buffer.
void
EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
float gamma, bool clamp_top, bool clamp_bottom,
const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients);
// Uses an encoding shader to copy src_texture to dest.
void EncodeTextureToMemory(
VkImageView src_texture, AbstractStagingTexture* dest, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top,
bool clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients);
bool SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format);
void DecodeTexture(VkCommandBuffer command_buffer, TextureCache::TCacheEntry* entry,
@ -54,9 +51,6 @@ public:
const u8* palette, TLUTFormat palette_format);
private:
static const u32 ENCODING_TEXTURE_WIDTH = EFB_WIDTH * 4;
static const u32 ENCODING_TEXTURE_HEIGHT = 1024;
static const AbstractTextureFormat ENCODING_TEXTURE_FORMAT = AbstractTextureFormat::BGRA8;
static const size_t NUM_PALETTE_CONVERSION_SHADERS = 3;
// Maximum size of a texture based on BP registers.
@ -100,7 +94,6 @@ private:
// Texture encoding - RGBA8->GX format in memory
std::map<EFBCopyParams, VkShaderModule> m_encoding_shaders;
std::unique_ptr<AbstractTexture> m_encoding_render_texture;
std::unique_ptr<AbstractStagingTexture> m_encoding_readback_texture;
// Texture decoding - GX format in memory->RGBA8
struct TextureDecodingPipeline

View File

@ -466,6 +466,7 @@ void VKStagingTexture::CopyFromTexture(Texture2D* src, const MathUtil::Rectangle
m_needs_flush = false;
g_command_buffer_mgr->RemoveFencePointCallback(
this);
m_staging_buffer->InvalidateCPUCache();
});
}

View File

@ -177,6 +177,7 @@ static void BPWritten(const BPCmd& bp)
switch (bp.newvalue & 0xFF)
{
case 0x02:
g_texture_cache->FlushEFBCopies();
if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetFinish(); // may generate interrupt
DEBUG_LOG(VIDEO, "GXSetDrawDone SetPEFinish (value: 0x%02X)", (bp.newvalue & 0xFFFF));
@ -188,11 +189,13 @@ static void BPWritten(const BPCmd& bp)
}
return;
case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID
g_texture_cache->FlushEFBCopies();
if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false);
DEBUG_LOG(VIDEO, "SetPEToken 0x%04x", (bp.newvalue & 0xFFFF));
return;
case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID
g_texture_cache->FlushEFBCopies();
if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true);
DEBUG_LOG(VIDEO, "SetPEToken + INT 0x%04x", (bp.newvalue & 0xFFFF));

View File

@ -724,6 +724,10 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
// state changes the specialized shader will not take over.
g_vertex_manager->InvalidatePipelineObject();
// Flush any outstanding EFB copies to RAM, in case the game is running at an uncapped frame
// rate and not waiting for vblank. Otherwise, we'd end up with a huge list of pending copies.
g_texture_cache->FlushEFBCopies();
Core::Callback_VideoCopiedToXFB(true);
}

View File

@ -28,6 +28,7 @@
#include "Core/FifoPlayer/FifoRecorder.h"
#include "Core/HW/Memmap.h"
#include "VideoCommon/AbstractStagingTexture.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/Debugger.h"
#include "VideoCommon/FramebufferManagerBase.h"
@ -89,6 +90,7 @@ TextureCacheBase::TextureCacheBase()
void TextureCacheBase::Invalidate()
{
FlushEFBCopies();
InvalidateAllBindPoints();
for (size_t i = 0; i < bound_textures.size(); ++i)
{
@ -1693,35 +1695,6 @@ void TextureCacheBase::CopyRenderTargetToTexture(
const u32 bytes_per_row = num_blocks_x * bytes_per_block;
const u32 covered_range = num_blocks_y * dstStride;
if (copy_to_ram)
{
CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
NeedsCopyFilterInShader(coefficients));
CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf,
y_scale, gamma, clamp_top, clamp_bottom, coefficients);
}
else
{
if (is_xfb_copy)
{
UninitializeXFBMemory(dst, dstStride, bytes_per_row, num_blocks_y);
}
else
{
// Hack: Most games don't actually need the correct texture data in RAM
// and we can just keep a copy in VRAM. We zero the memory so we
// can check it hasn't changed before using our copy in VRAM.
u8* ptr = dst;
for (u32 i = 0; i < num_blocks_y; i++)
{
memset(ptr, 0, bytes_per_row);
ptr += dstStride;
}
}
}
if (g_bRecordFifoData)
{
// Mark the memory behind this efb copy as dynamicly generated for the Fifo log
@ -1775,7 +1748,9 @@ void TextureCacheBase::CopyRenderTargetToTexture(
(!strided_efb_copy && entry->size_in_bytes == overlap_range) ||
(strided_efb_copy && entry->size_in_bytes == overlap_range && entry->addr == dstAddr))
{
iter.first = InvalidateTexture(iter.first);
// Pending EFB copies which are completely covered by this new copy can simply be tossed,
// instead of having to flush them later on, since this copy will write over everything.
iter.first = InvalidateTexture(iter.first, true);
continue;
}
entry->may_have_overlapping_textures = true;
@ -1804,6 +1779,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(
++iter.first;
}
TCacheEntry* entry = nullptr;
if (copy_to_vram)
{
// create the texture
@ -1813,8 +1789,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(
config.height = scaled_tex_h;
config.layers = FramebufferManagerBase::GetEFBLayers();
TCacheEntry* entry = AllocateCacheEntry(config);
entry = AllocateCacheEntry(config);
if (entry)
{
entry->SetGeneralParameters(dstAddr, 0, baseFormat, is_xfb_copy);
@ -1836,9 +1811,6 @@ void TextureCacheBase::CopyRenderTargetToTexture(
clamp_top, clamp_bottom,
GetVRAMCopyFilterCoefficients(filter_coefficients));
u64 hash = entry->CalculateHash();
entry->SetHashes(hash, hash);
if (g_ActiveConfig.bDumpEFBTarget && !is_xfb_copy)
{
static int efb_count = 0;
@ -1860,6 +1832,134 @@ void TextureCacheBase::CopyRenderTargetToTexture(
textures_by_address.emplace(dstAddr, entry);
}
}
if (copy_to_ram)
{
CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
NeedsCopyFilterInShader(coefficients));
std::unique_ptr<AbstractStagingTexture> staging_texture = GetEFBCopyStagingTexture();
if (staging_texture)
{
CopyEFB(staging_texture.get(), format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect,
scaleByHalf, y_scale, gamma, clamp_top, clamp_bottom, coefficients);
// We can't defer if there is no VRAM copy (since we need to update the hash).
if (!copy_to_vram || !g_ActiveConfig.bDeferEFBCopies)
{
// Immediately flush it.
WriteEFBCopyToRAM(dst, bytes_per_row / sizeof(u32), num_blocks_y, dstStride,
std::move(staging_texture));
}
else
{
// Defer the flush until later.
entry->pending_efb_copy = std::move(staging_texture);
entry->pending_efb_copy_width = bytes_per_row / sizeof(u32);
entry->pending_efb_copy_height = num_blocks_y;
entry->pending_efb_copy_invalidated = false;
m_pending_efb_copies.push_back(entry);
}
}
}
else
{
if (is_xfb_copy)
{
UninitializeXFBMemory(dst, dstStride, bytes_per_row, num_blocks_y);
}
else
{
// Hack: Most games don't actually need the correct texture data in RAM
// and we can just keep a copy in VRAM. We zero the memory so we
// can check it hasn't changed before using our copy in VRAM.
u8* ptr = dst;
for (u32 i = 0; i < num_blocks_y; i++)
{
std::memset(ptr, 0, bytes_per_row);
ptr += dstStride;
}
}
}
// Even if the copy is deferred, still compute the hash. This way if the copy is used as a texture
// in a subsequent draw before it is flushed, it will have the same hash.
if (entry)
{
const u64 hash = entry->CalculateHash();
entry->SetHashes(hash, hash);
}
}
void TextureCacheBase::FlushEFBCopies()
{
if (m_pending_efb_copies.empty())
return;
for (TCacheEntry* entry : m_pending_efb_copies)
FlushEFBCopy(entry);
m_pending_efb_copies.clear();
}
TextureConfig TextureCacheBase::GetEncodingTextureConfig()
{
return TextureConfig(EFB_WIDTH * 4, 1024, 1, 1, 1, AbstractTextureFormat::BGRA8, true);
}
void TextureCacheBase::WriteEFBCopyToRAM(u8* dst_ptr, u32 width, u32 height, u32 stride,
std::unique_ptr<AbstractStagingTexture> staging_texture)
{
MathUtil::Rectangle<int> copy_rect(0, 0, static_cast<int>(width), static_cast<int>(height));
staging_texture->ReadTexels(copy_rect, dst_ptr, stride);
ReleaseEFBCopyStagingTexture(std::move(staging_texture));
}
void TextureCacheBase::FlushEFBCopy(TCacheEntry* entry)
{
// Copy from texture -> guest memory.
u8* const dst = Memory::GetPointer(entry->addr);
WriteEFBCopyToRAM(dst, entry->pending_efb_copy_width, entry->pending_efb_copy_height,
entry->memory_stride, std::move(entry->pending_efb_copy));
// If the EFB copy was invalidated (e.g. the bloom case mentioned in InvalidateTexture),
// now is the time to clean up the TCacheEntry. In which case, we don't need to compute
// the new hash of the RAM copy.
if (entry->pending_efb_copy_invalidated)
{
auto config = entry->texture->GetConfig();
texture_pool.emplace(config, TexPoolEntry(std::move(entry->texture)));
return;
}
// Re-hash the texture now that the guest memory is populated.
// This should be safe because we'll catch any writes before the game can modify it.
const u64 hash = entry->CalculateHash();
entry->SetHashes(hash, hash);
}
std::unique_ptr<AbstractStagingTexture> TextureCacheBase::GetEFBCopyStagingTexture()
{
// Pull off the back first to re-use the most frequently used textures.
if (!m_efb_copy_staging_texture_pool.empty())
{
auto ptr = std::move(m_efb_copy_staging_texture_pool.back());
m_efb_copy_staging_texture_pool.pop_back();
return ptr;
}
std::unique_ptr<AbstractStagingTexture> tex =
g_renderer->CreateStagingTexture(StagingTextureType::Readback, GetEncodingTextureConfig());
if (!tex)
WARN_LOG(VIDEO, "Failed to create EFB copy staging texture");
return tex;
}
void TextureCacheBase::ReleaseEFBCopyStagingTexture(std::unique_ptr<AbstractStagingTexture> tex)
{
m_efb_copy_staging_texture_pool.push_back(std::move(tex));
}
void TextureCacheBase::UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row,
@ -1989,7 +2089,7 @@ TextureCacheBase::FindOverlappingTextures(u32 addr, u32 size_in_bytes)
}
TextureCacheBase::TexAddrCache::iterator
TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter)
TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pending_efb_copy)
{
if (iter == textures_by_address.end())
return textures_by_address.end();
@ -2014,6 +2114,33 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter)
}
}
// If this is a pending EFB copy, we don't want to flush it here.
// Why? Because let's say a game is rendering a bloom-type effect, using EFB copies to essentially
// downscale the framebuffer. Copy from EFB->Texture, draw texture to EFB, copy EFB->Texture,
// draw, repeat. The second copy will invalidate the first, forcing a flush. Which means we lose
// any benefit of EFB copy batching. So instead, let's just leave the EFB copy pending, but remove
// it from the texture cache. This way we don't use the old VRAM copy. When the EFB copies are
// eventually flushed, they will overwrite each other, and the end result should be the same.
if (entry->pending_efb_copy)
{
if (discard_pending_efb_copy)
{
// If the RAM copy is being completely overwritten by a new EFB copy, we can discard the
// existing pending copy, and not bother waiting for it in the future. This happens in
// Xenoblade's sunset scene, where 35 copies are done per frame, and 25 of them are
// copied to the same address, and can be skipped.
ReleaseEFBCopyStagingTexture(std::move(entry->pending_efb_copy));
auto pending_it = std::find(m_pending_efb_copies.begin(), m_pending_efb_copies.end(), entry);
if (pending_it != m_pending_efb_copies.end())
m_pending_efb_copies.erase(pending_it);
}
else
{
entry->pending_efb_copy_invalidated = true;
return textures_by_address.erase(iter);
}
}
auto config = entry->texture->GetConfig();
texture_pool.emplace(config, TexPoolEntry(std::move(entry->texture)));

View File

@ -13,6 +13,7 @@
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "Common/CommonTypes.h"
#include "VideoCommon/AbstractTexture.h"
@ -22,6 +23,7 @@
#include "VideoCommon/VideoCommon.h"
struct VideoConfig;
class AbstractStagingTexture;
struct TextureAndTLUTFormat
{
@ -149,6 +151,12 @@ public:
// * partially updated textures which refer to this efb copy
std::unordered_set<TCacheEntry*> references;
// Pending EFB copy
std::unique_ptr<AbstractStagingTexture> pending_efb_copy;
u32 pending_efb_copy_width = 0;
u32 pending_efb_copy_height = 0;
bool pending_efb_copy_invalidated = false;
explicit TCacheEntry(std::unique_ptr<AbstractTexture> tex);
~TCacheEntry();
@ -216,10 +224,10 @@ public:
void Invalidate();
virtual void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top,
bool clamp_bottom,
virtual void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma,
bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) = 0;
virtual bool CompileShaders() = 0;
@ -278,6 +286,12 @@ public:
void ScaleTextureCacheEntryTo(TCacheEntry* entry, u32 new_width, u32 new_height);
// Flushes all pending EFB copies to emulated RAM.
void FlushEFBCopies();
// Returns a texture config suitable for drawing a RAM EFB copy into.
static TextureConfig GetEncodingTextureConfig();
protected:
TextureCacheBase();
@ -329,7 +343,8 @@ private:
const CopyFilterCoefficientArray& filter_coefficients) = 0;
// Removes and unlinks texture from texture cache and returns it to the pool
TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter);
TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter,
bool discard_pending_efb_copy = false);
void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y);
@ -339,6 +354,17 @@ private:
CopyFilterCoefficientArray
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;
// Flushes a pending EFB copy to RAM from the host to the guest RAM.
void WriteEFBCopyToRAM(u8* dst_ptr, u32 width, u32 height, u32 stride,
std::unique_ptr<AbstractStagingTexture> staging_texture);
void FlushEFBCopy(TCacheEntry* entry);
// Returns a staging texture of the maximum EFB copy size.
std::unique_ptr<AbstractStagingTexture> GetEFBCopyStagingTexture();
// Returns an EFB copy staging texture to the pool, so it can be re-used.
void ReleaseEFBCopyStagingTexture(std::unique_ptr<AbstractStagingTexture> tex);
TexAddrCache textures_by_address;
TexHashCache textures_by_hash;
TexPool texture_pool;
@ -360,6 +386,13 @@ private:
bool arbitrary_mipmap_detection;
};
BackupConfig backup_config = {};
// Pool of readback textures used for deferred EFB copies.
std::vector<std::unique_ptr<AbstractStagingTexture>> m_efb_copy_staging_texture_pool;
// List of pending EFB copies. It is important that the order is preserved for these,
// so that overlapping textures are written to guest RAM in the order they are issued.
std::vector<TCacheEntry*> m_pending_efb_copies;
};
extern std::unique_ptr<TextureCacheBase> g_texture_cache;

View File

@ -141,6 +141,7 @@ void VideoConfig::Refresh()
bSkipEFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM);
bSkipXFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM);
bDisableCopyToVRAM = Config::Get(Config::GFX_HACK_DISABLE_COPY_TO_VRAM);
bDeferEFBCopies = Config::Get(Config::GFX_HACK_DEFER_EFB_COPIES);
bImmediateXFB = Config::Get(Config::GFX_HACK_IMMEDIATE_XFB);
bCopyEFBScaled = Config::Get(Config::GFX_HACK_COPY_EFB_SCALED);
bEFBEmulateFormatChanges = Config::Get(Config::GFX_HACK_EFB_EMULATE_FORMAT_CHANGES);

View File

@ -120,6 +120,7 @@ struct VideoConfig final
bool bSkipEFBCopyToRam;
bool bSkipXFBCopyToRam;
bool bDisableCopyToVRAM;
bool bDeferEFBCopies;
bool bImmediateXFB;
bool bCopyEFBScaled;
int iSafeTextureCache_ColorSamples;