Merge pull request #17534 from hrydgard/drawpixels-performance

Add a cache for MakePixelsTexture
This commit is contained in:
Henrik Rydgård 2023-05-30 15:23:05 +02:00 committed by GitHub
commit 487d7856a0
No known key found for this signature in database
10 changed files with 320 additions and 141 deletions

View File

@ -92,6 +92,7 @@ public:
Framebuffer *CreateFramebuffer(const FramebufferDesc &desc) override;
void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) override;
void UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) override;
void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override;
bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override;
@ -795,35 +796,83 @@ public:
width_ = desc.width;
height_ = desc.height;
depth_ = desc.depth;
format_ = desc.format;
mipLevels_ = desc.mipLevels;
~D3D11Texture() {
if (tex)
if (stagingTex)
if (view)
if (tex_)
if (stagingTex_)
if (view_)
ID3D11Texture2D *tex = nullptr;
ID3D11Texture2D *stagingTex = nullptr;
ID3D11ShaderResourceView *view = nullptr;
bool Create(ID3D11DeviceContext *context, ID3D11Device *device, const TextureDesc &desc, bool generateMips);
bool CreateStagingTexture(ID3D11Device *device);
void UpdateTextureLevels(ID3D11DeviceContext *context, ID3D11Device *device, Texture *texture, const uint8_t *const *data, TextureCallback initDataCallback, int numLevels);
ID3D11ShaderResourceView *View() { return view_; }
bool FillLevel(ID3D11DeviceContext *context, int level, int w, int h, int d, const uint8_t *const *data, TextureCallback initDataCallback);
ID3D11Texture2D *tex_ = nullptr;
ID3D11Texture2D *stagingTex_ = nullptr;
ID3D11ShaderResourceView *view_ = nullptr;
int mipLevels_ = 0;
Texture *D3D11DrawContext::CreateTexture(const TextureDesc &desc) {
if (!(GetDataFormatSupport(desc.format) & FMT_TEXTURE)) {
// D3D11 does not support this format as a texture format.
return nullptr;
bool D3D11Texture::FillLevel(ID3D11DeviceContext *context, int level, int w, int h, int d, const uint8_t *const *data, TextureCallback initDataCallback) {
HRESULT hr = context->Map(stagingTex_, level, D3D11_MAP_WRITE, 0, &mapped);
if (!SUCCEEDED(hr)) {
tex_ = nullptr;
return false;
D3D11Texture *tex = new D3D11Texture(desc);
bool generateMips = desc.generateMips;
if (desc.generateMips && !(GetDataFormatSupport(desc.format) & FMT_AUTOGEN_MIPS)) {
// D3D11 does not support autogenerating mipmaps for this format.
generateMips = false;
if (!initDataCallback((uint8_t *)mapped.pData, data[level], w, h, d, mapped.RowPitch, mapped.DepthPitch)) {
for (int s = 0; s < d; ++s) {
for (int y = 0; y < h; ++y) {
void *dest = (uint8_t *)mapped.pData + mapped.DepthPitch * s + mapped.RowPitch * y;
uint32_t byteStride = w * (uint32_t)DataFormatSizeInBytes(format_);
const void *src = data[level] + byteStride * (y + h * d);
memcpy(dest, src, byteStride);
context->Unmap(stagingTex_, level);
return true;
bool D3D11Texture::CreateStagingTexture(ID3D11Device *device) {
if (stagingTex_)
return true;
D3D11_TEXTURE2D_DESC descColor{};
descColor.Width = width_;
descColor.Height = height_;
descColor.MipLevels = mipLevels_;
descColor.ArraySize = 1;
descColor.Format = dataFormatToD3D11(format_);
descColor.SampleDesc.Count = 1;
descColor.SampleDesc.Quality = 0;
descColor.Usage = D3D11_USAGE_STAGING;
descColor.BindFlags = 0;
descColor.MiscFlags = 0;
descColor.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
HRESULT hr = device->CreateTexture2D(&descColor, nullptr, &stagingTex_);
if (!SUCCEEDED(hr)) {
stagingTex_ = nullptr;
return false;
return true;
bool D3D11Texture::Create(ID3D11DeviceContext *context, ID3D11Device *device, const TextureDesc &desc, bool generateMips) {
D3D11_TEXTURE2D_DESC descColor{};
descColor.Width = desc.width;
descColor.Height = desc.height;
@ -832,25 +881,16 @@ Texture *D3D11DrawContext::CreateTexture(const TextureDesc &desc) {
descColor.Format = dataFormatToD3D11(desc.format);
descColor.SampleDesc.Count = 1;
descColor.SampleDesc.Quality = 0;
if (desc.initDataCallback) {
descColor.Usage = D3D11_USAGE_STAGING;
descColor.BindFlags = 0;
descColor.MiscFlags = 0;
descColor.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
HRESULT hr = device_->CreateTexture2D(&descColor, nullptr, &tex->stagingTex);
if (!SUCCEEDED(hr)) {
delete tex;
return nullptr;
descColor.Usage = D3D11_USAGE_DEFAULT;
descColor.MiscFlags = generateMips ? D3D11_RESOURCE_MISC_GENERATE_MIPS : 0;
descColor.CPUAccessFlags = 0;
// Make sure we have a staging texture if we'll need it.
if (desc.initDataCallback && !CreateStagingTexture(device)) {
return false;
D3D11_SUBRESOURCE_DATA *initDataParam = nullptr;
D3D11_SUBRESOURCE_DATA initData[12]{};
std::vector<uint8_t> initDataBuffer[12];
@ -870,62 +910,39 @@ Texture *D3D11DrawContext::CreateTexture(const TextureDesc &desc) {
initDataParam = initData;
HRESULT hr = device_->CreateTexture2D(&descColor, initDataParam, &tex->tex);
HRESULT hr = device->CreateTexture2D(&descColor, initDataParam, &tex_);
if (!SUCCEEDED(hr)) {
delete tex;
return nullptr;
tex_ = nullptr;
return false;
hr = device_->CreateShaderResourceView(tex->tex, nullptr, &tex->view);
hr = device->CreateShaderResourceView(tex_, nullptr, &view_);
if (!SUCCEEDED(hr)) {
delete tex;
return nullptr;
return false;
auto populateLevelCallback = [&](int level, int w, int h, int d) {
hr = context_->Map(tex->stagingTex, level, D3D11_MAP_WRITE, 0, &mapped);
if (!SUCCEEDED(hr)) {
return false;
if (!desc.initDataCallback((uint8_t *)mapped.pData, desc.initData[level], w, h, d, mapped.RowPitch, mapped.DepthPitch)) {
for (int s = 0; s < d; ++s) {
for (int y = 0; y < h; ++y) {
void *dest = (uint8_t *)mapped.pData + mapped.DepthPitch * s + mapped.RowPitch * y;
uint32_t byteStride = w * (uint32_t)DataFormatSizeInBytes(desc.format);
const void *src = desc.initData[level] + byteStride * (y + h * d);
memcpy(dest, src, byteStride);
context_->Unmap(tex->stagingTex, level);
return true;
if (generateMips && desc.initData.size() >= 1) {
if (desc.initDataCallback) {
if (!populateLevelCallback(0, desc.width, desc.height, desc.depth)) {
delete tex;
return nullptr;
if (!FillLevel(context, 0, desc.width, desc.height, desc.depth, desc.initData.data(), desc.initDataCallback)) {
return false;
context_->CopyResource(tex->stagingTex, tex->stagingTex);
tex->stagingTex = nullptr;
context->CopyResource(tex_, stagingTex_);
stagingTex_ = nullptr;
} else {
uint32_t byteStride = desc.width * (uint32_t)DataFormatSizeInBytes(desc.format);
context_->UpdateSubresource(tex->tex, 0, nullptr, desc.initData[0], byteStride, 0);
context->UpdateSubresource(tex_, 0, nullptr, desc.initData[0], byteStride, 0);
} else if (desc.initDataCallback) {
int w = desc.width;
int h = desc.height;
int d = desc.depth;
for (int i = 0; i < (int)desc.initData.size(); i++) {
if (!populateLevelCallback(i, desc.width, desc.height, desc.depth)) {
if (!FillLevel(context, i, w, h, d, desc.initData.data(), desc.initDataCallback)) {
if (i == 0) {
delete tex;
return nullptr;
return false;
} else {
@ -936,13 +953,62 @@ Texture *D3D11DrawContext::CreateTexture(const TextureDesc &desc) {
d = (d + 1) / 2;
context_->CopyResource(tex->tex, tex->stagingTex);
tex->stagingTex = nullptr;
context->CopyResource(tex_, stagingTex_);
stagingTex_ = nullptr;
return true;
void D3D11Texture::UpdateTextureLevels(ID3D11DeviceContext *context, ID3D11Device *device, Texture *texture, const uint8_t * const*data, TextureCallback initDataCallback, int numLevels) {
if (!CreateStagingTexture(device)) {
int w = width_;
int h = height_;
int d = depth_;
for (int i = 0; i < (int)numLevels; i++) {
if (!FillLevel(context, i, w, h, d, data, initDataCallback)) {
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
context->CopyResource(tex_, stagingTex_);
stagingTex_ = nullptr;
Texture *D3D11DrawContext::CreateTexture(const TextureDesc &desc) {
if (!(GetDataFormatSupport(desc.format) & FMT_TEXTURE)) {
// D3D11 does not support this format as a texture format.
return nullptr;
D3D11Texture *tex = new D3D11Texture(desc);
bool generateMips = desc.generateMips;
if (desc.generateMips && !(GetDataFormatSupport(desc.format) & FMT_AUTOGEN_MIPS)) {
// D3D11 does not support autogenerating mipmaps for this format.
generateMips = false;
if (!tex->Create(context_, device_, desc, generateMips)) {
return nullptr;
return tex;
void D3D11DrawContext::UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) {
D3D11Texture *tex = (D3D11Texture *)texture;
tex->UpdateTextureLevels(context_, device_, texture, data, initDataCallback, numLevels);
ShaderModule *D3D11DrawContext::CreateShaderModule(ShaderStage stage, ShaderLanguage language, const uint8_t *data, size_t dataSize, const char *tag) {
if (language != ShaderLanguage::HLSL_D3D11) {
ERROR_LOG(G3D, "Unsupported shader language");
@ -1411,7 +1477,7 @@ void D3D11DrawContext::BindTextures(int start, int count, Texture **textures, Te
_assert_(start + count <= ARRAY_SIZE(views));
for (int i = 0; i < count; i++) {
D3D11Texture *tex = (D3D11Texture *)textures[i];
views[i] = tex ? tex->view : nullptr;
views[i] = tex ? tex->View() : nullptr;
context_->PSSetShaderResources(start, count, views);
@ -1771,7 +1837,7 @@ uint64_t D3D11DrawContext::GetNativeObject(NativeObject obj, void *srcObject) {
case NativeObject::FEATURE_LEVEL:
return (uint64_t)(uintptr_t)featureLevel_;
case NativeObject::TEXTURE_VIEW:
return (uint64_t)(((D3D11Texture *)srcObject)->view);
return (uint64_t)(((D3D11Texture *)srcObject)->View());
return 0;

View File

@ -308,14 +308,14 @@ public:
return nullptr;
void UpdateTextureLevels(const uint8_t * const *data, int numLevels, TextureCallback initDataCallback);
void SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback callback);
void SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback initDataCallback);
bool Create(const TextureDesc &desc);
TextureType type_;
DataFormat format_;
D3DFORMAT d3dfmt_;
LPDIRECT3DTEXTURE9 tex_ = nullptr;
@ -374,27 +374,31 @@ bool D3D9Texture::Create(const TextureDesc &desc) {
if (FAILED(hr)) {
ERROR_LOG(G3D, "Texture creation failed");
ERROR_LOG(G3D, "D3D9 Texture creation failed");
return false;
if (desc.initData.size()) {
// In D3D9, after setting D3DUSAGE_AUTOGENMIPS, we can only access the top layer. The rest will be
// automatically generated.
int maxLevel = desc.generateMips ? 1 : (int)desc.initData.size();
int w = desc.width;
int h = desc.height;
int d = desc.depth;
for (int i = 0; i < maxLevel; i++) {
SetImageData(0, 0, 0, w, h, d, i, 0, desc.initData[i], desc.initDataCallback);
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
int numLevels = desc.generateMips ? 1 : (int)desc.initData.size();
UpdateTextureLevels(desc.initData.data(), numLevels, desc.initDataCallback);
return true;
void D3D9Texture::UpdateTextureLevels(const uint8_t * const *data, int numLevels, TextureCallback initDataCallback) {
int w = width_;
int h = height_;
int d = depth_;
for (int i = 0; i < numLevels; i++) {
SetImageData(0, 0, 0, w, h, d, i, 0, data[i], initDataCallback);
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
// Just switches R and B (swaps bytes 0 and 2; G and A stay in place).
inline uint32_t Shuffle8888(uint32_t x) {
return (x & 0xFF00FF00) | ((x >> 16) & 0xFF) | ((x << 16) & 0xFF0000);
@ -532,6 +536,7 @@ public:
Framebuffer *CreateFramebuffer(const FramebufferDesc &desc) override;
void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) override;
void UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) override;
void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override {
// Not implemented
@ -934,6 +939,12 @@ Texture *D3D9Context::CreateTexture(const TextureDesc &desc) {
return tex;
void D3D9Context::UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) {
D3D9Texture *tex = (D3D9Texture *)texture;
tex->UpdateTextureLevels(data, numLevels, initDataCallback);
void D3D9Context::BindTextures(int start, int count, Texture **textures, TextureBindFlags flags) {
_assert_(start + count <= MAX_BOUND_TEXTURES);
for (int i = start; i < start + count; i++) {

View File

@ -371,6 +371,7 @@ public:
void EndFrame() override;
void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) override;
void UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) override;
void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override;
bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override;
@ -853,25 +854,29 @@ public:
return tex_;
void UpdateTextureLevels(GLRenderManager *render, const uint8_t *const *data, int numLevels, TextureCallback initDataCallback);
void SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback callback);
void SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback initDataCallback);
GLRenderManager *render_;
GLRTexture *tex_;
DataFormat format_;
TextureType type_;
int mipLevels_;
bool generatedMips_;
bool generateMips_; // Generate mips requested
bool generatedMips_; // Has generated mips
OpenGLTexture::OpenGLTexture(GLRenderManager *render, const TextureDesc &desc) : render_(render) {
generatedMips_ = false;
generateMips_ = desc.generateMips;
width_ = desc.width;
height_ = desc.height;
depth_ = desc.depth;
format_ = desc.format;
type_ = desc.type;
GLenum target = TypeToTarget(desc.type);
tex_ = render->CreateTexture(target, desc.width, desc.height, 1, desc.mipLevels);
@ -879,21 +884,25 @@ OpenGLTexture::OpenGLTexture(GLRenderManager *render, const TextureDesc &desc) :
if (desc.initData.empty())
UpdateTextureLevels(render, desc.initData.data(), (int)desc.initData.size(), desc.initDataCallback);
void OpenGLTexture::UpdateTextureLevels(GLRenderManager *render, const uint8_t * const *data, int numLevels, TextureCallback initDataCallback) {
int level = 0;
int width = width_;
int height = height_;
int depth = depth_;
for (auto data : desc.initData) {
SetImageData(0, 0, 0, width, height, depth, level, 0, data, desc.initDataCallback);
for (int i = 0; i < numLevels; i++) {
SetImageData(0, 0, 0, width, height, depth, level, 0, data[i], initDataCallback);
width = (width + 1) / 2;
height = (height + 1) / 2;
depth = (depth + 1) / 2;
mipLevels_ = desc.generateMips ? desc.mipLevels : level;
mipLevels_ = generateMips_ ? mipLevels_ : level;
bool genMips = false;
if ((int)desc.initData.size() < desc.mipLevels && desc.generateMips) {
if (numLevels < mipLevels_ && generateMips_) {
// Assumes the texture is bound for editing
genMips = true;
generatedMips_ = true;
@ -923,7 +932,7 @@ public:
GLRFramebuffer *framebuffer_ = nullptr;
void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback callback) {
void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback initDataCallback) {
if ((width != width_ || height != height_ || depth != depth_) && level == 0) {
// When switching to texStorage we need to handle this correctly.
width_ = width;
@ -939,8 +948,8 @@ void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int
uint8_t *texData = new uint8_t[(size_t)(width * height * depth * alignment)];
bool texDataPopulated = false;
if (callback) {
texDataPopulated = callback(texData, data, width, height, depth, width * (int)alignment, height * width * (int)alignment);
if (initDataCallback) {
texDataPopulated = initDataCallback(texData, data, width, height, depth, width * (int)alignment, height * width * (int)alignment);
if (texDataPopulated) {
if (format_ == DataFormat::A1R5G5B5_UNORM_PACK16) {
@ -1021,6 +1030,11 @@ Texture *OpenGLContext::CreateTexture(const TextureDesc &desc) {
return new OpenGLTexture(&renderManager_, desc);
void OpenGLContext::UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) {
OpenGLTexture *tex = (OpenGLTexture *)texture;
tex->UpdateTextureLevels(&renderManager_, data, numLevels, initDataCallback);
DepthStencilState *OpenGLContext::CreateDepthStencilState(const DepthStencilStateDesc &desc) {
OpenGLDepthStencilState *ds = new OpenGLDepthStencilState();
ds->depthTestEnabled = desc.depthTestEnabled;

View File

@ -335,8 +335,11 @@ struct DescriptorSetKey {
class VKTexture : public Texture {
VKTexture(VulkanContext *vulkan, VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const TextureDesc &desc)
: vulkan_(vulkan), mipLevels_(desc.mipLevels), format_(desc.format) {}
: vulkan_(vulkan), mipLevels_(desc.mipLevels) {
format_ = desc.format;
bool Create(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const TextureDesc &desc);
void Update(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const uint8_t *const *data, TextureCallback callback, int numLevels);
~VKTexture() {
@ -356,7 +359,13 @@ public:
return VK_NULL_HANDLE; // This would be bad.
int NumLevels() const {
return mipLevels_;
void UpdateInternal(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const uint8_t *const *data, TextureCallback callback, int numLevels);
void Destroy() {
if (vkTex_) {
@ -369,8 +378,6 @@ private:
VulkanTexture *vkTex_ = nullptr;
int mipLevels_ = 0;
DataFormat format_ = DataFormat::UNDEFINED;
class VKFramebuffer;
@ -421,6 +428,7 @@ public:
Framebuffer *CreateFramebuffer(const FramebufferDesc &desc) override;
void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) override;
void UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) override;
void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override;
bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override;
@ -748,14 +756,14 @@ enum class TextureState {
bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushPool *push, const TextureDesc &desc) {
bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const TextureDesc &desc) {
// Zero-sized textures not allowed.
_assert_(desc.width * desc.height * desc.depth > 0); // remember to set depth to 1!
if (desc.width * desc.height * desc.depth <= 0) {
ERROR_LOG(G3D, "Bad texture dimensions %dx%dx%d", desc.width, desc.height, desc.depth);
return false;
format_ = desc.format;
mipLevels_ = desc.mipLevels;
width_ = desc.width;
@ -763,8 +771,6 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushPool *push, const TextureD
depth_ = desc.depth;
vkTex_ = new VulkanTexture(vulkan_, desc.tag);
VkFormat vulkanFormat = DataFormatToVulkan(format_);
int bpp = GetBpp(vulkanFormat);
int bytesPerPixel = bpp / 8;
if (mipLevels_ > (int)desc.initData.size()) {
// Gonna have to generate some, which requires TRANSFER_SRC
@ -779,33 +785,10 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushPool *push, const TextureD
if (desc.initData.size()) {
int w = width_;
int h = height_;
int d = depth_;
int i;
for (i = 0; i < (int)desc.initData.size(); i++) {
uint32_t offset;
VkBuffer buf;
size_t size = w * h * d * bytesPerPixel;
uint8_t *dest = (uint8_t *)push->Allocate(size, 16, &buf, &offset);
if (desc.initDataCallback) {
_assert_(dest != nullptr);
if (!desc.initDataCallback(dest, desc.initData[i], w, h, d, w * bytesPerPixel, h * w * bytesPerPixel)) {
memcpy(dest, desc.initData[i], size);
} else {
memcpy(dest, desc.initData[i], size);
TextureCopyBatch batch;
vkTex_->CopyBufferToMipLevel(cmd, &batch, i, w, h, 0, buf, offset, w);
vkTex_->FinishCopyBatch(cmd, &batch);
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
UpdateInternal(cmd, pushBuffer, desc.initData.data(), desc.initDataCallback, (int)desc.initData.size());
// Generate the rest of the mips automatically.
if (i < mipLevels_) {
vkTex_->GenerateMips(cmd, i, false);
if (desc.initData.size() < mipLevels_) {
vkTex_->GenerateMips(cmd, (int)desc.initData.size(), false);
@ -813,6 +796,43 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushPool *push, const TextureD
return true;
void VKTexture::Update(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const uint8_t * const *data, TextureCallback initDataCallback, int numLevels) {
// Before we can use UpdateInternal, we need to transition the image to the same state as after CreateDirect,
// making it ready for writing.
UpdateInternal(cmd, pushBuffer, data, initDataCallback, numLevels);
void VKTexture::UpdateInternal(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const uint8_t * const *data, TextureCallback initDataCallback, int numLevels) {
int w = width_;
int h = height_;
int d = depth_;
int i;
VkFormat vulkanFormat = DataFormatToVulkan(format_);
int bpp = GetBpp(vulkanFormat);
int bytesPerPixel = bpp / 8;
TextureCopyBatch batch;
for (i = 0; i < numLevels; i++) {
uint32_t offset;
VkBuffer buf;
size_t size = w * h * d * bytesPerPixel;
uint8_t *dest = (uint8_t *)pushBuffer->Allocate(size, 16, &buf, &offset);
if (initDataCallback) {
_assert_(dest != nullptr);
if (!initDataCallback(dest, data[i], w, h, d, w * bytesPerPixel, h * w * bytesPerPixel)) {
memcpy(dest, data[i], size);
} else {
memcpy(dest, data[i], size);
vkTex_->CopyBufferToMipLevel(cmd, &batch, i, w, h, 0, buf, offset, w);
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
vkTex_->FinishCopyBatch(cmd, &batch);
static DataFormat DataFormatFromVulkanDepth(VkFormat fmt) {
switch (fmt) {
@ -1342,6 +1362,20 @@ Texture *VKContext::CreateTexture(const TextureDesc &desc) {
void VKContext::UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) {
VkCommandBuffer initCmd = renderManager_.GetInitCmd();
if (!push_ || !initCmd) {
// Too early! Fail.
ERROR_LOG(G3D, "Can't create textures before the first frame has started.");
VKTexture *tex = (VKTexture *)texture;
_dbg_assert_(numLevels <= tex->NumLevels());
tex->Update(initCmd, push_, data, initDataCallback, numLevels);
static inline void CopySide(VkStencilOpState &dest, const StencilSetup &src) {
dest.compareOp = compToVK[(int)src.compareOp];
dest.failOp = stencilOpToVK[(int)src.failOp];

View File

@ -132,6 +132,7 @@ bool RefCountedObject::Release() {
return true;
} else {
// No point in printing the name here if the object has already been free-d, it'll be corrupt and dangerous to print.
_dbg_assert_msg_(false, "Refcount (%d) invalid for object %p - corrupt?", refcount_.load(), this);
return false;
@ -139,11 +140,10 @@ bool RefCountedObject::Release() {
bool RefCountedObject::ReleaseAssertLast() {
bool released = Release();
_dbg_assert_msg_(released, "RefCountedObject: Expected to be the last reference, but isn't!");
_dbg_assert_msg_(released, "RefCountedObject: Expected to be the last reference, but isn't! (%s)", name_);
return released;
// ================================== PIXEL/FRAGMENT SHADERS
// The Vulkan ones can be re-used with modern GL later if desired, as they're just GLSL.

View File

@ -466,9 +466,11 @@ public:
int Width() { return width_; }
int Height() { return height_; }
int Depth() { return depth_; }
DataFormat Format() { return format_; }
int width_ = -1, height_ = -1, depth_ = -1;
DataFormat format_ = DataFormat::UNDEFINED;
struct BindingDesc {
@ -731,6 +733,11 @@ public:
// Copies data from the CPU over into the buffer, at a specific offset. This does not change the size of the buffer and cannot write outside it.
virtual void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) = 0;
// Used to optimize DrawPixels by re-using previously allocated temp textures.
// Do not try to update a texture that might be used by an in-flight command buffer! In OpenGL and D3D, this will cause stalls
// while in Vulkan this might cause various strangeness like image corruption.
virtual void UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) = 0;
virtual void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) = 0;
virtual bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) = 0;

View File

@ -1217,7 +1217,6 @@ void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int
u0, v0, u1, v1, ROTATION_LOCKED_HORIZONTAL, flags);
@ -1401,11 +1400,26 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
return true;
Draw::DataFormat texFormat = srcPixelFormat == GE_FORMAT_DEPTH16 ? depthFormat : preferredPixelsFormat_;
// Look for a matching texture we can re-use.
for (auto &iter : drawPixelsCache_) {
if (iter.frameNumber > gpuStats.numFlips - 3 || iter.tex->Width() != width || iter.tex->Height() != height || iter.tex->Format() != texFormat) {
// OK, current one seems good, let's use it (and mark it used).
draw_->UpdateTextureLevels(iter.tex, &srcPixels, generateTexture, 1);
iter.frameNumber = gpuStats.numFlips;
return iter.tex;
// Note: For depth, we create an R16_UNORM texture, that'll be just fine for uploading depth through a shader,
// and likely more efficient.
Draw::TextureDesc desc{
srcPixelFormat == GE_FORMAT_DEPTH16 ? depthFormat : preferredPixelsFormat_,
@ -1424,6 +1438,12 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
ERROR_LOG(G3D, "Failed to create DrawPixels texture");
gpuStats.numTexturesDecoded++; // Separate stat for this later?
INFO_LOG(G3D, "Creating drawPixelsCache texture: %dx%d", tex->Width(), tex->Height());
DrawPixelsEntry entry{ tex, gpuStats.numFlips };
return tex;
@ -1450,7 +1470,6 @@ void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, int
presentation_->SourceTexture(pixelsTex, 512, 272);
presentation_->CopyToOutput(flags, uvRotation, u0, v0, u1, v1);
// PresentationCommon sets all kinds of state, we can't rely on anything.
@ -1672,6 +1691,20 @@ void FramebufferManagerCommon::DecimateFBOs() {
bvfbs_.erase(bvfbs_.begin() + i--);
// And DrawPixels cached textures.
for (auto it = drawPixelsCache_.begin(); it != drawPixelsCache_.end(); ) {
int age = gpuStats.numFlips - it->frameNumber;
if (age > 10) {
INFO_LOG(G3D, "Releasing drawPixelsCache texture: %dx%d", it->tex->Width(), it->tex->Height());
it->tex = nullptr;
it = drawPixelsCache_.erase(it);
} else {
// Requires width/height to be set already.
@ -2604,10 +2637,15 @@ void FramebufferManagerCommon::DestroyAllFBOs() {
for (auto iter : fbosToDelete_) {
for (auto &iter : fbosToDelete_) {
for (auto &iter : drawPixelsCache_) {
static const char *TempFBOReasonToString(TempFBO reason) {

View File

@ -267,6 +267,11 @@ namespace Draw {
class DrawContext;
struct DrawPixelsEntry {
Draw::Texture *tex;
int frameNumber;
struct GPUDebugBuffer;
class DrawEngineCommon;
class PresentationCommon;
@ -571,6 +576,8 @@ protected:
std::vector<VirtualFramebuffer *> vfbs_;
std::vector<VirtualFramebuffer *> bvfbs_; // blitting framebuffers (for download)
std::vector<DrawPixelsEntry> drawPixelsCache_;
bool gameUsesSequentialCopies_ = false;
// Sampled in BeginFrame/UpdateSize for safety.

View File

@ -581,20 +581,23 @@ Draw::ShaderModule *PresentationCommon::CompileShaderModule(ShaderStage stage, S
void PresentationCommon::SourceTexture(Draw::Texture *texture, int bufferWidth, int bufferHeight) {
// AddRef before release and assign in case it's the same.
srcTexture_ = texture;
srcWidth_ = bufferWidth;
srcHeight_ = bufferHeight;
void PresentationCommon::SourceFramebuffer(Draw::Framebuffer *fb, int bufferWidth, int bufferHeight) {
srcFramebuffer_ = fb;
srcWidth_ = bufferWidth;
srcHeight_ = bufferHeight;

View File

@ -354,7 +354,6 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
draw_->BlitFramebuffer(blitFBO, 0, 0, w, h, dstBuffer->fbo, 0, 0, dstBuffer->renderWidth, dstBuffer->renderHeight, Draw::FB_STENCIL_BIT, Draw::FB_BLIT_NEAREST, "WriteStencilFromMemory_Blit");
RebindFramebuffer("RebindFramebuffer - Stencil");