gecko-dev/gfx/gl/GLUploadHelpers.cpp
Jeff Muizelaar fc208a231c Bug 1712367. Avoid TexSubImage ES slow path. r=jgilbert,sotaro
Instead of drawing updates into the correct spot in the destination
DataSourceSurface we just use it as scratch space and always
draw in the top-left with stride = bpp*width.

We add a dstOffset to the upload functions to support using
a subsurface.

Differential Revision: https://phabricator.services.mozilla.com/D116501
2021-06-17 13:24:05 +00:00

519 lines
20 KiB
C++

/* -*- Mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; tab-width: 4; -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "GLUploadHelpers.h"
#include "GLContext.h"
#include "mozilla/gfx/2D.h"
#include "gfxUtils.h"
#include "mozilla/gfx/Tools.h" // For BytesPerPixel
#include "nsRegion.h"
#include "GfxTexturesReporter.h"
#include "mozilla/gfx/Logging.h"
namespace mozilla {
using namespace gfx;
namespace gl {
static unsigned int DataOffset(const IntPoint& aPoint, int32_t aStride,
SurfaceFormat aFormat) {
unsigned int data = aPoint.y * aStride;
data += aPoint.x * BytesPerPixel(aFormat);
return data;
}
static bool CheckUploadBounds(const IntSize& aDst, const IntSize& aSrc,
const IntPoint& aOffset) {
if (aOffset.x < 0 || aOffset.y < 0 || aOffset.x >= aSrc.width ||
aOffset.y >= aSrc.height) {
MOZ_ASSERT_UNREACHABLE("Offset outside source bounds");
return false;
}
if (aDst.width > (aSrc.width - aOffset.x) ||
aDst.height > (aSrc.height - aOffset.y)) {
MOZ_ASSERT_UNREACHABLE("Source has insufficient data");
return false;
}
return true;
}
static GLint GetAddressAlignment(ptrdiff_t aAddress) {
if (!(aAddress & 0x7)) {
return 8;
} else if (!(aAddress & 0x3)) {
return 4;
} else if (!(aAddress & 0x1)) {
return 2;
} else {
return 1;
}
}
// Take texture data in a given buffer and copy it into a larger buffer,
// padding out the edge pixels for filtering if necessary
static void CopyAndPadTextureData(const GLvoid* srcBuffer, GLvoid* dstBuffer,
GLsizei srcWidth, GLsizei srcHeight,
GLsizei dstWidth, GLsizei dstHeight,
GLsizei stride, GLint pixelsize) {
unsigned char* rowDest = static_cast<unsigned char*>(dstBuffer);
const unsigned char* source = static_cast<const unsigned char*>(srcBuffer);
for (GLsizei h = 0; h < srcHeight; ++h) {
memcpy(rowDest, source, srcWidth * pixelsize);
rowDest += dstWidth * pixelsize;
source += stride;
}
GLsizei padHeight = srcHeight;
// Pad out an extra row of pixels so that edge filtering doesn't use garbage
// data
if (dstHeight > srcHeight) {
memcpy(rowDest, source - stride, srcWidth * pixelsize);
padHeight++;
}
// Pad out an extra column of pixels
if (dstWidth > srcWidth) {
rowDest = static_cast<unsigned char*>(dstBuffer) + srcWidth * pixelsize;
for (GLsizei h = 0; h < padHeight; ++h) {
memcpy(rowDest, rowDest - pixelsize, pixelsize);
rowDest += dstWidth * pixelsize;
}
}
}
// In both of these cases (for the Adreno at least) it is impossible
// to determine good or bad driver versions for POT texture uploads,
// so blacklist them all. Newer drivers use a different rendering
// string in the form "Adreno (TM) 200" and the drivers we've seen so
// far work fine with NPOT textures, so don't blacklist those until we
// have evidence of any problems with them.
bool ShouldUploadSubTextures(GLContext* gl) {
if (!gl->WorkAroundDriverBugs()) return true;
// There are certain GPUs that we don't want to use glTexSubImage2D on
// because that function can be very slow and/or buggy
if (gl->Renderer() == GLRenderer::Adreno200 ||
gl->Renderer() == GLRenderer::Adreno205) {
return false;
}
// On PowerVR glTexSubImage does a readback, so it will be slower
// than just doing a glTexImage2D() directly. i.e. 26ms vs 10ms
if (gl->Renderer() == GLRenderer::SGX540 ||
gl->Renderer() == GLRenderer::SGX530) {
return false;
}
return true;
}
static void TexSubImage2DWithUnpackSubimageGLES(
GLContext* gl, GLenum target, GLint level, GLint xoffset, GLint yoffset,
GLsizei width, GLsizei height, GLsizei stride, GLint pixelsize,
GLenum format, GLenum type, const GLvoid* pixels) {
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT,
std::min(GetAddressAlignment((ptrdiff_t)pixels),
GetAddressAlignment((ptrdiff_t)stride)));
// When using GL_UNPACK_ROW_LENGTH, we need to work around a Tegra
// driver crash where the driver apparently tries to read
// (stride - width * pixelsize) bytes past the end of the last input
// row. We only upload the first height-1 rows using GL_UNPACK_ROW_LENGTH,
// and then we upload the final row separately. See bug 697990.
int rowLength = stride / pixelsize;
if (gl->HasPBOState()) {
gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, rowLength);
gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height, format,
type, pixels);
gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, 0);
} else {
gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, rowLength);
gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height - 1,
format, type, pixels);
gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, 0);
gl->fTexSubImage2D(target, level, xoffset, yoffset + height - 1, width, 1,
format, type,
(const unsigned char*)pixels + (height - 1) * stride);
}
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4);
}
static void TexSubImage2DWithoutUnpackSubimage(
GLContext* gl, GLenum target, GLint level, GLint xoffset, GLint yoffset,
GLsizei width, GLsizei height, GLsizei stride, GLint pixelsize,
GLenum format, GLenum type, const GLvoid* pixels) {
// Not using the whole row of texture data and GL_UNPACK_ROW_LENGTH
// isn't supported. We make a copy of the texture data we're using,
// such that we're using the whole row of data in the copy. This turns
// out to be more efficient than uploading row-by-row; see bug 698197.
// Width and height are never more than 16384. At 16Ki*16Ki, 4bpp is 1GiB, but
// if we allow 8bpp (or higher) here, that's 2GiB, which would overflow on
// 32-bit.
MOZ_ASSERT(width <= 16384);
MOZ_ASSERT(height <= 16384);
MOZ_ASSERT(pixelsize < 8);
const auto size = CheckedInt<size_t>(width) * height * pixelsize;
if (!size.isValid()) {
// This should never happen, but we use a defensive check.
MOZ_ASSERT_UNREACHABLE("Unacceptable size calculated.!");
return;
}
unsigned char* newPixels = new (fallible) unsigned char[size.value()];
if (newPixels) {
unsigned char* rowDest = newPixels;
const unsigned char* rowSource = (const unsigned char*)pixels;
for (int h = 0; h < height; h++) {
memcpy(rowDest, rowSource, width * pixelsize);
rowDest += width * pixelsize;
rowSource += stride;
}
stride = width * pixelsize;
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT,
std::min(GetAddressAlignment((ptrdiff_t)newPixels),
GetAddressAlignment((ptrdiff_t)stride)));
gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height, format,
type, newPixels);
delete[] newPixels;
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4);
} else {
// If we did not have sufficient memory for the required
// temporary buffer, then fall back to uploading row-by-row.
const unsigned char* rowSource = (const unsigned char*)pixels;
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT,
std::min(GetAddressAlignment((ptrdiff_t)pixels),
GetAddressAlignment((ptrdiff_t)stride)));
for (int i = 0; i < height; i++) {
gl->fTexSubImage2D(target, level, xoffset, yoffset + i, width, 1, format,
type, rowSource);
rowSource += stride;
}
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4);
}
}
static void TexSubImage2DHelper(GLContext* gl, GLenum target, GLint level,
GLint xoffset, GLint yoffset, GLsizei width,
GLsizei height, GLsizei stride, GLint pixelsize,
GLenum format, GLenum type,
const GLvoid* pixels) {
if (gl->IsGLES()) {
if (stride == width * pixelsize) {
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT,
std::min(GetAddressAlignment((ptrdiff_t)pixels),
GetAddressAlignment((ptrdiff_t)stride)));
gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height, format,
type, pixels);
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4);
} else if (gl->IsExtensionSupported(GLContext::EXT_unpack_subimage) ||
gl->HasPBOState()) {
TexSubImage2DWithUnpackSubimageGLES(gl, target, level, xoffset, yoffset,
width, height, stride, pixelsize,
format, type, pixels);
} else {
TexSubImage2DWithoutUnpackSubimage(gl, target, level, xoffset, yoffset,
width, height, stride, pixelsize,
format, type, pixels);
}
} else {
// desktop GL (non-ES) path
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT,
std::min(GetAddressAlignment((ptrdiff_t)pixels),
GetAddressAlignment((ptrdiff_t)stride)));
int rowLength = stride / pixelsize;
gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, rowLength);
gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height, format,
type, pixels);
gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, 0);
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4);
}
}
static void TexImage2DHelper(GLContext* gl, GLenum target, GLint level,
GLint internalformat, GLsizei width,
GLsizei height, GLsizei stride, GLint pixelsize,
GLint border, GLenum format, GLenum type,
const GLvoid* pixels) {
if (gl->IsGLES()) {
NS_ASSERTION(
format == (GLenum)internalformat,
"format and internalformat not the same for glTexImage2D on GLES2");
MOZ_ASSERT(width >= 0 && height >= 0);
if (!CanUploadNonPowerOfTwo(gl) &&
(stride != width * pixelsize || !IsPowerOfTwo((uint32_t)width) ||
!IsPowerOfTwo((uint32_t)height))) {
// Pad out texture width and height to the next power of two
// as we don't support/want non power of two texture uploads
GLsizei paddedWidth = RoundUpPow2((uint32_t)width);
GLsizei paddedHeight = RoundUpPow2((uint32_t)height);
// Width and height are never more than 16384. At 16Ki*16Ki, 4bpp is 1GiB,
// but if we allow 8bpp (or higher) here, that's 2GiB, which would
// overflow on 32-bit.
MOZ_ASSERT(width <= 16384);
MOZ_ASSERT(height <= 16384);
MOZ_ASSERT(pixelsize < 8);
const auto size =
CheckedInt<size_t>(paddedWidth) * paddedHeight * pixelsize;
if (!size.isValid()) {
// This should never happen, but we use a defensive check.
MOZ_ASSERT_UNREACHABLE("Unacceptable size calculated.!");
return;
}
GLvoid* paddedPixels = new unsigned char[size.value()];
// Pad out texture data to be in a POT sized buffer for uploading to
// a POT sized texture
CopyAndPadTextureData(pixels, paddedPixels, width, height, paddedWidth,
paddedHeight, stride, pixelsize);
gl->fPixelStorei(
LOCAL_GL_UNPACK_ALIGNMENT,
std::min(GetAddressAlignment((ptrdiff_t)paddedPixels),
GetAddressAlignment((ptrdiff_t)paddedWidth * pixelsize)));
gl->fTexImage2D(target, border, internalformat, paddedWidth, paddedHeight,
border, format, type, paddedPixels);
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4);
delete[] static_cast<unsigned char*>(paddedPixels);
return;
}
if (stride == width * pixelsize) {
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT,
std::min(GetAddressAlignment((ptrdiff_t)pixels),
GetAddressAlignment((ptrdiff_t)stride)));
gl->fTexImage2D(target, border, internalformat, width, height, border,
format, type, pixels);
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4);
} else {
// Use GLES-specific workarounds for GL_UNPACK_ROW_LENGTH; these are
// implemented in TexSubImage2D.
gl->fTexImage2D(target, border, internalformat, width, height, border,
format, type, nullptr);
TexSubImage2DHelper(gl, target, level, 0, 0, width, height, stride,
pixelsize, format, type, pixels);
}
} else {
// desktop GL (non-ES) path
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT,
std::min(GetAddressAlignment((ptrdiff_t)pixels),
GetAddressAlignment((ptrdiff_t)stride)));
int rowLength = stride / pixelsize;
gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, rowLength);
gl->fTexImage2D(target, level, internalformat, width, height, border,
format, type, pixels);
gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, 0);
gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4);
}
}
SurfaceFormat UploadImageDataToTexture(
GLContext* gl, unsigned char* aData, const gfx::IntSize& aDataSize,
const IntPoint& aDstOffset,
int32_t aStride, SurfaceFormat aFormat, const nsIntRegion& aDstRegion,
GLuint aTexture, const gfx::IntSize& aSize, size_t* aOutUploadSize,
bool aNeedInit, GLenum aTextureUnit, GLenum aTextureTarget) {
gl->MakeCurrent();
gl->fActiveTexture(aTextureUnit);
gl->fBindTexture(aTextureTarget, aTexture);
GLenum format = 0;
GLenum internalFormat = 0;
GLenum type = 0;
int32_t pixelSize = BytesPerPixel(aFormat);
SurfaceFormat surfaceFormat = gfx::SurfaceFormat::UNKNOWN;
MOZ_ASSERT(gl->GetPreferredARGB32Format() == LOCAL_GL_BGRA ||
gl->GetPreferredARGB32Format() == LOCAL_GL_RGBA);
switch (aFormat) {
case SurfaceFormat::B8G8R8A8:
if (gl->GetPreferredARGB32Format() == LOCAL_GL_BGRA) {
format = LOCAL_GL_BGRA;
surfaceFormat = SurfaceFormat::R8G8B8A8;
type = LOCAL_GL_UNSIGNED_INT_8_8_8_8_REV;
} else {
format = LOCAL_GL_RGBA;
surfaceFormat = SurfaceFormat::B8G8R8A8;
type = LOCAL_GL_UNSIGNED_BYTE;
}
internalFormat = LOCAL_GL_RGBA;
break;
case SurfaceFormat::B8G8R8X8:
// Treat BGRX surfaces as BGRA except for the surface
// format used.
if (gl->GetPreferredARGB32Format() == LOCAL_GL_BGRA) {
format = LOCAL_GL_BGRA;
surfaceFormat = SurfaceFormat::R8G8B8X8;
type = LOCAL_GL_UNSIGNED_INT_8_8_8_8_REV;
} else {
format = LOCAL_GL_RGBA;
surfaceFormat = SurfaceFormat::B8G8R8X8;
type = LOCAL_GL_UNSIGNED_BYTE;
}
internalFormat = LOCAL_GL_RGBA;
break;
case SurfaceFormat::R8G8B8A8:
if (gl->GetPreferredARGB32Format() == LOCAL_GL_BGRA) {
// Upload our RGBA as BGRA, but store that the uploaded format is
// BGRA. (sample from R to get B)
format = LOCAL_GL_BGRA;
type = LOCAL_GL_UNSIGNED_INT_8_8_8_8_REV;
surfaceFormat = SurfaceFormat::B8G8R8A8;
} else {
format = LOCAL_GL_RGBA;
type = LOCAL_GL_UNSIGNED_BYTE;
surfaceFormat = SurfaceFormat::R8G8B8A8;
}
internalFormat = LOCAL_GL_RGBA;
break;
case SurfaceFormat::R8G8B8X8:
// Treat RGBX surfaces as RGBA except for the surface
// format used.
if (gl->GetPreferredARGB32Format() == LOCAL_GL_BGRA) {
format = LOCAL_GL_BGRA;
type = LOCAL_GL_UNSIGNED_INT_8_8_8_8_REV;
surfaceFormat = SurfaceFormat::B8G8R8X8;
} else {
format = LOCAL_GL_RGBA;
type = LOCAL_GL_UNSIGNED_BYTE;
surfaceFormat = SurfaceFormat::R8G8B8X8;
}
internalFormat = LOCAL_GL_RGBA;
break;
case SurfaceFormat::R5G6B5_UINT16:
internalFormat = format = LOCAL_GL_RGB;
type = LOCAL_GL_UNSIGNED_SHORT_5_6_5;
surfaceFormat = SurfaceFormat::R5G6B5_UINT16;
break;
case SurfaceFormat::A8:
if (gl->IsGLES()) {
format = LOCAL_GL_LUMINANCE;
internalFormat = LOCAL_GL_LUMINANCE;
} else {
format = LOCAL_GL_RED;
internalFormat = LOCAL_GL_R8;
}
type = LOCAL_GL_UNSIGNED_BYTE;
// We don't have a specific luminance shader
surfaceFormat = SurfaceFormat::A8;
break;
case SurfaceFormat::A16:
if (gl->IsGLES()) {
format = LOCAL_GL_LUMINANCE;
internalFormat = LOCAL_GL_LUMINANCE16;
} else {
format = LOCAL_GL_RED;
internalFormat = LOCAL_GL_R16;
}
type = LOCAL_GL_UNSIGNED_SHORT;
// We don't have a specific luminance shader
surfaceFormat = SurfaceFormat::A8;
pixelSize = 2;
break;
default:
MOZ_ASSERT_UNREACHABLE("Unhandled image surface format!");
}
if (aOutUploadSize) {
*aOutUploadSize = 0;
}
if (surfaceFormat == gfx::SurfaceFormat::UNKNOWN) {
return gfx::SurfaceFormat::UNKNOWN;
}
// We can only skip SubTextures if aOffset = 0 because we need the whole
// buffer
if (aNeedInit || (!ShouldUploadSubTextures(gl) && aDstOffset == IntPoint())) {
if (!CheckUploadBounds(aSize, aDataSize, IntPoint())) {
return SurfaceFormat::UNKNOWN;
}
// If the texture needs initialized, or we are unable to
// upload sub textures, then initialize and upload the entire
// texture.
TexImage2DHelper(gl, aTextureTarget, 0, internalFormat, aSize.width,
aSize.height, aStride, pixelSize, 0, format, type, aData);
if (aOutUploadSize && aNeedInit) {
uint32_t texelSize = GetBytesPerTexel(internalFormat, type);
size_t numTexels = size_t(aSize.width) * size_t(aSize.height);
*aOutUploadSize += texelSize * numTexels;
}
} else {
// Upload each rect in the region to the texture
for (auto iter = aDstRegion.RectIter(); !iter.Done(); iter.Next()) {
IntRect rect = iter.Get();
if (!CheckUploadBounds(rect.Size(), aDataSize, rect.TopLeft())) {
return SurfaceFormat::UNKNOWN;
}
const unsigned char* rectData =
aData + DataOffset(rect.TopLeft(), aStride, aFormat);
rect += aDstOffset;
TexSubImage2DHelper(gl, aTextureTarget, 0, rect.X(), rect.Y(),
rect.Width(), rect.Height(), aStride, pixelSize,
format, type, rectData);
}
}
return surfaceFormat;
}
SurfaceFormat UploadSurfaceToTexture(GLContext* gl, DataSourceSurface* aSurface,
const nsIntRegion& aDstRegion,
GLuint aTexture, const gfx::IntSize& aSize,
size_t* aOutUploadSize, bool aNeedInit,
const gfx::IntPoint& aSrcOffset,
const gfx::IntPoint& aDstOffset,
GLenum aTextureUnit,
GLenum aTextureTarget) {
DataSourceSurface::ScopedMap map(aSurface, DataSourceSurface::READ);
int32_t stride = map.GetStride();
SurfaceFormat format = aSurface->GetFormat();
gfx::IntSize size = aSurface->GetSize();
// only fail if we'll need the entire surface for initialization
if (aNeedInit && !CheckUploadBounds(aSize, size, aSrcOffset)) {
return SurfaceFormat::UNKNOWN;
}
unsigned char* data = map.GetData() + DataOffset(aSrcOffset, stride, format);
size.width -= aSrcOffset.x;
size.height -= aSrcOffset.y;
return UploadImageDataToTexture(gl, data, size, aDstOffset, stride, format, aDstRegion,
aTexture, aSize, aOutUploadSize, aNeedInit,
aTextureUnit, aTextureTarget);
}
bool CanUploadNonPowerOfTwo(GLContext* gl) {
if (!gl->WorkAroundDriverBugs()) return true;
// Some GPUs driver crash when uploading non power of two 565 textures.
return gl->Renderer() != GLRenderer::Adreno200 &&
gl->Renderer() != GLRenderer::Adreno205;
}
} // namespace gl
} // namespace mozilla