Bug 1687157 - Support 24-bit depth in SWGL. r=jrmuizel

It is possible to support 24-bit depth in SWGL without a large performance hit
and without increasing the size of the depth buffer. Since depth runs already
have 32-bit entries, if we carefully limit the depth run size to 8 bits we have
24 bits left over to store the actual depth value.

Differential Revision: https://phabricator.services.mozilla.com/D107409
This commit is contained in:
Lee Salzman 2021-03-09 02:01:26 +00:00
parent f36c63a891
commit 7390f42dc3
7 changed files with 4038 additions and 49 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -27,3 +27,4 @@ fuzzy-if(webrender,2-7,17500-36908) == 1523776.html 1523776-ref.html
skip-if(!asyncPan||!webrender||Android) fuzzy-if(winWidget,94-94,3415-3419) fuzzy-if(cocoaWidget&&swgl,1-1,1-1) pref(apz.allow_zooming,true) == picture-caching-on-async-zoom.html picture-caching-on-async-zoom.html?ref
pref(apz.allow_zooming,true) == 1662062-1-no-blurry.html 1662062-1-ref.html
== 1681610.html 1681610-ref.html
skip-if(!webrender) fuzzy-if(webrender,0-255,0-60) == 1687157-1.html 1687157-1-ref.html

View File

@ -400,8 +400,8 @@ struct Texture {
uint32_t clear_val = 0;
uint32_t* cleared_rows = nullptr;
void init_depth_runs(uint16_t z);
void fill_depth_runs(uint16_t z, const IntRect& scissor);
void init_depth_runs(uint32_t z);
void fill_depth_runs(uint32_t z, const IntRect& scissor);
void enable_delayed_clear(uint32_t val) {
delay_clear = height;
@ -472,7 +472,7 @@ struct Texture {
// just to be safe. All other texture types and use-cases should be
// safe to omit padding.
size_t padding =
internal_format == GL_DEPTH_COMPONENT16 || max(width, min_width) < 2
internal_format == GL_DEPTH_COMPONENT24 || max(width, min_width) < 2
? sizeof(Float)
: 0;
char* new_buf = (char*)realloc(buf, size + padding);
@ -1562,7 +1562,7 @@ void PixelStorei(GLenum name, GLint param) {
static GLenum remap_internal_format(GLenum format) {
switch (format) {
case GL_DEPTH_COMPONENT:
return GL_DEPTH_COMPONENT16;
return GL_DEPTH_COMPONENT24;
case GL_RGBA:
return GL_RGBA8;
case GL_RED:
@ -1854,10 +1854,11 @@ void RenderbufferStorage(GLenum target, GLenum internal_format, GLsizei width,
}
switch (internal_format) {
case GL_DEPTH_COMPONENT:
case GL_DEPTH_COMPONENT16:
case GL_DEPTH_COMPONENT24:
case GL_DEPTH_COMPONENT32:
// Force depth format to 16 bits...
internal_format = GL_DEPTH_COMPONENT16;
// Force depth format to 24 bits...
internal_format = GL_DEPTH_COMPONENT24;
break;
}
set_tex_storage(ctx->textures[r.texture], internal_format, width, height);
@ -2240,7 +2241,7 @@ void InitDefaultFramebuffer(int x, int y, int width, int height, int stride,
}
// Ensure dimensions of the depth buffer match the color buffer.
Texture& depthtex = ctx->textures[fb.depth_attachment];
set_tex_storage(depthtex, GL_DEPTH_COMPONENT16, width, height);
set_tex_storage(depthtex, GL_DEPTH_COMPONENT24, width, height);
depthtex.offset = IntPoint(x, y);
}
@ -2292,19 +2293,16 @@ void ClearTexSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset,
}
assert(zoffset == 0 && depth == 1);
IntRect scissor = {xoffset, yoffset, xoffset + width, yoffset + height};
if (t.internal_format == GL_DEPTH_COMPONENT16) {
uint16_t value = 0xFFFF;
if (t.internal_format == GL_DEPTH_COMPONENT24) {
uint32_t value = 0xFFFFFF;
switch (format) {
case GL_DEPTH_COMPONENT:
switch (type) {
case GL_DOUBLE:
value = uint16_t(*(const GLdouble*)data * 0xFFFF);
value = uint32_t(*(const GLdouble*)data * 0xFFFFFF);
break;
case GL_FLOAT:
value = uint16_t(*(const GLfloat*)data * 0xFFFF);
break;
case GL_UNSIGNED_SHORT:
value = uint16_t(*(const GLushort*)data);
value = uint32_t(*(const GLfloat*)data * 0xFFFFFF);
break;
default:
assert(false);
@ -2629,7 +2627,7 @@ void DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
colortex.internal_format == GL_R8);
Texture& depthtex = ctx->textures[ctx->depthtest ? fb.depth_attachment : 0];
if (depthtex.buf) {
assert(depthtex.internal_format == GL_DEPTH_COMPONENT16);
assert(depthtex.internal_format == GL_DEPTH_COMPONENT24);
assert(colortex.width == depthtex.width &&
colortex.height == depthtex.height);
assert(colortex.offset == depthtex.offset);

View File

@ -24,35 +24,42 @@
// the DepthRun struct can be interpreted as a sign-extended int32_t depth. It
// is then possible to just treat the entire row as an array of int32_t depth
// samples that can be processed with SIMD comparisons, since the count field
// behaves as just the sign-extension of the depth field.
// When a depth buffer is cleared, each row is initialized to a single run
// behaves as just the sign-extension of the depth field. The count field is
// limited to 8 bits so that we can support depth values up to 24 bits.
// When a depth buffer is cleared, each row is initialized to maximal runs
// spanning the entire row. In the normal case, the depth buffer will continue
// to manage itself as a list of runs. If perspective or discard is used for
// a given row, the row will be converted to the flattened representation to
// support it, after which it will only ever revert back to runs if the depth
// buffer is cleared.
// The largest 24-bit depth value supported.
constexpr uint32_t MAX_DEPTH_VALUE = 0xFFFFFF;
// The longest 8-bit depth run that is supported, aligned to SIMD chunk size.
constexpr uint32_t MAX_DEPTH_RUN = 255 & ~3;
struct DepthRun {
// Ensure that depth always occupies the LSB and count the MSB so that we
// can sign-extend depth just by setting count to zero, marking it flat.
// When count is non-zero, then this is interpreted as an actual run and
// depth is read in isolation.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
uint16_t depth;
uint16_t count;
uint32_t depth : 24;
uint32_t count : 8;
#else
uint16_t count;
uint16_t depth;
uint32_t count : 8;
uint32_t depth : 24;
#endif
DepthRun() = default;
DepthRun(uint16_t depth, uint16_t count) : depth(depth), count(count) {}
DepthRun(uint32_t depth, uint8_t count) : depth(depth), count(count) {}
// If count is zero, this is actually a flat depth sample rather than a run.
bool is_flat() const { return !count; }
// Compare a source depth from rasterization with a stored depth value.
template <int FUNC>
ALWAYS_INLINE bool compare(uint16_t src) const {
ALWAYS_INLINE bool compare(uint32_t src) const {
switch (FUNC) {
case GL_LEQUAL:
return src <= depth;
@ -67,6 +74,22 @@ struct DepthRun {
}
};
// Fills runs at the given position with the given depth up to the span width.
// Fills runs at the given position with the given depth up to the span width.
static ALWAYS_INLINE void set_depth_runs(DepthRun* runs, uint32_t depth,
                                         uint32_t width) {
  // A single run's 8-bit count field cannot exceed MAX_DEPTH_RUN, so emit
  // as many maximal-length runs as will fit in the span first.
  while (width >= MAX_DEPTH_RUN) {
    *runs = DepthRun(depth, MAX_DEPTH_RUN);
    runs += MAX_DEPTH_RUN;
    width -= MAX_DEPTH_RUN;
  }
  // Any remaining samples shorter than the maximum run size get one final,
  // shorter run.
  if (width > 0) {
    *runs = DepthRun(depth, width);
  }
}
// A cursor for reading and modifying a row's depth run array. It locates
// and iterates through a desired span within all the runs, testing if
// the depth of this span passes or fails the depth test against existing
@ -128,7 +151,7 @@ struct DepthCursor {
// so it is safe for the caller to stop processing any more regions in this
// row.
template <int FUNC>
int skip_failed(uint16_t val) {
int skip_failed(uint32_t val) {
assert(valid());
DepthRun* prev = start;
while (cur < end) {
@ -143,7 +166,7 @@ struct DepthCursor {
// Helper to convert function parameters into template parameters to hoist
// some checks out of inner loops.
ALWAYS_INLINE int skip_failed(uint16_t val, GLenum func) {
ALWAYS_INLINE int skip_failed(uint32_t val, GLenum func) {
switch (func) {
case GL_LEQUAL:
return skip_failed<GL_LEQUAL>(val);
@ -162,7 +185,7 @@ struct DepthCursor {
// to represent this new region that passed the depth test. The length of the
// region is returned.
template <int FUNC, bool MASK>
int check_passed(uint16_t val) {
int check_passed(uint32_t val) {
assert(valid());
DepthRun* prev = cur;
while (cur < end) {
@ -201,7 +224,7 @@ struct DepthCursor {
prev->count = start - prev;
}
// Create a new run for the entirety of the passed samples.
*start = DepthRun(val, passed);
set_depth_runs(start, val, passed);
}
start = cur;
return passed;
@ -210,7 +233,7 @@ struct DepthCursor {
// Helper to convert function parameters into template parameters to hoist
// some checks out of inner loops.
template <bool MASK>
ALWAYS_INLINE int check_passed(uint16_t val, GLenum func) {
ALWAYS_INLINE int check_passed(uint32_t val, GLenum func) {
switch (func) {
case GL_LEQUAL:
return check_passed<GL_LEQUAL, MASK>(val);
@ -222,37 +245,37 @@ struct DepthCursor {
}
}
ALWAYS_INLINE int check_passed(uint16_t val, GLenum func, bool mask) {
// Convert the runtime depth-mask flag into the MASK template parameter so
// the mask check is hoisted out of the inner depth-test loops. Returns the
// length of the span that passed the depth test.
ALWAYS_INLINE int check_passed(uint32_t val, GLenum func, bool mask) {
  return mask ? check_passed<true>(val, func)
              : check_passed<false>(val, func);
}
// Fill a region of runs with a given depth value, bypassing any depth test.
ALWAYS_INLINE void fill(uint16_t depth) {
ALWAYS_INLINE void fill(uint32_t depth) {
  // GL_ALWAYS makes every sample pass the depth test; MASK=true is used so
  // that the stored depth values are actually overwritten (presumably the
  // MASK parameter gates depth writes — confirm against check_passed).
  check_passed<GL_ALWAYS, true>(depth);
}
};
// Initialize a depth texture by setting the first run in each row to encompass
// the entire row.
void Texture::init_depth_runs(uint16_t depth) {
void Texture::init_depth_runs(uint32_t depth) {
if (!buf) return;
DepthRun* runs = (DepthRun*)buf;
for (int y = 0; y < height; y++) {
runs[0] = DepthRun(depth, width);
set_depth_runs(runs, depth, width);
runs += stride() / sizeof(DepthRun);
}
set_cleared(true);
}
// Fill a portion of the run array with flattened depth samples.
static ALWAYS_INLINE void fill_depth_run(DepthRun* dst, size_t n,
uint16_t depth) {
fill_n((uint32_t*)dst, n, uint32_t(depth));
static ALWAYS_INLINE void fill_flat_depth(DepthRun* dst, size_t n,
                                          uint32_t depth) {
  // Flattened rows store one 32-bit depth sample per entry, so the runs can
  // be reinterpreted as a raw uint32_t array and filled directly.
  fill_n((uint32_t*)dst, n, depth);
}
// Fills a scissored region of a depth texture with a given depth.
void Texture::fill_depth_runs(uint16_t depth, const IntRect& scissor) {
void Texture::fill_depth_runs(uint32_t depth, const IntRect& scissor) {
if (!buf) return;
assert(cleared());
IntRect bb = bounds().intersection(scissor - offset);
@ -261,10 +284,10 @@ void Texture::fill_depth_runs(uint16_t depth, const IntRect& scissor) {
if (bb.width() >= width) {
// If the scissor region encompasses the entire row, reset the row to a
// single run encompassing the entire row.
runs[0] = DepthRun(depth, width);
set_depth_runs(runs, depth, width);
} else if (runs->is_flat()) {
// If the row is flattened, just directly fill the portion of the row.
fill_depth_run(&runs[bb.x0], bb.width(), depth);
fill_flat_depth(&runs[bb.x0], bb.width(), depth);
} else {
// Otherwise, if we are still using runs, then set up a cursor to fill
// it with depth runs.
@ -320,7 +343,7 @@ static ALWAYS_INLINE bool check_depth(I32 src, DepthRun* zbuf, ZMask& outmask,
}
static ALWAYS_INLINE I32 packDepth() {
return cast(fragment_shader->gl_FragCoord.z * 0xFFFF);
return cast(fragment_shader->gl_FragCoord.z * MAX_DEPTH_VALUE);
}
static ALWAYS_INLINE void discard_depth(I32 src, DepthRun* zbuf, I32 mask) {
@ -547,7 +570,7 @@ static void flatten_depth_runs(DepthRun* runs, size_t width) {
}
while (width > 0) {
size_t n = runs->count;
fill_depth_run(runs, n, runs->depth);
fill_flat_depth(runs, n, runs->depth);
runs += n;
width -= n;
}
@ -556,7 +579,7 @@ static void flatten_depth_runs(DepthRun* runs, size_t width) {
// Helper function for drawing passed depth runs within the depth buffer.
// Flattened depth (perspective or discard) is not supported.
template <typename P>
static ALWAYS_INLINE void draw_depth_span(uint16_t z, P* buf,
static ALWAYS_INLINE void draw_depth_span(uint32_t z, P* buf,
DepthCursor& cursor) {
for (;;) {
// Get the span that passes the depth test. Assume on entry that
@ -614,7 +637,7 @@ template <bool DISCARD, bool W, typename P, typename Z>
static ALWAYS_INLINE void draw_span(P* buf, DepthRun* depth, int span, Z z) {
if (depth) {
// Depth testing is enabled. If perspective is used, Z values will vary
// across the span, we use packDepth to generate 16-bit Z values suitable
// across the span, we use packDepth to generate packed Z values suitable
// for depth testing based on current values from gl_FragCoord.z.
// Otherwise, for the no-perspective case, we just use the provided Z.
// Process 4-pixel chunks first.
@ -662,7 +685,7 @@ static ALWAYS_INLINE void draw_span(P* buf, DepthRun* depth, int span, Z z) {
template <typename P>
static inline void prepare_row(Texture& colortex, int y, int startx, int endx,
bool use_discard, DepthRun* depth,
uint16_t z = 0, DepthCursor* cursor = nullptr) {
uint32_t z = 0, DepthCursor* cursor = nullptr) {
assert(colortex.delay_clear > 0);
// Delayed clear is enabled for the color buffer. Check if needs clear.
uint32_t& mask = colortex.cleared_rows[y / 32];
@ -735,7 +758,7 @@ static ALWAYS_INLINE bool checkIfEdgesFlipped(T l0, T l1, T r0, T r1) {
// assumed to be ordered in either CW or CCW to support this, but currently
// both orders (CW and CCW) are supported and equivalent.
template <typename P>
static inline void draw_quad_spans(int nump, Point2D p[4], uint16_t z,
static inline void draw_quad_spans(int nump, Point2D p[4], uint32_t z,
Interpolants interp_outs[4],
Texture& colortex, Texture& depthtex,
const ClipRect& clipRect) {
@ -1534,7 +1557,7 @@ static void draw_quad(int nump, Texture& colortex, Texture& depthtex) {
}
// Since Z doesn't need to be interpolated, just set the fragment shader's
// Z and W values here, once and for all fragment shader invocations.
uint16_t z = uint16_t(0xFFFF * screenZ);
uint32_t z = uint32_t(MAX_DEPTH_VALUE * screenZ);
fragment_shader->gl_FragCoord.z = screenZ;
fragment_shader->gl_FragCoord.w = w;

View File

@ -1517,7 +1517,7 @@ impl Compositor for SwCompositor {
// tile size is not bigger than what was previously allocated.
self.gl.set_texture_buffer(
self.depth_id,
gl::DEPTH_COMPONENT16,
gl::DEPTH_COMPONENT,
valid_rect.size.width,
valid_rect.size.height,
0,

View File

@ -1525,11 +1525,14 @@ impl Device {
};
let is_software_webrender = renderer_name.starts_with("Software WebRender");
let (depth_format, upload_method) = if is_software_webrender {
(gl::DEPTH_COMPONENT16, UploadMethod::Immediate)
let upload_method = if is_software_webrender {
// Uploads in SWGL generally reduce to simple memory copies.
UploadMethod::Immediate
} else {
(gl::DEPTH_COMPONENT24, upload_method)
upload_method
};
// Prefer 24-bit depth format. While 16-bit depth also works, it may exhaust depth ids easily.
let depth_format = gl::DEPTH_COMPONENT24;
info!("GL texture cache {:?}, bgra {:?} swizzle {:?}, texture storage {:?}, depth {:?}",
color_formats, bgra_formats, bgra8_sampling_swizzle, texture_storage_usage, depth_format);