mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-24 21:31:04 +00:00
Bug 1687157 - Support 24-bit depth in SWGL. r=jrmuizel
It is possible to support 24-bit depth in SWGL without a large performance hit and without increasing the size of the depth buffer. Since depth runs already have 32-bit entries, if we carefully limit the depth run size to 8 bits we have 24 bits left over to store the actual depth value. Differential Revision: https://phabricator.services.mozilla.com/D107409
This commit is contained in:
parent
f36c63a891
commit
7390f42dc3
1982
gfx/tests/reftest/1687157-1-ref.html
Normal file
1982
gfx/tests/reftest/1687157-1-ref.html
Normal file
File diff suppressed because it is too large
Load Diff
1982
gfx/tests/reftest/1687157-1.html
Normal file
1982
gfx/tests/reftest/1687157-1.html
Normal file
File diff suppressed because it is too large
Load Diff
@ -27,3 +27,4 @@ fuzzy-if(webrender,2-7,17500-36908) == 1523776.html 1523776-ref.html
|
||||
skip-if(!asyncPan||!webrender||Android) fuzzy-if(winWidget,94-94,3415-3419) fuzzy-if(cocoaWidget&&swgl,1-1,1-1) pref(apz.allow_zooming,true) == picture-caching-on-async-zoom.html picture-caching-on-async-zoom.html?ref
|
||||
pref(apz.allow_zooming,true) == 1662062-1-no-blurry.html 1662062-1-ref.html
|
||||
== 1681610.html 1681610-ref.html
|
||||
skip-if(!webrender) fuzzy-if(webrender,0-255,0-60) == 1687157-1.html 1687157-1-ref.html
|
||||
|
@ -400,8 +400,8 @@ struct Texture {
|
||||
uint32_t clear_val = 0;
|
||||
uint32_t* cleared_rows = nullptr;
|
||||
|
||||
void init_depth_runs(uint16_t z);
|
||||
void fill_depth_runs(uint16_t z, const IntRect& scissor);
|
||||
void init_depth_runs(uint32_t z);
|
||||
void fill_depth_runs(uint32_t z, const IntRect& scissor);
|
||||
|
||||
void enable_delayed_clear(uint32_t val) {
|
||||
delay_clear = height;
|
||||
@ -472,7 +472,7 @@ struct Texture {
|
||||
// just to be safe. All other texture types and use-cases should be
|
||||
// safe to omit padding.
|
||||
size_t padding =
|
||||
internal_format == GL_DEPTH_COMPONENT16 || max(width, min_width) < 2
|
||||
internal_format == GL_DEPTH_COMPONENT24 || max(width, min_width) < 2
|
||||
? sizeof(Float)
|
||||
: 0;
|
||||
char* new_buf = (char*)realloc(buf, size + padding);
|
||||
@ -1562,7 +1562,7 @@ void PixelStorei(GLenum name, GLint param) {
|
||||
static GLenum remap_internal_format(GLenum format) {
|
||||
switch (format) {
|
||||
case GL_DEPTH_COMPONENT:
|
||||
return GL_DEPTH_COMPONENT16;
|
||||
return GL_DEPTH_COMPONENT24;
|
||||
case GL_RGBA:
|
||||
return GL_RGBA8;
|
||||
case GL_RED:
|
||||
@ -1854,10 +1854,11 @@ void RenderbufferStorage(GLenum target, GLenum internal_format, GLsizei width,
|
||||
}
|
||||
switch (internal_format) {
|
||||
case GL_DEPTH_COMPONENT:
|
||||
case GL_DEPTH_COMPONENT16:
|
||||
case GL_DEPTH_COMPONENT24:
|
||||
case GL_DEPTH_COMPONENT32:
|
||||
// Force depth format to 16 bits...
|
||||
internal_format = GL_DEPTH_COMPONENT16;
|
||||
// Force depth format to 24 bits...
|
||||
internal_format = GL_DEPTH_COMPONENT24;
|
||||
break;
|
||||
}
|
||||
set_tex_storage(ctx->textures[r.texture], internal_format, width, height);
|
||||
@ -2240,7 +2241,7 @@ void InitDefaultFramebuffer(int x, int y, int width, int height, int stride,
|
||||
}
|
||||
// Ensure dimensions of the depth buffer match the color buffer.
|
||||
Texture& depthtex = ctx->textures[fb.depth_attachment];
|
||||
set_tex_storage(depthtex, GL_DEPTH_COMPONENT16, width, height);
|
||||
set_tex_storage(depthtex, GL_DEPTH_COMPONENT24, width, height);
|
||||
depthtex.offset = IntPoint(x, y);
|
||||
}
|
||||
|
||||
@ -2292,19 +2293,16 @@ void ClearTexSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset,
|
||||
}
|
||||
assert(zoffset == 0 && depth == 1);
|
||||
IntRect scissor = {xoffset, yoffset, xoffset + width, yoffset + height};
|
||||
if (t.internal_format == GL_DEPTH_COMPONENT16) {
|
||||
uint16_t value = 0xFFFF;
|
||||
if (t.internal_format == GL_DEPTH_COMPONENT24) {
|
||||
uint32_t value = 0xFFFFFF;
|
||||
switch (format) {
|
||||
case GL_DEPTH_COMPONENT:
|
||||
switch (type) {
|
||||
case GL_DOUBLE:
|
||||
value = uint16_t(*(const GLdouble*)data * 0xFFFF);
|
||||
value = uint32_t(*(const GLdouble*)data * 0xFFFFFF);
|
||||
break;
|
||||
case GL_FLOAT:
|
||||
value = uint16_t(*(const GLfloat*)data * 0xFFFF);
|
||||
break;
|
||||
case GL_UNSIGNED_SHORT:
|
||||
value = uint16_t(*(const GLushort*)data);
|
||||
value = uint32_t(*(const GLfloat*)data * 0xFFFFFF);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
@ -2629,7 +2627,7 @@ void DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
|
||||
colortex.internal_format == GL_R8);
|
||||
Texture& depthtex = ctx->textures[ctx->depthtest ? fb.depth_attachment : 0];
|
||||
if (depthtex.buf) {
|
||||
assert(depthtex.internal_format == GL_DEPTH_COMPONENT16);
|
||||
assert(depthtex.internal_format == GL_DEPTH_COMPONENT24);
|
||||
assert(colortex.width == depthtex.width &&
|
||||
colortex.height == depthtex.height);
|
||||
assert(colortex.offset == depthtex.offset);
|
||||
|
@ -24,35 +24,42 @@
|
||||
// the DepthRun struct can be interpreted as a sign-extended int32_t depth. It
|
||||
// is then possible to just treat the entire row as an array of int32_t depth
|
||||
// samples that can be processed with SIMD comparisons, since the count field
|
||||
// behaves as just the sign-extension of the depth field.
|
||||
// When a depth buffer is cleared, each row is initialized to a single run
|
||||
// behaves as just the sign-extension of the depth field. The count field is
|
||||
// limited to 8 bits so that we can support depth values up to 24 bits.
|
||||
// When a depth buffer is cleared, each row is initialized to maximal runs
|
||||
// spanning the entire row. In the normal case, the depth buffer will continue
|
||||
// to manage itself as a list of runs. If perspective or discard is used for
|
||||
// a given row, the row will be converted to the flattened representation to
|
||||
// support it, after which it will only ever revert back to runs if the depth
|
||||
// buffer is cleared.
|
||||
|
||||
// The largest 24-bit depth value supported.
|
||||
constexpr uint32_t MAX_DEPTH_VALUE = 0xFFFFFF;
|
||||
// The longest 8-bit depth run that is supported, aligned to SIMD chunk size.
|
||||
constexpr uint32_t MAX_DEPTH_RUN = 255 & ~3;
|
||||
|
||||
struct DepthRun {
|
||||
// Ensure that depth always occupies the LSB and count the MSB so that we
|
||||
// can sign-extend depth just by setting count to zero, marking it flat.
|
||||
// When count is non-zero, then this is interpreted as an actual run and
|
||||
// depth is read in isolation.
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
uint16_t depth;
|
||||
uint16_t count;
|
||||
uint32_t depth : 24;
|
||||
uint32_t count : 8;
|
||||
#else
|
||||
uint16_t count;
|
||||
uint16_t depth;
|
||||
uint32_t count : 8;
|
||||
uint32_t depth : 24;
|
||||
#endif
|
||||
|
||||
DepthRun() = default;
|
||||
DepthRun(uint16_t depth, uint16_t count) : depth(depth), count(count) {}
|
||||
DepthRun(uint32_t depth, uint8_t count) : depth(depth), count(count) {}
|
||||
|
||||
// If count is zero, this is actually a flat depth sample rather than a run.
|
||||
bool is_flat() const { return !count; }
|
||||
|
||||
// Compare a source depth from rasterization with a stored depth value.
|
||||
template <int FUNC>
|
||||
ALWAYS_INLINE bool compare(uint16_t src) const {
|
||||
ALWAYS_INLINE bool compare(uint32_t src) const {
|
||||
switch (FUNC) {
|
||||
case GL_LEQUAL:
|
||||
return src <= depth;
|
||||
@ -67,6 +74,22 @@ struct DepthRun {
|
||||
}
|
||||
};
|
||||
|
||||
// Fills runs at the given position with the given depth up to the span width.
|
||||
static ALWAYS_INLINE void set_depth_runs(DepthRun* runs, uint32_t depth,
|
||||
uint32_t width) {
|
||||
// If the width exceeds the maximum run size, then we need to output clamped
|
||||
// runs first.
|
||||
for (; width >= MAX_DEPTH_RUN;
|
||||
runs += MAX_DEPTH_RUN, width -= MAX_DEPTH_RUN) {
|
||||
*runs = DepthRun(depth, MAX_DEPTH_RUN);
|
||||
}
|
||||
// If there are still any left over samples to fill under the maximum run
|
||||
// size, then output one last run for them.
|
||||
if (width > 0) {
|
||||
*runs = DepthRun(depth, width);
|
||||
}
|
||||
}
|
||||
|
||||
// A cursor for reading and modifying a row's depth run array. It locates
|
||||
// and iterates through a desired span within all the runs, testing if
|
||||
// the depth of this span passes or fails the depth test against existing
|
||||
@ -128,7 +151,7 @@ struct DepthCursor {
|
||||
// so it is safe for the caller to stop processing any more regions in this
|
||||
// row.
|
||||
template <int FUNC>
|
||||
int skip_failed(uint16_t val) {
|
||||
int skip_failed(uint32_t val) {
|
||||
assert(valid());
|
||||
DepthRun* prev = start;
|
||||
while (cur < end) {
|
||||
@ -143,7 +166,7 @@ struct DepthCursor {
|
||||
|
||||
// Helper to convert function parameters into template parameters to hoist
|
||||
// some checks out of inner loops.
|
||||
ALWAYS_INLINE int skip_failed(uint16_t val, GLenum func) {
|
||||
ALWAYS_INLINE int skip_failed(uint32_t val, GLenum func) {
|
||||
switch (func) {
|
||||
case GL_LEQUAL:
|
||||
return skip_failed<GL_LEQUAL>(val);
|
||||
@ -162,7 +185,7 @@ struct DepthCursor {
|
||||
// to represent this new region that passed the depth test. The length of the
|
||||
// region is returned.
|
||||
template <int FUNC, bool MASK>
|
||||
int check_passed(uint16_t val) {
|
||||
int check_passed(uint32_t val) {
|
||||
assert(valid());
|
||||
DepthRun* prev = cur;
|
||||
while (cur < end) {
|
||||
@ -201,7 +224,7 @@ struct DepthCursor {
|
||||
prev->count = start - prev;
|
||||
}
|
||||
// Create a new run for the entirety of the passed samples.
|
||||
*start = DepthRun(val, passed);
|
||||
set_depth_runs(start, val, passed);
|
||||
}
|
||||
start = cur;
|
||||
return passed;
|
||||
@ -210,7 +233,7 @@ struct DepthCursor {
|
||||
// Helper to convert function parameters into template parameters to hoist
|
||||
// some checks out of inner loops.
|
||||
template <bool MASK>
|
||||
ALWAYS_INLINE int check_passed(uint16_t val, GLenum func) {
|
||||
ALWAYS_INLINE int check_passed(uint32_t val, GLenum func) {
|
||||
switch (func) {
|
||||
case GL_LEQUAL:
|
||||
return check_passed<GL_LEQUAL, MASK>(val);
|
||||
@ -222,37 +245,37 @@ struct DepthCursor {
|
||||
}
|
||||
}
|
||||
|
||||
ALWAYS_INLINE int check_passed(uint16_t val, GLenum func, bool mask) {
|
||||
ALWAYS_INLINE int check_passed(uint32_t val, GLenum func, bool mask) {
|
||||
return mask ? check_passed<true>(val, func)
|
||||
: check_passed<false>(val, func);
|
||||
}
|
||||
|
||||
// Fill a region of runs with a given depth value, bypassing any depth test.
|
||||
ALWAYS_INLINE void fill(uint16_t depth) {
|
||||
ALWAYS_INLINE void fill(uint32_t depth) {
|
||||
check_passed<GL_ALWAYS, true>(depth);
|
||||
}
|
||||
};
|
||||
|
||||
// Initialize a depth texture by filling each row with maximal-length runs
|
||||
// that together span the entire row.
|
||||
void Texture::init_depth_runs(uint16_t depth) {
|
||||
void Texture::init_depth_runs(uint32_t depth) {
|
||||
if (!buf) return;
|
||||
DepthRun* runs = (DepthRun*)buf;
|
||||
for (int y = 0; y < height; y++) {
|
||||
runs[0] = DepthRun(depth, width);
|
||||
set_depth_runs(runs, depth, width);
|
||||
runs += stride() / sizeof(DepthRun);
|
||||
}
|
||||
set_cleared(true);
|
||||
}
|
||||
|
||||
// Fill a portion of the run array with flattened depth samples.
|
||||
static ALWAYS_INLINE void fill_depth_run(DepthRun* dst, size_t n,
|
||||
uint16_t depth) {
|
||||
fill_n((uint32_t*)dst, n, uint32_t(depth));
|
||||
static ALWAYS_INLINE void fill_flat_depth(DepthRun* dst, size_t n,
|
||||
uint32_t depth) {
|
||||
fill_n((uint32_t*)dst, n, depth);
|
||||
}
|
||||
|
||||
// Fills a scissored region of a depth texture with a given depth.
|
||||
void Texture::fill_depth_runs(uint16_t depth, const IntRect& scissor) {
|
||||
void Texture::fill_depth_runs(uint32_t depth, const IntRect& scissor) {
|
||||
if (!buf) return;
|
||||
assert(cleared());
|
||||
IntRect bb = bounds().intersection(scissor - offset);
|
||||
@ -261,10 +284,10 @@ void Texture::fill_depth_runs(uint16_t depth, const IntRect& scissor) {
|
||||
if (bb.width() >= width) {
|
||||
// If the scissor region encompasses the entire row, reset the row to
|
||||
// maximal-length runs that together span the entire row.
|
||||
runs[0] = DepthRun(depth, width);
|
||||
set_depth_runs(runs, depth, width);
|
||||
} else if (runs->is_flat()) {
|
||||
// If the row is flattened, just directly fill the portion of the row.
|
||||
fill_depth_run(&runs[bb.x0], bb.width(), depth);
|
||||
fill_flat_depth(&runs[bb.x0], bb.width(), depth);
|
||||
} else {
|
||||
// Otherwise, if we are still using runs, then set up a cursor to fill
|
||||
// it with depth runs.
|
||||
@ -320,7 +343,7 @@ static ALWAYS_INLINE bool check_depth(I32 src, DepthRun* zbuf, ZMask& outmask,
|
||||
}
|
||||
|
||||
static ALWAYS_INLINE I32 packDepth() {
|
||||
return cast(fragment_shader->gl_FragCoord.z * 0xFFFF);
|
||||
return cast(fragment_shader->gl_FragCoord.z * MAX_DEPTH_VALUE);
|
||||
}
|
||||
|
||||
static ALWAYS_INLINE void discard_depth(I32 src, DepthRun* zbuf, I32 mask) {
|
||||
@ -547,7 +570,7 @@ static void flatten_depth_runs(DepthRun* runs, size_t width) {
|
||||
}
|
||||
while (width > 0) {
|
||||
size_t n = runs->count;
|
||||
fill_depth_run(runs, n, runs->depth);
|
||||
fill_flat_depth(runs, n, runs->depth);
|
||||
runs += n;
|
||||
width -= n;
|
||||
}
|
||||
@ -556,7 +579,7 @@ static void flatten_depth_runs(DepthRun* runs, size_t width) {
|
||||
// Helper function for drawing passed depth runs within the depth buffer.
|
||||
// Flattened depth (perspective or discard) is not supported.
|
||||
template <typename P>
|
||||
static ALWAYS_INLINE void draw_depth_span(uint16_t z, P* buf,
|
||||
static ALWAYS_INLINE void draw_depth_span(uint32_t z, P* buf,
|
||||
DepthCursor& cursor) {
|
||||
for (;;) {
|
||||
// Get the span that passes the depth test. Assume on entry that
|
||||
@ -614,7 +637,7 @@ template <bool DISCARD, bool W, typename P, typename Z>
|
||||
static ALWAYS_INLINE void draw_span(P* buf, DepthRun* depth, int span, Z z) {
|
||||
if (depth) {
|
||||
// Depth testing is enabled. If perspective is used, Z values will vary
|
||||
// across the span, we use packDepth to generate 16-bit Z values suitable
|
||||
// across the span, we use packDepth to generate packed Z values suitable
|
||||
// for depth testing based on current values from gl_FragCoord.z.
|
||||
// Otherwise, for the no-perspective case, we just use the provided Z.
|
||||
// Process 4-pixel chunks first.
|
||||
@ -662,7 +685,7 @@ static ALWAYS_INLINE void draw_span(P* buf, DepthRun* depth, int span, Z z) {
|
||||
template <typename P>
|
||||
static inline void prepare_row(Texture& colortex, int y, int startx, int endx,
|
||||
bool use_discard, DepthRun* depth,
|
||||
uint16_t z = 0, DepthCursor* cursor = nullptr) {
|
||||
uint32_t z = 0, DepthCursor* cursor = nullptr) {
|
||||
assert(colortex.delay_clear > 0);
|
||||
// Delayed clear is enabled for the color buffer. Check if needs clear.
|
||||
uint32_t& mask = colortex.cleared_rows[y / 32];
|
||||
@ -735,7 +758,7 @@ static ALWAYS_INLINE bool checkIfEdgesFlipped(T l0, T l1, T r0, T r1) {
|
||||
// assumed to be ordered in either CW or CCW to support this, but currently
|
||||
// both orders (CW and CCW) are supported and equivalent.
|
||||
template <typename P>
|
||||
static inline void draw_quad_spans(int nump, Point2D p[4], uint16_t z,
|
||||
static inline void draw_quad_spans(int nump, Point2D p[4], uint32_t z,
|
||||
Interpolants interp_outs[4],
|
||||
Texture& colortex, Texture& depthtex,
|
||||
const ClipRect& clipRect) {
|
||||
@ -1534,7 +1557,7 @@ static void draw_quad(int nump, Texture& colortex, Texture& depthtex) {
|
||||
}
|
||||
// Since Z doesn't need to be interpolated, just set the fragment shader's
|
||||
// Z and W values here, once and for all fragment shader invocations.
|
||||
uint16_t z = uint16_t(0xFFFF * screenZ);
|
||||
uint32_t z = uint32_t(MAX_DEPTH_VALUE * screenZ);
|
||||
fragment_shader->gl_FragCoord.z = screenZ;
|
||||
fragment_shader->gl_FragCoord.w = w;
|
||||
|
||||
|
@ -1517,7 +1517,7 @@ impl Compositor for SwCompositor {
|
||||
// tile size is not bigger than what was previously allocated.
|
||||
self.gl.set_texture_buffer(
|
||||
self.depth_id,
|
||||
gl::DEPTH_COMPONENT16,
|
||||
gl::DEPTH_COMPONENT,
|
||||
valid_rect.size.width,
|
||||
valid_rect.size.height,
|
||||
0,
|
||||
|
@ -1525,11 +1525,14 @@ impl Device {
|
||||
};
|
||||
|
||||
let is_software_webrender = renderer_name.starts_with("Software WebRender");
|
||||
let (depth_format, upload_method) = if is_software_webrender {
|
||||
(gl::DEPTH_COMPONENT16, UploadMethod::Immediate)
|
||||
let upload_method = if is_software_webrender {
|
||||
// Uploads in SWGL generally reduce to simple memory copies.
|
||||
UploadMethod::Immediate
|
||||
} else {
|
||||
(gl::DEPTH_COMPONENT24, upload_method)
|
||||
upload_method
|
||||
};
|
||||
// Prefer 24-bit depth format. While 16-bit depth also works, it may exhaust depth ids easily.
|
||||
let depth_format = gl::DEPTH_COMPONENT24;
|
||||
|
||||
info!("GL texture cache {:?}, bgra {:?} swizzle {:?}, texture storage {:?}, depth {:?}",
|
||||
color_formats, bgra_formats, bgra8_sampling_swizzle, texture_storage_usage, depth_format);
|
||||
|
Loading…
Reference in New Issue
Block a user