Bug 1671055 - support partial stores at end of row in SWGL composites. r=mattwoodrow
Differential Revision: https://phabricator.services.mozilla.com/D93443
parent fb84d4b14d
commit ce73fa347a
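The masked load/blend/store sequences previously used for the 1..3 pixel tail of each row are replaced by partial_load_span/partial_store_span helpers that touch only bytes inside the row; the per-row RGBA8 over-composite in unscaled_composite is factored into a reusable unscaled_row_composite; and VectorType gains 4-lane-to-2-lane lowHalf/highHalf overloads plus a wrap helper to support the new partial stores.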
@@ -2,6 +2,29 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+// Load a partial span > 0 and < 4 pixels.
+template <typename V, typename P>
+static ALWAYS_INLINE V partial_load_span(P* src, int span) {
+  return bit_cast<V>(
+      (span >= 2 ? combine(unaligned_load<V2<P>>(src),
+                           V2<P>{span > 2 ? unaligned_load<P>(src + 2) : 0, 0})
+                 : V4<P>{unaligned_load<P>(src), 0, 0, 0}));
+}
+
+// Store a partial span > 0 and < 4 pixels.
+template <typename V, typename P>
+static ALWAYS_INLINE void partial_store_span(P* dst, V src, int span) {
+  auto pixels = bit_cast<V4<P>>(src);
+  if (span >= 2) {
+    unaligned_store(dst, lowHalf(pixels));
+    if (span > 2) {
+      unaligned_store(dst + 2, pixels.z);
+    }
+  } else {
+    unaligned_store(dst, pixels.x);
+  }
+}
+
 template <typename P>
 static inline void scale_row(P* dst, int dstWidth, const P* src, int srcWidth,
                              int span) {
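These helpers exist so that the 4-wide vector paths never read or write memory past the end of a row: a 1..3 pixel tail is loaded into a zero-padded vector, and only the live lanes are stored back. A minimal standalone sketch of the same technique (the _ref names are hypothetical; it uses plain memcpy rather than SWGL's typed unaligned loads):

    #include <cstdint>
    #include <cstring>

    typedef uint32_t U32x4 __attribute__((vector_size(16)));

    // Load `span` (1..3) pixels into a 4-lane vector, zero-filling the rest.
    static U32x4 partial_load_span_ref(const uint32_t* src, int span) {
      U32x4 v = {0, 0, 0, 0};
      std::memcpy(&v, src, span * sizeof(uint32_t));
      return v;
    }

    // Store only the first `span` (1..3) lanes back to memory.
    static void partial_store_span_ref(uint32_t* dst, U32x4 v, int span) {
      std::memcpy(dst, &v, span * sizeof(uint32_t));
    }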
@@ -96,9 +119,7 @@ static void linear_row_blit(uint32_t* dest, int span, const vec2_scalar& srcUV,
   }
   if (span > 0) {
     auto srcpx = textureLinearPackedRGBA8(sampler, ivec2(uv), srcZOffset);
-    auto mask = span_mask_RGBA8(span);
-    auto dstpx = unaligned_load<PackedRGBA8>(dest);
-    unaligned_store(dest, (mask & dstpx) | (~mask & srcpx));
+    partial_store_span(dest, srcpx, span);
   }
 }
 
@@ -114,9 +135,7 @@ static void linear_row_blit(uint8_t* dest, int span, const vec2_scalar& srcUV,
   }
   if (span > 0) {
     auto srcpx = textureLinearPackedR8(sampler, ivec2(uv), srcZOffset);
-    auto mask = span_mask_R8(span);
-    auto dstpx = unpack(unaligned_load<PackedR8>(dest));
-    unaligned_store(dest, pack((mask & dstpx) | (~mask & srcpx)));
+    partial_store_span(dest, pack(srcpx), span);
   }
 }
 
@@ -132,9 +151,7 @@ static void linear_row_blit(uint16_t* dest, int span, const vec2_scalar& srcUV,
   }
   if (span > 0) {
     auto srcpx = textureLinearPackedRG8(sampler, ivec2(uv), srcZOffset);
-    auto mask = span_mask_RG8(span);
-    auto dstpx = unaligned_load<PackedRG8>(dest);
-    unaligned_store(dest, (mask & dstpx) | (~mask & srcpx));
+    partial_store_span(dest, srcpx, span);
   }
 }
 
@@ -215,12 +232,9 @@ static void linear_row_composite(uint32_t* dest, int span,
   }
   if (span > 0) {
     WideRGBA8 srcpx = textureLinearUnpackedRGBA8(sampler, ivec2(uv), 0);
-    PackedRGBA8 dstpx = unaligned_load<PackedRGBA8>(dest);
-    WideRGBA8 dstpxu = unpack(dstpx);
-    PackedRGBA8 r = pack(srcpx + dstpxu - muldiv255(dstpxu, alphas(srcpx)));
-    auto mask = span_mask_RGBA8(span);
-    unaligned_store(dest, (mask & dstpx) | (~mask & r));
+    WideRGBA8 dstpx = unpack(partial_load_span<PackedRGBA8>(dest, span));
+    PackedRGBA8 r = pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx)));
+    partial_store_span(dest, r, span);
   }
 }
 
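The arithmetic in this composite is premultiplied-alpha "over": per channel, result = src + dst - dst * srcAlpha / 255. A scalar sketch, assuming muldiv255 uses the usual round-to-nearest division-by-255 trick (an assumption about its exact rounding; the _ref names are hypothetical):

    #include <cstdint>

    // Approximate (x * y) / 255 with rounding, no division.
    static inline uint32_t muldiv255_ref(uint32_t x, uint32_t y) {
      uint32_t t = x * y + 128;
      return (t + (t >> 8)) >> 8;
    }

    // One channel of premultiplied-alpha over: src + dst - dst * srcAlpha / 255.
    static inline uint8_t blend_over_ref(uint8_t src, uint8_t dst,
                                         uint8_t srcAlpha) {
      return uint8_t(src + dst - muldiv255_ref(dst, srcAlpha));
    }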
@@ -367,11 +381,29 @@ void* GetResourceBuffer(LockedTexture* resource, int32_t* width,
   return resource->buf;
 }
 
+static void unscaled_row_composite(uint32_t* dest, const uint32_t* src,
+                                   int span) {
+  const uint32_t* end = src + span;
+  while (src + 4 <= end) {
+    WideRGBA8 srcpx = unpack(unaligned_load<PackedRGBA8>(src));
+    WideRGBA8 dstpx = unpack(unaligned_load<PackedRGBA8>(dest));
+    PackedRGBA8 r = pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx)));
+    unaligned_store(dest, r);
+    src += 4;
+    dest += 4;
+  }
+  if (src < end) {
+    WideRGBA8 srcpx = unpack(unaligned_load<PackedRGBA8>(src));
+    WideRGBA8 dstpx = unpack(partial_load_span<PackedRGBA8>(dest, end - src));
+    auto r = pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx)));
+    partial_store_span(dest, r, end - src);
+  }
+}
+
 static NO_INLINE void unscaled_composite(Texture& srctex, const IntRect& srcReq,
                                          Texture& dsttex, const IntRect& dstReq,
                                          bool invertY, int bandOffset,
                                          int bandHeight) {
-  const int bpp = 4;
   IntRect bounds = dsttex.sample_bounds(dstReq, invertY);
   bounds.intersect(srctex.sample_bounds(srcReq));
   char* dest = dsttex.sample_ptr(dstReq, bounds, 0, invertY);
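In the new structure each band row is handled by a single call: unscaled_row_composite composites full 4-pixel chunks with whole-vector loads and stores, and the 1..3 pixel remainder (end - src) takes the partial-span path. A 7-pixel row, for example, is one full vector iteration plus a 3-pixel tail.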
@@ -385,32 +417,10 @@ static NO_INLINE void unscaled_composite(Texture& srctex, const IntRect& srcReq,
   src += srcStride * bandOffset;
   for (int rows = min(bounds.height() - bandOffset, bandHeight); rows > 0;
        rows--) {
-    char* end = src + bounds.width() * bpp;
-    while (src + 4 * bpp <= end) {
-      WideRGBA8 srcpx = unpack(unaligned_load<PackedRGBA8>(src));
-      WideRGBA8 dstpx = unpack(unaligned_load<PackedRGBA8>(dest));
-      PackedRGBA8 r = pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx)));
-      unaligned_store(dest, r);
-      src += 4 * bpp;
-      dest += 4 * bpp;
-    }
-    if (src < end) {
-      WideRGBA8 srcpx = unpack(unaligned_load<PackedRGBA8>(src));
-      WideRGBA8 dstpx = unpack(unaligned_load<PackedRGBA8>(dest));
-      U32 r =
-          bit_cast<U32>(pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx))));
-      unaligned_store(dest, r.x);
-      if (src + bpp < end) {
-        unaligned_store(dest + bpp, r.y);
-        if (src + 2 * bpp < end) {
-          unaligned_store(dest + 2 * bpp, r.z);
-        }
-      }
-      dest += end - src;
-      src = end;
-    }
-    dest += destStride - bounds.width() * bpp;
-    src += srcStride - bounds.width() * bpp;
+    unscaled_row_composite((uint32_t*)dest, (const uint32_t*)src,
+                           bounds.width());
+    dest += destStride;
+    src += srcStride;
   }
 }
 
@@ -719,9 +729,7 @@ static void linear_row_yuv(uint32_t* dest, int span, const vec2_scalar& srcUV,
     auto uvPx = textureLinearRowPairedR8(&sampler[1], &sampler[2], cU, cOffsetV,
                                          cStrideV, cFracV);
     auto srcpx = YUVConverter<COLOR_SPACE>::convert(yPx, uvPx);
-    auto mask = span_mask_RGBA8(span);
-    auto dstpx = unaligned_load<PackedRGBA8>(dest);
-    unaligned_store(dest, (mask & dstpx) | (~mask & srcpx));
+    partial_store_span(dest, srcpx, span);
   }
 }
 
@@ -2723,10 +2723,10 @@ static inline WideRGBA8 pack_pixels_RGBA8(const vec4& v) {
   ivec4 i = round_pixel(v);
   HalfRGBA8 xz = packRGBA8(i.z, i.x);
   HalfRGBA8 yw = packRGBA8(i.y, i.w);
-  HalfRGBA8 xy = zipLow(xz, yw);
-  HalfRGBA8 zw = zipHigh(xz, yw);
-  HalfRGBA8 lo = zip2Low(xy, zw);
-  HalfRGBA8 hi = zip2High(xy, zw);
+  HalfRGBA8 xyzwl = zipLow(xz, yw);
+  HalfRGBA8 xyzwh = zipHigh(xz, yw);
+  HalfRGBA8 lo = zip2Low(xyzwl, xyzwh);
+  HalfRGBA8 hi = zip2High(xyzwl, xyzwh);
   return combine(lo, hi);
 }
 
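The renamed temporaries mark the vectors that, after the two zip stages, hold all four channels for the low (xyzwl) and high (xyzwh) pixels before the final combine. A sketch of what such lane-interleaving zips typically look like with Clang/GCC vector extensions (an assumption about SWGL's definitions, not a copy of them; the _ref names are hypothetical):

    #include <cstdint>

    typedef uint16_t U16x8 __attribute__((vector_size(16)));

    // Interleave single lanes from the low halves of a and b.
    static inline U16x8 zipLow_ref(U16x8 a, U16x8 b) {
      return __builtin_shufflevector(a, b, 0, 8, 1, 9, 2, 10, 3, 11);
    }

    // Interleave pairs of lanes from the low halves of a and b.
    static inline U16x8 zip2Low_ref(U16x8 a, U16x8 b) {
      return __builtin_shufflevector(a, b, 0, 1, 8, 9, 2, 3, 10, 11);
    }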
@@ -2916,7 +2916,7 @@ static inline WideR8 span_mask(uint8_t*, int span) {
   return span_mask_R8(span);
 }
 
-static inline PackedRG8 span_mask_RG8(int span) {
+UNUSED static inline PackedRG8 span_mask_RG8(int span) {
   return bit_cast<PackedRG8>(I16(span) < I16{1, 2, 3, 4});
 }
 
@@ -38,6 +38,16 @@ SI VectorType<T, 16> combine(VectorType<T, 8> a, VectorType<T, 8> b) {
                 13, 14, 15);
 }
 
+template <typename T>
+SI VectorType<T, 2> lowHalf(VectorType<T, 4> a) {
+  return __builtin_shufflevector(a, a, 0, 1);
+}
+
+template <typename T>
+SI VectorType<T, 2> highHalf(VectorType<T, 4> a) {
+  return __builtin_shufflevector(a, a, 2, 3);
+}
+
 template <typename T>
 SI VectorType<T, 4> lowHalf(VectorType<T, 8> a) {
   return __builtin_shufflevector(a, a, 0, 1, 2, 3);
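These 4-lane-to-2-lane overloads are what partial_store_span relies on to write the first two pixels of a tail with a single half-vector store. A standalone equivalent, assuming Clang/GCC vector-extension types like SWGL's (the _ref names are hypothetical):

    #include <cstdint>

    typedef uint32_t U32x4 __attribute__((vector_size(16)));
    typedef uint32_t U32x2 __attribute__((vector_size(8)));

    static inline U32x2 lowHalf_ref(U32x4 a) {
      return __builtin_shufflevector(a, a, 0, 1);  // lanes 0 and 1
    }

    static inline U32x2 highHalf_ref(U32x4 a) {
      return __builtin_shufflevector(a, a, 2, 3);  // lanes 2 and 3
    }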
@@ -343,6 +353,12 @@ struct VectorType<T, 2> {
     };
     T elements[2];
   };
+
+  SI VectorType wrap(const data_type& data) {
+    VectorType v;
+    v.data = data;
+    return v;
+  }
 };
 
 # define CONVERT(vector, type) ((type)(vector))