Bug 1671055 - support partial stores at end of row in SWGL composites. r=mattwoodrow

Differential Revision: https://phabricator.services.mozilla.com/D93443
Author: Lee Salzman
Date:   2020-10-14 19:31:57 +00:00
Commit: ce73fa347a (parent fb84d4b14d)

3 changed files with 74 additions and 50 deletions
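
Note on the approach: before this patch, a trailing span of 1-3 pixels was handled by loading the destination, blending under a span mask, and then storing a full 4-pixel vector, so the write still covered pixels past the end of the span. A simplified scalar model of that old pattern (illustrative only, not SWGL code):

    #include <cstdint>
    #include <cstring>

    // Scalar model of the old span-mask pattern: lanes past `span` get their
    // previous destination values back, but all four pixels are still stored,
    // so the write always spans a full 4-pixel chunk.
    static void masked_store_4(uint32_t* dst, const uint32_t src[4], int span) {
      uint32_t out[4];
      for (int i = 0; i < 4; i++) {
        out[i] = (i < span) ? src[i] : dst[i];  // mask keeps dest beyond the span
      }
      std::memcpy(dst, out, sizeof(out));  // still a full 16-byte store
    }

The hunks below replace that pattern with partial_load_span/partial_store_span helpers that read and write only the pixels inside the span.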

View File

@@ -2,6 +2,29 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+// Load a partial span > 0 and < 4 pixels.
+template <typename V, typename P>
+static ALWAYS_INLINE V partial_load_span(P* src, int span) {
+  return bit_cast<V>(
+      (span >= 2 ? combine(unaligned_load<V2<P>>(src),
+                           V2<P>{span > 2 ? unaligned_load<P>(src + 2) : 0, 0})
+                 : V4<P>{unaligned_load<P>(src), 0, 0, 0}));
+}
+
+// Store a partial span > 0 and < 4 pixels.
+template <typename V, typename P>
+static ALWAYS_INLINE void partial_store_span(P* dst, V src, int span) {
+  auto pixels = bit_cast<V4<P>>(src);
+  if (span >= 2) {
+    unaligned_store(dst, lowHalf(pixels));
+    if (span > 2) {
+      unaligned_store(dst + 2, pixels.z);
+    }
+  } else {
+    unaligned_store(dst, pixels.x);
+  }
+}
+
 template <typename P>
 static inline void scale_row(P* dst, int dstWidth, const P* src, int srcWidth,
                              int span) {
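
As a rough illustration of what the 2-then-1 split in partial_store_span amounts to for RGBA8 pixels, here is a hypothetical SSE2 rendering (an assumption about how the generic vector code might lower on x86, not something the patch itself contains):

    #include <emmintrin.h>  // SSE2
    #include <cstdint>
    #include <cstring>

    // Store only `span` (1-3) RGBA8 pixels from a 4-pixel vector: an 8-byte
    // store covers pixels 0-1 and narrow 4-byte stores cover the remainder.
    static void store_span_rgba8_sse2(uint32_t* dst, __m128i px, int span) {
      if (span >= 2) {
        _mm_storel_epi64(reinterpret_cast<__m128i*>(dst), px);  // pixels 0-1
        if (span > 2) {
          uint32_t third =
              _mm_cvtsi128_si32(_mm_shuffle_epi32(px, _MM_SHUFFLE(2, 2, 2, 2)));
          std::memcpy(dst + 2, &third, sizeof(third));  // pixel 2
        }
      } else {
        uint32_t first = _mm_cvtsi128_si32(px);
        std::memcpy(dst, &first, sizeof(first));  // pixel 0
      }
    }
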
@@ -96,9 +119,7 @@ static void linear_row_blit(uint32_t* dest, int span, const vec2_scalar& srcUV,
   }
   if (span > 0) {
     auto srcpx = textureLinearPackedRGBA8(sampler, ivec2(uv), srcZOffset);
-    auto mask = span_mask_RGBA8(span);
-    auto dstpx = unaligned_load<PackedRGBA8>(dest);
-    unaligned_store(dest, (mask & dstpx) | (~mask & srcpx));
+    partial_store_span(dest, srcpx, span);
   }
 }
 
@@ -114,9 +135,7 @@ static void linear_row_blit(uint8_t* dest, int span, const vec2_scalar& srcUV,
   }
   if (span > 0) {
     auto srcpx = textureLinearPackedR8(sampler, ivec2(uv), srcZOffset);
-    auto mask = span_mask_R8(span);
-    auto dstpx = unpack(unaligned_load<PackedR8>(dest));
-    unaligned_store(dest, pack((mask & dstpx) | (~mask & srcpx)));
+    partial_store_span(dest, pack(srcpx), span);
   }
 }
 
@@ -132,9 +151,7 @@ static void linear_row_blit(uint16_t* dest, int span, const vec2_scalar& srcUV,
   }
   if (span > 0) {
     auto srcpx = textureLinearPackedRG8(sampler, ivec2(uv), srcZOffset);
-    auto mask = span_mask_RG8(span);
-    auto dstpx = unaligned_load<PackedRG8>(dest);
-    unaligned_store(dest, (mask & dstpx) | (~mask & srcpx));
+    partial_store_span(dest, srcpx, span);
   }
 }
 
@@ -215,12 +232,9 @@ static void linear_row_composite(uint32_t* dest, int span,
   }
   if (span > 0) {
     WideRGBA8 srcpx = textureLinearUnpackedRGBA8(sampler, ivec2(uv), 0);
-    PackedRGBA8 dstpx = unaligned_load<PackedRGBA8>(dest);
-    WideRGBA8 dstpxu = unpack(dstpx);
-    PackedRGBA8 r = pack(srcpx + dstpxu - muldiv255(dstpxu, alphas(srcpx)));
-    auto mask = span_mask_RGBA8(span);
-    unaligned_store(dest, (mask & dstpx) | (~mask & r));
+    WideRGBA8 dstpx = unpack(partial_load_span<PackedRGBA8>(dest, span));
+    PackedRGBA8 r = pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx)));
+    partial_store_span(dest, r, span);
   }
 }
 
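The blend in this and the later composite hunks is premultiplied-alpha source-over, result = src + dst * (1 - src.a / 255), applied per channel. A scalar sketch of the arithmetic (the exact rounding of SWGL's muldiv255 is assumed here, not copied from the source):

    #include <cstdint>

    // Approximate x * y / 255 with integer math (a common rounding trick,
    // assumed for illustration).
    static inline uint32_t muldiv255_approx(uint32_t x, uint32_t y) {
      uint32_t t = x * y + 128;
      return (t + (t >> 8)) >> 8;
    }

    // One channel of premultiplied source-over, mirroring the vector expression
    // srcpx + dstpx - muldiv255(dstpx, alphas(srcpx)) above.
    static inline uint8_t over_channel(uint8_t src, uint8_t dst, uint8_t srcA) {
      return static_cast<uint8_t>(src + dst - muldiv255_approx(dst, srcA));
    }
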
@@ -367,11 +381,29 @@ void* GetResourceBuffer(LockedTexture* resource, int32_t* width,
   return resource->buf;
 }
 
+static void unscaled_row_composite(uint32_t* dest, const uint32_t* src,
+                                   int span) {
+  const uint32_t* end = src + span;
+  while (src + 4 <= end) {
+    WideRGBA8 srcpx = unpack(unaligned_load<PackedRGBA8>(src));
+    WideRGBA8 dstpx = unpack(unaligned_load<PackedRGBA8>(dest));
+    PackedRGBA8 r = pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx)));
+    unaligned_store(dest, r);
+    src += 4;
+    dest += 4;
+  }
+  if (src < end) {
+    WideRGBA8 srcpx = unpack(unaligned_load<PackedRGBA8>(src));
+    WideRGBA8 dstpx = unpack(partial_load_span<PackedRGBA8>(dest, end - src));
+    auto r = pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx)));
+    partial_store_span(dest, r, end - src);
+  }
+}
+
 static NO_INLINE void unscaled_composite(Texture& srctex, const IntRect& srcReq,
                                          Texture& dsttex, const IntRect& dstReq,
                                          bool invertY, int bandOffset,
                                          int bandHeight) {
-  const int bpp = 4;
   IntRect bounds = dsttex.sample_bounds(dstReq, invertY);
   bounds.intersect(srctex.sample_bounds(srcReq));
   char* dest = dsttex.sample_ptr(dstReq, bounds, 0, invertY);
@@ -385,32 +417,10 @@ static NO_INLINE void unscaled_composite(Texture& srctex, const IntRect& srcReq,
   src += srcStride * bandOffset;
   for (int rows = min(bounds.height() - bandOffset, bandHeight); rows > 0;
        rows--) {
-    char* end = src + bounds.width() * bpp;
-    while (src + 4 * bpp <= end) {
-      WideRGBA8 srcpx = unpack(unaligned_load<PackedRGBA8>(src));
-      WideRGBA8 dstpx = unpack(unaligned_load<PackedRGBA8>(dest));
-      PackedRGBA8 r = pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx)));
-      unaligned_store(dest, r);
-      src += 4 * bpp;
-      dest += 4 * bpp;
-    }
-    if (src < end) {
-      WideRGBA8 srcpx = unpack(unaligned_load<PackedRGBA8>(src));
-      WideRGBA8 dstpx = unpack(unaligned_load<PackedRGBA8>(dest));
-      U32 r =
-          bit_cast<U32>(pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx))));
-      unaligned_store(dest, r.x);
-      if (src + bpp < end) {
-        unaligned_store(dest + bpp, r.y);
-        if (src + 2 * bpp < end) {
-          unaligned_store(dest + 2 * bpp, r.z);
-        }
-      }
-      dest += end - src;
-      src = end;
-    }
-    dest += destStride - bounds.width() * bpp;
-    src += srcStride - bounds.width() * bpp;
+    unscaled_row_composite((uint32_t*)dest, (const uint32_t*)src,
+                           bounds.width());
+    dest += destStride;
+    src += srcStride;
   }
 }
 
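To make the tail handling in unscaled_row_composite concrete: a 7-pixel row runs one full 4-pixel iteration and then a 3-pixel partial span, so nothing past the row is written. A toy standalone check of that loop split, with a plain copy standing in for the blend:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Same loop structure as unscaled_row_composite, minus the blending.
    static void row_copy(uint32_t* dst, const uint32_t* src, int width) {
      const uint32_t* end = src + width;
      while (src + 4 <= end) {
        std::memcpy(dst, src, 4 * sizeof(uint32_t));  // full 4-pixel chunk
        src += 4;
        dst += 4;
      }
      if (src < end) {
        std::memcpy(dst, src, (end - src) * sizeof(uint32_t));  // partial span
      }
    }

    int main() {
      uint32_t src[7] = {1, 2, 3, 4, 5, 6, 7};
      uint32_t dst[8] = {0, 0, 0, 0, 0, 0, 0, 0xDEADBEEF};
      row_copy(dst, src, 7);
      assert(dst[6] == 7 && dst[7] == 0xDEADBEEF);  // guard pixel untouched
      return 0;
    }
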
@@ -719,9 +729,7 @@ static void linear_row_yuv(uint32_t* dest, int span, const vec2_scalar& srcUV,
     auto uvPx = textureLinearRowPairedR8(&sampler[1], &sampler[2], cU, cOffsetV,
                                          cStrideV, cFracV);
     auto srcpx = YUVConverter<COLOR_SPACE>::convert(yPx, uvPx);
-    auto mask = span_mask_RGBA8(span);
-    auto dstpx = unaligned_load<PackedRGBA8>(dest);
-    unaligned_store(dest, (mask & dstpx) | (~mask & srcpx));
+    partial_store_span(dest, srcpx, span);
   }
 }
 

View File

@@ -2723,10 +2723,10 @@ static inline WideRGBA8 pack_pixels_RGBA8(const vec4& v) {
   ivec4 i = round_pixel(v);
   HalfRGBA8 xz = packRGBA8(i.z, i.x);
   HalfRGBA8 yw = packRGBA8(i.y, i.w);
-  HalfRGBA8 xy = zipLow(xz, yw);
-  HalfRGBA8 zw = zipHigh(xz, yw);
-  HalfRGBA8 lo = zip2Low(xy, zw);
-  HalfRGBA8 hi = zip2High(xy, zw);
+  HalfRGBA8 xyzwl = zipLow(xz, yw);
+  HalfRGBA8 xyzwh = zipHigh(xz, yw);
+  HalfRGBA8 lo = zip2Low(xyzwl, xyzwh);
+  HalfRGBA8 hi = zip2High(xyzwl, xyzwh);
   return combine(lo, hi);
 }
 
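The rename above only makes it clearer that the zip results already interleave all four channels; the shuffles themselves are unchanged. Assuming the usual semantics of these helpers (zipLow/zipHigh interleave single lanes from the low/high halves, zip2Low/zip2High interleave lane pairs), a scalar model is:

    #include <array>
    #include <cstdint>

    using V8 = std::array<uint16_t, 8>;  // stand-in for an 8-lane HalfRGBA8

    // Interleave single lanes from the low (zipLow) or high (zipHigh) halves.
    static V8 zipLow(V8 a, V8 b) {
      return {a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3]};
    }
    static V8 zipHigh(V8 a, V8 b) {
      return {a[4], b[4], a[5], b[5], a[6], b[6], a[7], b[7]};
    }

    // Interleave pairs of lanes from the low (zip2Low) or high (zip2High) halves.
    static V8 zip2Low(V8 a, V8 b) {
      return {a[0], a[1], b[0], b[1], a[2], a[3], b[2], b[3]};
    }
    static V8 zip2High(V8 a, V8 b) {
      return {a[4], a[5], b[4], b[5], a[6], a[7], b[6], b[7]};
    }
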
@@ -2916,7 +2916,7 @@ static inline WideR8 span_mask(uint8_t*, int span) {
   return span_mask_R8(span);
 }
 
-static inline PackedRG8 span_mask_RG8(int span) {
+UNUSED static inline PackedRG8 span_mask_RG8(int span) {
   return bit_cast<PackedRG8>(I16(span) < I16{1, 2, 3, 4});
 }

View File

@ -38,6 +38,16 @@ SI VectorType<T, 16> combine(VectorType<T, 8> a, VectorType<T, 8> b) {
13, 14, 15);
}
template <typename T>
SI VectorType<T, 2> lowHalf(VectorType<T, 4> a) {
return __builtin_shufflevector(a, a, 0, 1);
}
template <typename T>
SI VectorType<T, 2> highHalf(VectorType<T, 4> a) {
return __builtin_shufflevector(a, a, 2, 3);
}
template <typename T>
SI VectorType<T, 4> lowHalf(VectorType<T, 8> a) {
return __builtin_shufflevector(a, a, 0, 1, 2, 3);
@@ -343,6 +353,12 @@ struct VectorType<T, 2> {
     };
     T elements[2];
   };
+
+  SI VectorType wrap(const data_type& data) {
+    VectorType v;
+    v.data = data;
+    return v;
+  }
 };
 
 # define CONVERT(vector, type) ((type)(vector))
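
The new lowHalf overload on 4-lane vectors is what partial_store_span uses to peel off the first two lanes (highHalf is added alongside it), and wrap() gives the 2-lane specialization a construct-from-raw-data helper, presumably matching what the wider specializations already provide. For reference, a standalone equivalent of the half helpers using clang's vector extensions, which vector_type.h builds on (names here are ad hoc):

    #include <cstdint>

    typedef uint32_t U32x4 __attribute__((vector_size(16)));
    typedef uint32_t U32x2 __attribute__((vector_size(8)));

    // Take lanes 0-1 of a 4-lane vector.
    static inline U32x2 low_half(U32x4 a) {
      return __builtin_shufflevector(a, a, 0, 1);
    }

    // Take lanes 2-3 of a 4-lane vector.
    static inline U32x2 high_half(U32x4 a) {
      return __builtin_shufflevector(a, a, 2, 3);
    }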