Bug 1671055 - support partial stores at end of row in SWGL composites. r=mattwoodrow
Differential Revision: https://phabricator.services.mozilla.com/D93443
parent fb84d4b14d
commit ce73fa347a
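The masked load/blend/store sequences previously used for the 1..3 pixel tail of each row are replaced by partial_load_span/partial_store_span helpers that touch only bytes inside the row; the per-row RGBA8 over-composite in unscaled_composite is factored into a reusable unscaled_row_composite; and VectorType gains 4-lane-to-2-lane lowHalf/highHalf overloads plus a wrap helper to support the new partial stores.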
@@ -2,6 +2,29 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+// Load a partial span > 0 and < 4 pixels.
+template <typename V, typename P>
+static ALWAYS_INLINE V partial_load_span(P* src, int span) {
+  return bit_cast<V>(
+      (span >= 2 ? combine(unaligned_load<V2<P>>(src),
+                           V2<P>{span > 2 ? unaligned_load<P>(src + 2) : 0, 0})
+                 : V4<P>{unaligned_load<P>(src), 0, 0, 0}));
+}
+
+// Store a partial span > 0 and < 4 pixels.
+template <typename V, typename P>
+static ALWAYS_INLINE void partial_store_span(P* dst, V src, int span) {
+  auto pixels = bit_cast<V4<P>>(src);
+  if (span >= 2) {
+    unaligned_store(dst, lowHalf(pixels));
+    if (span > 2) {
+      unaligned_store(dst + 2, pixels.z);
+    }
+  } else {
+    unaligned_store(dst, pixels.x);
+  }
+}
+
 template <typename P>
 static inline void scale_row(P* dst, int dstWidth, const P* src, int srcWidth,
                              int span) {
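These helpers exist so that the 4-wide vector paths never read or write memory past the end of a row: a 1..3 pixel tail is loaded into a zero-padded vector, and only the live lanes are stored back. A minimal standalone sketch of the same technique (the _ref names are hypothetical; it uses plain memcpy rather than SWGL's typed unaligned loads):

    #include <cstdint>
    #include <cstring>

    typedef uint32_t U32x4 __attribute__((vector_size(16)));

    // Load `span` (1..3) pixels into a 4-lane vector, zero-filling the rest.
    static U32x4 partial_load_span_ref(const uint32_t* src, int span) {
      U32x4 v = {0, 0, 0, 0};
      std::memcpy(&v, src, span * sizeof(uint32_t));
      return v;
    }

    // Store only the first `span` (1..3) lanes back to memory.
    static void partial_store_span_ref(uint32_t* dst, U32x4 v, int span) {
      std::memcpy(dst, &v, span * sizeof(uint32_t));
    }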
@@ -96,9 +119,7 @@ static void linear_row_blit(uint32_t* dest, int span, const vec2_scalar& srcUV,
   }
   if (span > 0) {
     auto srcpx = textureLinearPackedRGBA8(sampler, ivec2(uv), srcZOffset);
-    auto mask = span_mask_RGBA8(span);
-    auto dstpx = unaligned_load<PackedRGBA8>(dest);
-    unaligned_store(dest, (mask & dstpx) | (~mask & srcpx));
+    partial_store_span(dest, srcpx, span);
   }
 }
 
@@ -114,9 +135,7 @@ static void linear_row_blit(uint8_t* dest, int span, const vec2_scalar& srcUV,
   }
   if (span > 0) {
     auto srcpx = textureLinearPackedR8(sampler, ivec2(uv), srcZOffset);
-    auto mask = span_mask_R8(span);
-    auto dstpx = unpack(unaligned_load<PackedR8>(dest));
-    unaligned_store(dest, pack((mask & dstpx) | (~mask & srcpx)));
+    partial_store_span(dest, pack(srcpx), span);
   }
 }
 
@@ -132,9 +151,7 @@ static void linear_row_blit(uint16_t* dest, int span, const vec2_scalar& srcUV,
   }
   if (span > 0) {
     auto srcpx = textureLinearPackedRG8(sampler, ivec2(uv), srcZOffset);
-    auto mask = span_mask_RG8(span);
-    auto dstpx = unaligned_load<PackedRG8>(dest);
-    unaligned_store(dest, (mask & dstpx) | (~mask & srcpx));
+    partial_store_span(dest, srcpx, span);
   }
 }
 
@@ -215,12 +232,9 @@ static void linear_row_composite(uint32_t* dest, int span,
   }
   if (span > 0) {
     WideRGBA8 srcpx = textureLinearUnpackedRGBA8(sampler, ivec2(uv), 0);
-    PackedRGBA8 dstpx = unaligned_load<PackedRGBA8>(dest);
-    WideRGBA8 dstpxu = unpack(dstpx);
-    PackedRGBA8 r = pack(srcpx + dstpxu - muldiv255(dstpxu, alphas(srcpx)));
-    auto mask = span_mask_RGBA8(span);
-    unaligned_store(dest, (mask & dstpx) | (~mask & r));
+    WideRGBA8 dstpx = unpack(partial_load_span<PackedRGBA8>(dest, span));
+    PackedRGBA8 r = pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx)));
+    partial_store_span(dest, r, span);
   }
 }
 
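The arithmetic in this composite is premultiplied-alpha "over": per channel, result = src + dst - dst * srcAlpha / 255. A scalar sketch, assuming muldiv255 uses the usual round-to-nearest division-by-255 trick (an assumption about its exact rounding; the _ref names are hypothetical):

    #include <cstdint>

    // Approximate (x * y) / 255 with rounding, no division.
    static inline uint32_t muldiv255_ref(uint32_t x, uint32_t y) {
      uint32_t t = x * y + 128;
      return (t + (t >> 8)) >> 8;
    }

    // One channel of premultiplied-alpha over: src + dst - dst * srcAlpha / 255.
    static inline uint8_t blend_over_ref(uint8_t src, uint8_t dst,
                                         uint8_t srcAlpha) {
      return uint8_t(src + dst - muldiv255_ref(dst, srcAlpha));
    }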
@@ -367,11 +381,29 @@ void* GetResourceBuffer(LockedTexture* resource, int32_t* width,
   return resource->buf;
 }
 
+static void unscaled_row_composite(uint32_t* dest, const uint32_t* src,
+                                   int span) {
+  const uint32_t* end = src + span;
+  while (src + 4 <= end) {
+    WideRGBA8 srcpx = unpack(unaligned_load<PackedRGBA8>(src));
+    WideRGBA8 dstpx = unpack(unaligned_load<PackedRGBA8>(dest));
+    PackedRGBA8 r = pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx)));
+    unaligned_store(dest, r);
+    src += 4;
+    dest += 4;
+  }
+  if (src < end) {
+    WideRGBA8 srcpx = unpack(unaligned_load<PackedRGBA8>(src));
+    WideRGBA8 dstpx = unpack(partial_load_span<PackedRGBA8>(dest, end - src));
+    auto r = pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx)));
+    partial_store_span(dest, r, end - src);
+  }
+}
+
 static NO_INLINE void unscaled_composite(Texture& srctex, const IntRect& srcReq,
                                          Texture& dsttex, const IntRect& dstReq,
                                          bool invertY, int bandOffset,
                                          int bandHeight) {
-  const int bpp = 4;
   IntRect bounds = dsttex.sample_bounds(dstReq, invertY);
   bounds.intersect(srctex.sample_bounds(srcReq));
   char* dest = dsttex.sample_ptr(dstReq, bounds, 0, invertY);
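In the new structure each band row is handled by a single call: unscaled_row_composite composites full 4-pixel chunks with whole-vector loads and stores, and the 1..3 pixel remainder (end - src) takes the partial-span path. A 7-pixel row, for example, is one full vector iteration plus a 3-pixel tail.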
@@ -385,32 +417,10 @@ static NO_INLINE void unscaled_composite(Texture& srctex, const IntRect& srcReq,
   src += srcStride * bandOffset;
   for (int rows = min(bounds.height() - bandOffset, bandHeight); rows > 0;
        rows--) {
-    char* end = src + bounds.width() * bpp;
-    while (src + 4 * bpp <= end) {
-      WideRGBA8 srcpx = unpack(unaligned_load<PackedRGBA8>(src));
-      WideRGBA8 dstpx = unpack(unaligned_load<PackedRGBA8>(dest));
-      PackedRGBA8 r = pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx)));
-      unaligned_store(dest, r);
-      src += 4 * bpp;
-      dest += 4 * bpp;
-    }
-    if (src < end) {
-      WideRGBA8 srcpx = unpack(unaligned_load<PackedRGBA8>(src));
-      WideRGBA8 dstpx = unpack(unaligned_load<PackedRGBA8>(dest));
-      U32 r =
-          bit_cast<U32>(pack(srcpx + dstpx - muldiv255(dstpx, alphas(srcpx))));
-      unaligned_store(dest, r.x);
-      if (src + bpp < end) {
-        unaligned_store(dest + bpp, r.y);
-        if (src + 2 * bpp < end) {
-          unaligned_store(dest + 2 * bpp, r.z);
-        }
-      }
-      dest += end - src;
-      src = end;
-    }
-    dest += destStride - bounds.width() * bpp;
-    src += srcStride - bounds.width() * bpp;
+    unscaled_row_composite((uint32_t*)dest, (const uint32_t*)src,
+                           bounds.width());
+    dest += destStride;
+    src += srcStride;
   }
 }
 
@@ -719,9 +729,7 @@ static void linear_row_yuv(uint32_t* dest, int span, const vec2_scalar& srcUV,
     auto uvPx = textureLinearRowPairedR8(&sampler[1], &sampler[2], cU, cOffsetV,
                                          cStrideV, cFracV);
     auto srcpx = YUVConverter<COLOR_SPACE>::convert(yPx, uvPx);
-    auto mask = span_mask_RGBA8(span);
-    auto dstpx = unaligned_load<PackedRGBA8>(dest);
-    unaligned_store(dest, (mask & dstpx) | (~mask & srcpx));
+    partial_store_span(dest, srcpx, span);
   }
 }
 
@@ -2723,10 +2723,10 @@ static inline WideRGBA8 pack_pixels_RGBA8(const vec4& v) {
   ivec4 i = round_pixel(v);
   HalfRGBA8 xz = packRGBA8(i.z, i.x);
   HalfRGBA8 yw = packRGBA8(i.y, i.w);
-  HalfRGBA8 xy = zipLow(xz, yw);
-  HalfRGBA8 zw = zipHigh(xz, yw);
-  HalfRGBA8 lo = zip2Low(xy, zw);
-  HalfRGBA8 hi = zip2High(xy, zw);
+  HalfRGBA8 xyzwl = zipLow(xz, yw);
+  HalfRGBA8 xyzwh = zipHigh(xz, yw);
+  HalfRGBA8 lo = zip2Low(xyzwl, xyzwh);
+  HalfRGBA8 hi = zip2High(xyzwl, xyzwh);
   return combine(lo, hi);
 }
 
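The renamed temporaries mark the vectors that, after the two zip stages, hold all four channels for the low (xyzwl) and high (xyzwh) pixels before the final combine. A sketch of what such lane-interleaving zips typically look like with Clang/GCC vector extensions (an assumption about SWGL's definitions, not a copy of them; the _ref names are hypothetical):

    #include <cstdint>

    typedef uint16_t U16x8 __attribute__((vector_size(16)));

    // Interleave single lanes from the low halves of a and b.
    static inline U16x8 zipLow_ref(U16x8 a, U16x8 b) {
      return __builtin_shufflevector(a, b, 0, 8, 1, 9, 2, 10, 3, 11);
    }

    // Interleave pairs of lanes from the low halves of a and b.
    static inline U16x8 zip2Low_ref(U16x8 a, U16x8 b) {
      return __builtin_shufflevector(a, b, 0, 1, 8, 9, 2, 3, 10, 11);
    }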
@@ -2916,7 +2916,7 @@ static inline WideR8 span_mask(uint8_t*, int span) {
   return span_mask_R8(span);
 }
 
-static inline PackedRG8 span_mask_RG8(int span) {
+UNUSED static inline PackedRG8 span_mask_RG8(int span) {
   return bit_cast<PackedRG8>(I16(span) < I16{1, 2, 3, 4});
 }
 
@@ -38,6 +38,16 @@ SI VectorType<T, 16> combine(VectorType<T, 8> a, VectorType<T, 8> b) {
                 13, 14, 15);
 }
 
+template <typename T>
+SI VectorType<T, 2> lowHalf(VectorType<T, 4> a) {
+  return __builtin_shufflevector(a, a, 0, 1);
+}
+
+template <typename T>
+SI VectorType<T, 2> highHalf(VectorType<T, 4> a) {
+  return __builtin_shufflevector(a, a, 2, 3);
+}
+
 template <typename T>
 SI VectorType<T, 4> lowHalf(VectorType<T, 8> a) {
   return __builtin_shufflevector(a, a, 0, 1, 2, 3);
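These 4-lane-to-2-lane overloads are what partial_store_span relies on to write the first two pixels of a tail with a single half-vector store. A standalone equivalent, assuming Clang/GCC vector-extension types like SWGL's (the _ref names are hypothetical):

    #include <cstdint>

    typedef uint32_t U32x4 __attribute__((vector_size(16)));
    typedef uint32_t U32x2 __attribute__((vector_size(8)));

    static inline U32x2 lowHalf_ref(U32x4 a) {
      return __builtin_shufflevector(a, a, 0, 1);  // lanes 0 and 1
    }

    static inline U32x2 highHalf_ref(U32x4 a) {
      return __builtin_shufflevector(a, a, 2, 3);  // lanes 2 and 3
    }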
@@ -343,6 +353,12 @@ struct VectorType<T, 2> {
     };
     T elements[2];
   };
+
+  SI VectorType wrap(const data_type& data) {
+    VectorType v;
+    v.data = data;
+    return v;
+  }
 };
 
 # define CONVERT(vector, type) ((type)(vector))