mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-02-17 06:09:19 +00:00
Bug 1870395 - gfxAlphaRecovery - Neon version r=jrmuizel,gfx-reviewers
Depends on D196459 Differential Revision: https://phabricator.services.mozilla.com/D196860
This commit is contained in:
parent
0e607b2b3f
commit
fd7b78a368
@ -10,6 +10,8 @@
|
|||||||
#define MOZILLA_SSE_INCLUDE_HEADER_FOR_SSE2
|
#define MOZILLA_SSE_INCLUDE_HEADER_FOR_SSE2
|
||||||
#include "mozilla/SSE.h"
|
#include "mozilla/SSE.h"
|
||||||
|
|
||||||
|
#include <xsimd/xsimd.hpp>
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
bool gfxAlphaRecovery::RecoverAlpha(gfxImageSurface* blackSurf,
|
bool gfxAlphaRecovery::RecoverAlpha(gfxImageSurface* blackSurf,
|
||||||
const gfxImageSurface* whiteSurf) {
|
const gfxImageSurface* whiteSurf) {
|
||||||
@ -23,7 +25,14 @@ bool gfxAlphaRecovery::RecoverAlpha(gfxImageSurface* blackSurf,
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
#ifdef MOZILLA_MAY_SUPPORT_SSE2
|
#ifdef MOZILLA_MAY_SUPPORT_SSE2
|
||||||
if (mozilla::supports_sse2() && RecoverAlphaSSE2(blackSurf, whiteSurf)) {
|
if (mozilla::supports_sse2() &&
|
||||||
|
RecoverAlphaGeneric<xsimd::sse2>(blackSurf, whiteSurf)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef MOZILLA_MAY_SUPPORT_NEON
|
||||||
|
if (mozilla::supports_neon() &&
|
||||||
|
RecoverAlphaGeneric<xsimd::neon>(blackSurf, whiteSurf)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -6,7 +6,6 @@
|
|||||||
#ifndef _GFXALPHARECOVERY_H_
|
#ifndef _GFXALPHARECOVERY_H_
|
||||||
#define _GFXALPHARECOVERY_H_
|
#define _GFXALPHARECOVERY_H_
|
||||||
|
|
||||||
#include "mozilla/SSE.h"
|
|
||||||
#include "gfxTypes.h"
|
#include "gfxTypes.h"
|
||||||
#include "mozilla/gfx/Rect.h"
|
#include "mozilla/gfx/Rect.h"
|
||||||
|
|
||||||
@ -33,14 +32,12 @@ class gfxAlphaRecovery {
|
|||||||
static bool RecoverAlpha(gfxImageSurface* blackSurface,
|
static bool RecoverAlpha(gfxImageSurface* blackSurface,
|
||||||
const gfxImageSurface* whiteSurface);
|
const gfxImageSurface* whiteSurface);
|
||||||
|
|
||||||
#ifdef MOZILLA_MAY_SUPPORT_SSE2
|
/* This does the same as the previous function, but uses SIMD
|
||||||
/* This does the same as the previous function, but uses SSE2
|
* optimizations. Usually this should not be called directly.
|
||||||
* optimizations. Usually this should not be called directly. Be sure to
|
|
||||||
* check mozilla::supports_sse2() before calling this function.
|
|
||||||
*/
|
*/
|
||||||
static bool RecoverAlphaSSE2(gfxImageSurface* blackSurface,
|
template <class Arch>
|
||||||
const gfxImageSurface* whiteSurface);
|
static bool RecoverAlphaGeneric(gfxImageSurface* blackSurface,
|
||||||
#endif
|
const gfxImageSurface* whiteSurface);
|
||||||
|
|
||||||
/** from cairo-xlib-utils.c, modified */
|
/** from cairo-xlib-utils.c, modified */
|
||||||
/**
|
/**
|
||||||
@ -62,7 +59,7 @@ class gfxAlphaRecovery {
|
|||||||
* bits are likely to be the most accurate.
|
* bits are likely to be the most accurate.
|
||||||
*
|
*
|
||||||
* This function needs to be in the header file since it's used by both
|
* This function needs to be in the header file since it's used by both
|
||||||
* gfxRecoverAlpha.cpp and gfxRecoverAlphaSSE2.cpp.
|
* gfxRecoverAlpha.cpp and gfxAlphaRecoveryGeneric.h.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static inline uint32_t RecoverPixel(uint32_t black, uint32_t white) {
|
static inline uint32_t RecoverPixel(uint32_t black, uint32_t white) {
|
||||||
|
129
gfx/thebes/gfxAlphaRecoveryGeneric.h
Normal file
129
gfx/thebes/gfxAlphaRecoveryGeneric.h
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||||
|
#ifndef _GFXALPHARECOVERY_GENERIC_H_
|
||||||
|
#define _GFXALPHARECOVERY_GENERIC_H_
|
||||||
|
|
||||||
|
#include "gfxAlphaRecovery.h"
|
||||||
|
#include "gfxImageSurface.h"
|
||||||
|
#include "nsDebug.h"
|
||||||
|
#include <xsimd/xsimd.hpp>
|
||||||
|
|
||||||
|
template <typename Arch>
|
||||||
|
bool gfxAlphaRecovery::RecoverAlphaGeneric(gfxImageSurface* blackSurf,
|
||||||
|
const gfxImageSurface* whiteSurf) {
|
||||||
|
mozilla::gfx::IntSize size = blackSurf->GetSize();
|
||||||
|
|
||||||
|
if (size != whiteSurf->GetSize() ||
|
||||||
|
(blackSurf->Format() != mozilla::gfx::SurfaceFormat::A8R8G8B8_UINT32 &&
|
||||||
|
blackSurf->Format() != mozilla::gfx::SurfaceFormat::X8R8G8B8_UINT32) ||
|
||||||
|
(whiteSurf->Format() != mozilla::gfx::SurfaceFormat::A8R8G8B8_UINT32 &&
|
||||||
|
whiteSurf->Format() != mozilla::gfx::SurfaceFormat::X8R8G8B8_UINT32))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
blackSurf->Flush();
|
||||||
|
whiteSurf->Flush();
|
||||||
|
|
||||||
|
unsigned char* blackData = blackSurf->Data();
|
||||||
|
unsigned char* whiteData = whiteSurf->Data();
|
||||||
|
|
||||||
|
if ((NS_PTR_TO_UINT32(blackData) & 0xf) !=
|
||||||
|
(NS_PTR_TO_UINT32(whiteData) & 0xf) ||
|
||||||
|
(blackSurf->Stride() - whiteSurf->Stride()) & 0xf) {
|
||||||
|
// Cannot keep these in alignment.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
alignas(Arch::alignment()) static const uint8_t greenMaski[] = {
|
||||||
|
0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
|
||||||
|
0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
|
||||||
|
};
|
||||||
|
alignas(Arch::alignment()) static const uint8_t alphaMaski[] = {
|
||||||
|
0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
|
||||||
|
0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
|
||||||
|
};
|
||||||
|
|
||||||
|
using batch_type = xsimd::batch<uint8_t, Arch>;
|
||||||
|
constexpr size_t batch_size = batch_type::size;
|
||||||
|
static_assert(batch_size == 16);
|
||||||
|
|
||||||
|
batch_type greenMask = batch_type::load_aligned(greenMaski);
|
||||||
|
batch_type alphaMask = batch_type::load_aligned(alphaMaski);
|
||||||
|
|
||||||
|
for (int32_t i = 0; i < size.height; ++i) {
|
||||||
|
int32_t j = 0;
|
||||||
|
// Loop single pixels until blackData is 16-byte aligned.
|
||||||
|
while (NS_PTR_TO_UINT32(blackData) & 0xf && j < size.width) {
|
||||||
|
*((uint32_t*)blackData) =
|
||||||
|
RecoverPixel(*reinterpret_cast<uint32_t*>(blackData),
|
||||||
|
*reinterpret_cast<uint32_t*>(whiteData));
|
||||||
|
blackData += 4;
|
||||||
|
whiteData += 4;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
// This extra loop allows the compiler to do some more clever register
|
||||||
|
// management and makes it about 5% faster than with only the 4 pixel
|
||||||
|
// at a time loop.
|
||||||
|
for (; j < size.width - 8; j += 8) {
|
||||||
|
auto black1 = batch_type::load_aligned(blackData);
|
||||||
|
auto white1 = batch_type::load_aligned(whiteData);
|
||||||
|
auto black2 = batch_type::load_aligned(blackData + batch_size);
|
||||||
|
auto white2 = batch_type::load_aligned(whiteData + batch_size);
|
||||||
|
|
||||||
|
// Execute the same instructions as described in RecoverPixel, only
|
||||||
|
// using a packed saturated subtract.
|
||||||
|
white1 = xsimd::ssub(white1, black1);
|
||||||
|
white2 = xsimd::ssub(white2, black2);
|
||||||
|
white1 = xsimd::ssub(greenMask, white1);
|
||||||
|
white2 = xsimd::ssub(greenMask, white2);
|
||||||
|
// Producing the final black pixel in a register and storing
|
||||||
|
// that is actually faster than doing a masked store since that
|
||||||
|
// does an unaligned storage. We have the black pixel in a register
|
||||||
|
// anyway.
|
||||||
|
black1 = xsimd::bitwise_andnot(black1, alphaMask);
|
||||||
|
black2 = xsimd::bitwise_andnot(black2, alphaMask);
|
||||||
|
white1 = xsimd::slide_left<2>(white1);
|
||||||
|
white2 = xsimd::slide_left<2>(white2);
|
||||||
|
white1 &= alphaMask;
|
||||||
|
white2 &= alphaMask;
|
||||||
|
black1 |= white1;
|
||||||
|
black2 |= white2;
|
||||||
|
|
||||||
|
black1.store_aligned(blackData);
|
||||||
|
black2.store_aligned(blackData + batch_size);
|
||||||
|
blackData += 2 * batch_size;
|
||||||
|
whiteData += 2 * batch_size;
|
||||||
|
}
|
||||||
|
for (; j < size.width - 4; j += 4) {
|
||||||
|
auto black = batch_type::load_aligned(blackData);
|
||||||
|
auto white = batch_type::load_aligned(whiteData);
|
||||||
|
|
||||||
|
white = xsimd::ssub(white, black);
|
||||||
|
white = xsimd::ssub(greenMask, white);
|
||||||
|
black = xsimd::bitwise_andnot(black, alphaMask);
|
||||||
|
white = xsimd::slide_left<2>(white);
|
||||||
|
white &= alphaMask;
|
||||||
|
black |= white;
|
||||||
|
black.store_aligned(blackData);
|
||||||
|
blackData += batch_size;
|
||||||
|
whiteData += batch_size;
|
||||||
|
}
|
||||||
|
// Loop single pixels until we're done.
|
||||||
|
while (j < size.width) {
|
||||||
|
*((uint32_t*)blackData) =
|
||||||
|
RecoverPixel(*reinterpret_cast<uint32_t*>(blackData),
|
||||||
|
*reinterpret_cast<uint32_t*>(whiteData));
|
||||||
|
blackData += 4;
|
||||||
|
whiteData += 4;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
blackData += blackSurf->Stride() - j * 4;
|
||||||
|
whiteData += whiteSurf->Stride() - j * 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
blackSurf->MarkDirty();
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
9
gfx/thebes/gfxAlphaRecoveryNeon.cpp
Normal file
9
gfx/thebes/gfxAlphaRecoveryNeon.cpp
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||||
|
|
||||||
|
#include "gfxAlphaRecoveryGeneric.h"
|
||||||
|
|
||||||
|
template bool gfxAlphaRecoveryGeneric::RecoverAlpha<xsimd::neon>(
|
||||||
|
gfxImageSurface* blackSurf, const gfxImageSurface* whiteSurf);
|
@ -3,128 +3,7 @@
|
|||||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||||
|
|
||||||
#include "gfxAlphaRecovery.h"
|
#include "gfxAlphaRecoveryGeneric.h"
|
||||||
#include "gfxImageSurface.h"
|
|
||||||
#include "nsDebug.h"
|
|
||||||
#include <xsimd/xsimd.hpp>
|
|
||||||
|
|
||||||
using arch = xsimd::sse2;
|
template bool gfxAlphaRecovery::RecoverAlphaGeneric<xsimd::sse2>(
|
||||||
|
gfxImageSurface* blackSurf, const gfxImageSurface* whiteSurf);
|
||||||
// This file should only be compiled on x86 and x64 systems. Additionally,
|
|
||||||
// you'll need to compile it with -msse2 if you're using GCC on x86.
|
|
||||||
|
|
||||||
alignas(arch::alignment()) static const uint8_t greenMaski[] = {
|
|
||||||
0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
|
|
||||||
0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
|
|
||||||
};
|
|
||||||
alignas(arch::alignment()) static const uint8_t alphaMaski[] = {
|
|
||||||
0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
|
|
||||||
0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
|
|
||||||
};
|
|
||||||
|
|
||||||
bool gfxAlphaRecovery::RecoverAlphaSSE2(gfxImageSurface* blackSurf,
|
|
||||||
const gfxImageSurface* whiteSurf) {
|
|
||||||
mozilla::gfx::IntSize size = blackSurf->GetSize();
|
|
||||||
|
|
||||||
if (size != whiteSurf->GetSize() ||
|
|
||||||
(blackSurf->Format() != mozilla::gfx::SurfaceFormat::A8R8G8B8_UINT32 &&
|
|
||||||
blackSurf->Format() != mozilla::gfx::SurfaceFormat::X8R8G8B8_UINT32) ||
|
|
||||||
(whiteSurf->Format() != mozilla::gfx::SurfaceFormat::A8R8G8B8_UINT32 &&
|
|
||||||
whiteSurf->Format() != mozilla::gfx::SurfaceFormat::X8R8G8B8_UINT32))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
blackSurf->Flush();
|
|
||||||
whiteSurf->Flush();
|
|
||||||
|
|
||||||
unsigned char* blackData = blackSurf->Data();
|
|
||||||
unsigned char* whiteData = whiteSurf->Data();
|
|
||||||
|
|
||||||
if ((NS_PTR_TO_UINT32(blackData) & 0xf) !=
|
|
||||||
(NS_PTR_TO_UINT32(whiteData) & 0xf) ||
|
|
||||||
(blackSurf->Stride() - whiteSurf->Stride()) & 0xf) {
|
|
||||||
// Cannot keep these in alignment.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
using batch_type = xsimd::batch<uint8_t, arch>;
|
|
||||||
constexpr size_t batch_size = batch_type::size;
|
|
||||||
static_assert(batch_size == 16);
|
|
||||||
|
|
||||||
batch_type greenMask = batch_type::load_aligned(greenMaski);
|
|
||||||
batch_type alphaMask = batch_type::load_aligned(alphaMaski);
|
|
||||||
|
|
||||||
for (int32_t i = 0; i < size.height; ++i) {
|
|
||||||
int32_t j = 0;
|
|
||||||
// Loop single pixels until at 4 byte alignment.
|
|
||||||
while (NS_PTR_TO_UINT32(blackData) & 0xf && j < size.width) {
|
|
||||||
*((uint32_t*)blackData) =
|
|
||||||
RecoverPixel(*reinterpret_cast<uint32_t*>(blackData),
|
|
||||||
*reinterpret_cast<uint32_t*>(whiteData));
|
|
||||||
blackData += 4;
|
|
||||||
whiteData += 4;
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
// This extra loop allows the compiler to do some more clever registry
|
|
||||||
// management and makes it about 5% faster than with only the 4 pixel
|
|
||||||
// at a time loop.
|
|
||||||
for (; j < size.width - 8; j += 8) {
|
|
||||||
auto black1 = batch_type::load_aligned(blackData);
|
|
||||||
auto white1 = batch_type::load_aligned(whiteData);
|
|
||||||
auto black2 = batch_type::load_aligned(blackData + batch_size);
|
|
||||||
auto white2 = batch_type::load_aligned(whiteData + batch_size);
|
|
||||||
|
|
||||||
// Execute the same instructions as described in RecoverPixel, only
|
|
||||||
// using a packed saturated subtract.
|
|
||||||
white1 = xsimd::ssub(white1, black1);
|
|
||||||
white2 = xsimd::ssub(white2, black2);
|
|
||||||
white1 = xsimd::ssub(greenMask, white1);
|
|
||||||
white2 = xsimd::ssub(greenMask, white2);
|
|
||||||
// Producing the final black pixel in a register and storing
|
|
||||||
// that is actually faster than doing a masked store since that
|
|
||||||
// does an unaligned storage. We have the black pixel in a register
|
|
||||||
// anyway.
|
|
||||||
black1 = xsimd::bitwise_andnot(black1, alphaMask);
|
|
||||||
black2 = xsimd::bitwise_andnot(black2, alphaMask);
|
|
||||||
white1 = xsimd::slide_left<2>(white1);
|
|
||||||
white2 = xsimd::slide_left<2>(white2);
|
|
||||||
white1 &= alphaMask;
|
|
||||||
white2 &= alphaMask;
|
|
||||||
black1 |= white1;
|
|
||||||
black2 |= white2;
|
|
||||||
|
|
||||||
black1.store_aligned(blackData);
|
|
||||||
black2.store_aligned(blackData + batch_size);
|
|
||||||
blackData += 2 * batch_size;
|
|
||||||
whiteData += 2 * batch_size;
|
|
||||||
}
|
|
||||||
for (; j < size.width - 4; j += 4) {
|
|
||||||
auto black = batch_type::load_aligned(blackData);
|
|
||||||
auto white = batch_type::load_aligned(whiteData);
|
|
||||||
|
|
||||||
white = xsimd::ssub(white, black);
|
|
||||||
white = xsimd::ssub(greenMask, white);
|
|
||||||
black = xsimd::bitwise_andnot(black, alphaMask);
|
|
||||||
white = xsimd::slide_left<2>(white);
|
|
||||||
white &= alphaMask;
|
|
||||||
black |= white;
|
|
||||||
black.store_aligned(blackData);
|
|
||||||
blackData += batch_size;
|
|
||||||
whiteData += batch_size;
|
|
||||||
}
|
|
||||||
// Loop single pixels until we're done.
|
|
||||||
while (j < size.width) {
|
|
||||||
*((uint32_t*)blackData) =
|
|
||||||
RecoverPixel(*reinterpret_cast<uint32_t*>(blackData),
|
|
||||||
*reinterpret_cast<uint32_t*>(whiteData));
|
|
||||||
blackData += 4;
|
|
||||||
whiteData += 4;
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
blackData += blackSurf->Stride() - j * 4;
|
|
||||||
whiteData += whiteSurf->Stride() - j * 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
blackSurf->MarkDirty();
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user