GRAPHICS: Optimize alpha blend NEON and Generic

I optimized the NEON and Generic paths for ManagedSurface::blendBlitFrom
and the new TransparentSurface::blit. Now (on ARM), the new blit function
matches the speed of the old blit function even with the added
indirections that the runtime extension detection code introduces.

I also added a benchmark for this code; you can build and run it with
this command:
CFLAGS="-DTEST_BLEND_SPEED" make test

I reverted the Wii backend so that it no longer reports the AltiVec feature, since the Wii doesn't use AltiVec.

I also removed graphics/blit-neon.cpp from graphics/module.mk: simply
including the .cpp file in graphics/blit-alpha.cpp was a better option,
because then I didn't need to explicitly instantiate every version of
the templates that I needed.
This commit is contained in:
Wyatt Radkiewicz 2023-08-03 13:42:23 -07:00 committed by Eugene Sandulenko
parent 402c67064d
commit eebadf4495
7 changed files with 411 additions and 91 deletions

View File

@ -176,8 +176,7 @@ bool OSystem_Wii::hasFeature(Feature f) {
return (f == kFeatureFullscreenMode) ||
(f == kFeatureAspectRatioCorrection) ||
(f == kFeatureCursorPalette) ||
(f == kFeatureOverlaySupportsAlpha) ||
(f == kFeatureAltivec);
(f == kFeatureOverlaySupportsAlpha));
}
void OSystem_Wii::setFeatureState(Feature f, bool enable) {

View File

@ -23,6 +23,8 @@
#include "graphics/pixelformat.h"
#include "common/system.h"
#include "graphics/blit-neon.cpp"
namespace Graphics {
namespace {
@ -203,7 +205,7 @@ BlendBlit::Args::Args(byte *dst, const byte *src,
/**
* Optimized version of doBlit to be used with multiply blended blitting
*/
template<bool doscale>
template<bool doscale, bool rgbmod, bool alphamod>
void BlendBlit::doBlitMultiplyBlendLogicGeneric(Args &args) {
const byte *in;
byte *out;
@ -211,10 +213,10 @@ void BlendBlit::doBlitMultiplyBlendLogicGeneric(Args &args) {
int scaleXCtr, scaleYCtr = 0;
const byte *inBase;
byte ca = args.alphamod ? ((args.color >> kAModShift) & 0xFF) : 255;
byte cr = args.rgbmod ? ((args.color >> kRModShift) & 0xFF) : 255;
byte cg = args.rgbmod ? ((args.color >> kGModShift) & 0xFF) : 255;
byte cb = args.rgbmod ? ((args.color >> kBModShift) & 0xFF) : 255;
const byte ca = alphamod ? ((args.color >> kAModShift) & 0xFF) : 255;
const byte cr = rgbmod ? ((args.color >> kRModShift) & 0xFF) : 255;
const byte cg = rgbmod ? ((args.color >> kGModShift) & 0xFF) : 255;
const byte cb = rgbmod ? ((args.color >> kBModShift) & 0xFF) : 255;
for (uint32 i = 0; i < args.height; i++) {
if (doscale) {
@ -266,7 +268,7 @@ void BlendBlit::doBlitMultiplyBlendLogicGeneric(Args &args) {
}
template<bool doscale>
template<bool doscale, bool rgbmod, bool alphamod>
void BlendBlit::doBlitAlphaBlendLogicGeneric(Args &args) {
const byte *in;
byte *out;
@ -274,10 +276,10 @@ void BlendBlit::doBlitAlphaBlendLogicGeneric(Args &args) {
int scaleXCtr, scaleYCtr = 0;
const byte *inBase;
byte ca = args.alphamod ? ((args.color >> kAModShift) & 0xFF) : 255;
byte cr = args.rgbmod ? ((args.color >> kRModShift) & 0xFF) : 255;
byte cg = args.rgbmod ? ((args.color >> kGModShift) & 0xFF) : 255;
byte cb = args.rgbmod ? ((args.color >> kBModShift) & 0xFF) : 255;
const byte ca = alphamod ? ((args.color >> kAModShift) & 0xFF) : 255;
const byte cr = rgbmod ? ((args.color >> kRModShift) & 0xFF) : 255;
const byte cg = rgbmod ? ((args.color >> kGModShift) & 0xFF) : 255;
const byte cb = rgbmod ? ((args.color >> kBModShift) & 0xFF) : 255;
for (uint32 i = 0; i < args.height; i++) {
if (doscale) {
@ -294,15 +296,37 @@ void BlendBlit::doBlitAlphaBlendLogicGeneric(Args &args) {
uint32 ina = in[kAIndex] * ca >> 8;
if (ina != 0) {
uint outb = (out[kBIndex] * (255 - ina) >> 8);
uint outg = (out[kGIndex] * (255 - ina) >> 8);
uint outr = (out[kRIndex] * (255 - ina) >> 8);
if (rgbmod) {
if (ina != 0) {
const uint outb = (out[kBIndex] * (255 - ina) >> 8);
const uint outg = (out[kGIndex] * (255 - ina) >> 8);
const uint outr = (out[kRIndex] * (255 - ina) >> 8);
out[kAIndex] = 255;
out[kBIndex] = outb + (in[kBIndex] * ina * cb >> 16);
out[kGIndex] = outg + (in[kGIndex] * ina * cg >> 16);
out[kRIndex] = outr + (in[kRIndex] * ina * cr >> 16);
out[kAIndex] = 255;
out[kBIndex] = outb + (in[kBIndex] * ina * cb >> 16);
out[kGIndex] = outg + (in[kGIndex] * ina * cg >> 16);
out[kRIndex] = outr + (in[kRIndex] * ina * cr >> 16);
}
} else {
if (ina != 0) {
// Runs faster on newer hardware (doesn't do single byte manip)
const uint32 in32 = *(const uint32 *)in;
const uint32 out32 = *(const uint32 *)out;
const uint32 rb = (in32 & (kRModMask | kBModMask)) >> 8;
const uint32 g = in32 & kGModMask;
const uint32 dstrb = (out32 & (kRModMask | kBModMask)) >> 8;
const uint32 dstg = out32 & kGModMask;
*(uint32 *)out = kAModMask |
((dstrb * (255 - ina) + rb * ina) & (kRModMask | kBModMask)) |
((dstg * (255 - ina) + g * ina) >> 8);
// I think this code will run faster on older hardware
// TODO maybe?: Put #ifdef to use on older hardware
//out[kAIndex] = 255;
//out[kBIndex] = (out[kBIndex] * (255 - ina) + in[kBIndex] * ina) >> 8;
//out[kGIndex] = (out[kGIndex] * (255 - ina) + in[kGIndex] * ina) >> 8;
//out[kRIndex] = (out[kRIndex] * (255 - ina) + in[kRIndex] * ina) >> 8;
}
}
if (doscale)
@ -323,7 +347,7 @@ void BlendBlit::doBlitAlphaBlendLogicGeneric(Args &args) {
/**
* Optimized version of doBlit to be used with subtractive blended blitting
*/
template<bool doscale>
template<bool doscale, bool rgbmod>
void BlendBlit::doBlitSubtractiveBlendLogicGeneric(Args &args) {
const byte *in;
byte *out;
@ -331,9 +355,9 @@ void BlendBlit::doBlitSubtractiveBlendLogicGeneric(Args &args) {
int scaleXCtr, scaleYCtr = 0;
const byte *inBase;
byte cr = args.rgbmod ? ((args.color >> kRModShift) & 0xFF) : 255;
byte cg = args.rgbmod ? ((args.color >> kGModShift) & 0xFF) : 255;
byte cb = args.rgbmod ? ((args.color >> kBModShift) & 0xFF) : 255;
const byte cr = rgbmod ? ((args.color >> kRModShift) & 0xFF) : 255;
const byte cg = rgbmod ? ((args.color >> kGModShift) & 0xFF) : 255;
const byte cb = rgbmod ? ((args.color >> kBModShift) & 0xFF) : 255;
for (uint32 i = 0; i < args.height; i++) {
if (doscale) {
@ -384,7 +408,7 @@ void BlendBlit::doBlitSubtractiveBlendLogicGeneric(Args &args) {
/**
* Optimized version of doBlit to be used with additive blended blitting
*/
template<bool doscale>
template<bool doscale, bool rgbmod, bool alphamod>
void BlendBlit::doBlitAdditiveBlendLogicGeneric(Args &args) {
const byte *in;
byte *out;
@ -392,10 +416,10 @@ void BlendBlit::doBlitAdditiveBlendLogicGeneric(Args &args) {
int scaleXCtr, scaleYCtr = 0;
const byte *inBase;
byte ca = args.alphamod ? ((args.color >> kAModShift) & 0xFF) : 255;
byte cr = args.rgbmod ? ((args.color >> kRModShift) & 0xFF) : 255;
byte cg = args.rgbmod ? ((args.color >> kGModShift) & 0xFF) : 255;
byte cb = args.rgbmod ? ((args.color >> kBModShift) & 0xFF) : 255;
const byte ca = alphamod ? ((args.color >> kAModShift) & 0xFF) : 255;
const byte cr = rgbmod ? ((args.color >> kRModShift) & 0xFF) : 255;
const byte cg = rgbmod ? ((args.color >> kGModShift) & 0xFF) : 255;
const byte cb = rgbmod ? ((args.color >> kBModShift) & 0xFF) : 255;
for (uint32 i = 0; i < args.height; i++) {
if (doscale) {
@ -552,12 +576,12 @@ void BlendBlit::blit(byte *dst, const byte *src,
const AlphaType alphaType) {
if (width == 0 || height == 0) return;
if (!blitFunc) {
// Get the correct blit function
// Get the correct blit function
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
if (g_system->hasFeature(OSystem::kFeatureNEON)) blitFunc = blitNEON;
else blitFunc = blitGeneric;
if (g_system->hasFeature(OSystem::kFeatureNEON)) blitFunc = blitNEON;
else blitFunc = blitGeneric;
#else
blitFunc = blitGeneric;
blitFunc = blitGeneric;
#endif
}
@ -565,8 +589,11 @@ void BlendBlit::blit(byte *dst, const byte *src,
blitFunc(args, blendMode, alphaType);
}
// Let me know if there is a way to do function pointer to templated functions
#define BLIT_FUNC(ext) \
void BlendBlit::blit##ext(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) { \
bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask); \
bool alphamod = ((args.color & kAModMask) != kAModMask); \
if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) { \
if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) { \
doBlitOpaqueBlendLogic##ext<false>(args); \
@ -574,14 +601,54 @@ void BlendBlit::blit(byte *dst, const byte *src,
doBlitBinaryBlendLogic##ext<false>(args); \
} else { \
if (blendMode == BLEND_ADDITIVE) { \
doBlitAdditiveBlendLogic##ext<false>(args); \
if (rgbmod) { \
if (alphamod) { \
doBlitAdditiveBlendLogic##ext<false, true, true>(args); \
} else { \
doBlitAdditiveBlendLogic##ext<false, true, false>(args); \
} \
} else { \
if (alphamod) { \
doBlitAdditiveBlendLogic##ext<false, false, true>(args); \
} else { \
doBlitAdditiveBlendLogic##ext<false, false, false>(args); \
} \
} \
} else if (blendMode == BLEND_SUBTRACTIVE) { \
doBlitSubtractiveBlendLogic##ext<false>(args); \
if (rgbmod) { \
doBlitSubtractiveBlendLogic##ext<false, true>(args); \
} else { \
doBlitSubtractiveBlendLogic##ext<false, false>(args); \
} \
} else if (blendMode == BLEND_MULTIPLY) { \
doBlitMultiplyBlendLogic##ext<false>(args); \
if (rgbmod) { \
if (alphamod) { \
doBlitMultiplyBlendLogic##ext<false, true, true>(args); \
} else { \
doBlitMultiplyBlendLogic##ext<false, true, false>(args); \
} \
} else { \
if (alphamod) { \
doBlitMultiplyBlendLogic##ext<false, false, true>(args); \
} else { \
doBlitMultiplyBlendLogic##ext<false, false, false>(args); \
} \
} \
} else { \
assert(blendMode == BLEND_NORMAL); \
doBlitAlphaBlendLogic##ext<false>(args); \
if (rgbmod) { \
if (alphamod) { \
doBlitAlphaBlendLogic##ext<false, true, true>(args); \
} else { \
doBlitAlphaBlendLogic##ext<false, true, false>(args); \
} \
} else { \
if (alphamod) { \
doBlitAlphaBlendLogic##ext<false, false, true>(args); \
} else { \
doBlitAlphaBlendLogic##ext<false, false, false>(args); \
} \
} \
} \
} \
} else { \
@ -591,14 +658,54 @@ void BlendBlit::blit(byte *dst, const byte *src,
doBlitBinaryBlendLogic##ext<true>(args); \
} else { \
if (blendMode == BLEND_ADDITIVE) { \
doBlitAdditiveBlendLogic##ext<true>(args); \
if (rgbmod) { \
if (alphamod) { \
doBlitAdditiveBlendLogic##ext<true, true, true>(args); \
} else { \
doBlitAdditiveBlendLogic##ext<true, true, false>(args); \
} \
} else { \
if (alphamod) { \
doBlitAdditiveBlendLogic##ext<true, false, true>(args); \
} else { \
doBlitAdditiveBlendLogic##ext<true, false, false>(args); \
} \
} \
} else if (blendMode == BLEND_SUBTRACTIVE) { \
doBlitSubtractiveBlendLogic##ext<true>(args); \
if (rgbmod) { \
doBlitSubtractiveBlendLogic##ext<true, true>(args); \
} else { \
doBlitSubtractiveBlendLogic##ext<true, false>(args); \
} \
} else if (blendMode == BLEND_MULTIPLY) { \
doBlitMultiplyBlendLogic##ext<true>(args); \
if (rgbmod) { \
if (alphamod) { \
doBlitMultiplyBlendLogic##ext<true, true, true>(args); \
} else { \
doBlitMultiplyBlendLogic##ext<true, true, false>(args); \
} \
} else { \
if (alphamod) { \
doBlitMultiplyBlendLogic##ext<true, false, true>(args); \
} else { \
doBlitMultiplyBlendLogic##ext<true, false, false>(args); \
} \
} \
} else { \
assert(blendMode == BLEND_NORMAL); \
doBlitAlphaBlendLogic##ext<true>(args); \
if (rgbmod) { \
if (alphamod) { \
doBlitAlphaBlendLogic##ext<true, true, true>(args); \
} else { \
doBlitAlphaBlendLogic##ext<true, true, false>(args); \
} \
} else { \
if (alphamod) { \
doBlitAlphaBlendLogic##ext<true, false, true>(args); \
} else { \
doBlitAlphaBlendLogic##ext<true, false, false>(args); \
} \
} \
} \
} \
} \

View File

@ -19,12 +19,14 @@
*
*/
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#include <arm_neon.h>
#include "graphics/blit.h"
#include "graphics/pixelformat.h"
namespace Graphics {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
template<bool doscale>
void BlendBlit::doBlitBinaryBlendLogicNEON(Args &args) {
(void)args;
@ -33,33 +35,143 @@ template<bool doscale>
void BlendBlit::doBlitOpaqueBlendLogicNEON(Args &args) {
(void)args;
}
template<bool doscale>
// Stub for the NEON multiply-blend path: the body only marks args as used,
// so calling this function leaves the destination untouched.
// NOTE(review): presumably a placeholder until a NEON implementation is
// written - confirm that the NEON dispatcher should reach this for
// BLEND_MULTIPLY.
template<bool doscale, bool rgbmod, bool alphamod>
void BlendBlit::doBlitMultiplyBlendLogicNEON(Args &args) {
(void)args;
}
template<bool doscale>
// Stub for the NEON subtractive-blend path: the body only marks args as
// used, so calling this function leaves the destination untouched.
// NOTE(review): presumably a placeholder until a NEON implementation is
// written - confirm that the NEON dispatcher should reach this for
// BLEND_SUBTRACTIVE.
template<bool doscale, bool rgbmod>
void BlendBlit::doBlitSubtractiveBlendLogicNEON(Args &args) {
(void)args;
}
template<bool doscale>
// Stub for the NEON additive-blend path: the body only marks args as used,
// so calling this function leaves the destination untouched.
// NOTE(review): presumably a placeholder until a NEON implementation is
// written - confirm that the NEON dispatcher should reach this for
// BLEND_ADDITIVE.
template<bool doscale, bool rgbmod, bool alphamod>
void BlendBlit::doBlitAdditiveBlendLogicNEON(Args &args) {
(void)args;
}
template<bool doscale>
/**
 * Alpha-blends four source pixels over four destination pixels.
 *
 * @param src   four packed source pixels (alpha in the kAModMask byte)
 * @param dst   four packed destination pixels
 * @param flip  when true, the order of the four source lanes is reversed first
 * @param ca    alpha modulation factor, only applied when alphamod is set
 * @param cr    red modulation factor, only applied when rgbmod is set
 * @param cg    green modulation factor, only applied when rgbmod is set
 * @param cb    blue modulation factor, only applied when rgbmod is set
 * @return the blended pixels; lanes whose effective source alpha is zero
 *         return the destination pixel unchanged, all other lanes have
 *         their alpha byte forced to 0xff.
 */
template<bool rgbmod, bool alphamod>
static inline uint32x4_t drawPixelAlphaBlend(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
	const uint32x4_t maskA = vmovq_n_u32(BlendBlit::kAModMask);
	const uint32x4_t maskG = vmovq_n_u32(BlendBlit::kGModMask);

	if (flip) {
		// Swap within each 64-bit half, then swap the halves: full lane reversal
		const uint32x4_t pairSwapped = vrev64q_u32(src);
		src = vcombine_u32(vget_high_u32(pairSwapped), vget_low_u32(pairSwapped));
	}

	// Effective source alpha, optionally scaled by the colour-mod alpha
	uint32x4_t ina = vandq_u32(src, maskA);
	if (alphamod)
		ina = vshrq_n_u32(vmulq_u32(ina, vdupq_n_u32(ca)), 8);

	// All-ones in every lane whose source pixel is fully transparent
	const uint32x4_t transparent = vceqq_u32(ina, vmovq_n_u32(0));
	const uint32x4_t invAlpha = vsubq_u32(vmovq_n_u32(255), ina);

	uint32x4_t blended;
	if (rgbmod) {
		const uint32x4_t maskR = vmovq_n_u32(BlendBlit::kRModMask);
		const uint32x4_t maskB = vmovq_n_u32(BlendBlit::kBModMask);

		// Each channel is handled on its own, shifted down to blue's bit position
		const uint32x4_t dR = vshrq_n_u32(vandq_u32(dst, maskR), 16);
		const uint32x4_t sR = vshrq_n_u32(vandq_u32(src, maskR), 16);
		const uint32x4_t dG = vshrq_n_u32(vandq_u32(dst, maskG), 8);
		const uint32x4_t sG = vshrq_n_u32(vandq_u32(src, maskG), 8);
		const uint32x4_t dB = vandq_u32(dst, maskB);
		const uint32x4_t sB = vandq_u32(src, maskB);

		const uint32x4_t keepR = vshrq_n_u32(vmulq_u32(dR, invAlpha), 8);
		const uint32x4_t keepG = vshrq_n_u32(vmulq_u32(dG, invAlpha), 8);
		const uint32x4_t keepB = vshrq_n_u32(vmulq_u32(dB, invAlpha), 8);

		const uint32x4_t outR = vaddq_u32(keepR, vshrq_n_u32(vmulq_u32(vmulq_u32(sR, ina), vmovq_n_u32(cr)), 16));
		const uint32x4_t outG = vaddq_u32(keepG, vshrq_n_u32(vmulq_u32(vmulq_u32(sG, ina), vmovq_n_u32(cg)), 16));
		const uint32x4_t outB = vaddq_u32(keepB, vshrq_n_u32(vmulq_u32(vmulq_u32(sB, ina), vmovq_n_u32(cb)), 16));

		// Re-pack the channels and force the result opaque
		blended = vorrq_u32(vandq_u32(outB, maskB), maskA);
		blended = vorrq_u32(vandq_u32(vshlq_n_u32(outG, 8), maskG), blended);
		blended = vorrq_u32(vandq_u32(vshlq_n_u32(outR, 16), maskR), blended);
	} else {
		const uint32x4_t maskRB = vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask);

		// Red and blue share one multiply; green gets its own
		const uint32x4_t dRB = vshrq_n_u32(vandq_u32(dst, maskRB), 8);
		const uint32x4_t sRB = vshrq_n_u32(vandq_u32(src, maskRB), 8);
		const uint32x4_t dG = vandq_u32(dst, maskG);
		const uint32x4_t sG = vandq_u32(src, maskG);

		const uint32x4_t keepRB = vshrq_n_u32(vmulq_u32(dRB, invAlpha), 8);
		const uint32x4_t keepG = vshrq_n_u32(vmulq_u32(dG, invAlpha), 8);

		const uint32x4_t outRB = vaddq_u32(keepRB, vshrq_n_u32(vmulq_u32(sRB, ina), 8));
		const uint32x4_t outG = vaddq_u32(keepG, vshrq_n_u32(vmulq_u32(sG, ina), 8));

		blended = vorrq_u32(vandq_u32(outG, maskG), maskA);
		blended = vorrq_u32(vandq_u32(vshlq_n_u32(outRB, 8), maskRB), blended);
	}

	// Transparent lanes keep the destination, every other lane takes the blend
	const uint32x4_t keptDst = vandq_u32(transparent, dst);
	const uint32x4_t takenSrc = vandq_u32(vmvnq_u32(transparent), blended);
	return vorrq_u32(keptDst, takenSrc);
}
/**
 * NEON version of doBlit for normal (alpha) blended blitting.
 *
 * Each row is processed four pixels at a time through drawPixelAlphaBlend;
 * the pixels left over at the end of the row go through the scalar code.
 *
 * @tparam doscale   sample the source through the scaleX/scaleY counters
 *                   instead of walking it linearly
 * @tparam rgbmod    apply the red/green/blue components of args.color
 * @tparam alphamod  apply the alpha component of args.color
 * @param args       blit parameters; args.ino (unscaled only) and args.outo
 *                   are advanced row by row as a side effect
 */
template<bool doscale, bool rgbmod, bool alphamod>
void BlendBlit::doBlitAlphaBlendLogicNEON(Args &args) {
	const byte *in;
	byte *out;

	int scaleXCtr, scaleYCtr = 0;
	const byte *inBase;

	const byte ca = alphamod ? ((args.color >> kAModShift) & 0xFF) : 255;
	const byte cr = rgbmod ? ((args.color >> kRModShift) & 0xFF) : 255;
	const byte cg = rgbmod ? ((args.color >> kGModShift) & 0xFF) : 255;
	const byte cb = rgbmod ? ((args.color >> kBModShift) & 0xFF) : 255;

	for (uint32 i = 0; i < args.height; i++) {
		if (doscale) {
			inBase = args.ino + scaleYCtr / SCALE_THRESHOLD * args.inoStep;
			scaleXCtr = 0;
		} else {
			in = args.ino;
		}
		out = args.outo;

		uint32 j;
		for (j = 0; j + 4 < args.width; j += 4) {
			uint32x4_t dstPixels = vld1q_u32((const uint32 *)out);
			uint32x4_t srcPixels;
			if (!doscale) {
				// NOTE(review): with FLIP_H the four pixels are loaded upwards
				// from `in` and then reversed by drawPixelAlphaBlend, while `in`
				// advances by args.inStep * 4. If args.inStep is negative for
				// flipped blits (not visible from here), this load should
				// probably start at in + 3 * args.inStep - confirm.
				srcPixels = vld1q_u32((const uint32 *)in);
			} else {
				// Gather four individually scaled source pixels, one per lane.
				// Every pixel needs its own lane: writing them all to lane 0
				// would leave lanes 1..3 zero, i.e. fully transparent.
				srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / SCALE_THRESHOLD * args.inStep), vmovq_n_u32(0), 0);
				scaleXCtr += args.scaleX;
				srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / SCALE_THRESHOLD * args.inStep), srcPixels, 1);
				scaleXCtr += args.scaleX;
				srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / SCALE_THRESHOLD * args.inStep), srcPixels, 2);
				scaleXCtr += args.scaleX;
				srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / SCALE_THRESHOLD * args.inStep), srcPixels, 3);
				scaleXCtr += args.scaleX;
			}

			uint32x4_t res = drawPixelAlphaBlend<rgbmod, alphamod>(srcPixels, dstPixels, args.flipping & FLIP_H, ca, cr, cg, cb);
			vst1q_u32((uint32 *)out, res);

			if (!doscale)
				in += args.inStep * 4;
			out += 4 * 4;
		}

		// Scalar tail for the pixels that did not fit a full vector
		for (; j < args.width; j++) {
			if (doscale) {
				in = inBase + scaleXCtr / SCALE_THRESHOLD * args.inStep;
			}

			uint32 ina = in[kAIndex] * ca >> 8;

			if (ina != 0) {
				uint outb = (out[kBIndex] * (255 - ina) >> 8);
				uint outg = (out[kGIndex] * (255 - ina) >> 8);
				uint outr = (out[kRIndex] * (255 - ina) >> 8);

				out[kAIndex] = 255;
				out[kBIndex] = outb + (in[kBIndex] * ina * cb >> 16);
				out[kGIndex] = outg + (in[kGIndex] * ina * cg >> 16);
				out[kRIndex] = outr + (in[kRIndex] * ina * cr >> 16);
			}

			if (doscale)
				scaleXCtr += args.scaleX;
			else
				in += args.inStep;
			out += 4;
		}

		if (doscale)
			scaleYCtr += args.scaleY;
		else
			args.ino += args.inoStep;
		args.outo += args.dstPitch;
	}
}
#define INSTANTIATE_BLIT_TEMPLATES(ext, b) \
template void BlendBlit::doBlitBinaryBlendLogic##ext<b>(Args &); \
template void BlendBlit::doBlitOpaqueBlendLogic##ext<b>(Args &); \
template void BlendBlit::doBlitMultiplyBlendLogic##ext<b>(Args &); \
template void BlendBlit::doBlitSubtractiveBlendLogic##ext<b>(Args &); \
template void BlendBlit::doBlitAdditiveBlendLogic##ext<b>(Args &); \
template void BlendBlit::doBlitAlphaBlendLogic##ext<b>(Args &);
INSTANTIATE_BLIT_TEMPLATES(NEON, true)
INSTANTIATE_BLIT_TEMPLATES(NEON, false)
#undef INSTANTIATE_BLIT_TEMPLATES
} // end of namespace Graphics
#endif // __ARM_NEON__
}

View File

@ -29,6 +29,8 @@ namespace Common {
struct Point;
}
class BlendBlitUnfilteredTestSuite;
namespace Graphics {
/**
@ -194,29 +196,6 @@ bool setAlpha(byte *dst, const byte *src,
// This is a class so that we can declare certain things as private
class BlendBlit {
private:
static const int kBModShift = 8;
static const int kGModShift = 16;
static const int kRModShift = 24;
static const int kAModShift = 0;
static const uint32 kBModMask = 0x0000ff00;
static const uint32 kGModMask = 0x00ff0000;
static const uint32 kRModMask = 0xff000000;
static const uint32 kAModMask = 0x000000ff;
static const uint32 kRGBModMask = (kRModMask | kGModMask | kBModMask);
#ifdef SCUMM_LITTLE_ENDIAN
static const int kAIndex = 0;
static const int kBIndex = 1;
static const int kGIndex = 2;
static const int kRIndex = 3;
#else
static const int kAIndex = 3;
static const int kBIndex = 2;
static const int kGIndex = 1;
static const int kRIndex = 0;
#endif
struct Args {
bool rgbmod, alphamod;
int xp, yp;
@ -246,33 +225,56 @@ private:
static void doBlitBinaryBlendLogic##ext(Args &args); \
template<bool doscale> \
static void doBlitOpaqueBlendLogic##ext(Args &args); \
template<bool doscale> \
template<bool doscale, bool rgbmod, bool alphamod> \
static void doBlitMultiplyBlendLogic##ext(Args &args); \
template<bool doscale> \
template<bool doscale, bool rgbmod> \
static void doBlitSubtractiveBlendLogic##ext(Args &args); \
template<bool doscale> \
template<bool doscale, bool rgbmod, bool alphamod> \
static void doBlitAdditiveBlendLogic##ext(Args &args); \
template<bool doscale> \
template<bool doscale, bool rgbmod, bool alphamod> \
static void doBlitAlphaBlendLogic##ext(Args &args); \
static void blit##ext(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType);
LOGIC_FUNCS_EXT(Generic)
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
LOGIC_FUNCS_EXT(NEON)
#endif
LOGIC_FUNCS_EXT(Generic)
#undef LOGIC_FUNCS_EXT
typedef void(*BlitFunc)(Args &, const TSpriteBlendMode &, const AlphaType &);
static BlitFunc blitFunc;
friend class ::BlendBlitUnfilteredTestSuite;
public:
static const int SCALE_THRESHOLD = 0x100;
static const int kBModShift = 8;
static const int kGModShift = 16;
static const int kRModShift = 24;
static const int kAModShift = 0;
static const uint32 kBModMask = 0x0000ff00;
static const uint32 kGModMask = 0x00ff0000;
static const uint32 kRModMask = 0xff000000;
static const uint32 kAModMask = 0x000000ff;
static const uint32 kRGBModMask = (kRModMask | kGModMask | kBModMask);
#ifdef SCUMM_LITTLE_ENDIAN
static const int kAIndex = 0;
static const int kBIndex = 1;
static const int kGIndex = 2;
static const int kRIndex = 3;
#else
static const int kAIndex = 3;
static const int kBIndex = 2;
static const int kGIndex = 1;
static const int kRIndex = 0;
#endif
/**
 * Computes the scale factor for resizing srcSize to dstSize, expressed in
 * units of SCALE_THRESHOLD (equal sizes yield exactly SCALE_THRESHOLD).
 * Integer arithmetic is used, so the quotient is truncated.
 */
static inline int getScaleFactor(int srcSize, int dstSize) {
	const int scaledSrc = SCALE_THRESHOLD * srcSize;
	return scaledSrc / dstSize;
}
/**
* Returns the pixel format all operations of TransparentSurface support.
* Returns the pixel format all operations of BlendBlit::blit support.
*
* Use TS_ARGB and TS_RGB to quickly make a color in this format.
* TS_ARGB/RGB are found in graphics/transform_struct.h
@ -303,7 +305,6 @@ public:
const TSpriteBlendMode blendMode,
const AlphaType alphaType);
friend struct TransparentSurface;
}; // End of class BlendBlit
/** @} */

View File

@ -5,7 +5,6 @@ MODULE_OBJS := \
blit.o \
blit-alpha.o \
blit-scale.o \
blit-neon.o \
cursorman.o \
font.o \
fontman.o \

View File

@ -6,6 +6,7 @@
#include "common/fs.h"
#include "common/stream.h"
#include "common/system.h"
#include "graphics/surface.h"
#include "graphics/managed_surface.h"
@ -896,7 +897,103 @@ static bool areSurfacesEqual(const Graphics::Surface *a, const Graphics::Surface
class BlendBlitUnfilteredTestSuite : public CxxTest::TestSuite {
public:
/**
 * Benchmark (only compiled with -DTEST_BLEND_SPEED) that compares
 * OldTransparentSurface::blit against ManagedSurface::blendBlitFrom, the
 * latter once with the selected blit function and once forced to the
 * generic one. Every alpha type / flipping combination is timed both
 * unscaled and scaled, and the averages are printed with debug().
 */
void test_blend_speed() {
#ifdef TEST_BLEND_SPEED
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
	Graphics::BlendBlit::blitFunc = Graphics::BlendBlit::blitNEON;
#else
	Graphics::BlendBlit::blitFunc = Graphics::BlendBlit::blitGeneric;
#endif
	Graphics::Surface baseSurface, destSurface;
	baseSurface.create(128, 128, OldTransparentSurface::OldTransparentSurface::getSupportedPixelFormat());
	destSurface.create(256, 256, OldTransparentSurface::OldTransparentSurface::getSupportedPixelFormat());

	// Fill the source with a pattern that changes every 4 pixels
	for (int y = 0; y < baseSurface.h; y++) {
		for (int x = 0; x < baseSurface.w; x++) {
			int i = x / 4 + y / 4;
			baseSurface.setPixel(x, y, baseSurface.format.ARGBToColor((i & 16) * 255, (i & 1) * 255, (i & 2) * 255, (i & 4) * 255));
		}
	}

	OldTransparentSurface::OldTransparentSurface oldSurf(baseSurface, true);
	OldTransparentSurface::OldTransparentSurface oldSurfDest(destSurface, true);
	Graphics::ManagedSurface managedSurf(&baseSurface, DisposeAfterUse::NO);
	Graphics::ManagedSurface managedSurfDest(&destSurface, DisposeAfterUse::NO);

	int numIters = 0, numItersScaled = 0;
	double oldTime = 0.0, newTime = 0.0, genericTime = 0.0;
	double oldTimeScaled = 0.0, newTimeScaled = 0.0, genericTimeScaled = 0.0;
	const int iters = 2500;

	// Only BLEND_NORMAL is benchmarked
	for (int blendMode = Graphics::BLEND_NORMAL; blendMode < Graphics::BLEND_NORMAL + 1; blendMode++) {
		for (int alphaType = 0; alphaType <= Graphics::ALPHA_FULL; alphaType++) {
			for (int flipping = 0; flipping <= 3; flipping++) {
				// unscaled
				oldSurfDest.fillRect(Common::Rect(0, 0, oldSurfDest.w, oldSurfDest.h), oldSurfDest.format.ARGBToColor(255, 255, 255, 255));
				managedSurfDest.fillRect(Common::Rect(0, 0, managedSurfDest.w, managedSurfDest.h), managedSurfDest.format.ARGBToColor(255, 255, 255, 255));
				oldSurf.setAlphaMode((Graphics::AlphaType)alphaType);

				uint32 oldStart = g_system->getMillis();
				for (int i = 0; i < iters; i++) {
					oldSurf.blit(oldSurfDest, 0, 0, flipping, nullptr, TS_ARGB(255, 255, 255, 255), -1, -1, (Graphics::TSpriteBlendMode)blendMode);
				}
				oldTime += g_system->getMillis() - oldStart;

				uint32 newStart = g_system->getMillis();
				for (int i = 0; i < iters; i++) {
					managedSurfDest.blendBlitFrom(managedSurf, Common::Rect(0, 0, managedSurf.w, managedSurf.h), Common::Rect(0, 0, managedSurf.w, managedSurf.h), flipping, TS_ARGB(255, 255, 255, 255), (Graphics::TSpriteBlendMode)blendMode, (Graphics::AlphaType)alphaType);
				}
				newTime += g_system->getMillis() - newStart;

				// Same blit again, temporarily forced onto the generic implementation
				managedSurfDest.fillRect(Common::Rect(0, 0, managedSurfDest.w, managedSurfDest.h), managedSurfDest.format.ARGBToColor(255, 255, 255, 255));
				Graphics::BlendBlit::BlitFunc oldFunc = Graphics::BlendBlit::blitFunc;
				Graphics::BlendBlit::blitFunc = Graphics::BlendBlit::blitGeneric;
				uint32 genericStart = g_system->getMillis();
				for (int i = 0; i < iters; i++) {
					managedSurfDest.blendBlitFrom(managedSurf, Common::Rect(0, 0, managedSurf.w, managedSurf.h), Common::Rect(0, 0, managedSurf.w, managedSurf.h), flipping, TS_ARGB(255, 255, 255, 255), (Graphics::TSpriteBlendMode)blendMode, (Graphics::AlphaType)alphaType);
				}
				Graphics::BlendBlit::blitFunc = oldFunc;
				genericTime += g_system->getMillis() - genericStart;
				numIters++;

				// scaled
				oldSurfDest.fillRect(Common::Rect(0, 0, oldSurfDest.w, oldSurfDest.h), oldSurfDest.format.ARGBToColor(255, 255, 255, 255));
				managedSurfDest.fillRect(Common::Rect(0, 0, managedSurfDest.w, managedSurfDest.h), managedSurfDest.format.ARGBToColor(255, 255, 255, 255));
				oldSurf.setAlphaMode((Graphics::AlphaType)alphaType);

				oldStart = g_system->getMillis();
				for (int i = 0; i < iters; i++) {
					oldSurf.blit(oldSurfDest, 0, 0, flipping, nullptr, TS_ARGB(255, 255, 255, 255), oldSurfDest.w, oldSurfDest.h, (Graphics::TSpriteBlendMode)blendMode);
				}
				oldTimeScaled += g_system->getMillis() - oldStart;

				newStart = g_system->getMillis();
				for (int i = 0; i < iters; i++) {
					managedSurfDest.blendBlitFrom(managedSurf, Common::Rect(0, 0, managedSurfDest.w, managedSurfDest.h), Common::Rect(0, 0, managedSurf.w, managedSurf.h), flipping, TS_ARGB(255, 255, 255, 255), (Graphics::TSpriteBlendMode)blendMode, (Graphics::AlphaType)alphaType);
				}
				newTimeScaled += g_system->getMillis() - newStart;

				managedSurfDest.fillRect(Common::Rect(0, 0, managedSurfDest.w, managedSurfDest.h), managedSurfDest.format.ARGBToColor(255, 255, 255, 255));
				oldFunc = Graphics::BlendBlit::blitFunc;
				Graphics::BlendBlit::blitFunc = Graphics::BlendBlit::blitGeneric;
				genericStart = g_system->getMillis();
				for (int i = 0; i < iters; i++) {
					managedSurfDest.blendBlitFrom(managedSurf, Common::Rect(0, 0, managedSurfDest.w, managedSurfDest.h), Common::Rect(0, 0, managedSurf.w, managedSurf.h), flipping, TS_ARGB(255, 255, 255, 255), (Graphics::TSpriteBlendMode)blendMode, (Graphics::AlphaType)alphaType);
				}
				Graphics::BlendBlit::blitFunc = oldFunc;
				genericTimeScaled += g_system->getMillis() - genericStart;
				numItersScaled++;
			} // flipping
		} // alpha
	} // blend

	debug("Old TransparentSurface::blit avg time per %d iters (in milliseconds): %f\n", iters, oldTime / numIters);
	debug("New ManagedSurface::blendBlitFrom (non SIMD) avg time per %d iters (in milliseconds): %f\n", iters, genericTime / numIters);
	debug("New ManagedSurface::blendBlitFrom avg time per %d iters (in milliseconds): %f\n", iters, newTime / numIters);
	debug("Old SCALING TransparentSurface::blit avg time per %d iters (in milliseconds): %f\n", iters, oldTimeScaled / numItersScaled);
	debug("New SCALING ManagedSurface::blendBlitFrom (non SIMD) avg time per %d iters (in milliseconds): %f\n", iters, genericTimeScaled / numItersScaled);
	debug("New SCALING ManagedSurface::blendBlitFrom avg time per %d iters (in milliseconds): %f\n", iters, newTimeScaled / numItersScaled);
	debug("Note this speed test puts the old code in the best senario against the new code.");

	// Both surfaces were allocated with create() and are only borrowed
	// (DisposeAfterUse::NO) by the ManagedSurface wrappers, so free both.
	// NOTE(review): oldSurf / oldSurfDest are constructed with their second
	// argument set to true; if that makes them own a copy of the pixel data,
	// verify that the copy is released as well.
	baseSurface.free();
	destSurface.free();
#endif
}
void test_blend_blit_unfiltered() {
#ifdef TEST_BLEND_SPEED
Common::Rect dsts[] = {
Common::Rect(4, 4, 4+16, 4+16), // Case 0 (source clipping)
Common::Rect(24, 20, 24+16, 20+16), // Case 1 (outside of destination)
@ -971,7 +1068,7 @@ public:
newSurf.setAlphaMode((Graphics::AlphaType)alphaType);
newSurf.blit(newSurfDest, dsts[rect].left, dsts[rect].top, flipping, &srcs[rect], TS_ARGB(a, r, g, b), dsts[rect].width(), dsts[rect].height(), (Graphics::TSpriteBlendMode)blendMode);
managedSurfDest.fillRect(Common::Rect(0, 0, managedSurfDest.w, managedSurfDest.h), managedSurfDest.format.ARGBToColor(ba, br, bg, bb));
managedSurfDest.blendBlitFrom(managedSurf, srcs[rect], dsts[rect], flipping, BLENDBLIT_ARGB(a, r, g, b), (Graphics::TSpriteBlendMode)blendMode, (Graphics::AlphaType)alphaType);
managedSurfDest.blendBlitFrom(managedSurf, srcs[rect], dsts[rect], flipping, TS_ARGB(a, r, g, b), (Graphics::TSpriteBlendMode)blendMode, (Graphics::AlphaType)alphaType);
@ -1019,5 +1116,6 @@ public:
} // blend
baseSurface.free();
#endif
}
};

View File

@ -18,6 +18,10 @@ void BaseBackend::initBackend() {
OSystem::initBackend();
}
// Reports every queried feature as unsupported; the argument is ignored.
bool BaseBackend::hasFeature(OSystem::Feature f) {
	(void)f;
	return false;
}
// No-op: the body is empty, so the requested fill colour is ignored.
void BaseBackend::fillScreen(uint32 col) {
}