Bug 1116070 - Use neon to speed up the mask creation. r=longsonr

This commit is contained in:
Ethan Lin 2015-01-06 02:44:00 -05:00
parent 719c1cadc9
commit 4138e739cb
6 changed files with 151 additions and 33 deletions

View File

@ -27,7 +27,7 @@ random-if(B2G) == filter-html-01-extref.xhtml filter-html-01-ref.svg # Bug 10639
== mask-html-01.xhtml mask-html-01-ref.svg
== mask-html-01-extref-01.xhtml mask-html-01-ref.svg
random == mask-html-01-extref-02.xhtml mask-html-01-ref.svg # random due to bug 877661
fuzzy-if(B2G&&browserIsRemote,1,2000) == mask-html-zoomed-01.xhtml mask-html-01-ref.svg
fuzzy-if(B2G&&browserIsRemote,1,2300) == mask-html-zoomed-01.xhtml mask-html-01-ref.svg
# Skip XBL test case on B2G
skip-if(B2G) == mask-html-xbl-bound-01.html mask-html-01-ref.svg
== mask-transformed-html-01.xhtml ../pass.svg

View File

@ -48,6 +48,10 @@ UNIFIED_SOURCES += [
'SVGViewFrame.cpp',
]
if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['BUILD_ARM_NEON']:
SOURCES += ['nsSVGMaskFrameNEON.cpp']
SOURCES['nsSVGMaskFrameNEON.cpp'].flags += ['-mfpu=neon']
FAIL_ON_WARNINGS = True
FINAL_LIBRARY = 'xul'

View File

@ -13,26 +13,15 @@
#include "mozilla/RefPtr.h"
#include "nsSVGEffects.h"
#include "mozilla/dom/SVGMaskElement.h"
#ifdef BUILD_ARM_NEON
#include "mozilla/arm.h"
#include "nsSVGMaskFrameNEON.h"
#endif
using namespace mozilla;
using namespace mozilla::dom;
using namespace mozilla::gfx;
/**
* Byte offsets of channels in a native packed gfxColor or cairo image surface.
*/
#ifdef IS_BIG_ENDIAN
#define GFX_ARGB32_OFFSET_A 0
#define GFX_ARGB32_OFFSET_R 1
#define GFX_ARGB32_OFFSET_G 2
#define GFX_ARGB32_OFFSET_B 3
#else
#define GFX_ARGB32_OFFSET_A 3
#define GFX_ARGB32_OFFSET_R 2
#define GFX_ARGB32_OFFSET_G 1
#define GFX_ARGB32_OFFSET_B 0
#endif
// c = n / 255
// (c <= 0.04045 ? c / 12.92 : pow((c + 0.055) / 1.055, 2.4)) * 255 + 0.5
static const uint8_t gsRGBToLinearRGBMap[256] = {
@ -76,26 +65,35 @@ ComputesRGBLuminanceMask(uint8_t *aData,
const IntSize &aSize,
float aOpacity)
{
#ifdef BUILD_ARM_NEON
if (mozilla::supports_neon()) {
ComputesRGBLuminanceMask_NEON(aData, aStride, aSize, aOpacity);
return;
}
#endif
int32_t redFactor = 55 * aOpacity; // 255 * 0.2125 * opacity
int32_t greenFactor = 183 * aOpacity; // 255 * 0.7154 * opacity
int32_t blueFactor = 18 * aOpacity; // 255 * 0.0721
int32_t offset = aStride - 4 * aSize.width;
uint8_t *pixel = aData;
for (int32_t y = 0; y < aSize.height; y++) {
for (int32_t x = 0; x < aSize.width; x++) {
uint8_t *pixel = aData + aStride * y + 4 * x;
uint8_t a = pixel[GFX_ARGB32_OFFSET_A];
uint8_t luminance;
if (a) {
/* sRGB -> intensity (unpremultiply cancels out the
* (a/255.0) multiplication with aOpacity */
luminance =
static_cast<uint8_t>
((pixel[GFX_ARGB32_OFFSET_R] * 0.2125 +
pixel[GFX_ARGB32_OFFSET_G] * 0.7154 +
pixel[GFX_ARGB32_OFFSET_B] * 0.0721) *
aOpacity);
luminance = (redFactor * pixel[GFX_ARGB32_OFFSET_R] +
greenFactor * pixel[GFX_ARGB32_OFFSET_G] +
blueFactor * pixel[GFX_ARGB32_OFFSET_B]) >> 8;
} else {
luminance = 0;
}
memset(pixel, luminance, 4);
pixel += 4;
}
pixel += offset;
}
}
@ -105,9 +103,14 @@ ComputeLinearRGBLuminanceMask(uint8_t *aData,
const IntSize &aSize,
float aOpacity)
{
int32_t redFactor = 55 * aOpacity; // 255 * 0.2125 * opacity
int32_t greenFactor = 183 * aOpacity; // 255 * 0.7154 * opacity
int32_t blueFactor = 18 * aOpacity; // 255 * 0.0721
int32_t offset = aStride - 4 * aSize.width;
uint8_t *pixel = aData;
for (int32_t y = 0; y < aSize.height; y++) {
for (int32_t x = 0; x < aSize.width; x++) {
uint8_t *pixel = aData + aStride * y + 4 * x;
uint8_t a = pixel[GFX_ARGB32_OFFSET_A];
uint8_t luminance;
@ -125,17 +128,19 @@ ComputeLinearRGBLuminanceMask(uint8_t *aData,
/* sRGB -> linearRGB -> intensity */
luminance =
static_cast<uint8_t>
((gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_R]] *
0.2125 +
gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_G]] *
0.7154 +
gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_B]] *
0.0721) * (a / 255.0) * aOpacity);
(((gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_R]] *
redFactor +
gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_G]] *
greenFactor +
gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_B]] *
blueFactor) >> 8) * (a / 255.0f));
} else {
luminance = 0;
}
memset(pixel, luminance, 4);
pixel += 4;
}
pixel += offset;
}
}
@ -145,12 +150,16 @@ ComputeAlphaMask(uint8_t *aData,
const IntSize &aSize,
float aOpacity)
{
int32_t offset = aStride - 4 * aSize.width;
uint8_t *pixel = aData;
for (int32_t y = 0; y < aSize.height; y++) {
for (int32_t x = 0; x < aSize.width; x++) {
uint8_t *pixel = aData + aStride * y + 4 * x;
uint8_t luminance = pixel[GFX_ARGB32_OFFSET_A] * aOpacity;
memset(pixel, luminance, 4);
pixel += 4;
}
pixel += offset;
}
}

View File

@ -18,6 +18,21 @@ class gfxContext;
typedef nsSVGContainerFrame nsSVGMaskFrameBase;
/**
* Byte offsets of channels in a native packed gfxColor or cairo image surface.
*
* Each macro is the index, within one 4-byte pixel, of the byte that holds
* the named channel. The set is selected at compile time by IS_BIG_ENDIAN,
* so code can write pixel[GFX_ARGB32_OFFSET_R] etc. without caring about the
* byte order of the target.
*/
#ifdef IS_BIG_ENDIAN
// Big-endian: bytes are laid out A, R, G, B.
#define GFX_ARGB32_OFFSET_A 0
#define GFX_ARGB32_OFFSET_R 1
#define GFX_ARGB32_OFFSET_G 2
#define GFX_ARGB32_OFFSET_B 3
#else
// Otherwise: bytes are laid out B, G, R, A.
#define GFX_ARGB32_OFFSET_A 3
#define GFX_ARGB32_OFFSET_R 2
#define GFX_ARGB32_OFFSET_G 1
#define GFX_ARGB32_OFFSET_B 0
#endif
class nsSVGMaskFrame MOZ_FINAL : public nsSVGMaskFrameBase
{
friend nsIFrame*

View File

@ -0,0 +1,71 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsSVGMaskFrameNEON.h"
#include "nsSVGMaskFrame.h"
#include <arm_neon.h>
/**
 * NEON implementation of the sRGB luminance mask computation.
 *
 * Rewrites the surface in place: for every 4-byte pixel, all four bytes are
 * set to
 *   (R * redFactor + G * greenFactor + B * blueFactor) >> 8
 * or to 0 when the pixel's alpha byte is 0.
 *
 * Rows are processed 8 pixels at a time with NEON; the trailing
 * (aSize.width % 8) pixels of each row are handled by scalar code.
 *
 * @param aData    First byte of the pixel buffer; modified in place.
 * @param aStride  Number of bytes from the start of one row to the next.
 * @param aSize    Width and height of the surface, in pixels.
 * @param aOpacity Opacity that is folded into the channel weights.
 *
 * NOTE(review): the weights are broadcast into 8-bit lanes, so this
 * presumably expects aOpacity to be within [0, 1] - confirm with callers.
 */
void
ComputesRGBLuminanceMask_NEON(uint8_t *aData,
                              int32_t aStride,
                              const IntSize &aSize,
                              float aOpacity)
{
  int32_t redFactor = 55 * aOpacity; // 255 * 0.2125 * opacity
  int32_t greenFactor = 183 * aOpacity; // 255 * 0.7154 * opacity
  int32_t blueFactor = 18 * aOpacity; // 255 * 0.0721 * opacity

  uint8_t *pixel = aData;
  // Bytes to skip at the end of each row to reach the start of the next one.
  int32_t offset = aStride - 4 * aSize.width;
  int32_t remainderWidth = aSize.width % 8;
  int32_t roundedWidth = aSize.width - remainderWidth;

  uint8x8_t redVec = vdup_n_u8(redFactor);
  uint8x8_t greenVec = vdup_n_u8(greenFactor);
  uint8x8_t blueVec = vdup_n_u8(blueFactor);

  for (int32_t y = 0; y < aSize.height; y++) {
    // Calculate luminance by neon with 8 pixels per loop
    for (int32_t x = 0; x < roundedWidth; x += 8) {
      // De-interleave 8 pixels: val[i] holds byte i of each pixel.
      uint8x8x4_t argb = vld4_u8(pixel);

      uint16x8_t temp =
        vmull_u8(argb.val[GFX_ARGB32_OFFSET_R], redVec); // temp = red * redFactor
      temp = vmlal_u8(temp, argb.val[GFX_ARGB32_OFFSET_G], greenVec); // temp += green * greenFactor
      temp = vmlal_u8(temp, argb.val[GFX_ARGB32_OFFSET_B], blueVec); // temp += blue * blueFactor
      uint8x8_t gray = vshrn_n_u16(temp, 8); // gray = temp >> 8

      // Force the result to zero where alpha is zero. vtst yields 0xFF in
      // every lane whose alpha is non-zero and 0x00 otherwise, so no
      // separate zeroing pass over the surface is needed.
      uint8x8_t alpha = argb.val[GFX_ARGB32_OFFSET_A];
      gray = vand_u8(gray, vtst_u8(alpha, alpha));

      // Put the result to the 8 pixels in argb format
      uint8x8x4_t result;
      result.val[0] = gray;
      result.val[1] = gray;
      result.val[2] = gray;
      result.val[3] = gray;
      vst4_u8(pixel, result);
      pixel += 8 * 4;
    }

    // Calculate the rest pixels of the line by cpu
    for (int32_t x = 0; x < remainderWidth; x++) {
      uint8_t luminance = 0;
      if (pixel[GFX_ARGB32_OFFSET_A]) {
        luminance = (redFactor * pixel[GFX_ARGB32_OFFSET_R] +
                     greenFactor * pixel[GFX_ARGB32_OFFSET_G] +
                     blueFactor * pixel[GFX_ARGB32_OFFSET_B]) >> 8;
      }
      memset(pixel, luminance, 4);
      pixel += 4;
    }
    pixel += offset;
  }
}

View File

@ -0,0 +1,19 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef __NS_SVGMASKFRAMENEON_H__
#define __NS_SVGMASKFRAMENEON_H__
#include "mozilla/gfx/2D.h"
using namespace mozilla::gfx;
/**
 * NEON implementation of the sRGB luminance mask computation.
 *
 * Overwrites each 4-byte pixel of the surface in place with its luminance
 * (a weighted sum of the R, G and B bytes scaled by aOpacity), or with 0
 * when the pixel's alpha byte is 0.
 *
 * @param aData    First byte of the pixel buffer; modified in place.
 * @param aStride  Number of bytes from the start of one row to the next.
 * @param aSize    Width and height of the surface, in pixels.
 * @param aOpacity Opacity that is folded into the channel weights.
 */
void
ComputesRGBLuminanceMask_NEON(uint8_t *aData,
int32_t aStride,
const IntSize &aSize,
float aOpacity);
#endif /* __NS_SVGMASKFRAMENEON_H__ */