Bug 1551844 - Implement NEON variations of QCMS ICCv2 transforms. r=lsalzman

Differential Revision: https://phabricator.services.mozilla.com/D34582
This commit is contained in:
Andrew Osmond 2019-06-13 07:12:43 -04:00
parent 1133b6716d
commit 3334a54347
6 changed files with 188 additions and 0 deletions

View File

@ -24,6 +24,7 @@ if CONFIG['CC_TYPE'] in ('clang', 'gcc'):
use_sse1 = False use_sse1 = False
use_sse2 = False use_sse2 = False
use_neon = False
use_altivec = False use_altivec = False
if CONFIG['INTEL_ARCHITECTURE']: if CONFIG['INTEL_ARCHITECTURE']:
use_sse2 = True use_sse2 = True
@ -32,6 +33,8 @@ if CONFIG['INTEL_ARCHITECTURE']:
use_sse1 = True use_sse1 = True
else: else:
use_sse1 = True use_sse1 = True
elif CONFIG['CPU_ARCH'] == 'aarch64' or CONFIG['BUILD_ARM_NEON']:
use_neon = True
elif CONFIG['HAVE_ALTIVEC']: elif CONFIG['HAVE_ALTIVEC']:
use_altivec = True use_altivec = True
@ -43,6 +46,10 @@ if use_sse2:
SOURCES += ['transform-sse2.cpp'] SOURCES += ['transform-sse2.cpp']
SOURCES['transform-sse2.cpp'].flags += CONFIG['SSE2_FLAGS'] SOURCES['transform-sse2.cpp'].flags += CONFIG['SSE2_FLAGS']
if use_neon:
SOURCES += ['transform-neon.cpp']
SOURCES['transform-neon.cpp'].flags += CONFIG['NEON_FLAGS']
if use_altivec: if use_altivec:
SOURCES += ['transform-altivec.cpp'] SOURCES += ['transform-altivec.cpp']
SOURCES['transform-altivec.cpp'].flags += ['-maltivec'] SOURCES['transform-altivec.cpp'].flags += ['-maltivec']

View File

@ -171,6 +171,7 @@ void qcms_transform_release(qcms_transform *);
void qcms_transform_data(qcms_transform *transform, const void *src, void *dest, size_t length); void qcms_transform_data(qcms_transform *transform, const void *src, void *dest, size_t length);
void qcms_enable_iccv4(); void qcms_enable_iccv4();
void qcms_enable_neon();
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -302,7 +302,21 @@ void qcms_transform_data_bgra_out_lut_altivec(const qcms_transform *transform,
unsigned char *dest, unsigned char *dest,
size_t length); size_t length);
void qcms_transform_data_rgb_out_lut_neon(const qcms_transform *transform,
const unsigned char *src,
unsigned char *dest,
size_t length);
void qcms_transform_data_rgba_out_lut_neon(const qcms_transform *transform,
const unsigned char *src,
unsigned char *dest,
size_t length);
void qcms_transform_data_bgra_out_lut_neon(const qcms_transform *transform,
const unsigned char *src,
unsigned char *dest,
size_t length);
extern bool qcms_supports_iccv4; extern bool qcms_supports_iccv4;
extern bool qcms_supports_neon;
#ifdef _MSC_VER #ifdef _MSC_VER

138
gfx/qcms/transform-neon.cpp Normal file
View File

@ -0,0 +1,138 @@
#include <arm_neon.h>
#include "qcmsint.h"
#define FLOATSCALE (float)(PRECACHE_OUTPUT_SIZE)
#define CLAMPMAXVAL ( ((float) (PRECACHE_OUTPUT_SIZE - 1)) / PRECACHE_OUTPUT_SIZE )
static const ALIGN float floatScale = FLOATSCALE;
static const ALIGN float clampMaxValue = CLAMPMAXVAL;
template <size_t kRIndex, size_t kGIndex, size_t kBIndex, size_t kAIndex = NO_A_INDEX>
static void qcms_transform_data_template_lut_neon(const qcms_transform *transform,
const unsigned char *src,
unsigned char *dest,
size_t length)
{
unsigned int i;
const float (*mat)[4] = transform->matrix;
/* deref *transform now to avoid it in loop */
const float *igtbl_r = transform->input_gamma_table_r;
const float *igtbl_g = transform->input_gamma_table_g;
const float *igtbl_b = transform->input_gamma_table_b;
/* deref *transform now to avoid it in loop */
const uint8_t *otdata_r = &transform->output_table_r->data[0];
const uint8_t *otdata_g = &transform->output_table_g->data[0];
const uint8_t *otdata_b = &transform->output_table_b->data[0];
/* input matrix values never change */
const float32x4_t mat0 = vld1q_f32(mat[0]);
const float32x4_t mat1 = vld1q_f32(mat[1]);
const float32x4_t mat2 = vld1q_f32(mat[2]);
/* these values don't change, either */
const float32x4_t max = vld1q_dup_f32(&clampMaxValue);
const float32x4_t min = { 0.0f, 0.0f, 0.0f, 0.0f };
const float32x4_t scale = vld1q_dup_f32(&floatScale);
const unsigned int components = A_INDEX_COMPONENTS(kAIndex);
/* working variables */
float32x4_t vec_r, vec_g, vec_b;
int32x4_t result;
unsigned char alpha;
/* CYA */
if (!length)
return;
/* one pixel is handled outside of the loop */
length--;
/* setup for transforming 1st pixel */
vec_r = vld1q_dup_f32(&igtbl_r[src[kRIndex]]);
vec_g = vld1q_dup_f32(&igtbl_g[src[kGIndex]]);
vec_b = vld1q_dup_f32(&igtbl_b[src[kBIndex]]);
if (kAIndex != NO_A_INDEX) {
alpha = src[kAIndex];
}
src += components;
/* transform all but final pixel */
for (i=0; i<length; i++)
{
/* gamma * matrix */
vec_r = vmulq_f32(vec_r, mat0);
vec_g = vmulq_f32(vec_g, mat1);
vec_b = vmulq_f32(vec_b, mat2);
/* store alpha for this pixel; load alpha for next */
if (kAIndex != NO_A_INDEX) {
dest[kAIndex] = alpha;
alpha = src[kAIndex];
}
/* crunch, crunch, crunch */
vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b));
vec_r = vmaxq_f32(min, vec_r);
vec_r = vminq_f32(max, vec_r);
result = vcvtq_s32_f32(vmulq_f32(vec_r, scale));
/* use calc'd indices to output RGB values */
dest[kRIndex] = otdata_r[vgetq_lane_s32(result, 0)];
dest[kGIndex] = otdata_g[vgetq_lane_s32(result, 1)];
dest[kBIndex] = otdata_b[vgetq_lane_s32(result, 2)];
/* load for next loop while store completes */
vec_r = vld1q_dup_f32(&igtbl_r[src[kRIndex]]);
vec_g = vld1q_dup_f32(&igtbl_g[src[kGIndex]]);
vec_b = vld1q_dup_f32(&igtbl_b[src[kBIndex]]);
dest += components;
src += components;
}
/* handle final (maybe only) pixel */
vec_r = vmulq_f32(vec_r, mat0);
vec_g = vmulq_f32(vec_g, mat1);
vec_b = vmulq_f32(vec_b, mat2);
if (kAIndex != NO_A_INDEX) {
dest[kAIndex] = alpha;
}
vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b));
vec_r = vmaxq_f32(min, vec_r);
vec_r = vminq_f32(max, vec_r);
result = vcvtq_s32_f32(vmulq_f32(vec_r, scale));
dest[kRIndex] = otdata_r[vgetq_lane_s32(result, 0)];
dest[kGIndex] = otdata_g[vgetq_lane_s32(result, 1)];
dest[kBIndex] = otdata_b[vgetq_lane_s32(result, 2)];
}
void qcms_transform_data_rgb_out_lut_neon(const qcms_transform *transform,
const unsigned char *src,
unsigned char *dest,
size_t length)
{
qcms_transform_data_template_lut_neon<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX>(transform, src, dest, length);
}
void qcms_transform_data_rgba_out_lut_neon(const qcms_transform *transform,
const unsigned char *src,
unsigned char *dest,
size_t length)
{
qcms_transform_data_template_lut_neon<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX, RGBA_A_INDEX>(transform, src, dest, length);
}
void qcms_transform_data_bgra_out_lut_neon(const qcms_transform *transform,
const unsigned char *src,
unsigned char *dest,
size_t length)
{
qcms_transform_data_template_lut_neon<BGRA_R_INDEX, BGRA_G_INDEX, BGRA_B_INDEX, BGRA_A_INDEX>(transform, src, dest, length);
}

View File

@ -1232,6 +1232,17 @@ qcms_transform* qcms_transform_create(
#endif #endif
} else } else
#endif #endif
#if defined(__arm__) || defined(__aarch64__)
if (qcms_supports_neon) {
if (in_type == QCMS_DATA_RGB_8) {
transform->transform_fn = qcms_transform_data_rgb_out_lut_neon;
} else if (in_type == QCMS_DATA_RGBA_8) {
transform->transform_fn = qcms_transform_data_rgba_out_lut_neon;
} else if (in_type == QCMS_DATA_BGRA_8) {
transform->transform_fn = qcms_transform_data_bgra_out_lut_neon;
}
} else
#endif
#if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__)) #if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__))
if (have_altivec()) { if (have_altivec()) {
if (in_type == QCMS_DATA_RGB_8) { if (in_type == QCMS_DATA_RGB_8) {
@ -1367,3 +1378,13 @@ void qcms_enable_iccv4()
{ {
qcms_supports_iccv4 = true; qcms_supports_iccv4 = true;
} }
#if defined(__arm__) || defined(__aarch64__)
bool qcms_supports_neon;
#endif
void qcms_enable_neon()
{
#if defined(__arm__) || defined(__aarch64__)
qcms_supports_neon = true;
#endif
}

View File

@ -41,6 +41,8 @@
#include "VRProcessManager.h" #include "VRProcessManager.h"
#include "VRThread.h" #include "VRThread.h"
#include "mozilla/arm.h"
#ifdef XP_WIN #ifdef XP_WIN
# include <process.h> # include <process.h>
# define getpid _getpid # define getpid _getpid
@ -1987,6 +1989,11 @@ eCMSMode gfxPlatform::GetCMSMode() {
if (enableV4) { if (enableV4) {
qcms_enable_iccv4(); qcms_enable_iccv4();
} }
#ifdef MOZILLA_MAY_SUPPORT_NEON
if (mozilla::supports_neon()) {
qcms_enable_neon();
}
#endif
gCMSInitialized = true; gCMSInitialized = true;
} }
return gCMSMode; return gCMSMode;