mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-19 08:15:31 +00:00
Bug 1551844 - Implement NEON variations of QCMS ICCv2 transforms. r=lsalzman
Differential Revision: https://phabricator.services.mozilla.com/D34582
This commit is contained in:
parent
1133b6716d
commit
3334a54347
@ -24,6 +24,7 @@ if CONFIG['CC_TYPE'] in ('clang', 'gcc'):
|
||||
|
||||
use_sse1 = False
|
||||
use_sse2 = False
|
||||
use_neon = False
|
||||
use_altivec = False
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
use_sse2 = True
|
||||
@ -32,6 +33,8 @@ if CONFIG['INTEL_ARCHITECTURE']:
|
||||
use_sse1 = True
|
||||
else:
|
||||
use_sse1 = True
|
||||
elif CONFIG['CPU_ARCH'] == 'aarch64' or CONFIG['BUILD_ARM_NEON']:
|
||||
use_neon = True
|
||||
elif CONFIG['HAVE_ALTIVEC']:
|
||||
use_altivec = True
|
||||
|
||||
@ -43,6 +46,10 @@ if use_sse2:
|
||||
SOURCES += ['transform-sse2.cpp']
|
||||
SOURCES['transform-sse2.cpp'].flags += CONFIG['SSE2_FLAGS']
|
||||
|
||||
if use_neon:
|
||||
SOURCES += ['transform-neon.cpp']
|
||||
SOURCES['transform-neon.cpp'].flags += CONFIG['NEON_FLAGS']
|
||||
|
||||
if use_altivec:
|
||||
SOURCES += ['transform-altivec.cpp']
|
||||
SOURCES['transform-altivec.cpp'].flags += ['-maltivec']
|
||||
|
@ -171,6 +171,7 @@ void qcms_transform_release(qcms_transform *);
|
||||
void qcms_transform_data(qcms_transform *transform, const void *src, void *dest, size_t length);
|
||||
|
||||
void qcms_enable_iccv4();
|
||||
void qcms_enable_neon();
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -302,7 +302,21 @@ void qcms_transform_data_bgra_out_lut_altivec(const qcms_transform *transform,
|
||||
unsigned char *dest,
|
||||
size_t length);
|
||||
|
||||
void qcms_transform_data_rgb_out_lut_neon(const qcms_transform *transform,
|
||||
const unsigned char *src,
|
||||
unsigned char *dest,
|
||||
size_t length);
|
||||
void qcms_transform_data_rgba_out_lut_neon(const qcms_transform *transform,
|
||||
const unsigned char *src,
|
||||
unsigned char *dest,
|
||||
size_t length);
|
||||
void qcms_transform_data_bgra_out_lut_neon(const qcms_transform *transform,
|
||||
const unsigned char *src,
|
||||
unsigned char *dest,
|
||||
size_t length);
|
||||
|
||||
extern bool qcms_supports_iccv4;
|
||||
extern bool qcms_supports_neon;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
|
138
gfx/qcms/transform-neon.cpp
Normal file
138
gfx/qcms/transform-neon.cpp
Normal file
@ -0,0 +1,138 @@
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "qcmsint.h"
|
||||
|
||||
#define FLOATSCALE (float)(PRECACHE_OUTPUT_SIZE)
|
||||
#define CLAMPMAXVAL ( ((float) (PRECACHE_OUTPUT_SIZE - 1)) / PRECACHE_OUTPUT_SIZE )
|
||||
static const ALIGN float floatScale = FLOATSCALE;
|
||||
static const ALIGN float clampMaxValue = CLAMPMAXVAL;
|
||||
|
||||
template <size_t kRIndex, size_t kGIndex, size_t kBIndex, size_t kAIndex = NO_A_INDEX>
|
||||
static void qcms_transform_data_template_lut_neon(const qcms_transform *transform,
|
||||
const unsigned char *src,
|
||||
unsigned char *dest,
|
||||
size_t length)
|
||||
{
|
||||
unsigned int i;
|
||||
const float (*mat)[4] = transform->matrix;
|
||||
|
||||
/* deref *transform now to avoid it in loop */
|
||||
const float *igtbl_r = transform->input_gamma_table_r;
|
||||
const float *igtbl_g = transform->input_gamma_table_g;
|
||||
const float *igtbl_b = transform->input_gamma_table_b;
|
||||
|
||||
/* deref *transform now to avoid it in loop */
|
||||
const uint8_t *otdata_r = &transform->output_table_r->data[0];
|
||||
const uint8_t *otdata_g = &transform->output_table_g->data[0];
|
||||
const uint8_t *otdata_b = &transform->output_table_b->data[0];
|
||||
|
||||
/* input matrix values never change */
|
||||
const float32x4_t mat0 = vld1q_f32(mat[0]);
|
||||
const float32x4_t mat1 = vld1q_f32(mat[1]);
|
||||
const float32x4_t mat2 = vld1q_f32(mat[2]);
|
||||
|
||||
/* these values don't change, either */
|
||||
const float32x4_t max = vld1q_dup_f32(&clampMaxValue);
|
||||
const float32x4_t min = { 0.0f, 0.0f, 0.0f, 0.0f };
|
||||
const float32x4_t scale = vld1q_dup_f32(&floatScale);
|
||||
const unsigned int components = A_INDEX_COMPONENTS(kAIndex);
|
||||
|
||||
/* working variables */
|
||||
float32x4_t vec_r, vec_g, vec_b;
|
||||
int32x4_t result;
|
||||
unsigned char alpha;
|
||||
|
||||
/* CYA */
|
||||
if (!length)
|
||||
return;
|
||||
|
||||
/* one pixel is handled outside of the loop */
|
||||
length--;
|
||||
|
||||
/* setup for transforming 1st pixel */
|
||||
vec_r = vld1q_dup_f32(&igtbl_r[src[kRIndex]]);
|
||||
vec_g = vld1q_dup_f32(&igtbl_g[src[kGIndex]]);
|
||||
vec_b = vld1q_dup_f32(&igtbl_b[src[kBIndex]]);
|
||||
if (kAIndex != NO_A_INDEX) {
|
||||
alpha = src[kAIndex];
|
||||
}
|
||||
src += components;
|
||||
|
||||
/* transform all but final pixel */
|
||||
|
||||
for (i=0; i<length; i++)
|
||||
{
|
||||
/* gamma * matrix */
|
||||
vec_r = vmulq_f32(vec_r, mat0);
|
||||
vec_g = vmulq_f32(vec_g, mat1);
|
||||
vec_b = vmulq_f32(vec_b, mat2);
|
||||
|
||||
/* store alpha for this pixel; load alpha for next */
|
||||
if (kAIndex != NO_A_INDEX) {
|
||||
dest[kAIndex] = alpha;
|
||||
alpha = src[kAIndex];
|
||||
}
|
||||
|
||||
/* crunch, crunch, crunch */
|
||||
vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b));
|
||||
vec_r = vmaxq_f32(min, vec_r);
|
||||
vec_r = vminq_f32(max, vec_r);
|
||||
result = vcvtq_s32_f32(vmulq_f32(vec_r, scale));
|
||||
|
||||
/* use calc'd indices to output RGB values */
|
||||
dest[kRIndex] = otdata_r[vgetq_lane_s32(result, 0)];
|
||||
dest[kGIndex] = otdata_g[vgetq_lane_s32(result, 1)];
|
||||
dest[kBIndex] = otdata_b[vgetq_lane_s32(result, 2)];
|
||||
|
||||
/* load for next loop while store completes */
|
||||
vec_r = vld1q_dup_f32(&igtbl_r[src[kRIndex]]);
|
||||
vec_g = vld1q_dup_f32(&igtbl_g[src[kGIndex]]);
|
||||
vec_b = vld1q_dup_f32(&igtbl_b[src[kBIndex]]);
|
||||
|
||||
dest += components;
|
||||
src += components;
|
||||
}
|
||||
|
||||
/* handle final (maybe only) pixel */
|
||||
|
||||
vec_r = vmulq_f32(vec_r, mat0);
|
||||
vec_g = vmulq_f32(vec_g, mat1);
|
||||
vec_b = vmulq_f32(vec_b, mat2);
|
||||
|
||||
if (kAIndex != NO_A_INDEX) {
|
||||
dest[kAIndex] = alpha;
|
||||
}
|
||||
|
||||
vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b));
|
||||
vec_r = vmaxq_f32(min, vec_r);
|
||||
vec_r = vminq_f32(max, vec_r);
|
||||
result = vcvtq_s32_f32(vmulq_f32(vec_r, scale));
|
||||
|
||||
dest[kRIndex] = otdata_r[vgetq_lane_s32(result, 0)];
|
||||
dest[kGIndex] = otdata_g[vgetq_lane_s32(result, 1)];
|
||||
dest[kBIndex] = otdata_b[vgetq_lane_s32(result, 2)];
|
||||
}
|
||||
|
||||
void qcms_transform_data_rgb_out_lut_neon(const qcms_transform *transform,
|
||||
const unsigned char *src,
|
||||
unsigned char *dest,
|
||||
size_t length)
|
||||
{
|
||||
qcms_transform_data_template_lut_neon<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX>(transform, src, dest, length);
|
||||
}
|
||||
|
||||
void qcms_transform_data_rgba_out_lut_neon(const qcms_transform *transform,
|
||||
const unsigned char *src,
|
||||
unsigned char *dest,
|
||||
size_t length)
|
||||
{
|
||||
qcms_transform_data_template_lut_neon<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX, RGBA_A_INDEX>(transform, src, dest, length);
|
||||
}
|
||||
|
||||
void qcms_transform_data_bgra_out_lut_neon(const qcms_transform *transform,
|
||||
const unsigned char *src,
|
||||
unsigned char *dest,
|
||||
size_t length)
|
||||
{
|
||||
qcms_transform_data_template_lut_neon<BGRA_R_INDEX, BGRA_G_INDEX, BGRA_B_INDEX, BGRA_A_INDEX>(transform, src, dest, length);
|
||||
}
|
@ -1232,6 +1232,17 @@ qcms_transform* qcms_transform_create(
|
||||
#endif
|
||||
} else
|
||||
#endif
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
if (qcms_supports_neon) {
|
||||
if (in_type == QCMS_DATA_RGB_8) {
|
||||
transform->transform_fn = qcms_transform_data_rgb_out_lut_neon;
|
||||
} else if (in_type == QCMS_DATA_RGBA_8) {
|
||||
transform->transform_fn = qcms_transform_data_rgba_out_lut_neon;
|
||||
} else if (in_type == QCMS_DATA_BGRA_8) {
|
||||
transform->transform_fn = qcms_transform_data_bgra_out_lut_neon;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
#if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__))
|
||||
if (have_altivec()) {
|
||||
if (in_type == QCMS_DATA_RGB_8) {
|
||||
@ -1367,3 +1378,13 @@ void qcms_enable_iccv4()
|
||||
{
|
||||
qcms_supports_iccv4 = true;
|
||||
}
|
||||
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
bool qcms_supports_neon;
|
||||
#endif
|
||||
void qcms_enable_neon()
|
||||
{
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
qcms_supports_neon = true;
|
||||
#endif
|
||||
}
|
||||
|
@ -41,6 +41,8 @@
|
||||
#include "VRProcessManager.h"
|
||||
#include "VRThread.h"
|
||||
|
||||
#include "mozilla/arm.h"
|
||||
|
||||
#ifdef XP_WIN
|
||||
# include <process.h>
|
||||
# define getpid _getpid
|
||||
@ -1987,6 +1989,11 @@ eCMSMode gfxPlatform::GetCMSMode() {
|
||||
if (enableV4) {
|
||||
qcms_enable_iccv4();
|
||||
}
|
||||
#ifdef MOZILLA_MAY_SUPPORT_NEON
|
||||
if (mozilla::supports_neon()) {
|
||||
qcms_enable_neon();
|
||||
}
|
||||
#endif
|
||||
gCMSInitialized = true;
|
||||
}
|
||||
return gCMSMode;
|
||||
|
Loading…
Reference in New Issue
Block a user