mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-19 08:15:31 +00:00
Bug 1551844 - Implement NEON variations of QCMS ICCv2 transforms. r=lsalzman
Differential Revision: https://phabricator.services.mozilla.com/D34582
This commit is contained in:
parent
1133b6716d
commit
3334a54347
@ -24,6 +24,7 @@ if CONFIG['CC_TYPE'] in ('clang', 'gcc'):
|
|||||||
|
|
||||||
use_sse1 = False
|
use_sse1 = False
|
||||||
use_sse2 = False
|
use_sse2 = False
|
||||||
|
use_neon = False
|
||||||
use_altivec = False
|
use_altivec = False
|
||||||
if CONFIG['INTEL_ARCHITECTURE']:
|
if CONFIG['INTEL_ARCHITECTURE']:
|
||||||
use_sse2 = True
|
use_sse2 = True
|
||||||
@ -32,6 +33,8 @@ if CONFIG['INTEL_ARCHITECTURE']:
|
|||||||
use_sse1 = True
|
use_sse1 = True
|
||||||
else:
|
else:
|
||||||
use_sse1 = True
|
use_sse1 = True
|
||||||
|
elif CONFIG['CPU_ARCH'] == 'aarch64' or CONFIG['BUILD_ARM_NEON']:
|
||||||
|
use_neon = True
|
||||||
elif CONFIG['HAVE_ALTIVEC']:
|
elif CONFIG['HAVE_ALTIVEC']:
|
||||||
use_altivec = True
|
use_altivec = True
|
||||||
|
|
||||||
@ -43,6 +46,10 @@ if use_sse2:
|
|||||||
SOURCES += ['transform-sse2.cpp']
|
SOURCES += ['transform-sse2.cpp']
|
||||||
SOURCES['transform-sse2.cpp'].flags += CONFIG['SSE2_FLAGS']
|
SOURCES['transform-sse2.cpp'].flags += CONFIG['SSE2_FLAGS']
|
||||||
|
|
||||||
|
if use_neon:
|
||||||
|
SOURCES += ['transform-neon.cpp']
|
||||||
|
SOURCES['transform-neon.cpp'].flags += CONFIG['NEON_FLAGS']
|
||||||
|
|
||||||
if use_altivec:
|
if use_altivec:
|
||||||
SOURCES += ['transform-altivec.cpp']
|
SOURCES += ['transform-altivec.cpp']
|
||||||
SOURCES['transform-altivec.cpp'].flags += ['-maltivec']
|
SOURCES['transform-altivec.cpp'].flags += ['-maltivec']
|
||||||
|
@ -171,6 +171,7 @@ void qcms_transform_release(qcms_transform *);
|
|||||||
void qcms_transform_data(qcms_transform *transform, const void *src, void *dest, size_t length);
|
void qcms_transform_data(qcms_transform *transform, const void *src, void *dest, size_t length);
|
||||||
|
|
||||||
void qcms_enable_iccv4();
|
void qcms_enable_iccv4();
|
||||||
|
void qcms_enable_neon();
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
@ -302,7 +302,21 @@ void qcms_transform_data_bgra_out_lut_altivec(const qcms_transform *transform,
|
|||||||
unsigned char *dest,
|
unsigned char *dest,
|
||||||
size_t length);
|
size_t length);
|
||||||
|
|
||||||
|
void qcms_transform_data_rgb_out_lut_neon(const qcms_transform *transform,
|
||||||
|
const unsigned char *src,
|
||||||
|
unsigned char *dest,
|
||||||
|
size_t length);
|
||||||
|
void qcms_transform_data_rgba_out_lut_neon(const qcms_transform *transform,
|
||||||
|
const unsigned char *src,
|
||||||
|
unsigned char *dest,
|
||||||
|
size_t length);
|
||||||
|
void qcms_transform_data_bgra_out_lut_neon(const qcms_transform *transform,
|
||||||
|
const unsigned char *src,
|
||||||
|
unsigned char *dest,
|
||||||
|
size_t length);
|
||||||
|
|
||||||
extern bool qcms_supports_iccv4;
|
extern bool qcms_supports_iccv4;
|
||||||
|
extern bool qcms_supports_neon;
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
|
|
||||||
|
138
gfx/qcms/transform-neon.cpp
Normal file
138
gfx/qcms/transform-neon.cpp
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
#include <arm_neon.h>
|
||||||
|
|
||||||
|
#include "qcmsint.h"
|
||||||
|
|
||||||
|
#define FLOATSCALE (float)(PRECACHE_OUTPUT_SIZE)
|
||||||
|
#define CLAMPMAXVAL ( ((float) (PRECACHE_OUTPUT_SIZE - 1)) / PRECACHE_OUTPUT_SIZE )
|
||||||
|
static const ALIGN float floatScale = FLOATSCALE;
|
||||||
|
static const ALIGN float clampMaxValue = CLAMPMAXVAL;
|
||||||
|
|
||||||
|
template <size_t kRIndex, size_t kGIndex, size_t kBIndex, size_t kAIndex = NO_A_INDEX>
|
||||||
|
static void qcms_transform_data_template_lut_neon(const qcms_transform *transform,
|
||||||
|
const unsigned char *src,
|
||||||
|
unsigned char *dest,
|
||||||
|
size_t length)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
const float (*mat)[4] = transform->matrix;
|
||||||
|
|
||||||
|
/* deref *transform now to avoid it in loop */
|
||||||
|
const float *igtbl_r = transform->input_gamma_table_r;
|
||||||
|
const float *igtbl_g = transform->input_gamma_table_g;
|
||||||
|
const float *igtbl_b = transform->input_gamma_table_b;
|
||||||
|
|
||||||
|
/* deref *transform now to avoid it in loop */
|
||||||
|
const uint8_t *otdata_r = &transform->output_table_r->data[0];
|
||||||
|
const uint8_t *otdata_g = &transform->output_table_g->data[0];
|
||||||
|
const uint8_t *otdata_b = &transform->output_table_b->data[0];
|
||||||
|
|
||||||
|
/* input matrix values never change */
|
||||||
|
const float32x4_t mat0 = vld1q_f32(mat[0]);
|
||||||
|
const float32x4_t mat1 = vld1q_f32(mat[1]);
|
||||||
|
const float32x4_t mat2 = vld1q_f32(mat[2]);
|
||||||
|
|
||||||
|
/* these values don't change, either */
|
||||||
|
const float32x4_t max = vld1q_dup_f32(&clampMaxValue);
|
||||||
|
const float32x4_t min = { 0.0f, 0.0f, 0.0f, 0.0f };
|
||||||
|
const float32x4_t scale = vld1q_dup_f32(&floatScale);
|
||||||
|
const unsigned int components = A_INDEX_COMPONENTS(kAIndex);
|
||||||
|
|
||||||
|
/* working variables */
|
||||||
|
float32x4_t vec_r, vec_g, vec_b;
|
||||||
|
int32x4_t result;
|
||||||
|
unsigned char alpha;
|
||||||
|
|
||||||
|
/* CYA */
|
||||||
|
if (!length)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* one pixel is handled outside of the loop */
|
||||||
|
length--;
|
||||||
|
|
||||||
|
/* setup for transforming 1st pixel */
|
||||||
|
vec_r = vld1q_dup_f32(&igtbl_r[src[kRIndex]]);
|
||||||
|
vec_g = vld1q_dup_f32(&igtbl_g[src[kGIndex]]);
|
||||||
|
vec_b = vld1q_dup_f32(&igtbl_b[src[kBIndex]]);
|
||||||
|
if (kAIndex != NO_A_INDEX) {
|
||||||
|
alpha = src[kAIndex];
|
||||||
|
}
|
||||||
|
src += components;
|
||||||
|
|
||||||
|
/* transform all but final pixel */
|
||||||
|
|
||||||
|
for (i=0; i<length; i++)
|
||||||
|
{
|
||||||
|
/* gamma * matrix */
|
||||||
|
vec_r = vmulq_f32(vec_r, mat0);
|
||||||
|
vec_g = vmulq_f32(vec_g, mat1);
|
||||||
|
vec_b = vmulq_f32(vec_b, mat2);
|
||||||
|
|
||||||
|
/* store alpha for this pixel; load alpha for next */
|
||||||
|
if (kAIndex != NO_A_INDEX) {
|
||||||
|
dest[kAIndex] = alpha;
|
||||||
|
alpha = src[kAIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* crunch, crunch, crunch */
|
||||||
|
vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b));
|
||||||
|
vec_r = vmaxq_f32(min, vec_r);
|
||||||
|
vec_r = vminq_f32(max, vec_r);
|
||||||
|
result = vcvtq_s32_f32(vmulq_f32(vec_r, scale));
|
||||||
|
|
||||||
|
/* use calc'd indices to output RGB values */
|
||||||
|
dest[kRIndex] = otdata_r[vgetq_lane_s32(result, 0)];
|
||||||
|
dest[kGIndex] = otdata_g[vgetq_lane_s32(result, 1)];
|
||||||
|
dest[kBIndex] = otdata_b[vgetq_lane_s32(result, 2)];
|
||||||
|
|
||||||
|
/* load for next loop while store completes */
|
||||||
|
vec_r = vld1q_dup_f32(&igtbl_r[src[kRIndex]]);
|
||||||
|
vec_g = vld1q_dup_f32(&igtbl_g[src[kGIndex]]);
|
||||||
|
vec_b = vld1q_dup_f32(&igtbl_b[src[kBIndex]]);
|
||||||
|
|
||||||
|
dest += components;
|
||||||
|
src += components;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* handle final (maybe only) pixel */
|
||||||
|
|
||||||
|
vec_r = vmulq_f32(vec_r, mat0);
|
||||||
|
vec_g = vmulq_f32(vec_g, mat1);
|
||||||
|
vec_b = vmulq_f32(vec_b, mat2);
|
||||||
|
|
||||||
|
if (kAIndex != NO_A_INDEX) {
|
||||||
|
dest[kAIndex] = alpha;
|
||||||
|
}
|
||||||
|
|
||||||
|
vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b));
|
||||||
|
vec_r = vmaxq_f32(min, vec_r);
|
||||||
|
vec_r = vminq_f32(max, vec_r);
|
||||||
|
result = vcvtq_s32_f32(vmulq_f32(vec_r, scale));
|
||||||
|
|
||||||
|
dest[kRIndex] = otdata_r[vgetq_lane_s32(result, 0)];
|
||||||
|
dest[kGIndex] = otdata_g[vgetq_lane_s32(result, 1)];
|
||||||
|
dest[kBIndex] = otdata_b[vgetq_lane_s32(result, 2)];
|
||||||
|
}
|
||||||
|
|
||||||
|
void qcms_transform_data_rgb_out_lut_neon(const qcms_transform *transform,
|
||||||
|
const unsigned char *src,
|
||||||
|
unsigned char *dest,
|
||||||
|
size_t length)
|
||||||
|
{
|
||||||
|
qcms_transform_data_template_lut_neon<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX>(transform, src, dest, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
void qcms_transform_data_rgba_out_lut_neon(const qcms_transform *transform,
|
||||||
|
const unsigned char *src,
|
||||||
|
unsigned char *dest,
|
||||||
|
size_t length)
|
||||||
|
{
|
||||||
|
qcms_transform_data_template_lut_neon<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX, RGBA_A_INDEX>(transform, src, dest, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
void qcms_transform_data_bgra_out_lut_neon(const qcms_transform *transform,
|
||||||
|
const unsigned char *src,
|
||||||
|
unsigned char *dest,
|
||||||
|
size_t length)
|
||||||
|
{
|
||||||
|
qcms_transform_data_template_lut_neon<BGRA_R_INDEX, BGRA_G_INDEX, BGRA_B_INDEX, BGRA_A_INDEX>(transform, src, dest, length);
|
||||||
|
}
|
@ -1232,6 +1232,17 @@ qcms_transform* qcms_transform_create(
|
|||||||
#endif
|
#endif
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(__arm__) || defined(__aarch64__)
|
||||||
|
if (qcms_supports_neon) {
|
||||||
|
if (in_type == QCMS_DATA_RGB_8) {
|
||||||
|
transform->transform_fn = qcms_transform_data_rgb_out_lut_neon;
|
||||||
|
} else if (in_type == QCMS_DATA_RGBA_8) {
|
||||||
|
transform->transform_fn = qcms_transform_data_rgba_out_lut_neon;
|
||||||
|
} else if (in_type == QCMS_DATA_BGRA_8) {
|
||||||
|
transform->transform_fn = qcms_transform_data_bgra_out_lut_neon;
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
#if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__))
|
#if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__))
|
||||||
if (have_altivec()) {
|
if (have_altivec()) {
|
||||||
if (in_type == QCMS_DATA_RGB_8) {
|
if (in_type == QCMS_DATA_RGB_8) {
|
||||||
@ -1367,3 +1378,13 @@ void qcms_enable_iccv4()
|
|||||||
{
|
{
|
||||||
qcms_supports_iccv4 = true;
|
qcms_supports_iccv4 = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(__arm__) || defined(__aarch64__)
|
||||||
|
bool qcms_supports_neon;
|
||||||
|
#endif
|
||||||
|
void qcms_enable_neon()
|
||||||
|
{
|
||||||
|
#if defined(__arm__) || defined(__aarch64__)
|
||||||
|
qcms_supports_neon = true;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
@ -41,6 +41,8 @@
|
|||||||
#include "VRProcessManager.h"
|
#include "VRProcessManager.h"
|
||||||
#include "VRThread.h"
|
#include "VRThread.h"
|
||||||
|
|
||||||
|
#include "mozilla/arm.h"
|
||||||
|
|
||||||
#ifdef XP_WIN
|
#ifdef XP_WIN
|
||||||
# include <process.h>
|
# include <process.h>
|
||||||
# define getpid _getpid
|
# define getpid _getpid
|
||||||
@ -1987,6 +1989,11 @@ eCMSMode gfxPlatform::GetCMSMode() {
|
|||||||
if (enableV4) {
|
if (enableV4) {
|
||||||
qcms_enable_iccv4();
|
qcms_enable_iccv4();
|
||||||
}
|
}
|
||||||
|
#ifdef MOZILLA_MAY_SUPPORT_NEON
|
||||||
|
if (mozilla::supports_neon()) {
|
||||||
|
qcms_enable_neon();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
gCMSInitialized = true;
|
gCMSInitialized = true;
|
||||||
}
|
}
|
||||||
return gCMSMode;
|
return gCMSMode;
|
||||||
|
Loading…
Reference in New Issue
Block a user