mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-02-22 10:27:03 +00:00
Bug 548612. qcms: Reduce size of output lookup tables. r=bgirard
We currently use a larger output lookup table than we probably need. Switch to a common define for the table size and lower it. This should also give a small improvement to startup time because we have fewer lookup table entries to compute.
This commit is contained in:
parent
2865a66cc7
commit
e6b3a462fa
@ -1,13 +1,20 @@
|
||||
#include "qcms.h"
|
||||
#include "qcmstypes.h"
|
||||
|
||||
/* used as a 16bit lookup table for the output transformation.
|
||||
/* used as a lookup table for the output transformation.
|
||||
* we refcount them so we only need to have one around per output
|
||||
* profile, instead of duplicating them per transform */
|
||||
struct precache_output
|
||||
{
|
||||
int ref_count;
|
||||
uint8_t data[65535];
|
||||
/* We previously used a count of 65536 here but that seems like more
|
||||
* precision than we actually need. By reducing the size we can
|
||||
* improve startup performance and reduce memory usage. ColorSync on
|
||||
* 10.5 uses 4097 which is perhaps because they use a fixed point
|
||||
* representation where 1. is represented by 0x1000. */
|
||||
#define PRECACHE_OUTPUT_SIZE 8192
|
||||
#define PRECACHE_OUTPUT_MAX (PRECACHE_OUTPUT_SIZE-1)
|
||||
uint8_t data[PRECACHE_OUTPUT_SIZE];
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
@ -3,8 +3,8 @@
|
||||
#include "qcmsint.h"
|
||||
|
||||
/* pre-shuffled: just load these into XMM reg instead of load-scalar/shufps sequence */
|
||||
#define FLOATSCALE 65536.0f
|
||||
#define CLAMPMAXVAL ( ((float) (65536 - 1)) / 65536.0f )
|
||||
#define FLOATSCALE (float)(PRECACHE_OUTPUT_SIZE)
|
||||
#define CLAMPMAXVAL ( ((float) (PRECACHE_OUTPUT_SIZE - 1)) / PRECACHE_OUTPUT_SIZE )
|
||||
static const ALIGN float floatScaleX4[4] =
|
||||
{ FLOATSCALE, FLOATSCALE, FLOATSCALE, FLOATSCALE};
|
||||
static const ALIGN float clampMaxValueX4[4] =
|
||||
|
@ -3,8 +3,8 @@
|
||||
#include "qcmsint.h"
|
||||
|
||||
/* pre-shuffled: just load these into XMM reg instead of load-scalar/shufps sequence */
|
||||
#define FLOATSCALE 65536.0f
|
||||
#define CLAMPMAXVAL ( ((float) (65536 - 1)) / 65536.0f )
|
||||
#define FLOATSCALE (float)(PRECACHE_OUTPUT_SIZE)
|
||||
#define CLAMPMAXVAL ( ((float) (PRECACHE_OUTPUT_SIZE - 1)) / PRECACHE_OUTPUT_SIZE )
|
||||
static const ALIGN float floatScaleX4[4] =
|
||||
{ FLOATSCALE, FLOATSCALE, FLOATSCALE, FLOATSCALE};
|
||||
static const ALIGN float clampMaxValueX4[4] =
|
||||
|
@ -38,7 +38,7 @@ typedef uint16_t uint16_fract_t;
|
||||
float lut_interp_linear(double value, uint16_t *table, int length)
|
||||
{
|
||||
int upper, lower;
|
||||
value = value * (length - 1);
|
||||
value = value * (length - 1); // scale to length of the array
|
||||
upper = ceil(value);
|
||||
lower = floor(value);
|
||||
//XXX: can we be more performant here?
|
||||
@ -50,16 +50,61 @@ float lut_interp_linear(double value, uint16_t *table, int length)
|
||||
/* same as above but takes and returns a uint16_t value representing a range from 0..1 */
|
||||
uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
|
||||
{
|
||||
/* Start scaling input_value to the length of the array: 65535*(length-1).
|
||||
* We'll divide out the 65535 next */
|
||||
uint32_t value = (input_value * (length - 1));
|
||||
uint32_t upper = (value + 65534) / 65535; /* equivalent to ceil(value/65535) */
|
||||
uint32_t lower = value / 65535; /* equivalent to floor(value/65535) */
|
||||
/* interp is the distance from lower to value scaled to 0..65535 */
|
||||
uint32_t interp = value % 65535;
|
||||
|
||||
value = (table[upper]*(interp) + table[lower]*(65535 - interp))/65535;
|
||||
value = (table[upper]*(interp) + table[lower]*(65535 - interp))/65535; // 0..65535*65535
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
/* same as above but takes an input_value from 0..PRECACHE_OUTPUT_MAX
|
||||
* and returns a uint8_t value representing a range from 0..1 */
|
||||
static
|
||||
uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, int length)
|
||||
{
|
||||
/* Start scaling input_value to the length of the array: PRECACHE_OUTPUT_MAX*(length-1).
|
||||
* We'll divide out the PRECACHE_OUTPUT_MAX next */
|
||||
uint32_t value = (input_value * (length - 1));
|
||||
|
||||
/* equivalent to ceil(value/PRECACHE_OUTPUT_MAX) */
|
||||
uint32_t upper = (value + PRECACHE_OUTPUT_MAX-1) / PRECACHE_OUTPUT_MAX;
|
||||
/* equivalent to floor(value/PRECACHE_OUTPUT_MAX) */
|
||||
uint32_t lower = value / PRECACHE_OUTPUT_MAX;
|
||||
/* interp is the distance from lower to value scaled to 0..PRECACHE_OUTPUT_MAX */
|
||||
uint32_t interp = value % PRECACHE_OUTPUT_MAX;
|
||||
|
||||
/* the table values range from 0..65535 */
|
||||
value = (table[upper]*(interp) + table[lower]*(PRECACHE_OUTPUT_MAX - interp)); // 0..(65535*PRECACHE_OUTPUT_MAX)
|
||||
|
||||
/* round and scale */
|
||||
value += (PRECACHE_OUTPUT_MAX*65535/255)/2;
|
||||
value /= (PRECACHE_OUTPUT_MAX*65535/255); // scale to 0..255
|
||||
return value;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* if we use a different representation i.e. one that goes from 0 to 0x1000 we can be more efficient
|
||||
* because we can avoid the divisions and use a shifting instead */
|
||||
/* same as above but takes and returns a uint16_t value representing a range from 0..1 */
|
||||
uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
|
||||
{
|
||||
uint32_t value = (input_value * (length - 1));
|
||||
uint32_t upper = (value + 4095) / 4096; /* equivalent to ceil(value/4096) */
|
||||
uint32_t lower = value / 4096; /* equivalent to floor(value/4096) */
|
||||
uint32_t interp = value % 4096;
|
||||
|
||||
value = (table[upper]*(interp) + table[lower]*(4096 - interp))/4096; // 0..4096*4096
|
||||
|
||||
return value;
|
||||
}
|
||||
#endif
|
||||
|
||||
void compute_curve_gamma_table_type1(float gamma_table[256], double gamma)
|
||||
{
|
||||
unsigned int i;
|
||||
@ -707,7 +752,7 @@ static void qcms_transform_data_gray_out_precache(qcms_transform *transform, uns
|
||||
float linear = transform->input_gamma_table_gray[device];
|
||||
|
||||
/* we could round here... */
|
||||
gray = linear * 65535.;
|
||||
gray = linear * PRECACHE_OUTPUT_MAX;
|
||||
|
||||
*dest++ = transform->output_table_r->data[gray];
|
||||
*dest++ = transform->output_table_g->data[gray];
|
||||
@ -726,7 +771,7 @@ static void qcms_transform_data_graya_out_precache(qcms_transform *transform, un
|
||||
float linear = transform->input_gamma_table_gray[device];
|
||||
|
||||
/* we could round here... */
|
||||
gray = linear * 65535.;
|
||||
gray = linear * PRECACHE_OUTPUT_MAX;
|
||||
|
||||
*dest++ = transform->output_table_r->data[gray];
|
||||
*dest++ = transform->output_table_g->data[gray];
|
||||
@ -758,9 +803,9 @@ static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform,
|
||||
out_linear_b = clamp_float(out_linear_b);
|
||||
|
||||
/* we could round here... */
|
||||
r = out_linear_r * 65535.;
|
||||
g = out_linear_g * 65535.;
|
||||
b = out_linear_b * 65535.;
|
||||
r = out_linear_r * PRECACHE_OUTPUT_MAX;
|
||||
g = out_linear_g * PRECACHE_OUTPUT_MAX;
|
||||
b = out_linear_b * PRECACHE_OUTPUT_MAX;
|
||||
|
||||
*dest++ = transform->output_table_r->data[r];
|
||||
*dest++ = transform->output_table_g->data[g];
|
||||
@ -792,9 +837,9 @@ static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform,
|
||||
out_linear_b = clamp_float(out_linear_b);
|
||||
|
||||
/* we could round here... */
|
||||
r = out_linear_r * 65535.;
|
||||
g = out_linear_g * 65535.;
|
||||
b = out_linear_b * 65535.;
|
||||
r = out_linear_r * PRECACHE_OUTPUT_MAX;
|
||||
g = out_linear_g * PRECACHE_OUTPUT_MAX;
|
||||
b = out_linear_b * PRECACHE_OUTPUT_MAX;
|
||||
|
||||
*dest++ = transform->output_table_r->data[r];
|
||||
*dest++ = transform->output_table_g->data[g];
|
||||
@ -988,27 +1033,26 @@ void qcms_transform_release(qcms_transform *t)
|
||||
static void compute_precache_pow(uint8_t *output, float gamma)
|
||||
{
|
||||
uint32_t v = 0;
|
||||
for (v = 0; v <= 0xffff; v++) {
|
||||
for (v = 0; v < PRECACHE_OUTPUT_SIZE; v++) {
|
||||
//XXX: don't do integer/float conversion... and round?
|
||||
output[v] = 255. * pow(v/65535., gamma);
|
||||
output[v] = 255. * pow(v/(double)PRECACHE_OUTPUT_MAX, gamma);
|
||||
}
|
||||
}
|
||||
|
||||
void compute_precache_lut(uint8_t *output, uint16_t *table, int length)
|
||||
{
|
||||
uint32_t v = 0;
|
||||
for (v = 0; v <= 0xffff; v++) {
|
||||
//XXX: don't do integer/float conversion... round?
|
||||
output[v] = lut_interp_linear16(v, table, length) >> 8;
|
||||
for (v = 0; v < PRECACHE_OUTPUT_SIZE; v++) {
|
||||
output[v] = lut_interp_linear_precache_output(v, table, length);
|
||||
}
|
||||
}
|
||||
|
||||
void compute_precache_linear(uint8_t *output)
|
||||
{
|
||||
uint32_t v = 0;
|
||||
for (v = 0; v <= 0xffff; v++) {
|
||||
for (v = 0; v < PRECACHE_OUTPUT_SIZE; v++) {
|
||||
//XXX: round?
|
||||
output[v] = v >> 8;
|
||||
output[v] = v / (PRECACHE_OUTPUT_SIZE/256);
|
||||
}
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user