From cc37b31ad3815dfea6157eb784db7665bd990fcc Mon Sep 17 00:00:00 2001 From: Ganesh Ajjanagadde Date: Wed, 16 Dec 2015 14:34:09 -0500 Subject: [PATCH] lavfi/vf_colorchannelmixer: replace round by lrint lrint is faster here when building with -ftree-vectorize on GCC. This is likely simply an artifact of GCC's rather terrible auto-vectorizer, since as per the instruction set manuals cvtsd2si and cvttsd2si (or their vector equivalents) have identical cycle timings. Anyway, regardless of the above, lrint is superior to round accuracy-wise. Safety is guaranteed because long int has at least 32 bits. Signed-off-by: Ganesh Ajjanagadde --- libavfilter/vf_colorchannelmixer.c | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/libavfilter/vf_colorchannelmixer.c b/libavfilter/vf_colorchannelmixer.c index 0fffd341c5..cda972dd00 100644 --- a/libavfilter/vf_colorchannelmixer.c +++ b/libavfilter/vf_colorchannelmixer.c @@ -115,25 +115,25 @@ static int config_output(AVFilterLink *outlink) s->lut[i][j] = buffer; for (i = 0; i < size; i++) { - s->lut[R][R][i] = round(i * s->rr); - s->lut[R][G][i] = round(i * s->rg); - s->lut[R][B][i] = round(i * s->rb); - s->lut[R][A][i] = round(i * s->ra); + s->lut[R][R][i] = lrint(i * s->rr); + s->lut[R][G][i] = lrint(i * s->rg); + s->lut[R][B][i] = lrint(i * s->rb); + s->lut[R][A][i] = lrint(i * s->ra); - s->lut[G][R][i] = round(i * s->gr); - s->lut[G][G][i] = round(i * s->gg); - s->lut[G][B][i] = round(i * s->gb); - s->lut[G][A][i] = round(i * s->ga); + s->lut[G][R][i] = lrint(i * s->gr); + s->lut[G][G][i] = lrint(i * s->gg); + s->lut[G][B][i] = lrint(i * s->gb); + s->lut[G][A][i] = lrint(i * s->ga); - s->lut[B][R][i] = round(i * s->br); - s->lut[B][G][i] = round(i * s->bg); - s->lut[B][B][i] = round(i * s->bb); - s->lut[B][A][i] = round(i * s->ba); + s->lut[B][R][i] = lrint(i * s->br); + s->lut[B][G][i] = lrint(i * s->bg); + s->lut[B][B][i] = lrint(i * s->bb); + s->lut[B][A][i] = lrint(i * s->ba); - s->lut[A][R][i] = round(i 
* s->ar); - s->lut[A][G][i] = round(i * s->ag); - s->lut[A][B][i] = round(i * s->ab); - s->lut[A][A][i] = round(i * s->aa); + s->lut[A][R][i] = lrint(i * s->ar); + s->lut[A][G][i] = lrint(i * s->ag); + s->lut[A][B][i] = lrint(i * s->ab); + s->lut[A][A][i] = lrint(i * s->aa); } return 0;