libavcodec/huffyuv: >8 bit support

This adds only yuv420p10; other formats are trivial to add after this
commit and will be added in a subsequent one.
Currently the implementation is not optimized; optimizations will be
added later.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2014-01-13 01:14:05 +01:00
parent 6c004e8aad
commit b53aab1a58
4 changed files with 205 additions and 57 deletions

View File

@ -35,13 +35,13 @@
#include "avcodec.h"
#include "huffyuv.h"
int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table)
int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table, int n)
{
int len, index;
uint32_t bits = 0;
for (len = 32; len > 0; len--) {
for (index = 0; index < 256; index++) {
for (index = 0; index < n; index++) {
if (len_table[index] == len)
dst[index] = bits++;
}
@ -60,9 +60,10 @@ av_cold int ff_huffyuv_alloc_temp(HYuvContext *s)
if (s->bitstream_bpp<24) {
for (i=0; i<3; i++) {
s->temp[i]= av_malloc(s->width + 16);
s->temp[i]= av_malloc(2*s->width + 16);
if (!s->temp[i])
return AVERROR(ENOMEM);
s->temp16[i] = (uint16_t*)s->temp[i];
}
} else {
s->temp[0]= av_mallocz(4*s->width + 16);
@ -93,5 +94,6 @@ av_cold void ff_huffyuv_common_end(HYuvContext *s)
for(i = 0; i < 3; i++) {
av_freep(&s->temp[i]);
s->temp16[i] = NULL;
}
}

View File

@ -38,6 +38,9 @@
#define VLC_BITS 11
#define MAX_BITS 14
#define MAX_N (1<<MAX_BITS)
#if HAVE_BIGENDIAN
#define B 3
#define G 2
@ -80,9 +83,10 @@ typedef struct HYuvContext {
int picture_number;
int last_slice_end;
uint8_t *temp[3];
uint64_t stats[4][256];
uint8_t len[4][256];
uint32_t bits[4][256];
uint16_t *temp16[3]; ///< identical to temp but 16bit type
uint64_t stats[4][MAX_N];
uint8_t len[4][MAX_N];
uint32_t bits[4][MAX_N];
uint32_t pix_bgr_map[1<<VLC_BITS];
VLC vlc[8]; //Y,U,V,A,YY,YU,YV,AA
uint8_t *bitstream_buffer;
@ -93,6 +97,6 @@ typedef struct HYuvContext {
void ff_huffyuv_common_init(AVCodecContext *s);
void ff_huffyuv_common_end(HYuvContext *s);
int ff_huffyuv_alloc_temp(HYuvContext *s);
int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table);
int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table, int n);
#endif /* AVCODEC_HUFFYUV_H */

View File

@ -21,6 +21,8 @@
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* yuva, gray, 4:4:4, 4:1:1, 4:1:0 and >8 bit per sample support sponsored by NOA
*/
/**
@ -88,16 +90,16 @@ static const unsigned char classic_add_chroma[256] = {
6, 12, 8, 10, 7, 9, 6, 4, 6, 2, 2, 3, 3, 3, 3, 2,
};
static int read_len_table(uint8_t *dst, GetBitContext *gb)
static int read_len_table(uint8_t *dst, GetBitContext *gb, int n)
{
int i, val, repeat;
for (i = 0; i < 256;) {
for (i = 0; i < n;) {
repeat = get_bits(gb, 3);
val = get_bits(gb, 5);
if (repeat == 0)
repeat = get_bits(gb, 8);
if (i + repeat > 256 || get_bits_left(gb) < 0) {
if (i + repeat > n || get_bits_left(gb) < 0) {
av_log(NULL, AV_LOG_ERROR, "Error reading huffman table\n");
return -1;
}
@ -118,19 +120,19 @@ static int generate_joint_tables(HYuvContext *s)
int p, i, y, u;
for (p = 0; p < 4; p++) {
int p0 = s->version > 2 ? p : 0;
for (i = y = 0; y < 256; y++) {
for (i = y = 0; y < s->n; y++) {
int len0 = s->len[p0][y];
int limit = VLC_BITS - len0;
if(limit <= 0 || !len0)
continue;
for (u = 0; u < 256; u++) {
for (u = 0; u < s->n; u++) {
int len1 = s->len[p][u];
if (len1 > limit || !len1)
continue;
av_assert0(i < (1 << VLC_BITS));
len[i] = len0 + len1;
bits[i] = (s->bits[p0][y] << len1) + s->bits[p][u];
symbols[i] = (y << 8) + u;
symbols[i] = (y << 8) + u; //FIXME
if(symbols[i] != 0xffff) // reserved to mean "invalid"
i++;
}
@ -199,13 +201,13 @@ static int read_huffman_tables(HYuvContext *s, const uint8_t *src, int length)
count = 1 + s->alpha + 2*s->chroma;
for (i = 0; i < count; i++) {
if (read_len_table(s->len[i], &gb) < 0)
if (read_len_table(s->len[i], &gb, s->n) < 0)
return -1;
if (ff_huffyuv_generate_bits_table(s->bits[i], s->len[i]) < 0) {
if (ff_huffyuv_generate_bits_table(s->bits[i], s->len[i], s->n) < 0) {
return -1;
}
ff_free_vlc(&s->vlc[i]);
if ((ret = init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1,
if ((ret = init_vlc(&s->vlc[i], VLC_BITS, s->n, s->len[i], 1, 1,
s->bits[i], 4, 4, 0)) < 0)
return ret;
}
@ -224,12 +226,12 @@ static int read_old_huffman_tables(HYuvContext *s)
init_get_bits(&gb, classic_shift_luma,
classic_shift_luma_table_size * 8);
if (read_len_table(s->len[0], &gb) < 0)
if (read_len_table(s->len[0], &gb, 256) < 0)
return -1;
init_get_bits(&gb, classic_shift_chroma,
classic_shift_chroma_table_size * 8);
if (read_len_table(s->len[1], &gb) < 0)
if (read_len_table(s->len[1], &gb, 256) < 0)
return -1;
for(i=0; i<256; i++) s->bits[0][i] = classic_add_luma [i];
@ -397,6 +399,9 @@ static av_cold int decode_init(AVCodecContext *avctx)
case 0x675:
avctx->pix_fmt = AV_PIX_FMT_YUV420P;
break;
case 0x695:
avctx->pix_fmt = AV_PIX_FMT_YUV420P10;
break;
case 0x67A:
avctx->pix_fmt = AV_PIX_FMT_YUV410P;
break;
@ -409,6 +414,11 @@ static av_cold int decode_init(AVCodecContext *avctx)
case 0x775:
avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
break;
case 0x795:
avctx->pix_fmt = AV_PIX_FMT_YUVA420P10;
break;
default:
return AVERROR_INVALIDDATA;
}
}
@ -499,19 +509,35 @@ static void decode_422_bitstream(HYuvContext *s, int count)
dst1 = get_vlc2(&s->gb, s->vlc[plane].table, VLC_BITS, 3);\
}\
}
/* Read two consecutive symbols from s->gb with get_vlc2(), using the VLC
 * table of `plane`, and store the first in dst0 and the second in dst1.
 * Expands to a braced block that refers to a variable named `s` in the
 * calling scope; dst0 and dst1 must be assignable lvalues. */
#define READ_2PIX_PLANE16(dst0, dst1, plane){\
dst0 = get_vlc2(&s->gb, s->vlc[plane].table, VLC_BITS, 3);\
dst1 = get_vlc2(&s->gb, s->vlc[plane].table, VLC_BITS, 3);\
}
static void decode_plane_bitstream(HYuvContext *s, int count, int plane)
{
int i;
count/=2;
if (count >= (get_bits_left(&s->gb)) / (31 * 2)) {
for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) {
READ_2PIX_PLANE(s->temp[0][2 * i], s->temp[0][2 * i + 1], plane);
if (s->bps <= 8) {
if (count >= (get_bits_left(&s->gb)) / (31 * 2)) {
for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) {
READ_2PIX_PLANE(s->temp[0][2 * i], s->temp[0][2 * i + 1], plane);
}
} else {
for(i=0; i<count; i++){
READ_2PIX_PLANE(s->temp[0][2 * i], s->temp[0][2 * i + 1], plane);
}
}
} else {
for(i=0; i<count; i++){
READ_2PIX_PLANE(s->temp[0][2 * i], s->temp[0][2 * i + 1], plane);
if (count >= (get_bits_left(&s->gb)) / (31 * 2)) {
for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) {
READ_2PIX_PLANE16(s->temp16[0][2 * i], s->temp16[0][2 * i + 1], plane);
}
} else {
for(i=0; i<count; i++){
READ_2PIX_PLANE16(s->temp16[0][2 * i], s->temp16[0][2 * i + 1], plane);
}
}
}
}
@ -601,6 +627,56 @@ static void draw_slice(HYuvContext *s, AVFrame *frame, int y)
s->last_slice_end = y + h;
}
/**
 * Undo left prediction on one row of residuals.
 *
 * For s->bps <= 8 the call is forwarded to s->dsp.add_hfyu_left_prediction().
 * Otherwise dst and src are reinterpreted as rows of uint16_t and every
 * output sample is the running sum of the residuals reduced with the mask
 * s->n - 1 (presumably s->n is a power of two -- confirm against the code
 * that sets it).
 *
 * @param s   codec context; bps, n and dsp are read
 * @param dst output row, w samples
 * @param src residual row, w samples
 * @param w   number of samples in the row
 * @param acc start value of the running sum (the callers pass 0 or the
 *            value returned for the previous row)
 * @return the running sum after the last sample; in the >8 bit path it is
 *         already reduced with the mask, i.e. equal to the last sample
 *         written when w > 0
 */
static int left_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src, int w, int acc)
{
    if (s->bps <= 8) {
        return s->dsp.add_hfyu_left_prediction(dst, src, w, acc);
    } else {
        //FIXME optimize
        const unsigned mask   = s->n - 1;
        const uint16_t *src16 = (const uint16_t *)src;
        uint16_t       *dst16 = (      uint16_t *)dst;
        /* Unsigned and re-masked after every sample: the sum is handed from
         * row to row by the callers, and an ever-growing signed int would
         * eventually overflow. Only the masked bits ever reach dst. */
        unsigned sum = (unsigned)acc & mask;
        int i;

        for (i = 0; i < w; i++) {
            sum      = (sum + src16[i]) & mask;
            dst16[i] = sum;
        }

        return sum;
    }
}
/**
 * Add the row src onto the row dst, in place.
 *
 * For s->bps <= 8 the call is forwarded to s->dsp.add_bytes(). Otherwise
 * both rows are reinterpreted as uint16_t and each sample becomes
 * (dst + src) reduced to the low s->bps bits.
 *
 * @param s   codec context; bps and dsp are read
 * @param dst row that is updated in place, w samples
 * @param src row that is added, w samples
 * @param w   number of samples in the row
 */
static void add_bytes(HYuvContext *s, uint8_t *dst, uint8_t *src, int w)
{
    if (s->bps <= 8) {
        s->dsp.add_bytes(dst, src, w);
    } else {
        //FIXME optimize
        const uint16_t *src16 = (const uint16_t *)src;
        uint16_t       *dst16 = (      uint16_t *)dst;
        /* low s->bps bits set; bps is 9..16 on this path */
        const unsigned mask   = (1U << s->bps) - 1;
        int i;

        /* Plain per-sample loop: the samples are only ever accessed as
         * uint16_t, so no wider, possibly misaligned loads through an
         * incompatible pointer type are needed. */
        for (i = 0; i < w; i++)
            dst16[i] = (dst16[i] + src16[i]) & mask;
    }
}
static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
AVPacket *avpkt)
{
@ -663,16 +739,16 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
case LEFT:
case PLANE:
decode_plane_bitstream(s, w, plane);
left = s->dsp.add_hfyu_left_prediction(p->data[plane], s->temp[0], w, 0);
left = left_prediction(s, p->data[plane], s->temp[0], w, 0);
for (y = 1; y < h; y++) {
uint8_t *dst = p->data[plane] + p->linesize[plane]*y;
decode_plane_bitstream(s, w, plane);
left = s->dsp.add_hfyu_left_prediction(dst, s->temp[0], w, left);
left = left_prediction(s, dst, s->temp[0], w, left);
if (s->predictor == PLANE) {
if (y > s->interlaced) {
s->dsp.add_bytes(dst, dst - fake_stride, w);
add_bytes(s, dst, dst - fake_stride, w);
}
}
}
@ -680,14 +756,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
break;
case MEDIAN:
decode_plane_bitstream(s, w, plane);
left= s->dsp.add_hfyu_left_prediction(p->data[plane], s->temp[0], w, 0);
left= left_prediction(s, p->data[plane], s->temp[0], w, 0);
y = 1;
/* second line is left predicted for interlaced case */
if (s->interlaced) {
decode_plane_bitstream(s, w, plane);
left = s->dsp.add_hfyu_left_prediction(p->data[plane] + p->linesize[plane], s->temp[0], w, left);
left = left_prediction(s, p->data[plane] + p->linesize[plane], s->temp[0], w, left);
y++;
}

View File

@ -19,6 +19,8 @@
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* yuva, gray, 4:4:4, 4:1:1, 4:1:0 and >8 bit per sample support sponsored by NOA
*/
/**
@ -33,25 +35,58 @@
#include "put_bits.h"
#include "libavutil/pixdesc.h"
/**
 * Write the per-sample difference src0 - src1 into dst.
 *
 * For s->bps <= 8 the work is delegated to s->dsp.diff_bytes(). Otherwise
 * the three rows are treated as uint16_t and the difference is stored with
 * 16-bit wrap-around; no bit-depth mask is applied here.
 *
 * @param s    codec context; bps and dsp are read
 * @param dst  output row, w samples
 * @param src0 minuend row, w samples
 * @param src1 subtrahend row, w samples
 * @param w    number of samples in the row
 */
static inline void diff_bytes(HYuvContext *s, uint8_t *dst,
                              const uint8_t *src0, const uint8_t *src1, int w)
{
    const uint16_t *minuend;
    const uint16_t *subtrahend;
    uint16_t *out;
    int x;

    if (s->bps <= 8) {
        s->dsp.diff_bytes(dst, src0, src1, w);
        return;
    }

    //FIXME optimize
    minuend    = (const uint16_t *)src0;
    subtrahend = (const uint16_t *)src1;
    out        = (uint16_t *)dst;

    for (x = 0; x < w; x++)
        out[x] = minuend[x] - subtrahend[x];
}
static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst,
const uint8_t *src, int w, int left)
{
int i;
if (w < 32) {
if (s->bps <= 8) {
if (w < 32) {
for (i = 0; i < w; i++) {
const int temp = src[i];
dst[i] = temp - left;
left = temp;
}
return left;
} else {
for (i = 0; i < 16; i++) {
const int temp = src[i];
dst[i] = temp - left;
left = temp;
}
s->dsp.diff_bytes(dst + 16, src + 16, src + 15, w - 16);
return src[w-1];
}
} else {
const uint16_t *src16 = (const uint16_t *)src;
uint16_t *dst16 = ( uint16_t *)dst;
for (i = 0; i < w; i++) {
const int temp = src[i];
dst[i] = temp - left;
const int temp = src16[i];
dst16[i] = temp - left;
left = temp;
}
return left;
} else {
for (i = 0; i < 16; i++) {
const int temp = src[i];
dst[i] = temp - left;
left = temp;
}
s->dsp.diff_bytes(dst + 16, src + 16, src + 15, w - 16);
return src[w-1];
//FIXME optimize
}
}
@ -122,15 +157,16 @@ static int store_table(HYuvContext *s, const uint8_t *len, uint8_t *buf)
{
int i;
int index = 0;
int n = s->n;
for (i = 0; i < 256;) {
for (i = 0; i < n;) {
int val = len[i];
int repeat = 0;
for (; i < 256 && len[i] == val && repeat < 255; i++)
for (; i < n && len[i] == val && repeat < 255; i++)
repeat++;
av_assert0(val < 32 && val >0 && repeat<256 && repeat>0);
av_assert0(val < 32 && val >0 && repeat < 256 && repeat>0);
if (repeat > 7) {
buf[index++] = val;
buf[index++] = repeat;
@ -152,10 +188,10 @@ static int store_huffman_tables(HYuvContext *s, uint8_t *buf)
count = 1 + s->alpha + 2*s->chroma;
for (i = 0; i < count; i++) {
if ((ret = ff_huff_gen_len_table(s->len[i], s->stats[i], 256)) < 0)
if ((ret = ff_huff_gen_len_table(s->len[i], s->stats[i], s->n)) < 0)
return ret;
if (ff_huffyuv_generate_bits_table(s->bits[i], s->len[i]) < 0) {
if (ff_huffyuv_generate_bits_table(s->bits[i], s->len[i], s->n) < 0) {
return -1;
}
@ -173,8 +209,8 @@ static av_cold int encode_init(AVCodecContext *avctx)
ff_huffyuv_common_init(avctx);
avctx->extradata = av_mallocz(1024*30); // 256*3+4 == 772
avctx->stats_out = av_mallocz(1024*30); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132
avctx->extradata = av_mallocz(3*MAX_N + 4);
avctx->stats_out = av_mallocz(21*MAX_N*3 + 4); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132
if (!avctx->extradata || !avctx->stats_out) {
av_freep(&avctx->stats_out);
return AVERROR(ENOMEM);
@ -216,6 +252,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
case AV_PIX_FMT_YUVA422P:
case AV_PIX_FMT_GBRAP:
case AV_PIX_FMT_GRAY8A:
case AV_PIX_FMT_YUV420P10:
s->version = 3;
break;
case AV_PIX_FMT_RGB32:
@ -301,14 +338,14 @@ static av_cold int encode_init(AVCodecContext *avctx)
char *p = avctx->stats_in;
for (i = 0; i < 4; i++)
for (j = 0; j < 256; j++)
for (j = 0; j < s->n; j++)
s->stats[i][j] = 1;
for (;;) {
for (i = 0; i < 4; i++) {
char *next;
for (j = 0; j < 256; j++) {
for (j = 0; j < s->n; j++) {
s->stats[i][j] += strtol(p, &next, 0);
if (next == p) return -1;
p = next;
@ -318,8 +355,8 @@ static av_cold int encode_init(AVCodecContext *avctx)
}
} else {
for (i = 0; i < 4; i++)
for (j = 0; j < 256; j++) {
int d = FFMIN(j, 256 - j);
for (j = 0; j < s->n; j++) {
int d = FFMIN(j, s->n - j);
s->stats[i][j] = 100000000 / (d + 1);
}
@ -333,14 +370,14 @@ static av_cold int encode_init(AVCodecContext *avctx)
if (s->context) {
for (i = 0; i < 4; i++) {
int pels = s->width * s->height / (i ? 40 : 10);
for (j = 0; j < 256; j++) {
int d = FFMIN(j, 256 - j);
for (j = 0; j < s->n; j++) {
int d = FFMIN(j, s->n - j);
s->stats[i][j] = pels/(d + 1);
}
}
} else {
for (i = 0; i < 4; i++)
for (j = 0; j < 256; j++)
for (j = 0; j < s->n; j++)
s->stats[i][j]= 0;
}
@ -412,7 +449,7 @@ static int encode_plane_bitstream(HYuvContext *s, int count, int plane)
{
int i;
if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) < 4 * count) {
if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) < count * s->bps / 2) {
av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
return -1;
}
@ -420,6 +457,9 @@ static int encode_plane_bitstream(HYuvContext *s, int count, int plane)
#define LOAD2\
int y0 = s->temp[0][2 * i];\
int y1 = s->temp[0][2 * i + 1];
#define LOAD2_16\
int y0 = s->temp16[0][2 * i] & mask;\
int y1 = s->temp16[0][2 * i + 1] & mask;
#define STAT2\
s->stats[plane][y0]++;\
s->stats[plane][y1]++;
@ -429,6 +469,7 @@ static int encode_plane_bitstream(HYuvContext *s, int count, int plane)
count /= 2;
if (s->bps <= 8) {
if (s->flags & CODEC_FLAG_PASS1) {
for (i = 0; i < count; i++) {
LOAD2;
@ -450,6 +491,30 @@ static int encode_plane_bitstream(HYuvContext *s, int count, int plane)
WRITE2;
}
}
} else {
int mask = s->n - 1;
if (s->flags & CODEC_FLAG_PASS1) {
for (i = 0; i < count; i++) {
LOAD2_16;
STAT2;
}
}
if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)
return 0;
if (s->context) {
for (i = 0; i < count; i++) {
LOAD2_16;
STAT2;
WRITE2;
}
} else {
for (i = 0; i < count; i++) {
LOAD2_16;
WRITE2;
}
}
}
#undef LOAD2
#undef STAT2
#undef WRITE2
@ -574,7 +639,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
return size;
for (i = 0; i < 4; i++)
for (j = 0; j < 256; j++)
for (j = 0; j < s->n; j++)
s->stats[i][j] >>= 1;
}
@ -784,7 +849,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
uint8_t *dst = p->data[plane] + p->linesize[plane] * y;
if (s->predictor == PLANE && s->interlaced < y) {
s->dsp.diff_bytes(s->temp[1], dst, dst - fake_stride, w);
diff_bytes(s, s->temp[1], dst, dst - fake_stride, w);
left = sub_left_prediction(s, s->temp[0], s->temp[1], w , left);
} else {
@ -810,7 +875,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
char *p = avctx->stats_out;
char *end = p + 1024*30;
for (i = 0; i < 4; i++) {
for (j = 0; j < 256; j++) {
for (j = 0; j < s->n; j++) {
snprintf(p, end-p, "%"PRIu64" ", s->stats[i][j]);
p += strlen(p);
s->stats[i][j]= 0;
@ -883,6 +948,7 @@ AVCodec ff_ffvhuff_encoder = {
AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
AV_PIX_FMT_GBRAP,
AV_PIX_FMT_GRAY8A,
AV_PIX_FMT_YUV420P10,
AV_PIX_FMT_RGB24,
AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE
},