From b53aab1a5819abe7c65b5f645e09559bb42db793 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Mon, 13 Jan 2014 01:14:05 +0100 Subject: [PATCH] libavcodec/huffyuv: >8 bit support This adds only yuv420p10, others are trivial to add after this commit and will be added in a subsequent commit. Currently the implementation is not optimized, optimizations will be added later Signed-off-by: Michael Niedermayer --- libavcodec/huffyuv.c | 8 ++- libavcodec/huffyuv.h | 12 ++-- libavcodec/huffyuvdec.c | 118 +++++++++++++++++++++++++++++++------- libavcodec/huffyuvenc.c | 124 ++++++++++++++++++++++++++++++---------- 4 files changed, 205 insertions(+), 57 deletions(-) diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c index b4ef76f198..c183bdf075 100644 --- a/libavcodec/huffyuv.c +++ b/libavcodec/huffyuv.c @@ -35,13 +35,13 @@ #include "avcodec.h" #include "huffyuv.h" -int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table) +int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table, int n) { int len, index; uint32_t bits = 0; for (len = 32; len > 0; len--) { - for (index = 0; index < 256; index++) { + for (index = 0; index < n; index++) { if (len_table[index] == len) dst[index] = bits++; } @@ -60,9 +60,10 @@ av_cold int ff_huffyuv_alloc_temp(HYuvContext *s) if (s->bitstream_bpp<24) { for (i=0; i<3; i++) { - s->temp[i]= av_malloc(s->width + 16); + s->temp[i]= av_malloc(2*s->width + 16); if (!s->temp[i]) return AVERROR(ENOMEM); + s->temp16[i] = (uint16_t*)s->temp[i]; } } else { s->temp[0]= av_mallocz(4*s->width + 16); @@ -93,5 +94,6 @@ av_cold void ff_huffyuv_common_end(HYuvContext *s) for(i = 0; i < 3; i++) { av_freep(&s->temp[i]); + s->temp16[i] = NULL; } } diff --git a/libavcodec/huffyuv.h b/libavcodec/huffyuv.h index c69274ea57..b529f1b52b 100644 --- a/libavcodec/huffyuv.h +++ b/libavcodec/huffyuv.h @@ -38,6 +38,9 @@ #define VLC_BITS 11 +#define MAX_BITS 14 +#define MAX_N (1<8 bit per sample support sponsored by NOA */ /** @@ -88,16 +90,16 @@ static const unsigned char classic_add_chroma[256] = { 6, 12, 8, 10, 7, 9, 6, 4, 6, 2, 2, 3, 3, 3, 3, 2, }; -static int read_len_table(uint8_t *dst, GetBitContext *gb) +static int read_len_table(uint8_t *dst, GetBitContext *gb, int n) { int i, val, repeat; - for (i = 0; i < 256;) { + for (i = 0; i < n;) { repeat = get_bits(gb, 3); val = get_bits(gb, 5); if (repeat == 0) repeat = get_bits(gb, 8); - if (i + repeat > 256 || get_bits_left(gb) < 0) { + if (i + repeat > n || get_bits_left(gb) < 0) { av_log(NULL, AV_LOG_ERROR, "Error reading huffman table\n"); return -1; } @@ -118,19 +120,19 @@ static int generate_joint_tables(HYuvContext *s) int p, i, y, u; for (p = 0; p < 4; p++) { int p0 = s->version > 2 ? p : 0; - for (i = y = 0; y < 256; y++) { + for (i = y = 0; y < s->n; y++) { int len0 = s->len[p0][y]; int limit = VLC_BITS - len0; if(limit <= 0 || !len0) continue; - for (u = 0; u < 256; u++) { + for (u = 0; u < s->n; u++) { int len1 = s->len[p][u]; if (len1 > limit || !len1) continue; av_assert0(i < (1 << VLC_BITS)); len[i] = len0 + len1; bits[i] = (s->bits[p0][y] << len1) + s->bits[p][u]; - symbols[i] = (y << 8) + u; + symbols[i] = (y << 8) + u; //FIXME if(symbols[i] != 0xffff) // reserved to mean "invalid" i++; } @@ -199,13 +201,13 @@ static int read_huffman_tables(HYuvContext *s, const uint8_t *src, int length) count = 1 + s->alpha + 2*s->chroma; for (i = 0; i < count; i++) { - if (read_len_table(s->len[i], &gb) < 0) + if (read_len_table(s->len[i], &gb, s->n) < 0) return -1; - if (ff_huffyuv_generate_bits_table(s->bits[i], s->len[i]) < 0) { + if (ff_huffyuv_generate_bits_table(s->bits[i], s->len[i], s->n) < 0) { return -1; } ff_free_vlc(&s->vlc[i]); - if ((ret = init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, + if ((ret = init_vlc(&s->vlc[i], VLC_BITS, s->n, s->len[i], 1, 1, s->bits[i], 4, 4, 0)) < 0) return ret; } @@ -224,12 +226,12 @@ static int read_old_huffman_tables(HYuvContext *s) init_get_bits(&gb, classic_shift_luma, classic_shift_luma_table_size * 8); - if (read_len_table(s->len[0], &gb) < 0) + if (read_len_table(s->len[0], &gb, 256) < 0) return -1; init_get_bits(&gb, classic_shift_chroma, classic_shift_chroma_table_size * 8); - if (read_len_table(s->len[1], &gb) < 0) + if (read_len_table(s->len[1], &gb, 256) < 0) return -1; for(i=0; i<256; i++) s->bits[0][i] = classic_add_luma [i]; @@ -397,6 +399,9 @@ static av_cold int decode_init(AVCodecContext *avctx) case 0x675: avctx->pix_fmt = AV_PIX_FMT_YUV420P; break; + case 0x695: + avctx->pix_fmt = AV_PIX_FMT_YUV420P10; + break; case 0x67A: avctx->pix_fmt = AV_PIX_FMT_YUV410P; break; @@ -409,6 +414,11 @@ static av_cold int decode_init(AVCodecContext *avctx) case 0x775: avctx->pix_fmt = AV_PIX_FMT_YUVA420P; break; + case 0x795: + avctx->pix_fmt = AV_PIX_FMT_YUVA420P10; + break; + default: + return AVERROR_INVALIDDATA; } } @@ -499,19 +509,35 @@ static void decode_422_bitstream(HYuvContext *s, int count) dst1 = get_vlc2(&s->gb, s->vlc[plane].table, VLC_BITS, 3);\ }\ } +#define READ_2PIX_PLANE16(dst0, dst1, plane){\ + dst0 = get_vlc2(&s->gb, s->vlc[plane].table, VLC_BITS, 3);\ + dst1 = get_vlc2(&s->gb, s->vlc[plane].table, VLC_BITS, 3);\ +} static void decode_plane_bitstream(HYuvContext *s, int count, int plane) { int i; count/=2; - if (count >= (get_bits_left(&s->gb)) / (31 * 2)) { - for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) { - READ_2PIX_PLANE(s->temp[0][2 * i], s->temp[0][2 * i + 1], plane); + if (s->bps <= 8) { + if (count >= (get_bits_left(&s->gb)) / (31 * 2)) { + for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) { + READ_2PIX_PLANE(s->temp[0][2 * i], s->temp[0][2 * i + 1], plane); + } + } else { + for(i=0; itemp[0][2 * i], s->temp[0][2 * i + 1], plane); + } } } else { - for(i=0; itemp[0][2 * i], s->temp[0][2 * i + 1], plane); + if (count >= (get_bits_left(&s->gb)) / (31 * 2)) { + for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) { + READ_2PIX_PLANE16(s->temp16[0][2 * i], s->temp16[0][2 * i + 1], plane); + } + } else { + for(i=0; itemp16[0][2 * i], s->temp16[0][2 * i + 1], plane); + } } } } @@ -601,6 +627,56 @@ static void draw_slice(HYuvContext *s, AVFrame *frame, int y) s->last_slice_end = y + h; } +static int left_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src, int w, int acc) +{ + if (s->bps <= 8) { + return s->dsp.add_hfyu_left_prediction(dst, src, w, acc); + } else { + //FIXME optimize + unsigned mask = s->n-1; + int i; + const uint16_t *src16 = (const uint16_t *)src; + uint16_t *dst16 = ( uint16_t *)dst; + + for(i=0; ibps <= 8) { + s->dsp.add_bytes(dst, src, w); + } else { + //FIXME optimize + const uint16_t *src16 = (const uint16_t *)src; + uint16_t *dst16 = ( uint16_t *)dst; + long i; + unsigned long msb = 0x1000100010001ULL << (s->bps-1); + unsigned long lsb = msb - 0x1000100010001ULL; + unsigned long mask = lsb + msb; + for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) { + long a = *(long*)(src16+i); + long b = *(long*)(dst16+i); + *(long*)(dst16+i) = ((a&lsb) + (b&lsb)) ^ ((a^b)&msb); + } + for(; idsp.add_hfyu_left_prediction(p->data[plane], s->temp[0], w, 0); + left = left_prediction(s, p->data[plane], s->temp[0], w, 0); for (y = 1; y < h; y++) { uint8_t *dst = p->data[plane] + p->linesize[plane]*y; decode_plane_bitstream(s, w, plane); - left = s->dsp.add_hfyu_left_prediction(dst, s->temp[0], w, left); + left = left_prediction(s, dst, s->temp[0], w, left); if (s->predictor == PLANE) { if (y > s->interlaced) { - s->dsp.add_bytes(dst, dst - fake_stride, w); + add_bytes(s, dst, dst - fake_stride, w); } } } @@ -680,14 +756,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, break; case MEDIAN: decode_plane_bitstream(s, w, plane); - left= s->dsp.add_hfyu_left_prediction(p->data[plane], s->temp[0], w, 0); + left= left_prediction(s, p->data[plane], s->temp[0], w, 0); y = 1; /* second line is left predicted for interlaced case */ if (s->interlaced) { decode_plane_bitstream(s, w, plane); - left = s->dsp.add_hfyu_left_prediction(p->data[plane] + p->linesize[plane], s->temp[0], w, left); + left = left_prediction(s, p->data[plane] + p->linesize[plane], s->temp[0], w, left); y++; } diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c index bd633a2fed..55e0b1e398 100644 --- a/libavcodec/huffyuvenc.c +++ b/libavcodec/huffyuvenc.c @@ -19,6 +19,8 @@ * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * yuva, gray, 4:4:4, 4:1:1, 4:1:0 and >8 bit per sample support sponsored by NOA */ /** @@ -33,25 +35,58 @@ #include "put_bits.h" #include "libavutil/pixdesc.h" +static inline void diff_bytes(HYuvContext *s, uint8_t *dst, + const uint8_t *src0, const uint8_t *src1, int w) +{ + int i; + if (s->bps <= 8) { + s->dsp.diff_bytes(dst, src0, src1, w); + } else { + const uint16_t *src016 = (const uint16_t *)src0; + const uint16_t *src116 = (const uint16_t *)src1; + uint16_t *dst16 = ( uint16_t *)dst; + + for (i = 0; i < w; i++) { + dst16[i] = src016[i] - src116[i]; + } + + //FIXME optimize + } +} + static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src, int w, int left) { int i; - if (w < 32) { + if (s->bps <= 8) { + if (w < 32) { + for (i = 0; i < w; i++) { + const int temp = src[i]; + dst[i] = temp - left; + left = temp; + } + return left; + } else { + for (i = 0; i < 16; i++) { + const int temp = src[i]; + dst[i] = temp - left; + left = temp; + } + s->dsp.diff_bytes(dst + 16, src + 16, src + 15, w - 16); + return src[w-1]; + } + } else { + const uint16_t *src16 = (const uint16_t *)src; + uint16_t *dst16 = ( uint16_t *)dst; + for (i = 0; i < w; i++) { - const int temp = src[i]; - dst[i] = temp - left; + const int temp = src16[i]; + dst16[i] = temp - left; left = temp; } return left; - } else { - for (i = 0; i < 16; i++) { - const int temp = src[i]; - dst[i] = temp - left; - left = temp; - } - s->dsp.diff_bytes(dst + 16, src + 16, src + 15, w - 16); - return src[w-1]; + + //FIXME optimize } } @@ -122,15 +157,16 @@ static int store_table(HYuvContext *s, const uint8_t *len, uint8_t *buf) { int i; int index = 0; + int n = s->n; - for (i = 0; i < 256;) { + for (i = 0; i < n;) { int val = len[i]; int repeat = 0; - for (; i < 256 && len[i] == val && repeat < 255; i++) + for (; i < n && len[i] == val && repeat < 255; i++) repeat++; - av_assert0(val < 32 && val >0 && repeat<256 && repeat>0); + av_assert0(val < 32 && val >0 && repeat < 256 && repeat>0); if (repeat > 7) { buf[index++] = val; buf[index++] = repeat; @@ -152,10 +188,10 @@ static int store_huffman_tables(HYuvContext *s, uint8_t *buf) count = 1 + s->alpha + 2*s->chroma; for (i = 0; i < count; i++) { - if ((ret = ff_huff_gen_len_table(s->len[i], s->stats[i], 256)) < 0) + if ((ret = ff_huff_gen_len_table(s->len[i], s->stats[i], s->n)) < 0) return ret; - if (ff_huffyuv_generate_bits_table(s->bits[i], s->len[i]) < 0) { + if (ff_huffyuv_generate_bits_table(s->bits[i], s->len[i], s->n) < 0) { return -1; } @@ -173,8 +209,8 @@ static av_cold int encode_init(AVCodecContext *avctx) ff_huffyuv_common_init(avctx); - avctx->extradata = av_mallocz(1024*30); // 256*3+4 == 772 - avctx->stats_out = av_mallocz(1024*30); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132 + avctx->extradata = av_mallocz(3*MAX_N + 4); + avctx->stats_out = av_mallocz(21*MAX_N*3 + 4); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132 if (!avctx->extradata || !avctx->stats_out) { av_freep(&avctx->stats_out); return AVERROR(ENOMEM); @@ -216,6 +252,7 @@ static av_cold int encode_init(AVCodecContext *avctx) case AV_PIX_FMT_YUVA422P: case AV_PIX_FMT_GBRAP: case AV_PIX_FMT_GRAY8A: + case AV_PIX_FMT_YUV420P10: s->version = 3; break; case AV_PIX_FMT_RGB32: @@ -301,14 +338,14 @@ static av_cold int encode_init(AVCodecContext *avctx) char *p = avctx->stats_in; for (i = 0; i < 4; i++) - for (j = 0; j < 256; j++) + for (j = 0; j < s->n; j++) s->stats[i][j] = 1; for (;;) { for (i = 0; i < 4; i++) { char *next; - for (j = 0; j < 256; j++) { + for (j = 0; j < s->n; j++) { s->stats[i][j] += strtol(p, &next, 0); if (next == p) return -1; p = next; @@ -318,8 +355,8 @@ static av_cold int encode_init(AVCodecContext *avctx) } } else { for (i = 0; i < 4; i++) - for (j = 0; j < 256; j++) { - int d = FFMIN(j, 256 - j); + for (j = 0; j < s->n; j++) { + int d = FFMIN(j, s->n - j); s->stats[i][j] = 100000000 / (d + 1); } @@ -333,14 +370,14 @@ static av_cold int encode_init(AVCodecContext *avctx) if (s->context) { for (i = 0; i < 4; i++) { int pels = s->width * s->height / (i ? 40 : 10); - for (j = 0; j < 256; j++) { - int d = FFMIN(j, 256 - j); + for (j = 0; j < s->n; j++) { + int d = FFMIN(j, s->n - j); s->stats[i][j] = pels/(d + 1); } } } else { for (i = 0; i < 4; i++) - for (j = 0; j < 256; j++) + for (j = 0; j < s->n; j++) s->stats[i][j]= 0; } @@ -412,7 +449,7 @@ static int encode_plane_bitstream(HYuvContext *s, int count, int plane) { int i; - if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) < 4 * count) { + if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) < count * s->bps / 2) { av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); return -1; } @@ -420,6 +457,9 @@ static int encode_plane_bitstream(HYuvContext *s, int count, int plane) #define LOAD2\ int y0 = s->temp[0][2 * i];\ int y1 = s->temp[0][2 * i + 1]; +#define LOAD2_16\ + int y0 = s->temp16[0][2 * i] & mask;\ + int y1 = s->temp16[0][2 * i + 1] & mask; #define STAT2\ s->stats[plane][y0]++;\ s->stats[plane][y1]++; @@ -429,6 +469,7 @@ static int encode_plane_bitstream(HYuvContext *s, int count, int plane) count /= 2; + if (s->bps <= 8) { if (s->flags & CODEC_FLAG_PASS1) { for (i = 0; i < count; i++) { LOAD2; @@ -450,6 +491,30 @@ static int encode_plane_bitstream(HYuvContext *s, int count, int plane) WRITE2; } } + } else { + int mask = s->n - 1; + if (s->flags & CODEC_FLAG_PASS1) { + for (i = 0; i < count; i++) { + LOAD2_16; + STAT2; + } + } + if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT) + return 0; + + if (s->context) { + for (i = 0; i < count; i++) { + LOAD2_16; + STAT2; + WRITE2; + } + } else { + for (i = 0; i < count; i++) { + LOAD2_16; + WRITE2; + } + } + } #undef LOAD2 #undef STAT2 #undef WRITE2 @@ -574,7 +639,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, return size; for (i = 0; i < 4; i++) - for (j = 0; j < 256; j++) + for (j = 0; j < s->n; j++) s->stats[i][j] >>= 1; } @@ -784,7 +849,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, uint8_t *dst = p->data[plane] + p->linesize[plane] * y; if (s->predictor == PLANE && s->interlaced < y) { - s->dsp.diff_bytes(s->temp[1], dst, dst - fake_stride, w); + diff_bytes(s, s->temp[1], dst, dst - fake_stride, w); left = sub_left_prediction(s, s->temp[0], s->temp[1], w , left); } else { @@ -810,7 +875,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, char *p = avctx->stats_out; char *end = p + 1024*30; for (i = 0; i < 4; i++) { - for (j = 0; j < 256; j++) { + for (j = 0; j < s->n; j++) { snprintf(p, end-p, "%"PRIu64" ", s->stats[i][j]); p += strlen(p); s->stats[i][j]= 0; @@ -883,6 +948,7 @@ AVCodec ff_ffvhuff_encoder = { AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P, AV_PIX_FMT_GBRAP, AV_PIX_FMT_GRAY8A, + AV_PIX_FMT_YUV420P10, AV_PIX_FMT_RGB24, AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE },