From 546adc1fee1f6975af2cebb91307b8415dfa9172 Mon Sep 17 00:00:00 2001 From: Yusuke Nakamura Date: Tue, 31 May 2011 08:17:13 +0900 Subject: [PATCH 1/7] mov: Parse EC3SpecificBox (dec3 atom). Skip parsing the fields for additional independent substreams and their associated dependent substreams, since libavcodec's E-AC-3 decoder does not support them yet. Signed-off-by: Justin Ruggles --- libavformat/mov.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/libavformat/mov.c b/libavformat/mov.c index 747f062833..04deef6a16 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -565,6 +565,34 @@ static int mov_read_dac3(MOVContext *c, AVIOContext *pb, MOVAtom atom) return 0; } +static int mov_read_dec3(MOVContext *c, AVIOContext *pb, MOVAtom atom) +{ + AVStream *st; + int eac3info, acmod, lfeon, bsmod; + + if (c->fc->nb_streams < 1) + return 0; + st = c->fc->streams[c->fc->nb_streams-1]; + + /* No need to parse fields for additional independent substreams and its + * associated dependent substreams since libavcodec's E-AC-3 decoder + * does not support them yet. 
*/ + avio_rb16(pb); /* data_rate and num_ind_sub */ + eac3info = avio_rb24(pb); + bsmod = (eac3info >> 12) & 0x1f; + acmod = (eac3info >> 9) & 0x7; + lfeon = (eac3info >> 8) & 0x1; + st->codec->channel_layout = avpriv_ac3_channel_layout_tab[acmod]; + if (lfeon) + st->codec->channel_layout |= AV_CH_LOW_FREQUENCY; + st->codec->channels = av_get_channel_layout_nb_channels(st->codec->channel_layout); + st->codec->audio_service_type = bsmod; + if (st->codec->channels > 1 && bsmod == 0x7) + st->codec->audio_service_type = AV_AUDIO_SERVICE_TYPE_KARAOKE; + + return 0; +} + static int mov_read_chan(MOVContext *c, AVIOContext *pb, MOVAtom atom) { AVStream *st; @@ -2472,6 +2500,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = { { MKTAG('w','a','v','e'), mov_read_wave }, { MKTAG('e','s','d','s'), mov_read_esds }, { MKTAG('d','a','c','3'), mov_read_dac3 }, /* AC-3 info */ +{ MKTAG('d','e','c','3'), mov_read_dec3 }, /* EAC-3 info */ { MKTAG('w','i','d','e'), mov_read_wide }, /* place holder */ { MKTAG('w','f','e','x'), mov_read_wfex }, { MKTAG('c','m','o','v'), mov_read_cmov }, From e1e146a2d139bbc7e10b6ade68bf99abf285d6ad Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Fri, 27 Apr 2012 10:53:04 -0700 Subject: [PATCH 2/7] avio: make avio_close(NULL) a no-op This brings its behaviour in line with ffurl_close(NULL). 
--- libavformat/aviobuf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c index 01a36475b2..0353a17379 100644 --- a/libavformat/aviobuf.c +++ b/libavformat/aviobuf.c @@ -756,8 +756,12 @@ int avio_open2(AVIOContext **s, const char *filename, int flags, int avio_close(AVIOContext *s) { - URLContext *h = s->opaque; + URLContext *h; + if (!s) + return 0; + + h = s->opaque; av_free(s->buffer); av_free(s); return ffurl_close(h); From 3b52e9da10158029edbd01bafe2235e653e6eaec Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Fri, 27 Apr 2012 11:09:30 -0700 Subject: [PATCH 3/7] segment: reorder seg_write_header allocation As pointed out by Paul B Mahol, the previous code could lead to a null pointer dereference. --- libavformat/segment.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/libavformat/segment.c b/libavformat/segment.c index 1af412ad53..8274792bf3 100644 --- a/libavformat/segment.c +++ b/libavformat/segment.c @@ -113,10 +113,15 @@ static int seg_write_header(AVFormatContext *s) seg->offset_time = 0; seg->recording_time = seg->time * 1000000; + oc = avformat_alloc_context(); + + if (!oc) + return AVERROR(ENOMEM); + if (seg->list) if ((ret = avio_open2(&seg->pb, seg->list, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL)) < 0) - return ret; + goto fail; for (i = 0; i< s->nb_streams; i++) seg->has_video += @@ -127,13 +132,6 @@ static int seg_write_header(AVFormatContext *s) "More than a single video stream present, " "expect issues decoding it.\n"); - oc = avformat_alloc_context(); - - if (!oc) { - ret = AVERROR(ENOMEM); - goto fail; - } - oc->oformat = av_guess_format(seg->format, s->filename, NULL); if (!oc->oformat) { From a7fa5ce671d31d77ecb8b8d302f8df8e6e0768f6 Mon Sep 17 00:00:00 2001 From: Jordi Ortiz Date: Thu, 26 Apr 2012 21:14:07 +0200 Subject: [PATCH 4/7] libschroedingerdec: check malloc Signed-off-by: Diego Biurrun --- libavcodec/libschroedingerdec.c | 5 +++++ 1 
file changed, 5 insertions(+) diff --git a/libavcodec/libschroedingerdec.c b/libavcodec/libschroedingerdec.c index f573d8a1e6..68d9a255ef 100644 --- a/libavcodec/libschroedingerdec.c +++ b/libavcodec/libschroedingerdec.c @@ -106,6 +106,11 @@ static SchroBuffer *FindNextSchroParseUnit(SchroParseUnitContext *parse_ctx) return NULL; in_buf = av_malloc(next_pu_offset); + if (!in_buf) { + av_log(parse_ctx, AV_LOG_ERROR, "Unable to allocate input buffer\n"); + return NULL; + } + memcpy(in_buf, parse_ctx->buf, next_pu_offset); enc_buf = schro_buffer_new_with_data(in_buf, next_pu_offset); enc_buf->free = libschroedinger_decode_buffer_free; From 444f47b55c17f8e1207caeb67f3c529a23e3ae61 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 27 Apr 2012 22:19:37 +0200 Subject: [PATCH 5/7] h264: (trivial) remove unneeded macro argument in x86/cabac.h Signed-off-by: Ronald S. Bultje --- libavcodec/x86/cabac.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h index a6ec22831d..c1fc0d1139 100644 --- a/libavcodec/x86/cabac.h +++ b/libavcodec/x86/cabac.h @@ -27,7 +27,7 @@ #include "config.h" #if HAVE_FAST_CMOV -#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, range, tmp)\ +#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\ "mov "tmp" , %%ecx \n\t"\ "shl $17 , "tmp" \n\t"\ "cmp "low" , "tmp" \n\t"\ @@ -37,7 +37,7 @@ "xor %%ecx , "ret" \n\t"\ "sub "tmp" , "low" \n\t" #else /* HAVE_FAST_CMOV */ -#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, range, tmp)\ +#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\ "mov "tmp" , %%ecx \n\t"\ "shl $17 , "tmp" \n\t"\ "sub "low" , "tmp" \n\t"\ @@ -57,7 +57,7 @@ "and $0xC0 , "range" \n\t"\ "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\ "sub "range" , "tmp" \n\t"\ - BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, range, tmp) \ + BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \ "movzbl " MANGLE(ff_h264_norm_shift) 
"("range"), %%ecx \n\t"\ "shl %%cl , "range" \n\t"\ "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\ From 14e9ffc1e41424a530c83310611979c0d246417b Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 27 Apr 2012 22:19:38 +0200 Subject: [PATCH 6/7] h264: use one table instead of several for cabac functions The reason is this is easier for PIC code (in particular on darwin...). Keep the old names as pointers (static in cabac_functions.h so gcc knows these are just immediate offsets) so the c code can nicely stay the same (alternatively could use offsets directly in the functions needing the tables). This should produce the same code as before with non-pic and better code (confirmed) with pic. The assembly uses the new table but still won't work for PIC case. Signed-off-by: Ronald S. Bultje --- libavcodec/cabac.c | 54 +++++++++++++++++++++--------------- libavcodec/cabac.h | 5 ++++ libavcodec/cabac_functions.h | 8 ++++-- libavcodec/h264_cabac.c | 9 +----- libavcodec/x86/cabac.h | 18 +++++++----- libavcodec/x86/h264_i386.h | 24 +++++++++++----- 6 files changed, 70 insertions(+), 48 deletions(-) diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c index 4afcafb52b..bd7d9494b3 100644 --- a/libavcodec/cabac.c +++ b/libavcodec/cabac.c @@ -31,6 +31,29 @@ #include "cabac.h" #include "cabac_functions.h" +uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = { + 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +}; + static const uint8_t lps_range[64][4]= { {128,176,208,240}, {128,167,197,227}, {128,158,187,216}, {123,150,178,205}, {116,142,169,195}, {111,135,160,185}, {105,128,152,175}, {100,122,144,166}, @@ -50,8 +73,6 @@ static const uint8_t lps_range[64][4]= { { 6, 8, 9, 11}, { 6, 7, 9, 10}, { 6, 7, 8, 9}, { 2, 2, 2, 2}, }; -uint8_t ff_h264_mlps_state[4*64]; -uint8_t ff_h264_lps_range[4*2*64]; static uint8_t h264_mps_state[2 * 64]; static const uint8_t mps_state[64]= { @@ -76,27 +97,11 @@ static const uint8_t lps_state[64]= { 36,36,37,37,37,38,38,63, }; -const uint8_t ff_h264_norm_shift[512]= { - 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +static const uint8_t last_coeff_flag_offset_8x8[63] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 }; /** @@ -153,6 +158,9 @@ void ff_init_cabac_states(CABACContext *c){ ff_h264_mlps_state[128-2*i-2]= 0; } } + for(i=0; i< 63; i++){ + ff_h264_last_coeff_flag_offset_8x8[i] = last_coeff_flag_offset_8x8[i]; + } } #ifdef TEST diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index 5a99f0b2fe..1f1c943262 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -31,6 +31,11 @@ #include "put_bits.h" +#define H264_NORM_SHIFT_OFFSET 0 +#define H264_LPS_RANGE_OFFSET 512 +#define H264_MLPS_STATE_OFFSET 1024 +#define H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET 1280 + #define CABAC_BITS 16 #define CABAC_MASK ((1<low), "+&r"(c->range), "=&q"(tmp) : "r"(state), "r"(c), "i"(offsetof(CABACContext, bytestream)), - "i"(offsetof(CABACContext, bytestream_end)) + "i"(offsetof(CABACContext, bytestream_end)), + "i"(H264_NORM_SHIFT_OFFSET), + "i"(H264_LPS_RANGE_OFFSET), + "i"(H264_MLPS_STATE_OFFSET) : "%"REG_c, "memory" ); return bit & 1; diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h index e849a3d90c..add795e285 100644 --- a/libavcodec/x86/h264_i386.h +++ b/libavcodec/x86/h264_i386.h @@ -45,12 +45,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, int minusindex= 4-(intptr_t)index; int bit; x86_reg coeff_count; + __asm__ volatile( "3: \n\t" BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3", "%5", "%k0", "%b0", - "%a11(%6)", "%a12(%6)") + "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15") "test $1, %4 \n\t" " jz 4f \n\t" @@ -58,7 +59,7 @@ 
static int decode_significance_x86(CABACContext *c, int max_coeff, BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3", "%5", "%k0", "%b0", - "%a11(%6)", "%a12(%6)") + "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15") "sub %10, %1 \n\t" "mov %2, %0 \n\t" @@ -86,7 +87,10 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, "+&r"(c->low), "=&r"(bit), "+&r"(c->range) : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off), "i"(offsetof(CABACContext, bytestream)), - "i"(offsetof(CABACContext, bytestream_end)) + "i"(offsetof(CABACContext, bytestream_end)), + "i"(H264_NORM_SHIFT_OFFSET), + "i"(H264_LPS_RANGE_OFFSET), + "i"(H264_MLPS_STATE_OFFSET) : "%"REG_c, "memory" ); return coeff_count; @@ -100,6 +104,7 @@ static int decode_significance_8x8_x86(CABACContext *c, x86_reg coeff_count; x86_reg last=0; x86_reg state; + __asm__ volatile( "mov %1, %6 \n\t" "3: \n\t" @@ -110,18 +115,19 @@ static int decode_significance_8x8_x86(CABACContext *c, BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3", "%5", "%k0", "%b0", - "%a12(%7)", "%a13(%7)") + "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16") "mov %1, %k6 \n\t" "test $1, %4 \n\t" " jz 4f \n\t" - "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t" + "movzbl "MANGLE(ff_h264_cabac_tables)"+%a17(%k6), %k6\n\t" + "add %11, %6 \n\t" BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3", "%5", "%k0", "%b0", - "%a12(%7)", "%a13(%7)") + "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16") "mov %2, %0 \n\t" "mov %1, %k6 \n\t" @@ -147,7 +153,11 @@ static int decode_significance_8x8_x86(CABACContext *c, : "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base), "i"(offsetof(CABACContext, bytestream)), - "i"(offsetof(CABACContext, bytestream_end)) + "i"(offsetof(CABACContext, bytestream_end)), + "i"(H264_NORM_SHIFT_OFFSET), + "i"(H264_LPS_RANGE_OFFSET), + "i"(H264_MLPS_STATE_OFFSET), + "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) : "%"REG_c, "memory" ); return coeff_count; From 
9b9df1cdff149db5bbe6726b236934c5b5fbe21d Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 27 Apr 2012 22:19:39 +0200 Subject: [PATCH 7/7] h264: new assembly version of get_cabac for x86_64 with PIC This adds a hand-optimized assembly version for get_cabac much like the existing one, but it works if the table offsets are RIP-relative. Compared to the non-RIP-relative version this adds 2 lea instructions and it needs one extra register. get_cabac() gets about 40% faster, for an overall speedup of about 5%. Signed-off-by: Ronald S. Bultje --- libavcodec/h264_cabac.c | 2 +- libavcodec/x86/cabac.h | 90 +++++++++++++++++++++++++++++++++++--- libavcodec/x86/h264_i386.h | 53 +++++++++++++++------- 3 files changed, 121 insertions(+), 24 deletions(-) diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c index f27e72faf0..08a6a5b15d 100644 --- a/libavcodec/h264_cabac.c +++ b/libavcodec/h264_cabac.c @@ -1652,7 +1652,7 @@ decode_cabac_residual_internal(H264Context *h, DCTELEM *block, index[coeff_count++] = last;\ } const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD]; -#if ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) +#if ARCH_X86 && HAVE_7REGS coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, last_coeff_ctx_base, sig_off); } else { diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h index 32ce2b2762..6fc2ddb4dd 100644 --- a/libavcodec/x86/cabac.h +++ b/libavcodec/x86/cabac.h @@ -27,6 +27,71 @@ #include "libavutil/internal.h" #include "config.h" +#ifdef BROKEN_RELOCATIONS +#define TABLES_ARG , "r"(tables) + +#if HAVE_FAST_CMOV +#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \ + "cmp "low" , "tmp" \n\t"\ + "cmova %%ecx , "range" \n\t"\ + "sbb %%rcx , %%rcx \n\t"\ + "and %%ecx , "tmp" \n\t"\ + "xor %%rcx , "retq" \n\t"\ + "sub "tmp" , "low" \n\t" +#else /* HAVE_FAST_CMOV */ +#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \ +/* P4 Prescott has crappy 
cmov,sbb,64bit shift so avoid them */ \ + "sub "low" , "tmp" \n\t"\ + "sar $31 , "tmp" \n\t"\ + "sub %%ecx , "range" \n\t"\ + "and "tmp" , "range" \n\t"\ + "add %%ecx , "range" \n\t"\ + "shl $17 , %%ecx \n\t"\ + "and "tmp" , %%ecx \n\t"\ + "sub %%ecx , "low" \n\t"\ + "xor "tmp" , "ret" \n\t"\ + "movslq "ret" , "retq" \n\t" +#endif /* HAVE_FAST_CMOV */ + +#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \ + "movzbl "statep" , "ret" \n\t"\ + "mov "range" , "tmp" \n\t"\ + "and $0xC0 , "range" \n\t"\ + "lea ("ret", "range", 2), %%ecx \n\t"\ + "movzbl "lps_off"("tables", %%rcx), "range" \n\t"\ + "sub "range" , "tmp" \n\t"\ + "mov "tmp" , %%ecx \n\t"\ + "shl $17 , "tmp" \n\t"\ + BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \ + "movzbl "norm_off"("tables", "rangeq"), %%ecx \n\t"\ + "shl %%cl , "range" \n\t"\ + "movzbl "mlps_off"+128("tables", "retq"), "tmp" \n\t"\ + "shl %%cl , "low" \n\t"\ + "mov "tmpbyte" , "statep" \n\t"\ + "test "lowword" , "lowword" \n\t"\ + "jnz 2f \n\t"\ + "mov "byte" , %%"REG_c" \n\t"\ + "cmp "end" , %%"REG_c" \n\t"\ + "jge 1f \n\t"\ + "add"OPSIZE" $2 , "byte" \n\t"\ + "1: \n\t"\ + "movzwl (%%"REG_c") , "tmp" \n\t"\ + "lea -1("low") , %%ecx \n\t"\ + "xor "low" , %%ecx \n\t"\ + "shr $15 , %%ecx \n\t"\ + "bswap "tmp" \n\t"\ + "shr $15 , "tmp" \n\t"\ + "movzbl "norm_off"("tables", %%rcx), %%ecx \n\t"\ + "sub $0xFFFF , "tmp" \n\t"\ + "neg %%ecx \n\t"\ + "add $7 , %%ecx \n\t"\ + "shl %%cl , "tmp" \n\t"\ + "add "tmp" , "low" \n\t"\ + "2: \n\t" + +#else /* BROKEN_RELOCATIONS */ +#define TABLES_ARG + #if HAVE_FAST_CMOV #define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\ "mov "tmp" , %%ecx \n\t"\ @@ -52,7 +117,7 @@ "xor "tmp" , "ret" \n\t" #endif /* HAVE_FAST_CMOV */ -#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off) \ +#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, 
range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \ "movzbl "statep" , "ret" \n\t"\ "mov "range" , "tmp" \n\t"\ "and $0xC0 , "range" \n\t"\ @@ -85,29 +150,40 @@ "add "tmp" , "low" \n\t"\ "2: \n\t" -#if HAVE_7REGS && !defined(BROKEN_RELOCATIONS) +#endif /* BROKEN_RELOCATIONS */ + + +#if HAVE_7REGS #define get_cabac_inline get_cabac_inline_x86 static av_always_inline int get_cabac_inline_x86(CABACContext *c, uint8_t *const state) { int bit, tmp; +#ifdef BROKEN_RELOCATIONS + void *tables; __asm__ volatile( - BRANCHLESS_GET_CABAC("%0", "(%4)", "%1", "%w1", - "%2", "%3", "%b3", - "%a6(%5)", "%a7(%5)", "%a8", "%a9", "%a10") + "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t" + : "=&r"(tables) + ); +#endif + + __asm__ volatile( + BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1", + "%2", "%q2", "%3", "%b3", + "%a6(%5)", "%a7(%5)", "%a8", "%a9", "%a10", "%11") : "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp) : "r"(state), "r"(c), "i"(offsetof(CABACContext, bytestream)), "i"(offsetof(CABACContext, bytestream_end)), "i"(H264_NORM_SHIFT_OFFSET), "i"(H264_LPS_RANGE_OFFSET), - "i"(H264_MLPS_STATE_OFFSET) + "i"(H264_MLPS_STATE_OFFSET) TABLES_ARG : "%"REG_c, "memory" ); return bit & 1; } -#endif /* HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */ +#endif /* HAVE_7REGS */ #define get_cabac_bypass_sign get_cabac_bypass_sign_x86 static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val) diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h index add795e285..10ea32e0b8 100644 --- a/libavcodec/x86/h264_i386.h +++ b/libavcodec/x86/h264_i386.h @@ -36,7 +36,7 @@ //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet //as that would make optimization work hard) -#if HAVE_7REGS && !defined(BROKEN_RELOCATIONS) +#if HAVE_7REGS static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index, x86_reg last_off){ @@ -46,20 +46,29 @@ static int 
decode_significance_x86(CABACContext *c, int max_coeff, int bit; x86_reg coeff_count; +#ifdef BROKEN_RELOCATIONS + void *tables; + + __asm__ volatile( + "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t" + : "=&r"(tables) + ); +#endif + __asm__ volatile( "3: \n\t" - BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3", - "%5", "%k0", "%b0", - "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15") + BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3", + "%5", "%q5", "%k0", "%b0", + "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15", "%16") "test $1, %4 \n\t" " jz 4f \n\t" "add %10, %1 \n\t" - BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3", - "%5", "%k0", "%b0", - "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15") + BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3", + "%5", "%q5", "%k0", "%b0", + "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15", "%16") "sub %10, %1 \n\t" "mov %2, %0 \n\t" @@ -90,7 +99,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, "i"(offsetof(CABACContext, bytestream_end)), "i"(H264_NORM_SHIFT_OFFSET), "i"(H264_LPS_RANGE_OFFSET), - "i"(H264_MLPS_STATE_OFFSET) + "i"(H264_MLPS_STATE_OFFSET) TABLES_ARG : "%"REG_c, "memory" ); return coeff_count; @@ -105,6 +114,15 @@ static int decode_significance_8x8_x86(CABACContext *c, x86_reg last=0; x86_reg state; +#ifdef BROKEN_RELOCATIONS + void *tables; + + __asm__ volatile( + "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t" + : "=&r"(tables) + ); +#endif + __asm__ volatile( "mov %1, %6 \n\t" "3: \n\t" @@ -113,21 +131,24 @@ static int decode_significance_8x8_x86(CABACContext *c, "movzbl (%0, %6), %k6 \n\t" "add %9, %6 \n\t" - BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3", - "%5", "%k0", "%b0", - "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16") + BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3", + "%5", "%q5", "%k0", "%b0", + "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16", "%18") "mov %1, %k6 \n\t" "test $1, %4 \n\t" " jz 4f \n\t" +#ifdef BROKEN_RELOCATIONS + "movzbl %a17(%18, %q6), %k6\n\t" +#else "movzbl 
"MANGLE(ff_h264_cabac_tables)"+%a17(%k6), %k6\n\t" - +#endif "add %11, %6 \n\t" - BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3", - "%5", "%k0", "%b0", - "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16") + BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3", + "%5", "%q5", "%k0", "%b0", + "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16", "%18") "mov %2, %0 \n\t" "mov %1, %k6 \n\t" @@ -157,7 +178,7 @@ static int decode_significance_8x8_x86(CABACContext *c, "i"(H264_NORM_SHIFT_OFFSET), "i"(H264_LPS_RANGE_OFFSET), "i"(H264_MLPS_STATE_OFFSET), - "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) + "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG : "%"REG_c, "memory" ); return coeff_count;