// Patch touching libavcodec/nvenc.c, nvenc.h, nvenc_h264.c and nvenc_hevc.c.
//
// What the hunks below do:
//  * nvenc_recalc_surfaces(): the computed surface requirement now starts at
//    FFMAX(4, frameIntervalP * 2 * 2). With rc_lookahead > 0 it is raised to at
//    least rc_lookahead + frameIntervalP + 1 + 4 and ctx->nb_surfaces is set to
//    the larger of the two values (a warning is logged only when the user value
//    was > 0 and too small). Without lookahead, a user value > 0 is raised (with
//    a warning) only when frameIntervalP > 1 and the value is below the
//    requirement; a value <= 0 is replaced by the requirement; any other user
//    value is kept. The final clamp to [1, MAX_REGISTERED_FRAMES] is unchanged.
//  * NvencSurface.lockCount is removed (nvenc.h) and NvencContext gains
//    AVFifoBuffer *unused_surface_queue, a FIFO of NvencSurface pointers:
//      - nvenc_setup_surfaces() allocates it with room for nb_surfaces pointers
//        and returns AVERROR(ENOMEM) if the allocation fails;
//      - nvenc_alloc_surface() pushes &ctx->surfaces[idx] just before returning 0;
//      - get_free_frame() returns NULL when av_fifo_size() is not > 0, otherwise
//        reads one pointer from the FIFO and returns it;
//      - ff_nvenc_encode_frame() pushes tmpoutsurf back right before setting
//        *got_packet = 1 (replacing the av_assert0()/lockCount-- pair);
//      - ff_nvenc_encode_close() releases the FIFO with av_fifo_freep().
//  * Option tables (h264 and hevc): "rc-lookahead" default/minimum change from
//    -1/-1 to 0/0, and the "surfaces" default changes from 32 to 0.
//
// NOTE(review): in the ff_nvenc_encode_frame() hunk the `if (res)` early return
//   loses `inSurf->lockCount = 0;` and nothing writes inSurf back to
//   unused_surface_queue there; presumably a surface obtained from
//   get_free_frame() is lost on that path. Confirm against the full function.
// NOTE(review): the return values of the added av_fifo_generic_write() and
//   av_fifo_generic_read() calls are not checked anywhere in these hunks.
// NOTE(review): the outer FFMAX(1, ...) in the lookahead branch looks redundant,
//   since nb_surfaces is already >= 4 from its initialiser.
// NOTE(review): this text appears to have lost its original line breaks; the
//   unified-diff line structure has to be restored before it can be applied.
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index cf054550c1..00766c25d0 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -644,16 +644,34 @@ static void nvenc_override_rate_control(AVCodecContext *avctx) static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx) { NvencContext *ctx = avctx->priv_data; - int nb_surfaces = 0; + // default minimum of 4 surfaces + // multiply by 2 for number of NVENCs on gpu (hardcode to 2) + // another multiply by 2 to avoid blocking next PBB group + int nb_surfaces = FFMAX(4, ctx->encode_config.frameIntervalP * 2 * 2); + // lookahead enabled if (ctx->rc_lookahead > 0) { - nb_surfaces = ctx->rc_lookahead + ((ctx->encode_config.frameIntervalP > 0) ? ctx->encode_config.frameIntervalP : 0) + 1 + 4; - if (ctx->nb_surfaces < nb_surfaces) { + // +1 is to account for lkd_bound calculation later + // +4 is to allow sufficient pipelining with lookahead + nb_surfaces = FFMAX(1, FFMAX(nb_surfaces, ctx->rc_lookahead + ctx->encode_config.frameIntervalP + 1 + 4)); + if (nb_surfaces > ctx->nb_surfaces && ctx->nb_surfaces > 0) + { av_log(avctx, AV_LOG_WARNING, "Defined rc_lookahead requires more surfaces, " "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces); - ctx->nb_surfaces = nb_surfaces; } + ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces); + } else { + if (ctx->encode_config.frameIntervalP > 1 && ctx->nb_surfaces < nb_surfaces && ctx->nb_surfaces > 0) + { + av_log(avctx, AV_LOG_WARNING, + "Defined b-frame requires more surfaces, " + "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces); + ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, nb_surfaces); + } + else if (ctx->nb_surfaces <= 0) + ctx->nb_surfaces = nb_surfaces; + // otherwise use user specified value } ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces)); @@ -1086,6 +1104,7 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx) NvencContext *ctx = avctx->priv_data; 
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; + NvencSurface* tmp_surface = &ctx->surfaces[idx]; NVENCSTATUS nv_status; NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 }; @@ -1121,8 +1140,6 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx) ctx->surfaces[idx].height = allocSurf.height; } - ctx->surfaces[idx].lockCount = 0; - /* 1MB is large enough to hold most output frames. * NVENC increases this automaticaly if it is not enough. */ allocOut.size = 1024 * 1024; @@ -1141,6 +1158,8 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx) ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer; ctx->surfaces[idx].size = allocOut.size; + av_fifo_generic_write(ctx->unused_surface_queue, &tmp_surface, sizeof(tmp_surface), NULL); + return 0; } @@ -1156,6 +1175,11 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx) ctx->timestamp_list = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t)); if (!ctx->timestamp_list) return AVERROR(ENOMEM); + + ctx->unused_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*)); + if (!ctx->unused_surface_queue) + return AVERROR(ENOMEM); + ctx->output_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*)); if (!ctx->output_surface_queue) return AVERROR(ENOMEM); @@ -1222,6 +1246,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) av_fifo_freep(&ctx->timestamp_list); av_fifo_freep(&ctx->output_surface_ready_queue); av_fifo_freep(&ctx->output_surface_queue); + av_fifo_freep(&ctx->unused_surface_queue); if (ctx->surfaces && avctx->pix_fmt == AV_PIX_FMT_CUDA) { for (i = 0; i < ctx->nb_surfaces; ++i) { @@ -1305,16 +1330,14 @@ av_cold int ff_nvenc_encode_init(AVCodecContext *avctx) static NvencSurface *get_free_frame(NvencContext *ctx) { - int i; + NvencSurface *tmp_surf; - for (i = 0; i < ctx->nb_surfaces; i++) { - if (!ctx->surfaces[i].lockCount) { - ctx->surfaces[i].lockCount = 1; 
- return &ctx->surfaces[i]; - } - } + if (!(av_fifo_size(ctx->unused_surface_queue) > 0)) + // queue empty + return NULL; - return NULL; + av_fifo_generic_read(ctx->unused_surface_queue, &tmp_surf, sizeof(tmp_surf), NULL); + return tmp_surf; } static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface, @@ -1712,7 +1735,6 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, } if (res) { - inSurf->lockCount = 0; return res; } @@ -1790,8 +1812,7 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, if (res) return res; - av_assert0(tmpoutsurf->lockCount); - tmpoutsurf->lockCount--; + av_fifo_generic_write(ctx->unused_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL); *got_packet = 1; } else { diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h index 7dec5cc685..763647b283 100644 --- a/libavcodec/nvenc.h +++ b/libavcodec/nvenc.h @@ -44,7 +44,6 @@ typedef struct NvencSurface NV_ENC_OUTPUT_PTR output_surface; NV_ENC_BUFFER_FORMAT format; int size; - int lockCount; } NvencSurface; typedef struct NvencDynLoadFunctions @@ -110,6 +109,7 @@ typedef struct NvencContext int nb_surfaces; NvencSurface *surfaces; + AVFifoBuffer *unused_surface_queue; AVFifoBuffer *output_surface_queue; AVFifoBuffer *output_surface_ready_queue; AVFifoBuffer *timestamp_list; diff --git a/libavcodec/nvenc_h264.c b/libavcodec/nvenc_h264.c index 2c55b60789..8d44b1f350 100644 --- a/libavcodec/nvenc_h264.c +++ b/libavcodec/nvenc_h264.c @@ -79,8 +79,8 @@ static const AVOption options[] = { 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" }, { "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" }, { "rc-lookahead", "Number of frames to look ahead for rate-control", - OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE }, - { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, 
MAX_REGISTERED_FRAMES, VE }, + OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, + { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE }, { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, { "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE }, { "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.", diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c index c32ba4220b..6d6750a3d4 100644 --- a/libavcodec/nvenc_hevc.c +++ b/libavcodec/nvenc_hevc.c @@ -78,8 +78,8 @@ static const AVOption options[] = { 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" }, { "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" }, { "rc-lookahead", "Number of frames to look ahead for rate-control", - OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE }, - { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, MAX_REGISTERED_FRAMES, VE }, + OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, + { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE }, { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, { "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE }, { "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",