diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index aefb70d602..f8556b01f1 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -53,6 +53,7 @@ av_cold int ffv1_common_init(AVCodecContext *avctx)
     s->last_picture.f = av_frame_alloc();
     if (!s->picture.f || !s->last_picture.f)
         return AVERROR(ENOMEM);
+
     ff_dsputil_init(&s->dsp, avctx);
 
     s->width  = avctx->width;
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index 0add4ffa57..166bc259f0 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -938,12 +938,12 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
             uint8_t *dst[4];
             ff_thread_await_progress(&f->last_picture, INT_MAX, 0);
             for (j = 0; j < 4; j++) {
-                int sh = (j==1 || j==2) ? f->chroma_h_shift : 0;
-                int sv = (j==1 || j==2) ? f->chroma_v_shift : 0;
-                dst[j] = p->data[j] + p->linesize[j]*
-                         (fs->slice_y>>sv) + (fs->slice_x>>sh);
-                src[j] = f->last_picture.f->data[j] + f->last_picture.f->linesize[j]*
-                         (fs->slice_y>>sv) + (fs->slice_x>>sh);
+                int sh = (j == 1 || j == 2) ? f->chroma_h_shift : 0;
+                int sv = (j == 1 || j == 2) ? f->chroma_v_shift : 0;
+                dst[j] = p->data[j] + p->linesize[j] *
+                         (fs->slice_y >> sv) + (fs->slice_x >> sh);
+                src[j] = f->last_picture.f->data[j] + f->last_picture.f->linesize[j] *
+                         (fs->slice_y >> sv) + (fs->slice_x >> sh);
             }
             av_image_copy(dst, p->linesize, (const uint8_t **)src,
                           f->last_picture.f->linesize,
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 246a0b551a..2c87d7ae86 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -830,6 +830,12 @@ static av_cold int encode_init(AVCodecContext *avctx)
     if ((ret = ffv1_allocate_initial_states(s)) < 0)
         return ret;
 
+    avctx->coded_frame = av_frame_alloc();
+    if (!avctx->coded_frame)
+        return AVERROR(ENOMEM);
+
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+
     if (!s->transparency)
         s->plane_count = 2;
     if (!s->chroma_planes && s->version > 3)
@@ -1000,7 +1006,7 @@ static int encode_slice(AVCodecContext *c, void *arg)
     int height       = fs->slice_height;
     int x            = fs->slice_x;
     int y            = fs->slice_y;
-    AVFrame *const p = f->picture.f;
+    const AVFrame *const p = f->picture.f;
     const int ps     = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step_minus1 + 1;
     int ret;
     RangeCoder c_bak = fs->c;
@@ -1008,7 +1014,7 @@ static int encode_slice(AVCodecContext *c, void *arg)
     fs->slice_coding_mode = 0;
 
 retry:
-    if (p->key_frame)
+    if (c->coded_frame->key_frame)
         ffv1_clear_slice_state(f, fs);
     if (f->version > 2) {
         encode_slice_header(f, fs);
@@ -1129,16 +1135,16 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     av_frame_unref(p);
     if ((ret = av_frame_ref(p, pict)) < 0)
         return ret;
-    p->pict_type = AV_PICTURE_TYPE_I;
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 
     if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) {
         put_rac(c, &keystate, 1);
-        p->key_frame = 1;
+        avctx->coded_frame->key_frame = 1;
         f->gob_count++;
         write_header(f);
     } else {
         put_rac(c, &keystate, 0);
-        p->key_frame = 0;
+        avctx->coded_frame->key_frame = 0;
     }
 
     if (f->ac > 1) {
@@ -1195,12 +1201,19 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     pkt->size   = buf_p - pkt->data;
     pkt->pts    =
     pkt->dts    = pict->pts;
-    pkt->flags |= AV_PKT_FLAG_KEY * p->key_frame;
+    pkt->flags |= AV_PKT_FLAG_KEY * avctx->coded_frame->key_frame;
     *got_packet = 1;
 
     return 0;
 }
 
+static av_cold int encode_close(AVCodecContext *avctx)
+{
+    av_frame_free(&avctx->coded_frame);
+    ffv1_close(avctx);
+    return 0;
+}
+
 #define OFFSET(x) offsetof(FFV1Context, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
@@ -1228,7 +1241,7 @@ AVCodec ff_ffv1_encoder = {
     .priv_data_size = sizeof(FFV1Context),
     .init           = encode_init,
     .encode2        = encode_frame,
-    .close          = ffv1_close,
+    .close          = encode_close,
     .capabilities   = CODEC_CAP_SLICE_THREADS | CODEC_CAP_DELAY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,   AV_PIX_FMT_YUVA420P,  AV_PIX_FMT_YUVA422P,  AV_PIX_FMT_YUV444P,
diff --git a/libavcodec/huffyuv.h b/libavcodec/huffyuv.h
index e34b562b76..e2cacc1ae4 100644
--- a/libavcodec/huffyuv.h
+++ b/libavcodec/huffyuv.h
@@ -78,7 +78,6 @@ typedef struct HYuvContext {
     uint32_t bits[3][256];
     uint32_t pix_bgr_map[1<<VLC_BITS];
     VLC vlc[6];                             //Y,U,V,YY,YU,YV
-    AVFrame picture;
     uint8_t *bitstream_buffer;
     unsigned int bitstream_buffer_size;
     DSPContext dsp;
diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c
index 9b904d4c81..3cfda9b767 100644
--- a/libavcodec/huffyuvdec.c
+++ b/libavcodec/huffyuvdec.c
@@ -256,7 +256,6 @@ static av_cold int decode_init(AVCodecContext *avctx)
     ff_huffyuv_common_init(avctx);
     memset(s->vlc, 0, 3 * sizeof(VLC));
 
-    avcodec_get_frame_defaults(&s->picture);
     s->interlaced = s->height > 288;
 
     s->bgr32 = 1;
diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c
index c56e281014..3a55d543a9 100644
--- a/libavcodec/huffyuvenc.c
+++ b/libavcodec/huffyuvenc.c
@@ -156,7 +156,12 @@ static av_cold int encode_init(AVCodecContext *avctx)
     }
     s->version = 2;
 
-    avctx->coded_frame = &s->picture;
+    avctx->coded_frame = av_frame_alloc();
+    if (!avctx->coded_frame)
+        return AVERROR(ENOMEM);
+
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+    avctx->coded_frame->key_frame = 1;
 
     switch (avctx->pix_fmt) {
     case AV_PIX_FMT_YUV420P:
@@ -446,16 +451,12 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     const int fake_ystride = s->interlaced ? pict->linesize[0]*2  : pict->linesize[0];
     const int fake_ustride = s->interlaced ? pict->linesize[1]*2  : pict->linesize[1];
     const int fake_vstride = s->interlaced ? pict->linesize[2]*2  : pict->linesize[2];
-    AVFrame * const p = &s->picture;
+    const AVFrame * const p = pict;
     int i, j, size = 0, ret;
 
     if ((ret = ff_alloc_packet2(avctx, pkt, width * height * 3 * 4 + FF_MIN_BUFFER_SIZE)) < 0)
         return ret;
 
-    *p = *pict;
-    p->pict_type = AV_PICTURE_TYPE_I;
-    p->key_frame = 1;
-
     if (s->context) {
         for (i = 0; i < 3; i++) {
             ff_huff_gen_len_table(s->len[i], s->stats[i]);
@@ -681,6 +682,8 @@ static av_cold int encode_end(AVCodecContext *avctx)
     av_freep(&avctx->extradata);
     av_freep(&avctx->stats_out);
 
+    av_frame_free(&avctx->coded_frame);
+
     return 0;
 }
 
diff --git a/libavcodec/jpegls.h b/libavcodec/jpegls.h
index 2dc3832517..10ae054a7b 100644
--- a/libavcodec/jpegls.h
+++ b/libavcodec/jpegls.h
@@ -33,7 +33,6 @@
 
 typedef struct JpeglsContext {
     AVCodecContext *avctx;
-    AVFrame picture;
 } JpeglsContext;
 
 typedef struct JLSState {
diff --git a/libavcodec/jpeglsenc.c b/libavcodec/jpeglsenc.c
index 308d6d3e68..030178f9ac 100644
--- a/libavcodec/jpeglsenc.c
+++ b/libavcodec/jpeglsenc.c
@@ -249,8 +249,7 @@ static void ls_store_lse(JLSState *state, PutBitContext *pb)
 static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
                              const AVFrame *pict, int *got_packet)
 {
-    JpeglsContext *const s = avctx->priv_data;
-    AVFrame *const p       = &s->picture;
+    const AVFrame *const p = pict;
     const int near         = avctx->prediction_method;
     PutBitContext pb, pb2;
     GetBitContext gb;
@@ -259,10 +258,6 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
     int i, size, ret;
     int comps;
 
-    *p           = *pict;
-    p->pict_type = AV_PICTURE_TYPE_I;
-    p->key_frame = 1;
-
     if (avctx->pix_fmt == AV_PIX_FMT_GRAY8 ||
         avctx->pix_fmt == AV_PIX_FMT_GRAY16)
         comps = 1;
@@ -349,7 +344,7 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
                 Rc[j] = last[j];
             }
             last = cur;
-            cur += s->picture.linesize[0];
+            cur += p->linesize[0];
         }
     } else if (avctx->pix_fmt == AV_PIX_FMT_BGR24) {
         int j, width;
@@ -363,7 +358,7 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
                 Rc[j] = last[j];
             }
             last = cur;
-            cur += s->picture.linesize[0];
+            cur += p->linesize[0];
         }
     }
 
@@ -403,12 +398,20 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
+static av_cold int encode_close(AVCodecContext *avctx)
+{
+    av_frame_free(&avctx->coded_frame);
+    return 0;
+}
+
 static av_cold int encode_init_ls(AVCodecContext *ctx)
 {
-    JpeglsContext *c = (JpeglsContext *)ctx->priv_data;
+    ctx->coded_frame = av_frame_alloc();
+    if (!ctx->coded_frame)
+        return AVERROR(ENOMEM);
 
-    c->avctx         = ctx;
-    ctx->coded_frame = &c->picture;
+    ctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+    ctx->coded_frame->key_frame = 1;
 
     if (ctx->pix_fmt != AV_PIX_FMT_GRAY8  &&
         ctx->pix_fmt != AV_PIX_FMT_GRAY16 &&
@@ -426,8 +429,8 @@ AVCodec ff_jpegls_encoder = {
     .long_name      = NULL_IF_CONFIG_SMALL("JPEG-LS"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_JPEGLS,
-    .priv_data_size = sizeof(JpeglsContext),
     .init           = encode_init_ls,
+    .close          = encode_close,
     .encode2        = encode_picture_ls,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_BGR24, AV_PIX_FMT_RGB24,
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index cffda4b4da..cd16b16f2a 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -775,6 +775,9 @@ typedef struct MpegEncContext {
     ERContext er;
 
     int error_rate;
+
+    /* temporary frames used by b_frame_strategy = 2 */
+    AVFrame *tmp_frames[MAX_B_FRAMES + 2];
 } MpegEncContext;
 
 #define REBASE_PICTURE(pic, new_ctx, old_ctx)             \
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 9288049470..25c48413be 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -237,7 +237,7 @@ av_cold int ff_dct_encode_init(MpegEncContext *s) {
 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 {
     MpegEncContext *s = avctx->priv_data;
-    int i;
+    int i, ret;
     int chroma_h_shift, chroma_v_shift;
 
     MPV_encode_defaults(s);
@@ -894,12 +894,29 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
     FF_ENABLE_DEPRECATION_WARNINGS;
 #endif
 
+    if (avctx->b_frame_strategy == 2) {
+        for (i = 0; i < s->max_b_frames + 2; i++) {
+            s->tmp_frames[i] = av_frame_alloc();
+            if (!s->tmp_frames[i])
+                return AVERROR(ENOMEM);
+
+            s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
+            s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
+            s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
+
+            ret = av_frame_get_buffer(s->tmp_frames[i], 32);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
     return 0;
 }
 
 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 {
     MpegEncContext *s = avctx->priv_data;
+    int i;
 
     ff_rate_control_uninit(s);
 
@@ -910,6 +927,9 @@ av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 
     av_freep(&avctx->extradata);
 
+    for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
+        av_frame_free(&s->tmp_frames[i]);
+
     return 0;
 }
 
@@ -1151,7 +1171,6 @@ static int estimate_best_b_count(MpegEncContext *s)
 {
     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
     AVCodecContext *c = avcodec_alloc_context3(NULL);
-    AVFrame input[MAX_B_FRAMES + 2];
     const int scale = s->avctx->brd_scale;
     int i, j, out_size, p_lambda, b_lambda, lambda2;
     int64_t best_rd  = INT64_MAX;
@@ -1186,19 +1205,9 @@ static int estimate_best_b_count(MpegEncContext *s)
         return -1;
 
     for (i = 0; i < s->max_b_frames + 2; i++) {
-        int ysize = c->width * c->height;
-        int csize = (c->width / 2) * (c->height / 2);
         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
                                                 s->next_picture_ptr;
 
-        avcodec_get_frame_defaults(&input[i]);
-        input[i].data[0]     = av_malloc(ysize + 2 * csize);
-        input[i].data[1]     = input[i].data[0] + ysize;
-        input[i].data[2]     = input[i].data[1] + csize;
-        input[i].linesize[0] = c->width;
-        input[i].linesize[1] =
-        input[i].linesize[2] = c->width / 2;
-
         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
             pre_input = *pre_input_ptr;
 
@@ -1208,13 +1217,13 @@ static int estimate_best_b_count(MpegEncContext *s)
                 pre_input.f.data[2] += INPLACE_OFFSET;
             }
 
-            s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
+            s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
                                  pre_input.f.data[0], pre_input.f.linesize[0],
                                  c->width,      c->height);
-            s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
+            s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
                                  pre_input.f.data[1], pre_input.f.linesize[1],
                                  c->width >> 1, c->height >> 1);
-            s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
+            s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
                                  pre_input.f.data[2], pre_input.f.linesize[2],
                                  c->width >> 1, c->height >> 1);
         }
@@ -1228,21 +1237,21 @@ static int estimate_best_b_count(MpegEncContext *s)
 
         c->error[0] = c->error[1] = c->error[2] = 0;
 
-        input[0].pict_type = AV_PICTURE_TYPE_I;
-        input[0].quality   = 1 * FF_QP2LAMBDA;
+        s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
+        s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
 
-        out_size = encode_frame(c, &input[0]);
+        out_size = encode_frame(c, s->tmp_frames[0]);
 
         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
 
         for (i = 0; i < s->max_b_frames + 1; i++) {
             int is_p = i % (j + 1) == j || i == s->max_b_frames;
 
-            input[i + 1].pict_type = is_p ?
+            s->tmp_frames[i + 1]->pict_type = is_p ?
                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
-            input[i + 1].quality   = is_p ? p_lambda : b_lambda;
+            s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
 
-            out_size = encode_frame(c, &input[i + 1]);
+            out_size = encode_frame(c, s->tmp_frames[i + 1]);
 
             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
         }
@@ -1264,10 +1273,6 @@ static int estimate_best_b_count(MpegEncContext *s)
     avcodec_close(c);
     av_freep(&c);
 
-    for (i = 0; i < s->max_b_frames + 2; i++) {
-        av_freep(&input[i].data[0]);
-    }
-
     return best_b_count;
 }